|
27 | 27 | __all__ = ("PyMongo", "ASCENDING", "DESCENDING", "BSONObjectIdConverter", "BSONProvider") |
28 | 28 |
|
29 | 29 | import hashlib |
30 | | -from collections import OrderedDict |
31 | 30 | from mimetypes import guess_type |
32 | 31 | from typing import Any |
33 | 32 |
|
@@ -67,8 +66,6 @@ def __init__( |
67 | 66 | ) -> None: |
68 | 67 | self.cx: MongoClient | None = None |
69 | 68 | self.db: Database | None = None |
70 | | - self._hash_cache = OrderedDict() |
71 | | - self._hash_limit = 128 |
72 | 69 |
|
73 | 70 | if app is not None: |
74 | 71 | self.init_app(app, uri, *args, **kwargs) |
@@ -187,19 +184,20 @@ def get_upload(filename): |
187 | 184 | response.content_length = fileobj.length |
188 | 185 | response.last_modified = fileobj.upload_date |
189 | 186 |
|
190 | | - # GridFS does not manage its own checksum, so we manage our own using its |
191 | | - # metadata storage, to be used for the etag. |
192 | | - sha1_sum = self._hash_cache.get(str(fileobj._id)) |
193 | | - if sha1_sum is None: |
194 | | - # Compute the checksum of the file for the etag. |
195 | | - pos = fileobj.tell() |
196 | | - raw_data = fileobj.read() |
197 | | - fileobj.seek(pos) |
198 | | - sha1_sum = hashlib.sha1(raw_data).hexdigest() |
199 | | - while len(self._hash_cache) >= self._hash_limit: |
200 | | - self._hash_cache.popitem() |
201 | | - self._hash_cache[str(fileobj._id)] = sha1_sum |
202 | | - response.set_etag(sha1_sum) |
| 187 | + # GridFS does not manage its own checksum. |
| 188 | + # Try to use a sha1 sum that we have added during a save_file. |
| 189 | + # Fall back to a legacy md5 sum if it exists. |
| 190 | + # Otherwise, compute the sha1 sum directly. |
| 191 | + try: |
| 192 | + etag = fileobj.sha1 |
| 193 | + except AttributeError: |
| 194 | + etag = fileobj.md5 |
| 195 | + if etag is None: |
| 196 | + pos = fileobj.tell() |
| 197 | + raw_data = fileobj.read() |
| 198 | + fileobj.seek(pos) |
| 199 | + etag = hashlib.sha1(raw_data).hexdigest() |
| 200 | + response.set_etag(etag) |
203 | 201 |
|
204 | 202 | response.cache_control.max_age = cache_for |
205 | 203 | response.cache_control.public = True |
@@ -249,5 +247,23 @@ def save_upload(filename): |
249 | 247 | db_obj = self.db |
250 | 248 | assert db_obj is not None, "Please initialize the app before calling save_file!" |
251 | 249 | storage = GridFS(db_obj, base) |
252 | | - id = storage.put(fileobj, filename=filename, content_type=content_type, **kwargs) |
253 | | - return id |
| 250 | + |
| 251 | + # GridFS does not manage its own checksum, so we attach a sha1 to the file |
| 252 | + # for use as an etag. |
| 253 | + hashingfile = _Wrapper(fileobj) |
| 254 | + with storage.new_file(filename=filename, content_type=content_type, **kwargs) as grid_file: |
| 255 | + grid_file.write(hashingfile) |
| 256 | + grid_file.sha1 = hashingfile.hash.hexdigest() |
| 257 | + return grid_file._id |
| 258 | + |
| 259 | + |
class _Wrapper:
    """File-like reader that feeds every chunk it returns into a SHA-1 hash.

    Wrapping a source file in this class lets a consumer stream the data in
    chunks while the digest of everything read so far accumulates in
    ``self.hash``, so the content never has to be read a second time just to
    compute a checksum.
    """

    def __init__(self, file: Any) -> None:
        """Wrap *file*, an object exposing a ``read(n)`` method."""
        self.file = file
        # Running digest of all data handed out by read().
        self.hash = hashlib.sha1()

    def read(self, n: int = -1):
        """Read up to *n* units from the wrapped file and hash them.

        *n* defaults to ``-1`` (read everything that remains), mirroring the
        standard file ``read`` signature, so the wrapper can be used wherever
        a plain file object is accepted.

        Returns whatever the wrapped file's ``read`` returned, unchanged.
        """
        data = self.file.read(n)
        # Empty data signals end-of-file; there is nothing to add to the hash.
        if data:
            self.hash.update(data)
        return data
0 commit comments