|
27 | 27 | __all__ = ("PyMongo", "ASCENDING", "DESCENDING", "BSONObjectIdConverter", "BSONProvider") |
28 | 28 |
|
29 | 29 | import hashlib |
| 30 | +from collections import OrderedDict |
30 | 31 | from mimetypes import guess_type |
31 | 32 | from typing import Any |
32 | 33 |
|
@@ -66,6 +67,8 @@ def __init__( |
66 | 67 | ) -> None: |
67 | 68 | self.cx: MongoClient | None = None |
68 | 69 | self.db: Database | None = None |
| 70 | + self._hash_cache = OrderedDict() |
| 71 | + self._hash_limit = 128 |
69 | 72 |
|
70 | 73 | if app is not None: |
71 | 74 | self.init_app(app, uri, *args, **kwargs) |
@@ -184,10 +187,18 @@ def get_upload(filename): |
184 | 187 | response.content_length = fileobj.length |
185 | 188 | response.last_modified = fileobj.upload_date |
186 | 189 |
|
187 | | - # Get or compute the sha1 sum for the etag. |
188 | | - metadata = fileobj.metadata |
189 | | - sha1_sum = metadata and metadata.get("sha1_sum") |
190 | | - sha1_sum = sha1_sum or self._compute_sha(fileobj) |
| 190 | + # GridFS does not manage its own checksum, so we compute one ourselves and |
| 191 | + # keep it in a size-limited in-memory cache, to be used for the etag. |
| 192 | + sha1_sum = self._hash_cache.get(str(fileobj._id)) |
| 193 | + if sha1_sum is None: |
| 194 | + # Compute the checksum of the file for the etag. |
| 195 | + pos = fileobj.tell() |
| 196 | + raw_data = fileobj.read() |
| 197 | + fileobj.seek(pos) |
| 198 | + sha1_sum = hashlib.sha1(raw_data).hexdigest() |
| 199 | + while len(self._hash_cache) >= self._hash_limit: |
| 200 | + self._hash_cache.popitem() |
| 201 | + self._hash_cache[str(fileobj._id)] = sha1_sum |
191 | 202 | response.set_etag(sha1_sum) |
192 | 203 |
|
193 | 204 | response.cache_control.max_age = cache_for |
@@ -238,19 +249,5 @@ def save_upload(filename): |
238 | 249 | db_obj = self.db |
239 | 250 | assert db_obj is not None, "Please initialize the app before calling save_file!" |
240 | 251 | storage = GridFS(db_obj, base) |
241 | | - |
242 | | - # GridFS does not manage its own hash, so we manage our own using its |
243 | | - # metadata storage, to be used for the etag. |
244 | | - sha1_sum = self._compute_sha(fileobj) |
245 | | - metadata = dict(sha1_sum=sha1_sum) |
246 | | - id = storage.put( |
247 | | - fileobj, filename=filename, content_type=content_type, metadata=metadata, **kwargs |
248 | | - ) |
| 252 | + id = storage.put(fileobj, filename=filename, content_type=content_type, **kwargs) |
249 | 253 | return id |
250 | | - |
251 | | - def _compute_sha(self, fileobj: Any) -> str: |
252 | | - """Compute the sha sum of a file object.""" |
253 | | - pos = fileobj.tell() |
254 | | - raw_data = fileobj.read() |
255 | | - fileobj.seek(pos) |
256 | | - return hashlib.sha1(raw_data).hexdigest() |
0 commit comments