Skip to content

Commit 54d670a

Browse files
authored
Expand support for date formats in image filenames (#809)
* feat: support more date formats in filenames
* feat: update and centralize validation language
1 parent c213caa commit 54d670a

5 files changed

Lines changed: 68 additions & 19 deletions

File tree

ami/main/api/serializers.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import datetime
22

3+
from django.core.exceptions import ValidationError as DjangoValidationError
34
from django.db.models import QuerySet
45
from guardian.shortcuts import get_perms
56
from rest_framework import serializers
@@ -13,7 +14,6 @@
1314
from ami.ml.serializers import AlgorithmSerializer
1415
from ami.users.models import User
1516
from ami.users.roles import ProjectManager
16-
from ami.utils.dates import get_image_timestamp_from_filename
1717

1818
from ..models import (
1919
Classification,
@@ -32,6 +32,7 @@
3232
SourceImageUpload,
3333
TaxaList,
3434
Taxon,
35+
validate_filename_timestamp,
3536
)
3637

3738

@@ -1024,13 +1025,10 @@ def create(self, validated_data):
10241025

10251026
def validate_image(self, value):
10261027
# Ensure that image filename contains a timestamp
1027-
timestamp = get_image_timestamp_from_filename(value.name)
1028-
if timestamp is None:
1029-
# @TODO bring back EXIF support
1030-
raise serializers.ValidationError(
1031-
"Image filename does not contain a timestamp in the format YYYYMMDDHHMMSS "
1032-
" (e.g. 20210101120000-snapshot.jpg). EXIF support coming soon."
1033-
)
1028+
try:
1029+
validate_filename_timestamp(value.name)
1030+
except DjangoValidationError as e:
1031+
raise serializers.ValidationError(str(e))
10341032
return value
10351033

10361034

@@ -1100,8 +1098,6 @@ class Meta:
11001098
]
11011099

11021100
def get_permissions(self, instance, instance_data):
1103-
request = self.context.get("request")
1104-
11051101
request: Request = self.context["request"]
11061102
user = request.user
11071103
project = instance.get_project()

ami/main/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1242,7 +1242,7 @@ def validate_filename_timestamp(filename: str) -> None:
12421242
# Ensure filename has a timestamp
12431243
timestamp = ami.utils.dates.get_image_timestamp_from_filename(filename)
12441244
if not timestamp:
1245-
raise ValidationError("Filename must contain a timestamp in the format YYYYMMDDHHMMSS")
1245+
raise ValidationError("Image filename does not contain a valid timestamp (e.g. YYYYMMDDHHMMSS-snapshot.jpg).")
12461246

12471247

12481248
def create_source_image_from_upload(image: ImageFieldFile, deployment: Deployment, request=None) -> "SourceImage":

ami/utils/dates.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,13 @@
1111
def get_image_timestamp_from_filename(img_path, raise_error=False) -> datetime.datetime | None:
1212
"""
1313
Parse the date and time a photo was taken from its filename.
14+
All times are assumed to be in the local timezone. Timezone information is ignored.
15+
The maximum precision is seconds (milliseconds are ignored).
1416
15-
The timestamp must be in the format `YYYYMMDDHHMMSS` but can be
16-
preceded or followed by other characters (e.g. `84-20220916202959-snapshot.jpg`).
17+
Supports various formats with flexible delimiters. Supports text prefixes and suffixes.
18+
- Consecutive digits: YYYYMMDDHHMMSS
19+
- Date and time as separate groups: YYYYMMDD and HHMMSS with any delimiter between
20+
- Delimited formats within date or time groups
1721
1822
>>> out_fmt = "%Y-%m-%d %H:%M:%S"
1923
>>> # Aarhus date format
@@ -35,19 +39,34 @@ def get_image_timestamp_from_filename(img_path, raise_error=False) -> datetime.d
3539
"""
3640
name = pathlib.Path(img_path).stem
3741
date = None
42+
strptime_format = "%Y%m%d%H%M%S"
43+
44+
# Put more specific/longer patterns first if overlap is possible.
45+
# These could be combined into one pattern, but it would be less readable.
46+
consecutive_pattern = r"\d{14}" # YYYYMMDDHHMMSS
47+
two_groups_pattern = r"\d{8}[^\d]+\d{6}" # YYYYMMDD*HHMMSS
48+
# Allow single non-digit delimiters within components, and one or more between DD and HH
49+
delimited_pattern = r"\d{4}[^\d]\d{2}[^\d]\d{2}[^\d]+\d{2}[^\d]\d{2}[^\d]\d{2}" # YYYY*MM*DD*+HH*MM*SS
50+
51+
# Combine patterns with OR '|' but keep them in their own groups
52+
pattern = re.compile(f"({consecutive_pattern})|({two_groups_pattern})|({delimited_pattern})")
53+
54+
match = pattern.search(name)
55+
if match:
56+
# Get the full string matched by any of the patterns
57+
matched_string = match.group(0)
58+
# Remove all non-digit characters to create YYYYMMDDHHMMSS
59+
consecutive_date_string = re.sub(r"[^\d]", "", matched_string)
3860

39-
# Extract date from a filename using regex in the format %Y%m%d%H%M%S
40-
matches = re.search(r"(\d{14})", name)
41-
if matches:
4261
try:
43-
date = datetime.datetime.strptime(matches.group(), "%Y%m%d%H%M%S")
62+
date = datetime.datetime.strptime(consecutive_date_string, strptime_format)
4463
except ValueError:
4564
pass
4665

4766
if not date:
4867
try:
4968
date = dateutil.parser.parse(name, fuzzy=False) # Fuzzy will interpret "DSC_1974" as 1974-01-01
50-
except dateutil.parser.ParserError:
69+
except (dateutil.parser.ParserError, ValueError, OverflowError):
5170
pass
5271

5372
if not date and raise_error:

ami/utils/tests.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import datetime
2+
from unittest import TestCase
3+
4+
5+
class TestUtils(TestCase):
6+
def test_extract_timestamps(self):
7+
from ami.utils.dates import get_image_timestamp_from_filename
8+
9+
filenames_and_expected_dates = [
10+
("aarhus/20220810231507-00-07.jpg", "2022-08-10 23:15:07"),
11+
("diopsis/20230124191342.jpg", "2023-01-24 19:13:42"),
12+
("vermont_snapshots/20220622000459-108-snapshot.jpg", "2022-06-22 00:04:59"),
13+
("cyprus_snapshots/84-20220916202959-snapshot.jpg", "2022-09-16 20:29:59"),
14+
("wingscape/Project_20230801023001_4393.JPG", "2023-08-01 02:30:01"),
15+
("nikon/DSC_1974.JPG", None), # Example with no date
16+
("not_a_date/happybirthday.jpg", None), # Example with no date
17+
("cannon/IMG_20230801_123456.jpg", "2023-08-01 12:34:56"),
18+
("mothbox/2024_01_01 12_00_00.jpg", "2024-01-01 12:00:00"),
19+
("other_common/2024-01-01 12:00:00.jpg", "2024-01-01 12:00:00"),
20+
("other_common/2024-01-01T12:00:00.jpg", "2024-01-01 12:00:00"),
21+
]
22+
23+
for filename, expected_date in filenames_and_expected_dates:
24+
with self.subTest(filename=filename):
25+
# Convert the expected date string to a datetime object
26+
if expected_date is not None:
27+
expected_date = datetime.datetime.strptime(expected_date, "%Y-%m-%d %H:%M:%S")
28+
# Call the function and compare the result with the expected date
29+
# Only use raise_error=True when we expect a date to be present
30+
raise_error = expected_date is not None
31+
result = get_image_timestamp_from_filename(filename, raise_error=raise_error)
32+
self.assertEqual(
33+
result, expected_date, f"Failed for {filename}: expected {expected_date}, got {result}"
34+
)

ui/src/utils/language.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ const ENGLISH_STRINGS: { [key in STRING]: string } = {
418418

419419
/* MESSAGE */
420420
[STRING.MESSAGE_CAPTURE_FILENAME]:
421-
'Image filename must contain a timestamp in the format YYYYMMDDHHMMSS (e.g. 20210101120000-snapshot.jpg).',
421+
'Image filename must contain a timestamp with year, month, day, hours, minutes and seconds (e.g. 20210101120000-snapshot.jpg).',
422422
[STRING.MESSAGE_CAPTURE_LIMIT]:
423423
'A maximum of {{numCaptures}} images can be uploaded through the web browser. Configure a data source to upload data in bulk.',
424424
[STRING.MESSAGE_CAPTURE_SYNC_HIDDEN]:

0 commit comments

Comments
 (0)