Skip to content

Commit 1adb99d

Browse files
committed
Merge branch 'main' into 695-ability-to-disable-a-pipeline-for-a-given-project
2 parents 8979f1c + 9d737c2 commit 1adb99d

45 files changed

Lines changed: 553 additions & 257 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

ami/jobs/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ def run(cls, job: "Job"):
406406
results = job.pipeline.process_images(
407407
images=chunk,
408408
job_id=job.pk,
409+
project_id=job.project.pk,
409410
)
410411
job.logger.info(f"Processed image batch {i+1} in {time.time() - request_sent:.2f}s")
411412
except Exception as e:

ami/main/api/views.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,6 @@ class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
10041004
"event",
10051005
"deployment",
10061006
"determination__rank",
1007-
"detections__source_image",
10081007
]
10091008
ordering_fields = [
10101009
"created_at",
@@ -1287,27 +1286,34 @@ def list(self, request, *args, **kwargs):
12871286
return super().list(request, *args, **kwargs)
12881287

12891288

1290-
class ClassificationViewSet(DefaultViewSet):
1289+
class ClassificationViewSet(DefaultViewSet, ProjectMixin):
12911290
"""
12921291
API endpoint for viewing and adding classification results from a model.
12931292
"""
12941293

1295-
queryset = Classification.objects.all() # .select_related("taxon", "algorithm", "detection")
1294+
queryset = Classification.objects.all().select_related("taxon", "algorithm") # , "detection")
12961295
serializer_class = ClassificationSerializer
12971296
filterset_fields = [
1298-
"detection",
1299-
"detection__occurrence",
1297+
# Docs about slow loading API browser because of large choice fields
1298+
# https://www.django-rest-framework.org/topics/browsable-api/#handling-choicefield-with-large-numbers-of-items
13001299
"taxon",
13011300
"algorithm",
1302-
"detection__source_image",
13031301
"detection__source_image__project",
1302+
"detection__source_image__collections",
13041303
]
13051304
ordering_fields = [
13061305
"created_at",
13071306
"updated_at",
13081307
"score",
13091308
]
13101309

1310+
def get_queryset(self) -> QuerySet:
1311+
qs = super().get_queryset()
1312+
project = self.get_active_project()
1313+
if project:
1314+
qs = qs.filter(detection__source_image__project=project)
1315+
return qs
1316+
13111317
def get_serializer_class(self):
13121318
"""
13131319
Return a different serializer for list and detail views.

ami/main/charts.py

Lines changed: 158 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -28,32 +28,53 @@
2828

2929

3030
def captures_per_hour(project_pk: int):
31-
# Captures per hour
31+
# Average captures per hour across all days
3232
SourceImage = apps.get_model("main", "SourceImage")
33-
captures_per_hour = list(
33+
34+
# First get captures per hour per day
35+
captures_by_day_hour = (
3436
SourceImage.objects.filter(project=project_pk)
35-
.values("timestamp__hour")
36-
.annotate(num_captures=models.Count("pk"))
37-
.order_by("timestamp__hour")
3837
.exclude(timestamp=None)
38+
.values("timestamp__date", "timestamp__hour")
39+
.annotate(count=models.Count("pk"))
40+
.order_by("timestamp__date", "timestamp__hour")
3941
)
4042

41-
if captures_per_hour:
42-
hours, counts = list(zip(*captures_per_hour))
43-
hours, counts = list(zip(*[(d["timestamp__hour"], d["num_captures"]) for d in captures_per_hour]))
44-
# hours = map(int, hours)
45-
hours, counts = shift_to_nighttime(list(hours), list(counts))
46-
# @TODO show a tick for every hour even if there are no captures
47-
hours = [datetime.datetime.strptime(str(h), "%H").strftime("%-I:00 %p") for h in hours]
48-
ticktext = [f"{hours[0]}:00", f"{hours[-1]}:00"]
43+
# Calculate average per hour
44+
hour_totals = {}
45+
hour_counts = {}
46+
47+
for entry in captures_by_day_hour:
48+
hour = entry["timestamp__hour"]
49+
count = entry["count"]
50+
51+
if hour not in hour_totals:
52+
hour_totals[hour] = 0
53+
hour_counts[hour] = 0
54+
55+
hour_totals[hour] += count
56+
hour_counts[hour] += 1
57+
58+
# Calculate averages
59+
avg_captures_per_hour = [
60+
{"hour": hour, "avg_captures": round(hour_totals[hour] / hour_counts[hour], 0)} for hour in hour_totals.keys()
61+
]
62+
avg_captures_per_hour.sort(key=lambda x: x["hour"])
63+
64+
if avg_captures_per_hour:
65+
hours = [entry["hour"] for entry in avg_captures_per_hour]
66+
avgs = [entry["avg_captures"] for entry in avg_captures_per_hour]
4967

68+
hours, avgs = shift_to_nighttime(hours, avgs)
69+
hours = [datetime.datetime.strptime(str(h), "%H").strftime("%-I:00 %p") for h in hours]
70+
ticktext = [f"{hours[0]}", f"{hours[-1]}"]
5071
else:
51-
hours, counts = [], []
72+
hours, avgs = [], []
5273
ticktext = []
5374

5475
return {
55-
"title": "Captures per hour",
56-
"data": {"x": hours, "y": counts, "ticktext": ticktext},
76+
"title": "Average captures per hour",
77+
"data": {"x": hours, "y": avgs, "ticktext": ticktext},
5778
"type": "bar",
5879
}
5980

@@ -168,32 +189,54 @@ def events_per_month(project_pk: int):
168189

169190

170191
def detections_per_hour(project_pk: int):
171-
# Detections per hour
192+
# Average detections per hour across all days
172193
Detection = apps.get_model("main", "Detection")
173-
detections_per_hour = list(
194+
195+
# First get detections per hour per day
196+
detections_by_day_hour = (
174197
Detection.objects.filter(occurrence__project=project_pk)
175-
.values("source_image__timestamp__hour")
176-
.annotate(num_detections=models.Count("id"))
177-
.order_by("source_image__timestamp__hour")
178198
.exclude(source_image__timestamp=None)
199+
.values("source_image__timestamp__date", "source_image__timestamp__hour")
200+
.annotate(count=models.Count("id"))
201+
.order_by("source_image__timestamp__date", "source_image__timestamp__hour")
179202
)
180203

181-
# hours, counts = list(zip(*detections_per_hour))
182-
if detections_per_hour:
183-
hours, counts = list(
184-
zip(*[(d["source_image__timestamp__hour"], d["num_detections"]) for d in detections_per_hour])
185-
)
186-
hours, counts = shift_to_nighttime(list(hours), list(counts))
187-
# @TODO show a tick for every hour even if there are no detections
204+
# Calculate average per hour
205+
hour_totals = {}
206+
hour_counts = {}
207+
208+
for entry in detections_by_day_hour:
209+
hour = entry["source_image__timestamp__hour"]
210+
count = entry["count"]
211+
212+
if hour not in hour_totals:
213+
hour_totals[hour] = 0
214+
hour_counts[hour] = 0
215+
216+
hour_totals[hour] += count
217+
hour_counts[hour] += 1
218+
219+
# Calculate averages
220+
avg_detections_per_hour = [
221+
{"hour": hour, "avg_detections": round(hour_totals[hour] / hour_counts[hour], 0)}
222+
for hour in hour_totals.keys()
223+
]
224+
avg_detections_per_hour.sort(key=lambda x: x["hour"])
225+
226+
if avg_detections_per_hour:
227+
hours = [entry["hour"] for entry in avg_detections_per_hour]
228+
avgs = [entry["avg_detections"] for entry in avg_detections_per_hour]
229+
230+
hours, avgs = shift_to_nighttime(hours, avgs)
188231
hours = [datetime.datetime.strptime(str(h), "%H").strftime("%-I:00 %p") for h in hours]
189-
ticktext = [f"{hours[0]}:00", f"{hours[-1]}:00"]
232+
ticktext = [f"{hours[0]}", f"{hours[-1]}"]
190233
else:
191-
hours, counts = [], []
234+
hours, avgs = [], []
192235
ticktext = []
193236

194237
return {
195-
"title": "Detections per hour",
196-
"data": {"x": hours, "y": counts, "ticktext": ticktext},
238+
"title": "Average detections per hour",
239+
"data": {"x": hours, "y": avgs, "ticktext": ticktext},
197240
"type": "bar",
198241
}
199242

@@ -263,7 +306,7 @@ def event_detections_per_hour(event_pk: int):
263306

264307

265308
def event_top_taxa(event_pk: int, top_n: int = 10):
266-
# Horiziontal bar chart of top taxa
309+
# Horizontal bar chart of top taxa
267310
Taxon = apps.get_model("main", "Taxon")
268311
top_taxa = (
269312
Taxon.objects.filter(occurrences__event=event_pk)
@@ -274,21 +317,96 @@ def event_top_taxa(event_pk: int, top_n: int = 10):
274317
)
275318

276319
if top_taxa:
277-
taxa, counts = list(zip(*[(t["name"], t["num_detections"]) for t in top_taxa]))
320+
taxa, counts = list(zip(*[(t["name"], t["num_detections"]) for t in reversed(top_taxa)]))
278321
taxa = [t or "Unknown" for t in taxa]
279322
counts = [c or 0 for c in counts]
280323
else:
281324
taxa, counts = [], []
282325

283-
# Restrict number of top species if too many
284-
MAX_SPECIES = 10
285-
if len(taxa) > MAX_SPECIES:
286-
taxa = taxa[:MAX_SPECIES]
287-
counts = counts[:MAX_SPECIES]
288-
289326
return {
290327
"title": "Top species",
291328
"data": {"x": counts, "y": taxa},
292329
"type": "bar",
293330
"orientation": "h",
294331
}
332+
333+
334+
def project_top_taxa(project_pk: int, top_n: int = 10):
335+
Taxon = apps.get_model("main", "Taxon")
336+
top_taxa = (
337+
Taxon.objects.all()
338+
.with_occurrence_counts(project=project_pk) # type: ignore
339+
.order_by("-occurrence_count")[:top_n]
340+
)
341+
342+
if top_taxa:
343+
taxa, counts = list(zip(*[(t.name, t.occurrence_count) for t in reversed(top_taxa)]))
344+
else:
345+
taxa, counts = [], []
346+
347+
return {
348+
"title": "Top species observed",
349+
"data": {"x": counts, "y": taxa},
350+
"type": "bar",
351+
"orientation": "h",
352+
}
353+
354+
355+
def unique_species_per_month(project_pk: int):
356+
# Unique species per month
357+
Occurrence = apps.get_model("main", "Occurrence")
358+
unique_species_per_month = (
359+
Occurrence.objects.filter(project=project_pk)
360+
.values_list("event__start__month")
361+
.annotate(num_species=models.Count("determination_id", distinct=True))
362+
.order_by("event__start__month")
363+
)
364+
365+
# Create a dictionary mapping month numbers to species counts
366+
month_to_count = {month: count for month, count in unique_species_per_month}
367+
368+
# Create lists for all 12 months, using 0 for months with no data
369+
all_months = list(range(1, 13)) # 1-12 for January-December
370+
counts = [month_to_count.get(month, 0) for month in all_months]
371+
372+
# Generate labels for all months
373+
labels = [datetime.date(3000, month, 1).strftime("%b") for month in all_months]
374+
375+
# Show all months as tick values
376+
tickvals = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
377+
378+
return {
379+
"title": "Unique species per month",
380+
"data": {"x": labels, "y": counts, "tickvals": tickvals},
381+
"type": "bar",
382+
}
383+
384+
385+
def average_occurrences_per_month(project_pk: int):
386+
# Average occurrences per month
387+
Occurrence = apps.get_model("main", "Occurrence")
388+
occurrences_per_month = (
389+
Occurrence.objects.filter(project=project_pk)
390+
.values_list("event__start__month")
391+
.annotate(num_occurrences=models.Count("id"))
392+
.order_by("event__start__month")
393+
)
394+
395+
# Create a dictionary mapping month numbers to occurrence counts
396+
month_to_count = {month: count for month, count in occurrences_per_month}
397+
398+
# Create lists for all 12 months, using 0 for months with no data
399+
all_months = list(range(1, 13)) # 1-12 for January-December
400+
counts = [month_to_count.get(month, 0) for month in all_months]
401+
402+
# Generate labels for all months
403+
labels = [datetime.date(3000, month, 1).strftime("%b") for month in all_months]
404+
405+
# Show all months as tick vals
406+
tickvals = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
407+
408+
return {
409+
"title": "Average occurrences per month",
410+
"data": {"x": labels, "y": counts, "tickvals": tickvals},
411+
"type": "bar",
412+
}

ami/main/models.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@ def summary_data(self):
168168
else:
169169
plots.append(charts.events_per_month(project_pk=self.pk))
170170
# plots.append(charts.captures_per_month(project_pk=self.pk))
171+
plots.append(charts.project_top_taxa(project_pk=self.pk))
172+
plots.append(charts.average_occurrences_per_month(project_pk=self.pk))
173+
plots.append(charts.unique_species_per_month(project_pk=self.pk))
171174

172175
return plots
173176

@@ -2455,8 +2458,19 @@ def update_occurrence_determination(
24552458
return needs_update
24562459

24572460

2461+
class TaxonQuerySet(models.QuerySet):
2462+
def with_occurrence_counts(self, project: Project):
2463+
"""
2464+
Annotate each taxon with the count of its occurrences for a given project.
2465+
"""
2466+
qs = self
2467+
qs = qs.filter(occurrences__project=project)
2468+
2469+
return qs.annotate(occurrence_count=models.Count("occurrences", distinct=True))
2470+
2471+
24582472
@final
2459-
class TaxaManager(models.Manager):
2473+
class TaxonManager(models.Manager.from_queryset(TaxonQuerySet)):
24602474
def get_queryset(self):
24612475
# Prefetch parent and parents
24622476
# return super().get_queryset().select_related("parent").prefetch_related("parents")
@@ -2703,7 +2717,7 @@ class Taxon(BaseModel):
27032717
ordering = models.IntegerField(null=True, blank=True)
27042718
sort_phylogeny = models.BigIntegerField(blank=True, null=True)
27052719

2706-
objects: TaxaManager = TaxaManager()
2720+
objects: TaxonManager = TaxonManager()
27072721

27082722
# Type hints for auto-generated fields
27092723
parent_id: int | None

ami/ml/models/pipeline.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -931,7 +931,9 @@ def collect_images(
931931
skip_processed=skip_processed,
932932
)
933933

934-
def choose_processing_service_for_pipeline(self, job_id, pipeline_name) -> ProcessingService:
934+
def choose_processing_service_for_pipeline(
935+
self, job_id: int, pipeline_name: str, project_id: int
936+
) -> ProcessingService:
935937
# @TODO use the cached `last_checked_latency` and a max age to avoid checking every time
936938

937939
job = None
@@ -942,7 +944,12 @@ def choose_processing_service_for_pipeline(self, job_id, pipeline_name) -> Proce
942944
job = Job.objects.get(pk=job_id)
943945
task_logger = job.logger
944946

945-
processing_services = self.processing_services.all()
947+
# get all processing services that are associated with the provided pipeline project
948+
processing_services = self.processing_services.filter(projects=project_id)
949+
task_logger.info(
950+
f"Searching processing services:"
951+
f"{[processing_service.name for processing_service in processing_services]}"
952+
)
946953

947954
# check the status of all processing services
948955
timeout = 5 * 60.0 # 5 minutes
@@ -972,8 +979,8 @@ def choose_processing_service_for_pipeline(self, job_id, pipeline_name) -> Proce
972979

973980
return processing_service_lowest_latency
974981

975-
def process_images(self, images: typing.Iterable[SourceImage], job_id: int | None = None):
976-
processing_service = self.choose_processing_service_for_pipeline(job_id, self.name)
982+
def process_images(self, images: typing.Iterable[SourceImage], project_id: int, job_id: int | None = None):
983+
processing_service = self.choose_processing_service_for_pipeline(job_id, self.name, project_id)
977984

978985
if not processing_service.endpoint_url:
979986
raise ValueError(

0 commit comments

Comments
 (0)