Skip to content

Commit 1adb99d

Browse files
committed
Merge branch 'main' into 695-ability-to-disable-a-pipeline-for-a-given-project
2 parents 8979f1c + 9d737c2 commit 1adb99d

45 files changed

Lines changed: 553 additions & 257 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

ami/jobs/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ def run(cls, job: "Job"):
406406
results = job.pipeline.process_images(
407407
images=chunk,
408408
job_id=job.pk,
409+
project_id=job.project.pk,
409410
)
410411
job.logger.info(f"Processed image batch {i+1} in {time.time() - request_sent:.2f}s")
411412
except Exception as e:

ami/main/api/views.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,6 @@ class OccurrenceViewSet(DefaultViewSet, ProjectMixin):
10041004
"event",
10051005
"deployment",
10061006
"determination__rank",
1007-
"detections__source_image",
10081007
]
10091008
ordering_fields = [
10101009
"created_at",
@@ -1287,27 +1286,34 @@ def list(self, request, *args, **kwargs):
12871286
return super().list(request, *args, **kwargs)
12881287

12891288

1290-
class ClassificationViewSet(DefaultViewSet):
1289+
class ClassificationViewSet(DefaultViewSet, ProjectMixin):
12911290
"""
12921291
API endpoint for viewing and adding classification results from a model.
12931292
"""
12941293

1295-
queryset = Classification.objects.all() # .select_related("taxon", "algorithm", "detection")
1294+
queryset = Classification.objects.all().select_related("taxon", "algorithm") # , "detection")
12961295
serializer_class = ClassificationSerializer
12971296
filterset_fields = [
1298-
"detection",
1299-
"detection__occurrence",
1297+
# Docs about slow loading API browser because of large choice fields
1298+
# https://www.django-rest-framework.org/topics/browsable-api/#handling-choicefield-with-large-numbers-of-items
13001299
"taxon",
13011300
"algorithm",
1302-
"detection__source_image",
13031301
"detection__source_image__project",
1302+
"detection__source_image__collections",
13041303
]
13051304
ordering_fields = [
13061305
"created_at",
13071306
"updated_at",
13081307
"score",
13091308
]
13101309

1310+
def get_queryset(self) -> QuerySet:
1311+
qs = super().get_queryset()
1312+
project = self.get_active_project()
1313+
if project:
1314+
qs = qs.filter(detection__source_image__project=project)
1315+
return qs
1316+
13111317
def get_serializer_class(self):
13121318
"""
13131319
Return a different serializer for list and detail views.

ami/main/charts.py

Lines changed: 158 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -28,32 +28,53 @@
2828

2929

3030
def captures_per_hour(project_pk: int):
31-
# Captures per hour
31+
# Average captures per hour across all days
3232
SourceImage = apps.get_model("main", "SourceImage")
33-
captures_per_hour = list(
33+
34+
# First get captures per hour per day
35+
captures_by_day_hour = (
3436
SourceImage.objects.filter(project=project_pk)
35-
.values("timestamp__hour")
36-
.annotate(num_captures=models.Count("pk"))
37-
.order_by("timestamp__hour")
3837
.exclude(timestamp=None)
38+
.values("timestamp__date", "timestamp__hour")
39+
.annotate(count=models.Count("pk"))
40+
.order_by("timestamp__date", "timestamp__hour")
3941
)
4042

41-
if captures_per_hour:
42-
hours, counts = list(zip(*captures_per_hour))
43-
hours, counts = list(zip(*[(d["timestamp__hour"], d["num_captures"]) for d in captures_per_hour]))
44-
# hours = map(int, hours)
45-
hours, counts = shift_to_nighttime(list(hours), list(counts))
46-
# @TODO show a tick for every hour even if there are no captures
47-
hours = [datetime.datetime.strptime(str(h), "%H").strftime("%-I:00 %p") for h in hours]
48-
ticktext = [f"{hours[0]}:00", f"{hours[-1]}:00"]
43+
# Calculate average per hour
44+
hour_totals = {}
45+
hour_counts = {}
46+
47+
for entry in captures_by_day_hour:
48+
hour = entry["timestamp__hour"]
49+
count = entry["count"]
50+
51+
if hour not in hour_totals:
52+
hour_totals[hour] = 0
53+
hour_counts[hour] = 0
54+
55+
hour_totals[hour] += count
56+
hour_counts[hour] += 1
57+
58+
# Calculate averages
59+
avg_captures_per_hour = [
60+
{"hour": hour, "avg_captures": round(hour_totals[hour] / hour_counts[hour], 0)} for hour in hour_totals.keys()
61+
]
62+
avg_captures_per_hour.sort(key=lambda x: x["hour"])
63+
64+
if avg_captures_per_hour:
65+
hours = [entry["hour"] for entry in avg_captures_per_hour]
66+
avgs = [entry["avg_captures"] for entry in avg_captures_per_hour]
4967

68+
hours, avgs = shift_to_nighttime(hours, avgs)
69+
hours = [datetime.datetime.strptime(str(h), "%H").strftime("%-I:00 %p") for h in hours]
70+
ticktext = [f"{hours[0]}", f"{hours[-1]}"]
5071
else:
51-
hours, counts = [], []
72+
hours, avgs = [], []
5273
ticktext = []
5374

5475
return {
55-
"title": "Captures per hour",
56-
"data": {"x": hours, "y": counts, "ticktext": ticktext},
76+
"title": "Average captures per hour",
77+
"data": {"x": hours, "y": avgs, "ticktext": ticktext},
5778
"type": "bar",
5879
}
5980

@@ -168,32 +189,54 @@ def events_per_month(project_pk: int):
168189

169190

170191
def detections_per_hour(project_pk: int):
171-
# Detections per hour
192+
# Average detections per hour across all days
172193
Detection = apps.get_model("main", "Detection")
173-
detections_per_hour = list(
194+
195+
# First get detections per hour per day
196+
detections_by_day_hour = (
174197
Detection.objects.filter(occurrence__project=project_pk)
175-
.values("source_image__timestamp__hour")
176-
.annotate(num_detections=models.Count("id"))
177-
.order_by("source_image__timestamp__hour")
178198
.exclude(source_image__timestamp=None)
199+
.values("source_image__timestamp__date", "source_image__timestamp__hour")
200+
.annotate(count=models.Count("id"))
201+
.order_by("source_image__timestamp__date", "source_image__timestamp__hour")
179202
)
180203

181-
# hours, counts = list(zip(*detections_per_hour))
182-
if detections_per_hour:
183-
hours, counts = list(
184-
zip(*[(d["source_image__timestamp__hour"], d["num_detections"]) for d in detections_per_hour])
185-
)
186-
hours, counts = shift_to_nighttime(list(hours), list(counts))
187-
# @TODO show a tick for every hour even if there are no detections
204+
# Calculate average per hour
205+
hour_totals = {}
206+
hour_counts = {}
207+
208+
for entry in detections_by_day_hour:
209+
hour = entry["source_image__timestamp__hour"]
210+
count = entry["count"]
211+
212+
if hour not in hour_totals:
213+
hour_totals[hour] = 0
214+
hour_counts[hour] = 0
215+
216+
hour_totals[hour] += count
217+
hour_counts[hour] += 1
218+
219+
# Calculate averages
220+
avg_detections_per_hour = [
221+
{"hour": hour, "avg_detections": round(hour_totals[hour] / hour_counts[hour], 0)}
222+
for hour in hour_totals.keys()
223+
]
224+
avg_detections_per_hour.sort(key=lambda x: x["hour"])
225+
226+
if avg_detections_per_hour:
227+
hours = [entry["hour"] for entry in avg_detections_per_hour]
228+
avgs = [entry["avg_detections"] for entry in avg_detections_per_hour]
229+
230+
hours, avgs = shift_to_nighttime(hours, avgs)
188231
hours = [datetime.datetime.strptime(str(h), "%H").strftime("%-I:00 %p") for h in hours]
189-
ticktext = [f"{hours[0]}:00", f"{hours[-1]}:00"]
232+
ticktext = [f"{hours[0]}", f"{hours[-1]}"]
190233
else:
191-
hours, counts = [], []
234+
hours, avgs = [], []
192235
ticktext = []
193236

194237
return {
195-
"title": "Detections per hour",
196-
"data": {"x": hours, "y": counts, "ticktext": ticktext},
238+
"title": "Average detections per hour",
239+
"data": {"x": hours, "y": avgs, "ticktext": ticktext},
197240
"type": "bar",
198241
}
199242

@@ -263,7 +306,7 @@ def event_detections_per_hour(event_pk: int):
263306

264307

265308
def event_top_taxa(event_pk: int, top_n: int = 10):
266-
# Horiziontal bar chart of top taxa
309+
# Horizontal bar chart of top taxa
267310
Taxon = apps.get_model("main", "Taxon")
268311
top_taxa = (
269312
Taxon.objects.filter(occurrences__event=event_pk)
@@ -274,21 +317,96 @@ def event_top_taxa(event_pk: int, top_n: int = 10):
274317
)
275318

276319
if top_taxa:
277-
taxa, counts = list(zip(*[(t["name"], t["num_detections"]) for t in top_taxa]))
320+
taxa, counts = list(zip(*[(t["name"], t["num_detections"]) for t in reversed(top_taxa)]))
278321
taxa = [t or "Unknown" for t in taxa]
279322
counts = [c or 0 for c in counts]
280323
else:
281324
taxa, counts = [], []
282325

283-
# Restrict number of top species if too many
284-
MAX_SPECIES = 10
285-
if len(taxa) > MAX_SPECIES:
286-
taxa = taxa[:MAX_SPECIES]
287-
counts = counts[:MAX_SPECIES]
288-
289326
return {
290327
"title": "Top species",
291328
"data": {"x": counts, "y": taxa},
292329
"type": "bar",
293330
"orientation": "h",
294331
}
332+
333+
334+
def project_top_taxa(project_pk: int, top_n: int = 10):
335+
Taxon = apps.get_model("main", "Taxon")
336+
top_taxa = (
337+
Taxon.objects.all()
338+
.with_occurrence_counts(project=project_pk) # type: ignore
339+
.order_by("-occurrence_count")[:top_n]
340+
)
341+
342+
if top_taxa:
343+
taxa, counts = list(zip(*[(t.name, t.occurrence_count) for t in reversed(top_taxa)]))
344+
else:
345+
taxa, counts = [], []
346+
347+
return {
348+
"title": "Top species observed",
349+
"data": {"x": counts, "y": taxa},
350+
"type": "bar",
351+
"orientation": "h",
352+
}
353+
354+
355+
def unique_species_per_month(project_pk: int):
356+
# Unique species per month
357+
Occurrence = apps.get_model("main", "Occurrence")
358+
unique_species_per_month = (
359+
Occurrence.objects.filter(project=project_pk)
360+
.values_list("event__start__month")
361+
.annotate(num_species=models.Count("determination_id", distinct=True))
362+
.order_by("event__start__month")
363+
)
364+
365+
# Create a dictionary mapping month numbers to species counts
366+
month_to_count = {month: count for month, count in unique_species_per_month}
367+
368+
# Create lists for all 12 months, using 0 for months with no data
369+
all_months = list(range(1, 13)) # 1-12 for January-December
370+
counts = [month_to_count.get(month, 0) for month in all_months]
371+
372+
# Generate labels for all months
373+
labels = [datetime.date(3000, month, 1).strftime("%b") for month in all_months]
374+
375+
# Show all months as tick values
376+
tickvals = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
377+
378+
return {
379+
"title": "Unique species per month",
380+
"data": {"x": labels, "y": counts, "tickvals": tickvals},
381+
"type": "bar",
382+
}
383+
384+
385+
def average_occurrences_per_month(project_pk: int):
386+
# Average occurrences per month
387+
Occurrence = apps.get_model("main", "Occurrence")
388+
occurrences_per_month = (
389+
Occurrence.objects.filter(project=project_pk)
390+
.values_list("event__start__month")
391+
.annotate(num_occurrences=models.Count("id"))
392+
.order_by("event__start__month")
393+
)
394+
395+
# Create a dictionary mapping month numbers to occurrence counts
396+
month_to_count = {month: count for month, count in occurrences_per_month}
397+
398+
# Create lists for all 12 months, using 0 for months with no data
399+
all_months = list(range(1, 13)) # 1-12 for January-December
400+
counts = [month_to_count.get(month, 0) for month in all_months]
401+
402+
# Generate labels for all months
403+
labels = [datetime.date(3000, month, 1).strftime("%b") for month in all_months]
404+
405+
# Show all months as tick vals
406+
tickvals = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
407+
408+
return {
409+
"title": "Average occurrences per month",
410+
"data": {"x": labels, "y": counts, "tickvals": tickvals},
411+
"type": "bar",
412+
}

ami/main/models.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@ def summary_data(self):
168168
else:
169169
plots.append(charts.events_per_month(project_pk=self.pk))
170170
# plots.append(charts.captures_per_month(project_pk=self.pk))
171+
plots.append(charts.project_top_taxa(project_pk=self.pk))
172+
plots.append(charts.average_occurrences_per_month(project_pk=self.pk))
173+
plots.append(charts.unique_species_per_month(project_pk=self.pk))
171174

172175
return plots
173176

@@ -2455,8 +2458,19 @@ def update_occurrence_determination(
24552458
return needs_update
24562459

24572460

2461+
class TaxonQuerySet(models.QuerySet):
2462+
def with_occurrence_counts(self, project: Project):
2463+
"""
2464+
Annotate each taxon with the count of its occurrences for a given project.
2465+
"""
2466+
qs = self
2467+
qs = qs.filter(occurrences__project=project)
2468+
2469+
return qs.annotate(occurrence_count=models.Count("occurrences", distinct=True))
2470+
2471+
24582472
@final
2459-
class TaxaManager(models.Manager):
2473+
class TaxonManager(models.Manager.from_queryset(TaxonQuerySet)):
24602474
def get_queryset(self):
24612475
# Prefetch parent and parents
24622476
# return super().get_queryset().select_related("parent").prefetch_related("parents")
@@ -2703,7 +2717,7 @@ class Taxon(BaseModel):
27032717
ordering = models.IntegerField(null=True, blank=True)
27042718
sort_phylogeny = models.BigIntegerField(blank=True, null=True)
27052719

2706-
objects: TaxaManager = TaxaManager()
2720+
objects: TaxonManager = TaxonManager()
27072721

27082722
# Type hints for auto-generated fields
27092723
parent_id: int | None

ami/ml/models/pipeline.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -931,7 +931,9 @@ def collect_images(
931931
skip_processed=skip_processed,
932932
)
933933

934-
def choose_processing_service_for_pipeline(self, job_id, pipeline_name) -> ProcessingService:
934+
def choose_processing_service_for_pipeline(
935+
self, job_id: int, pipeline_name: str, project_id: int
936+
) -> ProcessingService:
935937
# @TODO use the cached `last_checked_latency` and a max age to avoid checking every time
936938

937939
job = None
@@ -942,7 +944,12 @@ def choose_processing_service_for_pipeline(self, job_id, pipeline_name) -> Proce
942944
job = Job.objects.get(pk=job_id)
943945
task_logger = job.logger
944946

945-
processing_services = self.processing_services.all()
947+
# get all processing services that are associated with the provided pipeline project
948+
processing_services = self.processing_services.filter(projects=project_id)
949+
task_logger.info(
950+
f"Searching processing services:"
951+
f"{[processing_service.name for processing_service in processing_services]}"
952+
)
946953

947954
# check the status of all processing services
948955
timeout = 5 * 60.0 # 5 minutes
@@ -972,8 +979,8 @@ def choose_processing_service_for_pipeline(self, job_id, pipeline_name) -> Proce
972979

973980
return processing_service_lowest_latency
974981

975-
def process_images(self, images: typing.Iterable[SourceImage], job_id: int | None = None):
976-
processing_service = self.choose_processing_service_for_pipeline(job_id, self.name)
982+
def process_images(self, images: typing.Iterable[SourceImage], project_id: int, job_id: int | None = None):
983+
processing_service = self.choose_processing_service_for_pipeline(job_id, self.name, project_id)
977984

978985
if not processing_service.endpoint_url:
979986
raise ValueError(

0 commit comments

Comments
 (0)