Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ami/main/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ class SourceImageAdmin(AdminBase):
"checksum",
"checksum_algorithm",
"created_at",
"get_was_processed",
)

list_filter = (
Expand Down
2 changes: 2 additions & 0 deletions ami/main/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,7 @@ class Meta:
"source_images",
"source_images_count",
"source_images_with_detections_count",
"source_images_processed_count",
"occurrences_count",
"taxa_count",
"description",
Expand Down Expand Up @@ -1498,6 +1499,7 @@ class EventTimelineIntervalSerializer(serializers.Serializer):
captures_count = serializers.IntegerField()
detections_count = serializers.IntegerField()
detections_avg = serializers.IntegerField()
was_processed = serializers.BooleanField()


class EventTimelineMetaSerializer(serializers.Serializer):
Expand Down
12 changes: 9 additions & 3 deletions ami/main/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def timeline(self, request, pk=None):
)
resolution = datetime.timedelta(minutes=resolution_minutes)

qs = SourceImage.objects.filter(event=event)
qs = SourceImage.objects.filter(event=event).with_was_processed() # type: ignore

# Bulk update all source images where detections_count is null
update_detection_counts(qs=qs, null_only=True)
Expand All @@ -401,7 +401,7 @@ def timeline(self, request, pk=None):
source_images = list(
qs.filter(timestamp__range=(start_time, end_time))
.order_by("timestamp")
.values("id", "timestamp", "detections_count")
.values("id", "timestamp", "detections_count", "was_processed")
)

timeline = []
Expand All @@ -418,6 +418,7 @@ def timeline(self, request, pk=None):
"captures_count": 0,
"detections_count": 0,
"detection_counts": [],
"was_processed": False,
}

while image_index < len(source_images) and source_images[image_index]["timestamp"] <= interval_end:
Expand All @@ -435,6 +436,7 @@ def timeline(self, request, pk=None):
# Remove zero values and calculate the mode
interval_data["detection_counts"] = [x for x in interval_data["detection_counts"] if x > 0]
interval_data["detections_avg"] = mode(interval_data["detection_counts"] or [0])
interval_data["was_processed"] = image["was_processed"]

timeline.append(interval_data)
current_time = interval_end
Expand Down Expand Up @@ -705,6 +707,7 @@ class SourceImageCollectionViewSet(DefaultViewSet, ProjectMixin):
SourceImageCollection.objects.all()
.with_source_images_count() # type: ignore
.with_source_images_with_detections_count()
.with_source_images_processed_count()
.prefetch_related("jobs")
)
serializer_class = SourceImageCollectionSerializer
Expand All @@ -720,6 +723,7 @@ class SourceImageCollectionViewSet(DefaultViewSet, ProjectMixin):
"method",
"source_images_count",
"source_images_with_detections_count",
"source_images_processed_count",
"occurrences_count",
]

Expand Down Expand Up @@ -894,7 +898,9 @@ class DetectionViewSet(DefaultViewSet, ProjectMixin):
API endpoint that allows detections to be viewed or edited.
"""

queryset = Detection.objects.all().select_related("source_image", "detection_algorithm")
queryset = Detection.objects.exclude(Q(bbox__isnull=True) | Q(bbox=None) | Q(bbox=[])).select_related(
"source_image", "detection_algorithm"
)
serializer_class = DetectionSerializer
filterset_fields = ["source_image", "detection_algorithm", "source_image__project"]
ordering_fields = ["created_at", "updated_at", "detection_score", "timestamp"]
Expand Down
45 changes: 42 additions & 3 deletions ami/main/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1739,6 +1739,17 @@ def with_taxa_count(self, project: Project | None = None, request=None):
taxa_count=Coalesce(models.Subquery(taxa_subquery, output_field=models.IntegerField()), 0)
)

def with_was_processed(self):
    """
    Add a boolean `was_processed` annotation to every SourceImage in the
    queryset, true when at least one Detection row references the image.

    Queryset-level counterpart of `SourceImage.get_was_processed()`, so
    bulk queries can read the flag without a per-row lookup.
    """
    detections_for_image = Detection.objects.filter(source_image_id=models.OuterRef("pk"))
    return self.annotate(was_processed=models.Exists(detections_for_image))


class SourceImageManager(models.Manager.from_queryset(SourceImageQuerySet)):
pass
Expand Down Expand Up @@ -1838,7 +1849,15 @@ def size_display(self) -> str:
return filesizeformat(self.size)

def get_detections_count(self) -> int:
return self.detections.distinct().count()
# Detections count excludes detections without bounding boxes
# Detections with null bounding boxes are valid and indicate the image was successfully processed
return self.detections.exclude(Q(bbox__isnull=True) | Q(bbox=[])).count()

def get_was_processed(self, algorithm_key: str | None = None) -> bool:
    """
    Return True if this image has any detections at all — i.e. it was run
    through a detection pipeline — regardless of whether boxes were found.

    :param algorithm_key: optionally restrict the check to detections
        produced by the algorithm with this key.
    """
    detections = self.detections.all()
    if algorithm_key:
        detections = detections.filter(detection_algorithm__key=algorithm_key)
    return detections.exists()

def get_base_url(self) -> str | None:
"""
Expand Down Expand Up @@ -2008,6 +2027,7 @@ def update_detection_counts(qs: models.QuerySet[SourceImage] | None = None, null

subquery = models.Subquery(
Detection.objects.filter(source_image_id=models.OuterRef("pk"))
.exclude(Q(bbox__isnull=True) | Q(bbox=[]))
.values("source_image_id")
.annotate(count=models.Count("id"))
.values("count"),
Expand Down Expand Up @@ -3716,7 +3736,23 @@ def with_source_images_count(self):
def with_source_images_with_detections_count(self):
return self.annotate(
source_images_with_detections_count=models.Count(
"images", filter=models.Q(images__detections__isnull=False), distinct=True
"images",
filter=(
models.Q(images__detections__isnull=False)
& ~models.Q(images__detections__bbox__isnull=True)
& ~models.Q(images__detections__bbox=None)
& ~models.Q(images__detections__bbox=[])
Comment on lines +3741 to +3744
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

~Q(images__detections__bbox=None) is redundant — same issue patched elsewhere in this PR.

In Django ORM, Q(bbox__isnull=True) and Q(bbox=None) are identical for JSONField: both emit bbox IS NULL. Lines 3742 and 3743 therefore negate the exact same SQL predicate, so the second negation is a no-op. The same redundancy was already corrected in get_detections_count() as part of this PR, but was carried over to this new method.

🛠️ Proposed fix
     def with_source_images_with_detections_count(self):
         return self.annotate(
             source_images_with_detections_count=models.Count(
                 "images",
                 filter=(
-                    models.Q(images__detections__isnull=False)
-                    & ~models.Q(images__detections__bbox__isnull=True)
-                    & ~models.Q(images__detections__bbox=None)
-                    & ~models.Q(images__detections__bbox=[])
+                    ~models.Q(images__detections__bbox__isnull=True)
+                    & ~models.Q(images__detections__bbox=[])
                 ),
                 distinct=True,
             )
         )

Q(images__detections__isnull=False) is also redundant once ~Q(bbox__isnull=True) is in place (a non-null bbox already implies a detection row exists).

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@ami/main/models.py` around lines 3741 - 3744, Remove the redundant
null-checks in the queryset: drop the negated
Q(models.Q(images__detections__bbox=None)) since Q(...__bbox__isnull=True) and
Q(...__bbox=None) are equivalent for JSONField, and also remove the
now-unnecessary models.Q(images__detections__isnull=False) because excluding
bbox IS NULL already guarantees a detection row; update the queryset fragment
that currently contains models.Q(images__detections__isnull=False) &
~models.Q(images__detections__bbox__isnull=True) &
~models.Q(images__detections__bbox=None) &
~models.Q(images__detections__bbox=[]) to only keep the meaningful predicates
(e.g., ~models.Q(images__detections__bbox__isnull=True) and
~models.Q(images__detections__bbox=[])).

),
distinct=True,
)
)

def with_source_images_processed_count(self):
    """
    Annotate with `source_images_processed_count`: the number of distinct
    images in the collection that have at least one related detection row.

    NOTE: unlike the detections-count annotations, this filter deliberately
    does not exclude detections by bbox, so images whose only detection is
    a null-bbox placeholder still count as processed.
    """
    return self.annotate(
        source_images_processed_count=models.Count(
            "images",
            filter=models.Q(images__detections__isnull=False),
            distinct=True,
        )
    )

Expand Down Expand Up @@ -3827,7 +3863,10 @@ def source_images_count(self) -> int | None:

def source_images_with_detections_count(self) -> int | None:
    # This should always be pre-populated using queryset annotations
    # (otherwise we return None instead of issuing a per-instance query).
    # return self.images.filter(detections__isnull=False).count()
    return None

def source_images_processed_count(self) -> int | None:
    # This should always be pre-populated using queryset annotations
    # (see the corresponding `with_source_images_processed_count` queryset
    # method); returning None avoids a per-instance fallback query.
    return None

def occurrences_count(self) -> int | None:
Expand Down
61 changes: 59 additions & 2 deletions ami/ml/models/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
update_occurrence_determination,
)
from ami.ml.exceptions import PipelineNotConfigured
from ami.ml.models.algorithm import Algorithm, AlgorithmCategoryMap
from ami.ml.models.algorithm import Algorithm, AlgorithmCategoryMap, AlgorithmTaskType
from ami.ml.schemas import (
AlgorithmConfigResponse,
AlgorithmReference,
Expand Down Expand Up @@ -406,7 +406,10 @@ def get_or_create_detection(

:return: A tuple of the Detection object and a boolean indicating whether it was created
"""
serialized_bbox = list(detection_resp.bbox.dict().values())
if detection_resp.bbox is not None:
serialized_bbox = list(detection_resp.bbox.dict().values())
else:
serialized_bbox = None
detection_repr = f"Detection {detection_resp.source_image_id} {serialized_bbox}"

assert str(detection_resp.source_image_id) == str(
Expand Down Expand Up @@ -485,6 +488,7 @@ def create_detections(

existing_detections: list[Detection] = []
new_detections: list[Detection] = []

for detection_resp in detections:
source_image = source_image_map.get(detection_resp.source_image_id)
if not source_image:
Expand Down Expand Up @@ -810,6 +814,50 @@ class PipelineSaveResults:
total_time: float


def create_null_detections_for_undetected_images(
results: PipelineResultsResponse,
algorithms_known: dict[str, Algorithm],
logger: logging.Logger = logger,
) -> list[DetectionResponse]:
"""
Create null DetectionResponse objects (empty bbox) for images that have no detections.

:param results: The PipelineResultsResponse from the processing service
:param algorithms_known: Dictionary of algorithms keyed by algorithm key

:return: List of DetectionResponse objects with null bbox
"""
source_images_with_detections = {int(detection.source_image_id) for detection in results.detections}
null_detections_to_add = []

for source_img in results.source_images:
if int(source_img.id) not in source_images_with_detections:
Comment on lines +830 to +834
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

int() cast can raise ValueError on non-integer source image IDs.

Both int(detection.source_image_id) (line 830) and int(source_img.id) (line 834) will throw if the ID is ever non-numeric (e.g., UUID-based pipelines). The comparison should be done in string space to stay consistent with the schema types, or the cast should be guarded.

🛠️ Proposed fix — compare as strings
-    source_images_with_detections = {int(detection.source_image_id) for detection in results.detections}
+    source_images_with_detections = {str(detection.source_image_id) for detection in results.detections}

     for source_img in results.source_images:
-        if int(source_img.id) not in source_images_with_detections:
+        if str(source_img.id) not in source_images_with_detections:
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
source_images_with_detections = {int(detection.source_image_id) for detection in results.detections}
null_detections_to_add = []
for source_img in results.source_images:
if int(source_img.id) not in source_images_with_detections:
source_images_with_detections = {str(detection.source_image_id) for detection in results.detections}
null_detections_to_add = []
for source_img in results.source_images:
if str(source_img.id) not in source_images_with_detections:
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@ami/ml/models/pipeline.py` around lines 830 - 834, The int() casts on
detection.source_image_id and source_img.id can raise ValueError for non-numeric
IDs; change the membership check to use string comparison instead of int
conversion by building source_images_with_detections from
str(detection.source_image_id) (iterating results.detections) and comparing
against str(source_img.id) when iterating results.source_images (or otherwise
guard the cast with a try/except and fall back to string comparison) so
comparisons are robust for UUIDs and other non-integer IDs.

detector_algorithm_reference = None
for known_algorithm in algorithms_known.values():
if known_algorithm.task_type == AlgorithmTaskType.DETECTION:
detector_algorithm_reference = AlgorithmReference(
name=known_algorithm.name, key=known_algorithm.key
)

if detector_algorithm_reference is None:
logger.error(
f"Could not identify the detector algorithm. "
f"A null detection was not created for Source Image {source_img.id}"
)
continue

null_detections_to_add.append(
DetectionResponse(
source_image_id=source_img.id,
bbox=None,
algorithm=detector_algorithm_reference,
timestamp=now(),
)
)

return null_detections_to_add


@celery_app.task(soft_time_limit=60 * 4, time_limit=60 * 5)
def save_results(
results: PipelineResultsResponse | None = None,
Expand Down Expand Up @@ -866,6 +914,15 @@ def save_results(
"Algorithms and category maps must be registered before processing, using /info endpoint."
)

# Ensure all images have detections
# if not, add a NULL detection (empty bbox) to the results
null_detections = create_null_detections_for_undetected_images(
results=results,
algorithms_known=algorithms_known,
logger=job_logger,
)
results.detections = results.detections + null_detections
Comment on lines +917 to +924
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Null detections will cause images to be re-queued on every subsequent pipeline run.

filter_processed_images (line 87) yields an image if any existing detection lacks classifications:

elif existing_detections.filter(classifications__isnull=True).exists():
    yield image

A null detection (bbox=None) has no classifications, so this condition is always True for it. Every image that was processed and returned no detections will be re-sent on the next job run, defeating the deduplication logic. filter_processed_images needs a new branch before the classifications__isnull check to recognise null-only detections as "processed, no animal found — skip".

🛠️ Proposed fix in filter_processed_images (lines 81–115)
     for image in images:
         existing_detections = image.detections.filter(detection_algorithm__in=pipeline_algorithms)
+        real_detections = existing_detections.filter(bbox__isnull=False)
+        null_only = existing_detections.exists() and not real_detections.exists()
+
         if not existing_detections.exists():
             task_logger.debug(f"Image {image} needs processing: has no existing detections from pipeline's detector")
             yield image
-        elif existing_detections.filter(classifications__isnull=True).exists():
+        elif null_only:
+            task_logger.debug(f"Image {image} was processed with no detections (null detection exists), skipping")
+            continue
+        elif real_detections.filter(classifications__isnull=True).exists():
             task_logger.debug(...)
             yield image
         else:
             pipeline_algorithm_ids = set(classification_algorithms.values_list("id", flat=True))
-            detection_algorithm_ids = set(existing_detections.values_list("classifications__algorithm_id", flat=True))
+            detection_algorithm_ids = set(real_detections.values_list("classifications__algorithm_id", flat=True))
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@ami/ml/models/pipeline.py` around lines 917 - 924, filter_processed_images is
incorrectly treating the null placeholder detections (created by
create_null_detections_for_undetected_images, which use detections with
bbox=None) as "unprocessed" because they have no classifications; add a branch
in filter_processed_images before the existing classifications__isnull check
that detects when existing_detections consists only of null detections (e.g.,
all detections have bbox==None or an equivalent null marker) and treat that case
as "processed - no animal found" so the image is skipped on subsequent runs;
update any comments/tests to reflect that null-only detection sets should not
cause re-queuing.


detections = create_detections(
detections=results.detections,
algorithms_known=algorithms_known,
Expand Down
4 changes: 2 additions & 2 deletions ami/ml/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,14 @@ class Config:

class DetectionRequest(pydantic.BaseModel):
source_image: SourceImageRequest # the 'original' image
bbox: BoundingBox
bbox: BoundingBox | None = None
crop_image_url: str | None = None
algorithm: AlgorithmReference


class DetectionResponse(pydantic.BaseModel):
source_image_id: str
bbox: BoundingBox
bbox: BoundingBox | None = None
inference_time: float | None = None
algorithm: AlgorithmReference
timestamp: datetime.datetime
Expand Down
24 changes: 24 additions & 0 deletions ami/ml/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,30 @@ def test_project_pipeline_config(self):
final_config = self.pipeline.get_config(self.project.pk)
self.assertEqual(final_config["test_param"], "project_value")

def test_image_with_null_detection(self):
    """
    Saving pipeline results that contain no detections for an image should
    still leave the image marked as processed via a null (empty-bbox)
    detection row.
    """
    capture = self.test_images[0]
    results = self.fake_pipeline_results([capture], self.pipeline)

    # Simulate a pipeline response that found nothing in the image.
    results.detections = []
    save_results(results)
    capture.save()

    # The null placeholder is excluded from the detections count...
    self.assertEqual(capture.get_detections_count(), 0)
    # ...but exactly one detection row was still written for the image.
    self.assertEqual(capture.detections.distinct().count(), 1)

    # The image is reported as processed, both overall and per algorithm.
    self.assertEqual(capture.get_was_processed(), True)
    self.assertEqual(capture.get_was_processed(algorithm_key="random-detector"), True)


class TestAlgorithmCategoryMaps(TestCase):
def setUp(self):
Expand Down
14 changes: 14 additions & 0 deletions ui/src/data-services/models/capture-set.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ export class CaptureSet extends Entity {
return this._data.source_images_with_detections_count
}

/** Number of processed source images, read from the server-provided
 * `source_images_processed_count` field (may be absent). */
get numImagesProcessed(): number | undefined {
  return this._data.source_images_processed_count
}

get numImagesWithDetectionsLabel(): string {
const pct =
this.numImagesWithDetections && this.numImages
Expand All @@ -86,6 +90,16 @@ export class CaptureSet extends Entity {
)}%)`
}

/** Formatted "<count> (<pct>%)" label for processed images; the
 * percentage falls back to 0 when the total image count is missing. */
get numImagesProcessedLabel(): string {
  const processed = this.numImagesProcessed ?? 0
  const total = this.numImages

  let pct = 0
  if (total && total > 0) {
    pct = (processed / total) * 100
  }

  return `${processed.toLocaleString()} (${pct.toFixed(0)}%)`
}

get numJobs(): number | undefined {
return this._data.jobs?.length
}
Expand Down
5 changes: 5 additions & 0 deletions ui/src/data-services/models/timeline-tick.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export type ServerTimelineTick = {
captures_count: number
detections_count: number
detections_avg: number
was_processed: boolean
}

export class TimelineTick {
Expand All @@ -31,6 +32,10 @@ export class TimelineTick {
return this._timelineTick.detections_avg ?? 0
}

/** Whether any capture in this timeline interval was processed,
 * as reported by the server's `was_processed` flag. */
get wasProcessed(): boolean {
  return this._timelineTick.was_processed
}

get numCaptures(): number {
return this._timelineTick.captures_count ?? 0
}
Expand Down
10 changes: 10 additions & 0 deletions ui/src/pages/project/capture-sets/capture-set-columns.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,16 @@ export const columns: (projectId: string) => TableColumn<CaptureSet>[] = (
<BasicTableCell value={item.numImagesWithDetectionsLabel} />
),
},
{
id: 'total-processed-captures',
name: translate(STRING.FIELD_LABEL_TOTAL_PROCESSED_CAPTURES),
styles: {
textAlign: TextAlign.Right,
},
renderCell: (item: Collection) => (
<BasicTableCell value={item.numImagesProcessedLabel} />
Comment on lines +113 to +114
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Collection is not defined or imported — TypeScript compilation will fail.

Line 113 uses item: Collection as the renderCell parameter type, but Collection is never imported in this file. Every other renderCell in the same array uses item: CaptureSet. This is a copy-paste error that will produce a TypeScript compile error.

🐛 Proposed fix
-    renderCell: (item: Collection) => (
+    renderCell: (item: CaptureSet) => (
       <BasicTableCell value={item.numImagesProcessedLabel} />
     ),
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
renderCell: (item: Collection) => (
<BasicTableCell value={item.numImagesProcessedLabel} />
renderCell: (item: CaptureSet) => (
<BasicTableCell value={item.numImagesProcessedLabel} />
),
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@ui/src/pages/project/capture-sets/capture-set-columns.tsx` around lines 113 -
114, The renderCell parameter is typed as Collection but that type isn't
imported—replace the incorrect type with CaptureSet (i.e., change the renderCell
signature from item: Collection to item: CaptureSet) to match the other
renderCell entries and ensure CaptureSet is imported at the top of the file so
the BasicTableCell usage of item.numImagesProcessedLabel compiles.

),
},
{
id: 'occurrences',
name: translate(STRING.FIELD_LABEL_OCCURRENCES),
Expand Down
1 change: 1 addition & 0 deletions ui/src/pages/project/capture-sets/capture-sets.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export const CaptureSets = () => {
settings: true,
captures: true,
'captures-with-detections': true,
'total-processed-captures': true,
status: true,
}
)
Expand Down
Loading