Implement class masking using the post-processing framework #999

base: main

Changes from all commits
f46e88c
d86ea4d
2c0f78f
ffba709
63cd84b
cab62bf
6d0e284
4cfe2d8
b42e069
cb7c83a
10103db
2e81d90
0baf8ce
f3caa18
65d4fef
e13afc1
7ecc18c
f214025
20ff4b6
5b66ae3
0419eff
1ad1e76
d97e8e0
2922c86
9012d7f
319bb3d
787ac0b
5e85b75
88ffba8
9519600
21e6648
7135e15
6632c31
23f80fb
336636a
0d90cde
23469e2
001464e
916d652
1b8700e
e4639f6
a466a52
a107597
da9b081
fc3f9e1
c96a865
6be1239
c4311aa
daed538
ami/main/api/serializers.py

```diff
@@ -874,10 +874,21 @@ class ClassificationPredictionItemSerializer(serializers.Serializer):
     logit = serializers.FloatField(read_only=True)
 
 
+class ClassificationAppliedToSerializer(serializers.ModelSerializer):
+    """Lightweight nested representation of the parent classification this was derived from."""
+
+    algorithm = AlgorithmSerializer(read_only=True)
+
+    class Meta:
+        model = Classification
+        fields = ["id", "created_at", "algorithm"]
```
Comment on lines +877 to +884

Contributor

Ruff RUF012 false positive — no change needed, but suppress if CI enforces it. The `fields` list on a DRF serializer `Meta` class is declarative configuration that is never mutated, so the warning can be silenced safely.

🧰 Tools
🪛 Ruff (0.15.1)
[warning] 884-884: Mutable default value for class attribute (RUF012)
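If CI does treat RUF012 as an error, an inline suppression on the flagged line is the least invasive fix. A minimal sketch, reusing the serializer exactly as added in this PR together with Ruff's standard `noqa` mechanism:

```python
class ClassificationAppliedToSerializer(serializers.ModelSerializer):
    """Lightweight nested representation of the parent classification this was derived from."""

    algorithm = AlgorithmSerializer(read_only=True)

    class Meta:
        model = Classification
        # DRF never mutates Meta.fields, so RUF012 is a false positive here;
        # the suppression only matters if CI escalates the warning to an error.
        fields = ["id", "created_at", "algorithm"]  # noqa: RUF012
```

A per-file ignore for `RUF012` in the Ruff configuration would also work and avoids repeating the comment on every serializer.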
The diff continues below the comment:

```diff
+
+
 class ClassificationSerializer(DefaultSerializer):
     taxon = TaxonNestedSerializer(read_only=True)
     algorithm = AlgorithmSerializer(read_only=True)
     top_n = ClassificationPredictionItemSerializer(many=True, read_only=True)
+    applied_to = ClassificationAppliedToSerializer(read_only=True)
 
     class Meta:
         model = Classification
@@ -890,6 +901,7 @@ class Meta:
             "scores",
             "logits",
             "top_n",
+            "applied_to",
             "created_at",
             "updated_at",
         ]
```
```diff
@@ -912,6 +924,8 @@ class Meta(ClassificationSerializer.Meta):
 
 
 class ClassificationListSerializer(DefaultSerializer):
+    applied_to = ClassificationAppliedToSerializer(read_only=True)
+
     class Meta:
         model = Classification
         fields = [
@@ -920,6 +934,7 @@ class Meta:
             "taxon",
             "score",
             "algorithm",
+            "applied_to",
             "created_at",
             "updated_at",
         ]
@@ -939,6 +954,7 @@ class Meta:
             "score",
             "terminal",
             "algorithm",
+            "applied_to",
             "created_at",
         ]
```
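For orientation, a response containing the new nested field would look roughly like the dictionary below. All values are invented for illustration, the non-nested fields are abbreviated, and the exact shape of `algorithm` depends on `AlgorithmSerializer`:

```python
# Hypothetical serialized Classification produced by class masking (values made up).
example_classification = {
    "id": 456,
    "taxon": {"id": 12, "name": "Noctua pronuba"},
    "score": 0.93,
    "algorithm": {"id": 9, "name": "Class-masking post-processor"},
    # Rendered by ClassificationAppliedToSerializer: the parent classification
    # this masked result was derived from.
    "applied_to": {
        "id": 123,
        "created_at": "2025-01-01T00:00:00Z",
        "algorithm": {"id": 3, "name": "Species classifier"},
    },
}
```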
ami/main/api/views.py

```diff
@@ -1225,7 +1225,17 @@ def get_queryset(self) -> QuerySet["Occurrence"]:
         if self.action != "list":
             qs = qs.prefetch_related(
                 Prefetch(
-                    "detections", queryset=Detection.objects.order_by("-timestamp").select_related("source_image")
+                    "detections",
+                    queryset=Detection.objects.order_by("-timestamp")
+                    .select_related("source_image")
+                    .prefetch_related(
+                        Prefetch(
+                            "classifications",
+                            queryset=Classification.objects.select_related(
+                                "taxon", "algorithm", "applied_to__algorithm"
+                            ),
+                        )
+                    ),
```
Comment on lines +1228 to +1238

Contributor

🧩 Analysis chain: scripts run against RolnickLab/antenna to trace how the occurrence list view loads predictions (command output omitted here):

```shell
#!/bin/bash
# Find the best_prediction definition on the Occurrence model/manager
rg -n "best_prediction" --type py -A 5 -B 2
# Get the full best_prediction property implementation
sed -n '2912,2930p' ami/main/models.py
# Find ClassificationNestedSerializer definition and fields
rg -n "class ClassificationNestedSerializer" --type py -A 15
# Check the exact serializer method mentioned in the review
sed -n '1331,1358p' ami/main/api/serializers.py
# Verify the prefetch setup in lines 1228-1238 of views.py
sed -n '1228,1238p' ami/main/api/views.py
# Find the predictions() method on the Occurrence model
rg -n "def predictions" ami/main/models.py -A 10
# Check if there's any prefetch for best_prediction in the viewset
rg -n "best_prediction\|Prefetch.*prediction" ami/main/api/views.py -B 3 -A 3
# Check the full context of the prefetch in views.py around line 1228
sed -n '1210,1250p' ami/main/api/views.py
# Find OccurrenceListSerializer and check if it uses get_determination_details
rg -n "class OccurrenceListSerializer" ami/main/api/serializers.py -A 30
# Check if determination_details field is in the serializer
rg -n "determination_details\|get_determination_details" ami/main/api/serializers.py -B 2 -A 2
# Find the method that serves determination_details
rg -n "def.*determination_details" ami/main/api/serializers.py -A 5
rg -n "get_determination_details" ami/main/api/serializers.py -B 5 -A 20
# Check which serializer is used for list vs detail views
rg -n "get_serializer_class\|OccurrenceListSerializer\|OccurrenceSerializer" ami/main/api/views.py -B 2 -A 3 | head -40
# Verify the action check for prefetch application
sed -n '1225,1245p' ami/main/api/views.py
# Check whether best_identification has the same issue
rg -n "best_identification" ami/main/api/serializers.py -B 2 -A 2
# Verify which serializer is used for the list action
rg -n "def get_serializer_class" ami/main/api/views.py -A 10 | grep -A 10 "Occurrence"
# Verify the prefetch condition once more
sed -n '1220,1250p' ami/main/api/views.py
# Double-check that the list serializer uses the problematic method
sed -n '1282,1330p' ami/main/api/serializers.py | grep -A 50 "class OccurrenceListSerializer"
# Verify the predictions() query more carefully
sed -n '2941,2960p' ami/main/models.py
# Check what with_identifications does (might it prefetch best_identification?)
rg -n "def with_identifications" ami/main/models.py -A 15
```

The N+1 issue is valid. The list view queryset explicitly excludes the detections prefetch (it is only applied when `self.action != "list"`), so each occurrence's prediction lookup hits the database on its own. For a list of 10 occurrences where the top classifications are masked results, this adds 30+ extra queries beyond the base query cost. The long-term fix is to annotate and prefetch what `best_prediction` needs on the list queryset as well.
The hunk closes with its existing context lines:

```diff
                 )
             )
```
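To make the reviewer's suggested direction concrete, here is a hedged sketch of a helper that the list branch of `get_queryset` could call. The relation path and the exact data `best_prediction` reads are assumptions, not code from this PR:

```python
from django.db.models import Prefetch, QuerySet

from ami.main.models import Classification, Occurrence


def prefetch_predictions_for_list(qs: QuerySet[Occurrence]) -> QuerySet[Occurrence]:
    """Hypothetical helper: prefetch the classifications that per-occurrence
    prediction lookups (e.g. best_prediction) read, so the list endpoint does
    not issue one query per occurrence."""
    return qs.prefetch_related(
        Prefetch(
            "detections__classifications",
            queryset=Classification.objects.select_related(
                "taxon", "algorithm", "applied_to__algorithm"
            ),
        )
    )
```

Whether this is the right trade-off depends on how many detections a listed occurrence typically has; annotating a single best prediction per occurrence would avoid loading the rest, which is why the comment frames that as the long-term fix.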
```diff
@@ -1640,7 +1650,7 @@ class ClassificationViewSet(DefaultViewSet, ProjectMixin):
     API endpoint for viewing and adding classification results from a model.
     """
 
-    queryset = Classification.objects.all().select_related("taxon", "algorithm")  # , "detection")
+    queryset = Classification.objects.all().select_related("taxon", "algorithm", "applied_to__algorithm")
     serializer_class = ClassificationSerializer
     filterset_fields = [
         # Docs about slow loading API browser because of large choice fields
```
New file (83 added lines; the file path is not shown in this excerpt): a Django management command that runs class masking on a collection.

```python
from django.core.management.base import BaseCommand, CommandError

from ami.main.models import SourceImageCollection, TaxaList
from ami.ml.models.algorithm import Algorithm
from ami.ml.post_processing.class_masking import ClassMaskingTask


class Command(BaseCommand):
    help = (
        "Run class masking post-processing on a source image collection. "
        "Masks classifier logits for species not in the given taxa list and recalculates softmax scores."
    )

    def add_arguments(self, parser):
        parser.add_argument("--collection-id", type=int, required=True, help="SourceImageCollection ID to process")
        parser.add_argument("--taxa-list-id", type=int, required=True, help="TaxaList ID to use as the species mask")
        parser.add_argument(
            "--algorithm-id", type=int, required=True, help="Algorithm ID whose classifications to mask"
        )
        parser.add_argument("--dry-run", action="store_true", help="Show what would be done without making changes")

    def handle(self, *args, **options):
        collection_id = options["collection_id"]
        taxa_list_id = options["taxa_list_id"]
        algorithm_id = options["algorithm_id"]
        dry_run = options["dry_run"]

        # Validate inputs
        try:
            collection = SourceImageCollection.objects.get(pk=collection_id)
        except SourceImageCollection.DoesNotExist:
            raise CommandError(f"SourceImageCollection {collection_id} does not exist.")

        try:
            taxa_list = TaxaList.objects.get(pk=taxa_list_id)
        except TaxaList.DoesNotExist:
            raise CommandError(f"TaxaList {taxa_list_id} does not exist.")

        try:
            algorithm = Algorithm.objects.get(pk=algorithm_id)
        except Algorithm.DoesNotExist:
            raise CommandError(f"Algorithm {algorithm_id} does not exist.")

        if not algorithm.category_map:
            raise CommandError(f"Algorithm '{algorithm.name}' does not have a category map.")

        from ami.main.models import Classification

        classification_count = (
            Classification.objects.filter(
                detection__source_image__collections=collection,
                terminal=True,
                algorithm=algorithm,
                scores__isnull=False,
            )
            .distinct()
            .count()
        )

        taxa_count = taxa_list.taxa.count()

        self.stdout.write(
            f"Collection: {collection.name} (id={collection.pk})\n"
            f"Taxa list: {taxa_list.name} (id={taxa_list.pk}, {taxa_count} taxa)\n"
            f"Algorithm: {algorithm.name} (id={algorithm.pk})\n"
            f"Classifications to process: {classification_count}"
        )

        if classification_count == 0:
            raise CommandError("No terminal classifications with scores found for this collection/algorithm.")

        if dry_run:
            self.stdout.write(self.style.WARNING("Dry run — no changes made."))
            return

        self.stdout.write("Running class masking...")
        task = ClassMaskingTask(
            collection_id=collection_id,
            taxa_list_id=taxa_list_id,
            algorithm_id=algorithm_id,
        )
        task.run()
        self.stdout.write(self.style.SUCCESS("Class masking completed."))
```
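The command's help text summarizes the core operation: mask the logits of classes outside the taxa list, then recompute softmax scores. The snippet below only illustrates that idea with NumPy; it is not the `ClassMaskingTask` implementation, and the function name is invented:

```python
import numpy as np


def mask_logits_and_rescore(logits: np.ndarray, keep_indices: list[int]) -> np.ndarray:
    """Illustrative only: exclude classes by masking their logits to -inf,
    then re-apply softmax so the kept classes' scores sum to 1."""
    masked = np.full(logits.shape, -np.inf)
    masked[keep_indices] = logits[keep_indices]
    # Numerically stable softmax over the masked logits.
    shifted = masked - masked[keep_indices].max()
    exp = np.exp(shifted)
    return exp / exp.sum()


# Example: a 6-class classifier restricted to classes 2 and 5.
scores = mask_logits_and_rescore(np.array([1.2, 0.3, 2.1, -0.5, 0.0, 1.9]), keep_indices=[2, 5])
# scores[2] and scores[5] sum to 1; all other entries are exactly 0.
```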
Another file in the diff (path not shown in this excerpt) drops a post-processing import:

```diff
@@ -1 +0,0 @@
-from . import small_size_filter  # noqa: F401
```