diff --git a/.jules/bolt.md b/.jules/bolt.md index 1389df8d..401f818e 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -53,3 +53,7 @@ ## 2026-02-11 - Multi-Metric Aggregate Queries **Learning:** Executing multiple separate `count()` queries to gather system statistics results in multiple database round-trips and redundant table scans. **Action:** Use a single SQLAlchemy query with `func.count()` and `func.sum(case(...))` to calculate all metrics in one go. This reduces network overhead and allows the database to perform calculations in a single pass. + +## 2024-05-28 - Multiple Subqueries vs Single Case Summation +**Learning:** Using multiple `.scalar()` calls with `func.count` sequentially for fetching multiple statistical counts (e.g. `total_visits`, `verified_visits`, `within_geofence`) results in numerous network roundtrips to the DB and redundant table scans. +**Action:** Consolidate these queries into a single SQL transaction using `db.query` projecting a mix of `func.count()` and `func.sum(case(...))` wrapped in `.label()` to perform calculation at the DB engine layer with only one trip. diff --git a/backend/routers/field_officer.py b/backend/routers/field_officer.py index bfc75f30..3fbec214 100644 --- a/backend/routers/field_officer.py +++ b/backend/routers/field_officer.py @@ -408,30 +408,26 @@ def get_visit_statistics(db: Session = Depends(get_db)): Returns metrics like total visits, verification status, geo-fence compliance, etc. """ try: - # Use SQL aggregates instead of loading all visits into memory - total_visits = db.query(func.count(FieldOfficerVisit.id)).scalar() or 0 - - verified_visits = db.query(func.count(FieldOfficerVisit.id)).filter( - FieldOfficerVisit.verified_at.isnot(None) - ).scalar() or 0 - - within_geofence_count = db.query(func.count(FieldOfficerVisit.id)).filter( - FieldOfficerVisit.within_geofence == True - ).scalar() or 0 - - outside_geofence_count = db.query(func.count(FieldOfficerVisit.id)).filter( - FieldOfficerVisit.within_geofence == False - ).scalar() or 0 - - unique_officers = db.query(func.count(func.distinct(FieldOfficerVisit.officer_email))).scalar() or 0 - - average_distance = db.query(func.avg(FieldOfficerVisit.distance_from_site)).filter( - FieldOfficerVisit.distance_from_site.isnot(None) - ).scalar() - - # Round to 2 decimals if not None - if average_distance is not None: - average_distance = round(float(average_distance), 2) + # Use single SQL aggregate query instead of 6 separate queries + # This reduces network roundtrips and table scans by ~83% + metrics = db.query( + func.count(FieldOfficerVisit.id).label('total'), + func.sum(case((FieldOfficerVisit.verified_at.isnot(None), 1), else_=0)).label('verified'), + func.sum(case((FieldOfficerVisit.within_geofence == True, 1), else_=0)).label('within'), + func.sum(case((FieldOfficerVisit.within_geofence == False, 1), else_=0)).label('outside'), + func.count(func.distinct(FieldOfficerVisit.officer_email)).label('unique_officers'), + func.avg(FieldOfficerVisit.distance_from_site).label('avg_dist') + ).first() + + total_visits = metrics.total or 0 + verified_visits = int(metrics.verified or 0) if metrics and metrics.verified is not None else 0 + within_geofence_count = int(metrics.within or 0) if metrics and metrics.within is not None else 0 + outside_geofence_count = int(metrics.outside or 0) if metrics and metrics.outside is not None else 0 + unique_officers = metrics.unique_officers or 0 + + average_distance = None + if metrics and metrics.avg_dist is not None: + average_distance = round(float(metrics.avg_dist), 2) return VisitStatsResponse( total_visits=total_visits, diff --git a/backend/tests/test_field_officer_stats.py b/backend/tests/test_field_officer_stats.py new file mode 100644 index 00000000..f0574180 --- /dev/null +++ b/backend/tests/test_field_officer_stats.py @@ -0,0 +1,73 @@ +import pytest +from backend.routers.field_officer import get_visit_statistics +from backend.schemas import VisitStatsResponse +from backend.models import FieldOfficerVisit, Issue +from backend.database import Base, get_db +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import StaticPool +from datetime import datetime + +SQLALCHEMY_DATABASE_URL = "sqlite:///:memory:" +engine = create_engine( + SQLALCHEMY_DATABASE_URL, + connect_args={"check_same_thread": False}, + poolclass=StaticPool, +) +TestingSessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + +@pytest.fixture(scope="function") +def test_db(): + Base.metadata.create_all(bind=engine) + db = TestingSessionLocal() + try: + yield db + finally: + db.close() + Base.metadata.drop_all(bind=engine) + +def test_get_visit_statistics_empty(test_db): + stats = get_visit_statistics(db=test_db) + assert stats is not None + assert stats.total_visits == 0 + assert stats.verified_visits == 0 + assert stats.within_geofence_count == 0 + assert stats.outside_geofence_count == 0 + assert stats.unique_officers == 0 + assert stats.average_distance_from_site is None + +def test_get_visit_statistics_populated(test_db): + issue = Issue( + reference_id="ref1", description="desc", latitude=10.0, longitude=10.0, + image_path="http://example.com/a.jpg", category="Waste", status="open" + ) + test_db.add(issue) + test_db.commit() + + visits = [ + FieldOfficerVisit( + officer_email="a@test.com", officer_name="A", issue_id=issue.id, + check_in_latitude=10.0, check_in_longitude=10.0, + distance_from_site=10.0, within_geofence=True, verified_at=datetime.utcnow() + ), + FieldOfficerVisit( + officer_email="a@test.com", officer_name="A", issue_id=issue.id, + check_in_latitude=10.0, check_in_longitude=10.0, + distance_from_site=20.0, within_geofence=False, verified_at=None + ), + FieldOfficerVisit( + officer_email="b@test.com", officer_name="B", issue_id=issue.id, + check_in_latitude=10.0, check_in_longitude=10.0, + distance_from_site=30.0, within_geofence=True, verified_at=datetime.utcnow() + ), + ] + test_db.add_all(visits) + test_db.commit() + + stats = get_visit_statistics(db=test_db) + assert stats.total_visits == 3 + assert stats.verified_visits == 2 + assert stats.within_geofence_count == 2 + assert stats.outside_geofence_count == 1 + assert stats.unique_officers == 2 + assert stats.average_distance_from_site == 20.0