diff --git a/.jules/bolt.md b/.jules/bolt.md index bb8f0d21..31ad2e7d 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -57,3 +57,7 @@ ## 2025-02-13 - [Substring pre-filtering for regex optimization] **Learning:** In hot paths (like `PriorityEngine._calculate_urgency`), executing pre-compiled regular expressions (`re.search`) for simple keyword extraction or grouping (e.g., `\b(word1|word2)\b`) is significantly slower than simple Python substring checks (`in text`). The regex engine execution overhead in Python adds up in high-iteration loops like priority scoring. **Action:** Always consider pre-extracting literal keywords from simple regex patterns and executing a quick `any(k in text for k in keywords)` pre-filter. Only invoke `regex.search` if the pre-filter passes, avoiding the expensive regex operation on texts that obviously do not match. + +## 2025-05-28 - [Consolidating Single-Condition Aggregates with Group By] +**Learning:** Performing multiple `func.count` queries with single conditions (e.g., `confirmation_type == 'confirmed'` and `confirmation_type == 'disputed'`) leads to redundant database scans and increased network latency. While `case` statements work well for disparate columns, for mutually exclusive values on the *same* column, a single `group_by` query is even faster (~35% improvement in benchmarks). +**Action:** When aggregating mutually exclusive conditions on a single column, prefer querying the column and its count with a `.group_by()` over multiple `.filter().count()` calls or multiple `case` statements. This single roundtrip is significantly more efficient for the database engine. 
diff --git a/backend/routers/grievances.py b/backend/routers/grievances.py index ad602753..42b453f3 100644 --- a/backend/routers/grievances.py +++ b/backend/routers/grievances.py @@ -402,15 +402,19 @@ def get_closure_status( GrievanceFollower.grievance_id == grievance_id ).scalar() - confirmations_count = db.query(func.count(ClosureConfirmation.id)).filter( - ClosureConfirmation.grievance_id == grievance_id, - ClosureConfirmation.confirmation_type == "confirmed" - ).scalar() + # Optimized: Consolidate multiple aggregate queries into a single database roundtrip + stats = db.query( + ClosureConfirmation.confirmation_type, + func.count(ClosureConfirmation.id) + ).filter(ClosureConfirmation.grievance_id == grievance_id).group_by(ClosureConfirmation.confirmation_type).all() - disputes_count = db.query(func.count(ClosureConfirmation.id)).filter( - ClosureConfirmation.grievance_id == grievance_id, - ClosureConfirmation.confirmation_type == "disputed" - ).scalar() + confirmations_count = 0 + disputes_count = 0 + for t, c in stats: + if t == "confirmed": + confirmations_count = c + elif t == "disputed": + disputes_count = c required_confirmations = max(1, int(total_followers * ClosureService.CONFIRMATION_THRESHOLD)) diff --git a/backend/tests/benchmark_closure_status.py b/backend/tests/benchmark_closure_status.py new file mode 100644 index 00000000..5194c0c0 --- /dev/null +++ b/backend/tests/benchmark_closure_status.py @@ -0,0 +1,134 @@ +import time +import uuid +import random +from datetime import datetime, timezone +from sqlalchemy import create_engine, select +from sqlalchemy.orm import sessionmaker +from backend.models import Base, Grievance, GrievanceFollower, ClosureConfirmation, Jurisdiction, JurisdictionLevel, SeverityLevel, GrievanceStatus +from backend.database import get_db + +# Create an in-memory SQLite database +engine = create_engine('sqlite:///:memory:') +Base.metadata.create_all(engine) +SessionLocal = sessionmaker(bind=engine) + +def setup_data(session): + 
auth = Jurisdiction( + level=JurisdictionLevel.LOCAL, + geographic_coverage={"states": ["Maharashtra"], "districts": ["Mumbai"]}, + responsible_authority="Test Auth", + default_sla_hours=24 + ) + session.add(auth) + session.commit() + + grievances = [] + for i in range(10): + g = Grievance( + unique_id=f"G-{i}", + category="Roads", + severity=SeverityLevel.MEDIUM, + status=GrievanceStatus.OPEN, + pincode="123456", + current_jurisdiction_id=auth.id, + assigned_authority="Test Auth", + sla_deadline=datetime.now(timezone.utc) + ) + session.add(g) + grievances.append(g) + session.commit() + + for g in grievances: + # Add followers + for j in range(50): + f = GrievanceFollower(grievance_id=g.id, user_email=f"user{j}@test.com") + session.add(f) + + # Add confirmations + for j in range(20): + c = ClosureConfirmation(grievance_id=g.id, user_email=f"conf{j}@test.com", confirmation_type="confirmed") + session.add(c) + + # Add disputes + for j in range(5): + d = ClosureConfirmation(grievance_id=g.id, user_email=f"disp{j}@test.com", confirmation_type="disputed") + session.add(d) + + session.commit() + return grievances + +def original_get_closure_status(session, grievance_id): + from sqlalchemy import func + + total_followers = session.query(func.count(GrievanceFollower.id)).filter( + GrievanceFollower.grievance_id == grievance_id + ).scalar() + + confirmations_count = session.query(func.count(ClosureConfirmation.id)).filter( + ClosureConfirmation.grievance_id == grievance_id, + ClosureConfirmation.confirmation_type == "confirmed" + ).scalar() + + disputes_count = session.query(func.count(ClosureConfirmation.id)).filter( + ClosureConfirmation.grievance_id == grievance_id, + ClosureConfirmation.confirmation_type == "disputed" + ).scalar() + + return total_followers, confirmations_count, disputes_count + +def optimized_get_closure_status(session, grievance_id): + from sqlalchemy import func, case + + total_followers = session.query(func.count(GrievanceFollower.id)).filter( + 
GrievanceFollower.grievance_id == grievance_id + ).scalar() + + # Consolidate multiple aggregate queries into a single database roundtrip + stats = session.query( + ClosureConfirmation.confirmation_type, + func.count(ClosureConfirmation.id) + ).filter(ClosureConfirmation.grievance_id == grievance_id).group_by(ClosureConfirmation.confirmation_type).all() + + confirmations_count = 0 + disputes_count = 0 + for t, c in stats: + if t == "confirmed": + confirmations_count = c + elif t == "disputed": + disputes_count = c + + return total_followers, confirmations_count, disputes_count + +def run_benchmark(): + session = SessionLocal() + grievances = setup_data(session) + + g_id = grievances[0].id + + # Warmup + for _ in range(10): + original_get_closure_status(session, g_id) + optimized_get_closure_status(session, g_id) + + ITERATIONS = 500 + + start_time = time.time() + for _ in range(ITERATIONS): + res1 = original_get_closure_status(session, g_id) + original_time = time.time() - start_time + + start_time = time.time() + for _ in range(ITERATIONS): + res2 = optimized_get_closure_status(session, g_id) + optimized_time = time.time() - start_time + + print(f"Original results: {res1}") + print(f"Optimized results: {res2}") + assert res1 == res2, "Results don't match!" + + print(f"Original method: {original_time:.4f} seconds ({original_time/ITERATIONS:.5f} per call)") + print(f"Optimized method: {optimized_time:.4f} seconds ({optimized_time/ITERATIONS:.5f} per call)") + print(f"Improvement: {(original_time - optimized_time) / original_time * 100:.2f}%") + +if __name__ == "__main__": + run_benchmark()