Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,7 @@
## 2025-02-13 - [Substring pre-filtering for regex optimization]
**Learning:** In hot paths (like `PriorityEngine._calculate_urgency`), executing pre-compiled regular expressions (`re.search`) for simple keyword extraction or grouping (e.g., `\b(word1|word2)\b`) is significantly slower than simple Python substring checks (`in text`). The regex engine execution overhead in Python adds up in high-iteration loops like priority scoring.
**Action:** Always consider pre-extracting literal keywords from simple regex patterns and executing a quick `any(k in text for k in keywords)` pre-filter. Only invoke `regex.search` if the pre-filter passes, avoiding the expensive regex operation on texts that obviously do not match.

## 2024-05-28 - [Consolidating Single-Condition Aggregates with Group By]
**Learning:** Performing multiple `func.count` queries with single conditions (e.g., `confirmation_type == 'confirmed'` and `confirmation_type == 'disputed'`) leads to redundant database scans and increased network latency. While `case` statements work well for disparate columns, for mutually exclusive values on the *same* column, a single `group_by` query is even faster (~35% improvement in benchmarks).
**Action:** When aggregating mutually exclusive conditions on a single column, prefer querying the column and its count with a `.group_by()` over multiple `.filter().count()` calls or multiple `case` statements. This single roundtrip is significantly more efficient for the database engine.
20 changes: 12 additions & 8 deletions backend/routers/grievances.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,15 +402,19 @@ def get_closure_status(
GrievanceFollower.grievance_id == grievance_id
).scalar()

confirmations_count = db.query(func.count(ClosureConfirmation.id)).filter(
ClosureConfirmation.grievance_id == grievance_id,
ClosureConfirmation.confirmation_type == "confirmed"
).scalar()
# Optimized: Consolidate multiple aggregate queries into a single database roundtrip
stats = db.query(
ClosureConfirmation.confirmation_type,
func.count(ClosureConfirmation.id)
).filter(ClosureConfirmation.grievance_id == grievance_id).group_by(ClosureConfirmation.confirmation_type).all()

Comment on lines +406 to 410
disputes_count = db.query(func.count(ClosureConfirmation.id)).filter(
ClosureConfirmation.grievance_id == grievance_id,
ClosureConfirmation.confirmation_type == "disputed"
).scalar()
confirmations_count = 0
disputes_count = 0
for t, c in stats:
if t == "confirmed":
confirmations_count = c
elif t == "disputed":
disputes_count = c

required_confirmations = max(1, int(total_followers * ClosureService.CONFIRMATION_THRESHOLD))

Expand Down
134 changes: 134 additions & 0 deletions backend/tests/benchmark_closure_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import time
import uuid
import random
from datetime import datetime, timezone
from sqlalchemy import create_engine, select
from sqlalchemy.orm import sessionmaker
from backend.models import Base, Grievance, GrievanceFollower, ClosureConfirmation, Jurisdiction, JurisdictionLevel, SeverityLevel, GrievanceStatus
from backend.database import get_db
Comment on lines +2 to +8

# Create an in-memory SQLite database so the benchmark needs no external server
# and starts from a clean schema on every run.
engine = create_engine('sqlite:///:memory:')
Base.metadata.create_all(engine)
SessionLocal = sessionmaker(bind=engine)

def setup_data(session):
    """Populate the in-memory database with benchmark fixtures.

    Creates one jurisdiction, 10 grievances, and for each grievance
    50 followers, 20 "confirmed" confirmations, and 5 "disputed" ones.
    Returns the list of created Grievance instances.
    """
    jurisdiction = Jurisdiction(
        level=JurisdictionLevel.LOCAL,
        geographic_coverage={"states": ["Maharashtra"], "districts": ["Mumbai"]},
        responsible_authority="Test Auth",
        default_sla_hours=24
    )
    session.add(jurisdiction)
    session.commit()

    created = []
    for idx in range(10):
        grievance = Grievance(
            unique_id=f"G-{idx}",
            category="Roads",
            severity=SeverityLevel.MEDIUM,
            status=GrievanceStatus.OPEN,
            pincode="123456",
            current_jurisdiction_id=jurisdiction.id,
            assigned_authority="Test Auth",
            sla_deadline=datetime.now(timezone.utc)
        )
        session.add(grievance)
        created.append(grievance)
    session.commit()

    for grievance in created:
        # 50 followers per grievance
        session.add_all(
            GrievanceFollower(grievance_id=grievance.id, user_email=f"user{k}@test.com")
            for k in range(50)
        )
        # 20 confirmations of type "confirmed"
        session.add_all(
            ClosureConfirmation(grievance_id=grievance.id, user_email=f"conf{k}@test.com", confirmation_type="confirmed")
            for k in range(20)
        )
        # 5 confirmations of type "disputed"
        session.add_all(
            ClosureConfirmation(grievance_id=grievance.id, user_email=f"disp{k}@test.com", confirmation_type="disputed")
            for k in range(5)
        )

    session.commit()
    return created

def original_get_closure_status(session, grievance_id):
    """Baseline implementation: three separate COUNT queries.

    Mirrors the pre-optimization router code — one roundtrip for the
    follower total plus one per confirmation_type value.
    Returns (total_followers, confirmations_count, disputes_count).
    """
    from sqlalchemy import func

    def _count_confirmations(kind):
        # One database roundtrip per confirmation_type value.
        return session.query(func.count(ClosureConfirmation.id)).filter(
            ClosureConfirmation.grievance_id == grievance_id,
            ClosureConfirmation.confirmation_type == kind
        ).scalar()

    follower_total = session.query(func.count(GrievanceFollower.id)).filter(
        GrievanceFollower.grievance_id == grievance_id
    ).scalar()

    return (
        follower_total,
        _count_confirmations("confirmed"),
        _count_confirmations("disputed"),
    )

def optimized_get_closure_status(session, grievance_id):
    """Optimized implementation: one GROUP BY replaces the per-type COUNTs.

    Behaviorally identical to original_get_closure_status, but the two
    confirmation_type counts come back in a single database roundtrip.
    Returns (total_followers, confirmations_count, disputes_count).
    """
    # Fix: `case` was imported but never used in this function.
    from sqlalchemy import func

    total_followers = session.query(func.count(GrievanceFollower.id)).filter(
        GrievanceFollower.grievance_id == grievance_id
    ).scalar()

    # Single roundtrip: counts for every confirmation_type value at once.
    # Types with zero rows are simply absent, hence the .get(..., 0) below.
    stats = dict(
        session.query(
            ClosureConfirmation.confirmation_type,
            func.count(ClosureConfirmation.id)
        ).filter(
            ClosureConfirmation.grievance_id == grievance_id
        ).group_by(ClosureConfirmation.confirmation_type).all()
    )

    return total_followers, stats.get("confirmed", 0), stats.get("disputed", 0)

def run_benchmark():
    """Time the baseline vs optimized closure-status queries and assert they agree.

    Prints per-call timings for both implementations and the relative
    improvement. Raises AssertionError if the two implementations disagree.
    """
    session = SessionLocal()
    try:
        grievances = setup_data(session)
        g_id = grievances[0].id

        # Warmup: populate SQLAlchemy statement caches and the SQLite page cache
        # so neither variant pays first-call overhead inside the timed loops.
        for _ in range(10):
            original_get_closure_status(session, g_id)
            optimized_get_closure_status(session, g_id)

        ITERATIONS = 500

        # perf_counter() is monotonic and high-resolution — unlike time.time(),
        # it cannot go backwards (NTP adjustments) and is the right clock for
        # measuring elapsed intervals.
        start_time = time.perf_counter()
        for _ in range(ITERATIONS):
            res1 = original_get_closure_status(session, g_id)
        original_time = time.perf_counter() - start_time

        start_time = time.perf_counter()
        for _ in range(ITERATIONS):
            res2 = optimized_get_closure_status(session, g_id)
        optimized_time = time.perf_counter() - start_time

        print(f"Original results: {res1}")
        print(f"Optimized results: {res2}")
        assert res1 == res2, "Results don't match!"

        print(f"Original method: {original_time:.4f} seconds ({original_time/ITERATIONS:.5f} per call)")
        print(f"Optimized method: {optimized_time:.4f} seconds ({optimized_time/ITERATIONS:.5f} per call)")
        print(f"Improvement: {(original_time - optimized_time) / original_time * 100:.2f}%")
    finally:
        # Fix: the session was never closed; release the connection explicitly.
        session.close()

if __name__ == "__main__":
    # Run the benchmark only when executed as a script, not on import.
    run_benchmark()
Loading