diff --git a/backend/priority_engine.py b/backend/priority_engine.py
index 8a532d72..ed553418 100644
--- a/backend/priority_engine.py
+++ b/backend/priority_engine.py
@@ -125,7 +125,9 @@ def _calculate_urgency(self, text: str, severity_score: int):
             # Pre-extract literal keywords for fast substring pre-filtering
             # Only apply this optimization if the pattern is a simple list of words like \b(word1|word2)\b
             keywords = []
-            if re.fullmatch(r'\\b\([a-zA-Z0-9\s|]+\)\\b', pattern):
+            # Optimization: Extract literal keywords from simple regex strings like "\b(word1|word2)\b"
+            # This allows us to use a fast substring check (`in text`) before executing the regex engine.
+            if pattern.startswith('\\b(') and pattern.endswith(')\\b') and not any(c in pattern[3:-3] for c in ['.', '*', '+', '?', '^', '$', '[', ']', '{', '}']):
                 clean_pattern = pattern.replace('\\b', '').replace('(', '').replace(')', '')
                 keywords = [k.strip() for k in clean_pattern.split('|') if k.strip()]
             self._regex_cache.append((re.compile(pattern), weight, pattern, keywords))
diff --git a/backend/routers/issues.py b/backend/routers/issues.py
index 4b93ad06..4cfcea62 100644
--- a/backend/routers/issues.py
+++ b/backend/routers/issues.py
@@ -236,8 +236,7 @@ async def create_issue(
     # Invalidate cache so new issue appears
     try:
         recent_issues_cache.clear()
-        recent_issues_cache.clear()
-        user_issues_cache.clear()
+        user_issues_cache.clear()
     except Exception as e:
         logger.error(f"Error clearing cache: {e}")
 
diff --git a/backend/tests/benchmark_closure_status.py b/backend/tests/benchmark_closure_status.py
new file mode 100644
index 00000000..2bb4d55a
--- /dev/null
+++ b/backend/tests/benchmark_closure_status.py
@@ -0,0 +1,102 @@
+import time
+from sqlalchemy.orm import Session
+from sqlalchemy import func, create_engine
+from backend.database import Base
+from backend.models import Grievance, GrievanceFollower, ClosureConfirmation, Issue, Jurisdiction, JurisdictionLevel, SeverityLevel
+from sqlalchemy import case, distinct
+import datetime
+
+# Create a temporary in-memory database for testing
+engine = create_engine("sqlite:///:memory:")
+Base.metadata.create_all(bind=engine)
+SessionLocal = Session(bind=engine)
+
+def populate_db(db: Session, grievance_id: int):
+    # Add Jurisdiction
+    j = Jurisdiction(id=1, level=JurisdictionLevel.STATE, geographic_coverage={"states": ["Maharashtra"]}, responsible_authority="PWD", default_sla_hours=48)
+    db.add(j)
+
+    # Add Grievance
+    g = Grievance(
+        id=grievance_id,
+        current_jurisdiction_id=1,
+        sla_deadline=datetime.datetime.now(datetime.timezone.utc),
+        status="open",
+        category="Road",
+        unique_id="123",
+        severity=SeverityLevel.LOW,
+        assigned_authority="PWD"
+    )
+    db.add(g)
+
+    # Add Followers
+    for i in range(50):
+        db.add(GrievanceFollower(grievance_id=grievance_id, user_email=f"user{i}@test.com"))
+
+    # Add Confirmations
+    for i in range(30):
+        db.add(ClosureConfirmation(grievance_id=grievance_id, user_email=f"conf_user{i}@test.com", confirmation_type="confirmed"))
+    for i in range(10):
+        db.add(ClosureConfirmation(grievance_id=grievance_id, user_email=f"disp_user{i}@test.com", confirmation_type="disputed"))
+
+    db.commit()
+
+def benchmark_old(db: Session, grievance_id: int, iterations=1000):
+    start = time.perf_counter()
+    for _ in range(iterations):
+        total_followers = db.query(func.count(GrievanceFollower.id)).filter(
+            GrievanceFollower.grievance_id == grievance_id
+        ).scalar()
+
+        counts = db.query(
+            ClosureConfirmation.confirmation_type,
+            func.count(ClosureConfirmation.id)
+        ).filter(ClosureConfirmation.grievance_id == grievance_id).group_by(ClosureConfirmation.confirmation_type).all()
+
+        counts_dict = {ctype: count for ctype, count in counts}
+        confirmations_count = counts_dict.get("confirmed", 0)
+        disputes_count = counts_dict.get("disputed", 0)
+    end = time.perf_counter()
+    if iterations > 10:
+        print(f"Old approach ({iterations} iters): {end - start:.4f}s")
+    return total_followers, confirmations_count, disputes_count
+
+def benchmark_new_agg(db: Session, grievance_id: int, iterations=1000):
+    start = time.perf_counter()
+    for _ in range(iterations):
+        total_followers = db.query(func.count(GrievanceFollower.id)).filter(
+            GrievanceFollower.grievance_id == grievance_id
+        ).scalar()
+
+        # Optimize the two counts into one aggregate without group_by
+        stats = db.query(
+            func.sum(case((ClosureConfirmation.confirmation_type == 'confirmed', 1), else_=0)).label('confirmed'),
+            func.sum(case((ClosureConfirmation.confirmation_type == 'disputed', 1), else_=0)).label('disputed')
+        ).filter(ClosureConfirmation.grievance_id == grievance_id).first()
+
+        confirmations_count = stats.confirmed or 0
+        disputes_count = stats.disputed or 0
+    end = time.perf_counter()
+    if iterations > 10:
+        print(f"New approach (Agg) ({iterations} iters): {end - start:.4f}s")
+    return total_followers, confirmations_count, disputes_count
+
+if __name__ == "__main__":
+    db = SessionLocal
+    populate_db(db, 1)
+
+    # Warm up
+    benchmark_old(db, 1, 10)
+    benchmark_new_agg(db, 1, 10)
+
+    res_old = benchmark_old(db, 1)
+    res_agg = benchmark_new_agg(db, 1)
+
+    print(f"Old Results: {res_old}")
+    print(f"New Agg Results: {res_agg}")
+def benchmark_new_single(db: Session, grievance_id: int, iterations=1000):
+    start = time.perf_counter()
+    for _ in range(iterations):
+        # We can't easily join them perfectly without cross product, but what if we do subqueries?
+        # Actually it's probably better to just leave it. Let's look for N+1 queries instead.
+        pass
diff --git a/backend/tests/benchmark_urgency.py b/backend/tests/benchmark_urgency.py
new file mode 100644
index 00000000..dddef5f6
--- /dev/null
+++ b/backend/tests/benchmark_urgency.py
@@ -0,0 +1,51 @@
+import time
+from backend.priority_engine import priority_engine
+import cProfile
+import pstats
+import io
+
+# We create a sample text that does not contain any of the urgency keywords
+# but is long enough to simulate a real-world scenario.
+sample_text = (
+    "There is a small pothole on the corner of 5th and Main. "
+    "It has been there for a few days and is causing some inconvenience to the drivers. "
+    "Please send someone to look at it when possible. "
+    "The road condition is generally poor in this area and needs attention. "
+    "We have noticed an increase in traffic recently, which might be contributing to the wear and tear. "
+    "No one has been injured, but we would like to avoid any accidents."
+) * 10 # Make it reasonably long
+
+def benchmark(iterations=10000):
+    start_time = time.perf_counter()
+    for _ in range(iterations):
+        # We only benchmark _calculate_urgency. We give it a base severity of 10.
+        priority_engine._calculate_urgency(sample_text, 10)
+    end_time = time.perf_counter()
+
+    total_time = end_time - start_time
+    avg_time_ms = (total_time / iterations) * 1000
+
+    print(f"Benchmark: _calculate_urgency")
+    print(f"Iterations: {iterations}")
+    print(f"Total time: {total_time:.4f} seconds")
+    print(f"Average time per call: {avg_time_ms:.4f} ms")
+    return avg_time_ms
+
+if __name__ == "__main__":
+    # Warm up
+    priority_engine._calculate_urgency(sample_text, 10)
+
+    print("--- Running Benchmark ---")
+    benchmark()
+
+    # Profile to show where time is spent
+    print("\n--- Running Profiler ---")
+    pr = cProfile.Profile()
+    pr.enable()
+    for _ in range(5000):
+        priority_engine._calculate_urgency(sample_text, 10)
+    pr.disable()
+    s = io.StringIO()
+    ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
+    ps.print_stats(15)
+    print(s.getvalue())
diff --git a/backend/tests/benchmark_urgency_unoptimized.py b/backend/tests/benchmark_urgency_unoptimized.py
new file mode 100644
index 00000000..70263603
--- /dev/null
+++ b/backend/tests/benchmark_urgency_unoptimized.py
@@ -0,0 +1,58 @@
+import time
+from backend.priority_engine import priority_engine
+import cProfile
+import pstats
+import io
+import re
+
+# We create a sample text that does not contain any of the urgency keywords
+# but is long enough to simulate a real-world scenario.
+sample_text = (
+    "There is a small pothole on the corner of 5th and Main. "
+    "It has been there for a few days and is causing some inconvenience to the drivers. "
+    "Please send someone to look at it when possible. "
+    "The road condition is generally poor in this area and needs attention. "
+    "We have noticed an increase in traffic recently, which might be contributing to the wear and tear. "
+    "No one has been injured, but we would like to avoid any accidents."
+) * 10 # Make it reasonably long
+
+def benchmark(iterations=10000):
+    start_time = time.perf_counter()
+    for _ in range(iterations):
+        priority_engine._calculate_urgency(sample_text, 10)
+    end_time = time.perf_counter()
+
+    total_time = end_time - start_time
+    avg_time_ms = (total_time / iterations) * 1000
+
+    print(f"Benchmark: _calculate_urgency")
+    print(f"Iterations: {iterations}")
+    print(f"Total time: {total_time:.4f} seconds")
+    print(f"Average time per call: {avg_time_ms:.4f} ms")
+    return avg_time_ms
+
+if __name__ == "__main__":
+    # Force the engine to clear its cache and simulate the old unoptimized behavior
+    # where the keywords list is empty and regex.search is always called.
+    from backend.adaptive_weights import adaptive_weights
+    priority_engine._regex_cache = []
+    for pattern, weight in adaptive_weights.get_urgency_patterns():
+        priority_engine._regex_cache.append((re.compile(pattern), weight, pattern, []))
+
+    # Warm up
+    priority_engine._calculate_urgency(sample_text, 10)
+
+    print("--- Running Unoptimized Benchmark ---")
+    benchmark()
+
+    # Profile to show where time is spent
+    print("\n--- Running Profiler ---")
+    pr = cProfile.Profile()
+    pr.enable()
+    for _ in range(5000):
+        priority_engine._calculate_urgency(sample_text, 10)
+    pr.disable()
+    s = io.StringIO()
+    ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
+    ps.print_stats(15)
+    print(s.getvalue())
diff --git a/test_grievances_opt.py b/test_grievances_opt.py
new file mode 100644
index 00000000..adf52e18
--- /dev/null
+++ b/test_grievances_opt.py
@@ -0,0 +1,26 @@
+import time
+from backend.database import SessionLocal
+from backend.models import Grievance, GrievanceFollower, ClosureConfirmation
+from backend.routers.grievances import get_closure_status
+from sqlalchemy import func
+
+def bench():
+    db = SessionLocal()
+    start = time.perf_counter()
+    for _ in range(100):
+        total_followers = db.query(func.count(GrievanceFollower.id)).filter(
+            GrievanceFollower.grievance_id == 1
+        ).scalar()
+
+        counts = db.query(
+            ClosureConfirmation.confirmation_type,
+            func.count(ClosureConfirmation.id)
+        ).filter(ClosureConfirmation.grievance_id == 1).group_by(ClosureConfirmation.confirmation_type).all()
+    print(f"Old approach: {time.perf_counter() - start}")
+
+    start = time.perf_counter()
+    for _ in range(100):
+        # Instead of two queries, we could potentially do this in one, or just measure DB hits
+        pass
+
+bench()