-
Notifications
You must be signed in to change notification settings - Fork 36
⚡ Bolt: optimize nearby issues serialization and fix cache stability #573
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| from PIL import Image | ||
| import logging | ||
| import time | ||
| import hashlib | ||
|
|
||
| from backend.utils import process_and_detect, validate_uploaded_file, process_uploaded_image | ||
| from backend.schemas import DetectionResponse, UrgencyAnalysisRequest, UrgencyAnalysisResponse | ||
|
|
@@ -68,35 +69,44 @@ async def _get_cached_result(key: str, func, *args, **kwargs): | |
| return result | ||
|
|
||
| async def _cached_detect_severity(image_bytes: bytes): | ||
| key = f"severity_{hash(image_bytes)}" | ||
| # Stable cache key using MD5 (hash() is unstable across processes) | ||
| image_hash = hashlib.md5(image_bytes).hexdigest() | ||
| key = f"severity_{image_hash}" | ||
| return await _get_cached_result(key, detect_severity_clip, image_bytes) | ||
|
|
||
| async def _cached_detect_smart_scan(image_bytes: bytes): | ||
| key = f"smart_scan_{hash(image_bytes)}" | ||
| image_hash = hashlib.md5(image_bytes).hexdigest() | ||
| key = f"smart_scan_{image_hash}" | ||
| return await _get_cached_result(key, detect_smart_scan_clip, image_bytes) | ||
|
Comment on lines
71
to
80
|
||
|
|
||
| async def _cached_generate_caption(image_bytes: bytes): | ||
| key = f"caption_{hash(image_bytes)}" | ||
| image_hash = hashlib.md5(image_bytes).hexdigest() | ||
| key = f"caption_{image_hash}" | ||
| return await _get_cached_result(key, generate_image_caption, image_bytes) | ||
|
|
||
| async def _cached_detect_waste(image_bytes: bytes): | ||
| key = f"waste_{hash(image_bytes)}" | ||
| image_hash = hashlib.md5(image_bytes).hexdigest() | ||
| key = f"waste_{image_hash}" | ||
| return await _get_cached_result(key, detect_waste_clip, image_bytes) | ||
|
|
||
| async def _cached_detect_civic_eye(image_bytes: bytes): | ||
| key = f"civic_eye_{hash(image_bytes)}" | ||
| image_hash = hashlib.md5(image_bytes).hexdigest() | ||
| key = f"civic_eye_{image_hash}" | ||
| return await _get_cached_result(key, detect_civic_eye_clip, image_bytes) | ||
|
|
||
| async def _cached_detect_graffiti(image_bytes: bytes): | ||
| key = f"graffiti_{hash(image_bytes)}" | ||
| image_hash = hashlib.md5(image_bytes).hexdigest() | ||
| key = f"graffiti_{image_hash}" | ||
| return await _get_cached_result(key, detect_graffiti_art_clip, image_bytes) | ||
|
|
||
| async def _cached_detect_traffic_sign(image_bytes: bytes): | ||
| key = f"traffic_sign_{hash(image_bytes)}" | ||
| image_hash = hashlib.md5(image_bytes).hexdigest() | ||
| key = f"traffic_sign_{image_hash}" | ||
| return await _get_cached_result(key, detect_traffic_sign_clip, image_bytes) | ||
|
|
||
| async def _cached_detect_abandoned_vehicle(image_bytes: bytes): | ||
| key = f"abandoned_vehicle_{hash(image_bytes)}" | ||
| image_hash = hashlib.md5(image_bytes).hexdigest() | ||
| key = f"abandoned_vehicle_{image_hash}" | ||
| return await _get_cached_result(key, detect_abandoned_vehicle_clip, image_bytes) | ||
|
Comment on lines
71
to
110
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain 🏁 Script executed: #!/bin/bash
# Search for hash() used in cache key generation patterns
rg -n --type=py 'cache.*hash\(' -A2 -B2
rg -n --type=py 'key.*=.*hash\(' -A2 -B2Repository: RohanExploit/VishwaGuru Length of output: 897 🏁 Script executed: cat -n backend/ai_service.py | sed -n '180,200p'Repository: RohanExploit/VishwaGuru Length of output: 797 🏁 Script executed: # Search more comprehensively for hash() used in cache contexts
rg -n --type=py 'hash\(' --context=5 | rg -A5 -B5 'cache|key'Repository: RohanExploit/VishwaGuru Length of output: 1976 🏁 Script executed: # Check for any other cache key generation patterns
rg -n --type=py '(cache_key|cache.*key)\s*=' -A1 -B1Repository: RohanExploit/VishwaGuru Length of output: 2928 Inconsistent cache key generation: The line 🧰 Tools🪛 Ruff (0.15.6)[error] 73-73: Probable use of insecure hash functions in (S324) [error] 78-78: Probable use of insecure hash functions in (S324) [error] 83-83: Probable use of insecure hash functions in (S324) [error] 88-88: Probable use of insecure hash functions in (S324) [error] 93-93: Probable use of insecure hash functions in (S324) [error] 98-98: Probable use of insecure hash functions in (S324) [error] 103-103: Probable use of insecure hash functions in (S324) [error] 108-108: Probable use of insecure hash functions in (S324) 🤖 Prompt for AI Agents |
||
|
|
||
| # Endpoints | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -236,8 +236,7 @@ async def create_issue( | |
| # Invalidate cache so new issue appears | ||
| try: | ||
| recent_issues_cache.clear() | ||
| recent_issues_cache.clear() | ||
| user_issues_cache.clear() | ||
| user_issues_cache.clear() | ||
| except Exception as e: | ||
| logger.error(f"Error clearing cache: {e}") | ||
|
|
||
|
|
@@ -347,24 +346,27 @@ def get_nearby_issues( | |
| ) | ||
|
|
||
| # Convert to response format and limit results | ||
| nearby_responses = [ | ||
| NearbyIssueResponse( | ||
| id=issue.id, | ||
| description=issue.description[:100] + "..." if len(issue.description) > 100 else issue.description, | ||
| category=issue.category, | ||
| latitude=issue.latitude, | ||
| longitude=issue.longitude, | ||
| distance_meters=distance, | ||
| upvotes=issue.upvotes or 0, | ||
| created_at=issue.created_at, | ||
| status=issue.status | ||
| ) | ||
| for issue, distance in nearby_issues_with_distance[:limit] | ||
| ] | ||
| # Performance Boost: Map directly to dictionaries to avoid Pydantic overhead | ||
| nearby_data = [] | ||
| for issue, distance in nearby_issues_with_distance[:limit]: | ||
| desc = issue.description or "" | ||
| short_desc = desc[:100] + "..." if len(desc) > 100 else desc | ||
|
|
||
| nearby_data.append({ | ||
| "id": issue.id, | ||
| "description": short_desc, | ||
| "category": issue.category, | ||
| "latitude": issue.latitude, | ||
| "longitude": issue.longitude, | ||
| "distance_meters": distance, | ||
| "upvotes": issue.upvotes or 0, | ||
| "created_at": issue.created_at.isoformat() if issue.created_at else None, | ||
| "status": issue.status | ||
| }) | ||
|
|
||
| # Performance Boost: Cache serialized JSON to bypass redundant Pydantic validation | ||
| # and serialization on cache hits. | ||
| json_data = json.dumps([r.model_dump(mode='json') for r in nearby_responses]) | ||
| json_data = json.dumps(nearby_data) | ||
| nearby_issues_cache.set(json_data, cache_key) | ||
|
|
||
| return Response(content=json_data, media_type="application/json") | ||
|
Comment on lines
+369
to
372
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This note describes MD5 as a "stable cryptographic" hash and claims
`hash()` causes a 0% hit rate "across workers". In this codebase the detection cache is an in-memory, per-process `ThreadSafeCache`, so cache hits are not shared across workers regardless of key stability, and MD5 should not be described as cryptographically secure. Please reword this learning/action to focus on determinism/stability (not cryptographic strength) and avoid implying cross-worker cache sharing unless the cache is actually shared (e.g., Redis).