From 644e45f9f31fd8ad518cce0eeeabba981dd999ba Mon Sep 17 00:00:00 2001 From: ankushchk Date: Sun, 1 Mar 2026 20:14:54 +0530 Subject: [PATCH 1/3] perf: optimization for homepage referral activity tracking --- web/views.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/web/views.py b/web/views.py index b4d485749..095cb1b22 100644 --- a/web/views.py +++ b/web/views.py @@ -286,14 +286,30 @@ def index(request): :10 ] # Get more and then sort by clicks - # Add click counts manually since WebRequest.user is a CharField, not a ForeignKey - for referrer in top_referrers: - # Look for both new format /ref/CODE/ and old format ?ref=CODE - ref_code = referrer.referral_code - clicks = WebRequest.objects.filter( - models.Q(path__contains=f"/ref/{ref_code}/") | models.Q(path__contains=f"?ref={ref_code}") - ).count() - referrer.total_clicks = clicks + # PERF FIX: Batch-fetch all click counts in a single query instead of N separate + # queries (one per referrer). This avoids an N+1 query problem that runs + # unindexed LIKE scans against a potentially huge WebRequest table on every + # homepage load. + if top_referrers: + all_codes = [r.referral_code for r in top_referrers] + # Build a single combined Q expression for all codes at once + combined_q = models.Q() + for code in all_codes: + combined_q |= models.Q(path__contains=f"/ref/{code}/") | models.Q(path__contains=f"?ref={code}") + + # Fetch matching paths in one go + matching_paths = WebRequest.objects.filter(combined_q).values_list("path", flat=True) + + # Map each referral code to its click count in-memory + click_counts = {code: 0 for code in all_codes} + for path in matching_paths: + for code in all_codes: + if f"/ref/{code}/" in path or f"?ref={code}" in path: + click_counts[code] += 1 + break # Each path belongs to one referrer + + for referrer in top_referrers: + referrer.total_clicks = click_counts.get(referrer.referral_code, 0) # Re-sort to include click count in ranking top_referrers = sorted( From 4fddaeb8e33db554eb7f376566f33ab5484536ac Mon Sep 17 00:00:00 2001 From: ankushchk Date: Sun, 1 Mar 2026 20:24:08 +0530 Subject: [PATCH 2/3] fix: exact referral code extraction to prevent misattribution --- web/views.py | 57 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/web/views.py b/web/views.py index 095cb1b22..88f49b05e 100644 --- a/web/views.py +++ b/web/views.py @@ -291,25 +291,46 @@ def index(request): # unindexed LIKE scans against a potentially huge WebRequest table on every # homepage load. if top_referrers: - all_codes = [r.referral_code for r in top_referrers] - # Build a single combined Q expression for all codes at once - combined_q = models.Q() - for code in all_codes: - combined_q |= models.Q(path__contains=f"/ref/{code}/") | models.Q(path__contains=f"?ref={code}") - - # Fetch matching paths in one go - matching_paths = WebRequest.objects.filter(combined_q).values_list("path", flat=True) - - # Map each referral code to its click count in-memory - click_counts = {code: 0 for code in all_codes} - for path in matching_paths: + all_codes = sorted([r.referral_code for r in top_referrers if r.referral_code], key=len, reverse=True) + + if all_codes: + # Build a single combined Q expression for all codes at once + combined_q = models.Q() for code in all_codes: - if f"/ref/{code}/" in path or f"?ref={code}" in path: - click_counts[code] += 1 - break # Each path belongs to one referrer - - for referrer in top_referrers: - referrer.total_clicks = click_counts.get(referrer.referral_code, 0) + combined_q |= models.Q(path__contains=f"/ref/{code}/") | models.Q(path__contains=f"ref={code}") + + # Fetch matching paths in one go + matching_paths = WebRequest.objects.filter(combined_q).values_list("path", flat=True) + + from urllib.parse import parse_qs, urlparse + + # Map each referral code to its click count in-memory + click_counts = {code: 0 for code in all_codes} + for path in matching_paths: + extracted_code = None + + # Try new format /en/ref/CODE/ + if "/ref/" in path: + # Use regex or controlled split for exact segment matching + # Regex: look for /ref/ followed by non-slashes, then a trailing slash + m = re.search(r"/ref/([^/?#]+)/", path) + if m: + extracted_code = m.group(1) + + # Try query param format ?ref=CODE + if not extracted_code and "ref=" in path: + # urlparse requires the path to start with / or be a full URL + # so we just parse the query part if needed + parsed_url = urlparse(path) + params = parse_qs(parsed_url.query) + if "ref" in params: + extracted_code = params["ref"][0] + + if extracted_code and extracted_code in click_counts: + click_counts[extracted_code] += 1 + + for referrer in top_referrers: + referrer.total_clicks = click_counts.get(referrer.referral_code, 0) # Re-sort to include click count in ranking top_referrers = sorted( From a33f917ecaa3d0dbb0345e03ce105b09bfd52ccc Mon Sep 17 00:00:00 2001 From: ankushchk Date: Sun, 1 Mar 2026 20:32:43 +0530 Subject: [PATCH 3/3] perf: improve referral extraction regex and use Sum('count') for robustness --- web/views.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/web/views.py b/web/views.py index 88f49b05e..869d42011 100644 --- a/web/views.py +++ b/web/views.py @@ -297,37 +297,42 @@ def index(request): # Build a single combined Q expression for all codes at once combined_q = models.Q() for code in all_codes: - combined_q |= models.Q(path__contains=f"/ref/{code}/") | models.Q(path__contains=f"ref={code}") - - # Fetch matching paths in one go - matching_paths = WebRequest.objects.filter(combined_q).values_list("path", flat=True) + # Broadened contains check to catch /ref/CODE with or without trailing slash + combined_q |= models.Q(path__contains=f"/ref/{code}") | models.Q(path__contains=f"ref={code}") + + # Fetch unique matching paths and their aggregate counts in one go + # Using Sum("count") to support models where rows represent multiple hits + matching_requests = ( + WebRequest.objects.filter(combined_q) + .values("path") + .annotate(total_hits=models.Sum("count")) + ) from urllib.parse import parse_qs, urlparse # Map each referral code to its click count in-memory click_counts = {code: 0 for code in all_codes} - for path in matching_paths: + for req in matching_requests: + path = req["path"] + total_hits = req["total_hits"] or 0 extracted_code = None - # Try new format /en/ref/CODE/ + # Try new format /en/ref/CODE/ or /en/ref/CODE if "/ref/" in path: - # Use regex or controlled split for exact segment matching - # Regex: look for /ref/ followed by non-slashes, then a trailing slash - m = re.search(r"/ref/([^/?#]+)/", path) + # Improved regex to handle trailing slashes, end of path, query params, or fragments + m = re.search(r"/ref/([^/?#]+)(?:/|$|\?|#)", path) if m: extracted_code = m.group(1) # Try query param format ?ref=CODE if not extracted_code and "ref=" in path: - # urlparse requires the path to start with / or be a full URL - # so we just parse the query part if needed parsed_url = urlparse(path) params = parse_qs(parsed_url.query) if "ref" in params: extracted_code = params["ref"][0] if extracted_code and extracted_code in click_counts: - click_counts[extracted_code] += 1 + click_counts[extracted_code] += total_hits for referrer in top_referrers: referrer.total_clicks = click_counts.get(referrer.referral_code, 0)