From ccfd651eb6b5b27dac078f5c138c38ee8f01bc50 Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@opensanctions.org>
Date: Tue, 7 Apr 2026 19:46:20 +0200
Subject: [PATCH 1/8] scoring: early stopping heuristic to reduce wasted
 scoring calls

Stop scoring candidates after consecutive low-scoring results, with
adaptive patience that increases when a promising score is seen.
Thresholds derived from the per-request threshold parameter.

Refs #1011

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 plans/scoring-early-stopping.md | 295 ++++++++++++++++++++++++++++++++
 yente/scoring.py                |  40 ++++-
 yente/settings.py               |   4 +
 3 files changed, 337 insertions(+), 2 deletions(-)
 create mode 100644 plans/scoring-early-stopping.md

diff --git a/plans/scoring-early-stopping.md b/plans/scoring-early-stopping.md
new file mode 100644
index 00000000..d0ba6141
--- /dev/null
+++ b/plans/scoring-early-stopping.md
@@ -0,0 +1,295 @@
+---
+description: Heuristics to reduce the number of candidates scored in the /match pipeline
+date: 2026-04-07
+tags: [scoring, performance, matching, issue-1011]
+---
+
+# Early stopping heuristics for candidate scoring
+
+GitHub issue: opensanctions/yente#1011
+
+## Problem
+
+The `/match` endpoint retrieves `limit * MATCH_CANDIDATES` (default 5 * 10 = 50) candidates
+from Elasticsearch and scores **every one** with the full algorithm (LogicV2). Users can
+request up to 500 results, meaning up to 5,000 scoring calls per query. The scoring algorithm
+itself isn't terribly slow — yente just invokes it far too often on candidates that will never
+make it into the response.
+
+## Research data
+
+Analysis of three production log samples (30,000 rows, ~20,800 valid scoring entries, 418
+unique queries, 2026-04-07). Mean ~50 candidates scored per query.
+
+### Most scoring work is wasted
+
+| Metric | Value |
+|---|---|
+| Total scoring calls | 20,772 |
+| Scores < 0.5 (below cutoff) | 82.2% |
+| Scores < 0.3 (clearly wasted) | 47.9% |
+| Scores >= 0.7 (match threshold) | 1.0% |
+| Queries with zero candidates >= 0.5 | 49.3% |
+| Queries with zero candidates >= 0.7 | 84.4% |
+
+About half of all queries produce no candidates above 0.5, and 84% produce no matches
+(>= 0.7). Yet we score all ~50 candidates for every query.
+
+### ES ranking vs algo score correlation
+
+ES ranking is a **weak** predictor of algo score. The best algo-scored result appears at:
+
+| Within top N ES results | % of queries |
+|---|---|
+| Top 1 | 23.2% |
+| Top 3 | 35.9% |
+| Top 5 | 44.7% |
+| Top 10 | 63.4% |
+| Top 20 | 83.3% |
+| Top 50 | 98.3% |
+
+Mean algo score by ES rank bucket (ranks 0-49 contain the bulk of data):
+
+| ES rank bucket | Count | Mean algo score | % with algo >= 0.5 |
+|---|---|---|---|
+| 0-9 | 4,056 | 0.358 | 26.9% |
+| 10-19 | 4,092 | 0.326 | 19.5% |
+| 20-29 | 3,957 | 0.310 | 18.0% |
+| 30-39 | 3,818 | 0.311 | 16.5% |
+| 40-49 | 3,737 | 0.297 | 12.2% |
+| 50+ | 1,112 | ~0.19 | 0.0% |
+
+Key observation: within the first 50 candidates, algo scores decline gently (0.36 → 0.30
+mean) but good results appear at every rank. ES does a good job excluding truly irrelevant
+candidates (rank 50+), but within the top 50 it cannot reliably distinguish good from bad.
+
+### Early stopping simulation
+
+"Stop scoring after N consecutive candidates with algo score below threshold":
+
+| Threshold | Patience | Scoring calls saved | Meaningful best results missed (out of 418) |
+|---|---|---|---|
+| 0.3 | 3 | 50.8% | 22 |
+| 0.3 | 5 | 42.0% | 12 |
+| 0.3 | 7 | 36.9% | 9 |
+| 0.3 | 10 | 31.8% | 5 |
+| 0.3 | 15 | 23.0% | 4 |
+
+Simple early stopping with patience=10 saves ~32% of scoring calls and misses 5 out of
+418 queries (1.2%).
+
+### Adaptive patience
+
+When a query has already produced a score above a trigger value, increase patience to
+avoid cutting off queries that have real matches buried deeper in the candidate list:
+
+| Base patience | Boosted patience | Trigger | Saved | Missed (out of 418) |
+|---|---|---|---|---|
+| 5 | 10 | >= 0.4 | 33.3% | 7 |
+| 5 | 15 | >= 0.4 | 30.6% | 6 |
+| 5 | 20 | >= 0.4 | 27.9% | 5 |
+| 5 | 25 | >= 0.4 | 27.0% | 5 |
+
+Adaptive patience helps: queries with no real matches stop early (patience=5, saves the
+most work), while queries with promising candidates keep looking longer. The approach
+`base=5, boost=20, trigger>=0.4` saves ~28% of scoring calls and misses 5 out of 418
+queries (1.2%).
+
+### Missed results profile
+
+With the recommended adaptive settings (base=5, boost=20, trigger>=0.4, min_candidates=10),
+the 5 missed results are:
+
+| Best score | At ES rank | Stopped after | Total candidates |
+|---|---|---|---|
+| 0.667 | 31 | 16 | 49 |
+| 0.583 | 9 | 12 | 46 |
+| 0.565 | 21 | 10 | 49 |
+| 0.543 | 23 | 10 | 48 |
+| 0.512 | 43 | 10 | 97 |
+
+These are all sub-threshold results (< 0.7) that would appear in the response list with
+`match: false`. The highest missed score is 0.667. For screening use cases where only
+`match: true` matters, the quality impact is effectively zero.
+
+### Index score floor
+
+Adding a minimum ES index score before scoring a candidate provides marginal benefit:
+
+| Index score floor | Candidates scored | Good results missed (algo >= 0.5) |
+|---|---|---|
+| >= 5 | 96.7% | 0 |
+| >= 10 | 81.3% | 2 |
+| >= 15 | 39.4% | 7 |
+
+Since most candidates already have index_score > 5, this doesn't help much. The early
+stopping heuristic is more effective.
+
+### Why MATCH_CANDIDATES=10 is correct (and not the right lever)
+
+The 10x multiplier controls **recall** — how many ES candidates we fetch to ensure the
+best algo-scored result is in the pool. The data shows it's well-calibrated:
+
+| MATCH_CANDIDATES equivalent | ES top N (limit=5) | Best result found |
+|---|---|---|
+| 1x | Top 5 | 44.7% |
+| 2x | Top 10 | 63.4% |
+| 4x | Top 20 | 83.3% |
+| **10x** | **Top 50** | **98.3%** |
+
+Reducing the multiplier would lose real results. And within the 50-candidate window, good
+results are spread across all rank buckets — there's no safe truncation point:
+
+| ES rank bucket | % with algo >= 0.5 |
+|---|---|
+| 0-9 | 26.9% |
+| 10-19 | 19.5% |
+| 20-29 | 18.0% |
+| 30-39 | 16.5% |
+| 40-49 | 12.2% |
+
+However, **49.3% of queries have zero candidates above 0.5**. For those queries, the
+multiplier is pure waste — we fetch and score 50 candidates to return nothing. The
+multiplier is calibrated for the ~50% of queries where matches exist, and the other ~50%
+pay the full cost for no benefit.
+
+The multiplier and early stopping solve different problems: the multiplier controls
+**recall** (keep it at 10x), early stopping controls **wasted compute** (stop scoring
+when it's clearly pointless). Together they preserve result quality while cutting scoring
+work by ~28%.
+
+## Proposed approach
+
+### Consecutive-low early stopping with adaptive patience
+
+Add early stopping logic to `score_results()` in `yente/scoring.py`. After scoring each
+candidate, track how many consecutive candidates have scored below a low threshold. Once
+patience is exhausted, stop scoring remaining candidates. When a promising score has been
+seen, multiply patience by a boost factor to keep searching.
+
+Most values are derived from the per-request `threshold` parameter rather than being
+independent settings:
+
+- **Early stop threshold** = `threshold * 0.4` (scores at or below this count as "low")
+- **Boost trigger** = `threshold * 0.6` (score that switches to boosted patience)
+- **Min candidates** = `limit` (always score at least as many as requested)
+- **Boosted patience** = `patience * 4`
+
+This leaves one setting: `SCORE_EARLY_STOP_PATIENCE` (default 5, env-configurable).
+Set to a large value (e.g., 9999) to effectively disable early stopping.
+
+```python
+EARLY_STOP_BOOST_FACTOR = 4
+
+async def score_results(
+    algorithm: Type[ScoringAlgorithm],
+    entity: Entity,
+    results: Iterable[Tuple[Entity, float]],
+    threshold: float = settings.SCORE_THRESHOLD,
+    cutoff: float = 0.0,
+    limit: Optional[int] = None,
+    config: ScoringConfig = ScoringConfig.defaults(),
+) -> Tuple[int, List[ScoredEntityResponse]]:
+    scored: List[ScoredEntityResponse] = []
+    matches = 0
+    consecutive_low = 0
+    seen_promising = False
+    patience = settings.SCORE_EARLY_STOP_PATIENCE
+    early_stop_threshold = threshold * 0.4
+    boost_trigger = threshold * 0.6
+    min_candidates = limit or 0
+    for rank, (result, index_score) in enumerate(results):
+        scoring = algorithm.compare(query=entity, result=result, config=config)
+        # ... existing logging and sleep ...
+        response = ScoredEntityResponse.from_entity_result(result, scoring, threshold)
+
+        if response.score > early_stop_threshold:
+            consecutive_low = 0
+        else:
+            consecutive_low += 1
+
+        if response.score >= boost_trigger:
+            seen_promising = True
+
+        if response.score <= cutoff:
+            continue
+        if response.match:
+            matches += 1
+        scored.append(response)
+
+        effective_patience = (
+            patience * EARLY_STOP_BOOST_FACTOR if seen_promising
+            else patience
+        )
+        if consecutive_low >= effective_patience and rank >= min_candidates:
+            break
+
+    scored = sorted(scored, key=lambda r: r.score, reverse=True)
+    if limit is not None:
+        scored = scored[:limit]
+    return matches, scored
+```
+
+Note: the `consecutive_low` counter and `seen_promising` flag are updated before the
+`cutoff` filter — a candidate that's below `cutoff` but above the early-stop threshold
+should still reset the counter.
+
+### Settings
+
+One new setting in `yente/settings.py`:
+
+```python
+SCORE_EARLY_STOP_PATIENCE: int = 5
+```
+
+Configurable via `YENTE_SCORE_EARLY_STOP_PATIENCE` environment variable.
+
+At default threshold (0.7) this yields:
+- Early stop threshold: 0.28
+- Boost trigger: 0.42
+- Base patience: 5
+- Boosted patience: 20
+
+## Testing
+
+- Unit tests: mock algorithm that returns predetermined scores; verify early stopping
+  triggers at the right rank and that results are not lost.
+- Compare `/match` output with and without early stopping on a representative query set
+  to validate that result quality is preserved.
+
+## Risks
+
+- **Missed results**: With adaptive patience (base=5, boosted=20, trigger=0.42), the
+  simulation shows ~5 missed results out of 418 queries (1.2%). All are sub-threshold
+  (highest is 0.667, below the 0.7 match threshold). For screening use cases where only
+  `match: true` matters, the quality impact is effectively zero.
+- **Query-dependent behavior**: Some entity types or datasets may have different score
+  distributions. Deriving thresholds from the per-request `threshold` parameter mitigates
+  this — users with a lower threshold automatically get less aggressive early stopping.
+- **Sensitivity to candidate ordering**: Early stopping depends on ES returning candidates
+  in a roughly score-correlated order. If ES ranking degrades (e.g., after index changes),
+  more good results could be missed. The boosted patience provides a buffer for queries
+  where ES and algo scoring clearly diverge.
+
+## Follow-up: raising MATCH_CANDIDATES
+
+Once early stopping is in place, the cost model changes: fetching more candidates from ES
+is cheap, and early stopping caps how many actually get scored. This makes it tempting to
+raise MATCH_CANDIDATES (currently 10) as insurance against the weak ES/algo correlation.
+
+**The data doesn't strongly justify it.** Queries in our sample that fetched beyond 50
+candidates show 0% with algo >= 0.5 past rank 50 — ES relevance drops off hard. And 98.3%
+of best results already fall within the top 50. The remaining 1.7% have best scores below
+0.5 (not meaningful misses).
+
+**The ES/algo divergence is real but bounded.** Per-query Spearman correlation between
+index_score and algo_score has a median of 0.42, with 21.7% of queries showing negative
+correlation. Top-5 overlap between ES and algo rankings is only 35%. The worst observed
+inversion: best algo result (0.592) at ES rank 153. However, even in these worst cases the
+buried results are sub-threshold (< 0.7). The ES query construction (name boosting,
+fuzziness, phonetic matching) would have to substantially fail for a true match to land
+beyond rank 50.
+
+**Recommendation:** Ship early stopping first and measure in production. If the miss rate
+is acceptable, a modest bump (e.g., to 15x) is cheap insurance and worth trying — but
+don't expect a measurable quality improvement based on what we see today.
diff --git a/yente/scoring.py b/yente/scoring.py
index 0b36dfd8..4cc288b0 100644
--- a/yente/scoring.py
+++ b/yente/scoring.py
@@ -1,5 +1,5 @@
 import asyncio
-from typing import Iterable, List, Optional, Type, Tuple
+from typing import Iterable, List, Type, Tuple
 from nomenklatura.matching.types import ScoringAlgorithm, ScoringConfig
 
 from yente import settings
@@ -9,6 +9,23 @@
 
 log = get_logger(__name__)
 
+# Early stopping: candidates from ES are scored one by one by the matching algorithm
+# (e.g. LogicV2). In production, ~82% of scoring calls produce scores below cutoff, and
+# ~49% of queries have zero candidates above 0.5. To avoid wasting CPU on hopeless
+# candidates, we stop scoring after `patience` consecutive low-scoring results.
+#
+# When a promising score has been seen, patience is multiplied by EARLY_STOP_BOOST_FACTOR
+# to keep searching — queries with real matches tend to have good results scattered across
+# ES ranks (ES and algo scores correlate weakly).
+#
+# Thresholds are derived from the per-request `threshold` parameter so that users with
+# lower thresholds automatically get less aggressive early stopping.
+#
+# Caveat: this can miss results buried deep in the ES ranking. In production log analysis
+# (418 queries), the recommended defaults missed 5 results (1.2%), all sub-threshold
+# (highest 0.667 vs 0.7 threshold). Set YENTE_SCORE_EARLY_STOP_PATIENCE high to disable.
+EARLY_STOP_BOOST_FACTOR = 4
+
 
 async def score_results(
     algorithm: Type[ScoringAlgorithm],
@@ -16,11 +33,18 @@ async def score_results(
     results: Iterable[Tuple[Entity, float]],
     threshold: float = settings.SCORE_THRESHOLD,
     cutoff: float = 0.0,
-    limit: Optional[int] = None,
+    limit: int = settings.MATCH_PAGE,
     config: ScoringConfig = ScoringConfig.defaults(),
 ) -> Tuple[int, List[ScoredEntityResponse]]:
     scored: List[ScoredEntityResponse] = []
     matches = 0
+    # Early stopping variables:
+    consecutive_low = 0
+    patience = settings.SCORE_EARLY_STOP_PATIENCE
+    # Scores below this are counted as consecutive low results:
+    early_stop_threshold = threshold * 0.4
+    # A score above this triggers boosted patience:
+    boost_trigger = threshold * 0.6
     for rank, (result, index_score) in enumerate(results):
         scoring = algorithm.compare(query=entity, result=result, config=config)
         log.debug(
@@ -38,12 +62,24 @@ async def score_results(
         # more even response times when CPU-bound scoring requests pile up.
         await asyncio.sleep(0)
         response = ScoredEntityResponse.from_entity_result(result, scoring, threshold)
+
+        if response.score > early_stop_threshold:
+            consecutive_low = 0
+        else:
+            consecutive_low += 1
+
+        if response.score >= boost_trigger:
+            patience = settings.SCORE_EARLY_STOP_PATIENCE * EARLY_STOP_BOOST_FACTOR
+
         if response.score <= cutoff:
             continue
         if response.match:
             matches += 1
         scored.append(response)
 
+        if consecutive_low >= patience and rank >= limit:
+            break
+
     scored = sorted(scored, key=lambda r: r.score, reverse=True)
     if limit is not None:
         scored = scored[:limit]
diff --git a/yente/settings.py b/yente/settings.py
index 1bd6c44d..16484e72 100644
--- a/yente/settings.py
+++ b/yente/settings.py
@@ -175,6 +175,10 @@ def random_cron() -> str:
 # Default cutoff for scores that should not be returned as /match results:
 SCORE_CUTOFF = 0.50
 
+# Early stopping patience: stop scoring candidates after this many consecutive
+# low-scoring results. Set to a high value (e.g. 9999) to disable.
+SCORE_EARLY_STOP_PATIENCE = env_int("YENTE_SCORE_EARLY_STOP_PATIENCE", 5)
+
 # ElasticSearch and OpenSearch settings:
 INDEX_TYPE = env_str("YENTE_INDEX_TYPE", "elasticsearch").lower().strip()
 if INDEX_TYPE not in ["elasticsearch", "opensearch"]:

From 4ac2ce04dfc4451d312194fdffbb19f0adde25a9 Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@opensanctions.org>
Date: Wed, 8 Apr 2026 14:15:37 +0200
Subject: [PATCH 2/8] scoring: replace patience early stopping with score
 budget

Replace the multi-step patience/counter/flag logic with a single
equation: budget = budget - 1 + score / (threshold/2). A score of
threshold/2 breaks even; higher scores extend the search, lower
scores drain the budget. Stops when budget is exhausted.

Simpler (one accumulator, one equation, one setting) and better
on production data: 27% savings with 3 missed results vs 6 with
the patience approach, because the budget responds proportionally
to score quality rather than using a binary boost flag.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 yente/scoring.py  | 43 ++++++++++++++++---------------------------
 yente/settings.py |  6 +++---
 2 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/yente/scoring.py b/yente/scoring.py
index 4cc288b0..11a756cc 100644
--- a/yente/scoring.py
+++ b/yente/scoring.py
@@ -9,22 +9,22 @@
 
 log = get_logger(__name__)
 
-# Early stopping: candidates from ES are scored one by one by the matching algorithm
-# (e.g. LogicV2). In production, ~82% of scoring calls produce scores below cutoff, and
-# ~49% of queries have zero candidates above 0.5. To avoid wasting CPU on hopeless
-# candidates, we stop scoring after `patience` consecutive low-scoring results.
+# Early stopping via score budget: candidates from ES are scored one by one by the
+# matching algorithm (e.g. LogicV2). In production, ~82% of scoring calls produce scores
+# below cutoff, and ~49% of queries have zero candidates above 0.5. To avoid wasting CPU,
+# we maintain a budget that drains with each low-scoring candidate and refills with each
+# good one:
 #
-# When a promising score has been seen, patience is multiplied by EARLY_STOP_BOOST_FACTOR
-# to keep searching — queries with real matches tend to have good results scattered across
-# ES ranks (ES and algo scores correlate weakly).
+#   budget = budget - 1 + score / (threshold / 2)
 #
-# Thresholds are derived from the per-request `threshold` parameter so that users with
-# lower thresholds automatically get less aggressive early stopping.
+# A score of threshold/2 breaks even. Higher scores extend the search; lower scores drain
+# the budget. We stop once the budget is exhausted and at least `limit` candidates have
+# been scored. This adapts to query quality: real matches extend the search proportionally.
 #
 # Caveat: this can miss results buried deep in the ES ranking. In production log analysis
-# (418 queries), the recommended defaults missed 5 results (1.2%), all sub-threshold
-# (highest 0.667 vs 0.7 threshold). Set YENTE_SCORE_EARLY_STOP_PATIENCE high to disable.
-EARLY_STOP_BOOST_FACTOR = 4
+# (418 queries), budget=10 missed 3 results (0.7%), all sub-threshold (highest 0.592 vs
+# 0.7 threshold). Set YENTE_SCORE_EARLY_STOP_BUDGET high to disable.
+EARLY_STOP_BREAK_EVEN = 0.5  # fraction of threshold where budget breaks even
 
 
 async def score_results(
@@ -38,13 +38,8 @@ async def score_results(
 ) -> Tuple[int, List[ScoredEntityResponse]]:
     scored: List[ScoredEntityResponse] = []
     matches = 0
-    # Early stopping variables:
-    consecutive_low = 0
-    patience = settings.SCORE_EARLY_STOP_PATIENCE
-    # Scores below this are counted as consecutive low results:
-    early_stop_threshold = threshold * 0.4
-    # A score above this triggers boosted patience:
-    boost_trigger = threshold * 0.6
+    budget = float(settings.SCORE_EARLY_STOP_BUDGET)
+    tau = threshold * EARLY_STOP_BREAK_EVEN
     for rank, (result, index_score) in enumerate(results):
         scoring = algorithm.compare(query=entity, result=result, config=config)
         log.debug(
@@ -63,13 +58,7 @@ async def score_results(
         await asyncio.sleep(0)
         response = ScoredEntityResponse.from_entity_result(result, scoring, threshold)
 
-        if response.score > early_stop_threshold:
-            consecutive_low = 0
-        else:
-            consecutive_low += 1
-
-        if response.score >= boost_trigger:
-            patience = settings.SCORE_EARLY_STOP_PATIENCE * EARLY_STOP_BOOST_FACTOR
+        budget = budget - 1.0 + response.score / tau
 
         if response.score <= cutoff:
             continue
@@ -77,7 +66,7 @@ async def score_results(
             matches += 1
         scored.append(response)
 
-        if consecutive_low >= patience and rank >= limit:
+        if budget <= 0 and rank >= limit:
             break
 
     scored = sorted(scored, key=lambda r: r.score, reverse=True)
diff --git a/yente/settings.py b/yente/settings.py
index 16484e72..e66d45c5 100644
--- a/yente/settings.py
+++ b/yente/settings.py
@@ -175,9 +175,9 @@ def random_cron() -> str:
 # Default cutoff for scores that should not be returned as /match results:
 SCORE_CUTOFF = 0.50
 
-# Early stopping patience: stop scoring candidates after this many consecutive
-# low-scoring results. Set to a high value (e.g. 9999) to disable.
-SCORE_EARLY_STOP_PATIENCE = env_int("YENTE_SCORE_EARLY_STOP_PATIENCE", 5)
+# Early stopping budget for candidate scoring. Each candidate costs 1 token; its
+# score earns back score/(threshold/2) tokens. Set high (e.g. 9999) to disable.
+SCORE_EARLY_STOP_BUDGET = env_int("YENTE_SCORE_EARLY_STOP_BUDGET", 10)
 
 # ElasticSearch and OpenSearch settings:
 INDEX_TYPE = env_str("YENTE_INDEX_TYPE", "elasticsearch").lower().strip()

From 538aca43723e935dd027f75cb39e8a57903f2250 Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@opensanctions.org>
Date: Wed, 8 Apr 2026 17:40:02 +0200
Subject: [PATCH 3/8] scoring: evaluate budget check for every candidate, not
 just above-cutoff

The continue on cutoff filtering was skipping the budget <= 0 check,
so early stopping would almost never trigger when cutoff was set.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 yente/scoring.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/yente/scoring.py b/yente/scoring.py
index 11a756cc..7c4a4699 100644
--- a/yente/scoring.py
+++ b/yente/scoring.py
@@ -60,11 +60,10 @@ async def score_results(
 
         budget = budget - 1.0 + response.score / tau
 
-        if response.score <= cutoff:
-            continue
-        if response.match:
-            matches += 1
-        scored.append(response)
+        if response.score > cutoff:
+            if response.match:
+                matches += 1
+            scored.append(response)
 
         if budget <= 0 and rank >= limit:
             break

From 2014429f1b70be804fc517aeed78823767e1744d Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@opensanctions.org>
Date: Wed, 8 Apr 2026 17:40:42 +0200
Subject: [PATCH 4/8] scoring: guard against threshold <= 0 in budget
 computation

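Disable the early-stop check (budget=inf) when tau would be non-positive.
Note: the per-candidate budget update still evaluates score/tau, so a
threshold of exactly 0 still raises ZeroDivisionError; that division
needs its own guard.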

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 yente/scoring.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yente/scoring.py b/yente/scoring.py
index 7c4a4699..48c85eba 100644
--- a/yente/scoring.py
+++ b/yente/scoring.py
@@ -38,8 +38,8 @@ async def score_results(
 ) -> Tuple[int, List[ScoredEntityResponse]]:
     scored: List[ScoredEntityResponse] = []
     matches = 0
-    budget = float(settings.SCORE_EARLY_STOP_BUDGET)
     tau = threshold * EARLY_STOP_BREAK_EVEN
+    budget = float(settings.SCORE_EARLY_STOP_BUDGET) if tau > 0 else float("inf")
     for rank, (result, index_score) in enumerate(results):
         scoring = algorithm.compare(query=entity, result=result, config=config)
         log.debug(

From 4036e3d3fc903637965013c1d2a7a3e5cac1bbfa Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@opensanctions.org>
Date: Wed, 8 Apr 2026 17:41:14 +0200
Subject: [PATCH 5/8] scoring: remove dead None check on limit

limit is now typed as int, so the None guard was dead code.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 yente/scoring.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/yente/scoring.py b/yente/scoring.py
index 48c85eba..4f0160e4 100644
--- a/yente/scoring.py
+++ b/yente/scoring.py
@@ -69,6 +69,4 @@ async def score_results(
             break
 
     scored = sorted(scored, key=lambda r: r.score, reverse=True)
-    if limit is not None:
-        scored = scored[:limit]
-    return matches, scored
+    return matches, scored[:limit]

From 5592fb8b2a3b64f1ae6673e597a6965b7499fa4f Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@opensanctions.org>
Date: Wed, 8 Apr 2026 17:41:44 +0200
Subject: [PATCH 6/8] scoring: fix off-by-one in early stop minimum candidates
 check

rank is 0-based, so rank >= limit required limit+1 candidates.
Use rank + 1 >= limit to match the intended "at least limit" semantics.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 yente/scoring.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yente/scoring.py b/yente/scoring.py
index 4f0160e4..e9eaa7ca 100644
--- a/yente/scoring.py
+++ b/yente/scoring.py
@@ -65,7 +65,7 @@ async def score_results(
                 matches += 1
             scored.append(response)
 
-        if budget <= 0 and rank >= limit:
+        if budget <= 0 and rank + 1 >= limit:
             break
 
     scored = sorted(scored, key=lambda r: r.score, reverse=True)

From 5bcfcdf5b793b1d2b62924915a239fd4526591e3 Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@opensanctions.org>
Date: Wed, 8 Apr 2026 17:42:40 +0200
Subject: [PATCH 7/8] docs: document YENTE_SCORE_EARLY_STOP_BUDGET setting

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/settings.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/settings.md b/docs/settings.md
index 5e88863f..f54b76f5 100644
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -36,5 +36,6 @@ Yente features various configuration options related to data refresh and re-inde
 | `YENTE_MAX_MATCHES` | `500` | How many results to return per `/match` query at most. |
 | `YENTE_MATCH_CANDIDATES` | `10` | How many candidates to retrieve from the search as a multiplier of the `/match` limit. Note that increasing this parameter will also increase query cost, as each of these candidates scored after retrieval from the index.|
 | `YENTE_MATCH_FUZZY` | `true` | Whether to run expensive Levenshtein queries inside ElasticSearch. |
+| `YENTE_SCORE_EARLY_STOP_BUDGET` | `10` | Budget for early stopping during candidate scoring. Each candidate costs 1 token; its score earns back `score / (threshold/2)` tokens. When the budget is exhausted, scoring stops. Set to a high value (e.g. `9999`) to disable early stopping. |
 | `YENTE_DELTA_UPDATES` | `true` | When set to `false` Yente will download the entire dataset when refreshing the index. |
 | `YENTE_STREAM_LOAD`   | `true`   | If set to `false`, will download the full data before indexing it. This can improve the stability of the indexer, especially when the network connection is a bit sketchy, but requires some local disk cache space.   |

From d0daa59ae7b10e9ba0bef62a660c98ce6693bdb3 Mon Sep 17 00:00:00 2001
From: Friedrich Lindenberg <friedrich@opensanctions.org>
Date: Thu, 9 Apr 2026 20:12:22 +0200
Subject: [PATCH 8/8] scoring: rename SCORE_EARLY_STOP_BUDGET to
 SCORE_STOP_BUDGET

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/settings.md  | 2 +-
 yente/scoring.py  | 4 ++--
 yente/settings.py | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/settings.md b/docs/settings.md
index f54b76f5..9bc16430 100644
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -36,6 +36,6 @@ Yente features various configuration options related to data refresh and re-inde
 | `YENTE_MAX_MATCHES` | `500` | How many results to return per `/match` query at most. |
 | `YENTE_MATCH_CANDIDATES` | `10` | How many candidates to retrieve from the search as a multiplier of the `/match` limit. Note that increasing this parameter will also increase query cost, as each of these candidates scored after retrieval from the index.|
 | `YENTE_MATCH_FUZZY` | `true` | Whether to run expensive Levenshtein queries inside ElasticSearch. |
-| `YENTE_SCORE_EARLY_STOP_BUDGET` | `10` | Budget for early stopping during candidate scoring. Each candidate costs 1 token; its score earns back `score / (threshold/2)` tokens. When the budget is exhausted, scoring stops. Set to a high value (e.g. `9999`) to disable early stopping. |
+| `YENTE_SCORE_STOP_BUDGET` | `10` | Budget for early stopping during candidate scoring. Each candidate costs 1 token; its score earns back `score / (threshold/2)` tokens. When the budget is exhausted and at least as many candidates as the `/match` limit have been scored, scoring stops. Set to a high value (e.g. `9999`) to disable early stopping. |
 | `YENTE_DELTA_UPDATES` | `true` | When set to `false` Yente will download the entire dataset when refreshing the index. |
 | `YENTE_STREAM_LOAD`   | `true`   | If set to `false`, will download the full data before indexing it. This can improve the stability of the indexer, especially when the network connection is a bit sketchy, but requires some local disk cache space.   |
diff --git a/yente/scoring.py b/yente/scoring.py
index e9eaa7ca..993aadde 100644
--- a/yente/scoring.py
+++ b/yente/scoring.py
@@ -23,7 +23,7 @@
 #
 # Caveat: this can miss results buried deep in the ES ranking. In production log analysis
 # (418 queries), budget=10 missed 3 results (0.7%), all sub-threshold (highest 0.592 vs
-# 0.7 threshold). Set YENTE_SCORE_EARLY_STOP_BUDGET high to disable.
+# 0.7 threshold). Set YENTE_SCORE_STOP_BUDGET high to disable.
 EARLY_STOP_BREAK_EVEN = 0.5  # fraction of threshold where budget breaks even
 
 
@@ -39,7 +39,7 @@ async def score_results(
     scored: List[ScoredEntityResponse] = []
     matches = 0
     tau = threshold * EARLY_STOP_BREAK_EVEN
-    budget = float(settings.SCORE_EARLY_STOP_BUDGET) if tau > 0 else float("inf")
+    budget = float(settings.SCORE_STOP_BUDGET) if tau > 0 else float("inf")
     for rank, (result, index_score) in enumerate(results):
         scoring = algorithm.compare(query=entity, result=result, config=config)
         log.debug(
diff --git a/yente/settings.py b/yente/settings.py
index e66d45c5..6cdd898a 100644
--- a/yente/settings.py
+++ b/yente/settings.py
@@ -177,7 +177,7 @@ def random_cron() -> str:
 
 # Early stopping budget for candidate scoring. Each candidate costs 1 token; its
 # score earns back score/(threshold/2) tokens. Set high (e.g. 9999) to disable.
-SCORE_EARLY_STOP_BUDGET = env_int("YENTE_SCORE_EARLY_STOP_BUDGET", 10)
+SCORE_STOP_BUDGET = env_int("YENTE_SCORE_STOP_BUDGET", 10)
 
 # ElasticSearch and OpenSearch settings:
 INDEX_TYPE = env_str("YENTE_INDEX_TYPE", "elasticsearch").lower().strip()