From 36fbd9aa8e55d1d5de8f017bd758e3a731853e3c Mon Sep 17 00:00:00 2001
From: Abdulkadirklc <104304986+Abdulkadirklc@users.noreply.github.com>
Date: Tue, 31 Mar 2026 22:15:49 +0300
Subject: [PATCH 1/5] feat(recall): add proof_count boost to combined scoring

Observations with more supporting evidence now rank slightly higher
in recall results. proof_count is threaded through the retrieval
pipeline and applied as a multiplicative boost in reranking:

- types.py: add proof_count field to RetrievalResult
- retrieval.py: include proof_count in SELECT columns
- reranking.py: add log1p-normalized proof_count boost (alpha=0.1)

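The boost uses the same multiplicative pattern as recency and temporal
signals. proof_norm is log1p(proof_count) / log1p(100), capped at 1.0,
so with alpha=0.1 the boost is neutral near proof_count=9:
proof_count=1 gives about -3.5%, proof_count=50 about +3.5%, and
proof_count>=100 the maximum +5%. Rows with no proof_count
(non-observation fact types) or a count below 1 use the neutral 0.5
and are unaffected.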
---
 .../hindsight_api/engine/search/reranking.py  | 39 +++++++++++++++----
 .../hindsight_api/engine/search/retrieval.py  |  2 +-
 .../hindsight_api/engine/search/types.py      |  2 +
 3 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/hindsight-api-slim/hindsight_api/engine/search/reranking.py b/hindsight-api-slim/hindsight_api/engine/search/reranking.py
index 7d0c37cfe..0937e5702 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/reranking.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/reranking.py
@@ -2,6 +2,7 @@
 Cross-encoder neural reranking for search results.
 """
 
+import math
 from datetime import datetime, timezone
 
 from .types import MergedCandidate, ScoredResult
@@ -13,6 +14,7 @@
 # so the max combined boost is (1 + alpha/2)^2 ≈ +21% and min is (1 - alpha/2)^2 ≈ -19%.
 _RECENCY_ALPHA: float = 0.2
 _TEMPORAL_ALPHA: float = 0.2
+_PROOF_COUNT_ALPHA: float = 0.1  # Conservative: max ±5% for evidence strength
 
 
 def apply_combined_scoring(
@@ -20,28 +22,38 @@ def apply_combined_scoring(
     now: datetime,
     recency_alpha: float = _RECENCY_ALPHA,
     temporal_alpha: float = _TEMPORAL_ALPHA,
+    proof_count_alpha: float = _PROOF_COUNT_ALPHA,
 ) -> None:
     """Apply combined scoring to a list of ScoredResults in-place.
 
-    Uses the cross-encoder score as the primary relevance signal, with recency
-    and temporal proximity applied as multiplicative boosts. This ensures the
-    influence of these secondary signals is always proportional to the base
-    relevance score, regardless of the cross-encoder model's score calibration.
+    Uses the cross-encoder score as the primary relevance signal, with recency,
+    temporal proximity, and proof count applied as multiplicative boosts. This
+    ensures the influence of these secondary signals is always proportional to
+    the base relevance score, regardless of the cross-encoder model's score
+    calibration.
 
     Formula::
 
-        recency_boost  = 1 + recency_alpha  * (recency  - 0.5)   # in [1-α/2, 1+α/2]
-        temporal_boost = 1 + temporal_alpha * (temporal - 0.5)   # in [1-α/2, 1+α/2]
-        combined_score = cross_encoder_score_normalized * recency_boost * temporal_boost
+        recency_boost     = 1 + recency_alpha     * (recency     - 0.5)   # in [1-α/2, 1+α/2]
+        temporal_boost    = 1 + temporal_alpha    * (temporal    - 0.5)   # in [1-α/2, 1+α/2]
+        proof_count_boost = 1 + proof_count_alpha * (proof_norm  - 0.5)   # in [1-α/2, 1+α/2]
+        combined_score    = CE_normalized * recency_boost * temporal_boost * proof_count_boost
+
+    proof_norm maps proof_count to [0, 1] using log1p normalization:
+      proof_count=1 → 0.5 (neutral), proof_count=5 → 0.72, proof_count=50 → 0.93
 
     Temporal proximity is treated as neutral (0.5) when not set by temporal retrieval,
     so temporal_boost collapses to 1.0 for non-temporal queries.
 
+    Proof count is treated as neutral (0.5) when not available (non-observation facts),
+    so proof_count_boost collapses to 1.0 for world/experience/opinion facts.
+
     Args:
         scored_results: Results from the cross-encoder reranker. Mutated in place.
         now: Current UTC datetime for recency calculation.
         recency_alpha: Max relative recency adjustment (default 0.2 → ±10%).
         temporal_alpha: Max relative temporal adjustment (default 0.2 → ±10%).
+        proof_count_alpha: Max relative proof count adjustment (default 0.1 → ±5%).
     """
     if now.tzinfo is None:
         now = now.replace(tzinfo=UTC)
@@ -59,13 +71,24 @@ def apply_combined_scoring(
         # Temporal proximity: meaningful only for temporal queries; neutral otherwise.
         sr.temporal = sr.retrieval.temporal_proximity if sr.retrieval.temporal_proximity is not None else 0.5
 
+        # Proof count: log-normalized evidence strength; neutral for non-observations.
+        # log1p(1)/log1p(100) ≈ 0.15 → maps to neutral 0.5 after centering.
+        # log1p(50)/log1p(100) ≈ 0.85 → strong boost.
+        proof_count = sr.retrieval.proof_count
+        if proof_count is not None and proof_count >= 1:
+            proof_norm = math.log1p(proof_count) / math.log1p(100)  # → [0, 1]
+            proof_norm = min(proof_norm, 1.0)
+        else:
+            proof_norm = 0.5  # Neutral
+
         # RRF: kept at 0.0 for trace continuity but excluded from scoring.
         # RRF is batch-relative (min-max normalised) and redundant after reranking.
         sr.rrf_normalized = 0.0
 
         recency_boost = 1.0 + recency_alpha * (sr.recency - 0.5)
         temporal_boost = 1.0 + temporal_alpha * (sr.temporal - 0.5)
-        sr.combined_score = sr.cross_encoder_score_normalized * recency_boost * temporal_boost
+        proof_count_boost = 1.0 + proof_count_alpha * (proof_norm - 0.5)
+        sr.combined_score = sr.cross_encoder_score_normalized * recency_boost * temporal_boost * proof_count_boost
         sr.weight = sr.combined_score
 
 
diff --git a/hindsight-api-slim/hindsight_api/engine/search/retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/retrieval.py
index bca52afdf..2e03eb0e6 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/retrieval.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/retrieval.py
@@ -148,7 +148,7 @@ async def retrieve_semantic_bm25_combined(
 
     cols = (
         "id, text, context, event_date, occurred_start, occurred_end, mentioned_at, "
-        "fact_type, document_id, chunk_id, tags, metadata"
+        "fact_type, document_id, chunk_id, tags, metadata, proof_count"
     )
     table = fq_table("memory_units")
 
diff --git a/hindsight-api-slim/hindsight_api/engine/search/types.py b/hindsight-api-slim/hindsight_api/engine/search/types.py
index 90edcf0ee..6a63f5c5f 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/types.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/types.py
@@ -48,6 +48,7 @@ class RetrievalResult:
     chunk_id: str | None = None
     tags: list[str] | None = None  # Visibility scope tags
     metadata: dict[str, str] | None = None  # User-provided metadata
+    proof_count: int | None = None  # Number of supporting memories (observations only)
 
     # Retrieval-specific scores (only one will be set depending on retrieval method)
     similarity: float | None = None  # Semantic retrieval
@@ -72,6 +73,7 @@ def from_db_row(cls, row: dict[str, Any]) -> "RetrievalResult":
             chunk_id=row.get("chunk_id"),
             tags=row.get("tags"),
             metadata=row.get("metadata"),
+            proof_count=row.get("proof_count"),
             similarity=row.get("similarity"),
             bm25_score=row.get("bm25_score"),
             activation=row.get("activation"),

From 24e7e6a1fb22b013b0850cf78cd200ddf4335f07 Mon Sep 17 00:00:00 2001
From: Abdulkadirklc <104304986+Abdulkadirklc@users.noreply.github.com>
Date: Wed, 1 Apr 2026 12:58:37 +0300
Subject: [PATCH 2/5] fix(retrieval): Apply proof_count boost to graph and
 temporal retrieval, drop log1p(100) normalization cap

---
 .../engine/search/graph_retrieval.py          |  4 +-
 .../hindsight_api/engine/search/reranking.py  | 17 ++--
 .../hindsight_api/engine/search/retrieval.py  |  4 +-
 .../tests/test_reranking_proof_count.py       | 94 +++++++++++++++++++
 4 files changed, 108 insertions(+), 11 deletions(-)
 create mode 100644 hindsight-api-slim/tests/test_reranking_proof_count.py

diff --git a/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py
index 7cdde9290..e5cf27d8a 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py
@@ -175,7 +175,7 @@ async def _retrieve_with_conn(
         entry_points = await conn.fetch(
             f"""
             SELECT id, text, context, event_date, occurred_start, occurred_end,
-                   mentioned_at, fact_type, document_id, chunk_id, tags,
+                   mentioned_at, fact_type, document_id, chunk_id, tags, proof_count,
                    1 - (embedding <=> $1::vector) AS similarity
             FROM {fq_table("memory_units")}
             WHERE bank_id = $2
@@ -230,7 +230,7 @@ async def _retrieve_with_conn(
                 neighbors = await conn.fetch(
                     f"""
                     SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.occurred_end,
-                           mu.mentioned_at, mu.fact_type,
+                           mu.mentioned_at, mu.fact_type, mu.proof_count,
                            mu.document_id, mu.chunk_id, mu.tags, mu.metadata,
                            ml.weight, ml.link_type, ml.from_unit_id
                     FROM {fq_table("memory_links")} ml
diff --git a/hindsight-api-slim/hindsight_api/engine/search/reranking.py b/hindsight-api-slim/hindsight_api/engine/search/reranking.py
index 0937e5702..e684ef8ed 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/reranking.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/reranking.py
@@ -39,8 +39,10 @@ def apply_combined_scoring(
         proof_count_boost = 1 + proof_count_alpha * (proof_norm  - 0.5)   # in [1-α/2, 1+α/2]
         combined_score    = CE_normalized * recency_boost * temporal_boost * proof_count_boost
 
-    proof_norm maps proof_count to [0, 1] using log1p normalization:
-      proof_count=1 → 0.5 (neutral), proof_count=5 → 0.72, proof_count=50 → 0.93
+    proof_norm maps proof_count using natural log1p without an arbitrary cap:
+      proof_count=1 → base log1p(1)=0.693 (difference=0.0 → neutral multiplier)
+      proof_count=10 → log1p(10)=2.39 (gives boost)
+      proof_count=100 → log1p(100)=4.61 (strong boost)
 
     Temporal proximity is treated as neutral (0.5) when not set by temporal retrieval,
     so temporal_boost collapses to 1.0 for non-temporal queries.
@@ -72,14 +74,15 @@ def apply_combined_scoring(
         sr.temporal = sr.retrieval.temporal_proximity if sr.retrieval.temporal_proximity is not None else 0.5
 
         # Proof count: log-normalized evidence strength; neutral for non-observations.
-        # log1p(1)/log1p(100) ≈ 0.15 → maps to neutral 0.5 after centering.
-        # log1p(50)/log1p(100) ≈ 0.85 → strong boost.
         proof_count = sr.retrieval.proof_count
         if proof_count is not None and proof_count >= 1:
-            proof_norm = math.log1p(proof_count) / math.log1p(100)  # → [0, 1]
-            proof_norm = min(proof_norm, 1.0)
+            # We don't cap this at an arbitrary number like 100 anymore.
+            # log1p naturally dampens large numbers:
+            # count=1 -> 0.69, count=50 -> 3.93, count=500 -> 6.21
+            proof_norm = math.log1p(proof_count)
         else:
-            proof_norm = 0.5  # Neutral
+            # Neutral baseline for log1p is log1p(1) = 0.693
+            proof_norm = math.log1p(1)
 
         # RRF: kept at 0.0 for trace continuity but excluded from scoring.
         # RRF is batch-relative (min-max normalised) and redundant after reranking.
diff --git a/hindsight-api-slim/hindsight_api/engine/search/retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/retrieval.py
index 2e03eb0e6..0538b58dd 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/retrieval.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/retrieval.py
@@ -343,7 +343,7 @@ async def retrieve_temporal_combined(
               {groups_clause}
         ),
         sim_ranked AS (
-            SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, mu.metadata,
+            SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.proof_count, mu.document_id, mu.chunk_id, mu.tags, mu.metadata,
                    1 - (mu.embedding <=> $1::vector) AS similarity,
                    ROW_NUMBER() OVER (PARTITION BY mu.fact_type ORDER BY mu.embedding <=> $1::vector) AS sim_rn
             FROM date_ranked dr
@@ -351,7 +351,7 @@ async def retrieve_temporal_combined(
             WHERE dr.rn <= 50
               AND (1 - (mu.embedding <=> $1::vector)) >= $6
         )
-        SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags, metadata, similarity
+        SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, proof_count, document_id, chunk_id, tags, metadata, similarity
         FROM sim_ranked
         WHERE sim_rn <= 10
         """,
diff --git a/hindsight-api-slim/tests/test_reranking_proof_count.py b/hindsight-api-slim/tests/test_reranking_proof_count.py
new file mode 100644
index 000000000..2f76a29c6
--- /dev/null
+++ b/hindsight-api-slim/tests/test_reranking_proof_count.py
@@ -0,0 +1,94 @@
+"""
+Unit tests for proof_count boost in reranking.
+"""
+
+from datetime import datetime, timezone
+import pytest
+from uuid import uuid4
+
+from hindsight_api.engine.search.types import RetrievalResult, MergedCandidate, ScoredResult
+from hindsight_api.engine.search.reranking import apply_combined_scoring
+
+UTC = timezone.utc
+
+def create_mock_scored_result(proof_count: int | None = None, ce_score: float = 0.8) -> ScoredResult:
+    """Helper to create a minimal ScoredResult suitable for scoring tests."""
+    retrieval = RetrievalResult(
+        id=uuid4(),
+        text="Test mock fact",
+        fact_type="observation" if proof_count is not None else "world",
+        document_id=uuid4(),
+        chunk_id=uuid4(),
+        embedding=[0.1]*384,
+        similarity=0.9,
+        proof_count=proof_count,
+        # Default neutral dates for testing so only proof_count changes score
+        occurred_start=datetime.now(UTC),
+        occurred_end=datetime.now(UTC)
+    )
+    candidate = MergedCandidate(
+        id=retrieval.id,
+        retrieval=retrieval,
+        semantic_rank=1,
+        bm25_rank=1,
+        rrf_score=0.1
+    )
+    return ScoredResult(
+        candidate=candidate,
+        cross_encoder_score=ce_score,
+        cross_encoder_score_normalized=ce_score,
+        weight=ce_score,
+    )
+
+def test_proof_count_neutral_when_none():
+    """Test that when proof_count is None (e.g. non-observation), it gets neutral 0.5 norm."""
+    sr = create_mock_scored_result(proof_count=None, ce_score=0.8)
+    now = datetime.now(UTC)
+    
+    apply_combined_scoring([sr], now, proof_count_alpha=0.1)
+    
+    # Neutral multiplier means score shouldn't be boosted by proof_count
+    # Since recency is neutral (just created) and temporal is neutral, score should remain unchanged
+    assert sr.combined_score == pytest.approx(0.8, rel=1e-3)
+
+def test_proof_count_neutral_at_one():
+    """Test that proof_count=1 gives neutral multiplier."""
+    sr = create_mock_scored_result(proof_count=1, ce_score=0.8)
+    now = datetime.now(UTC)
+    
+    apply_combined_scoring([sr], now, proof_count_alpha=0.1)
+    
+    # proof_count=1 -> log1p(1) = 0.693, base log1p(1) = 0.693 -> difference 0.0 -> multipler 1.0
+    assert sr.combined_score == pytest.approx(0.8, rel=1e-3)
+
+def test_proof_count_increases_with_higher_counts():
+    """Test that higher proof counts yield strictly higher scores."""
+    now = datetime.now(UTC)
+    
+    # Create results with increasing proof counts
+    sr_5 = create_mock_scored_result(proof_count=5, ce_score=0.8)
+    sr_50 = create_mock_scored_result(proof_count=50, ce_score=0.8)
+    sr_100 = create_mock_scored_result(proof_count=100, ce_score=0.8)
+    
+    # Process them
+    apply_combined_scoring([sr_5, sr_50, sr_100], now, proof_count_alpha=0.1)
+    
+    # Assure scores strictly increase
+    assert sr_5.combined_score > 0.8
+    assert sr_50.combined_score > sr_5.combined_score
+    assert sr_100.combined_score > sr_50.combined_score
+
+def test_proof_count_no_hardcoded_cap_at_100():
+    """Test that observations with counts > 100 continue to scale up (no log1p(100) cap)."""
+    now = datetime.now(UTC)
+    
+    # If capped at 100, these would both get identical scores
+    sr_100 = create_mock_scored_result(proof_count=100, ce_score=0.8)
+    sr_500 = create_mock_scored_result(proof_count=500, ce_score=0.8)
+    sr_1000 = create_mock_scored_result(proof_count=1000, ce_score=0.8)
+    
+    apply_combined_scoring([sr_100, sr_500, sr_1000], now, proof_count_alpha=0.1)
+    
+    # Must strictly increase, not plateau
+    assert sr_500.combined_score > sr_100.combined_score
+    assert sr_1000.combined_score > sr_500.combined_score

From ba4021e5284a754c89e09f16b63b993f48751b47 Mon Sep 17 00:00:00 2001
From: Abdulkadirklc <104304986+Abdulkadirklc@users.noreply.github.com>
Date: Wed, 1 Apr 2026 13:11:01 +0300
Subject: [PATCH 3/5] fix(retrieval): correct proof_norm math to zero-center at
 count 1

---
 .../hindsight_api/engine/search/reranking.py  | 22 ++++++++++---------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/hindsight-api-slim/hindsight_api/engine/search/reranking.py b/hindsight-api-slim/hindsight_api/engine/search/reranking.py
index e684ef8ed..f4d83c238 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/reranking.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/reranking.py
@@ -39,10 +39,10 @@ def apply_combined_scoring(
         proof_count_boost = 1 + proof_count_alpha * (proof_norm  - 0.5)   # in [1-α/2, 1+α/2]
         combined_score    = CE_normalized * recency_boost * temporal_boost * proof_count_boost
 
-    proof_norm maps proof_count using natural log1p without an arbitrary cap:
-      proof_count=1 → base log1p(1)=0.693 (difference=0.0 → neutral multiplier)
-      proof_count=10 → log1p(10)=2.39 (gives boost)
-      proof_count=100 → log1p(100)=4.61 (strong boost)
+    proof_norm maps proof_count using a smooth logarithmic curve centered at 0.5:
+      proof_count=1 → 0.5 + 0 = 0.5 (neutral multiplier)
+      proof_count=150 → 0.5 + 0.5 = 1.0 (strong boost)
+      proof_count=22000 → 0.5 + 1.0 = 1.5 (very strong boost)
 
     Temporal proximity is treated as neutral (0.5) when not set by temporal retrieval,
     so temporal_boost collapses to 1.0 for non-temporal queries.
@@ -76,13 +76,15 @@ def apply_combined_scoring(
         # Proof count: log-normalized evidence strength; neutral for non-observations.
         proof_count = sr.retrieval.proof_count
         if proof_count is not None and proof_count >= 1:
-            # We don't cap this at an arbitrary number like 100 anymore.
-            # log1p naturally dampens large numbers:
-            # count=1 -> 0.69, count=50 -> 3.93, count=500 -> 6.21
-            proof_norm = math.log1p(proof_count)
+            # We don't cap this at an arbitrary number. 
+            # We scale naturally so that:
+            # count=1 -> 0.5 (neutral baseline)
+            # count=150 -> 1.0 (+5% boost)
+            # count=22000 -> 1.5 (+10% boost)
+            proof_norm = 0.5 + (math.log(proof_count) / 10.0)
         else:
-            # Neutral baseline for log1p is log1p(1) = 0.693
-            proof_norm = math.log1p(1)
+            # Neutral baseline is precisely 0.5, ensuring neutral multiplier (1.0)
+            proof_norm = 0.5
 
         # RRF: kept at 0.0 for trace continuity but excluded from scoring.
         # RRF is batch-relative (min-max normalised) and redundant after reranking.

From 3aac487b91c91efa29e116376fa644f7100e7a7a Mon Sep 17 00:00:00 2001
From: Abdulkadirklc <104304986+Abdulkadirklc@users.noreply.github.com>
Date: Wed, 1 Apr 2026 14:14:26 +0300
Subject: [PATCH 4/5] fix(retrieval): Apply proof_count boost to link_expansion
 retrieval

---
 .../engine/search/link_expansion_retrieval.py | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/hindsight-api-slim/hindsight_api/engine/search/link_expansion_retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/link_expansion_retrieval.py
index 3b3f434ad..32e81c8e2 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/link_expansion_retrieval.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/link_expansion_retrieval.py
@@ -59,7 +59,7 @@ async def _find_semantic_seeds(
     rows = await conn.fetch(
         f"""
         SELECT id, text, context, event_date, occurred_start, occurred_end,
-               mentioned_at, fact_type, document_id, chunk_id, tags,
+               mentioned_at, fact_type, document_id, chunk_id, tags, proof_count,
                1 - (embedding <=> $1::vector) AS similarity
         FROM {fq_table("memory_units")}
         WHERE bank_id = $2
@@ -274,7 +274,7 @@ async def _expand_combined(
                 -- Score = COUNT(DISTINCT shared entities), mapped to [0,1] via tanh.
                 SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
                        mu.occurred_end, mu.mentioned_at,
-                       mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
+                       mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, mu.proof_count,
                        COUNT(DISTINCT ue_seed.entity_id)::float AS score,
                        'entity'::text AS source
                 FROM {ue} ue_seed
@@ -298,14 +298,14 @@ async def _expand_combined(
                 SELECT
                     id, text, context, event_date, occurred_start,
                     occurred_end, mentioned_at,
-                    fact_type, document_id, chunk_id, tags,
+                    fact_type, document_id, chunk_id, tags, proof_count,
                     MAX(weight) AS score,
                     'semantic'::text AS source
                 FROM (
                     SELECT
                         mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
                         mu.occurred_end, mu.mentioned_at,
-                        mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
+                        mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, mu.proof_count,
                         ml.weight
                     FROM {ml} ml
                     JOIN {mu} mu ON mu.id = ml.to_unit_id
@@ -317,7 +317,7 @@ async def _expand_combined(
                     SELECT
                         mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
                         mu.occurred_end, mu.mentioned_at,
-                        mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
+                        mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, mu.proof_count,
                         ml.weight
                     FROM {ml} ml
                     JOIN {mu} mu ON mu.id = ml.from_unit_id
@@ -328,7 +328,7 @@ async def _expand_combined(
                 ) sem_raw
                 GROUP BY id, text, context, event_date, occurred_start,
                          occurred_end, mentioned_at,
-                         fact_type, document_id, chunk_id, tags
+                         fact_type, document_id, chunk_id, tags, proof_count
                 ORDER BY score DESC
                 LIMIT $3
             ),
@@ -339,7 +339,7 @@ async def _expand_combined(
                 SELECT DISTINCT ON (mu.id)
                     mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
                     mu.occurred_end, mu.mentioned_at,
-                    mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
+                    mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, mu.proof_count,
                     ml.weight AS score,
                     'causal'::text AS source
                 FROM {ml} ml
@@ -429,7 +429,7 @@ async def _expand_observations(
             SELECT
                 mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
                 mu.occurred_end, mu.mentioned_at,
-                mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
+                mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, mu.proof_count,
                 (SELECT COUNT(DISTINCT s) FROM unnest(mu.source_memory_ids) s WHERE s = ANY(ca.source_ids))::float AS score
             FROM {fq_table("memory_units")} mu, connected_array ca
             WHERE mu.fact_type = 'observation'
@@ -453,13 +453,13 @@ async def _expand_observations(
                 SELECT
                     id, text, context, event_date, occurred_start,
                     occurred_end, mentioned_at,
-                    fact_type, document_id, chunk_id, tags,
+                    fact_type, document_id, chunk_id, tags, proof_count,
                     MAX(weight) AS score,
                     'semantic'::text AS source
                 FROM (
                     SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
                            mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id,
-                           mu.chunk_id, mu.tags, ml.weight
+                           mu.chunk_id, mu.tags, mu.proof_count, ml.weight
                     FROM {ml} ml JOIN {mu} mu ON mu.id = ml.to_unit_id
                     WHERE ml.from_unit_id = ANY($1::uuid[])
                       AND ml.link_type = 'semantic' AND mu.fact_type = 'observation'
@@ -467,21 +467,21 @@ async def _expand_observations(
                     UNION ALL
                     SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
                            mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id,
-                           mu.chunk_id, mu.tags, ml.weight
+                           mu.chunk_id, mu.tags, mu.proof_count, ml.weight
                     FROM {ml} ml JOIN {mu} mu ON mu.id = ml.from_unit_id
                     WHERE ml.to_unit_id = ANY($1::uuid[])
                       AND ml.link_type = 'semantic' AND mu.fact_type = 'observation'
                       AND mu.id != ALL($1::uuid[])
                 ) sem_raw
                 GROUP BY id, text, context, event_date, occurred_start, occurred_end,
-                         mentioned_at, fact_type, document_id, chunk_id, tags
+                         mentioned_at, fact_type, document_id, chunk_id, tags, proof_count
                 ORDER BY score DESC LIMIT $2
             ),
             causal_expanded AS (
                 SELECT DISTINCT ON (mu.id)
                     mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start,
                     mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id,
-                    mu.chunk_id, mu.tags, ml.weight AS score, 'causal'::text AS source
+                    mu.chunk_id, mu.tags, mu.proof_count, ml.weight AS score, 'causal'::text AS source
                 FROM {ml} ml JOIN {mu} mu ON ml.to_unit_id = mu.id
                 WHERE ml.from_unit_id = ANY($1::uuid[])
                   AND ml.link_type IN ('causes', 'caused_by', 'enables', 'prevents')

From ad425c2879a483dae85712575e65077cecc20eba Mon Sep 17 00:00:00 2001
From: Abdulkadirklc <104304986+Abdulkadirklc@users.noreply.github.com>
Date: Thu, 2 Apr 2026 19:44:44 +0300
Subject: [PATCH 5/5] fix: remove BFS zombie, clamp proof_norm to [0,1], fix
 test comment (log1p->math.log)

---
 .../engine/search/graph_retrieval.py          | 211 ------------------
 .../hindsight_api/engine/search/reranking.py  |  14 +-
 .../tests/test_reranking_proof_count.py       |  20 +-
 3 files changed, 16 insertions(+), 229 deletions(-)

diff --git a/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py
index c5af33ecc..377b83cb3 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py
@@ -65,214 +65,3 @@ async def retrieve(
             Tuple of (List of RetrievalResult with activation scores, optional timing info)
         """
         pass
-class BFSGraphRetriever(GraphRetriever):
-    """
-    Graph retrieval using BFS-style spreading activation.
-
-    Starting from semantic entry points, spreads activation through
-    the memory graph (entity, temporal, causal links) using breadth-first
-    traversal with decaying activation.
-
-    This is the original Hindsight graph retrieval algorithm.
-    """
-
-    def __init__(
-        self,
-        entry_point_limit: int = 5,
-        entry_point_threshold: float = 0.5,
-        activation_decay: float = 0.8,
-        min_activation: float = 0.1,
-        batch_size: int = 20,
-    ):
-        """
-        Initialize BFS graph retriever.
-
-        Args:
-            entry_point_limit: Maximum number of entry points to start from
-            entry_point_threshold: Minimum semantic similarity for entry points
-            activation_decay: Decay factor per hop (activation *= decay)
-            min_activation: Minimum activation to continue spreading
-            batch_size: Number of nodes to process per batch (for neighbor fetching)
-        """
-        self.entry_point_limit = entry_point_limit
-        self.entry_point_threshold = entry_point_threshold
-        self.activation_decay = activation_decay
-        self.min_activation = min_activation
-        self.batch_size = batch_size
-
-    @property
-    def name(self) -> str:
-        return "bfs"
-
-    async def retrieve(
-        self,
-        pool,
-        query_embedding_str: str,
-        bank_id: str,
-        fact_type: str,
-        budget: int,
-        query_text: str | None = None,
-        semantic_seeds: list[RetrievalResult] | None = None,
-        temporal_seeds: list[RetrievalResult] | None = None,
-        adjacency=None,  # Not used by BFS
-        tags: list[str] | None = None,
-        tags_match: TagsMatch = "any",
-        tag_groups: list[TagGroup] | None = None,
-    ) -> tuple[list[RetrievalResult], MPFPTimings | None]:
-        """
-        Retrieve facts using BFS spreading activation.
-
-        Algorithm:
-        1. Find entry points (top semantic matches above threshold)
-        2. BFS traversal: visit neighbors, propagate decaying activation
-        3. Boost causal links (causes, enables, prevents)
-        4. Return visited nodes up to budget
-
-        Note: BFS finds its own entry points via embedding search.
-        The semantic_seeds, temporal_seeds, and adjacency parameters are accepted
-        for interface compatibility but not used.
-        """
-        async with acquire_with_retry(pool) as conn:
-            results = await self._retrieve_with_conn(
-                conn,
-                query_embedding_str,
-                bank_id,
-                fact_type,
-                budget,
-                tags=tags,
-                tags_match=tags_match,
-                tag_groups=tag_groups,
-            )
-            return results, None
-
-    async def _retrieve_with_conn(
-        self,
-        conn,
-        query_embedding_str: str,
-        bank_id: str,
-        fact_type: str,
-        budget: int,
-        tags: list[str] | None = None,
-        tags_match: TagsMatch = "any",
-        tag_groups: list[TagGroup] | None = None,
-    ) -> list[RetrievalResult]:
-        """Internal implementation with connection."""
-        from .tags import build_tag_groups_where_clause, build_tags_where_clause_simple
-
-        tags_clause = build_tags_where_clause_simple(tags, 6, match=tags_match)
-        tag_groups_param_start = 6 + (1 if tags else 0)
-        groups_clause, groups_params, _ = build_tag_groups_where_clause(tag_groups, tag_groups_param_start)
-        params = [query_embedding_str, bank_id, fact_type, self.entry_point_threshold, self.entry_point_limit]
-        if tags:
-            params.append(tags)
-        params.extend(groups_params)
-
-        # Step 1: Find entry points
-        entry_points = await conn.fetch(
-            f"""
-            SELECT id, text, context, event_date, occurred_start, occurred_end,
-                   mentioned_at, fact_type, document_id, chunk_id, tags, proof_count,
-                   1 - (embedding <=> $1::vector) AS similarity
-            FROM {fq_table("memory_units")}
-            WHERE bank_id = $2
-              AND embedding IS NOT NULL
-              AND fact_type = $3
-              AND (1 - (embedding <=> $1::vector)) >= $4
-              {tags_clause}
-              {groups_clause}
-            ORDER BY embedding <=> $1::vector
-            LIMIT $5
-            """,
-            *params,
-        )
-
-        if not entry_points:
-            logger.debug(
-                f"[BFS] No entry points found for fact_type={fact_type} (tags={tags}, tags_match={tags_match})"
-            )
-            return []
-
-        logger.debug(
-            f"[BFS] Found {len(entry_points)} entry points for fact_type={fact_type} "
-            f"(tags={tags}, tags_match={tags_match})"
-        )
-
-        # Step 2: BFS spreading activation
-        visited = set()
-        results = []
-        queue = [(RetrievalResult.from_db_row(dict(r)), r["similarity"]) for r in entry_points]
-        budget_remaining = budget
-
-        while queue and budget_remaining > 0:
-            # Collect a batch of nodes to process
-            batch_nodes = []
-            batch_activations = {}
-
-            while queue and len(batch_nodes) < self.batch_size and budget_remaining > 0:
-                current, activation = queue.pop(0)
-                unit_id = current.id
-
-                if unit_id not in visited:
-                    visited.add(unit_id)
-                    budget_remaining -= 1
-                    current.activation = activation
-                    results.append(current)
-                    batch_nodes.append(current.id)
-                    batch_activations[unit_id] = activation
-
-            # Batch fetch neighbors
-            if batch_nodes and budget_remaining > 0:
-                max_neighbors = len(batch_nodes) * 20
-                neighbors = await conn.fetch(
-                    f"""
-                    SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.occurred_end,
-                           mu.mentioned_at, mu.fact_type, mu.proof_count,
-                           mu.document_id, mu.chunk_id, mu.tags, mu.metadata,
-                           ml.weight, ml.link_type, ml.from_unit_id
-                    FROM {fq_table("memory_links")} ml
-                    JOIN {fq_table("memory_units")} mu ON ml.to_unit_id = mu.id
-                    WHERE ml.from_unit_id = ANY($1::uuid[])
-                      AND ml.weight >= $2
-                      AND mu.fact_type = $3
-                    ORDER BY ml.weight DESC
-                    LIMIT $4
-                    """,
-                    batch_nodes,
-                    self.min_activation,
-                    fact_type,
-                    max_neighbors,
-                )
-
-                for n in neighbors:
-                    neighbor_id = str(n["id"])
-                    if neighbor_id not in visited:
-                        parent_id = str(n["from_unit_id"])
-                        parent_activation = batch_activations.get(parent_id, 0.5)
-
-                        # Boost causal links
-                        link_type = n["link_type"]
-                        base_weight = n["weight"]
-
-                        if link_type in ("causes", "caused_by"):
-                            causal_boost = 2.0
-                        elif link_type in ("enables", "prevents"):
-                            causal_boost = 1.5
-                        else:
-                            causal_boost = 1.0
-
-                        effective_weight = base_weight * causal_boost
-                        new_activation = parent_activation * effective_weight * self.activation_decay
-
-                        if new_activation > self.min_activation:
-                            neighbor_result = RetrievalResult.from_db_row(dict(n))
-                            queue.append((neighbor_result, new_activation))
-
-        # Apply tags filtering (BFS may traverse into memories that don't match tags criteria)
-        if tags:
-            results = filter_results_by_tags(results, tags, match=tags_match)
-
-        # Apply compound tag group filtering (post-traversal)
-        if tag_groups:
-            results = filter_results_by_tag_groups(results, tag_groups)
-
-        return results
diff --git a/hindsight-api-slim/hindsight_api/engine/search/reranking.py b/hindsight-api-slim/hindsight_api/engine/search/reranking.py
index f4d83c238..ae9e0fc93 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/reranking.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/reranking.py
@@ -39,10 +39,10 @@ def apply_combined_scoring(
         proof_count_boost = 1 + proof_count_alpha * (proof_norm  - 0.5)   # in [1-α/2, 1+α/2]
         combined_score    = CE_normalized * recency_boost * temporal_boost * proof_count_boost
 
-    proof_norm maps proof_count using a smooth logarithmic curve centered at 0.5:
+    proof_norm maps proof_count using a smooth logarithmic curve centered at 0.5,
+    clamped to [0, 1]:
       proof_count=1 → 0.5 + 0 = 0.5 (neutral multiplier)
-      proof_count=150 → 0.5 + 0.5 = 1.0 (strong boost)
-      proof_count=22000 → 0.5 + 1.0 = 1.5 (very strong boost)
+      proof_count≥149 (≈e^5) → clamped to 1.0 (max +5% boost)
 
     Temporal proximity is treated as neutral (0.5) when not set by temporal retrieval,
     so temporal_boost collapses to 1.0 for non-temporal queries.
@@ -76,12 +76,8 @@ def apply_combined_scoring(
         # Proof count: log-normalized evidence strength; neutral for non-observations.
         proof_count = sr.retrieval.proof_count
         if proof_count is not None and proof_count >= 1:
-            # We don't cap this at an arbitrary number. 
-            # We scale naturally so that:
-            # count=1 -> 0.5 (neutral baseline)
-            # count=150 -> 1.0 (+5% boost)
-            # count=22000 -> 1.5 (+10% boost)
-            proof_norm = 0.5 + (math.log(proof_count) / 10.0)
+            # Clamp to [0, 1] so extreme counts stay within documented ±5% range
+            proof_norm = min(1.0, max(0.0, 0.5 + (math.log(proof_count) / 10.0)))
         else:
             # Neutral baseline is precisely 0.5, ensuring neutral multiplier (1.0)
             proof_norm = 0.5
diff --git a/hindsight-api-slim/tests/test_reranking_proof_count.py b/hindsight-api-slim/tests/test_reranking_proof_count.py
index 2f76a29c6..3fe3e341f 100644
--- a/hindsight-api-slim/tests/test_reranking_proof_count.py
+++ b/hindsight-api-slim/tests/test_reranking_proof_count.py
@@ -58,7 +58,7 @@ def test_proof_count_neutral_at_one():
     
     apply_combined_scoring([sr], now, proof_count_alpha=0.1)
     
-    # proof_count=1 -> log1p(1) = 0.693, base log1p(1) = 0.693 -> difference 0.0 -> multipler 1.0
+    # proof_count=1 -> math.log(1) = 0 -> 0.5 + 0/10 = 0.5 (neutral) -> multiplier 1.0
     assert sr.combined_score == pytest.approx(0.8, rel=1e-3)
 
 def test_proof_count_increases_with_higher_counts():
@@ -79,16 +79,18 @@ def test_proof_count_increases_with_higher_counts():
     assert sr_100.combined_score > sr_50.combined_score
 
 def test_proof_count_no_hardcoded_cap_at_100():
-    """Test that observations with counts > 100 continue to scale up (no log1p(100) cap)."""
+    """Test that proof_count continues to scale within the clamped [0, 1] range."""
     now = datetime.now(UTC)
     
-    # If capped at 100, these would both get identical scores
+    # Use values that stay below the clamp ceiling (proof_norm < 1.0)
+    # ln(5)/10≈0.16, ln(20)/10≈0.30, ln(100)/10≈0.46 → all under the 0.5 headroom, so none hit the clamp
+    sr_5 = create_mock_scored_result(proof_count=5, ce_score=0.8)
+    sr_20 = create_mock_scored_result(proof_count=20, ce_score=0.8)
     sr_100 = create_mock_scored_result(proof_count=100, ce_score=0.8)
-    sr_500 = create_mock_scored_result(proof_count=500, ce_score=0.8)
-    sr_1000 = create_mock_scored_result(proof_count=1000, ce_score=0.8)
     
-    apply_combined_scoring([sr_100, sr_500, sr_1000], now, proof_count_alpha=0.1)
+    apply_combined_scoring([sr_5, sr_20, sr_100], now, proof_count_alpha=0.1)
     
-    # Must strictly increase, not plateau
-    assert sr_500.combined_score > sr_100.combined_score
-    assert sr_1000.combined_score > sr_500.combined_score
+    # Must strictly increase within the valid range
+    assert sr_20.combined_score > sr_5.combined_score
+    assert sr_100.combined_score > sr_20.combined_score
+