7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -23,8 +23,15 @@ elfmem uses [Semantic Versioning](https://semver.org/).


### Added
- **`MemorySystem.learn_document(text, chunk_size, chunker, skip_llm)`:** Ingest a document in one call — chunks text, learns each chunk, auto-consolidates via `dream()` at `inbox_threshold` intervals. Accepts an optional `chunker` callback (e.g. `nltk.sent_tokenize`); default splits at sentence boundaries. Returns `LearnDocumentResult` with chunk and consolidation counts. A usage sketch follows this list.
- **`LearnDocumentResult` type:** New result type with `chunks_total`, `chunks_created`, `chunks_duplicate`, `consolidations`, `blocks_promoted`. Exported from `elfmem`.
- **BM25 keyword search in retrieval pipeline (stage 2b):** `hybrid_retrieve()` now runs BM25 in parallel with vector search, discovering blocks with strong keyword overlap that embedding similarity misses. Soft dependency on `rank_bm25` — when not installed, the stage is silently skipped (zero regression). Install via `pip install elfmem[bm25]`.
- **`dream(skip_llm, skip_contradictions)` parameters:** `dream()` now forwards `skip_llm` and `skip_contradictions` to `consolidate()`, enabling fast-path consolidation without bypassing policy tracking or threshold persistence.
- **`MABenchConfig.context_window_tokens`:** New config field (default 4096) representing the LM Studio model's context window. All answer-context truncation derives from this value; set to 2048 for smaller models.
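A minimal usage sketch of `learn_document()` and the new `dream()` flags from the list above. Only the names documented in these entries are taken from the source; the database path, sample text, and top-level setup are illustrative assumptions. The BM25 stage needs no code at all: it activates whenever `rank_bm25` is importable (`pip install elfmem[bm25]`) and is skipped otherwise.

```python
# Hedged sketch: API names follow the changelog entries above; the db path
# and sample text are placeholders, not project defaults.
import asyncio

import nltk

from elfmem import LearnDocumentResult, MemorySystem

nltk.download("punkt_tab", quiet=True)  # sentence tokenizer data


async def main() -> None:
    system = await MemorySystem.from_config("memory.db")  # path is illustrative

    # One call: chunk, learn each chunk, auto-consolidate via dream()
    # at inbox_threshold intervals.
    result: LearnDocumentResult = await system.learn_document(
        "First sentence. Second sentence. Third sentence.",
        chunk_size=1024,             # target words per chunk
        chunker=nltk.sent_tokenize,  # optional; default splits at sentence boundaries
        skip_llm=True,               # fast path: no LLM calls during consolidation
    )
    print(result.chunks_total, result.chunks_duplicate, result.blocks_promoted)

    # dream() now forwards the fast-path flags through to consolidate().
    await system.dream(skip_llm=True, skip_contradictions=True)


asyncio.run(main())
```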

### Fixed
- **Config wiring: `contradiction_threshold`, `near_dup_exact_threshold`, `near_dup_near_threshold`:** These three `MemoryConfig` fields existed but were not passed from `MemorySystem.consolidate()` to the consolidation operation. Custom config values were silently ignored (defaults matched, so no observable bug at default settings). Now wired through.
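A sketch of what the fix enables, assuming `MemoryConfig` is importable from `elfmem` and accepts these fields as keyword arguments; the import path and constructor shape are assumptions, and only the three field names come from the entry above.

```python
# Hypothetical: import path and constructor are assumptions; the numeric
# values are placeholders, not the library defaults.
from elfmem import MemoryConfig

cfg = MemoryConfig(
    contradiction_threshold=0.85,
    near_dup_exact_threshold=0.97,
    near_dup_near_threshold=0.92,
)
# Before the fix, consolidate() never received these values and silently
# used the defaults; they are now forwarded to the consolidation operation.
```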

### Added
- **LoCoMo benchmark harness:** Complete benchmark suite for evaluating elfmem against LoCoMo (ACL 2024) — 10 conversations, 1,986 QA pairs, 5 categories. Includes metrics (Porter-stemmed F1), typed data loading, BM25 hybrid retrieval, observation transform, and CLI runner with `--test`, `--baselines`, `--resume`, `--top-k`, `--category` flags. Results conform to `benchmark_report_spec.md`.
- **`consolidate(skip_llm=True)`:** Bypass all LLM calls during consolidation (embed + promote only). Reduces ingestion from hours to seconds for bulk import and benchmarks.
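A short sketch of the fast path from the entry above. `consolidate(skip_llm=True)` and `result.promoted` appear in this PR's adapter diff; the session bookkeeping mirrors that diff, and the helper itself is illustrative.

```python
# Sketch: bulk import, then one LLM-free consolidation pass (embed + promote).
# begin_session/end_session usage mirrors the adapter code below.
async def bulk_ingest(system, chunks: list[str]) -> int:
    await system.begin_session(task_type="ingestion")
    for chunk in chunks:
        await system.learn(content=chunk, category="knowledge", source="bulk")
    await system.end_session()

    await system.begin_session(task_type="consolidation")
    result = await system.consolidate(skip_llm=True)  # no LLM calls
    await system.end_session()
    return result.promoted
```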
186 changes: 26 additions & 160 deletions benchmarks/memoryagentbench/adapter.py
@@ -1,27 +1,27 @@
"""elfmem adapter for MemoryAgentBench — chunk ingestion + retrieval.
"""elfmem adapter for MemoryAgentBench — document ingestion + retrieval.

Key difference from LoCoMo: MemoryAgentBench's Conflict Resolution competency
tests contradiction detection — elfmem's primary moat. For CR, we use FULL
consolidation (contradiction detection ON). For other competencies, we use
skip_contradictions for speed.
skip_llm for speed.

BM25 hybrid retrieval is handled natively by elfmem's retrieval pipeline
(stage 2b in hybrid_retrieve). No adapter-level BM25 or RRF needed.
"""

from __future__ import annotations

import logging
import tempfile
import time
from dataclasses import dataclass, field
from dataclasses import dataclass
from pathlib import Path
from typing import Any

import nltk

from rank_bm25 import BM25Okapi

from elfmem import ElfmemConfig, MemorySystem

from benchmarks.memoryagentbench.config import MABenchConfig
from elfmem import ElfmemConfig, MemorySystem

nltk.download("punkt_tab", quiet=True)

@@ -50,65 +50,6 @@ class ExampleResult:
ingestion_seconds: float


class _BM25Index:
"""BM25 index over block content for keyword-boosted retrieval.

Built after consolidation from elfmem's active block content (summaries
when available, raw content otherwise). Stores block IDs so RRF merge
can match by ID rather than approximate content-prefix heuristics.
"""

def __init__(self) -> None:
self._ids: list[str] = []
self._contents: list[str] = []
self._bm25: BM25Okapi | None = None

def add(self, block_id: str, content: str) -> None:
self._ids.append(block_id)
self._contents.append(content)

def build(self) -> None:
if self._contents:
tokenized = [c.lower().split() for c in self._contents]
self._bm25 = BM25Okapi(tokenized)

def search(self, query: str, top_k: int = 10) -> list[tuple[str, str, float]]:
"""Return (block_id, content, bm25_score) triples ranked by score."""
if self._bm25 is None:
return []
scores = self._bm25.get_scores(query.lower().split())
ranked = sorted(
zip(self._ids, self._contents, scores),
key=lambda x: x[2],
reverse=True,
)
return ranked[:top_k]


def chunk_text(text: str, chunk_size: int = 1024) -> list[str]:
"""Split text into sentence-aligned chunks of ~chunk_size words.

Uses NLTK sentence tokenization for clean boundaries.
"""
sentences = nltk.sent_tokenize(text)
chunks: list[str] = []
current: list[str] = []
current_len = 0

for sentence in sentences:
sent_len = len(sentence.split())
if current_len + sent_len > chunk_size and current:
chunks.append(" ".join(current))
current = []
current_len = 0
current.append(sentence)
current_len += sent_len

if current:
chunks.append(" ".join(current))
return chunks


def _context_budget_words(config: MABenchConfig) -> int:
"""Answer-context word budget derived from the model's context window.

@@ -153,34 +94,6 @@ def build_elfmem_config(config: MABenchConfig) -> ElfmemConfig:
})


def _rrf_merge(
vector_blocks: list,
bm25_results: list[tuple[str, str, float]],
top_k: int,
k: int = 60,
) -> tuple[list, str]:
"""Merge vector search and BM25 results via Reciprocal Rank Fusion.

BM25 results carry block IDs (from _BM25Index.search), so matching is
exact — no content-prefix heuristics, no supplementary raw-chunk fallback.
Both retrieval paths use the same block content (summaries or raw text),
so the merged context is consistent.
"""
block_scores: dict[str, float] = {}
block_map: dict[str, object] = {}
for rank, block in enumerate(vector_blocks):
block_scores[block.id] = 1.0 / (k + rank)
block_map[block.id] = block

for rank, (block_id, _content, _score) in enumerate(bm25_results):
if block_id in block_map:
block_scores[block_id] += 1.0 / (k + rank)

ranked = sorted(block_map.values(), key=lambda b: block_scores[b.id], reverse=True)
trimmed = ranked[:top_k]
return trimmed, "\n\n".join(b.content for b in trimmed)


async def process_example(
example: dict[str, Any],
competency: str,
@@ -199,6 +112,7 @@ async def process_example(

# Conflict Resolution needs contradiction detection — elfmem's moat
is_conflict_resolution = competency == "Conflict_Resolution"
skip_llm = not is_conflict_resolution

elfmem_cfg = build_elfmem_config(config)

@@ -207,80 +121,32 @@

system = await MemorySystem.from_config(db_path, config=elfmem_cfg)
try:
# Phase 1: Chunk and ingest context
# Phase 1: Ingest document via learn_document()
start_ingest = time.monotonic()
chunks = chunk_text(context, config.chunk_size)
total_promoted = 0

# CR needs contradiction detection — elfmem's core capability.
# Other competencies use skip_llm for speed (embedding-only retrieval).
skip_llm = not is_conflict_resolution

await system.begin_session(task_type="ingestion")
for i, chunk in enumerate(chunks):
await system.learn(
content=chunk,
tags=[f"chunk:{i}"],
category="knowledge",
source="memoryagentbench",
)
# Consolidate periodically
if (i + 1) % config.consolidate_every_n_chunks == 0:
await system.end_session()
await system.begin_session(task_type="consolidation")
result = await system.consolidate(skip_llm=skip_llm)
total_promoted += result.promoted
await system.end_session()
await system.begin_session(task_type="ingestion")
await system.end_session()

# Final consolidation for remaining chunks
await system.begin_session(task_type="consolidation")
result = await system.consolidate(skip_llm=skip_llm)
total_promoted += result.promoted
await system.end_session()

# Build BM25 from active block content after consolidation.
# With skip_llm=False (CR), blocks carry LLM-generated summaries —
# the same content used by elfmem's vector retrieval. This ensures
# BM25 and vector search are consistent, enabling exact ID-based RRF
# merging without heuristic content-prefix matching.
# With skip_llm=True (other competencies), content falls back to the
# raw chunk text — same as the previous raw-chunk approach.
await system.begin_session(task_type="retrieval")
index_frame = await system.frame("attention", query=None, top_k=10000)
await system.end_session()

bm25_index = _BM25Index()
for block in index_frame.blocks:
bm25_index.add(block.id, block.content)
bm25_index.build()

doc_result = await system.learn_document(
context,
chunk_size=config.chunk_size,
chunker=nltk.sent_tokenize,
source="memoryagentbench",
skip_llm=skip_llm,
)
ingestion_time = time.monotonic() - start_ingest
log.info(f" Ingested {len(chunks)} chunks, {total_promoted} promoted in {ingestion_time:.1f}s")
log.info(
f" Ingested {doc_result.chunks_total} chunks, "
f"{doc_result.blocks_promoted} promoted in {ingestion_time:.1f}s"
)

# Phase 2: Answer each question
qa_results: list[QAResult] = []
for q_idx, (question, answer_list) in enumerate(zip(questions, answers)):
for _q_idx, (question, answer_list) in enumerate(zip(questions, answers, strict=False)):
q_start = time.monotonic()

await system.begin_session(task_type="retrieval")
frame_result = await system.frame("attention", query=question, top_k=config.top_k)
await system.end_session()

# Build context from blocks directly, bypassing the frame's hardcoded
# token_budget (2000 tokens in the attention frame definition). Both
# the BM25 and no-BM25 paths are now bounded only by _context_budget_words,
# which is derived from config.context_window_tokens.
blocks = frame_result.blocks
# elfmem's recall() includes BM25 (stage 2b) + graph expansion
# + contradiction suppression natively.
blocks = await system.recall(query=question, top_k=config.top_k)
context_text = "\n\n".join(b.content for b in blocks)
bm25_hits = bm25_index.search(question, top_k=config.top_k)
if bm25_hits:
blocks, context_text = _rrf_merge(blocks, bm25_hits, config.top_k)

# Truncate context to fit the model's context window.
# Budget is derived from config.context_window_tokens minus
# system prompt, template, question, and answer overhead.
max_context_words = _context_budget_words(config)
words = context_text.split()
if len(words) > max_context_words:
@@ -304,8 +170,8 @@ async def process_example(
return ExampleResult(
source=source,
competency=competency,
chunks_ingested=len(chunks),
blocks_promoted=total_promoted,
chunks_ingested=doc_result.chunks_total,
blocks_promoted=doc_result.blocks_promoted,
qa_results=qa_results,
ingestion_seconds=ingestion_time,
)
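
For reference, the truncation step above is bounded by `_context_budget_words`, whose body is collapsed in this diff. Below is a hedged reconstruction of how such a budget could be derived from `context_window_tokens`, based on the changelog entry and the inline comments; the words-per-token ratio and the overhead constant are pure assumptions, not the PR's actual numbers.

```python
# Hypothetical reconstruction, NOT the collapsed implementation above.
# Assumes ~0.75 words per token and a flat token allowance for the system
# prompt, answer template, question, and generated answer.
def context_budget_words_sketch(context_window_tokens: int = 4096) -> int:
    overhead_tokens = 700  # assumed prompt/template/question/answer slack
    usable = max(context_window_tokens - overhead_tokens, 0)
    return int(usable * 0.75)  # assumed words-per-token conversion
```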