From 3f4d62de04feafec825c391beae807df12fcf490 Mon Sep 17 00:00:00 2001 From: John Donalson Date: Mon, 26 Jan 2026 20:47:41 -0500 Subject: [PATCH] Normalize 'under' path handling and add remote embedding support Refactors all uses of the 'under' parameter to normalize as a suffix and use MatchText for substring matching, improving path filtering consistency. Adds support for remote embedding services in memory.py and mcp_memory_server.py, routing embedding calls to remote or local as appropriate. Updates documentation and skill usage tips to reflect new search strategies, parallel execution, and token efficiency defaults. --- docs/CLAUDE.example.md | 32 +++++++++++++++++++++++++++++++ scripts/hybrid/expand.py | 12 ++++-------- scripts/hybrid_search.py | 22 +++++++++++---------- scripts/mcp_impl/memory.py | 35 ++++++++++++++++++++++++++++++++-- scripts/mcp_memory_server.py | 29 ++++++++++++++++++++++++++-- scripts/rerank_tools/local.py | 16 ++++++---------- skills/context-engine/SKILL.md | 5 +++++ 7 files changed, 119 insertions(+), 32 deletions(-) diff --git a/docs/CLAUDE.example.md b/docs/CLAUDE.example.md index c35ae4fa..f00ea601 100644 --- a/docs/CLAUDE.example.md +++ b/docs/CLAUDE.example.md @@ -64,6 +64,8 @@ These rules are NOT optional - favor qdrant-indexer tooling at all costs over ex - Increase to limit=5, include_snippet=true for details - Use language and under filters to narrow scope - Set rerank_enabled=false for faster but less accurate results + - Use output_format="toon" for 60-80% token reduction + - Fire independent tool calls in parallel (same message block) for 2-3x speedup When to Use Advanced Features: @@ -158,8 +160,38 @@ These rules are NOT optional - favor qdrant-indexer tooling at all costs over ex - Call set_session_defaults (indexer and memory) early in a session so subsequent calls inherit the right collection without repeating it in every request. 
+ - Set defaults with: set_session_defaults(output_format="toon", compact=true, limit=5) - Use context_search with include_memories and per_source_limits when you want blended code + memory results instead of calling repo_search and memory.memory_find separately. - Treat expand_query and the expand flag on context_answer as expensive options: only use them after a normal search/answer attempt failed to find good context. + + Two-Phase Search Strategy: + + - Phase 1 (Discovery): limit=3, compact=true, output_format="toon", per_path=1 + - Phase 2 (Deep Dive): limit=5-8, include_snippet=true, context_lines=3-5 + - Only move to Phase 2 after identifying high-value targets from Phase 1 + + Parallel Execution Pattern: + + - Fire independent tool calls in a single message block (2-3x speedup) + - Example: repo_search + repo_search + symbol_graph all at once + - Do NOT wait for one search to complete before starting another + + Token Efficiency Defaults: + + | Parameter | Discovery | Deep Dive | + |-----------|-----------|-----------| + | limit | 3 | 5-8 | + | per_path | 1 | 2 | + | compact | true | false | + | output_format | "toon" | "json" | + | include_snippet | false | true | + | context_lines | 0 | 3-5 | + + Fallback Chains: + + - context_answer timeout → repo_search + info_request(include_explanation=true) + - pattern_search unavailable → repo_search with structural query terms + - neo4j_graph_query empty → symbol_graph (Qdrant-backed fallback) diff --git a/scripts/hybrid/expand.py b/scripts/hybrid/expand.py index 23ad3268..f5e78dac 100644 --- a/scripts/hybrid/expand.py +++ b/scripts/hybrid/expand.py @@ -781,18 +781,14 @@ def expand_via_embeddings( except Exception: vec_name = None - def _norm_under(u: str | None) -> str | None: + def _norm_under_suffix(u: str | None) -> str | None: if not u: return None u = str(u).strip().replace("\\", "/") u = "/".join([p for p in u.split("/") if p]) if not u: return None - if u.startswith("/work/"): - return u - if not 
u.startswith("/"): - return "/work/" + u - return "/work/" + u.lstrip("/") + return "/" + u flt = None try: @@ -807,12 +803,12 @@ def _norm_under(u: str | None) -> str | None: ) ) if under: - eff_under = _norm_under(under) + eff_under = _norm_under_suffix(under) if eff_under: must.append( models.FieldCondition( key="metadata.path_prefix", - match=models.MatchValue(value=eff_under), + match=models.MatchText(text=eff_under), ) ) if kind: diff --git a/scripts/hybrid_search.py b/scripts/hybrid_search.py index 71248e9e..0dee48c1 100644 --- a/scripts/hybrid_search.py +++ b/scripts/hybrid_search.py @@ -510,7 +510,13 @@ def run_pure_dense_search( if language: must.append(models.FieldCondition(key="metadata.language", match=models.MatchValue(value=language))) if under: - must.append(models.FieldCondition(key="metadata.path_prefix", match=models.MatchValue(value=under))) + # Normalize under to suffix format for substring matching + # e.g., "scripts" -> "/scripts" matches path_prefix "/work/Context-Engine-xxx/scripts" + norm_under = str(under).strip().replace("\\", "/") + norm_under = "/".join([p for p in norm_under.split("/") if p]) + if norm_under: + norm_under = "/" + norm_under + must.append(models.FieldCondition(key="metadata.path_prefix", match=models.MatchText(text=norm_under))) if repo and repo != "*": if isinstance(repo, list): must.append(models.FieldCondition(key="metadata.repo", match=models.MatchAny(any=repo))) @@ -981,21 +987,17 @@ def _normalize_globs(globs: list[str]) -> list[str]: eff_path_globs_norm = _normalize_globs(eff_path_globs) eff_not_globs_norm = _normalize_globs(eff_not_globs) - # Normalize under - def _norm_under(u: str | None) -> str | None: + def _norm_under_suffix(u: str | None) -> str | None: + """Normalize under to suffix format for MatchText substring matching.""" if not u: return None u = str(u).strip().replace("\\", "/") u = "/".join([p for p in u.split("/") if p]) if not u: return None - if not u.startswith("/"): - v = "/work/" + u - else: 
- v = "/work/" + u.lstrip("/") if not u.startswith("/work/") else u - return v + return "/" + u - eff_under = _norm_under(eff_under) + eff_under = _norm_under_suffix(eff_under) # Expansion knobs that affect query construction/results (must be part of cache key) try: @@ -1106,7 +1108,7 @@ def _norm_under(u: str | None) -> str | None: if eff_under: must.append( models.FieldCondition( - key="metadata.path_prefix", match=models.MatchValue(value=eff_under) + key="metadata.path_prefix", match=models.MatchText(text=eff_under) ) ) if eff_kind: diff --git a/scripts/mcp_impl/memory.py b/scripts/mcp_impl/memory.py index dd72d77b..c9edae09 100644 --- a/scripts/mcp_impl/memory.py +++ b/scripts/mcp_impl/memory.py @@ -34,6 +34,35 @@ # Environment QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") +# Remote embedding support +try: + from scripts.embedder import RemoteEmbeddingStub + from scripts.ingest.qdrant import embed_batch as _embed_batch_remote + _REMOTE_EMBED_AVAILABLE = True +except ImportError: + RemoteEmbeddingStub = None # type: ignore + _embed_batch_remote = None # type: ignore + _REMOTE_EMBED_AVAILABLE = False + + +def _embed_text(model, text: str, model_name: str) -> list: + """Embed text using either local model or remote service. + + Detects RemoteEmbeddingStub and routes to embed_batch() accordingly. 
+ """ + is_remote_stub = ( + RemoteEmbeddingStub is not None + and isinstance(model, RemoteEmbeddingStub) + ) + + if is_remote_stub and _REMOTE_EMBED_AVAILABLE and _embed_batch_remote is not None: + # Use remote embedding service + vecs = _embed_batch_remote(model, [text]) + return vecs[0] if isinstance(vecs[0], list) else vecs[0].tolist() + else: + # Local embedding + return next(model.embed([text])).tolist() + async def _memory_store_impl( information: str, @@ -116,7 +145,8 @@ def _lex_hash_vector(text: str, dim: int = LEX_VECTOR_DIM) -> list[float]: from scripts.mcp_impl.admin_tools import _get_embedding_model model = _get_embedding_model(model_name) - dense = next(model.embed([str(information)])).tolist() + # Use helper that handles remote vs local embedding + dense = _embed_text(model, str(information), model_name) lex = _lex_hash_vector(str(information)) @@ -254,7 +284,8 @@ def _lex_hash_vector(text: str, dim: int = LEX_VECTOR_DIM) -> list[float]: from scripts.mcp_impl.admin_tools import _get_embedding_model model = _get_embedding_model(model_name) - dense_query = next(model.embed([str(query)])).tolist() + # Use helper that handles remote vs local embedding + dense_query = _embed_text(model, str(query), model_name) lex_query = _lex_hash_vector(str(query)) client = QdrantClient( diff --git a/scripts/mcp_memory_server.py b/scripts/mcp_memory_server.py index 6b01d804..52ff30a9 100644 --- a/scripts/mcp_memory_server.py +++ b/scripts/mcp_memory_server.py @@ -79,6 +79,31 @@ from scripts.utils import sanitize_vector_name as _sanitize_vector_name from scripts.utils import lex_hash_vector_text as _lex_hash_vector_text +# Remote embedding support +try: + from scripts.embedder import RemoteEmbeddingStub + from scripts.ingest.qdrant import embed_batch as _embed_batch_remote + _REMOTE_EMBED_AVAILABLE = True +except ImportError: + RemoteEmbeddingStub = None # type: ignore + _embed_batch_remote = None # type: ignore + _REMOTE_EMBED_AVAILABLE = False + + +def 
_embed_text(model, text: str) -> list: + """Embed text using either local model or remote service.""" + is_remote_stub = ( + RemoteEmbeddingStub is not None + and isinstance(model, RemoteEmbeddingStub) + ) + + if is_remote_stub and _REMOTE_EMBED_AVAILABLE and _embed_batch_remote is not None: + vecs = _embed_batch_remote(model, [text]) + return vecs[0] if isinstance(vecs[0], list) else vecs[0].tolist() + else: + return next(model.embed([text])).tolist() + + VECTOR_NAME = _sanitize_vector_name(EMBEDDING_MODEL) # I/O-safety knobs for memory server behavior @@ -806,7 +831,7 @@ def memory_store( md["source"] = "memory" model = _get_embedding_model() - dense = next(model.embed([str(information)])).tolist() + dense = _embed_text(model, str(information)) lex = _lex_hash_vector_text(str(information), LEX_VECTOR_DIM) # Use UUID to avoid point ID collisions under concurrent load @@ -959,7 +984,7 @@ def memory_find( use_dense = False if use_dense: model = _get_embedding_model() - dense = next(model.embed([str(query)])).tolist() + dense = _embed_text(model, str(query)) else: dense = None lex = _lex_hash_vector_text(str(query), LEX_VECTOR_DIM) diff --git a/scripts/rerank_tools/local.py b/scripts/rerank_tools/local.py index e20e71cf..df2309b2 100644 --- a/scripts/rerank_tools/local.py +++ b/scripts/rerank_tools/local.py @@ -166,18 +166,14 @@ def _start_background_warmup(): _start_background_warmup() -def _norm_under(u: str | None) -> str | None: +def _norm_under_suffix(u: str | None) -> str | None: if not u: return None u = str(u).strip().replace("\\", "/") u = "/".join([p for p in u.split("/") if p]) if not u: return None - if not u.startswith("/"): - return "/work/" + u - if not u.startswith("/work/"): - return "/work/" + u.lstrip("/") - return u + return "/" + u def _select_dense_vector_name( @@ -369,11 +365,11 @@ def rerank_in_process( key="metadata.language", match=models.MatchValue(value=language) ) ) - eff_under = _norm_under(under) + eff_under = _norm_under_suffix(under) 
if eff_under: must.append( models.FieldCondition( - key="metadata.path_prefix", match=models.MatchValue(value=eff_under) + key="metadata.path_prefix", match=models.MatchText(text=eff_under) ) ) flt = models.Filter(must=must) if must else None @@ -450,11 +446,11 @@ def main(): key="metadata.language", match=models.MatchValue(value=args.language) ) ) - eff_under = _norm_under(args.under) + eff_under = _norm_under_suffix(args.under) if eff_under: must.append( models.FieldCondition( - key="metadata.path_prefix", match=models.MatchValue(value=eff_under) + key="metadata.path_prefix", match=models.MatchText(text=eff_under) ) ) flt = models.Filter(must=must) if must else None diff --git a/skills/context-engine/SKILL.md b/skills/context-engine/SKILL.md index 4b816061..7c9a800a 100644 --- a/skills/context-engine/SKILL.md +++ b/skills/context-engine/SKILL.md @@ -409,4 +409,9 @@ Common issues: 7. **Index before search** - Always run `qdrant_index_root` on first use or after cloning a repo 8. **Use pattern_search for structural matching** - When looking for code with similar control flow (retry loops, error handling), use `pattern_search` instead of `repo_search` (if enabled) 9. **Describe patterns in natural language** - `pattern_search` understands "retry with backoff" just as well as actual code examples (if enabled) +10. **Fire independent searches in parallel** - Call multiple `repo_search`, `symbol_graph`, etc. in the same message block for 2-3x speedup +11. **Use TOON format for discovery** - Set `output_format: "toon"` for 60-80% token reduction on exploratory queries +12. **Bootstrap sessions with defaults** - Call `set_session_defaults(output_format="toon", compact=true)` early to avoid repeating params +13. **Two-phase search** - Discovery first (`limit=3, compact=true`), then deep dive (`limit=5-8, include_snippet=true`) on targets +14. **Use fallback chains** - If `context_answer` times out, fall back to `repo_search` + `info_request(include_explanation=true)`