From 3f4d62de04feafec825c391beae807df12fcf490 Mon Sep 17 00:00:00 2001 From: John Donalson Date: Mon, 26 Jan 2026 20:47:41 -0500 Subject: [PATCH] Normalize 'under' path handling and add remote embedding support Refactors all uses of the 'under' parameter to normalize as a suffix and use MatchText for substring matching, improving path filtering consistency. Adds support for remote embedding services in memory.py and mcp_memory_server.py, routing embedding calls to remote or local as appropriate. Updates documentation and skill usage tips to reflect new search strategies, parallel execution, and token efficiency defaults. --- docs/CLAUDE.example.md | 32 +++++++++++++++++++++++++++++++ scripts/hybrid/expand.py | 12 ++++-------- scripts/hybrid_search.py | 22 +++++++++++---------- scripts/mcp_impl/memory.py | 35 ++++++++++++++++++++++++++++++++-- scripts/mcp_memory_server.py | 29 ++++++++++++++++++++++++++-- scripts/rerank_tools/local.py | 16 ++++++---------- skills/context-engine/SKILL.md | 5 +++++ 7 files changed, 119 insertions(+), 32 deletions(-) diff --git a/docs/CLAUDE.example.md b/docs/CLAUDE.example.md index c35ae4fa..f00ea601 100644 --- a/docs/CLAUDE.example.md +++ b/docs/CLAUDE.example.md @@ -64,6 +64,8 @@ These rules are NOT optional - favor qdrant-indexer tooling at all costs over ex - Increase to limit=5, include_snippet=true for details - Use language and under filters to narrow scope - Set rerank_enabled=false for faster but less accurate results + - Use output_format="toon" for 60-80% token reduction + - Fire independent tool calls in parallel (same message block) for 2-3x speedup When to Use Advanced Features: @@ -158,8 +160,38 @@ These rules are NOT optional - favor qdrant-indexer tooling at all costs over ex - Call set_session_defaults (indexer and memory) early in a session so subsequent calls inherit the right collection without repeating it in every request. 
+ - Set defaults with: set_session_defaults(output_format="toon", compact=true, limit=5) - Use context_search with include_memories and per_source_limits when you want blended code + memory results instead of calling repo_search and memory.memory_find separately. - Treat expand_query and the expand flag on context_answer as expensive options: only use them after a normal search/answer attempt failed to find good context. + + Two-Phase Search Strategy: + + - Phase 1 (Discovery): limit=3, compact=true, output_format="toon", per_path=1 + - Phase 2 (Deep Dive): limit=5-8, include_snippet=true, context_lines=3-5 + - Only move to Phase 2 after identifying high-value targets from Phase 1 + + Parallel Execution Pattern: + + - Fire independent tool calls in a single message block (2-3x speedup) + - Example: repo_search + repo_search + symbol_graph all at once + - Do NOT wait for one search to complete before starting another + + Token Efficiency Defaults: + + | Parameter | Discovery | Deep Dive | + |-----------|-----------|-----------| + | limit | 3 | 5-8 | + | per_path | 1 | 2 | + | compact | true | false | + | output_format | "toon" | "json" | + | include_snippet | false | true | + | context_lines | 0 | 3-5 | + + Fallback Chains: + + - context_answer timeout → repo_search + info_request(include_explanation=true) + - pattern_search unavailable → repo_search with structural query terms + - neo4j_graph_query empty → symbol_graph (Qdrant-backed fallback) diff --git a/scripts/hybrid/expand.py b/scripts/hybrid/expand.py index 23ad3268..f5e78dac 100644 --- a/scripts/hybrid/expand.py +++ b/scripts/hybrid/expand.py @@ -781,18 +781,14 @@ def expand_via_embeddings( except Exception: vec_name = None - def _norm_under(u: str | None) -> str | None: + def _norm_under_suffix(u: str | None) -> str | None: if not u: return None u = str(u).strip().replace("\\", "/") u = "/".join([p for p in u.split("/") if p]) if not u: return None - if u.startswith("/work/"): - return u - if not 
u.startswith("/"): - return "/work/" + u - return "/work/" + u.lstrip("/") + return "/" + u flt = None try: @@ -807,12 +803,12 @@ def _norm_under(u: str | None) -> str | None: ) ) if under: - eff_under = _norm_under(under) + eff_under = _norm_under_suffix(under) if eff_under: must.append( models.FieldCondition( key="metadata.path_prefix", - match=models.MatchValue(value=eff_under), + match=models.MatchText(text=eff_under), ) ) if kind: diff --git a/scripts/hybrid_search.py b/scripts/hybrid_search.py index 71248e9e..0dee48c1 100644 --- a/scripts/hybrid_search.py +++ b/scripts/hybrid_search.py @@ -510,7 +510,13 @@ def run_pure_dense_search( if language: must.append(models.FieldCondition(key="metadata.language", match=models.MatchValue(value=language))) if under: - must.append(models.FieldCondition(key="metadata.path_prefix", match=models.MatchValue(value=under))) + # Normalize under to suffix format for substring matching + # e.g., "scripts" -> "/scripts" matches path_prefix "/work/Context-Engine-xxx/scripts" + norm_under = str(under).strip().replace("\\", "/") + norm_under = "/".join([p for p in norm_under.split("/") if p]) + if norm_under: + norm_under = "/" + norm_under + must.append(models.FieldCondition(key="metadata.path_prefix", match=models.MatchText(text=norm_under))) if repo and repo != "*": if isinstance(repo, list): must.append(models.FieldCondition(key="metadata.repo", match=models.MatchAny(any=repo))) @@ -981,21 +987,17 @@ def _normalize_globs(globs: list[str]) -> list[str]: eff_path_globs_norm = _normalize_globs(eff_path_globs) eff_not_globs_norm = _normalize_globs(eff_not_globs) - # Normalize under - def _norm_under(u: str | None) -> str | None: + def _norm_under_suffix(u: str | None) -> str | None: + """Normalize under to suffix format for MatchText substring matching.""" if not u: return None u = str(u).strip().replace("\\", "/") u = "/".join([p for p in u.split("/") if p]) if not u: return None - if not u.startswith("/"): - v = "/work/" + u - else: 
- v = "/work/" + u.lstrip("/") if not u.startswith("/work/") else u - return v + return "/" + u - eff_under = _norm_under(eff_under) + eff_under = _norm_under_suffix(eff_under) # Expansion knobs that affect query construction/results (must be part of cache key) try: @@ -1106,7 +1108,7 @@ def _norm_under(u: str | None) -> str | None: if eff_under: must.append( models.FieldCondition( - key="metadata.path_prefix", match=models.MatchValue(value=eff_under) + key="metadata.path_prefix", match=models.MatchText(text=eff_under) ) ) if eff_kind: diff --git a/scripts/mcp_impl/memory.py b/scripts/mcp_impl/memory.py index dd72d77b..c9edae09 100644 --- a/scripts/mcp_impl/memory.py +++ b/scripts/mcp_impl/memory.py @@ -34,6 +34,35 @@ # Environment QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") +# Remote embedding support +try: + from scripts.embedder import RemoteEmbeddingStub + from scripts.ingest.qdrant import embed_batch as _embed_batch_remote + _REMOTE_EMBED_AVAILABLE = True +except ImportError: + RemoteEmbeddingStub = None # type: ignore + _embed_batch_remote = None # type: ignore + _REMOTE_EMBED_AVAILABLE = False + + +def _embed_text(model, text: str, model_name: str) -> list: + """Embed text using either local model or remote service. + + Detects RemoteEmbeddingStub and routes to embed_batch() accordingly. 
+ """ + is_remote_stub = ( + RemoteEmbeddingStub is not None + and isinstance(model, RemoteEmbeddingStub) + ) + + if is_remote_stub and _REMOTE_EMBED_AVAILABLE and _embed_batch_remote is not None: + # Use remote embedding service + vecs = _embed_batch_remote(model, [text]) + return vecs[0] if isinstance(vecs[0], list) else vecs[0].tolist() + else: + # Local embedding + return next(model.embed([text])).tolist() + async def _memory_store_impl( information: str, @@ -116,7 +145,8 @@ def _lex_hash_vector(text: str, dim: int = LEX_VECTOR_DIM) -> list[float]: from scripts.mcp_impl.admin_tools import _get_embedding_model model = _get_embedding_model(model_name) - dense = next(model.embed([str(information)])).tolist() + # Use helper that handles remote vs local embedding + dense = _embed_text(model, str(information), model_name) lex = _lex_hash_vector(str(information)) @@ -254,7 +284,8 @@ def _lex_hash_vector(text: str, dim: int = LEX_VECTOR_DIM) -> list[float]: from scripts.mcp_impl.admin_tools import _get_embedding_model model = _get_embedding_model(model_name) - dense_query = next(model.embed([str(query)])).tolist() + # Use helper that handles remote vs local embedding + dense_query = _embed_text(model, str(query), model_name) lex_query = _lex_hash_vector(str(query)) client = QdrantClient( diff --git a/scripts/mcp_memory_server.py b/scripts/mcp_memory_server.py index 6b01d804..52ff30a9 100644 --- a/scripts/mcp_memory_server.py +++ b/scripts/mcp_memory_server.py @@ -79,6 +79,31 @@ from scripts.utils import sanitize_vector_name as _sanitize_vector_name from scripts.utils import lex_hash_vector_text as _lex_hash_vector_text +# Remote embedding support +try: + from scripts.embedder import RemoteEmbeddingStub + from scripts.ingest.qdrant import embed_batch as _embed_batch_remote + _REMOTE_EMBED_AVAILABLE = True +except ImportError: + RemoteEmbeddingStub = None # type: ignore + _embed_batch_remote = None # type: ignore + _REMOTE_EMBED_AVAILABLE = False + + +def 
_embed_text(model, text: str) -> list: + """Embed text using either local model or remote service.""" + is_remote_stub = ( + RemoteEmbeddingStub is not None + and isinstance(model, RemoteEmbeddingStub) + ) + + if is_remote_stub and _REMOTE_EMBED_AVAILABLE and _embed_batch_remote is not None: + vecs = _embed_batch_remote(model, [text]) + return vecs[0] if isinstance(vecs[0], list) else vecs[0].tolist() + else: + return next(model.embed([text])).tolist() + + VECTOR_NAME = _sanitize_vector_name(EMBEDDING_MODEL) # I/O-safety knobs for memory server behavior @@ -806,7 +831,7 @@ def memory_store( md["source"] = "memory" model = _get_embedding_model() - dense = next(model.embed([str(information)])).tolist() + dense = _embed_text(model, str(information)) lex = _lex_hash_vector_text(str(information), LEX_VECTOR_DIM) # Use UUID to avoid point ID collisions under concurrent load @@ -959,7 +984,7 @@ def memory_find( use_dense = False if use_dense: model = _get_embedding_model() - dense = next(model.embed([str(query)])).tolist() + dense = _embed_text(model, str(query)) else: dense = None lex = _lex_hash_vector_text(str(query), LEX_VECTOR_DIM) diff --git a/scripts/rerank_tools/local.py b/scripts/rerank_tools/local.py index e20e71cf..df2309b2 100644 --- a/scripts/rerank_tools/local.py +++ b/scripts/rerank_tools/local.py @@ -166,18 +166,14 @@ def _start_background_warmup(): _start_background_warmup() -def _norm_under(u: str | None) -> str | None: +def _norm_under_suffix(u: str | None) -> str | None: if not u: return None u = str(u).strip().replace("\\", "/") u = "/".join([p for p in u.split("/") if p]) if not u: return None - if not u.startswith("/"): - return "/work/" + u - if not u.startswith("/work/"): - return "/work/" + u.lstrip("/") - return u + return "/" + u def _select_dense_vector_name( @@ -369,11 +365,11 @@ def rerank_in_process( key="metadata.language", match=models.MatchValue(value=language) ) ) - eff_under = _norm_under(under) + eff_under = _norm_under_suffix(under) 
if eff_under: must.append( models.FieldCondition( - key="metadata.path_prefix", match=models.MatchValue(value=eff_under) + key="metadata.path_prefix", match=models.MatchText(text=eff_under) ) ) flt = models.Filter(must=must) if must else None @@ -450,11 +446,11 @@ def main(): key="metadata.language", match=models.MatchValue(value=args.language) ) ) - eff_under = _norm_under(args.under) + eff_under = _norm_under_suffix(args.under) if eff_under: must.append( models.FieldCondition( - key="metadata.path_prefix", match=models.MatchValue(value=eff_under) + key="metadata.path_prefix", match=models.MatchText(text=eff_under) ) ) flt = models.Filter(must=must) if must else None diff --git a/skills/context-engine/SKILL.md b/skills/context-engine/SKILL.md index 4b816061..7c9a800a 100644 --- a/skills/context-engine/SKILL.md +++ b/skills/context-engine/SKILL.md @@ -409,4 +409,9 @@ Common issues: 7. **Index before search** - Always run `qdrant_index_root` on first use or after cloning a repo 8. **Use pattern_search for structural matching** - When looking for code with similar control flow (retry loops, error handling), use `pattern_search` instead of `repo_search` (if enabled) 9. **Describe patterns in natural language** - `pattern_search` understands "retry with backoff" just as well as actual code examples (if enabled) +10. **Fire independent searches in parallel** - Call multiple `repo_search`, `symbol_graph`, etc. in the same message block for 2-3x speedup +11. **Use TOON format for discovery** - Set `output_format: "toon"` for 60-80% token reduction on exploratory queries +12. **Bootstrap sessions with defaults** - Call `set_session_defaults(output_format="toon", compact=true)` early to avoid repeating params +13. **Two-phase search** - Discovery first (`limit=3, compact=true`), then deep dive (`limit=5-8, include_snippet=true`) on targets +14. **Use fallback chains** - If `context_answer` times out, fall back to `repo_search` + `info_request(include_explanation=true)`