Context-Engine-AI · m1rl0k · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025
diff --git a/.env.example b/.env.example
@@ -24,6 +24,13 @@ EMBEDDING_PROVIDER=fastembed
 # Optional repo tag attached to each payload
 REPO_NAME=workspace
 
+# Cross-Codebase Isolation (multi-repo search scoping)
+# When enabled, search results are automatically filtered to the current repo
+# Disable (=0) to search all repos by default (legacy behavior)
+REPO_AUTO_FILTER=1
+# Explicitly set current repo (takes precedence over REPO_NAME and auto-detection from git/directory)
+# CURRENT_REPO=my-project
+
 # MCP servers (SSE)
 FASTMCP_HOST=0.0.0.0
 FASTMCP_PORT=8000            # search/store MCP (mcp-server-qdrant)
@@ -56,6 +63,14 @@ RERANKER_TOPN=50
 RERANKER_RETURN_M=12
 RERANKER_TIMEOUT_MS=2000
 
+# Post-rerank symbol boost: ensures exact symbol matches rank highest even when
+# the neural reranker disagrees. Applied after rerank blending as a direct score addition.
+# Set to 0 to disable, >1.0 to boost symbol matches more aggressively.
+POST_RERANK_SYMBOL_BOOST=1.0
+# Rerank blend weight: weight given to the rerank score relative to the fusion score (0.0-1.0)
+# Higher = more weight on neural reranker, lower = more weight on lexical/symbol boosts
+RERANK_BLEND_WEIGHT=0.6
+
 # Safety: minimum rerank timeout floor (ms) to avoid cold-start timeouts
 RERANK_TIMEOUT_FLOOR_MS=1000
 

diff --git a/docs/MCP_API.md b/docs/MCP_API.md
@@ -160,6 +160,13 @@ Perform hybrid code search combining dense semantic, lexical BM25, and optional
 - `limit` (int, default 10): Maximum total results to return
 - `per_path` (int, default 2): Maximum results per file path
 
+**Cross-Codebase Isolation:**
+- `repo` (str or list[str], optional): Filter results to specific repository(ies)
+  - Single repo: `"pathful-commons-app"` - Search only this repo
+  - Multiple repos: `["frontend", "backend"]` - Search related repos together
+  - All repos: `"*"` - Explicitly search all indexed repos (disable auto-filter)
+  - Default: Auto-detects current repo from the `CURRENT_REPO` env var (falling back to `REPO_NAME`) when `REPO_AUTO_FILTER=1`
+
 **Content Filters:**
 - `language` (str, optional): Filter by programming language
 - `path_glob` (str or list[str], optional): Glob patterns for path filtering
@@ -185,6 +192,10 @@ Perform hybrid code search combining dense semantic, lexical BM25, and optional
 - `rerank_top_n` (int, default 50): Number of candidates to consider for reranking
 - `rerank_return_m` (int, default 12): Number of results to return after reranking
 
+Reranking uses a blended scoring approach that preserves symbol match boosts:
+- **Blend weight** (`RERANK_BLEND_WEIGHT`, default 0.6): Weight given to the neural reranker score relative to the fusion score (0.0-1.0; higher favors the reranker)
+- **Post-rerank symbol boost** (`POST_RERANK_SYMBOL_BOOST`, default 1.0): Applied after blending to ensure exact symbol matches rank highest even when the neural reranker disagrees
+
 **Response Format:**
 ```json
 {
@@ -254,12 +265,30 @@ Perform hybrid code search combining dense semantic, lexical BM25, and optional
 }
 ```
 
+**Cross-Codebase Search (multi-repo):**
+```json
+{
+  "query": "authentication middleware",
+  "repo": ["frontend", "backend"],
+  "limit": 15
+}
+```
+
+**Single Repo Search:**
+```json
+{
+  "query": "user authentication",
+  "repo": "my-repo",
+  "include_snippet": true
+}
+```
+
 ### context_search()
 
 Blend code search results with memory entries for comprehensive context.
 
 **Parameters:**
-All `repo_search` parameters plus:
+All `repo_search` parameters (including `repo` for cross-codebase isolation) plus:
 - `include_memories` (bool, default true): Whether to include memory results
 - `memory_weight` (float, default 1.0): Weight for memory results vs code results
 - `per_source_limits` (dict, optional): Limits per source type:

diff --git a/scripts/hybrid_search.py b/scripts/hybrid_search.py
@@ -1454,6 +1454,7 @@ def run_hybrid_search(
     expand: bool = True,
     model: TextEmbedding | None = None,
     collection: str | None = None,
+    repo: str | list[str] | None = None,  # Filter by repo name(s); "*" to disable auto-filter
 ) -> List[Dict[str, Any]]:
     client = QdrantClient(url=os.environ.get("QDRANT_URL", QDRANT_URL), api_key=API_KEY)
     model_name = os.environ.get("EMBEDDING_MODEL", MODEL_NAME)
@@ -1470,7 +1471,23 @@ def run_hybrid_search(
     eff_ext = ext or dsl.get("ext")
     eff_not = not_filter or dsl.get("not")
     eff_case = case or dsl.get("case") or os.environ.get("HYBRID_CASE", "insensitive")
-    eff_repo = dsl.get("repo")
+    # Repo filter: explicit param > DSL > auto-detect from env
+    eff_repo = repo or dsl.get("repo")
+    # Normalize repo to list for multi-repo support
+    if eff_repo and isinstance(eff_repo, str):
+        if eff_repo.strip() == "*":
+            eff_repo = None  # "*" means search all repos
+        else:
+            eff_repo = [r.strip() for r in eff_repo.split(",") if r.strip()]
+    elif eff_repo and isinstance(eff_repo, (list, tuple)):
+        eff_repo = [str(r).strip() for r in eff_repo if str(r).strip() and str(r).strip() != "*"]
+        if not eff_repo:
+            eff_repo = None
+    # Auto-detect repo from env if not specified and auto-filter is enabled
+    if eff_repo is None and str(os.environ.get("REPO_AUTO_FILTER", "1")).strip().lower() in {"1", "true", "yes", "on"}:
+        auto_repo = os.environ.get("CURRENT_REPO") or os.environ.get("REPO_NAME")
+        if auto_repo and auto_repo.strip():
+            eff_repo = [auto_repo.strip()]
     eff_path_regex = path_regex
 
     def _to_list(x):
@@ -1595,12 +1612,27 @@ def _norm_under(u: str | None) -> str | None:
                 key="metadata.language", match=models.MatchValue(value=eff_language)
             )
         )
+    # Repo filter: supports single repo or list of repos (for related codebases)
     if eff_repo:
-        must.append(
-            models.FieldCondition(
-                key="metadata.repo", match=models.MatchValue(value=eff_repo)
+        if isinstance(eff_repo, list) and len(eff_repo) == 1:
+            must.append(
+                models.FieldCondition(
+                    key="metadata.repo", match=models.MatchValue(value=eff_repo[0])
+                )
+            )
+        elif isinstance(eff_repo, list) and len(eff_repo) > 1:
+            # Multiple repos: use MatchAny for OR logic
+            must.append(
+                models.FieldCondition(
+                    key="metadata.repo", match=models.MatchAny(any=eff_repo)
+                )
+            )
+        elif isinstance(eff_repo, str):
+            must.append(
+                models.FieldCondition(
+                    key="metadata.repo", match=models.MatchValue(value=eff_repo)
+                )
             )
-        )
     if eff_under:
         must.append(
             models.FieldCondition(