1 change: 1 addition & 0 deletions .env
@@ -196,3 +196,4 @@ INFO_REQUEST_LIMIT=10
INFO_REQUEST_CONTEXT_LINES=5
# INFO_REQUEST_EXPLAIN_DEFAULT=0
# INFO_REQUEST_RELATIONSHIPS=0
COMMIT_VECTOR_SEARCH=0
16 changes: 16 additions & 0 deletions .env.example
@@ -49,6 +49,7 @@ FASTMCP_HTTP_PORT=8002
FASTMCP_HTTP_HEALTH_PORT=18002
FASTMCP_INDEXER_HTTP_PORT=8003
FASTMCP_INDEXER_HTTP_HEALTH_PORT=18003
# MCP_INDEXER_URL=http://localhost:8003/mcp


# Optional: local cross-encoder reranker (ONNX)
@@ -161,6 +162,7 @@ REFRAG_DECODER_MODE=prompt # prompt|soft

# GLM_API_BASE=https://api.z.ai/api/coding/paas/v4/
# GLM_MODEL=glm-4.6
# GLM_API_KEY=your_glm_api_key_here

# GPU Performance Toggle
# Set to 1 to use native GPU-accelerated server on localhost:8081
@@ -201,6 +203,19 @@ SMART_SYMBOL_REINDEXING=0
# INDEX_UPSERT_BACKOFF=0.5
# Debounce file events to coalesce bursts
# WATCH_DEBOUNCE_SECS=1.5
# Optional fs metadata fast-path for unchanged files (skips re-reading files
# when size/mtime match cache.json in the same workspace).
# INDEX_FS_FASTPATH=0
# Optional 2-phase pseudo/tag mode (disabled by default).
# When enabled, indexer/watcher write base-only vectors and a background
# backfill worker adds pseudo/tags via Qdrant.
# PSEUDO_BACKFILL_ENABLED=1
# PSEUDO_BACKFILL_DEBUG=0
# PSEUDO_BACKFILL_TICK_SECS=60
# PSEUDO_BACKFILL_MAX_POINTS=256

# Development Remote Upload Configuration
# HOST_INDEX_PATH=./dev-workspace

# Remote upload git history (used by upload clients)
# Max number of commits to include per bundle (0 disables git history)
@@ -209,6 +224,7 @@
# REMOTE_UPLOAD_GIT_SINCE=
# Enable commit lineage goals for indexing
REFRAG_COMMIT_DESCRIBE=1
COMMIT_VECTOR_SEARCH=0

STRICT_MEMORY_RESTORE=0

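The PSEUDO_BACKFILL_* settings above describe a two-phase flow: the indexer/watcher writes base-only vectors, and a background worker later attaches pseudo/tags via Qdrant. Below is a minimal sketch of how a worker tick loop might consume these settings; the function names and the batch-processing callback are illustrative assumptions, not the repository's actual implementation.

```python
# Hypothetical sketch: a backfill tick loop driven by PSEUDO_BACKFILL_* env vars.
import os
import time


def _truthy(value: str | None, default: bool = False) -> bool:
    # Interpret common truthy strings; fall back to the default when unset.
    if value is None or not value.strip():
        return default
    return value.strip().lower() in {"1", "true", "yes", "on"}


def backfill_loop(process_batch) -> None:
    """Call process_batch(max_points) every tick while backfill is enabled."""
    if not _truthy(os.environ.get("PSEUDO_BACKFILL_ENABLED")):
        return  # base-only vectors stay as written by the indexer/watcher
    debug = _truthy(os.environ.get("PSEUDO_BACKFILL_DEBUG"))
    tick_secs = float(os.environ.get("PSEUDO_BACKFILL_TICK_SECS", "60"))
    max_points = int(os.environ.get("PSEUDO_BACKFILL_MAX_POINTS", "256"))
    while True:
        # process_batch is expected to add pseudo/tags payloads via Qdrant
        # for up to max_points base-only points and return how many it updated.
        updated = process_batch(max_points)
        if debug:
            print(f"pseudo backfill tick: updated {updated} points")
        time.sleep(tick_secs)
```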
42 changes: 42 additions & 0 deletions deploy/kubernetes/indexer-services.yaml
@@ -23,6 +23,20 @@ spec:
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
initContainers:
- name: wait-for-qdrant
image: context-engine-indexer-service
imagePullPolicy: IfNotPresent
command:
- /bin/sh
- -c
- /app/scripts/wait-for-qdrant.sh
env:
- name: QDRANT_URL
valueFrom:
configMapKeyRef:
name: context-engine-config
key: QDRANT_URL
containers:
- name: watcher
image: context-engine-indexer-service
@@ -130,6 +144,20 @@ spec:
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
initContainers:
- name: wait-for-qdrant
image: context-engine-indexer-service
imagePullPolicy: IfNotPresent
command:
- /bin/sh
- -c
- /app/scripts/wait-for-qdrant.sh
env:
- name: QDRANT_URL
valueFrom:
configMapKeyRef:
name: context-engine-config
key: QDRANT_URL
restartPolicy: OnFailure
containers:
- name: indexer
@@ -207,6 +235,20 @@ spec:
runAsUser: 1000
runAsGroup: 1000
fsGroup: 1000
initContainers:
- name: wait-for-qdrant
image: context-engine-indexer-service
imagePullPolicy: IfNotPresent
command:
- /bin/sh
- -c
- /app/scripts/wait-for-qdrant.sh
env:
- name: QDRANT_URL
valueFrom:
configMapKeyRef:
name: context-engine-config
key: QDRANT_URL
restartPolicy: OnFailure
containers:
- name: init-payload
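Each workload above gates startup on a wait-for-qdrant init container that blocks until QDRANT_URL answers. The script's contents are not shown in this diff; the sketch below only illustrates the kind of readiness polling such a gate typically performs, with the /readyz endpoint, timeout, and interval as assumptions.

```python
# Illustrative readiness poll; endpoint, timeout, and interval are assumptions,
# not the contents of /app/scripts/wait-for-qdrant.sh.
import os
import time
import urllib.request


def wait_for_qdrant(timeout_secs: float = 300.0, interval_secs: float = 2.0) -> bool:
    url = os.environ.get("QDRANT_URL", "http://localhost:6333").rstrip("/") + "/readyz"
    deadline = time.monotonic() + timeout_secs
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return True  # Qdrant reports ready; main containers can start
        except OSError:
            pass  # not reachable yet; keep polling
        time.sleep(interval_secs)
    return False
```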
2 changes: 1 addition & 1 deletion deploy/kubernetes/qdrant.yaml
@@ -24,7 +24,7 @@ spec:
containers:
- name: qdrant
image: qdrant/qdrant:latest
imagePullPolicy: IfNotPresent
imagePullPolicy: Always
ports:
- name: http
containerPort: 6333
5 changes: 4 additions & 1 deletion docs/CLAUDE.example.md
@@ -1,5 +1,6 @@
This file is intended for AI agents (Claude, etc.) using the Context‑Engine Qdrant‑Indexer and Memory MCP tools.

These rules are NOT optional. You may have existing tools for file reading, "grep"-like searching, and 'codebase search' that are unrelated to the qdrant-indexer MCP tools; you are to favour the qdrant-indexer tooling at all costs, as per these guidelines.

Agentic AI Project Rules: When to Use MCP Qdrant-Indexer vs Grep

@@ -37,7 +38,9 @@ Agentic AI Project Rules: When to Use MCP Qdrant-Indexer vs Grep

MCP Tool Patterns:

# DO - Use concept/keyword-style queries (short natural-language fragments)
# DO - Use concept/keyword-style queries (short natural-language fragments).
# repo_search is semantic search, not grep, regex, or boolean syntax.
# Write queries as short descriptions, not as "foo OR bar" expressions.
"input validation mechanisms"
"database connection handling"
"performance bottlenecks in request path"
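As a concrete illustration of the query-style rules above: repo_search expects short natural-language fragments, not grep, regex, or boolean syntax. The call shape below is a hypothetical client wrapper and is only meant to contrast the two styles, not to document the actual MCP API.

```python
# Illustrative only: the client and tool-call shape are assumptions;
# the point is the query style, not the exact API.
good_queries = [
    "input validation mechanisms",       # short concept fragment
    "database connection handling",
]
bad_query = "validate OR sanitize OR escape"  # boolean/grep syntax; not how repo_search works

# results = mcp_client.call("repo_search", {"queries": good_queries})
```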
106 changes: 90 additions & 16 deletions scripts/hybrid_search.py
@@ -1454,7 +1454,7 @@ def dense_query(

# In-process API: run hybrid search and return structured items list
# Optional: pass an existing TextEmbedding instance via model to reuse cache

# Optional: pass mode to adjust implementation/docs weighting (code_first/balanced/docs_first)

def run_hybrid_search(
queries: List[str],
@@ -1473,6 +1473,7 @@ def run_hybrid_search(
expand: bool = True,
model: TextEmbedding | None = None,
collection: str | None = None,
mode: str | None = None,
repo: str | list[str] | None = None, # Filter by repo name(s); "*" to disable auto-filter
) -> List[Dict[str, Any]]:
client = QdrantClient(url=os.environ.get("QDRANT_URL", QDRANT_URL), api_key=API_KEY)
@@ -1594,6 +1595,7 @@ def _norm_under(u: str | None) -> str | None:
str(_collection()),
_env_truthy(os.environ.get("HYBRID_ADAPTIVE_WEIGHTS"), True),
_env_truthy(os.environ.get("HYBRID_MMR"), True),
str(mode or ""),
)
except Exception:
cache_key = None
@@ -1690,6 +1692,7 @@ def _norm_under(u: str | None) -> str | None:
'expand': expand,
'collection': _collection(),
'vector_name': vec_name,
'mode': mode,
}
is_duplicate, similar_fp = is_duplicate_request(request_data)
if is_duplicate:
@@ -1982,7 +1985,15 @@ def _scaled_rrf(rank: int) -> float:
flt_gated = _sanitize_filter_obj(flt_gated)

result_sets: List[List[Any]] = [
dense_query(client, vec_name, v, flt_gated, _scaled_per_query, collection, query_text=queries[i] if i < len(queries) else None)
dense_query(
client,
vec_name,
v,
flt_gated,
_scaled_per_query,
collection,
query_text=queries[i] if i < len(queries) else None,
)
for i, v in enumerate(embedded)
]
if os.environ.get("DEBUG_HYBRID_SEARCH"):
@@ -2239,6 +2250,18 @@ def _scaled_rrf(rank: int) -> float:

# Lexical + boosts
timestamps: List[int] = []
# Mode-aware tweaks for implementation/docs weighting. Modes:
# - None / "code_first": full IMPLEMENTATION_BOOST and DOCUMENTATION_PENALTY
# - "balanced": keep impl boost, halve doc penalty
# - "docs_first": reduce impl boost slightly and disable doc penalty
eff_mode = (mode or "").strip().lower()
impl_boost = IMPLEMENTATION_BOOST
doc_penalty = DOCUMENTATION_PENALTY
if eff_mode in {"balanced"}:
doc_penalty = DOCUMENTATION_PENALTY * 0.5
elif eff_mode in {"docs_first", "docs-first", "docs"}:
impl_boost = IMPLEMENTATION_BOOST * 0.5
doc_penalty = 0.0
for pid, rec in list(score_map.items()):
payload = rec["pt"].payload or {}
base_md = payload.get("metadata") or {}
@@ -2288,22 +2311,25 @@ def _scaled_rrf(rank: int) -> float:
if ext in {".json", ".yml", ".yaml", ".toml", ".ini"} or "/.codebase/" in path_lower or "/.kiro/" in path_lower:
rec["cfg"] = float(rec.get("cfg", 0.0)) - CONFIG_FILE_PENALTY
rec["s"] -= CONFIG_FILE_PENALTY
# Boost likely implementation files
if IMPLEMENTATION_BOOST > 0.0 and path:
# Boost likely implementation files (mode-aware)
if impl_boost > 0.0 and path:
if ext in {".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".java", ".rs", ".rb", ".php", ".cs", ".cpp", ".c", ".hpp", ".h"}:
rec["impl"] = float(rec.get("impl", 0.0)) + IMPLEMENTATION_BOOST
rec["s"] += IMPLEMENTATION_BOOST
# Penalize docs for implementation-style questions
qlow = " ".join(qlist).lower()
if DOCUMENTATION_PENALTY > 0.0 and path:
if ("readme" in path_lower or "/docs/" in path_lower or "/documentation/" in path_lower or path_lower.endswith(".md")):
if any(w in qlow for w in ["how does", "explain", "works", "algorithm"]):
rec["doc"] = float(rec.get("doc", 0.0)) - DOCUMENTATION_PENALTY
rec["s"] -= DOCUMENTATION_PENALTY
rec["impl"] = float(rec.get("impl", 0.0)) + impl_boost
rec["s"] += impl_boost
# Penalize docs (README/docs/markdown) relative to implementation files (mode-aware)
if doc_penalty > 0.0 and path:
if (
"readme" in path_lower
or "/docs/" in path_lower
or "/documentation/" in path_lower
or path_lower.endswith(".md")
):
rec["doc"] = float(rec.get("doc", 0.0)) - doc_penalty
rec["s"] -= doc_penalty

if LANG_MATCH_BOOST > 0.0 and path and eff_language:
lang = str(eff_language).lower()
md_lang = str((md.get("language") or "")).lower()
md_lang = str((md.get("language") or "").lower())
if (lang and md_lang and md_lang == lang) or lang_matches_path(lang, path):
rec["langb"] += LANG_MATCH_BOOST
rec["s"] += LANG_MATCH_BOOST
@@ -2635,6 +2661,20 @@ def _pass_filters(m: Dict[str, Any]) -> bool:
except Exception:
all_paths = set()

# Build path -> host_path map so we can emit related_paths in host space
# when PATH_EMIT_MODE prefers host paths. This keeps human-facing paths
# consistent while still preserving container paths for backend use.
host_map: Dict[str, str] = {}
try:
for _m in merged:
_md = (_m["pt"].payload or {}).get("metadata") or {}
_p = str(_md.get("path") or "").strip()
_h = str(_md.get("host_path") or "").strip()
if _p and _h:
host_map[_p] = _h
except Exception:
host_map = {}

items: List[Dict[str, Any]] = []
if not merged:
if _USE_CACHE and cache_key is not None:
@@ -2762,6 +2802,22 @@ def _resolve(seg: str) -> list[str]:
pass

_related = sorted(_related_set)[:10]
# Align related_paths with PATH_EMIT_MODE when possible: in host/auto
# modes, prefer host paths when we have a mapping; in container mode,
# keep container/path-space values as-is.
_related_out = _related
try:
_mode_related = str(os.environ.get("PATH_EMIT_MODE", "auto")).strip().lower()
except Exception:
_mode_related = "auto"
if _mode_related in {"host", "auto"}:
try:
_mapped: List[str] = []
for rp in _related:
_mapped.append(host_map.get(rp, rp))
_related_out = _mapped
except Exception:
_related_out = _related
# Best-effort snippet text directly from payload for downstream LLM stitching
_payload = (m["pt"].payload or {}) if m.get("pt") is not None else {}
_metadata = _payload.get("metadata", {}) or {}
Expand All @@ -2772,6 +2828,14 @@ def _resolve(seg: str) -> list[str]:
_metadata.get("text") or
""
)
# Carry through pseudo/tags so downstream consumers (e.g., repo_search reranker)
# can incorporate index-time GLM/llm labels into their own scoring or display.
_pseudo = _payload.get("pseudo")
if _pseudo is None:
_pseudo = _metadata.get("pseudo")
_tags = _payload.get("tags")
if _tags is None:
_tags = _metadata.get("tags")
# Skip memory-like points without a real file path
if not _path or not _path.strip():
if os.environ.get("DEBUG_HYBRID_FILTER"):
@@ -2831,10 +2895,12 @@ def _resolve(seg: str) -> list[str]:
"components": comp,
"why": why,
"relations": {"imports": _imports, "calls": _calls, "symbol_path": _symp},
"related_paths": _related,
"related_paths": _related_out,
"span_budgeted": bool(m.get("_merged_start") is not None),
"budget_tokens_used": m.get("_budget_tokens"),
"text": _text,
"pseudo": _pseudo,
"tags": _tags,
}
)
if _USE_CACHE and cache_key is not None:
@@ -3111,7 +3177,15 @@ def _cli_scaled_rrf(rank: int) -> float:

embedded = _embed_queries_cached(model, queries)
result_sets: List[List[Any]] = [
dense_query(client, vec_name, v, flt, _cli_scaled_per_query, eff_collection, query_text=queries[i] if i < len(queries) else None)
dense_query(
client,
vec_name,
v,
flt,
_cli_scaled_per_query,
eff_collection,
query_text=queries[i] if i < len(queries) else None,
)
for i, v in enumerate(embedded)
]

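The new mode parameter is threaded through run_hybrid_search to adjust the implementation/docs weighting shown in the hunks above. A hedged usage sketch follows, assuming the module is importable as hybrid_search and that QDRANT_URL points at a reachable instance; the query string, repo name, and printed fields are illustrative.

```python
# Sketch of calling run_hybrid_search with the new mode parameter.
from hybrid_search import run_hybrid_search  # import path is an assumption

items = run_hybrid_search(
    ["how the pseudo/tag backfill worker selects points"],
    mode="docs_first",      # or "balanced"; None / "code_first" keeps the default weighting
    repo="context-engine",  # illustrative repo name; "*" disables the auto repo filter
)
for item in items[:5]:
    # "related_paths", "text", "pseudo", and "tags" are fields added or kept in this diff.
    print(item.get("related_paths"), item.get("tags"))
```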