From 598913c24b952bce320a57efebf37b0ad4463fe8 Mon Sep 17 00:00:00 2001
From: john donalson <mirlok@dr.com>
Date: Sun, 2 Nov 2025 17:17:20 -0500
Subject: [PATCH 1/5] push

---
 .env                          |   6 +-
 .env.example                  |   4 +-
 .gitignore                    |   2 +
 README.md                     |  91 ++++++++++-
 docker-compose.yml            |  14 +-
 scripts/add_vector_name.py    |   2 +-
 scripts/collection_health.py  | 286 ++++++++++++++++++++++++++++++++++
 scripts/create_indexes.py     |   6 +-
 scripts/health_check.py       |   2 +-
 scripts/hybrid_search.py      |   2 +-
 scripts/ingest_code.py        |  26 +++-
 scripts/ingest_history.py     |   2 +-
 scripts/mcp_indexer_server.py |  16 +-
 scripts/mcp_memory_server.py  |   2 +-
 scripts/prune.py              |   2 +-
 scripts/query_named_vector.py |   2 +-
 scripts/rerank_local.py       |   2 +-
 scripts/rerank_query.py       |   2 +-
 scripts/semantic_expansion.py |   2 +-
 scripts/smoke_test.py         |   2 +-
 scripts/warm_start.py         |   2 +-
 scripts/watch_index.py        |  16 +-
 scripts/workspace_state.py    |  43 ++---
 23 files changed, 461 insertions(+), 73 deletions(-)
 create mode 100644 scripts/collection_health.py

diff --git a/.env b/.env
index 9a644ace..cf0957a1 100644
--- a/.env
+++ b/.env
@@ -3,8 +3,10 @@
 QDRANT_URL=http://qdrant:6333
 # QDRANT_API_KEY= # not needed for local
 
-# Default collection used by the MCP server (auto-created if missing)
-# COLLECTION_NAME=my-collection  # Use auto-detected default from .codebase/state.json
+# Single unified collection for seamless cross-repo search
+# Default: "codebase" - all your code in one collection for unified search
+# This enables searching across multiple repos/workspaces without fragmentation
+COLLECTION_NAME=codebase
 
 # Embedding settings (FastEmbed model)
 EMBEDDING_MODEL=BAAI/bge-base-en-v1.5
diff --git a/.env.example b/.env.example
index f16118f2..87c7e330 100644
--- a/.env.example
+++ b/.env.example
@@ -1,7 +1,9 @@
 # Qdrant connection
 QDRANT_URL=http://localhost:6333
 QDRANT_API_KEY=
-COLLECTION_NAME=my-collection
+# Single unified collection for seamless cross-repo search (default: "codebase")
+# Leave unset or use "codebase" for unified search across all your code
+COLLECTION_NAME=codebase
 
 # Embeddings
 EMBEDDING_MODEL=BAAI/bge-base-en-v1.5
diff --git a/.gitignore b/.gitignore
index 15613865..06319ff1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,5 @@ tests/.codebase/cache.json
 tests/.codebase/state.json
 /scripts/.codebase
 /tests/.codebase
+.claude/settings.local.json
+.mcp.json
diff --git a/README.md b/README.md
index dd468441..e748df1f 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,13 @@ INDEX_MICRO_CHUNKS=1 MAX_MICRO_CHUNKS_PER_FILE=200 make reset-dev-dual
 ```
 - Default ports: Memory MCP :8000, Indexer MCP :8001, Qdrant :6333, llama.cpp :8080
 
+**🎯 Seamless Setup Note:**
+- The stack uses a **single unified `codebase` collection** by default
+- All your code goes into one collection for seamless cross-repo search
+- No per-workspace fragmentation - search across everything at once
+- Health checks auto-detect and fix cache/collection sync issues
+- Just run `make reset-dev-dual` on any machine and it works™
+
 ### Make targets: SSE, RMCP, and dual-compat
 - Legacy SSE only (default):
   - Ports: 8000 (/sse), 8001 (/sse)
@@ -96,9 +103,10 @@ INDEX_MICRO_CHUNKS=1 MAX_MICRO_CHUNKS_PER_FILE=200 make reset-dev-dual
    GLM_MODEL=glm-4.6             # Optional, defaults to glm-4.6
    ```
 
-5. **Custom collection name**:
+5. **Collection name** (unified by default):
    ```bash
-   COLLECTION_NAME=my-project    # Defaults to auto-detected repo name
+   COLLECTION_NAME=codebase      # Default: single unified collection for all code
+   # Only change this if you need isolated collections per project
    ```
 
 **After changing `.env`:**
@@ -280,7 +288,7 @@ Ports
 
 | Name | Description | Default |
 |------|-------------|---------|
-| COLLECTION_NAME | Qdrant collection name used by both servers | my-collection |
+| COLLECTION_NAME | Qdrant collection name (unified across all repos) | codebase |
 | REPO_NAME | Logical repo tag stored in payload for filtering | auto-detect from git/folder |
 | HOST_INDEX_PATH | Host path mounted at /work in containers | current repo (.) |
 | QDRANT_URL | Qdrant base URL | container: http://qdrant:6333; local: http://localhost:6333 |
@@ -763,6 +771,50 @@ Notes:
 - Named vector remains aligned with the MCP server (fast-bge-base-en-v1.5). If you change EMBEDDING_MODEL, run `make reindex` to recreate the collection.
 - For very large repos, consider running `make index` on a schedule (or pre-commit) to keep Qdrant warm without full reingestion.
 
+### Multi-repo indexing (unified search)
+
+The stack uses a **single unified `codebase` collection** by default, making multi-repo search seamless:
+
+**Index another repo into the same collection:**
+```bash
+# From your qdrant directory
+make index-here HOST_INDEX_PATH=/path/to/other/repo REPO_NAME=other-repo
+
+# Or with full control:
+HOST_INDEX_PATH=/path/to/other/repo \
+COLLECTION_NAME=codebase \
+REPO_NAME=other-repo \
+docker compose run --rm indexer --root /work
+```
+
+**What happens:**
+- Files from the other repo get indexed into the unified `codebase` collection
+- Each file is tagged with `metadata.repo = "other-repo"` for filtering
+- Search across all repos by default, or filter by specific repo
+
+**Search examples:**
+```bash
+# Search across all indexed repos
+make hybrid QUERY="authentication logic"
+
+# Filter by specific repo
+python scripts/hybrid_search.py \
+  --query "authentication logic" \
+  --repo other-repo
+
+# Filter by repo + language
+python scripts/hybrid_search.py \
+  --query "authentication logic" \
+  --repo other-repo \
+  --language python
+```
+
+**Benefits:**
+- One collection = unified search across all your code
+- No fragmentation or collection management overhead
+- Filter by repo when you need isolation
+- All repos share the same vector space for better semantic search
+
 ### Multi-query re-ranker (no new deps)
 
 - Run a fused query with several phrasings and metadata-aware boosts:
@@ -1296,6 +1348,39 @@ Client tips:
 
 ## Troubleshooting
 
+### Collection Health & Cache Sync
+
+The stack includes automatic health checks that detect and fix cache/collection sync issues:
+
+**Check collection health:**
+```bash
+python scripts/collection_health.py --workspace . --collection codebase
+```
+
+**Auto-heal cache issues:**
+```bash
+python scripts/collection_health.py --workspace . --collection codebase --auto-heal
+```
+
+**What it detects:**
+- Empty collection with cached files (cache thinks files are indexed but they're not)
+- Significant mismatch between cached files and actual collection contents
+- Missing metadata in collection points
+
+**When to use:**
+- After manually deleting collections
+- If searches return no results despite indexing
+- After Qdrant crashes or data loss
+- When switching between collection names
+
+**Automatic healing:**
+- Health checks run automatically on watcher startup and on incremental indexer runs (they are skipped when the collection is recreated)
+- Cache is cleared when sync issues are detected
+- Files are reindexed on next run
+
+### General Issues
+
 - If the MCP servers can’t reach Qdrant, confirm both containers are up: `make ps`.
 - If the SSE port collides, change `FASTMCP_PORT` in `.env` and the mapped port in `docker-compose.yml`.
 - If you customize tool descriptions, restart: `make restart`.
+- If searches return no results, check collection health (see above).
diff --git a/docker-compose.yml b/docker-compose.yml
index ccb39f5c..cae91e27 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -24,7 +24,7 @@ services:
       - FASTMCP_HOST=${FASTMCP_HOST}
       - FASTMCP_PORT=${FASTMCP_PORT}
       - QDRANT_URL=${QDRANT_URL}
-      - COLLECTION_NAME=${COLLECTION_NAME:-my-collection}
+      - COLLECTION_NAME=${COLLECTION_NAME:-codebase}
       - EMBEDDING_MODEL=${EMBEDDING_MODEL}
       - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER}
       - TOOL_STORE_DESCRIPTION=${TOOL_STORE_DESCRIPTION}
@@ -60,7 +60,7 @@ services:
       - USE_GPU_DECODER=${USE_GPU_DECODER:-0}
       - LLAMACPP_TIMEOUT_SEC=${LLAMACPP_TIMEOUT_SEC:-180}
       - CTX_REQUIRE_IDENTIFIER=${CTX_REQUIRE_IDENTIFIER:-0}
-      - COLLECTION_NAME=${COLLECTION_NAME:-my-collection}
+      - COLLECTION_NAME=${COLLECTION_NAME:-codebase}
       - EMBEDDING_MODEL=${EMBEDDING_MODEL}
     # SSE endpoint for IDE agents at http://localhost:${FASTMCP_INDEXER_PORT:-8001}/sse
     ports:
@@ -84,7 +84,7 @@ services:
       - FASTMCP_PORT=8000
       - FASTMCP_TRANSPORT=${FASTMCP_HTTP_TRANSPORT}
       - QDRANT_URL=${QDRANT_URL}
-      - COLLECTION_NAME=${COLLECTION_NAME:-my-collection}
+      - COLLECTION_NAME=${COLLECTION_NAME:-codebase}
       - EMBEDDING_MODEL=${EMBEDDING_MODEL}
       - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER}
       - TOOL_STORE_DESCRIPTION=${TOOL_STORE_DESCRIPTION}
@@ -119,7 +119,7 @@ services:
       - USE_GPU_DECODER=${USE_GPU_DECODER:-0}
       - LLAMACPP_TIMEOUT_SEC=${LLAMACPP_TIMEOUT_SEC:-180}
       - CTX_REQUIRE_IDENTIFIER=${CTX_REQUIRE_IDENTIFIER:-0}
-      - COLLECTION_NAME=${COLLECTION_NAME:-my-collection}
+      - COLLECTION_NAME=${COLLECTION_NAME:-codebase}
       - EMBEDDING_MODEL=${EMBEDDING_MODEL}
     # Streamable HTTP endpoint for IDE agents at http://localhost:${FASTMCP_INDEXER_HTTP_PORT:-8003}/mcp/
     ports:
@@ -186,7 +186,7 @@ services:
       - .env
     environment:
       - QDRANT_URL=${QDRANT_URL}
-      - COLLECTION_NAME=${COLLECTION_NAME:-my-collection}
+      - COLLECTION_NAME=${COLLECTION_NAME:-codebase}
       - EMBEDDING_MODEL=${EMBEDDING_MODEL}
     working_dir: /work
     volumes:
@@ -205,7 +205,7 @@ services:
       - .env
     environment:
       - QDRANT_URL=${QDRANT_URL}
-      - COLLECTION_NAME=${COLLECTION_NAME:-my-collection}
+      - COLLECTION_NAME=${COLLECTION_NAME:-codebase}
       - EMBEDDING_MODEL=${EMBEDDING_MODEL}
       - WATCH_ROOT=/work
       # Watcher-specific backpressure & timeouts (safer defaults)
@@ -231,7 +231,7 @@ services:
       - .env
     environment:
       - QDRANT_URL=${QDRANT_URL}
-      - COLLECTION_NAME=${COLLECTION_NAME:-my-collection}
+      - COLLECTION_NAME=${COLLECTION_NAME:-codebase}
     working_dir: /work
     volumes:
       - ${HOST_INDEX_PATH:-.}:/work:ro
diff --git a/scripts/add_vector_name.py b/scripts/add_vector_name.py
index c5769e96..81b79c2c 100644
--- a/scripts/add_vector_name.py
+++ b/scripts/add_vector_name.py
@@ -3,7 +3,7 @@
 from qdrant_client import QdrantClient, models
 
 QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333")
-COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 
 cli = QdrantClient(url=QDRANT_URL)
 
diff --git a/scripts/collection_health.py b/scripts/collection_health.py
new file mode 100644
index 00000000..55d53b3b
--- /dev/null
+++ b/scripts/collection_health.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python3
+"""
+Collection health monitoring and self-healing for cache/collection sync issues.
+
+Detects when the local cache is out of sync with the actual Qdrant collection
+and triggers corrective actions (cache clear + reindex).
+"""
+import os
+import sys
+from pathlib import Path
+from typing import Optional, Dict, Any
+import logging
+
+# Ensure project root is on sys.path
+ROOT_DIR = Path(__file__).resolve().parent.parent
+if str(ROOT_DIR) not in sys.path:
+    sys.path.insert(0, str(ROOT_DIR))
+
+from scripts.workspace_state import (
+    _read_cache,
+    _write_cache,
+    get_workspace_state,
+    update_workspace_state,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def get_cached_files_count(workspace_path: str) -> int:
+    """Return how many entries the workspace cache holds under "file_hashes"; 0 if missing or unreadable."""
+    try:
+        cache = _read_cache(workspace_path)
+        file_hashes = cache.get("file_hashes", {})
+        return len(file_hashes)
+    except Exception as e:
+        logger.warning(f"Failed to read cache: {e}")
+        return 0
+
+
+def get_collection_points_count(collection_name: str, qdrant_url: Optional[str] = None) -> int:
+    """Return the exact point count of the collection: 0 if it does not exist, -1 on any other error."""
+    try:
+        from qdrant_client import QdrantClient
+        url = qdrant_url or os.environ.get("QDRANT_URL", "http://localhost:6333")
+        api_key = os.environ.get("QDRANT_API_KEY")
+        client = QdrantClient(
+            url=url,
+            api_key=api_key or None,
+            timeout=int(os.environ.get("QDRANT_TIMEOUT", "20") or 20),
+        )
+        # A deleted/missing collection counts as empty (0), not unknown (-1), so a stale cache is detected.
+        if collection_name not in {c.name for c in client.get_collections().collections}:
+            return 0
+        result = client.count(collection_name=collection_name, exact=True)
+        return int(getattr(result, "count", 0))
+    except Exception as e:
+        logger.warning(f"Failed to get collection count: {e}")
+        return -1
+
+
+def get_unique_files_in_collection(collection_name: str, qdrant_url: Optional[str] = None) -> int:
+    """Return the number of distinct `metadata.path` values in the collection, or -1 on any error."""
+    try:
+        from qdrant_client import QdrantClient
+
+        url = qdrant_url or os.environ.get("QDRANT_URL", "http://localhost:6333")
+        api_key = os.environ.get("QDRANT_API_KEY")
+
+        client = QdrantClient(
+            url=url,
+            api_key=api_key or None,
+            timeout=int(os.environ.get("QDRANT_TIMEOUT", "20") or 20),
+        )
+
+        # Page through every point; only the "metadata" payload key is read, so fetch nothing else.
+        unique_paths = set()
+        offset = None
+        batch_size = 100
+
+        while True:
+            points, offset = client.scroll(
+                collection_name=collection_name,
+                limit=batch_size,
+                offset=offset,
+                with_payload=["metadata"],
+                with_vectors=False,
+            )
+
+            if not points:
+                break
+
+            for point in points:
+                try:
+                    payload = point.payload or {}
+                    metadata = payload.get("metadata", {})
+                    path = metadata.get("path")
+                    if path:
+                        unique_paths.add(str(path))
+                except Exception:
+                    continue
+
+            if offset is None:
+                break
+
+        return len(unique_paths)
+    except Exception as e:
+        logger.warning(f"Failed to count unique files: {e}")
+        return -1
+
+
+def clear_cache(workspace_path: str) -> bool:
+    """Overwrite the workspace cache with an empty "file_hashes" map; return True on success, False on error."""
+    try:
+        cache = {"file_hashes": {}, "updated_at": ""}
+        _write_cache(workspace_path, cache)
+        logger.info(f"Cleared cache for workspace: {workspace_path}")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to clear cache: {e}")
+        return False
+
+
+def detect_collection_health(
+    workspace_path: str,
+    collection_name: str,
+    qdrant_url: Optional[str] = None,
+    threshold: float = 0.1,
+) -> Dict[str, Any]:
+    """
+    Detect cache/collection sync issues; the first failing check decides the result.
+    `threshold` is the minimum unique-files/cached-files ratio still treated as in sync.
+    Returns a dict with:
+    - healthy: bool
+    - cached_files: int
+    - collection_points: int (-1 when the count query failed)
+    - unique_files_in_collection: int (-1 when the scroll failed)
+    - issue: Optional[str] - description of the problem
+    - recommendation: Optional[str] - suggested fix
+    """
+    cached_count = get_cached_files_count(workspace_path)
+    points_count = get_collection_points_count(collection_name, qdrant_url)
+    unique_files = get_unique_files_in_collection(collection_name, qdrant_url)
+    
+    result = {
+        "healthy": True,
+        "cached_files": cached_count,
+        "collection_points": points_count,
+        "unique_files_in_collection": unique_files,
+        "issue": None,
+        "recommendation": None,
+    }
+    
+    # Check 1: Collection is empty but cache has entries
+    if points_count == 0 and cached_count > 0:
+        result["healthy"] = False
+        result["issue"] = f"Collection is empty but cache has {cached_count} files"
+        result["recommendation"] = "Clear cache and force reindex"
+        return result
+    
+    # Check 2: Unique files in collection is way less than cached files (skipped when unique_files is -1)
+    if unique_files >= 0 and cached_count > 0:
+        ratio = unique_files / cached_count if cached_count > 0 else 0
+        if ratio < threshold:
+            result["healthy"] = False
+            result["issue"] = (
+                f"Cache has {cached_count} files but collection only has {unique_files} "
+                f"unique files ({ratio:.1%} < {threshold:.0%} threshold)"
+            )
+            result["recommendation"] = "Clear cache and force reindex"
+            return result
+    
+    # Check 3: Collection has points but no unique files detected (metadata issue).
+    # With a positive threshold this is only reached when the cache is empty: Check 2 already fires on 0 unique files.
+    if points_count > 0 and unique_files == 0:
+        result["healthy"] = False
+        result["issue"] = f"Collection has {points_count} points but no valid file paths in metadata"
+        result["recommendation"] = "Recreate collection with proper metadata"
+        return result
+    return result
+
+
+def auto_heal_if_needed(
+    workspace_path: str,
+    collection_name: str,
+    qdrant_url: Optional[str] = None,
+    dry_run: bool = False,
+) -> Dict[str, Any]:
+    """
+    Run detect_collection_health() and clear the cache when it recommends doing so.
+    With dry_run=True nothing is modified; only "Clear cache..." recommendations are acted on.
+    Returns a dict with:
+    - action_taken: str - "none", "cleared_cache", "clear_cache_failed" or "dry_run"
+    - health_check: Dict (from detect_collection_health)
+    """
+    health = detect_collection_health(workspace_path, collection_name, qdrant_url)
+    
+    result = {
+        "action_taken": "none",
+        "health_check": health,
+    }
+    
+    if not health["healthy"]:
+        logger.warning(f"Collection health issue detected: {health['issue']}")
+        logger.info(f"Recommendation: {health['recommendation']}")
+        
+        if not dry_run:
+            # Other recommendations (e.g. recreating the collection) are only logged; action_taken stays "none".
+            if "Clear cache" in health["recommendation"]:
+                if clear_cache(workspace_path):
+                    result["action_taken"] = "cleared_cache"
+                    logger.info("Cache cleared. Reindex required.")
+                else:
+                    result["action_taken"] = "clear_cache_failed"
+        else:
+            result["action_taken"] = "dry_run"
+            logger.info("Dry run mode - no action taken")
+    else:
+        logger.info("Collection health check passed")
+    return result
+
+
+def main():
+    """CLI entry point: print a health report, or with --auto-heal clear a stale cache (unless --dry-run)."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Check and heal collection health")
+    parser.add_argument(
+        "--workspace",
+        default=os.environ.get("WATCH_ROOT") or os.environ.get("WORKSPACE_PATH") or "/work",
+        help="Workspace path (default: WATCH_ROOT env, then WORKSPACE_PATH env, then /work)",
+    )
+    parser.add_argument(
+        "--collection",
+        default=os.environ.get("COLLECTION_NAME", "codebase"),
+        help="Collection name (default: COLLECTION_NAME env or codebase)",
+    )
+    parser.add_argument(
+        "--qdrant-url",
+        default=os.environ.get("QDRANT_URL", "http://localhost:6333"),
+        help="Qdrant URL (default: QDRANT_URL env or http://localhost:6333)",
+    )
+    parser.add_argument(
+        "--auto-heal",
+        action="store_true",
+        help="Automatically fix issues (clear cache if needed)",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Check health but don't take action",
+    )
+
+    args = parser.parse_args()
+
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s [%(levelname)s] %(message)s",
+    )
+
+    if args.auto_heal:
+        result = auto_heal_if_needed(
+            args.workspace,
+            args.collection,
+            args.qdrant_url,
+            dry_run=args.dry_run,
+        )
+        print(f"\nAction taken: {result['action_taken']}")
+    else:
+        health = detect_collection_health(
+            args.workspace,
+            args.collection,
+            args.qdrant_url,
+        )
+        print("\nHealth check results:")
+        print(f"  Healthy: {health['healthy']}")
+        print(f"  Cached files: {health['cached_files']}")
+        print(f"  Collection points: {health['collection_points']}")
+        print(f"  Unique files in collection: {health['unique_files_in_collection']}")
+        if not health['healthy']:
+            print(f"  Issue: {health['issue']}")
+            print(f"  Recommendation: {health['recommendation']}")
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/scripts/create_indexes.py b/scripts/create_indexes.py
index 74793a90..c0f3ff62 100644
--- a/scripts/create_indexes.py
+++ b/scripts/create_indexes.py
@@ -11,12 +11,12 @@
     update_last_activity = None  # type: ignore
     get_collection_name = None  # type: ignore
 
-COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 # Discover workspace path for state updates (allows subdir indexing)
 WS_PATH = os.environ.get("INDEX_ROOT") or os.environ.get("WORKSPACE_PATH") or "/work"
 
-# Prefer per-workspace unique collection if none provided
-if (COLLECTION == "my-collection") and ('get_collection_name' in globals()) and get_collection_name:
+# Use workspace state to get collection name (defaults to "codebase")
+if 'get_collection_name' in globals() and get_collection_name:
     try:
         COLLECTION = get_collection_name(WS_PATH)
     except Exception:
diff --git a/scripts/health_check.py b/scripts/health_check.py
index b856a1d0..3a0137e7 100644
--- a/scripts/health_check.py
+++ b/scripts/health_check.py
@@ -27,7 +27,7 @@ def assert_true(cond: bool, msg: str):
 def main():
     qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333")
     api_key = os.environ.get("QDRANT_API_KEY")
-    collection = os.environ.get("COLLECTION_NAME", "my-collection")
+    collection = os.environ.get("COLLECTION_NAME", "codebase")
     model_name = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
 
     print(f"Health check -> {qdrant_url} collection={collection} model={model_name}")
diff --git a/scripts/hybrid_search.py b/scripts/hybrid_search.py
index cc6ed743..6dd1b25d 100644
--- a/scripts/hybrid_search.py
+++ b/scripts/hybrid_search.py
@@ -42,7 +42,7 @@
 
 
 def _collection() -> str:
-    return os.environ.get("COLLECTION_NAME", "my-collection")
+    return os.environ.get("COLLECTION_NAME", "codebase")
 
 
 MODEL_NAME = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
diff --git a/scripts/ingest_code.py b/scripts/ingest_code.py
index a8b47153..f3289c49 100644
--- a/scripts/ingest_code.py
+++ b/scripts/ingest_code.py
@@ -1911,14 +1911,12 @@ def index_repo(
         if vector_name is None:
             vector_name = _sanitize_vector_name(model_name)
 
-    # Workspace state: ensure unique per-workspace collection and announce start
+    # Workspace state: use single unified collection for seamless cross-repo search
     try:
         ws_path = str(root)
-        # If collection is unset or default placeholder, generate a per-workspace one
+        # Always use the unified collection (default: "codebase")
         if 'get_collection_name' in globals() and get_collection_name:
-            default_marker = os.environ.get("COLLECTION_NAME", "my-collection")
-            if (not collection) or (collection == "my-collection") or (default_marker == "my-collection"):
-                collection = get_collection_name(ws_path)
+            collection = get_collection_name(ws_path)
         if update_workspace_state:
             update_workspace_state(ws_path, {"qdrant_collection": collection})
         if update_indexing_status:
@@ -1937,6 +1935,22 @@ def index_repo(
     print(
         f"Indexing root={root} -> {qdrant_url} collection={collection} model={model_name} recreate={recreate}"
     )
+
+    # Health check: detect cache/collection sync issues before indexing
+    if not recreate and skip_unchanged:
+        try:
+            from scripts.collection_health import auto_heal_if_needed
+            print("[health_check] Checking collection health...")
+            heal_result = auto_heal_if_needed(str(root), collection, qdrant_url, dry_run=False)
+            if heal_result["action_taken"] == "cleared_cache":
+                print("[health_check] Cache cleared due to sync issue - forcing full reindex")
+            elif not heal_result["health_check"]["healthy"]:
+                print(f"[health_check] Issue detected: {heal_result['health_check']['issue']}")
+            else:
+                print("[health_check] Collection health OK")
+        except Exception as e:
+            print(f"[health_check] Warning: health check failed: {e}")
+
     if recreate:
         recreate_collection(client, collection, dim, vector_name)
     else:
@@ -2387,7 +2401,7 @@ def main():
 
     qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333")
     api_key = os.environ.get("QDRANT_API_KEY")
-    collection = os.environ.get("COLLECTION_NAME", "my-collection")
+    collection = os.environ.get("COLLECTION_NAME", "codebase")
     model_name = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
 
     index_repo(
diff --git a/scripts/ingest_history.py b/scripts/ingest_history.py
index 4c6f83c2..99645386 100644
--- a/scripts/ingest_history.py
+++ b/scripts/ingest_history.py
@@ -11,7 +11,7 @@
 from qdrant_client import QdrantClient, models
 from fastembed import TextEmbedding
 
-COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 MODEL_NAME = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
 QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
 API_KEY = os.environ.get("QDRANT_API_KEY")
diff --git a/scripts/mcp_indexer_server.py b/scripts/mcp_indexer_server.py
index 30739686..54eb9692 100644
--- a/scripts/mcp_indexer_server.py
+++ b/scripts/mcp_indexer_server.py
@@ -13,7 +13,7 @@
 - FASTMCP_HOST (default: 0.0.0.0)
 - FASTMCP_INDEXER_PORT (default: 8001)
 - QDRANT_URL (e.g., http://qdrant:6333) — server expects Qdrant reachable via this env
-- COLLECTION_NAME (default: my-collection)
+- COLLECTION_NAME (default: codebase) — unified collection for seamless cross-repo search
 
 Conventions:
 - Repo content must be mounted at /work inside containers
@@ -225,7 +225,7 @@ def _score(token: str) -> int:
 
 
 QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333")
-DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 MAX_LOG_TAIL = safe_int(
     os.environ.get("MCP_MAX_LOG_TAIL", "4000"),
     default=4000,
@@ -827,13 +827,13 @@ async def qdrant_index_root(
     except Exception:
         pass
 
-    # Resolve collection: prefer explicit non-placeholder; otherwise workspace state
+    # Resolve collection: prefer explicit value; otherwise use workspace state
     try:
         _c = (collection or "").strip()
     except Exception:
         _c = ""
-    _placeholders = {"", "my-collection"}
-    if _c and _c not in _placeholders:
+    # Empty string means use workspace state default (codebase)
+    if _c:
         coll = _c
     else:
         try:
@@ -1241,13 +1241,13 @@ async def qdrant_index(
     if not (real_root == "/work" or real_root.startswith("/work/")):
         return {"ok": False, "error": "subdir escapes /work sandbox"}
     root = real_root
-    # Resolve collection: prefer explicit non-placeholder; otherwise workspace state (use workspace root)
+    # Resolve collection: prefer explicit value; otherwise use workspace state (use workspace root)
     try:
         _c2 = (collection or "").strip()
     except Exception:
         _c2 = ""
-    _placeholders2 = {"", "my-collection"}
-    if _c2 and _c2 not in _placeholders2:
+    # Empty string means use workspace state default (codebase)
+    if _c2:
         coll = _c2
     else:
         try:
diff --git a/scripts/mcp_memory_server.py b/scripts/mcp_memory_server.py
index 03f05459..5a782d82 100644
--- a/scripts/mcp_memory_server.py
+++ b/scripts/mcp_memory_server.py
@@ -9,7 +9,7 @@
 
 # Env
 QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333")
-DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 LEX_VECTOR_NAME = os.environ.get("LEX_VECTOR_NAME", "lex")
 LEX_VECTOR_DIM = int(os.environ.get("LEX_VECTOR_DIM", "4096") or 4096)
 EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
diff --git a/scripts/prune.py b/scripts/prune.py
index 50405dfb..05db0cab 100755
--- a/scripts/prune.py
+++ b/scripts/prune.py
@@ -6,7 +6,7 @@
 
 from qdrant_client import QdrantClient, models
 
-COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
 API_KEY = os.environ.get("QDRANT_API_KEY")
 ROOT = Path(os.environ.get("PRUNE_ROOT", ".")).resolve()
diff --git a/scripts/query_named_vector.py b/scripts/query_named_vector.py
index 308f67f2..0c5dd18b 100644
--- a/scripts/query_named_vector.py
+++ b/scripts/query_named_vector.py
@@ -4,7 +4,7 @@
 from qdrant_client import QdrantClient
 
 QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333")
-COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
 VEC_NAME = "fast-bge-base-en-v1.5"
 
diff --git a/scripts/rerank_local.py b/scripts/rerank_local.py
index de5a8633..56fe7453 100644
--- a/scripts/rerank_local.py
+++ b/scripts/rerank_local.py
@@ -14,7 +14,7 @@
     ort = None
     Tokenizer = None
 
-COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 MODEL_NAME = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
 QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333")
 API_KEY = os.environ.get("QDRANT_API_KEY")
diff --git a/scripts/rerank_query.py b/scripts/rerank_query.py
index 7f226d63..3efb4b36 100644
--- a/scripts/rerank_query.py
+++ b/scripts/rerank_query.py
@@ -11,7 +11,7 @@
 
 # Env configuration
 QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333")
-COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
 
 # Quick-win boosts
diff --git a/scripts/semantic_expansion.py b/scripts/semantic_expansion.py
index 83625289..a8e94fa0 100644
--- a/scripts/semantic_expansion.py
+++ b/scripts/semantic_expansion.py
@@ -229,7 +229,7 @@ def expand_queries_semantically(
             model = TextEmbedding(model_name=model_name)
         
         if collection is None:
-            collection = os.environ.get("COLLECTION_NAME", "my-collection")
+            collection = os.environ.get("COLLECTION_NAME", "codebase")
         
         # If we don't have the required components, fall back to lexical expansion
         if not (client and model):
diff --git a/scripts/smoke_test.py b/scripts/smoke_test.py
index bbdfffc8..02174687 100644
--- a/scripts/smoke_test.py
+++ b/scripts/smoke_test.py
@@ -4,7 +4,7 @@
 from qdrant_client import QdrantClient
 
 QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333")
-COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
 
 # Derive the named vector consistently with ingest_code
diff --git a/scripts/warm_start.py b/scripts/warm_start.py
index bcd76c6d..70be8b37 100644
--- a/scripts/warm_start.py
+++ b/scripts/warm_start.py
@@ -38,7 +38,7 @@ def main():
     args = parser.parse_args()
 
     QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333")
-    COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+    COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
     MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
 
     print(
diff --git a/scripts/watch_index.py b/scripts/watch_index.py
index 6052441d..ab503f61 100644
--- a/scripts/watch_index.py
+++ b/scripts/watch_index.py
@@ -34,7 +34,7 @@
 import scripts.ingest_code as idx
 
 QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333")
-COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection")
+COLLECTION = os.environ.get("COLLECTION_NAME", "codebase")
 MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")
 ROOT = Path(os.environ.get("WATCH_ROOT", "/work")).resolve()
 
@@ -474,6 +474,20 @@ def main():
         f"Watch mode: root={ROOT} qdrant={QDRANT_URL} collection={COLLECTION} model={MODEL}"
     )
 
+    # Health check: detect and auto-heal cache/collection sync issues
+    try:
+        from scripts.collection_health import auto_heal_if_needed
+        print("[health_check] Checking collection health...")
+        heal_result = auto_heal_if_needed(str(ROOT), COLLECTION, QDRANT_URL, dry_run=False)
+        if heal_result["action_taken"] == "cleared_cache":
+            print("[health_check] Cache cleared due to sync issue - files will be reindexed")
+        elif not heal_result["health_check"]["healthy"]:
+            print(f"[health_check] Issue detected: {heal_result['health_check']['issue']}")
+        else:
+            print("[health_check] Collection health OK")
+    except Exception as e:
+        print(f"[health_check] Warning: health check failed: {e}")
+
     client = QdrantClient(
         url=QDRANT_URL, timeout=int(os.environ.get("QDRANT_TIMEOUT", "20") or 20)
     )
diff --git a/scripts/workspace_state.py b/scripts/workspace_state.py
index f1a10418..dfa6b4fb 100644
--- a/scripts/workspace_state.py
+++ b/scripts/workspace_state.py
@@ -217,11 +217,9 @@ def get_workspace_state(workspace_path: str) -> WorkspaceState:
 
             # Create new state
             now = datetime.now().isoformat()
-            env_coll = os.environ.get("COLLECTION_NAME")
-            if isinstance(env_coll, str) and env_coll.strip() and env_coll.strip() != "my-collection":
-                collection_name = env_coll.strip()
-            else:
-                collection_name = _generate_collection_name(workspace_path)
+            env_coll = os.environ.get("COLLECTION_NAME", "").strip()
+            # Use env var if set, otherwise default to "codebase"
+            collection_name = env_coll if env_coll else "codebase"
 
             state: WorkspaceState = {
                 "workspace_path": str(Path(workspace_path).resolve()),
@@ -284,33 +282,18 @@ def update_qdrant_stats(workspace_path: str, stats: QdrantStats) -> WorkspaceSta
 
 def get_collection_name(workspace_path: str) -> str:
     """Get the Qdrant collection name for a workspace.
-    If none is present in state, persist either COLLECTION_NAME from env or a generated
-    repoName-<shortHash> based on the workspace path, and return it.
 
-    Fix: treat placeholders as not-real so we don't collide across repos.
-    Placeholders include: empty string, "my-collection", and the env default if it equals "my-collection".
-    Only short-circuit when the stored name is already real.
+    Seamless single-collection mode (unified cross-repo search):
+    - Returns COLLECTION_NAME from the environment (stripped) when non-empty,
+      otherwise the default "codebase"; set the env var if you need isolation
+    - Stored state is not consulted; the name is re-persisted on every call
     """
-    state = get_workspace_state(workspace_path)
-    coll = state.get("qdrant_collection") if isinstance(state, dict) else None
-    env_coll = os.environ.get("COLLECTION_NAME")
-    env_coll = env_coll.strip() if isinstance(env_coll, str) else ""
-    placeholders = {"", "my-collection"}
-    # If env is explicitly the default placeholder, consider it a placeholder too
-    if env_coll == "my-collection":
-        placeholders.add(env_coll)
-
-    # If state has a real (non-placeholder) collection, keep it
-    if isinstance(coll, str):
-        c = coll.strip()
-        if c and c not in placeholders:
-            return c
-
-    # Otherwise, prefer a non-placeholder explicit env override; else generate
-    if env_coll and env_coll not in placeholders:
-        coll = env_coll.strip()
-    else:
-        coll = _generate_collection_name(workspace_path)
+    env_coll = os.environ.get("COLLECTION_NAME", "").strip()
+
+    # Use env var if set, otherwise default to unified "codebase" collection
+    coll = env_coll if env_coll else "codebase"
+
+    # Persist to state for consistency
     update_workspace_state(workspace_path, {"qdrant_collection": coll})
     return coll
 

From 0e7f93daa8734f169a101d82153ebac1b972342a Mon Sep 17 00:00:00 2001
From: john donalson <mirlok@dr.com>
Date: Sun, 2 Nov 2025 21:57:02 -0500
Subject: [PATCH 2/5] add kubernetes support

---
 Dockerfile                              |  38 ++
 deploy/kubernetes/Makefile              | 199 ++++++++++
 deploy/kubernetes/README.md             | 472 ++++++++++++++++++++++++
 deploy/kubernetes/cleanup.sh            | 163 ++++++++
 deploy/kubernetes/configmap.yaml        |  75 ++++
 deploy/kubernetes/deploy.sh             | 250 +++++++++++++
 deploy/kubernetes/indexer-services.yaml | 197 ++++++++++
 deploy/kubernetes/ingress.yaml          |  86 +++++
 deploy/kubernetes/kustomization.yaml    |  87 +++++
 deploy/kubernetes/llamacpp.yaml         | 171 +++++++++
 deploy/kubernetes/mcp-http.yaml         | 323 ++++++++++++++++
 deploy/kubernetes/mcp-indexer.yaml      | 182 +++++++++
 deploy/kubernetes/mcp-memory.yaml       | 148 ++++++++
 deploy/kubernetes/namespace.yaml        |   7 +
 deploy/kubernetes/qdrant.yaml           | 126 +++++++
 docs/MULTI_REPO_COLLECTIONS.md          | 397 ++++++++++++++++++++
 16 files changed, 2921 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 deploy/kubernetes/Makefile
 create mode 100644 deploy/kubernetes/README.md
 create mode 100755 deploy/kubernetes/cleanup.sh
 create mode 100644 deploy/kubernetes/configmap.yaml
 create mode 100755 deploy/kubernetes/deploy.sh
 create mode 100644 deploy/kubernetes/indexer-services.yaml
 create mode 100644 deploy/kubernetes/ingress.yaml
 create mode 100644 deploy/kubernetes/kustomization.yaml
 create mode 100644 deploy/kubernetes/llamacpp.yaml
 create mode 100644 deploy/kubernetes/mcp-http.yaml
 create mode 100644 deploy/kubernetes/mcp-indexer.yaml
 create mode 100644 deploy/kubernetes/mcp-memory.yaml
 create mode 100644 deploy/kubernetes/namespace.yaml
 create mode 100644 deploy/kubernetes/qdrant.yaml
 create mode 100644 docs/MULTI_REPO_COLLECTIONS.md

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..a3b544b8
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,38 @@
+# Unified Context-Engine image for Kubernetes deployment
+# Supports multiple roles: memory, indexer, watcher, llamacpp
+FROM python:3.11-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    WORK_ROOTS="/work,/app"
+
+# Install OS dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    ca-certificates \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies for all services
+RUN pip install --no-cache-dir --upgrade \
+    qdrant-client \
+    fastembed \
+    watchdog \
+    onnxruntime \
+    tokenizers \
+    tree_sitter \
+    tree_sitter_languages \
+    mcp \
+    fastmcp
+
+# Copy scripts for all services
+COPY scripts /app/scripts
+
+# Set the working directory (WORKDIR creates /work if it does not exist)
+WORKDIR /work
+
+# Expose all necessary ports
+EXPOSE 8000 8001 8002 8003 18000 18001 18002 18003
+
+# Default to memory server
+CMD ["python", "/app/scripts/mcp_memory_server.py"]
\ No newline at end of file
diff --git a/deploy/kubernetes/Makefile b/deploy/kubernetes/Makefile
new file mode 100644
index 00000000..8307bbbe
--- /dev/null
+++ b/deploy/kubernetes/Makefile
@@ -0,0 +1,199 @@
+# Context-Engine Kubernetes Deployment Makefile
+
+# Configuration
+NAMESPACE ?= context-engine
+IMAGE_REGISTRY ?= context-engine
+IMAGE_TAG ?= latest
+
+# Default target
+.PHONY: help
+help: ## Show this help message
+	@echo "Context-Engine Kubernetes Deployment Commands"
+	@echo ""
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}'
+
+# Prerequisites
+.PHONY: check-kubectl
+check-kubectl: ## Check if kubectl is available and cluster is accessible
+	@command -v kubectl > /dev/null || (echo "kubectl not found. Please install kubectl." && exit 1)
+	@kubectl cluster-info > /dev/null || (echo "Cannot connect to Kubernetes cluster." && exit 1)
+	@echo "✓ Kubernetes connection verified"
+
+# Deployment targets
+.PHONY: deploy
+deploy: check-kubectl ## Deploy all Context-Engine services
+	./deploy.sh --namespace $(NAMESPACE) --registry $(IMAGE_REGISTRY) --tag $(IMAGE_TAG)
+
+.PHONY: deploy-core
+deploy-core: check-kubectl ## Deploy only core services (Qdrant + MCP servers)
+	@echo "Deploying core services..."
+	kubectl apply -f namespace.yaml
+	kubectl apply -f configmap.yaml
+	kubectl apply -f qdrant.yaml
+	kubectl apply -f mcp-memory.yaml
+	kubectl apply -f mcp-indexer.yaml
+
+.PHONY: deploy-full
+deploy-full: check-kubectl ## Deploy all services including optional ones
+	./deploy.sh --namespace $(NAMESPACE) --registry $(IMAGE_REGISTRY) --tag $(IMAGE_TAG) --deploy-ingress
+
+.PHONY: deploy-minimal
+deploy-minimal: check-kubectl ## Deploy minimal setup (skip Llama.cpp and Ingress)
+	./deploy.sh --namespace $(NAMESPACE) --registry $(IMAGE_REGISTRY) --tag $(IMAGE_TAG) --skip-llamacpp
+
+# Kustomize targets
+.PHONY: kustomize-build
+kustomize-build: ## Build manifests with Kustomize
+	kustomize build .
+
+.PHONY: kustomize-apply
+kustomize-apply: check-kubectl ## Apply manifests with Kustomize
+	kustomize build . | kubectl apply -f -
+
+.PHONY: kustomize-delete
+kustomize-delete: check-kubectl ## Delete manifests with Kustomize
+	kustomize build . | kubectl delete -f -
+
+# Management targets
+.PHONY: status
+status: check-kubectl ## Show deployment status
+	@echo "=== Namespace Status ==="
+	kubectl get namespace $(NAMESPACE) || echo "Namespace $(NAMESPACE) not found"
+	@echo ""
+	@echo "=== Pods ==="
+	kubectl get pods -n $(NAMESPACE) -o wide || echo "No pods found"
+	@echo ""
+	@echo "=== Services ==="
+	kubectl get services -n $(NAMESPACE) || echo "No services found"
+	@echo ""
+	@echo "=== Deployments ==="
+	kubectl get deployments -n $(NAMESPACE) || echo "No deployments found"
+	@echo ""
+	@echo "=== StatefulSets ==="
+	kubectl get statefulsets -n $(NAMESPACE) || echo "No statefulsets found"
+	@echo ""
+	@echo "=== PersistentVolumeClaims ==="
+	kubectl get pvc -n $(NAMESPACE) || echo "No PVCs found"
+	@echo ""
+	@echo "=== Jobs ==="
+	kubectl get jobs -n $(NAMESPACE) || echo "No jobs found"
+
+.PHONY: logs
+logs: check-kubectl ## Follow Qdrant logs (use logs-service SERVICE=<name> for other services)
+	@echo "=== Qdrant Logs ==="
+	kubectl logs -f statefulset/qdrant -n $(NAMESPACE) --tail=50 || echo "Qdrant logs not available"
+
+.PHONY: logs-service
+logs-service: check-kubectl ## Show logs for specific service (usage: make logs-service SERVICE=mcp-memory)
+	@if [ -z "$(SERVICE)" ]; then echo "Usage: make logs-service SERVICE=<service-name>"; exit 1; fi
+	kubectl logs -f deployment/$(SERVICE) -n $(NAMESPACE) --tail=100 || kubectl logs -f statefulset/$(SERVICE) -n $(NAMESPACE) --tail=100 || kubectl logs -f job/$(SERVICE) -n $(NAMESPACE) --tail=100 || echo "Service $(SERVICE) not found"
+
+.PHONY: shell
+shell: check-kubectl ## Get a shell in a running pod (usage: make shell POD=mcp-memory-xxx)
+	@if [ -z "$(POD)" ]; then echo "Usage: make shell POD=<pod-name>"; echo "Available pods:"; kubectl get pods -n $(NAMESPACE); exit 1; fi
+	kubectl exec -it $(POD) -n $(NAMESPACE) -- /bin/bash || kubectl exec -it $(POD) -n $(NAMESPACE) -- /bin/sh
+
+# Cleanup targets
+.PHONY: cleanup
+cleanup: check-kubectl ## Remove all Context-Engine resources
+	./cleanup.sh --namespace $(NAMESPACE)
+
+.PHONY: clean-force
+clean-force: check-kubectl ## Force cleanup without confirmation
+	./cleanup.sh --namespace $(NAMESPACE) --force
+
+# Development targets
+.PHONY: restart
+restart: check-kubectl ## Restart all deployments
+	kubectl rollout restart deployment -n $(NAMESPACE)
+	kubectl rollout restart statefulset -n $(NAMESPACE)
+
+.PHONY: restart-service
+restart-service: check-kubectl ## Restart specific service (usage: make restart-service SERVICE=mcp-memory)
+	@if [ -z "$(SERVICE)" ]; then echo "Usage: make restart-service SERVICE=<service-name>"; exit 1; fi
+	kubectl rollout restart deployment/$(SERVICE) -n $(NAMESPACE) || kubectl rollout restart statefulset/$(SERVICE) -n $(NAMESPACE)
+
+.PHONY: scale
+scale: check-kubectl ## Scale a deployment (usage: make scale SERVICE=mcp-memory REPLICAS=3)
+	@if [ -z "$(SERVICE)" ] || [ -z "$(REPLICAS)" ]; then echo "Usage: make scale SERVICE=<service-name> REPLICAS=<number>"; exit 1; fi
+	kubectl scale deployment $(SERVICE) -n $(NAMESPACE) --replicas=$(REPLICAS)
+
+# Port forwarding targets
+.PHONY: port-forward
+port-forward: check-kubectl ## Port forward all services
+	@echo "Opening port forwards in background..."
+	@kubectl port-forward -n $(NAMESPACE) service/qdrant 6333:6333 &
+	@kubectl port-forward -n $(NAMESPACE) service/mcp-memory 8000:8000 &
+	@kubectl port-forward -n $(NAMESPACE) service/mcp-indexer 8001:8001 &
+	@echo "Port forwards started. Use 'make stop-port-forward' to stop."
+
+.PHONY: port-forward-service
+port-forward-service: check-kubectl ## Port forward specific service (usage: make port-forward-service SERVICE=qdrant LOCAL=6333 REMOTE=6333)
+	@if [ -z "$(SERVICE)" ] || [ -z "$(LOCAL)" ] || [ -z "$(REMOTE)" ]; then echo "Usage: make port-forward-service SERVICE=<service-name> LOCAL=<local-port> REMOTE=<remote-port>"; exit 1; fi
+	kubectl port-forward -n $(NAMESPACE) service/$(SERVICE) $(LOCAL):$(REMOTE)
+
+.PHONY: stop-port-forward
+stop-port-forward: ## Stop all port forwards
+	pkill -f "kubectl port-forward" || echo "No port forwards found"
+
+# Build and push targets
+.PHONY: build-image
+build-image: ## Build Docker image
+	docker build -t $(IMAGE_REGISTRY)/context-engine:$(IMAGE_TAG) ../../
+
+.PHONY: push-image
+push-image: build-image ## Push Docker image to registry
+	docker push $(IMAGE_REGISTRY)/context-engine:$(IMAGE_TAG)
+
+# Test targets
+.PHONY: test-connection
+test-connection: check-kubectl ## Test connectivity to all services
+	@echo "Testing service connectivity..."
+	@echo "Qdrant:"
+	@kubectl run qdrant-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://qdrant.$(NAMESPACE).svc.cluster.local:6333/readyz || echo "Qdrant test failed"
+	@echo "MCP Memory:"
+	@kubectl run memory-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://mcp-memory.$(NAMESPACE).svc.cluster.local:18000/health || echo "MCP Memory test failed"
+	@echo "MCP Indexer:"
+	@kubectl run indexer-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://mcp-indexer.$(NAMESPACE).svc.cluster.local:18001/health || echo "MCP Indexer test failed"
+
+# Configuration targets
+.PHONY: show-config
+show-config: ## Show current configuration
+	@echo "Configuration:"
+	@echo "  NAMESPACE: $(NAMESPACE)"
+	@echo "  IMAGE_REGISTRY: $(IMAGE_REGISTRY)"
+	@echo "  IMAGE_TAG: $(IMAGE_TAG)"
+	@echo ""
+	@echo "Quick start commands:"
+	@echo "  make deploy              # Deploy all services"
+	@echo "  make status              # Show deployment status"
+	@echo "  make logs-service SERVICE=mcp-memory  # Show logs"
+	@echo "  make cleanup             # Remove everything"
+
+.PHONY: show-urls
+show-urls: check-kubectl ## Show access URLs for services
+	@echo "Service URLs (via NodePort):"
+	@echo "  Qdrant:           http://<node-ip>:30333"
+	@echo "  MCP Memory (SSE): http://<node-ip>:30800"
+	@echo "  MCP Indexer (SSE): http://<node-ip>:30802"
+	@echo "  MCP Memory (HTTP): http://<node-ip>:30804"
+	@echo "  MCP Indexer (HTTP): http://<node-ip>:30806"
+	@echo "  Llama.cpp:        http://<node-ip>:30808"
+	@echo ""
+	@echo "Service URLs (via port-forward):"
+	@echo "  make port-forward # Then access via localhost ports"
+
+# Advanced targets
+.PHONY: watch-deployment
+watch-deployment: check-kubectl ## Watch deployment progress
+	watch kubectl get pods,services,deployments -n $(NAMESPACE)
+
+.PHONY: describe-service
+describe-service: check-kubectl ## Describe a service (usage: make describe-service SERVICE=mcp-memory)
+	@if [ -z "$(SERVICE)" ]; then echo "Usage: make describe-service SERVICE=<service-name>"; echo "Available services:"; kubectl get services -n $(NAMESPACE); exit 1; fi
+	kubectl describe service $(SERVICE) -n $(NAMESPACE)
+
+.PHONY: events
+events: check-kubectl ## Show recent events
+	kubectl get events -n $(NAMESPACE) --sort-by=.metadata.creationTimestamp
+
diff --git a/deploy/kubernetes/README.md b/deploy/kubernetes/README.md
new file mode 100644
index 00000000..b88fbc23
--- /dev/null
+++ b/deploy/kubernetes/README.md
@@ -0,0 +1,472 @@
+# Kubernetes Deployment Guide
+
+## Overview
+
+This directory contains Kubernetes manifests for deploying Context Engine on a remote cluster using **Kustomize**. This enables:
+
+- **Remote development** from thin clients with cluster-based heavy lifting
+- **Multi-repository indexing** with unified `codebase` collection
+- **Scalable architecture** with independent watcher deployments per repo
+- **Kustomize-based configuration** for easy customization and overlays
+
+## Architecture
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                     Kubernetes Cluster                       │
+│                                                              │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐     │
+│  │   Qdrant     │  │  Memory MCP  │  │ Indexer MCP  │     │
+│  │ StatefulSet  │  │  Deployment  │  │  Deployment  │     │
+│  │  Port: 6333  │  │  Port: 8000  │  │  Port: 8001  │     │
+│  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘     │
+│         │                  │                  │             │
+│         │    ┌─────────────┴──────────────────┘             │
+│         │    │                                              │
+│  ┌──────▼────▼──────────────────────────────────────────┐  │
+│  │           PersistentVolume (qdrant-storage)          │  │
+│  └───────────────────────────────────────────────────────┘  │
+│                                                              │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐     │
+│  │   Watcher    │  │   Watcher    │  │   Watcher    │     │
+│  │  (repo-1)    │  │  (repo-2)    │  │  (repo-3)    │     │
+│  │  Deployment  │  │  Deployment  │  │  Deployment  │     │
+│  └──────┬───────┘  └──────┬───────┘  └──────┬───────┘     │
+│         │                  │                  │             │
+│  ┌──────▼──────────────────▼──────────────────▼─────────┐  │
+│  │           HostPath Volume (repos)                     │  │
+│  │  /tmp/context-engine-repos/repo-1/                    │  │
+│  │  /tmp/context-engine-repos/repo-2/                    │  │
+│  │  /tmp/context-engine-repos/repo-3/                    │  │
+│  └───────────────────────────────────────────────────────┘  │
+└─────────────────────────────────────────────────────────────┘
+```
+
+## Quick Start
+
+### Prerequisites
+
+- Kubernetes cluster (1.19+)
+- `kubectl` configured to access your cluster
+- `kustomize` (optional, kubectl has built-in support)
+- Docker image built and pushed to a registry
+
+### 1. Build and Push Image
+
+```bash
+# Build unified image (run from the repository root, where the Dockerfile lives)
+docker build -t your-registry/context-engine:latest .
+
+# Push to registry
+docker push your-registry/context-engine:latest
+```
+
+### 2. Update Image References
+
+Edit `kustomization.yaml` to use your registry:
+
+```yaml
+images:
+  - name: context-engine
+    newName: your-registry/context-engine
+    newTag: latest
+```
+
+### 3. Deploy Using Kustomize
+
+```bash
+# Option 1: Using kubectl with kustomize
+kubectl apply -k .
+
+# Option 2: Using kustomize CLI
+kustomize build . | kubectl apply -f -
+
+# Option 3: Using the deploy script
+./deploy.sh --registry your-registry --tag latest
+```
+
+### 4. Deploy Using Makefile
+
+```bash
+# Deploy all services
+make deploy
+
+# Or deploy core services only
+make deploy-core
+
+# Check status
+make status
+```
+
+### 5. Verify Deployment
+
+```bash
+# Check all pods are running
+kubectl get pods -n context-engine
+
+# Check services
+kubectl get svc -n context-engine
+
+# View logs
+make logs-service SERVICE=mcp-memory
+```
+
+### 6. Access Services
+
+```bash
+# Port forward to localhost
+make port-forward
+
+# Or access via NodePort
+# Qdrant: http://<node-ip>:30333
+# MCP Memory: http://<node-ip>:30800
+# MCP Indexer: http://<node-ip>:30802
+```
+
+## Configuration
+
+### Automatic Model Download
+
+The Llama.cpp deployment includes an **init container** that automatically downloads the model on first startup:
+
+- **Default Model**: Qwen2.5-1.5B-Instruct (Q8_0 quantization, ~1.7GB)
+- **Download Location**: `/tmp/context-engine-models/` on the Kubernetes node
+- **Behavior**: Downloads only if model doesn't exist (idempotent)
+- **Why this model**: Good balance of speed, accuracy, and footprint
+
+To use a different model, edit `configmap.yaml`:
+
+```yaml
+# Model download configuration
+LLAMACPP_MODEL_URL: "https://huggingface.co/your-org/your-model/resolve/main/model.gguf"
+LLAMACPP_MODEL_NAME: "model.gguf"
+```
+
+**Alternative Models**:
+- **Qwen2.5-0.5B-Instruct-Q8** (~500MB) - Tiny, very fast
+- **Qwen2.5-1.5B-Instruct-Q8** (default, ~1.7GB) - Best balance
+- **Granite-3.0-3B-Instruct-Q8** (~3.2GB) - Higher quality
+- **Phi-3-mini-4k-instruct-Q8** (~4GB) - High quality
+
+### Environment Variables (ConfigMap)
+
+Key environment variables in `configmap.yaml`:
+
+```yaml
+COLLECTION_NAME: "codebase"           # Unified collection for all repos
+EMBEDDING_MODEL: "BAAI/bge-base-en-v1.5"
+QDRANT_URL: "http://qdrant:6333"
+INDEX_MICRO_CHUNKS: "1"
+MAX_MICRO_CHUNKS_PER_FILE: "200"
+WATCH_DEBOUNCE_SECS: "1.5"
+```
+
+### Persistent Volumes
+
+Two persistent volumes are required:
+
+1. **qdrant-storage**: Stores Qdrant vector database
+   - Size: 50Gi (adjust based on codebase size)
+   - Access: ReadWriteOnce
+
+2. **repos-storage**: Stores repository code
+   - Size: 100Gi (adjust based on number/size of repos)
+   - Access: ReadWriteMany (required for multiple watchers)
+
+### Resource Requests/Limits
+
+Adjust based on your cluster capacity:
+
+```yaml
+# Qdrant (memory-intensive)
+resources:
+  requests:
+    memory: "4Gi"
+    cpu: "2"
+  limits:
+    memory: "8Gi"
+    cpu: "4"
+
+# MCP Servers (moderate)
+resources:
+  requests:
+    memory: "2Gi"
+    cpu: "1"
+  limits:
+    memory: "4Gi"
+    cpu: "2"
+
+# Watchers (light)
+resources:
+  requests:
+    memory: "512Mi"
+    cpu: "500m"
+  limits:
+    memory: "1Gi"
+    cpu: "1"
+```
+
+## Multi-Repository Setup
+
+### Adding a New Repository
+
+1. **Upload repository code** to the repos volume:
+   ```bash
+   # Using uploader service
+   python scripts/upload_repo.py --repo-name my-service --path /local/path/to/repo
+   
+   # Or using kubectl cp
+   kubectl cp /local/path/to/repo context-engine/uploader-pod:/repos/my-service
+   ```
+
+2. **Create watcher deployment** for the new repo:
+   ```bash
+   # Copy and modify watcher template
+   cp watcher-backend-deployment.yaml watcher-my-service-deployment.yaml
+   
+   # Edit: change WATCH_ROOT, REPO_NAME, and volume subPath
+   # Then apply
+   kubectl apply -f watcher-my-service-deployment.yaml -n context-engine
+   ```
+
+3. **Verify indexing**:
+   ```bash
+   # Check watcher logs
+   kubectl logs -f deployment/watcher-my-service -n context-engine
+   
+   # Check collection status via MCP
+   curl http://mcp-indexer:8001/sse
+   ```
+
+### Repository Volume Structure
+
+```
+/repos/
+├── backend/
+│   ├── .codebase/
+│   │   └── state.json
+│   ├── src/
+│   ├── tests/
+│   └── ...
+├── frontend/
+│   ├── .codebase/
+│   │   └── state.json
+│   ├── src/
+│   └── ...
+└── ml-service/
+    ├── .codebase/
+    │   └── state.json
+    ├── models/
+    └── ...
+```
+
+## Accessing Services
+
+### From Within Cluster
+
+Services are accessible via Kubernetes DNS:
+
+- Qdrant: `http://qdrant:6333`
+- Memory MCP: `http://mcp-memory:8000/sse`
+- Indexer MCP: `http://mcp-indexer:8001/sse`
+
+### From Outside Cluster
+
+#### Option 1: Port Forwarding (Development)
+
+```bash
+# Forward MCP services to localhost
+kubectl port-forward -n context-engine svc/mcp-memory 8000:8000
+kubectl port-forward -n context-engine svc/mcp-indexer 8001:8001
+kubectl port-forward -n context-engine svc/qdrant 6333:6333
+```
+
+Then configure your IDE to use `http://localhost:8000/sse` and `http://localhost:8001/sse`.
+
+#### Option 2: Ingress (Production)
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: context-engine-ingress
+  namespace: context-engine
+spec:
+  rules:
+  - host: mcp.your-domain.com
+    http:
+      paths:
+      - path: /memory
+        pathType: Prefix
+        backend:
+          service:
+            name: mcp-memory
+            port:
+              number: 8000
+      - path: /indexer
+        pathType: Prefix
+        backend:
+          service:
+            name: mcp-indexer
+            port:
+              number: 8001
+```
+
+#### Option 3: LoadBalancer (Cloud)
+
+Change service type to `LoadBalancer` in service manifests:
+
+```yaml
+spec:
+  type: LoadBalancer
+  ports:
+  - port: 8000
+    targetPort: 8000
+```
+
+## Monitoring and Maintenance
+
+### Health Checks
+
+```bash
+# Check Qdrant health
+kubectl exec -n context-engine qdrant-0 -- curl -f http://localhost:6333/readyz
+
+# Check MCP server health
+kubectl exec -n context-engine deployment/mcp-memory -- curl -f http://localhost:18000/health
+kubectl exec -n context-engine deployment/mcp-indexer -- curl -f http://localhost:18001/health
+```
+
+### Logs
+
+```bash
+# View logs for specific service
+kubectl logs -f -n context-engine deployment/mcp-memory
+kubectl logs -f -n context-engine deployment/mcp-indexer
+kubectl logs -f -n context-engine deployment/watcher-backend
+
+# View logs for all watchers
+kubectl logs -f -n context-engine -l app=watcher
+```
+
+### Collection Status
+
+```bash
+# Port forward indexer MCP
+kubectl port-forward -n context-engine svc/mcp-indexer 8001:8001
+
+# Check collection status
+curl -X POST http://localhost:8001/mcp \
+  -H "Content-Type: application/json" \
+  -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"qdrant_status","arguments":{}}}'
+```
+
+### Backup and Restore
+
+#### Backup Qdrant Data
+
+```bash
+# Create snapshot
+kubectl exec -n context-engine qdrant-0 -- \
+  curl -X POST http://localhost:6333/collections/codebase/snapshots
+
+# Copy snapshot to local
+kubectl cp context-engine/qdrant-0:/qdrant/storage/snapshots/codebase-snapshot.tar \
+  ./backup/codebase-snapshot.tar
+```
+
+#### Restore Qdrant Data
+
+```bash
+# Copy snapshot to pod
+kubectl cp ./backup/codebase-snapshot.tar \
+  context-engine/qdrant-0:/qdrant/storage/snapshots/
+
+# Restore snapshot
+kubectl exec -n context-engine qdrant-0 -- \
+  curl -X POST http://localhost:6333/collections/codebase/snapshots/upload \
+  -F 'snapshot=@/qdrant/storage/snapshots/codebase-snapshot.tar'
+```
+
+## Troubleshooting
+
+### Pods Not Starting
+
+```bash
+# Check pod status
+kubectl describe pod -n context-engine <pod-name>
+
+# Check events
+kubectl get events -n context-engine --sort-by='.lastTimestamp'
+```
+
+### Persistent Volume Issues
+
+```bash
+# Check PV/PVC status
+kubectl get pv,pvc -n context-engine
+
+# Check PVC events
+kubectl describe pvc -n context-engine <pvc-name>
+```
+
+### Watcher Not Indexing
+
+```bash
+# Check watcher logs
+kubectl logs -f -n context-engine deployment/watcher-backend
+
+# Verify volume mount
+kubectl exec -n context-engine deployment/watcher-backend -- ls -la /repos/backend
+
+# Check Qdrant connectivity
+kubectl exec -n context-engine deployment/watcher-backend -- \
+  curl -f http://qdrant:6333/readyz
+```
+
+### MCP Connection Issues
+
+```bash
+# Test SSE endpoint
+kubectl exec -n context-engine deployment/mcp-indexer -- \
+  curl -H "Accept: text/event-stream" http://localhost:8001/sse
+
+# Check service endpoints
+kubectl get endpoints -n context-engine
+```
+
+## Scaling
+
+### Horizontal Scaling
+
+- **MCP Servers**: Can run multiple replicas behind a service
+- **Watchers**: One per repository (do not scale horizontally)
+- **Qdrant**: Single instance (StatefulSet with replicas=1)
+
+### Vertical Scaling
+
+Adjust resource requests/limits based on workload:
+
+```bash
+# Edit deployment
+kubectl edit deployment -n context-engine mcp-indexer
+
+# Or patch
+kubectl patch deployment -n context-engine mcp-indexer -p \
+  '{"spec":{"template":{"spec":{"containers":[{"name":"mcp-indexer","resources":{"requests":{"memory":"4Gi"}}}]}}}}'
+```
+
+## Security Considerations
+
+1. **Network Policies**: Restrict pod-to-pod communication
+2. **RBAC**: Limit service account permissions
+3. **Secrets Management**: Use Kubernetes secrets or external secret managers
+4. **TLS**: Enable TLS for external access via Ingress
+5. **Resource Quotas**: Set namespace resource quotas
+
+## See Also
+
+- [Multi-Repository Collections Guide](../../docs/MULTI_REPO_COLLECTIONS.md)
+- [MCP API Reference](../../docs/MCP_API.md)
+- [Architecture Overview](../../docs/ARCHITECTURE.md)
+
diff --git a/deploy/kubernetes/cleanup.sh b/deploy/kubernetes/cleanup.sh
new file mode 100755
index 00000000..2ce5d64d
--- /dev/null
+++ b/deploy/kubernetes/cleanup.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+
+# Context-Engine Kubernetes Cleanup Script
+# This script removes all Context-Engine resources from Kubernetes
+
+set -e
+
+# Configuration
+NAMESPACE="context-engine"
+FORCE=false
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Logging functions
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Check if kubectl is available
+check_kubectl() {
+    if ! command -v kubectl &> /dev/null; then
+        log_error "kubectl is not installed or not in PATH"
+        exit 1
+    fi
+
+    if ! kubectl cluster-info &> /dev/null; then
+        log_error "Cannot connect to Kubernetes cluster"
+        exit 1
+    fi
+
+    log_success "Kubernetes connection verified"
+}
+
+# Confirm cleanup
+confirm_cleanup() {
+    if [[ "$FORCE" != "true" ]]; then
+        log_warning "This will delete all Context-Engine resources in namespace: $NAMESPACE"
+        read -p "Are you sure you want to continue? (yes/no): " -r
+        echo
+        if [[ ! $REPLY =~ ^[Yy][Ee][Ss]$ ]]; then
+            log_info "Cleanup cancelled"
+            exit 0
+        fi
+    fi
+}
+
+# Delete every object of each listed kind in $NAMESPACE, then delete the namespace itself.
+cleanup_resources() {
+    log_info "Cleaning up Context-Engine resources..."
+    # NAMESPACE is quoted throughout so an unusual value is always passed as one argument
+    # Workloads: deployments
+    log_info "Deleting deployments..."
+    kubectl delete deployment --all -n "$NAMESPACE" --ignore-not-found=true
+
+    # Workloads: statefulsets
+    log_info "Deleting statefulsets..."
+    kubectl delete statefulset --all -n "$NAMESPACE" --ignore-not-found=true
+
+    # Workloads: jobs
+    log_info "Deleting jobs..."
+    kubectl delete job --all -n "$NAMESPACE" --ignore-not-found=true
+
+    # Networking: services
+    log_info "Deleting services..."
+    kubectl delete service --all -n "$NAMESPACE" --ignore-not-found=true
+
+    # Networking: ingress
+    log_info "Deleting ingress..."
+    kubectl delete ingress --all -n "$NAMESPACE" --ignore-not-found=true
+
+    # Configuration: configmaps
+    log_info "Deleting configmaps..."
+    kubectl delete configmap --all -n "$NAMESPACE" --ignore-not-found=true
+
+    # Configuration: secrets
+    log_info "Deleting secrets..."
+    kubectl delete secret --all -n "$NAMESPACE" --ignore-not-found=true
+
+    # Storage: persistent volume claims
+    log_info "Deleting persistent volume claims..."
+    kubectl delete pvc --all -n "$NAMESPACE" --ignore-not-found=true
+
+    # Finally the namespace itself
+    log_info "Deleting namespace..."
+    kubectl delete namespace "$NAMESPACE" --ignore-not-found=true
+
+    log_success "Cleanup complete!"
+}
+
+# Print usage, options and examples to stdout.
+# One printf call emits every argument on its own line; "" yields a blank line.
+# $0 is expanded so the examples show the name the script was invoked with.
+show_help() {
+    printf '%s\n' \
+        "Context-Engine Kubernetes Cleanup Script" "" \
+        "Usage: $0 [OPTIONS]" "" \
+        "Options:" \
+        "  -h, --help                Show this help message" \
+        "  --namespace NAMESPACE     Kubernetes namespace (default: context-engine)" \
+        "  --force                   Skip confirmation prompt" "" \
+        "Examples:" \
+        "  $0                        # Interactive cleanup" \
+        "  $0 --force                # Force cleanup without confirmation" \
+        "  $0 --namespace my-ns      # Cleanup specific namespace"
+}
+
+# Parse command line arguments.
+# Options that take a value are validated before the value is consumed.
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        -h|--help) show_help; exit 0 ;;
+        --namespace)
+            # Without this check "--namespace --force" would target a namespace named "--force"
+            if [[ -z "${2:-}" || "$2" == -* ]]; then
+                log_error "--namespace requires a value"
+                exit 1
+            fi
+            NAMESPACE="$2"
+            shift 2
+            ;;
+        --force) FORCE=true; shift ;;
+        *)
+            log_error "Unknown option: $1"
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+# Entry point: run the three cleanup phases in order (prerequisites, confirmation, deletion)
+main() {
+    log_info "Starting Context-Engine Kubernetes cleanup"
+
+    # Exits with status 1 if kubectl is missing or no cluster is reachable
+    check_kubectl
+
+    # Exits with status 0 if the user does not answer "yes" (skipped when FORCE=true)
+    confirm_cleanup
+
+    # Deletes the namespaced resources and then the namespace
+    cleanup_resources
+}
+
+# Runs after argument parsing above, so NAMESPACE and FORCE are already final
+main
+
diff --git a/deploy/kubernetes/configmap.yaml b/deploy/kubernetes/configmap.yaml
new file mode 100644
index 00000000..0c514bc2
--- /dev/null
+++ b/deploy/kubernetes/configmap.yaml
@@ -0,0 +1,75 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: context-engine-config
+  namespace: context-engine
+  labels:
+    app: context-engine
+data:
+  # Core Configuration
+  COLLECTION_NAME: "codebase"
+  EMBEDDING_MODEL: "BAAI/bge-base-en-v1.5"
+  EMBEDDING_PROVIDER: "fastembed"
+  
+  # Qdrant Configuration
+  QDRANT_URL: "http://qdrant:6333"
+  QDRANT_TIMEOUT: "60"
+  
+  # Indexing Configuration
+  INDEX_MICRO_CHUNKS: "1"
+  MAX_MICRO_CHUNKS_PER_FILE: "200"
+  INDEX_CHUNK_LINES: "120"
+  INDEX_CHUNK_OVERLAP: "20"
+  INDEX_BATCH_SIZE: "64"
+  INDEX_UPSERT_BATCH: "128"
+  INDEX_UPSERT_RETRIES: "5"
+  INDEX_UPSERT_BACKOFF: "0.5"
+  
+  # Watcher Configuration
+  WATCH_DEBOUNCE_SECS: "1.5"
+  
+  # ReFRAG Configuration
+  REFRAG_MODE: "1"
+  REFRAG_GATE_FIRST: "1"
+  REFRAG_CANDIDATES: "200"
+  MICRO_CHUNK_TOKENS: "16"
+  MICRO_CHUNK_STRIDE: "8"
+  MICRO_OUT_MAX_SPANS: "3"
+  MICRO_MERGE_LINES: "4"
+  MICRO_BUDGET_TOKENS: "512"
+  MICRO_TOKENS_PER_LINE: "32"
+  
+  # Decoder Configuration (optional)
+  REFRAG_DECODER: "1"
+  REFRAG_RUNTIME: "llamacpp"
+  LLAMACPP_URL: "http://llamacpp:8080"
+  LLAMACPP_TIMEOUT_SEC: "180"
+  DECODER_MAX_TOKENS: "4000"
+
+  # Model download configuration (for init container)
+  LLAMACPP_MODEL_URL: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q8_0.gguf"
+  LLAMACPP_MODEL_NAME: "qwen2.5-1.5b-instruct-q8_0.gguf"
+  
+  # Reranker Configuration
+  RERANKER_ENABLED: "1"
+  
+  # MCP Configuration (defaults for the SSE servers; mcp-http.yaml overrides the ports for the HTTP servers)
+  FASTMCP_HOST: "0.0.0.0"
+  FASTMCP_PORT: "8000"
+  FASTMCP_INDEXER_PORT: "8001"
+  FASTMCP_HEALTH_PORT: "18000"
+  
+  # Memory Configuration
+  MEMORY_SSE_ENABLED: "true"
+  MEMORY_MCP_URL: "http://mcp-memory:8000/sse"
+  MEMORY_MCP_TIMEOUT: "6"
+  
+  # Multi-collection Configuration
+  CTX_MULTI_COLLECTION: "1"
+  CTX_DOC_PASS: "1"
+  
+  # Logging
+  DEBUG_CONTEXT_ANSWER: "0"
+  
+  # Tokenizer
+  TOKENIZER_JSON: "/app/models/tokenizer.json"
diff --git a/deploy/kubernetes/deploy.sh b/deploy/kubernetes/deploy.sh
new file mode 100755
index 00000000..75e6bdfe
--- /dev/null
+++ b/deploy/kubernetes/deploy.sh
@@ -0,0 +1,250 @@
+#!/bin/bash
+
+# Context-Engine Kubernetes Deployment Script
+# This script deploys Context-Engine services to Kubernetes
+
+set -e
+
+# Configuration
+NAMESPACE="context-engine"
+IMAGE_REGISTRY="context-engine"
+IMAGE_TAG="latest"
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Logging functions
+log_info() {
+    echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+log_success() {
+    echo -e "${GREEN}[SUCCESS]${NC} $1"
+}
+
+log_warning() {
+    echo -e "${YELLOW}[WARNING]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Check if kubectl is available
+check_kubectl() {
+    if ! command -v kubectl &> /dev/null; then
+        log_error "kubectl is not installed or not in PATH"
+        exit 1
+    fi
+
+    if ! kubectl cluster-info &> /dev/null; then
+        log_error "Cannot connect to Kubernetes cluster"
+        exit 1
+    fi
+
+    log_success "Kubernetes connection verified"
+}
+
+# Apply namespace.yaml. NOTE(review): $NAMESPACE is only logged here; the namespace created is whatever namespace.yaml declares - confirm they agree when --namespace is used.
+create_namespace() {
+    log_info "Creating namespace: $NAMESPACE"
+    kubectl apply -f namespace.yaml
+    log_success "Namespace created/verified"
+}
+
+# Apply configmap.yaml (path is relative to the current directory); with set -e a kubectl failure aborts the script
+deploy_config() {
+    log_info "Deploying configuration"
+    kubectl apply -f configmap.yaml
+    log_success "Configuration deployed"
+}
+
+# Apply qdrant.yaml and wait up to 300s for the Qdrant pod; a failed wait only logs a warning.
+deploy_core() {
+    log_info "Deploying core services"
+
+    # Deploy Qdrant
+    log_info "Deploying Qdrant database..."
+    kubectl apply -f qdrant.yaml
+
+    # NOTE(review): if no pod with this label exists yet, kubectl wait returns an error at once
+    log_info "Waiting for Qdrant to be ready..."
+    kubectl wait --for=condition=ready pod -l component=qdrant -n "$NAMESPACE" --timeout=300s || log_warning "Qdrant may not be ready yet"
+
+    log_success "Core services deployed"
+}
+
+# Deploy MCP servers
+deploy_mcp_servers() {
+    log_info "Deploying MCP servers"
+
+    # Deploy SSE versions
+    kubectl apply -f mcp-memory.yaml
+    kubectl apply -f mcp-indexer.yaml
+
+    # Wait for MCP servers to be ready
+    log_info "Waiting for MCP servers to be ready..."
+    kubectl wait --for=condition=ready pod -l component=mcp-memory -n $NAMESPACE --timeout=300s || log_warning "MCP Memory may not be ready yet"
+    kubectl wait --for=condition=ready pod -l component=mcp-indexer -n $NAMESPACE --timeout=300s || log_warning "MCP Indexer may not be ready yet"
+
+    log_success "MCP servers deployed"
+}
+
+# Deploy the streamable-HTTP MCP servers and wait (up to 300s each) for their rollouts.
+deploy_http_servers() {
+    log_info "Deploying HTTP servers (optional)"
+    kubectl apply -f mcp-http.yaml
+
+    # Wait on the Deployments rather than on pods by label: right after apply the pods may
+    # not exist yet, and "kubectl wait" then fails immediately instead of waiting
+    kubectl rollout status deployment/mcp-memory-http -n "$NAMESPACE" --timeout=300s || log_warning "MCP Memory HTTP may not be ready yet"
+    kubectl rollout status deployment/mcp-indexer-http -n "$NAMESPACE" --timeout=300s || log_warning "MCP Indexer HTTP may not be ready yet"
+    log_success "HTTP servers deployed"
+}
+
+# Apply indexer-services.yaml (the watcher Deployment plus the indexer-job and init-payload Jobs); does not wait for them
+deploy_indexer_services() {
+    log_info "Deploying indexer services"
+    kubectl apply -f indexer-services.yaml
+
+    log_success "Indexer services deployed"
+}
+
+# Deploy optional Llama.cpp service
+deploy_llamacpp() {
+    if [[ "$SKIP_LLAMACPP" != "true" ]]; then
+        log_info "Deploying Llama.cpp service (optional)"
+        kubectl apply -f llamacpp.yaml
+        log_success "Llama.cpp service deployed"
+    else
+        log_warning "Skipping Llama.cpp deployment"
+    fi
+}
+
+# Apply ingress.yaml only when DEPLOY_INGRESS is "true" (set by --deploy-ingress).
+deploy_ingress() {
+    if [[ "$DEPLOY_INGRESS" != "true" ]]; then
+        log_warning "Skipping Ingress deployment (set --deploy-ingress to enable)"
+        return 0
+    fi
+    log_info "Deploying Ingress"
+    kubectl apply -f ingress.yaml
+    log_success "Ingress deployed"
+}
+
+# Print the pods and services in $NAMESPACE, followed by the external access URLs.
+show_status() {
+    log_info "Deployment status:"
+    echo
+    echo "Namespace: $NAMESPACE"
+    echo
+    echo "Pods:"
+    kubectl get pods -n "$NAMESPACE" -o wide
+    echo
+    echo "Services:"
+    kubectl get services -n "$NAMESPACE"
+    echo
+    # <node-ip> is a placeholder; the ports are meant to match the nodePort values in the manifests
+    log_success "Deployment complete!"
+    echo
+    log_info "Access URLs:"
+    echo "  Qdrant: http://<node-ip>:30333"
+    echo "  MCP Memory (SSE): http://<node-ip>:30800"
+    echo "  MCP Memory (HTTP): http://<node-ip>:30804"
+    echo "  MCP Indexer (SSE): http://<node-ip>:30802"
+    echo "  MCP Indexer (HTTP): http://<node-ip>:30806"
+    if [[ "$SKIP_LLAMACPP" != "true" ]]; then
+        echo "  Llama.cpp: http://<node-ip>:30808"
+    fi
+}
+
+# Main deployment function
+main() {
+    log_info "Starting Context-Engine Kubernetes deployment"
+
+    # Check prerequisites
+    check_kubectl
+
+    # Deploy in order
+    create_namespace
+    deploy_config
+    deploy_core
+    deploy_mcp_servers
+    deploy_http_servers
+    deploy_indexer_services
+    deploy_llamacpp
+    deploy_ingress
+
+    # Show status
+    show_status
+}
+
+# Print usage, options and examples to stdout.
+# One printf call emits every argument on its own line; "" yields a blank line.
+# $0 is expanded so the examples show the name the script was invoked with.
+show_help() {
+    printf '%s\n' \
+        "Context-Engine Kubernetes Deployment Script" "" \
+        "Usage: $0 [OPTIONS]" "" \
+        "Options:" \
+        "  -h, --help                    Show this help message" \
+        "  -r, --registry REGISTRY       Docker image registry (default: context-engine)" \
+        "  -t, --tag TAG                 Docker image tag (default: latest)" \
+        "  --skip-llamacpp               Skip Llama.cpp deployment" \
+        "  --deploy-ingress              Deploy Ingress configuration" \
+        "  --namespace NAMESPACE         Kubernetes namespace (default: context-engine)" "" \
+        "Examples:" \
+        "  $0                            # Basic deployment" \
+        "  $0 --skip-llamacpp            # Skip Llama.cpp" \
+        "  $0 --deploy-ingress           # Deploy with Ingress" \
+        "  $0 -r myregistry.com -t v1.0  # Use custom image"
+}
+
+# Parse command line arguments
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        -r|--registry|-t|--tag|--namespace)
+            # These options take a value; reject a missing one so the next flag
+            # is never consumed as the value (e.g. "--namespace --deploy-ingress")
+            [[ -n "${2:-}" && "$2" != -* ]] || { log_error "Option $1 requires a value"; exit 1; }
+            case $1 in
+                -r|--registry) IMAGE_REGISTRY="$2" ;;
+                -t|--tag)      IMAGE_TAG="$2" ;;
+                --namespace)   NAMESPACE="$2" ;;
+            esac
+            shift 2
+            ;;
+        --skip-llamacpp)
+            SKIP_LLAMACPP=true
+            shift
+            ;;
+        --deploy-ingress)
+            DEPLOY_INGRESS=true
+            shift
+            ;;
+        *)
+            log_error "Unknown option: $1"
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+# Manifests are applied by relative path. If the cwd has none, fall back to this script's own directory.
+if [[ ! -f "qdrant.yaml" ]] && ! { cd "$(dirname "${BASH_SOURCE[0]}")" && [[ -f "qdrant.yaml" ]]; }; then
+    log_error "Please run this script from the deploy/kubernetes directory"
+    exit 1
+fi
+
+# Run main deployment
+main
+
diff --git a/deploy/kubernetes/indexer-services.yaml b/deploy/kubernetes/indexer-services.yaml
new file mode 100644
index 00000000..110d22ce
--- /dev/null
+++ b/deploy/kubernetes/indexer-services.yaml
@@ -0,0 +1,197 @@
+---
+# Watcher Deployment (file change monitoring and reindexing)
+# This is a template - copy and customize it for each repository (the hostPath volume below is node-local, so it only suits single-node clusters)
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: watcher
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: watcher
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: context-engine
+      component: watcher
+  template:
+    metadata:
+      labels:
+        app: context-engine
+        component: watcher
+    spec:
+      containers:
+      - name: watcher
+        image: context-engine:latest
+        imagePullPolicy: IfNotPresent
+        command: ["python", "/app/scripts/watch_index.py"]
+        workingDir: /repos
+        env:
+        - name: QDRANT_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: QDRANT_URL
+        - name: COLLECTION_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: COLLECTION_NAME
+        - name: EMBEDDING_MODEL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: EMBEDDING_MODEL
+        - name: WATCH_ROOT
+          value: "/repos"
+        - name: QDRANT_TIMEOUT
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: QDRANT_TIMEOUT
+        - name: MAX_MICRO_CHUNKS_PER_FILE
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: MAX_MICRO_CHUNKS_PER_FILE
+        - name: INDEX_UPSERT_BATCH
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: INDEX_UPSERT_BATCH
+        - name: INDEX_UPSERT_RETRIES
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: INDEX_UPSERT_RETRIES
+        - name: WATCH_DEBOUNCE_SECS
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: WATCH_DEBOUNCE_SECS
+        resources:
+          requests:
+            memory: "512Mi"
+            cpu: "250m"
+          limits:
+            memory: "2Gi"
+            cpu: "1"
+        volumeMounts:
+        - name: repos
+          mountPath: /repos
+      volumes:
+      - name: repos
+        hostPath:
+          path: /tmp/context-engine-repos
+          type: DirectoryOrCreate
+
+---
+# Indexer Job (One-shot code indexing)
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: indexer-job
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: indexer
+spec:
+  template:
+    metadata:
+      labels:
+        app: context-engine
+        component: indexer
+    spec:
+      restartPolicy: OnFailure
+      containers:
+      - name: indexer
+        image: context-engine:latest
+        imagePullPolicy: IfNotPresent
+        command: ["python", "/app/scripts/ingest_code.py"]
+        workingDir: /repos
+        env:
+        - name: QDRANT_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: QDRANT_URL
+        - name: COLLECTION_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: COLLECTION_NAME
+        - name: EMBEDDING_MODEL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: EMBEDDING_MODEL
+        resources:
+          requests:
+            memory: "1Gi"
+            cpu: "500m"
+          limits:
+            memory: "4Gi"
+            cpu: "2"
+        volumeMounts:
+        - name: repos
+          mountPath: /repos
+          readOnly: true
+      volumes:
+      - name: repos
+        hostPath:
+          path: /tmp/context-engine-repos
+          type: DirectoryOrCreate
+
+---
+# Index Initialization Job
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: init-payload
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: init
+spec:
+  template:
+    metadata:
+      labels:
+        app: context-engine
+        component: init
+    spec:
+      restartPolicy: OnFailure
+      containers:
+      - name: init-payload
+        image: context-engine:latest
+        imagePullPolicy: IfNotPresent
+        command: ["python", "/app/scripts/create_indexes.py"]
+        workingDir: /repos
+        env:
+        - name: QDRANT_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: QDRANT_URL
+        - name: COLLECTION_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: COLLECTION_NAME
+        resources:
+          requests:
+            memory: "512Mi"
+            cpu: "250m"
+          limits:
+            memory: "1Gi"
+            cpu: "500m"
+        volumeMounts:
+        - name: repos
+          mountPath: /repos
+          readOnly: true
+      volumes:
+      - name: repos
+        hostPath:
+          path: /tmp/context-engine-repos
+          type: DirectoryOrCreate
+
diff --git a/deploy/kubernetes/ingress.yaml b/deploy/kubernetes/ingress.yaml
new file mode 100644
index 00000000..71524d08
--- /dev/null
+++ b/deploy/kubernetes/ingress.yaml
@@ -0,0 +1,86 @@
+---
+# Ingress for Context-Engine services
+# Requires an Ingress controller (e.g., nginx-ingress, traefik)
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: context-engine-ingress
+  namespace: context-engine
+  labels:
+    app: context-engine
+  annotations:
+    # Nginx Ingress annotations
+    nginx.ingress.kubernetes.io/rewrite-target: /$2
+    nginx.ingress.kubernetes.io/ssl-redirect: "true"
+    # Increase timeouts for SSE connections
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
+    nginx.ingress.kubernetes.io/proxy-send-timeout: "3600"
+    # Enable CORS if needed
+    # nginx.ingress.kubernetes.io/enable-cors: "true"
+    # nginx.ingress.kubernetes.io/cors-allow-origin: "*"
+spec:
+  ingressClassName: nginx  # Adjust based on your ingress controller
+  rules:
+  - host: context-engine.example.com  # Change to your domain
+    http:
+      paths:
+      # Qdrant. Every path is a regex whose second capture group feeds rewrite-target, hence ImplementationSpecific
+      - path: /qdrant(/|$)(.*)
+        pathType: ImplementationSpecific
+        backend:
+          service:
+            name: qdrant
+            port:
+              number: 6333
+      
+      # MCP Memory (SSE)
+      - path: /mcp/memory(/|$)(.*)
+        pathType: ImplementationSpecific
+        backend:
+          service:
+            name: mcp-memory
+            port:
+              number: 8000
+      
+      # MCP Indexer (SSE)
+      - path: /mcp/indexer(/|$)(.*)
+        pathType: ImplementationSpecific
+        backend:
+          service:
+            name: mcp-indexer
+            port:
+              number: 8001
+      
+      # MCP Memory HTTP
+      - path: /mcp-http/memory(/|$)(.*)
+        pathType: ImplementationSpecific
+        backend:
+          service:
+            name: mcp-memory-http
+            port:
+              number: 8002
+      
+      # MCP Indexer HTTP
+      - path: /mcp-http/indexer(/|$)(.*)
+        pathType: ImplementationSpecific
+        backend:
+          service:
+            name: mcp-indexer-http
+            port:
+              number: 8003
+      
+      # Llama.cpp (optional)
+      - path: /llamacpp(/|$)(.*)
+        pathType: ImplementationSpecific
+        backend:
+          service:
+            name: llamacpp
+            port:
+              number: 8080
+  
+  # TLS configuration (optional)
+  # tls:
+  # - hosts:
+  #   - context-engine.example.com
+  #   secretName: context-engine-tls
+
diff --git a/deploy/kubernetes/kustomization.yaml b/deploy/kubernetes/kustomization.yaml
new file mode 100644
index 00000000..e02aa13e
--- /dev/null
+++ b/deploy/kubernetes/kustomization.yaml
@@ -0,0 +1,87 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+metadata:
+  name: context-engine
+  namespace: context-engine
+
+resources:
+  # Namespace and configuration
+  - namespace.yaml
+  - configmap.yaml
+
+  # Core services
+  - qdrant.yaml
+
+  # MCP servers
+  - mcp-memory.yaml
+  - mcp-indexer.yaml
+  - mcp-http.yaml
+
+  # Indexer services
+  - indexer-services.yaml
+
+  # Optional services
+  - llamacpp.yaml
+  - ingress.yaml
+
+# Common labels (kustomize also injects these into selectors and pod templates)
+commonLabels:
+  app.kubernetes.io/name: context-engine
+  app.kubernetes.io/component: kubernetes-deployment
+  app.kubernetes.io/managed-by: kustomize
+
+# Patches for production customization
+patchesStrategicMerge:
+  # Uncomment and create patches for production
+  # - patches/production-storage.yaml
+  # - patches/production-resources.yaml
+  # - patches/production-ingress.yaml
+
+# ConfigMap generator (optional - for overrides)
+configMapGenerator:
+  - name: context-engine-overrides
+    literals:
+      # Override specific values here
+      # COLLECTION_NAME=production-collection
+      # EMBEDDING_MODEL=BAAI/bge-large-en-v1.5
+
+# Secret generator (optional - for sensitive data)
+secretGenerator:
+  - name: context-engine-secrets
+    literals:
+      # Add secrets here (recommended to use existing secrets instead)
+      # QDRANT_API_KEY=your-api-key
+
+# Images configuration (customize for your registry)
+images:
+  - name: context-engine
+    newTag: latest
+    # newTag: v1.0.0
+    # newName: your-registry/context-engine
+
+# Namespace override
+namespace: context-engine
+
+# Replicas configuration
+replicas:
+  # Scale MCP servers for high availability
+  - name: mcp-memory
+    count: 1  # Set to 2+ for production
+  - name: mcp-indexer
+    count: 1  # Set to 2+ for production
+
+# Resource patches
+patches:
+  # Resource customization for mcp-memory (active, not just an example: both replace ops need the target paths to exist)
+  - patch: |-
+      - op: replace
+        path: /spec/template/spec/containers/0/resources/requests/memory
+        value: "1Gi"
+      - op: replace
+        path: /spec/template/spec/containers/0/resources/limits/memory
+        value: "4Gi"
+    target:
+      kind: Deployment
+      name: mcp-memory
+
diff --git a/deploy/kubernetes/llamacpp.yaml b/deploy/kubernetes/llamacpp.yaml
new file mode 100644
index 00000000..f851996d
--- /dev/null
+++ b/deploy/kubernetes/llamacpp.yaml
@@ -0,0 +1,171 @@
+---
+# Llama.cpp Deployment (Optional - for text generation)
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llamacpp
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: llamacpp
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: context-engine
+      component: llamacpp
+  template:
+    metadata:
+      labels:
+        app: context-engine
+        component: llamacpp
+    spec:
+      # Init container to download model if not present
+      initContainers:
+      - name: model-downloader
+        image: curlimages/curl:latest
+        # The curl image runs as a non-root user by default, which cannot write to a
+        # hostPath directory that DirectoryOrCreate created as root (mode 0755)
+        securityContext:
+          runAsUser: 0
+        env:
+        - name: LLAMACPP_MODEL_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: LLAMACPP_MODEL_URL
+        - name: LLAMACPP_MODEL_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: LLAMACPP_MODEL_NAME
+        command:
+          - sh
+          - -c
+          - |
+            MODEL_PATH="/models/${LLAMACPP_MODEL_NAME}"
+            if [ -f "$MODEL_PATH" ]; then
+              echo "Model already exists at $MODEL_PATH"
+              ls -lh "$MODEL_PATH"
+              exit 0
+            fi
+            echo "Downloading model from ${LLAMACPP_MODEL_URL}..."
+            echo "Target: $MODEL_PATH"
+            # -f makes curl fail on HTTP errors instead of saving the error page as the model
+            if curl -fL --progress-bar -o "$MODEL_PATH.tmp" "${LLAMACPP_MODEL_URL}"; then
+              mv "$MODEL_PATH.tmp" "$MODEL_PATH"
+              echo "Model downloaded successfully"
+              ls -lh "$MODEL_PATH"
+            else
+              echo "Failed to download model"
+              rm -f "$MODEL_PATH.tmp"
+              exit 1
+            fi
+        volumeMounts:
+        - name: models
+          mountPath: /models
+        resources:
+          requests:
+            memory: "256Mi"
+            cpu: "100m"
+          limits:
+            memory: "512Mi"
+            cpu: "500m"
+
+      containers:
+      - name: llamacpp
+        image: ghcr.io/ggerganov/llama.cpp:server
+        imagePullPolicy: IfNotPresent
+        env:
+        - name: LLAMACPP_MODEL_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: LLAMACPP_MODEL_NAME
+        ports:
+        - name: http
+          containerPort: 8080
+          protocol: TCP
+        command:
+          - sh
+          - -c
+        args:
+          - |
+            exec /llama-server \
+              --host 0.0.0.0 \
+              --port 8080 \
+              --model "/models/${LLAMACPP_MODEL_NAME}" \
+              --ctx-size 4096 \
+              --n-gpu-layers 0
+        resources:
+          requests:
+            memory: "2Gi"
+            cpu: "1"
+          limits:
+            memory: "8Gi"
+            cpu: "4"
+        volumeMounts:
+        - name: models
+          mountPath: /models
+          readOnly: false
+        livenessProbe:
+          httpGet:
+            path: /health
+            port: http
+          initialDelaySeconds: 60
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /health
+            port: http
+          initialDelaySeconds: 30
+          periodSeconds: 5
+      volumes:
+      - name: models
+        hostPath:
+          path: /tmp/context-engine-models
+          type: DirectoryOrCreate
+
+---
+# Llama.cpp Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: llamacpp
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: llamacpp
+spec:
+  type: ClusterIP
+  ports:
+  - name: http
+    port: 8080
+    targetPort: http
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: llamacpp
+
+---
+# Optional: Llama.cpp External Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: llamacpp-external
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: llamacpp
+spec:
+  type: NodePort
+  ports:
+  - name: http
+    port: 8080
+    targetPort: http
+    nodePort: 30808
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: llamacpp
+
diff --git a/deploy/kubernetes/mcp-http.yaml b/deploy/kubernetes/mcp-http.yaml
new file mode 100644
index 00000000..7586f022
--- /dev/null
+++ b/deploy/kubernetes/mcp-http.yaml
@@ -0,0 +1,323 @@
+---
+# MCP Memory HTTP Deployment
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mcp-memory-http
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-memory-http
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: context-engine
+      component: mcp-memory-http
+  template:
+    metadata:
+      labels:
+        app: context-engine
+        component: mcp-memory-http
+    spec:
+      containers:
+      - name: mcp-memory-http
+        image: context-engine:latest
+        imagePullPolicy: IfNotPresent
+        command: ["python", "/app/scripts/mcp_memory_server.py"]
+        ports:
+        - name: http
+          containerPort: 8002
+          protocol: TCP
+        - name: health
+          containerPort: 18002
+          protocol: TCP
+        env:
+        - name: FASTMCP_TRANSPORT
+          value: "streamable-http"
+        - name: FASTMCP_HOST
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: FASTMCP_HOST
+        - name: FASTMCP_PORT
+          value: "8002"
+        - name: FASTMCP_HEALTH_PORT
+          value: "18002"
+        - name: QDRANT_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: QDRANT_URL
+        - name: COLLECTION_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: COLLECTION_NAME
+        - name: EMBEDDING_MODEL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: EMBEDDING_MODEL
+        resources:
+          requests:
+            memory: "1Gi"
+            cpu: "500m"
+          limits:
+            memory: "4Gi"
+            cpu: "2"
+        volumeMounts:
+        - name: repos
+          mountPath: /repos
+          readOnly: true
+        livenessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 10
+          periodSeconds: 5
+      volumes:
+      - name: repos
+        hostPath:
+          path: /tmp/context-engine-repos
+          type: DirectoryOrCreate
+
+---
+# MCP Memory HTTP Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: mcp-memory-http
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-memory-http
+spec:
+  type: ClusterIP
+  ports:
+  - name: http
+    port: 8002
+    targetPort: http
+    protocol: TCP
+  - name: health
+    port: 18002
+    targetPort: health
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: mcp-memory-http
+
+---
+# Optional: MCP Memory HTTP External Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: mcp-memory-http-external
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-memory-http
+spec:
+  type: NodePort
+  ports:
+  - name: http
+    port: 8002
+    targetPort: http
+    nodePort: 30804
+    protocol: TCP
+  - name: health
+    port: 18002
+    targetPort: health
+    nodePort: 30805
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: mcp-memory-http
+
+---
+# MCP Indexer HTTP Deployment
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mcp-indexer-http
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-indexer-http
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: context-engine
+      component: mcp-indexer-http
+  template:
+    metadata:
+      labels:
+        app: context-engine
+        component: mcp-indexer-http
+    spec:
+      containers:
+      - name: mcp-indexer-http
+        image: context-engine:latest
+        imagePullPolicy: IfNotPresent
+        command: ["python", "/app/scripts/mcp_indexer_server.py"]
+        ports:
+        - name: http
+          containerPort: 8003
+          protocol: TCP
+        - name: health
+          containerPort: 18003
+          protocol: TCP
+        env:
+        - name: FASTMCP_TRANSPORT
+          value: "streamable-http"
+        - name: FASTMCP_HOST
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: FASTMCP_HOST
+        - name: FASTMCP_INDEXER_PORT
+          value: "8003"
+        - name: FASTMCP_HEALTH_PORT
+          value: "18003"
+        - name: QDRANT_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: QDRANT_URL
+        - name: COLLECTION_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: COLLECTION_NAME
+        - name: EMBEDDING_MODEL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: EMBEDDING_MODEL
+        - name: INDEX_MICRO_CHUNKS
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: INDEX_MICRO_CHUNKS
+        - name: MAX_MICRO_CHUNKS_PER_FILE
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: MAX_MICRO_CHUNKS_PER_FILE
+        - name: REFRAG_MODE
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: REFRAG_MODE
+        - name: REFRAG_DECODER
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: REFRAG_DECODER
+        - name: LLAMACPP_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: LLAMACPP_URL
+        - name: MEMORY_SSE_ENABLED
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: MEMORY_SSE_ENABLED
+        - name: MEMORY_MCP_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: MEMORY_MCP_URL
+        - name: CTX_MULTI_COLLECTION
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: CTX_MULTI_COLLECTION
+        resources:
+          requests:
+            memory: "1Gi"
+            cpu: "500m"
+          limits:
+            memory: "4Gi"
+            cpu: "2"
+        volumeMounts:
+        - name: repos
+          mountPath: /repos
+        livenessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 10
+          periodSeconds: 5
+      volumes:
+      - name: repos
+        hostPath:
+          path: /tmp/context-engine-repos
+          type: DirectoryOrCreate
+
+---
+# MCP Indexer HTTP Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: mcp-indexer-http
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-indexer-http
+spec:
+  type: ClusterIP
+  ports:
+  - name: http
+    port: 8003
+    targetPort: http
+    protocol: TCP
+  - name: health
+    port: 18003
+    targetPort: health
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: mcp-indexer-http
+
+---
+# Optional: MCP Indexer HTTP External Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: mcp-indexer-http-external
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-indexer-http
+spec:
+  type: NodePort
+  ports:
+  - name: http
+    port: 8003
+    targetPort: http
+    nodePort: 30806
+    protocol: TCP
+  - name: health
+    port: 18003
+    targetPort: health
+    nodePort: 30807
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: mcp-indexer-http
+
diff --git a/deploy/kubernetes/mcp-indexer.yaml b/deploy/kubernetes/mcp-indexer.yaml
new file mode 100644
index 00000000..5963a86a
--- /dev/null
+++ b/deploy/kubernetes/mcp-indexer.yaml
@@ -0,0 +1,182 @@
+---
+# MCP Indexer Deployment
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mcp-indexer
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-indexer
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: context-engine
+      component: mcp-indexer
+  template:
+    metadata:
+      labels:
+        app: context-engine
+        component: mcp-indexer
+    spec:
+      containers:
+      - name: mcp-indexer
+        image: context-engine:latest
+        imagePullPolicy: IfNotPresent
+        command: ["python", "/app/scripts/mcp_indexer_server.py"]
+        ports:
+        - name: sse
+          containerPort: 8001
+          protocol: TCP
+        - name: health
+          containerPort: 18001
+          protocol: TCP
+        env:
+        - name: FASTMCP_HOST
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: FASTMCP_HOST
+        - name: FASTMCP_INDEXER_PORT
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: FASTMCP_INDEXER_PORT
+        - name: FASTMCP_HEALTH_PORT
+          value: "18001"
+        - name: QDRANT_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: QDRANT_URL
+        - name: COLLECTION_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: COLLECTION_NAME
+        - name: EMBEDDING_MODEL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: EMBEDDING_MODEL
+        - name: INDEX_MICRO_CHUNKS
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: INDEX_MICRO_CHUNKS
+        - name: MAX_MICRO_CHUNKS_PER_FILE
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: MAX_MICRO_CHUNKS_PER_FILE
+        - name: REFRAG_MODE
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: REFRAG_MODE
+        - name: REFRAG_DECODER
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: REFRAG_DECODER
+        - name: LLAMACPP_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: LLAMACPP_URL
+        - name: MEMORY_SSE_ENABLED
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: MEMORY_SSE_ENABLED
+        - name: MEMORY_MCP_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: MEMORY_MCP_URL
+        - name: CTX_MULTI_COLLECTION
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: CTX_MULTI_COLLECTION
+        resources:
+          requests:
+            memory: "1Gi"
+            cpu: "500m"
+          limits:
+            memory: "4Gi"
+            cpu: "2"
+        volumeMounts:
+        - name: repos
+          mountPath: /repos
+        livenessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 10
+          periodSeconds: 5
+      volumes:
+      - name: repos
+        hostPath:
+          path: /tmp/context-engine-repos
+          type: DirectoryOrCreate
+
+---
+# MCP Indexer Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: mcp-indexer
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-indexer
+spec:
+  type: ClusterIP
+  ports:
+  - name: sse
+    port: 8001
+    targetPort: sse
+    protocol: TCP
+  - name: health
+    port: 18001
+    targetPort: health
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: mcp-indexer
+
+---
+# Optional: MCP Indexer External Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: mcp-indexer-external
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-indexer
+spec:
+  type: NodePort
+  ports:
+  - name: sse
+    port: 8001
+    targetPort: sse
+    nodePort: 30802
+    protocol: TCP
+  - name: health
+    port: 18001
+    targetPort: health
+    nodePort: 30803
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: mcp-indexer
+
diff --git a/deploy/kubernetes/mcp-memory.yaml b/deploy/kubernetes/mcp-memory.yaml
new file mode 100644
index 00000000..6c22bfee
--- /dev/null
+++ b/deploy/kubernetes/mcp-memory.yaml
@@ -0,0 +1,148 @@
+---
+# MCP Memory Deployment
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mcp-memory
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-memory
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: context-engine
+      component: mcp-memory
+  template:
+    metadata:
+      labels:
+        app: context-engine
+        component: mcp-memory
+    spec:
+      containers:
+      - name: mcp-memory
+        image: context-engine:latest
+        imagePullPolicy: IfNotPresent
+        command: ["python", "/app/scripts/mcp_memory_server.py"]
+        ports:
+        - name: sse
+          containerPort: 8000
+          protocol: TCP
+        - name: health
+          containerPort: 18000
+          protocol: TCP
+        env:
+        - name: FASTMCP_HOST
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: FASTMCP_HOST
+        - name: FASTMCP_PORT
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: FASTMCP_PORT
+        - name: FASTMCP_HEALTH_PORT
+          value: "18000"
+        - name: QDRANT_URL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: QDRANT_URL
+        - name: COLLECTION_NAME
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: COLLECTION_NAME
+        - name: EMBEDDING_MODEL
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: EMBEDDING_MODEL
+        - name: EMBEDDING_PROVIDER
+          valueFrom:
+            configMapKeyRef:
+              name: context-engine-config
+              key: EMBEDDING_PROVIDER
+        resources:
+          requests:
+            memory: "1Gi"
+            cpu: "500m"
+          limits:
+            memory: "4Gi"
+            cpu: "2"
+        volumeMounts:
+        - name: repos
+          mountPath: /repos
+          readOnly: true
+        livenessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 10
+          periodSeconds: 5
+      volumes:
+      - name: repos
+        hostPath:
+          path: /tmp/context-engine-repos
+          type: DirectoryOrCreate
+
+---
+# MCP Memory Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: mcp-memory
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-memory
+spec:
+  type: ClusterIP
+  ports:
+  - name: sse
+    port: 8000
+    targetPort: sse
+    protocol: TCP
+  - name: health
+    port: 18000
+    targetPort: health
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: mcp-memory
+
+---
+# Optional: MCP Memory External Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: mcp-memory-external
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: mcp-memory
+spec:
+  type: NodePort
+  ports:
+  - name: sse
+    port: 8000
+    targetPort: sse
+    nodePort: 30800
+    protocol: TCP
+  - name: health
+    port: 18000
+    targetPort: health
+    nodePort: 30801
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: mcp-memory
+
diff --git a/deploy/kubernetes/namespace.yaml b/deploy/kubernetes/namespace.yaml
new file mode 100644
index 00000000..b972df16
--- /dev/null
+++ b/deploy/kubernetes/namespace.yaml
@@ -0,0 +1,7 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: context-engine
+  labels:
+    name: context-engine
+    app: context-engine
diff --git a/deploy/kubernetes/qdrant.yaml b/deploy/kubernetes/qdrant.yaml
new file mode 100644
index 00000000..3330bcf1
--- /dev/null
+++ b/deploy/kubernetes/qdrant.yaml
@@ -0,0 +1,126 @@
+---
+# Qdrant StatefulSet
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: qdrant
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: qdrant
+spec:
+  serviceName: qdrant
+  replicas: 1
+  selector:
+    matchLabels:
+      app: context-engine
+      component: qdrant
+  template:
+    metadata:
+      labels:
+        app: context-engine
+        component: qdrant
+    spec:
+      containers:
+      - name: qdrant
+        image: qdrant/qdrant:latest
+        imagePullPolicy: IfNotPresent
+        ports:
+        - name: http
+          containerPort: 6333
+          protocol: TCP
+        - name: grpc
+          containerPort: 6334
+          protocol: TCP
+        env:
+        - name: QDRANT__SERVICE__HTTP_PORT
+          value: "6333"
+        - name: QDRANT__SERVICE__GRPC_PORT
+          value: "6334"
+        resources:
+          requests:
+            memory: "2Gi"
+            cpu: "1"
+          limits:
+            memory: "8Gi"
+            cpu: "4"
+        volumeMounts:
+        - name: qdrant-storage
+          mountPath: /qdrant/storage
+        livenessProbe:
+          httpGet:
+            path: /
+            port: http
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /
+            port: http
+          initialDelaySeconds: 5
+          periodSeconds: 5
+  volumeClaimTemplates:
+  - metadata:
+      name: qdrant-storage
+      labels:
+        app: context-engine
+        component: qdrant
+    spec:
+      accessModes: ["ReadWriteOnce"]
+      storageClassName: standard  # Adjust based on your cluster
+      resources:
+        requests:
+          storage: 50Gi
+
+---
+# Qdrant Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: qdrant
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: qdrant
+spec:
+  type: ClusterIP
+  ports:
+  - name: http
+    port: 6333
+    targetPort: http
+    protocol: TCP
+  - name: grpc
+    port: 6334
+    targetPort: grpc
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: qdrant
+
+---
+# Optional: Qdrant External Service (for direct access)
+apiVersion: v1
+kind: Service
+metadata:
+  name: qdrant-external
+  namespace: context-engine
+  labels:
+    app: context-engine
+    component: qdrant
+spec:
+  type: NodePort
+  ports:
+  - name: http
+    port: 6333
+    targetPort: http
+    nodePort: 30333
+    protocol: TCP
+  - name: grpc
+    port: 6334
+    targetPort: grpc
+    nodePort: 30334
+    protocol: TCP
+  selector:
+    app: context-engine
+    component: qdrant
+
diff --git a/docs/MULTI_REPO_COLLECTIONS.md b/docs/MULTI_REPO_COLLECTIONS.md
new file mode 100644
index 00000000..e43a5d60
--- /dev/null
+++ b/docs/MULTI_REPO_COLLECTIONS.md
@@ -0,0 +1,397 @@
+# Multi-Repository Collection Architecture
+
+## Overview
+
+Context Engine supports first-class multi-repository operation through a unified collection architecture. This enables:
+
+- **Single unified collection** (default: `codebase`) for seamless cross-repo search
+- **Per-repo metadata** for filtering and isolation when needed
+- **Remote deployment** on Kubernetes clusters with stronger hardware
+- **Minimal code changes** - existing single-repo workflows remain unchanged
+
+## Architecture Principles
+
+### 1. Unified Collection Model
+
+All repositories index into a **single shared collection** by default (`codebase`). This provides:
+
+- **Seamless cross-repo search**: Query across all your code at once
+- **Simplified management**: One collection to monitor and maintain
+- **Efficient resource usage**: Shared HNSW index and vector storage
+
+### 2. Per-Repository Metadata
+
+Each indexed chunk includes repository identification in its payload:
+
+```json
+{
+  "metadata": {
+    "repo": "my-backend-service",
+    "path": "/work/src/api/handler.py",
+    "host_path": "/Users/john/projects/backend/src/api/handler.py",
+    "container_path": "/work/src/api/handler.py",
+    "language": "python",
+    "kind": "function",
+    "symbol": "handle_request",
+    ...
+  }
+}
+```
+
+**Key metadata fields for multi-repo:**
+- `metadata.repo`: Logical repository name (auto-detected from git or folder name)
+- `metadata.path`: Container path (always starts with `/work`)
+- `metadata.host_path`: Original host filesystem path
+- `metadata.container_path`: Normalized container path for remote deployments
+
+### 3. Workspace State Management
+
+Each repository maintains its own `.codebase/state.json` file:
+
+```json
+{
+  "workspace_path": "/work",
+  "created_at": "2025-01-15T10:30:00",
+  "updated_at": "2025-01-15T14:22:00",
+  "qdrant_collection": "codebase",
+  "indexing_status": {
+    "state": "watching",
+    "started_at": "2025-01-15T14:20:00",
+    "progress": {
+      "files_processed": 1250,
+      "total_files": 1250
+    }
+  },
+  "last_activity": {
+    "timestamp": "2025-01-15T14:22:00",
+    "action": "indexed",
+    "file_path": "/work/src/main.py"
+  }
+}
+```
+
+## Collection Naming Strategy
+
+### Default: Unified Collection
+
+**Recommended for most users:**
+- Collection name: `codebase` (default)
+- All repositories share this collection
+- Filter by `metadata.repo` when you need repo-specific results
+
+**Benefits:**
+- Cross-repo search works out of the box
+- Simpler configuration
+- Better for monorepos and related microservices
+
+### Optional: Per-Repository Collections
+
+**Use when you need strict isolation:**
+- Set `COLLECTION_NAME=my-service-name` per repository
+- Each repo gets its own collection
+- Requires explicit collection parameter in MCP calls
+
+**Trade-offs:**
+- More collections to manage
+- Cross-repo search requires multiple queries
+- Higher memory overhead (separate HNSW indexes)
+
+## Remote Deployment Architecture
+
+### Kubernetes Deployment Model
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                     Kubernetes Cluster                       │
+│                                                              │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐     │
+│  │   Qdrant     │  │  Memory MCP  │  │ Indexer MCP  │     │
+│  │ (StatefulSet)│  │ (Deployment) │  │ (Deployment) │     │
+│  │  Port: 6333  │  │  Port: 8000  │  │  Port: 8001  │     │
+│  └──────────────┘  └──────────────┘  └──────────────┘     │
+│         │                  │                  │             │
+│         └──────────────────┴──────────────────┘             │
+│                            │                                │
+│  ┌─────────────────────────┴────────────────────────────┐  │
+│  │           Persistent Volume (repos)                   │  │
+│  │  /repos/backend/    /repos/frontend/   /repos/ml/    │  │
+│  └───────────────────────────────────────────────────────┘  │
+│                                                              │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐     │
+│  │   Watcher    │  │   Watcher    │  │   Watcher    │     │
+│  │  (backend)   │  │  (frontend)  │  │    (ml)      │     │
+│  │ (Deployment) │  │ (Deployment) │  │ (Deployment) │     │
+│  └──────────────┘  └──────────────┘  └──────────────┘     │
+│                                                              │
+│  ┌──────────────────────────────────────────────────────┐  │
+│  │              Uploader Pod (optional)                  │  │
+│  │  Accepts file uploads and writes to /repos volume    │  │
+│  └──────────────────────────────────────────────────────┘  │
+└─────────────────────────────────────────────────────────────┘
+         │                    │                    │
+         │                    │                    │
+    ┌────▼────┐          ┌────▼────┐         ┌────▼────┐
+    │  Dev    │          │  Dev    │         │  Dev    │
+    │ Client  │          │ Client  │         │ Client  │
+    │   #1    │          │   #2    │         │   #3    │
+    └─────────┘          └─────────┘         └─────────┘
+```
+
+### Volume Structure
+
+```
+/repos/
+├── backend/
+│   ├── .codebase/
+│   │   └── state.json          # Collection: codebase, repo: backend
+│   ├── src/
+│   └── ...
+├── frontend/
+│   ├── .codebase/
+│   │   └── state.json          # Collection: codebase, repo: frontend
+│   ├── src/
+│   └── ...
+└── ml-service/
+    ├── .codebase/
+    │   └── state.json          # Collection: codebase, repo: ml-service
+    ├── models/
+    └── ...
+```
+
+## MCP Tool Collection Support
+
+All MCP tools accept an optional `collection` parameter:
+
+### Search Tools
+
+```python
+# Search across all repos in the unified collection
+await repo_search(
+    query="authentication handler",
+    limit=10
+)
+
+# Filter to a specific repo (planned `repo` parameter; see "Filtering by Repository" below)
+await repo_search(
+    query="authentication handler",
+    limit=10,
+    # Repo scoping is done with a payload filter on metadata.repo, not the collection param;
+    # the collection param only switches between different Qdrant collections
+)
+
+# Search in a different collection (if using per-repo collections)
+await repo_search(
+    query="authentication handler",
+    collection="backend-service",
+    limit=10
+)
+```
+
+### Memory Tools
+
+```python
+# Store memory in default collection
+await memory_store(
+    information="Use JWT tokens for API authentication",
+    metadata={"kind": "memory", "topic": "auth", "repo": "backend"}
+)
+
+# Store in specific collection
+await memory_store(
+    information="Frontend uses OAuth2 flow",
+    metadata={"kind": "memory", "topic": "auth", "repo": "frontend"},
+    collection="codebase"
+)
+```
+
+### Indexing Tools
+
+```python
+# Index a specific workspace into the unified collection
+await qdrant_index_root(
+    collection="codebase"  # Optional, defaults to workspace state
+)
+
+# Index with explicit collection override
+await qdrant_index(
+    subdir="",
+    recreate=False,
+    collection="my-custom-collection"
+)
+```
+
+## Filtering by Repository
+
+Use Qdrant's payload filters to scope searches to specific repositories:
+
+```python
+# In hybrid_search.py or via MCP tools
+results = hybrid_search(
+    queries=["authentication"],
+    collection="codebase",
+    # Add repo filter via metadata
+    # (Implementation detail: tools should support repo= parameter)
+)
+```
+
+**Recommended enhancement:** Add `repo` parameter to search tools that translates to a payload filter on `metadata.repo`.
+
+## Workspace Discovery
+
+The `list_workspaces` function scans for all `.codebase/state.json` files:
+
+```python
+from scripts.workspace_state import list_workspaces
+
+workspaces = list_workspaces(search_root="/repos")
+# Returns:
+# [
+#   {
+#     "workspace_path": "/repos/backend",
+#     "collection_name": "codebase",
+#     "last_updated": "2025-01-15T14:22:00",
+#     "indexing_state": "watching"
+#   },
+#   {
+#     "workspace_path": "/repos/frontend",
+#     "collection_name": "codebase",
+#     "last_updated": "2025-01-15T14:20:00",
+#     "indexing_state": "idle"
+#   }
+# ]
+```
+
+## Migration Guide
+
+### From Single-Repo to Multi-Repo
+
+**No migration needed!** The default unified collection model works automatically:
+
+1. **Keep using `codebase` collection** (default)
+2. **Index additional repos** - they'll share the same collection
+3. **Filter by repo name** when you need repo-specific results
+
+### From Per-Repo Collections to Unified
+
+If you previously used separate collections per repo:
+
+1. **Create new unified collection:**
+   ```bash
+   COLLECTION_NAME=codebase make reindex
+   ```
+
+2. **Reindex all repositories** into the unified collection:
+   ```bash
+   for repo in backend frontend ml-service; do
+     HOST_INDEX_PATH=/path/to/$repo COLLECTION_NAME=codebase make index
+   done
+   ```
+
+3. **Update MCP client configs** to use `codebase` collection
+
+4. **Optional:** Delete old per-repo collections via Qdrant API
+
+## Best Practices
+
+### 1. Use Unified Collection by Default
+
+- Simplifies cross-repo search
+- Reduces operational overhead
+- Better for related codebases
+
+### 2. Set Meaningful Repo Names
+
+- Set the `REPO_NAME` env var, or rely on the auto-detected git repo (or folder) name
+- Keep names consistent across environments
+- Use kebab-case: `backend-api`, `frontend-web`, `ml-training`
+
+### 3. Leverage Payload Indexes
+
+The indexer creates payload indexes on `metadata.repo` for efficient filtering:
+
+```python
+# Fast repo-scoped search (uses payload index)
+results = client.search(
+    collection_name="codebase",
+    query_vector=embedding,
+    query_filter=models.Filter(
+        must=[
+            models.FieldCondition(
+                key="metadata.repo",
+                match=models.MatchValue(value="backend-api")
+            )
+        ]
+    )
+)
+```
+
+### 4. Monitor Collection Health
+
+```bash
+# Check collection status
+make qdrant-status
+
+# List all collections
+make qdrant-list
+
+# Prune stale points
+make prune
+```
+
+### 5. Use Watchers Per Repository
+
+Deploy one watcher per repository in Kubernetes:
+
+```yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: watcher-backend
+spec:
+  replicas: 1
+  template:
+    spec:
+      containers:
+      - name: watcher
+        image: context-engine-indexer:latest
+        command: ["python", "/app/scripts/watch_index.py"]
+        env:
+        - name: WATCH_ROOT
+          value: "/repos/backend"
+        - name: COLLECTION_NAME
+          value: "codebase"
+        - name: REPO_NAME
+          value: "backend"
+        volumeMounts:
+        - name: repos
+          mountPath: /repos/backend
+          subPath: backend
+```
+
+## Compatibility
+
+### Backward Compatibility
+
+All existing single-repo workflows continue to work:
+
+- Default collection name: `codebase`
+- Workspace state auto-created if missing
+- Collection parameter optional in all MCP tools
+- Existing Docker Compose setup unchanged
+
+### Forward Compatibility
+
+The architecture supports future enhancements:
+
+- Multi-collection queries (search across multiple collections)
+- Collection-level access control
+- Collection-specific embedding models
+- Cross-collection deduplication
+
+## See Also
+
+- [Kubernetes Deployment Guide](../deploy/kubernetes/README.md)
+- [MCP API Reference](MCP_API.md)
+- [Architecture Overview](ARCHITECTURE.md)
+- [Development Guide](DEVELOPMENT.md)
+

From f6ccd611b208244c873c6c88a2190763af7f42ea Mon Sep 17 00:00:00 2001
From: john donalson <mirlok@dr.com>
Date: Sun, 2 Nov 2025 21:59:27 -0500
Subject: [PATCH 3/5] Update kustomization.yaml

---
 deploy/kubernetes/kustomization.yaml | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/deploy/kubernetes/kustomization.yaml b/deploy/kubernetes/kustomization.yaml
index e02aa13e..c9f3f016 100644
--- a/deploy/kubernetes/kustomization.yaml
+++ b/deploy/kubernetes/kustomization.yaml
@@ -26,10 +26,11 @@ resources:
   - ingress.yaml
 
 # Common labels
-commonLabels:
-  app.kubernetes.io/name: context-engine
-  app.kubernetes.io/component: kubernetes-deployment
-  app.kubernetes.io/managed-by: kustomize
+labels:
+  - pairs:
+      app.kubernetes.io/name: context-engine
+      app.kubernetes.io/component: kubernetes-deployment
+      app.kubernetes.io/managed-by: kustomize
 
 # Patches for production customization
 patchesStrategicMerge:

From 188855d4c852a65a0bcf1362f1f138a5ba9c4493 Mon Sep 17 00:00:00 2001
From: john donalson <mirlok@dr.com>
Date: Sun, 2 Nov 2025 22:08:07 -0500
Subject: [PATCH 4/5] fix

---
 deploy/kubernetes/indexer-services.yaml |  26 +-
 deploy/kubernetes/mcp-http.yaml         |  16 +-
 deploy/kubernetes/mcp-indexer.yaml      |   8 +-
 deploy/kubernetes/mcp-memory.yaml       |   8 +-
 scripts/k8s_uploader.py                 | 307 ++++++++++++++++++++++++
 scripts/mcp_indexer_server.py           |  15 +-
 6 files changed, 341 insertions(+), 39 deletions(-)
 create mode 100755 scripts/k8s_uploader.py

diff --git a/deploy/kubernetes/indexer-services.yaml b/deploy/kubernetes/indexer-services.yaml
index 110d22ce..72e3920a 100644
--- a/deploy/kubernetes/indexer-services.yaml
+++ b/deploy/kubernetes/indexer-services.yaml
@@ -44,7 +44,7 @@ spec:
               name: context-engine-config
               key: EMBEDDING_MODEL
         - name: WATCH_ROOT
-          value: "/repos"
+          value: "/work"
         - name: QDRANT_TIMEOUT
           valueFrom:
             configMapKeyRef:
@@ -78,12 +78,12 @@ spec:
             memory: "2Gi"
             cpu: "1"
         volumeMounts:
-        - name: repos
-          mountPath: /repos
+        - name: work
+          mountPath: /work
       volumes:
-      - name: repos
+      - name: work
         hostPath:
-          path: /tmp/context-engine-repos
+          path: /tmp/context-engine-work
           type: DirectoryOrCreate
 
 ---
@@ -134,13 +134,13 @@ spec:
             memory: "4Gi"
             cpu: "2"
         volumeMounts:
-        - name: repos
-          mountPath: /repos
+        - name: work
+          mountPath: /work
           readOnly: true
       volumes:
-      - name: repos
+      - name: work
         hostPath:
-          path: /tmp/context-engine-repos
+          path: /tmp/context-engine-work
           type: DirectoryOrCreate
 
 ---
@@ -186,12 +186,12 @@ spec:
             memory: "1Gi"
             cpu: "500m"
         volumeMounts:
-        - name: repos
-          mountPath: /repos
+        - name: work
+          mountPath: /work
           readOnly: true
       volumes:
-      - name: repos
+      - name: work
         hostPath:
-          path: /tmp/context-engine-repos
+          path: /tmp/context-engine-work
           type: DirectoryOrCreate
 
diff --git a/deploy/kubernetes/mcp-http.yaml b/deploy/kubernetes/mcp-http.yaml
index 7586f022..5abbe9d9 100644
--- a/deploy/kubernetes/mcp-http.yaml
+++ b/deploy/kubernetes/mcp-http.yaml
@@ -67,8 +67,8 @@ spec:
             memory: "4Gi"
             cpu: "2"
         volumeMounts:
-        - name: repos
-          mountPath: /repos
+        - name: work
+          mountPath: /work
           readOnly: true
         livenessProbe:
           httpGet:
@@ -83,9 +83,9 @@ spec:
           initialDelaySeconds: 10
           periodSeconds: 5
       volumes:
-      - name: repos
+      - name: work
         hostPath:
-          path: /tmp/context-engine-repos
+          path: /tmp/context-engine-work
           type: DirectoryOrCreate
 
 ---
@@ -249,8 +249,8 @@ spec:
             memory: "4Gi"
             cpu: "2"
         volumeMounts:
-        - name: repos
-          mountPath: /repos
+        - name: work
+          mountPath: /work
         livenessProbe:
           httpGet:
             path: /readyz
@@ -264,9 +264,9 @@ spec:
           initialDelaySeconds: 10
           periodSeconds: 5
       volumes:
-      - name: repos
+      - name: work
         hostPath:
-          path: /tmp/context-engine-repos
+          path: /tmp/context-engine-work
           type: DirectoryOrCreate
 
 ---
diff --git a/deploy/kubernetes/mcp-indexer.yaml b/deploy/kubernetes/mcp-indexer.yaml
index 5963a86a..c219df31 100644
--- a/deploy/kubernetes/mcp-indexer.yaml
+++ b/deploy/kubernetes/mcp-indexer.yaml
@@ -108,8 +108,8 @@ spec:
             memory: "4Gi"
             cpu: "2"
         volumeMounts:
-        - name: repos
-          mountPath: /repos
+        - name: work
+          mountPath: /work
         livenessProbe:
           httpGet:
             path: /readyz
@@ -123,9 +123,9 @@ spec:
           initialDelaySeconds: 10
           periodSeconds: 5
       volumes:
-      - name: repos
+      - name: work
         hostPath:
-          path: /tmp/context-engine-repos
+          path: /tmp/context-engine-work
           type: DirectoryOrCreate
 
 ---
diff --git a/deploy/kubernetes/mcp-memory.yaml b/deploy/kubernetes/mcp-memory.yaml
index 6c22bfee..af213f24 100644
--- a/deploy/kubernetes/mcp-memory.yaml
+++ b/deploy/kubernetes/mcp-memory.yaml
@@ -73,8 +73,8 @@ spec:
             memory: "4Gi"
             cpu: "2"
         volumeMounts:
-        - name: repos
-          mountPath: /repos
+        - name: work
+          mountPath: /work
           readOnly: true
         livenessProbe:
           httpGet:
@@ -89,9 +89,9 @@ spec:
           initialDelaySeconds: 10
           periodSeconds: 5
       volumes:
-      - name: repos
+      - name: work
         hostPath:
-          path: /tmp/context-engine-repos
+          path: /tmp/context-engine-work
           type: DirectoryOrCreate
 
 ---
diff --git a/scripts/k8s_uploader.py b/scripts/k8s_uploader.py
new file mode 100755
index 00000000..d4b1c7d3
--- /dev/null
+++ b/scripts/k8s_uploader.py
@@ -0,0 +1,307 @@
+#!/usr/bin/env python3
+"""
+Kubernetes Repository Uploader
+
+Upload local repositories to a Kubernetes cluster running Context Engine
+and trigger indexing via the MCP Indexer API.
+
+Usage:
+    python scripts/k8s_uploader.py /path/to/repo --namespace context-engine
+    python scripts/k8s_uploader.py /path/to/repo --collection my-project --recreate
+    python scripts/k8s_uploader.py /path/to/repo --pod mcp-indexer-abc123 --skip-index
+"""
+
+import argparse
+import json
+import os
+import subprocess
+import sys
+import tarfile
+import tempfile
+import time
+from pathlib import Path
+from typing import Optional, Dict, Any, List
+
+
+def run_command(cmd: List[str], check: bool = True, capture: bool = True) -> subprocess.CompletedProcess:
+    """Echo *cmd*, execute it without a shell, and return the completed process.
+
+    Output is decoded as text and captured only when ``capture`` is true.  With
+    ``check`` set, a non-zero exit is reported (stderr included when it was
+    captured) and the ``CalledProcessError`` propagates to the caller.
+    """
+    print(f"Running: {' '.join(cmd)}")
+    try:
+        return subprocess.run(cmd, check=check, capture_output=capture, text=True)
+    except subprocess.CalledProcessError as err:
+        print(f"Error running command: {err}")
+        captured_stderr = err.stderr if capture else None
+        if captured_stderr:
+            print(f"stderr: {captured_stderr}")
+        raise
+
+
+def get_indexer_pod(namespace: str, pod_name: Optional[str] = None) -> str:
+    """Return *pod_name* if given, else the first Running mcp-indexer pod in *namespace*.
+
+    Raises RuntimeError when no Running pod has the component=mcp-indexer label.
+    """
+    if pod_name:
+        return pod_name
+    # items[*] rather than items[0]: indexing an empty list makes kubectl itself
+    # fail, which would pre-empt the clearer RuntimeError raised below.
+    result = run_command([
+        "kubectl", "get", "pods", "-n", namespace,
+        "-l", "component=mcp-indexer", "--field-selector=status.phase=Running",
+        "-o", "jsonpath={.items[*].metadata.name}"
+    ])
+    pods = result.stdout.split()
+    if not pods:
+        raise RuntimeError(f"No mcp-indexer pod found in namespace {namespace}")
+    print(f"Using pod: {pods[0]}")
+    return pods[0]
+
+
+def create_tar_archive(source_path: Path, exclude_patterns: Optional[List[str]] = None) -> Path:
+    """Create a tar.gz of *source_path* in a fresh temp dir and return the archive path.
+
+    Members are named relative to ``source_path.parent`` (``<dir name>/...``).
+    ``exclude_patterns`` are shell-style globs matched against every path component
+    and directory prefix below ``source_path``; ``None`` selects the default list.
+
+    Raises FileNotFoundError for a missing path, ValueError for a non-directory.
+    """
+    import fnmatch  # function-scope import: the file header does not provide it
+
+    if not source_path.exists():
+        raise FileNotFoundError(f"Source path does not exist: {source_path}")
+    if not source_path.is_dir():
+        raise ValueError(f"Source path must be a directory: {source_path}")
+
+    if exclude_patterns is None:
+        exclude_patterns = [
+            ".git", ".codebase", "__pycache__", "*.pyc", ".DS_Store",
+            "node_modules", ".venv", "venv",
+            # ".env.*" keeps dotenv variants excluded now that matching is exact.
+            ".env", ".env.*", "*.log",
+        ]
+
+    temp_dir = Path(tempfile.mkdtemp())
+    tar_path = temp_dir / f"{source_path.name}.tar.gz"
+
+    print(f"Creating archive: {tar_path}")
+    print(f"Source: {source_path}")
+    print(f"Excluding: {', '.join(exclude_patterns)}")
+
+    def should_exclude(path: Path) -> bool:
+        """Return True if a component or prefix of *path* below the source matches."""
+        # Only the part below source_path is inspected, so an ancestor directory
+        # that happens to be named e.g. "venv" cannot exclude the whole repo.
+        parts = path.relative_to(source_path).parts
+        candidates = [*parts, *("/".join(parts[:i]) for i in range(2, len(parts) + 1))]
+        return any(
+            fnmatch.fnmatchcase(candidate, pattern)
+            for pattern in exclude_patterns
+            for candidate in candidates
+        )
+
+    with tarfile.open(tar_path, "w:gz") as tar:
+        for item in source_path.rglob("*"):
+            if should_exclude(item):
+                continue
+            arcname = str(item.relative_to(source_path.parent))
+            try:
+                # recursive=False: rglob already visits every descendant; letting
+                # tar.add() recurse would re-add them and bypass the exclusions.
+                tar.add(item, arcname=arcname, recursive=False)
+            except (OSError, tarfile.TarError) as e:
+                print(f"Warning: Could not add {item}: {e}")
+
+    size_mb = tar_path.stat().st_size / (1024 * 1024)
+    print(f"Archive created: {tar_path} ({size_mb:.2f} MB)")
+
+    return tar_path
+
+
+def upload_to_pod(tar_path: Path, namespace: str, pod_name: str, target_dir: str = "/work") -> str:
+    """Copy the archive into the pod, unpack it under *target_dir*, return the repo path.
+
+    The returned path is ``<target_dir>/<archive name minus .tar.gz>``.  Every step
+    runs through kubectl; a failing mkdir, cp or tar raises CalledProcessError.
+    """
+    # Strip only a trailing ".tar": str.replace(".tar", "") would also eat a
+    # ".tar" inside the repository name itself (e.g. "my.tarball" -> "myball").
+    repo_name = tar_path.stem  # drops the final ".gz"
+    if repo_name.endswith(".tar"):
+        repo_name = repo_name[:-len(".tar")]
+    target_path = f"{target_dir}/{repo_name}"
+    temp_tar = f"/tmp/{tar_path.name}"
+    in_pod = ["kubectl", "exec", "-n", namespace, pod_name, "--"]
+
+    print(f"Uploading to pod {pod_name}:{target_path}")
+
+    # Create target directory in pod
+    run_command(in_pod + ["mkdir", "-p", target_path])
+
+    # Copy tar file to pod
+    run_command([
+        "kubectl", "cp", str(tar_path),
+        f"{namespace}/{pod_name}:{temp_tar}"
+    ])
+
+    try:
+        print("Extracting archive in pod...")
+        run_command(in_pod + ["tar", "-xzf", temp_tar, "-C", target_dir])
+    finally:
+        # Best-effort removal of the uploaded tarball, also when extraction fails.
+        run_command(in_pod + ["rm", temp_tar], check=False)
+
+    print(f"Upload complete: {target_path}")
+    return target_path
+
+
+def trigger_indexing(
+    namespace: str,
+    pod_name: str,
+    repo_path: str,
+    collection: Optional[str] = None,
+    recreate: bool = False
+) -> Dict[str, Any]:
+    """Trigger indexing via the MCP indexer server."""
+    print(f"Triggering indexing for {repo_path}")
+
+    # Build Python command to call qdrant_index_root via MCP server
+    python_cmd = f"""
+import sys
+sys.path.insert(0, '/app')
+from scripts.mcp_indexer_server import qdrant_index_root
+import asyncio
+import json
+import os
+
+# Set working directory to the uploaded repo
+os.chdir('{repo_path}')
+
+# Call indexing
+result = asyncio.run(qdrant_index_root(
+    recreate={str(recreate)},
+    collection={repr(collection) if collection else 'None'}
+))
+print(json.dumps(result, indent=2))
+"""
+
+    # Run the script inside the pod. check=False: a failed run is reported through
+    # the returned dict ("ok"/"code") instead of raising CalledProcessError.
+    result = run_command(
+        ["kubectl", "exec", "-n", namespace, pod_name, "--", "python", "-c", python_cmd],
+        check=False,
+    )
+    stdout = result.stdout
+    stderr = result.stderr
+    returncode = result.returncode
+
+    # A "RETURNCODE:<int>" stdout line overrides kubectl's exit status (last one wins,
+    # malformed values ignored). NOTE(review): the script above prints no such line.
+    for line in stdout.split("\n"):
+        if line.startswith("RETURNCODE:"):
+            try:
+                returncode = int(line.split(":", 1)[1].strip())
+            except ValueError:
+                pass
+
+    return {
+        "ok": returncode == 0,
+        "code": returncode,
+        "stdout": stdout,
+        "stderr": stderr,
+        "collection": collection or "codebase",
+        "repo_path": repo_path
+    }
+
+
+def main():
+    """Parse CLI arguments, upload the repository to a pod and optionally index it.
+
+    Exits with status 1 when a step raises or when indexing reports failure.  The
+    temporary archive is removed on every path unless --keep-archive is given.
+    """
+    parser = argparse.ArgumentParser(
+        description="Upload repositories to Kubernetes Context Engine cluster",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Upload and index a repository
+  python scripts/k8s_uploader.py /path/to/my-repo
+
+  # Upload to specific namespace and collection
+  python scripts/k8s_uploader.py /path/to/my-repo --namespace prod --collection my-project
+
+  # Upload and recreate collection (drops existing data)
+  python scripts/k8s_uploader.py /path/to/my-repo --recreate
+
+  # Upload only (skip indexing)
+  python scripts/k8s_uploader.py /path/to/my-repo --skip-index
+
+  # Upload to specific pod
+  python scripts/k8s_uploader.py /path/to/my-repo --pod mcp-indexer-abc123
+        """
+    )
+
+    parser.add_argument("source", type=str, help="Path to repository to upload")
+    parser.add_argument("--namespace", "-n", default="context-engine", help="Kubernetes namespace (default: context-engine)")
+    parser.add_argument("--pod", "-p", help="Specific pod name (default: auto-detect mcp-indexer pod)")
+    parser.add_argument("--collection", "-c", help="Qdrant collection name (default: codebase)")
+    parser.add_argument("--target-dir", default="/work", help="Target directory in pod (default: /work)")
+    parser.add_argument("--recreate", action="store_true", help="Recreate collection (drops existing data)")
+    parser.add_argument("--skip-index", action="store_true", help="Skip indexing after upload")
+    parser.add_argument("--exclude", action="append", help="Exclude pattern (repeatable); replaces the default exclusion list")
+    parser.add_argument("--keep-archive", action="store_true", help="Keep temporary archive file")
+
+    args = parser.parse_args()
+    source_path = Path(args.source).resolve()
+    tar_path = None  # set once the archive exists so the cleanup knows what to remove
+
+    try:
+        # Get target pod
+        pod_name = get_indexer_pod(args.namespace, args.pod)
+        # Create archive
+        tar_path = create_tar_archive(source_path, args.exclude)
+        # Upload to pod
+        repo_path = upload_to_pod(tar_path, args.namespace, pod_name, args.target_dir)
+
+        # Trigger indexing
+        if not args.skip_index:
+            result = trigger_indexing(
+                args.namespace, pod_name, repo_path, args.collection, args.recreate
+            )
+            print("\n" + "="*60)
+            print("INDEXING RESULT:")
+            print("="*60)
+            print(json.dumps(result, indent=2))
+
+            if result.get("ok") and result.get("code") == 0:
+                print("\n[SUCCESS] Upload and indexing completed successfully!")
+            else:
+                print("\n[WARNING] Indexing completed with warnings or errors")
+                sys.exit(1)
+        else:
+            print("\n[SUCCESS] Upload completed successfully (indexing skipped)")
+    except Exception as e:
+        print(f"\n[ERROR] {e}", file=sys.stderr)
+        sys.exit(1)
+    finally:
+        # Runs on success, on errors and on sys.exit(): SystemExit is not an
+        # Exception, so cleanup placed inside the try body was skipped on failure.
+        if tar_path is not None and args.keep_archive:
+            print(f"Archive kept at: {tar_path}")
+        elif tar_path is not None:
+            try:
+                tar_path.unlink()
+                tar_path.parent.rmdir()
+                print("Cleaned up temporary archive")
+            except OSError as e:
+                print(f"Warning: could not remove {tar_path}: {e}", file=sys.stderr)
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/scripts/mcp_indexer_server.py b/scripts/mcp_indexer_server.py
index 54eb9692..76a34173 100644
--- a/scripts/mcp_indexer_server.py
+++ b/scripts/mcp_indexer_server.py
@@ -290,17 +290,12 @@ def _default_collection() -> str:
 
 
 def _work_script(name: str) -> str:
-    """Return path to a script under /work if present, else local ./scripts.
-    Keeps Docker/default behavior but works in local dev without /work mount.
+    """Return path to a script under /app/scripts (container installation).
+
+    Scripts are always installed at /app/scripts in the container.
+    This is independent of where user repositories are mounted.
     """
-    try:
-        p = os.path.join("/work", "scripts", name)
-        if os.path.exists(p):
-            return p
-    except Exception as e:
-        logger.debug(f"Failed to locate script {name}: {e}")
-        pass
-    return os.path.join(os.getcwd(), "scripts", name)
+    return os.path.join("/app", "scripts", name)
 
 
 # Invalidate router scratchpad after reindex to avoid stale state reuse

From 9647f82011ec5f887dd281c076a887915c654efd Mon Sep 17 00:00:00 2001
From: john donalson <mirlok@dr.com>
Date: Sun, 2 Nov 2025 22:13:55 -0500
Subject: [PATCH 5/5] Update k8s_uploader.py

---
 scripts/k8s_uploader.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/scripts/k8s_uploader.py b/scripts/k8s_uploader.py
index d4b1c7d3..3f712682 100755
--- a/scripts/k8s_uploader.py
+++ b/scripts/k8s_uploader.py
@@ -170,20 +170,25 @@ def trigger_indexing(
     """Trigger indexing via the MCP indexer server."""
     print(f"Triggering indexing for {repo_path}")
 
-    # Build Python command to call qdrant_index_root via MCP server
+    # Build Python command to call qdrant_index via MCP server
+    # Use qdrant_index with subdir parameter to index specific repo
     python_cmd = f"""
 import sys
 sys.path.insert(0, '/app')
-from scripts.mcp_indexer_server import qdrant_index_root
+from scripts.mcp_indexer_server import qdrant_index
 import asyncio
 import json
-import os
 
-# Set working directory to the uploaded repo
-os.chdir('{repo_path}')
+# Extract subdir from repo_path (e.g., /work/test-repo -> test-repo)
+repo_path = '{repo_path}'
+if repo_path.startswith('/work/'):
+    subdir = repo_path[6:]  # Remove '/work/' prefix
+else:
+    subdir = repo_path
 
 # Call indexing
-result = asyncio.run(qdrant_index_root(
+result = asyncio.run(qdrant_index(
+    subdir=subdir,
     recreate={str(recreate)},
     collection={repr(collection) if collection else 'None'}
 ))