From 598913c24b952bce320a57efebf37b0ad4463fe8 Mon Sep 17 00:00:00 2001 From: john donalson Date: Sun, 2 Nov 2025 17:17:20 -0500 Subject: [PATCH 1/5] push --- .env | 6 +- .env.example | 4 +- .gitignore | 2 + README.md | 91 ++++++++++- docker-compose.yml | 14 +- scripts/add_vector_name.py | 2 +- scripts/collection_health.py | 286 ++++++++++++++++++++++++++++++++++ scripts/create_indexes.py | 6 +- scripts/health_check.py | 2 +- scripts/hybrid_search.py | 2 +- scripts/ingest_code.py | 26 +++- scripts/ingest_history.py | 2 +- scripts/mcp_indexer_server.py | 16 +- scripts/mcp_memory_server.py | 2 +- scripts/prune.py | 2 +- scripts/query_named_vector.py | 2 +- scripts/rerank_local.py | 2 +- scripts/rerank_query.py | 2 +- scripts/semantic_expansion.py | 2 +- scripts/smoke_test.py | 2 +- scripts/warm_start.py | 2 +- scripts/watch_index.py | 16 +- scripts/workspace_state.py | 43 ++--- 23 files changed, 461 insertions(+), 73 deletions(-) create mode 100644 scripts/collection_health.py diff --git a/.env b/.env index 9a644ace..cf0957a1 100644 --- a/.env +++ b/.env @@ -3,8 +3,10 @@ QDRANT_URL=http://qdrant:6333 # QDRANT_API_KEY= # not needed for local -# Default collection used by the MCP server (auto-created if missing) -# COLLECTION_NAME=my-collection # Use auto-detected default from .codebase/state.json +# Single unified collection for seamless cross-repo search +# Default: "codebase" - all your code in one collection for unified search +# This enables searching across multiple repos/workspaces without fragmentation +COLLECTION_NAME=codebase # Embedding settings (FastEmbed model) EMBEDDING_MODEL=BAAI/bge-base-en-v1.5 diff --git a/.env.example b/.env.example index f16118f2..87c7e330 100644 --- a/.env.example +++ b/.env.example @@ -1,7 +1,9 @@ # Qdrant connection QDRANT_URL=http://localhost:6333 QDRANT_API_KEY= -COLLECTION_NAME=my-collection +# Single unified collection for seamless cross-repo search (default: "codebase") +# Leave unset or use "codebase" for unified search 
across all your code +COLLECTION_NAME=codebase # Embeddings EMBEDDING_MODEL=BAAI/bge-base-en-v1.5 diff --git a/.gitignore b/.gitignore index 15613865..06319ff1 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,5 @@ tests/.codebase/cache.json tests/.codebase/state.json /scripts/.codebase /tests/.codebase +.claude/settings.local.json +.mcp.json diff --git a/README.md b/README.md index dd468441..e748df1f 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,13 @@ INDEX_MICRO_CHUNKS=1 MAX_MICRO_CHUNKS_PER_FILE=200 make reset-dev-dual ``` - Default ports: Memory MCP :8000, Indexer MCP :8001, Qdrant :6333, llama.cpp :8080 +**🎯 Seamless Setup Note:** +- The stack uses a **single unified `codebase` collection** by default +- All your code goes into one collection for seamless cross-repo search +- No per-workspace fragmentation - search across everything at once +- Health checks auto-detect and fix cache/collection sync issues +- Just run `make reset-dev-dual` on any machine and it works™ + ### Make targets: SSE, RMCP, and dual-compat - Legacy SSE only (default): - Ports: 8000 (/sse), 8001 (/sse) @@ -96,9 +103,10 @@ INDEX_MICRO_CHUNKS=1 MAX_MICRO_CHUNKS_PER_FILE=200 make reset-dev-dual GLM_MODEL=glm-4.6 # Optional, defaults to glm-4.6 ``` -5. **Custom collection name**: +5. 
**Collection name** (unified by default): ```bash - COLLECTION_NAME=my-project # Defaults to auto-detected repo name + COLLECTION_NAME=codebase # Default: single unified collection for all code + # Only change this if you need isolated collections per project ``` **After changing `.env`:** @@ -280,7 +288,7 @@ Ports | Name | Description | Default | |------|-------------|---------| -| COLLECTION_NAME | Qdrant collection name used by both servers | my-collection | +| COLLECTION_NAME | Qdrant collection name (unified across all repos) | codebase | | REPO_NAME | Logical repo tag stored in payload for filtering | auto-detect from git/folder | | HOST_INDEX_PATH | Host path mounted at /work in containers | current repo (.) | | QDRANT_URL | Qdrant base URL | container: http://qdrant:6333; local: http://localhost:6333 | @@ -763,6 +771,50 @@ Notes: - Named vector remains aligned with the MCP server (fast-bge-base-en-v1.5). If you change EMBEDDING_MODEL, run `make reindex` to recreate the collection. - For very large repos, consider running `make index` on a schedule (or pre-commit) to keep Qdrant warm without full reingestion. 
+### Multi-repo indexing (unified search) + +The stack uses a **single unified `codebase` collection** by default, making multi-repo search seamless: + +**Index another repo into the same collection:** +```bash +# From your qdrant directory +make index-here HOST_INDEX_PATH=/path/to/other/repo REPO_NAME=other-repo + +# Or with full control: +HOST_INDEX_PATH=/path/to/other/repo \ +COLLECTION_NAME=codebase \ +REPO_NAME=other-repo \ +docker compose run --rm indexer --root /work +``` + +**What happens:** +- Files from the other repo get indexed into the unified `codebase` collection +- Each file is tagged with `metadata.repo = "other-repo"` for filtering +- Search across all repos by default, or filter by specific repo + +**Search examples:** +```bash +# Search across all indexed repos +make hybrid QUERY="authentication logic" + +# Filter by specific repo +python scripts/hybrid_search.py \ + --query "authentication logic" \ + --repo other-repo + +# Filter by repo + language +python scripts/hybrid_search.py \ + --query "authentication logic" \ + --repo other-repo \ + --language python +``` + +**Benefits:** +- One collection = unified search across all your code +- No fragmentation or collection management overhead +- Filter by repo when you need isolation +- All repos share the same vector space for better semantic search + ### Multi-query re-ranker (no new deps) - Run a fused query with several phrasings and metadata-aware boosts: @@ -1296,6 +1348,39 @@ Client tips: ## Troubleshooting +### Collection Health & Cache Sync + +The stack includes automatic health checks that detect and fix cache/collection sync issues: + +**Check collection health:** +```bash +python scripts/collection_health.py --workspace . --collection codebase +``` + +**Auto-heal cache issues:** +```bash +python scripts/collection_health.py --workspace . 
--collection codebase --auto-heal +``` + +**What it detects:** +- Empty collection with cached files (cache thinks files are indexed but they're not) +- Significant mismatch between cached files and actual collection contents +- Missing metadata in collection points + +**When to use:** +- After manually deleting collections +- If searches return no results despite indexing +- After Qdrant crashes or data loss +- When switching between collection names + +**Automatic healing:** +- Health checks run automatically on watcher and indexer startup +- Cache is cleared when sync issues are detected +- Files are reindexed on next run + +### General Issues + - If the MCP servers can’t reach Qdrant, confirm both containers are up: `make ps`. - If the SSE port collides, change `FASTMCP_PORT` in `.env` and the mapped port in `docker-compose.yml`. - If you customize tool descriptions, restart: `make restart`. +- If searches return no results, check collection health (see above). diff --git a/docker-compose.yml b/docker-compose.yml index ccb39f5c..cae91e27 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,7 +24,7 @@ services: - FASTMCP_HOST=${FASTMCP_HOST} - FASTMCP_PORT=${FASTMCP_PORT} - QDRANT_URL=${QDRANT_URL} - - COLLECTION_NAME=${COLLECTION_NAME:-my-collection} + - COLLECTION_NAME=${COLLECTION_NAME:-codebase} - EMBEDDING_MODEL=${EMBEDDING_MODEL} - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER} - TOOL_STORE_DESCRIPTION=${TOOL_STORE_DESCRIPTION} @@ -60,7 +60,7 @@ services: - USE_GPU_DECODER=${USE_GPU_DECODER:-0} - LLAMACPP_TIMEOUT_SEC=${LLAMACPP_TIMEOUT_SEC:-180} - CTX_REQUIRE_IDENTIFIER=${CTX_REQUIRE_IDENTIFIER:-0} - - COLLECTION_NAME=${COLLECTION_NAME:-my-collection} + - COLLECTION_NAME=${COLLECTION_NAME:-codebase} - EMBEDDING_MODEL=${EMBEDDING_MODEL} # SSE endpoint for IDE agents at http://localhost:${FASTMCP_INDEXER_PORT:-8001}/sse ports: @@ -84,7 +84,7 @@ services: - FASTMCP_PORT=8000 - FASTMCP_TRANSPORT=${FASTMCP_HTTP_TRANSPORT} - QDRANT_URL=${QDRANT_URL} - - 
COLLECTION_NAME=${COLLECTION_NAME:-my-collection} + - COLLECTION_NAME=${COLLECTION_NAME:-codebase} - EMBEDDING_MODEL=${EMBEDDING_MODEL} - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER} - TOOL_STORE_DESCRIPTION=${TOOL_STORE_DESCRIPTION} @@ -119,7 +119,7 @@ services: - USE_GPU_DECODER=${USE_GPU_DECODER:-0} - LLAMACPP_TIMEOUT_SEC=${LLAMACPP_TIMEOUT_SEC:-180} - CTX_REQUIRE_IDENTIFIER=${CTX_REQUIRE_IDENTIFIER:-0} - - COLLECTION_NAME=${COLLECTION_NAME:-my-collection} + - COLLECTION_NAME=${COLLECTION_NAME:-codebase} - EMBEDDING_MODEL=${EMBEDDING_MODEL} # Streamable HTTP endpoint for IDE agents at http://localhost:${FASTMCP_INDEXER_HTTP_PORT:-8003}/mcp/ ports: @@ -186,7 +186,7 @@ services: - .env environment: - QDRANT_URL=${QDRANT_URL} - - COLLECTION_NAME=${COLLECTION_NAME:-my-collection} + - COLLECTION_NAME=${COLLECTION_NAME:-codebase} - EMBEDDING_MODEL=${EMBEDDING_MODEL} working_dir: /work volumes: @@ -205,7 +205,7 @@ services: - .env environment: - QDRANT_URL=${QDRANT_URL} - - COLLECTION_NAME=${COLLECTION_NAME:-my-collection} + - COLLECTION_NAME=${COLLECTION_NAME:-codebase} - EMBEDDING_MODEL=${EMBEDDING_MODEL} - WATCH_ROOT=/work # Watcher-specific backpressure & timeouts (safer defaults) @@ -231,7 +231,7 @@ services: - .env environment: - QDRANT_URL=${QDRANT_URL} - - COLLECTION_NAME=${COLLECTION_NAME:-my-collection} + - COLLECTION_NAME=${COLLECTION_NAME:-codebase} working_dir: /work volumes: - ${HOST_INDEX_PATH:-.}:/work:ro diff --git a/scripts/add_vector_name.py b/scripts/add_vector_name.py index c5769e96..81b79c2c 100644 --- a/scripts/add_vector_name.py +++ b/scripts/add_vector_name.py @@ -3,7 +3,7 @@ from qdrant_client import QdrantClient, models QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") -COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") cli = QdrantClient(url=QDRANT_URL) diff --git a/scripts/collection_health.py b/scripts/collection_health.py new file mode 100644 index 
00000000..55d53b3b --- /dev/null +++ b/scripts/collection_health.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python3 +""" +Collection health monitoring and self-healing for cache/collection sync issues. + +Detects when the local cache is out of sync with the actual Qdrant collection +and triggers corrective actions (cache clear + reindex). +""" +import os +import sys +from pathlib import Path +from typing import Optional, Dict, Any +import logging + +# Ensure project root is on sys.path +ROOT_DIR = Path(__file__).resolve().parent.parent +if str(ROOT_DIR) not in sys.path: + sys.path.insert(0, str(ROOT_DIR)) + +from scripts.workspace_state import ( + _read_cache, + _write_cache, + get_workspace_state, + update_workspace_state, +) + +logger = logging.getLogger(__name__) + + +def get_cached_files_count(workspace_path: str) -> int: + """Return the number of files tracked in the local cache.""" + try: + cache = _read_cache(workspace_path) + file_hashes = cache.get("file_hashes", {}) + return len(file_hashes) + except Exception as e: + logger.warning(f"Failed to read cache: {e}") + return 0 + + +def get_collection_points_count(collection_name: str, qdrant_url: Optional[str] = None) -> int: + """Return the number of points in the Qdrant collection.""" + try: + from qdrant_client import QdrantClient + + url = qdrant_url or os.environ.get("QDRANT_URL", "http://localhost:6333") + api_key = os.environ.get("QDRANT_API_KEY") + + client = QdrantClient( + url=url, + api_key=api_key or None, + timeout=int(os.environ.get("QDRANT_TIMEOUT", "20") or 20), + ) + + result = client.count(collection_name=collection_name, exact=True) + return int(getattr(result, "count", 0)) + except Exception as e: + logger.warning(f"Failed to get collection count: {e}") + return -1 + + +def get_unique_files_in_collection(collection_name: str, qdrant_url: Optional[str] = None) -> int: + """Return the number of unique files (distinct paths) in the collection.""" + try: + from qdrant_client import QdrantClient + 
from qdrant_client import models + + url = qdrant_url or os.environ.get("QDRANT_URL", "http://localhost:6333") + api_key = os.environ.get("QDRANT_API_KEY") + + client = QdrantClient( + url=url, + api_key=api_key or None, + timeout=int(os.environ.get("QDRANT_TIMEOUT", "20") or 20), + ) + + # Scroll through all points and collect unique paths + unique_paths = set() + offset = None + batch_size = 100 + + while True: + points, offset = client.scroll( + collection_name=collection_name, + limit=batch_size, + offset=offset, + with_payload=True, + ) + + if not points: + break + + for point in points: + try: + payload = point.payload or {} + metadata = payload.get("metadata", {}) + path = metadata.get("path") + if path: + unique_paths.add(str(path)) + except Exception: + continue + + if offset is None: + break + + return len(unique_paths) + except Exception as e: + logger.warning(f"Failed to count unique files: {e}") + return -1 + + +def clear_cache(workspace_path: str) -> bool: + """Clear the local file hash cache.""" + try: + cache = {"file_hashes": {}, "updated_at": ""} + _write_cache(workspace_path, cache) + logger.info(f"Cleared cache for workspace: {workspace_path}") + return True + except Exception as e: + logger.error(f"Failed to clear cache: {e}") + return False + + +def detect_collection_health( + workspace_path: str, + collection_name: str, + qdrant_url: Optional[str] = None, + threshold: float = 0.1, +) -> Dict[str, Any]: + """ + Detect cache/collection sync issues. 
+ + Returns a dict with: + - healthy: bool + - cached_files: int + - collection_points: int + - unique_files_in_collection: int + - issue: Optional[str] - description of the problem + - recommendation: Optional[str] - suggested fix + """ + cached_count = get_cached_files_count(workspace_path) + points_count = get_collection_points_count(collection_name, qdrant_url) + unique_files = get_unique_files_in_collection(collection_name, qdrant_url) + + result = { + "healthy": True, + "cached_files": cached_count, + "collection_points": points_count, + "unique_files_in_collection": unique_files, + "issue": None, + "recommendation": None, + } + + # Check 1: Collection is empty but cache has entries + if points_count == 0 and cached_count > 0: + result["healthy"] = False + result["issue"] = f"Collection is empty but cache has {cached_count} files" + result["recommendation"] = "Clear cache and force reindex" + return result + + # Check 2: Unique files in collection is way less than cached files + if unique_files >= 0 and cached_count > 0: + ratio = unique_files / cached_count if cached_count > 0 else 0 + if ratio < threshold: + result["healthy"] = False + result["issue"] = ( + f"Cache has {cached_count} files but collection only has {unique_files} " + f"unique files ({ratio:.1%} < {threshold:.0%} threshold)" + ) + result["recommendation"] = "Clear cache and force reindex" + return result + + # Check 3: Collection has points but no unique files detected (metadata issue) + if points_count > 0 and unique_files == 0: + result["healthy"] = False + result["issue"] = f"Collection has {points_count} points but no valid file paths in metadata" + result["recommendation"] = "Recreate collection with proper metadata" + return result + + return result + + +def auto_heal_if_needed( + workspace_path: str, + collection_name: str, + qdrant_url: Optional[str] = None, + dry_run: bool = False, +) -> Dict[str, Any]: + """ + Detect and automatically fix cache/collection sync issues. 
+ + Returns a dict with: + - action_taken: str + - health_check: Dict (from detect_collection_health) + """ + health = detect_collection_health(workspace_path, collection_name, qdrant_url) + + result = { + "action_taken": "none", + "health_check": health, + } + + if not health["healthy"]: + logger.warning(f"Collection health issue detected: {health['issue']}") + logger.info(f"Recommendation: {health['recommendation']}") + + if not dry_run: + if "Clear cache" in health["recommendation"]: + if clear_cache(workspace_path): + result["action_taken"] = "cleared_cache" + logger.info("Cache cleared. Reindex required.") + else: + result["action_taken"] = "clear_cache_failed" + else: + result["action_taken"] = "dry_run" + logger.info("Dry run mode - no action taken") + else: + logger.info("Collection health check passed") + + return result + + +def main(): + """CLI for health checking and healing.""" + import argparse + + parser = argparse.ArgumentParser(description="Check and heal collection health") + parser.add_argument( + "--workspace", + default=os.environ.get("WATCH_ROOT") or os.environ.get("WORKSPACE_PATH") or "/work", + help="Workspace path (default: WATCH_ROOT or /work)", + ) + parser.add_argument( + "--collection", + default=os.environ.get("COLLECTION_NAME", "codebase"), + help="Collection name (default: COLLECTION_NAME env or codebase)", + ) + parser.add_argument( + "--qdrant-url", + default=os.environ.get("QDRANT_URL", "http://localhost:6333"), + help="Qdrant URL (default: QDRANT_URL env or http://localhost:6333)", + ) + parser.add_argument( + "--auto-heal", + action="store_true", + help="Automatically fix issues (clear cache if needed)", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Check health but don't take action", + ) + + args = parser.parse_args() + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + ) + + if args.auto_heal: + result = auto_heal_if_needed( + args.workspace, + 
args.collection, + args.qdrant_url, + dry_run=args.dry_run, + ) + print(f"\nAction taken: {result['action_taken']}") + else: + health = detect_collection_health( + args.workspace, + args.collection, + args.qdrant_url, + ) + print(f"\nHealth check results:") + print(f" Healthy: {health['healthy']}") + print(f" Cached files: {health['cached_files']}") + print(f" Collection points: {health['collection_points']}") + print(f" Unique files in collection: {health['unique_files_in_collection']}") + if not health['healthy']: + print(f" Issue: {health['issue']}") + print(f" Recommendation: {health['recommendation']}") + + +if __name__ == "__main__": + main() + diff --git a/scripts/create_indexes.py b/scripts/create_indexes.py index 74793a90..c0f3ff62 100644 --- a/scripts/create_indexes.py +++ b/scripts/create_indexes.py @@ -11,12 +11,12 @@ update_last_activity = None # type: ignore get_collection_name = None # type: ignore -COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") # Discover workspace path for state updates (allows subdir indexing) WS_PATH = os.environ.get("INDEX_ROOT") or os.environ.get("WORKSPACE_PATH") or "/work" -# Prefer per-workspace unique collection if none provided -if (COLLECTION == "my-collection") and ('get_collection_name' in globals()) and get_collection_name: +# Use workspace state to get collection name (defaults to "codebase") +if 'get_collection_name' in globals() and get_collection_name: try: COLLECTION = get_collection_name(WS_PATH) except Exception: diff --git a/scripts/health_check.py b/scripts/health_check.py index b856a1d0..3a0137e7 100644 --- a/scripts/health_check.py +++ b/scripts/health_check.py @@ -27,7 +27,7 @@ def assert_true(cond: bool, msg: str): def main(): qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333") api_key = os.environ.get("QDRANT_API_KEY") - collection = os.environ.get("COLLECTION_NAME", "my-collection") + collection = 
os.environ.get("COLLECTION_NAME", "codebase") model_name = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") print(f"Health check -> {qdrant_url} collection={collection} model={model_name}") diff --git a/scripts/hybrid_search.py b/scripts/hybrid_search.py index cc6ed743..6dd1b25d 100644 --- a/scripts/hybrid_search.py +++ b/scripts/hybrid_search.py @@ -42,7 +42,7 @@ def _collection() -> str: - return os.environ.get("COLLECTION_NAME", "my-collection") + return os.environ.get("COLLECTION_NAME", "codebase") MODEL_NAME = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") diff --git a/scripts/ingest_code.py b/scripts/ingest_code.py index a8b47153..f3289c49 100644 --- a/scripts/ingest_code.py +++ b/scripts/ingest_code.py @@ -1911,14 +1911,12 @@ def index_repo( if vector_name is None: vector_name = _sanitize_vector_name(model_name) - # Workspace state: ensure unique per-workspace collection and announce start + # Workspace state: use single unified collection for seamless cross-repo search try: ws_path = str(root) - # If collection is unset or default placeholder, generate a per-workspace one + # Always use the unified collection (default: "codebase") if 'get_collection_name' in globals() and get_collection_name: - default_marker = os.environ.get("COLLECTION_NAME", "my-collection") - if (not collection) or (collection == "my-collection") or (default_marker == "my-collection"): - collection = get_collection_name(ws_path) + collection = get_collection_name(ws_path) if update_workspace_state: update_workspace_state(ws_path, {"qdrant_collection": collection}) if update_indexing_status: @@ -1937,6 +1935,22 @@ def index_repo( print( f"Indexing root={root} -> {qdrant_url} collection={collection} model={model_name} recreate={recreate}" ) + + # Health check: detect cache/collection sync issues before indexing + if not recreate and skip_unchanged: + try: + from scripts.collection_health import auto_heal_if_needed + print("[health_check] Checking collection 
health...") + heal_result = auto_heal_if_needed(str(root), collection, qdrant_url, dry_run=False) + if heal_result["action_taken"] == "cleared_cache": + print("[health_check] Cache cleared due to sync issue - forcing full reindex") + elif not heal_result["health_check"]["healthy"]: + print(f"[health_check] Issue detected: {heal_result['health_check']['issue']}") + else: + print("[health_check] Collection health OK") + except Exception as e: + print(f"[health_check] Warning: health check failed: {e}") + if recreate: recreate_collection(client, collection, dim, vector_name) else: @@ -2387,7 +2401,7 @@ def main(): qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333") api_key = os.environ.get("QDRANT_API_KEY") - collection = os.environ.get("COLLECTION_NAME", "my-collection") + collection = os.environ.get("COLLECTION_NAME", "codebase") model_name = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") index_repo( diff --git a/scripts/ingest_history.py b/scripts/ingest_history.py index 4c6f83c2..99645386 100644 --- a/scripts/ingest_history.py +++ b/scripts/ingest_history.py @@ -11,7 +11,7 @@ from qdrant_client import QdrantClient, models from fastembed import TextEmbedding -COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") MODEL_NAME = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333") API_KEY = os.environ.get("QDRANT_API_KEY") diff --git a/scripts/mcp_indexer_server.py b/scripts/mcp_indexer_server.py index 30739686..54eb9692 100644 --- a/scripts/mcp_indexer_server.py +++ b/scripts/mcp_indexer_server.py @@ -13,7 +13,7 @@ - FASTMCP_HOST (default: 0.0.0.0) - FASTMCP_INDEXER_PORT (default: 8001) - QDRANT_URL (e.g., http://qdrant:6333) — server expects Qdrant reachable via this env -- COLLECTION_NAME (default: my-collection) +- COLLECTION_NAME (default: codebase) — unified collection for seamless cross-repo 
search Conventions: - Repo content must be mounted at /work inside containers @@ -225,7 +225,7 @@ def _score(token: str) -> int: QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") -DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") MAX_LOG_TAIL = safe_int( os.environ.get("MCP_MAX_LOG_TAIL", "4000"), default=4000, @@ -827,13 +827,13 @@ async def qdrant_index_root( except Exception: pass - # Resolve collection: prefer explicit non-placeholder; otherwise workspace state + # Resolve collection: prefer explicit value; otherwise use workspace state try: _c = (collection or "").strip() except Exception: _c = "" - _placeholders = {"", "my-collection"} - if _c and _c not in _placeholders: + # Empty string means use workspace state default (codebase) + if _c: coll = _c else: try: @@ -1241,13 +1241,13 @@ async def qdrant_index( if not (real_root == "/work" or real_root.startswith("/work/")): return {"ok": False, "error": "subdir escapes /work sandbox"} root = real_root - # Resolve collection: prefer explicit non-placeholder; otherwise workspace state (use workspace root) + # Resolve collection: prefer explicit value; otherwise use workspace state (use workspace root) try: _c2 = (collection or "").strip() except Exception: _c2 = "" - _placeholders2 = {"", "my-collection"} - if _c2 and _c2 not in _placeholders2: + # Empty string means use workspace state default (codebase) + if _c2: coll = _c2 else: try: diff --git a/scripts/mcp_memory_server.py b/scripts/mcp_memory_server.py index 03f05459..5a782d82 100644 --- a/scripts/mcp_memory_server.py +++ b/scripts/mcp_memory_server.py @@ -9,7 +9,7 @@ # Env QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") -DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") LEX_VECTOR_NAME = os.environ.get("LEX_VECTOR_NAME", "lex") LEX_VECTOR_DIM = 
int(os.environ.get("LEX_VECTOR_DIM", "4096") or 4096) EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") diff --git a/scripts/prune.py b/scripts/prune.py index 50405dfb..05db0cab 100755 --- a/scripts/prune.py +++ b/scripts/prune.py @@ -6,7 +6,7 @@ from qdrant_client import QdrantClient, models -COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333") API_KEY = os.environ.get("QDRANT_API_KEY") ROOT = Path(os.environ.get("PRUNE_ROOT", ".")).resolve() diff --git a/scripts/query_named_vector.py b/scripts/query_named_vector.py index 308f67f2..0c5dd18b 100644 --- a/scripts/query_named_vector.py +++ b/scripts/query_named_vector.py @@ -4,7 +4,7 @@ from qdrant_client import QdrantClient QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") -COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") VEC_NAME = "fast-bge-base-en-v1.5" diff --git a/scripts/rerank_local.py b/scripts/rerank_local.py index de5a8633..56fe7453 100644 --- a/scripts/rerank_local.py +++ b/scripts/rerank_local.py @@ -14,7 +14,7 @@ ort = None Tokenizer = None -COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") MODEL_NAME = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") QDRANT_URL = os.environ.get("QDRANT_URL", "http://localhost:6333") API_KEY = os.environ.get("QDRANT_API_KEY") diff --git a/scripts/rerank_query.py b/scripts/rerank_query.py index 7f226d63..3efb4b36 100644 --- a/scripts/rerank_query.py +++ b/scripts/rerank_query.py @@ -11,7 +11,7 @@ # Env configuration QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") -COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +COLLECTION = 
os.environ.get("COLLECTION_NAME", "codebase") MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") # Quick-win boosts diff --git a/scripts/semantic_expansion.py b/scripts/semantic_expansion.py index 83625289..a8e94fa0 100644 --- a/scripts/semantic_expansion.py +++ b/scripts/semantic_expansion.py @@ -229,7 +229,7 @@ def expand_queries_semantically( model = TextEmbedding(model_name=model_name) if collection is None: - collection = os.environ.get("COLLECTION_NAME", "my-collection") + collection = os.environ.get("COLLECTION_NAME", "codebase") # If we don't have the required components, fall back to lexical expansion if not (client and model): diff --git a/scripts/smoke_test.py b/scripts/smoke_test.py index bbdfffc8..02174687 100644 --- a/scripts/smoke_test.py +++ b/scripts/smoke_test.py @@ -4,7 +4,7 @@ from qdrant_client import QdrantClient QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") -COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") # Derive the named vector consistently with ingest_code diff --git a/scripts/warm_start.py b/scripts/warm_start.py index bcd76c6d..70be8b37 100644 --- a/scripts/warm_start.py +++ b/scripts/warm_start.py @@ -38,7 +38,7 @@ def main(): args = parser.parse_args() QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") - COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") + COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") print( diff --git a/scripts/watch_index.py b/scripts/watch_index.py index 6052441d..ab503f61 100644 --- a/scripts/watch_index.py +++ b/scripts/watch_index.py @@ -34,7 +34,7 @@ import scripts.ingest_code as idx QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") -COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +COLLECTION = 
os.environ.get("COLLECTION_NAME", "codebase") MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") ROOT = Path(os.environ.get("WATCH_ROOT", "/work")).resolve() @@ -474,6 +474,20 @@ def main(): f"Watch mode: root={ROOT} qdrant={QDRANT_URL} collection={COLLECTION} model={MODEL}" ) + # Health check: detect and auto-heal cache/collection sync issues + try: + from scripts.collection_health import auto_heal_if_needed + print("[health_check] Checking collection health...") + heal_result = auto_heal_if_needed(str(ROOT), COLLECTION, QDRANT_URL, dry_run=False) + if heal_result["action_taken"] == "cleared_cache": + print("[health_check] Cache cleared due to sync issue - files will be reindexed") + elif not heal_result["health_check"]["healthy"]: + print(f"[health_check] Issue detected: {heal_result['health_check']['issue']}") + else: + print("[health_check] Collection health OK") + except Exception as e: + print(f"[health_check] Warning: health check failed: {e}") + client = QdrantClient( url=QDRANT_URL, timeout=int(os.environ.get("QDRANT_TIMEOUT", "20") or 20) ) diff --git a/scripts/workspace_state.py b/scripts/workspace_state.py index f1a10418..dfa6b4fb 100644 --- a/scripts/workspace_state.py +++ b/scripts/workspace_state.py @@ -217,11 +217,9 @@ def get_workspace_state(workspace_path: str) -> WorkspaceState: # Create new state now = datetime.now().isoformat() - env_coll = os.environ.get("COLLECTION_NAME") - if isinstance(env_coll, str) and env_coll.strip() and env_coll.strip() != "my-collection": - collection_name = env_coll.strip() - else: - collection_name = _generate_collection_name(workspace_path) + env_coll = os.environ.get("COLLECTION_NAME", "").strip() + # Use env var if set, otherwise default to "codebase" + collection_name = env_coll if env_coll else "codebase" state: WorkspaceState = { "workspace_path": str(Path(workspace_path).resolve()), @@ -284,33 +282,18 @@ def update_qdrant_stats(workspace_path: str, stats: QdrantStats) -> WorkspaceSta def 
get_collection_name(workspace_path: str) -> str: """Get the Qdrant collection name for a workspace. - If none is present in state, persist either COLLECTION_NAME from env or a generated - repoName- based on the workspace path, and return it. - Fix: treat placeholders as not-real so we don't collide across repos. - Placeholders include: empty string, "my-collection", and the env default if it equals "my-collection". - Only short-circuit when the stored name is already real. + Seamless single-collection mode: + - Defaults to "codebase" for unified cross-repo search + - All your code goes into one collection + - Override via COLLECTION_NAME env var if you need isolation """ - state = get_workspace_state(workspace_path) - coll = state.get("qdrant_collection") if isinstance(state, dict) else None - env_coll = os.environ.get("COLLECTION_NAME") - env_coll = env_coll.strip() if isinstance(env_coll, str) else "" - placeholders = {"", "my-collection"} - # If env is explicitly the default placeholder, consider it a placeholder too - if env_coll == "my-collection": - placeholders.add(env_coll) - - # If state has a real (non-placeholder) collection, keep it - if isinstance(coll, str): - c = coll.strip() - if c and c not in placeholders: - return c - - # Otherwise, prefer a non-placeholder explicit env override; else generate - if env_coll and env_coll not in placeholders: - coll = env_coll.strip() - else: - coll = _generate_collection_name(workspace_path) + env_coll = os.environ.get("COLLECTION_NAME", "").strip() + + # Use env var if set, otherwise default to unified "codebase" collection + coll = env_coll if env_coll else "codebase" + + # Persist to state for consistency update_workspace_state(workspace_path, {"qdrant_collection": coll}) return coll From 0e7f93daa8734f169a101d82153ebac1b972342a Mon Sep 17 00:00:00 2001 From: john donalson Date: Sun, 2 Nov 2025 21:57:02 -0500 Subject: [PATCH 2/5] add kubernetes support --- Dockerfile | 38 ++ deploy/kubernetes/Makefile | 199 
++++++++++ deploy/kubernetes/README.md | 472 ++++++++++++++++++++++++ deploy/kubernetes/cleanup.sh | 163 ++++++++ deploy/kubernetes/configmap.yaml | 75 ++++ deploy/kubernetes/deploy.sh | 250 +++++++++++++ deploy/kubernetes/indexer-services.yaml | 197 ++++++++++ deploy/kubernetes/ingress.yaml | 86 +++++ deploy/kubernetes/kustomization.yaml | 87 +++++ deploy/kubernetes/llamacpp.yaml | 171 +++++++++ deploy/kubernetes/mcp-http.yaml | 323 ++++++++++++++++ deploy/kubernetes/mcp-indexer.yaml | 182 +++++++++ deploy/kubernetes/mcp-memory.yaml | 148 ++++++++ deploy/kubernetes/namespace.yaml | 7 + deploy/kubernetes/qdrant.yaml | 126 +++++++ docs/MULTI_REPO_COLLECTIONS.md | 397 ++++++++++++++++++++ 16 files changed, 2921 insertions(+) create mode 100644 Dockerfile create mode 100644 deploy/kubernetes/Makefile create mode 100644 deploy/kubernetes/README.md create mode 100755 deploy/kubernetes/cleanup.sh create mode 100644 deploy/kubernetes/configmap.yaml create mode 100755 deploy/kubernetes/deploy.sh create mode 100644 deploy/kubernetes/indexer-services.yaml create mode 100644 deploy/kubernetes/ingress.yaml create mode 100644 deploy/kubernetes/kustomization.yaml create mode 100644 deploy/kubernetes/llamacpp.yaml create mode 100644 deploy/kubernetes/mcp-http.yaml create mode 100644 deploy/kubernetes/mcp-indexer.yaml create mode 100644 deploy/kubernetes/mcp-memory.yaml create mode 100644 deploy/kubernetes/namespace.yaml create mode 100644 deploy/kubernetes/qdrant.yaml create mode 100644 docs/MULTI_REPO_COLLECTIONS.md diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..a3b544b8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,38 @@ +# Unified Context-Engine image for Kubernetes deployment +# Supports multiple roles: memory, indexer, watcher, llamacpp +FROM python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + WORK_ROOTS="/work,/app" + +# Install OS dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + 
ca-certificates \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Install Python dependencies for all services +RUN pip install --no-cache-dir --upgrade \ + qdrant-client \ + fastembed \ + watchdog \ + onnxruntime \ + tokenizers \ + tree_sitter \ + tree_sitter_languages \ + mcp \ + fastmcp + +# Copy scripts for all services +COPY scripts /app/scripts + +# Create directories +WORKDIR /work + +# Expose all necessary ports +EXPOSE 8000 8001 8002 8003 18000 18001 18002 18003 + +# Default to memory server +CMD ["python", "/app/scripts/mcp_memory_server.py"] \ No newline at end of file diff --git a/deploy/kubernetes/Makefile b/deploy/kubernetes/Makefile new file mode 100644 index 00000000..8307bbbe --- /dev/null +++ b/deploy/kubernetes/Makefile @@ -0,0 +1,199 @@ +# Context-Engine Kubernetes Deployment Makefile + +# Configuration +NAMESPACE ?= context-engine +IMAGE_REGISTRY ?= context-engine +IMAGE_TAG ?= latest + +# Default target +.PHONY: help +help: ## Show this help message + @echo "Context-Engine Kubernetes Deployment Commands" + @echo "" + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' + +# Prerequisites +.PHONY: check-kubectl +check-kubectl: ## Check if kubectl is available and cluster is accessible + @which kubectl > /dev/null || (echo "kubectl not found. Please install kubectl." && exit 1) + @kubectl cluster-info > /dev/null || (echo "Cannot connect to Kubernetes cluster." && exit 1) + @echo "✓ Kubernetes connection verified" + +# Deployment targets +.PHONY: deploy +deploy: check-kubectl ## Deploy all Context-Engine services + ./deploy.sh --namespace $(NAMESPACE) --registry $(IMAGE_REGISTRY) --tag $(IMAGE_TAG) + +.PHONY: deploy-core +deploy-core: check-kubectl ## Deploy only core services (Qdrant + MCP servers) + @echo "Deploying core services..." 
+ kubectl apply -f namespace.yaml + kubectl apply -f configmap.yaml + kubectl apply -f qdrant.yaml + kubectl apply -f mcp-memory.yaml + kubectl apply -f mcp-indexer.yaml + +.PHONY: deploy-full +deploy-full: check-kubectl ## Deploy all services including optional ones + ./deploy.sh --namespace $(NAMESPACE) --registry $(IMAGE_REGISTRY) --tag $(IMAGE_TAG) --deploy-ingress + +.PHONY: deploy-minimal +deploy-minimal: check-kubectl ## Deploy minimal setup (skip Llama.cpp and Ingress) + ./deploy.sh --namespace $(NAMESPACE) --registry $(IMAGE_REGISTRY) --tag $(IMAGE_TAG) --skip-llamacpp + +# Kustomize targets +.PHONY: kustomize-build +kustomize-build: ## Build manifests with Kustomize + kustomize build . + +.PHONY: kustomize-apply +kustomize-apply: check-kubectl ## Apply manifests with Kustomize + kustomize build . | kubectl apply -f - + +.PHONY: kustomize-delete +kustomize-delete: check-kubectl ## Delete manifests with Kustomize + kustomize build . | kubectl delete -f - + +# Management targets +.PHONY: status +status: check-kubectl ## Show deployment status + @echo "=== Namespace Status ===" + kubectl get namespace $(NAMESPACE) || echo "Namespace $(NAMESPACE) not found" + @echo "" + @echo "=== Pods ===" + kubectl get pods -n $(NAMESPACE) -o wide || echo "No pods found" + @echo "" + @echo "=== Services ===" + kubectl get services -n $(NAMESPACE) || echo "No services found" + @echo "" + @echo "=== Deployments ===" + kubectl get deployments -n $(NAMESPACE) || echo "No deployments found" + @echo "" + @echo "=== StatefulSets ===" + kubectl get statefulsets -n $(NAMESPACE) || echo "No statefulsets found" + @echo "" + @echo "=== PersistentVolumeClaims ===" + kubectl get pvc -n $(NAMESPACE) || echo "No PVCs found" + @echo "" + @echo "=== Jobs ===" + kubectl get jobs -n $(NAMESPACE) || echo "No jobs found" + +.PHONY: logs +logs: check-kubectl ## Show logs for all services + @echo "=== Qdrant Logs ===" + kubectl logs -f statefulset/qdrant -n $(NAMESPACE) --tail=50 || echo "Qdrant 
logs not available" + +.PHONY: logs-service +logs-service: check-kubectl ## Show logs for specific service (usage: make logs-service SERVICE=mcp-memory) + @if [ -z "$(SERVICE)" ]; then echo "Usage: make logs-service SERVICE="; exit 1; fi + kubectl logs -f deployment/$(SERVICE) -n $(NAMESPACE) --tail=100 || kubectl logs -f statefulset/$(SERVICE) -n $(NAMESPACE) --tail=100 || kubectl logs -f job/$(SERVICE) -n $(NAMESPACE) --tail=100 || echo "Service $(SERVICE) not found" + +.PHONY: shell +shell: check-kubectl ## Get a shell in a running pod (usage: make shell POD=mcp-memory-xxx) + @if [ -z "$(POD)" ]; then echo "Usage: make shell POD="; echo "Available pods:"; kubectl get pods -n $(NAMESPACE); exit 1; fi + kubectl exec -it $(POD) -n $(NAMESPACE) -- /bin/bash || kubectl exec -it $(POD) -n $(NAMESPACE) -- /bin/sh + +# Cleanup targets +.PHONY: cleanup +cleanup: check-kubectl ## Remove all Context-Engine resources + ./cleanup.sh --namespace $(NAMESPACE) + +.PHONY: clean-force +clean-force: check-kubectl ## Force cleanup without confirmation + ./cleanup.sh --namespace $(NAMESPACE) --force + +# Development targets +.PHONY: restart +restart: check-kubectl ## Restart all deployments + kubectl rollout restart deployment -n $(NAMESPACE) + kubectl rollout restart statefulset -n $(NAMESPACE) + +.PHONY: restart-service +restart-service: check-kubectl ## Restart specific service (usage: make restart-service SERVICE=mcp-memory) + @if [ -z "$(SERVICE)" ]; then echo "Usage: make restart-service SERVICE="; exit 1; fi + kubectl rollout restart deployment/$(SERVICE) -n $(NAMESPACE) || kubectl rollout restart statefulset/$(SERVICE) -n $(NAMESPACE) + +.PHONY: scale +scale: check-kubectl ## Scale a deployment (usage: make scale SERVICE=mcp-memory REPLICAS=3) + @if [ -z "$(SERVICE)" ] || [ -z "$(REPLICAS)" ]; then echo "Usage: make scale SERVICE= REPLICAS="; exit 1; fi + kubectl scale deployment $(SERVICE) -n $(NAMESPACE) --replicas=$(REPLICAS) + +# Port forwarding targets +.PHONY: 
port-forward +port-forward: check-kubectl ## Port forward all services + @echo "Opening port forwards in background..." + @kubectl port-forward -n $(NAMESPACE) service/qdrant 6333:6333 & + @kubectl port-forward -n $(NAMESPACE) service/mcp-memory 8000:8000 & + @kubectl port-forward -n $(NAMESPACE) service/mcp-indexer 8001:8001 & + @echo "Port forwards started. Use 'make stop-port-forward' to stop." + +.PHONY: port-forward-service +port-forward-service: check-kubectl ## Port forward specific service (usage: make port-forward-service SERVICE=qdrant LOCAL=6333 REMOTE=6333) + @if [ -z "$(SERVICE)" ] || [ -z "$(LOCAL)" ] || [ -z "$(REMOTE)" ]; then echo "Usage: make port-forward-service SERVICE= LOCAL= REMOTE="; exit 1; fi + kubectl port-forward -n $(NAMESPACE) service/$(SERVICE) $(LOCAL):$(REMOTE) + +.PHONY: stop-port-forward +stop-port-forward: ## Stop all port forwards + pkill -f "kubectl port-forward" || echo "No port forwards found" + +# Build and push targets +.PHONY: build-image +build-image: ## Build Docker image + docker build -t $(IMAGE_REGISTRY)/context-engine:$(IMAGE_TAG) ../../ + +.PHONY: push-image +push-image: build-image ## Push Docker image to registry + docker push $(IMAGE_REGISTRY)/context-engine:$(IMAGE_TAG) + +# Test targets +.PHONY: test-connection +test-connection: check-kubectl ## Test connectivity to all services + @echo "Testing service connectivity..." 
+ @echo "Qdrant:" + @kubectl run qdrant-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://qdrant.$(NAMESPACE).svc.cluster.local:6333/health || echo "Qdrant test failed" + @echo "MCP Memory:" + @kubectl run memory-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://mcp-memory.$(NAMESPACE).svc.cluster.local:18000/health || echo "MCP Memory test failed" + @echo "MCP Indexer:" + @kubectl run indexer-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://mcp-indexer.$(NAMESPACE).svc.cluster.local:18001/health || echo "MCP Indexer test failed" + +# Configuration targets +.PHONY: show-config +show-config: ## Show current configuration + @echo "Configuration:" + @echo " NAMESPACE: $(NAMESPACE)" + @echo " IMAGE_REGISTRY: $(IMAGE_REGISTRY)" + @echo " IMAGE_TAG: $(IMAGE_TAG)" + @echo "" + @echo "Quick start commands:" + @echo " make deploy # Deploy all services" + @echo " make status # Show deployment status" + @echo " make logs-service SERVICE=mcp-memory # Show logs" + @echo " make cleanup # Remove everything" + +.PHONY: show-urls +show-urls: check-kubectl ## Show access URLs for services + @echo "Service URLs (via NodePort):" + @echo " Qdrant: http://:30333" + @echo " MCP Memory (SSE): http://:30800" + @echo " MCP Indexer (SSE): http://:30802" + @echo " MCP Memory (HTTP): http://:30804" + @echo " MCP Indexer (HTTP): http://:30806" + @echo " Llama.cpp: http://:30808" + @echo "" + @echo "Service URLs (via port-forward):" + @echo " make port-forward # Then access via localhost ports" + +# Advanced targets +.PHONY: watch-deployment +watch-deployment: check-kubectl ## Watch deployment progress + watch kubectl get pods,services,deployments -n $(NAMESPACE) + +.PHONY: describe-service +describe-service: check-kubectl ## Describe a service (usage: make describe-service SERVICE=mcp-memory) + @if [ -z "$(SERVICE)" ]; then echo "Usage: make describe-service SERVICE="; echo "Available 
services:"; kubectl get services -n $(NAMESPACE); exit 1; fi + kubectl describe service $(SERVICE) -n $(NAMESPACE) + +.PHONY: events +events: check-kubectl ## Show recent events + kubectl get events -n $(NAMESPACE) --sort-by=.metadata.creationTimestamp + diff --git a/deploy/kubernetes/README.md b/deploy/kubernetes/README.md new file mode 100644 index 00000000..b88fbc23 --- /dev/null +++ b/deploy/kubernetes/README.md @@ -0,0 +1,472 @@ +# Kubernetes Deployment Guide + +## Overview + +This directory contains Kubernetes manifests for deploying Context Engine on a remote cluster using **Kustomize**. This enables: + +- **Remote development** from thin clients with cluster-based heavy lifting +- **Multi-repository indexing** with unified `codebase` collection +- **Scalable architecture** with independent watcher deployments per repo +- **Kustomize-based configuration** for easy customization and overlays + +## Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Kubernetes Cluster β”‚ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Qdrant β”‚ β”‚ Memory MCP β”‚ β”‚ Indexer MCP β”‚ β”‚ +β”‚ β”‚ StatefulSet β”‚ β”‚ Deployment β”‚ β”‚ Deployment β”‚ β”‚ +β”‚ β”‚ Port: 6333 β”‚ β”‚ Port: 8000 β”‚ β”‚ Port: 8001 β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ 
β”Œβ”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ PersistentVolume (qdrant-storage) β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Watcher β”‚ β”‚ Watcher β”‚ β”‚ Watcher β”‚ β”‚ +β”‚ β”‚ (repo-1) β”‚ β”‚ (repo-2) β”‚ β”‚ (repo-3) β”‚ β”‚ +β”‚ β”‚ Deployment β”‚ β”‚ Deployment β”‚ β”‚ Deployment β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β–Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ HostPath Volume (repos) β”‚ β”‚ +β”‚ β”‚ /tmp/context-engine-repos/repo-1/ β”‚ β”‚ +β”‚ β”‚ /tmp/context-engine-repos/repo-2/ β”‚ β”‚ +β”‚ β”‚ /tmp/context-engine-repos/repo-3/ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## Quick Start + +### Prerequisites + +- Kubernetes cluster (1.19+) +- `kubectl` configured to access your cluster +- `kustomize` (optional, kubectl has built-in support) +- Docker image built and pushed to a registry + +### 1. 
Build and Push Image + +```bash +# Build unified image +docker build -t your-registry/context-engine:latest . + +# Push to registry +docker push your-registry/context-engine:latest +``` + +### 2. Update Image References + +Edit `kustomization.yaml` to use your registry: + +```yaml +images: + - name: context-engine + newName: your-registry/context-engine + newTag: latest +``` + +### 3. Deploy Using Kustomize + +```bash +# Option 1: Using kubectl with kustomize +kubectl apply -k . + +# Option 2: Using kustomize CLI +kustomize build . | kubectl apply -f - + +# Option 3: Using the deploy script +./deploy.sh --registry your-registry --tag latest +``` + +### 4. Deploy Using Makefile + +```bash +# Deploy all services +make deploy + +# Or deploy core services only +make deploy-core + +# Check status +make status +``` + +### 5. Verify Deployment + +```bash +# Check all pods are running +kubectl get pods -n context-engine + +# Check services +kubectl get svc -n context-engine + +# View logs +make logs-service SERVICE=mcp-memory +``` + +### 6. 
Access Services + +```bash +# Port forward to localhost +make port-forward + +# Or access via NodePort +# Qdrant: http://:30333 +# MCP Memory: http://:30800 +# MCP Indexer: http://:30802 +``` + +## Configuration + +### Automatic Model Download + +The Llama.cpp deployment includes an **init container** that automatically downloads the model on first startup: + +- **Default Model**: Qwen2.5-1.5B-Instruct (Q8_0 quantization, ~1.7GB) +- **Download Location**: `/tmp/context-engine-models/` on the Kubernetes node +- **Behavior**: Downloads only if model doesn't exist (idempotent) +- **Good balance**: Fast, accurate, small footprint + +To use a different model, edit `configmap.yaml`: + +```yaml +# Model download configuration +LLAMACPP_MODEL_URL: "https://huggingface.co/your-org/your-model/resolve/main/model.gguf" +LLAMACPP_MODEL_NAME: "model.gguf" +``` + +**Alternative Models**: +- **Qwen2.5-0.5B-Instruct-Q8** (~500MB) - Tiny, very fast +- **Qwen2.5-1.5B-Instruct-Q8** (default, ~1.7GB) - Best balance +- **Granite-3.0-3B-Instruct-Q8** (~3.2GB) - Higher quality +- **Phi-3-mini-4k-instruct-Q8** (~4GB) - High quality + +### Environment Variables (ConfigMap) + +Key environment variables in `configmap.yaml`: + +```yaml +COLLECTION_NAME: "codebase" # Unified collection for all repos +EMBEDDING_MODEL: "BAAI/bge-base-en-v1.5" +QDRANT_URL: "http://qdrant:6333" +INDEX_MICRO_CHUNKS: "1" +MAX_MICRO_CHUNKS_PER_FILE: "200" +WATCH_DEBOUNCE_SECS: "1.5" +``` + +### Persistent Volumes + +Two persistent volumes are required: + +1. **qdrant-storage**: Stores Qdrant vector database + - Size: 50Gi (adjust based on codebase size) + - Access: ReadWriteOnce + +2. 
**repos-storage**: Stores repository code + - Size: 100Gi (adjust based on number/size of repos) + - Access: ReadWriteMany (required for multiple watchers) + +### Resource Requests/Limits + +Adjust based on your cluster capacity: + +```yaml +# Qdrant (memory-intensive) +resources: + requests: + memory: "4Gi" + cpu: "2" + limits: + memory: "8Gi" + cpu: "4" + +# MCP Servers (moderate) +resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "4Gi" + cpu: "2" + +# Watchers (light) +resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "1Gi" + cpu: "1" +``` + +## Multi-Repository Setup + +### Adding a New Repository + +1. **Upload repository code** to the repos volume: + ```bash + # Using uploader service + python scripts/upload_repo.py --repo-name my-service --path /local/path/to/repo + + # Or using kubectl cp + kubectl cp /local/path/to/repo context-engine/uploader-pod:/repos/my-service + ``` + +2. **Create watcher deployment** for the new repo: + ```bash + # Copy and modify watcher template + cp watcher-backend-deployment.yaml watcher-my-service-deployment.yaml + + # Edit: change WATCH_ROOT, REPO_NAME, and volume subPath + # Then apply + kubectl apply -f watcher-my-service-deployment.yaml -n context-engine + ``` + +3. **Verify indexing**: + ```bash + # Check watcher logs + kubectl logs -f deployment/watcher-my-service -n context-engine + + # Check collection status via MCP + curl http://mcp-indexer:8001/sse + ``` + +### Repository Volume Structure + +``` +/repos/ +├── backend/ +│ ├── .codebase/ +│ │ └── state.json +│ ├── src/ +│ ├── tests/ +│ └── ... +├── frontend/ +│ ├── .codebase/ +│ │ └── state.json +│ ├── src/ +│ └── ... +└── ml-service/ + ├── .codebase/ + │ └── state.json + ├── models/ + └── ... 
+``` + +## Accessing Services + +### From Within Cluster + +Services are accessible via Kubernetes DNS: + +- Qdrant: `http://qdrant:6333` +- Memory MCP: `http://mcp-memory:8000/sse` +- Indexer MCP: `http://mcp-indexer:8001/sse` + +### From Outside Cluster + +#### Option 1: Port Forwarding (Development) + +```bash +# Forward MCP services to localhost +kubectl port-forward -n context-engine svc/mcp-memory 8000:8000 +kubectl port-forward -n context-engine svc/mcp-indexer 8001:8001 +kubectl port-forward -n context-engine svc/qdrant 6333:6333 +``` + +Then configure your IDE to use `http://localhost:8000/sse` and `http://localhost:8001/sse`. + +#### Option 2: Ingress (Production) + +```yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: context-engine-ingress + namespace: context-engine +spec: + rules: + - host: mcp.your-domain.com + http: + paths: + - path: /memory + pathType: Prefix + backend: + service: + name: mcp-memory + port: + number: 8000 + - path: /indexer + pathType: Prefix + backend: + service: + name: mcp-indexer + port: + number: 8001 +``` + +#### Option 3: LoadBalancer (Cloud) + +Change service type to `LoadBalancer` in service manifests: + +```yaml +spec: + type: LoadBalancer + ports: + - port: 8000 + targetPort: 8000 +``` + +## Monitoring and Maintenance + +### Health Checks + +```bash +# Check Qdrant health +kubectl exec -n context-engine qdrant-0 -- curl -f http://localhost:6333/readyz + +# Check MCP server health +kubectl exec -n context-engine deployment/mcp-memory -- curl -f http://localhost:18000/health +kubectl exec -n context-engine deployment/mcp-indexer -- curl -f http://localhost:18001/health +``` + +### Logs + +```bash +# View logs for specific service +kubectl logs -f -n context-engine deployment/mcp-memory +kubectl logs -f -n context-engine deployment/mcp-indexer +kubectl logs -f -n context-engine deployment/watcher-backend + +# View logs for all watchers +kubectl logs -f -n context-engine -l app=watcher +``` + +### 
Collection Status + +```bash +# Port forward indexer MCP +kubectl port-forward -n context-engine svc/mcp-indexer 8001:8001 + +# Check collection status +curl -X POST http://localhost:8001/mcp \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/call","params":{"name":"qdrant_status","arguments":{}}}' +``` + +### Backup and Restore + +#### Backup Qdrant Data + +```bash +# Create snapshot +kubectl exec -n context-engine qdrant-0 -- \ + curl -X POST http://localhost:6333/collections/codebase/snapshots + +# Copy snapshot to local +kubectl cp context-engine/qdrant-0:/qdrant/storage/snapshots/codebase-snapshot.tar \ + ./backup/codebase-snapshot.tar +``` + +#### Restore Qdrant Data + +```bash +# Copy snapshot to pod +kubectl cp ./backup/codebase-snapshot.tar \ + context-engine/qdrant-0:/qdrant/storage/snapshots/ + +# Restore snapshot +kubectl exec -n context-engine qdrant-0 -- \ + curl -X POST http://localhost:6333/collections/codebase/snapshots/upload \ + -F 'snapshot=@/qdrant/storage/snapshots/codebase-snapshot.tar' +``` + +## Troubleshooting + +### Pods Not Starting + +```bash +# Check pod status +kubectl describe pod -n context-engine + +# Check events +kubectl get events -n context-engine --sort-by='.lastTimestamp' +``` + +### Persistent Volume Issues + +```bash +# Check PV/PVC status +kubectl get pv,pvc -n context-engine + +# Check PVC events +kubectl describe pvc -n context-engine +``` + +### Watcher Not Indexing + +```bash +# Check watcher logs +kubectl logs -f -n context-engine deployment/watcher-backend + +# Verify volume mount +kubectl exec -n context-engine deployment/watcher-backend -- ls -la /repos/backend + +# Check Qdrant connectivity +kubectl exec -n context-engine deployment/watcher-backend -- \ + curl -f http://qdrant:6333/readyz +``` + +### MCP Connection Issues + +```bash +# Test SSE endpoint +kubectl exec -n context-engine deployment/mcp-indexer -- \ + curl -H "Accept: text/event-stream" http://localhost:8001/sse + 
+# Check service endpoints +kubectl get endpoints -n context-engine +``` + +## Scaling + +### Horizontal Scaling + +- **MCP Servers**: Can run multiple replicas behind a service +- **Watchers**: One per repository (do not scale horizontally) +- **Qdrant**: Single instance (StatefulSet with replicas=1) + +### Vertical Scaling + +Adjust resource requests/limits based on workload: + +```bash +# Edit deployment +kubectl edit deployment -n context-engine mcp-indexer + +# Or patch +kubectl patch deployment -n context-engine mcp-indexer -p \ + '{"spec":{"template":{"spec":{"containers":[{"name":"mcp-indexer","resources":{"requests":{"memory":"4Gi"}}}]}}}}' +``` + +## Security Considerations + +1. **Network Policies**: Restrict pod-to-pod communication +2. **RBAC**: Limit service account permissions +3. **Secrets Management**: Use Kubernetes secrets or external secret managers +4. **TLS**: Enable TLS for external access via Ingress +5. **Resource Quotas**: Set namespace resource quotas + +## See Also + +- [Multi-Repository Collections Guide](../../docs/MULTI_REPO_COLLECTIONS.md) +- [MCP API Reference](../../docs/MCP_API.md) +- [Architecture Overview](../../docs/ARCHITECTURE.md) + diff --git a/deploy/kubernetes/cleanup.sh b/deploy/kubernetes/cleanup.sh new file mode 100755 index 00000000..2ce5d64d --- /dev/null +++ b/deploy/kubernetes/cleanup.sh @@ -0,0 +1,163 @@ +#!/bin/bash + +# Context-Engine Kubernetes Cleanup Script +# This script removes all Context-Engine resources from Kubernetes + +set -e + +# Configuration +NAMESPACE="context-engine" +FORCE=false + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Logging functions +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if kubectl is available 
+check_kubectl() { + if ! command -v kubectl &> /dev/null; then + log_error "kubectl is not installed or not in PATH" + exit 1 + fi + + if ! kubectl cluster-info &> /dev/null; then + log_error "Cannot connect to Kubernetes cluster" + exit 1 + fi + + log_success "Kubernetes connection verified" +} + +# Confirm cleanup +confirm_cleanup() { + if [[ "$FORCE" != "true" ]]; then + log_warning "This will delete all Context-Engine resources in namespace: $NAMESPACE" + read -p "Are you sure you want to continue? (yes/no): " -r + echo + if [[ ! $REPLY =~ ^[Yy][Ee][Ss]$ ]]; then + log_info "Cleanup cancelled" + exit 0 + fi + fi +} + +# Delete resources +cleanup_resources() { + log_info "Cleaning up Context-Engine resources..." + + # Delete deployments + log_info "Deleting deployments..." + kubectl delete deployment --all -n $NAMESPACE --ignore-not-found=true + + # Delete statefulsets + log_info "Deleting statefulsets..." + kubectl delete statefulset --all -n $NAMESPACE --ignore-not-found=true + + # Delete jobs + log_info "Deleting jobs..." + kubectl delete job --all -n $NAMESPACE --ignore-not-found=true + + # Delete services + log_info "Deleting services..." + kubectl delete service --all -n $NAMESPACE --ignore-not-found=true + + # Delete ingress + log_info "Deleting ingress..." + kubectl delete ingress --all -n $NAMESPACE --ignore-not-found=true + + # Delete configmaps + log_info "Deleting configmaps..." + kubectl delete configmap --all -n $NAMESPACE --ignore-not-found=true + + # Delete secrets + log_info "Deleting secrets..." + kubectl delete secret --all -n $NAMESPACE --ignore-not-found=true + + # Delete PVCs + log_info "Deleting persistent volume claims..." + kubectl delete pvc --all -n $NAMESPACE --ignore-not-found=true + + # Delete namespace + log_info "Deleting namespace..." + kubectl delete namespace $NAMESPACE --ignore-not-found=true + + log_success "Cleanup complete!" 
+} + +# Help function +show_help() { + echo "Context-Engine Kubernetes Cleanup Script" + echo + echo "Usage: $0 [OPTIONS]" + echo + echo "Options:" + echo " -h, --help Show this help message" + echo " --namespace NAMESPACE Kubernetes namespace (default: context-engine)" + echo " --force Skip confirmation prompt" + echo + echo "Examples:" + echo " $0 # Interactive cleanup" + echo " $0 --force # Force cleanup without confirmation" + echo " $0 --namespace my-ns # Cleanup specific namespace" +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + show_help + exit 0 + ;; + --namespace) + NAMESPACE="$2" + shift 2 + ;; + --force) + FORCE=true + shift + ;; + *) + log_error "Unknown option: $1" + show_help + exit 1 + ;; + esac +done + +# Main cleanup function +main() { + log_info "Starting Context-Engine Kubernetes cleanup" + + # Check prerequisites + check_kubectl + + # Confirm cleanup + confirm_cleanup + + # Cleanup resources + cleanup_resources +} + +# Run main cleanup +main + diff --git a/deploy/kubernetes/configmap.yaml b/deploy/kubernetes/configmap.yaml new file mode 100644 index 00000000..0c514bc2 --- /dev/null +++ b/deploy/kubernetes/configmap.yaml @@ -0,0 +1,75 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: context-engine-config + namespace: context-engine + labels: + app: context-engine +data: + # Core Configuration + COLLECTION_NAME: "codebase" + EMBEDDING_MODEL: "BAAI/bge-base-en-v1.5" + EMBEDDING_PROVIDER: "fastembed" + + # Qdrant Configuration + QDRANT_URL: "http://qdrant:6333" + QDRANT_TIMEOUT: "60" + + # Indexing Configuration + INDEX_MICRO_CHUNKS: "1" + MAX_MICRO_CHUNKS_PER_FILE: "200" + INDEX_CHUNK_LINES: "120" + INDEX_CHUNK_OVERLAP: "20" + INDEX_BATCH_SIZE: "64" + INDEX_UPSERT_BATCH: "128" + INDEX_UPSERT_RETRIES: "5" + INDEX_UPSERT_BACKOFF: "0.5" + + # Watcher Configuration + WATCH_DEBOUNCE_SECS: "1.5" + + # ReFRAG Configuration + REFRAG_MODE: "1" + REFRAG_GATE_FIRST: "1" + REFRAG_CANDIDATES: "200" + 
MICRO_CHUNK_TOKENS: "16" + MICRO_CHUNK_STRIDE: "8" + MICRO_OUT_MAX_SPANS: "3" + MICRO_MERGE_LINES: "4" + MICRO_BUDGET_TOKENS: "512" + MICRO_TOKENS_PER_LINE: "32" + + # Decoder Configuration (optional) + REFRAG_DECODER: "1" + REFRAG_RUNTIME: "llamacpp" + LLAMACPP_URL: "http://llamacpp:8080" + LLAMACPP_TIMEOUT_SEC: "180" + DECODER_MAX_TOKENS: "4000" + + # Model download configuration (for init container) + LLAMACPP_MODEL_URL: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q8_0.gguf" + LLAMACPP_MODEL_NAME: "qwen2.5-1.5b-instruct-q8_0.gguf" + + # Reranker Configuration + RERANKER_ENABLED: "1" + + # MCP Configuration + FASTMCP_HOST: "0.0.0.0" + FASTMCP_PORT: "8000" + FASTMCP_INDEXER_PORT: "8001" + FASTMCP_HEALTH_PORT: "18000" + + # Memory Configuration + MEMORY_SSE_ENABLED: "true" + MEMORY_MCP_URL: "http://mcp-memory:8000/sse" + MEMORY_MCP_TIMEOUT: "6" + + # Multi-collection Configuration + CTX_MULTI_COLLECTION: "1" + CTX_DOC_PASS: "1" + + # Logging + DEBUG_CONTEXT_ANSWER: "0" + + # Tokenizer + TOKENIZER_JSON: "/app/models/tokenizer.json" diff --git a/deploy/kubernetes/deploy.sh b/deploy/kubernetes/deploy.sh new file mode 100755 index 00000000..75e6bdfe --- /dev/null +++ b/deploy/kubernetes/deploy.sh @@ -0,0 +1,250 @@ +#!/bin/bash + +# Context-Engine Kubernetes Deployment Script +# This script deploys Context-Engine services to Kubernetes + +set -e + +# Configuration +NAMESPACE="context-engine" +IMAGE_REGISTRY="context-engine" +IMAGE_TAG="latest" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Logging functions +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if kubectl is available +check_kubectl() { + if ! 
command -v kubectl &> /dev/null; then
+        log_error "kubectl is not installed or not in PATH"
+        exit 1
+    fi
+
+    if ! kubectl cluster-info &> /dev/null; then
+        log_error "Cannot connect to Kubernetes cluster"
+        exit 1
+    fi
+
+    log_success "Kubernetes connection verified"
+}
+
+# Wait (up to 5 minutes) for the pods of one component to become Ready.
+#   $1 - value of the "component" label to select pods by
+#   $2 - human-readable name used in the warning message
+# A timeout is deliberately non-fatal: it only logs a warning so the rest of
+# the deployment can proceed under `set -e`.
+wait_for_component() {
+    local component="$1"
+    local description="$2"
+
+    kubectl wait --for=condition=ready pod -l "component=${component}" \
+        -n "$NAMESPACE" --timeout=300s \
+        || log_warning "${description} may not be ready yet"
+}
+
+# Abort with a usage message when an option that needs a value has none.
+#   $1 - the option as typed (e.g. --tag)
+#   $2 - the candidate value (pass "${2:-}" from the parser)
+# Without this check a trailing "-t" makes `shift 2` fail and, because of
+# `set -e`, the script would exit silently.
+require_value() {
+    if [[ -z "$2" || "$2" == -* ]]; then
+        log_error "Option $1 requires a value"
+        show_help
+        exit 1
+    fi
+}
+
+# Create namespace if it doesn't exist.
+# NOTE: namespace.yaml and the other manifests applied by this script name
+# their namespace explicitly (context-engine), so --namespace only changes
+# where this script waits for and lists resources. Keep both in sync.
+create_namespace() {
+    log_info "Creating namespace: $NAMESPACE"
+    kubectl apply -f namespace.yaml
+    log_success "Namespace created/verified"
+}
+
+# Deploy configuration (the shared ConfigMap every workload reads from)
+deploy_config() {
+    log_info "Deploying configuration"
+    kubectl apply -f configmap.yaml
+    log_success "Configuration deployed"
+}
+
+# Deploy core services
+deploy_core() {
+    log_info "Deploying core services"
+
+    # Deploy Qdrant
+    log_info "Deploying Qdrant database..."
+    kubectl apply -f qdrant.yaml
+
+    # Wait for Qdrant to be ready
+    log_info "Waiting for Qdrant to be ready..."
+    wait_for_component qdrant "Qdrant"
+
+    log_success "Core services deployed"
+}
+
+# Deploy MCP servers
+deploy_mcp_servers() {
+    log_info "Deploying MCP servers"
+
+    # Deploy SSE versions
+    kubectl apply -f mcp-memory.yaml
+    kubectl apply -f mcp-indexer.yaml
+
+    # Wait for MCP servers to be ready
+    log_info "Waiting for MCP servers to be ready..."
+    wait_for_component mcp-memory "MCP Memory"
+    wait_for_component mcp-indexer "MCP Indexer"
+
+    log_success "MCP servers deployed"
+}
+
+# Deploy HTTP servers (optional)
+deploy_http_servers() {
+    log_info "Deploying HTTP servers (optional)"
+    kubectl apply -f mcp-http.yaml
+
+    # Wait for HTTP servers to be ready
+    wait_for_component mcp-memory-http "MCP Memory HTTP"
+    wait_for_component mcp-indexer-http "MCP Indexer HTTP"
+
+    log_success "HTTP servers deployed"
+}
+
+# Deploy indexer services
+deploy_indexer_services() {
+    log_info "Deploying indexer services"
+    kubectl apply -f indexer-services.yaml
+
+    log_success "Indexer services deployed"
+}
+
+# Deploy optional Llama.cpp service (skipped with --skip-llamacpp)
+deploy_llamacpp() {
+    if [[ "${SKIP_LLAMACPP:-}" != "true" ]]; then
+        log_info "Deploying Llama.cpp service (optional)"
+        kubectl apply -f llamacpp.yaml
+        log_success "Llama.cpp service deployed"
+    else
+        log_warning "Skipping Llama.cpp deployment"
+    fi
+}
+
+# Deploy Ingress (optional, enabled with --deploy-ingress)
+deploy_ingress() {
+    if [[ "${DEPLOY_INGRESS:-}" == "true" ]]; then
+        log_info "Deploying Ingress"
+        kubectl apply -f ingress.yaml
+        log_success "Ingress deployed"
+    else
+        log_warning "Skipping Ingress deployment (set --deploy-ingress to enable)"
+    fi
+}
+
+# Show deployment status and the NodePort URLs exposed by the *-external
+# Services. <node-ip> is a placeholder for the address of any cluster node.
+show_status() {
+    log_info "Deployment status:"
+    echo
+    echo "Namespace: $NAMESPACE"
+    echo
+    echo "Pods:"
+    kubectl get pods -n "$NAMESPACE" -o wide
+    echo
+    echo "Services:"
+    kubectl get services -n "$NAMESPACE"
+    echo
+
+    log_success "Deployment complete!"
+    echo
+    log_info "Access URLs:"
+    echo "  Qdrant:             http://<node-ip>:30333"
+    echo "  MCP Memory (SSE):   http://<node-ip>:30800"
+    echo "  MCP Memory (HTTP):  http://<node-ip>:30804"
+    echo "  MCP Indexer (SSE):  http://<node-ip>:30802"
+    echo "  MCP Indexer (HTTP): http://<node-ip>:30806"
+    if [[ "${SKIP_LLAMACPP:-}" != "true" ]]; then
+        echo "  Llama.cpp:          http://<node-ip>:30808"
+    fi
+}
+
+# Main deployment function: prerequisites, then each layer in dependency
+# order (namespace -> config -> database -> servers -> optional extras).
+main() {
+    log_info "Starting Context-Engine Kubernetes deployment"
+
+    # Check prerequisites
+    check_kubectl
+
+    # Deploy in order
+    create_namespace
+    deploy_config
+    deploy_core
+    deploy_mcp_servers
+    deploy_http_servers
+    deploy_indexer_services
+    deploy_llamacpp
+    deploy_ingress
+
+    # Show status
+    show_status
+}
+
+# Help function
+show_help() {
+    echo "Context-Engine Kubernetes Deployment Script"
+    echo
+    echo "Usage: $0 [OPTIONS]"
+    echo
+    echo "Options:"
+    echo "  -h, --help                 Show this help message"
+    echo "  -r, --registry REGISTRY    Docker image registry (default: context-engine)"
+    echo "  -t, --tag TAG              Docker image tag (default: latest)"
+    echo "      --skip-llamacpp        Skip Llama.cpp deployment"
+    echo "      --deploy-ingress       Deploy Ingress configuration"
+    echo "      --namespace NAMESPACE  Kubernetes namespace (default: context-engine)"
+    echo
+    echo "Examples:"
+    echo "  $0                              # Basic deployment"
+    echo "  $0 --skip-llamacpp              # Skip Llama.cpp"
+    echo "  $0 --deploy-ingress             # Deploy with Ingress"
+    echo "  $0 -r myregistry.com -t v1.0    # Use custom image"
+}
+
+# Parse command line arguments
+# NOTE(review): IMAGE_REGISTRY and IMAGE_TAG are stored here but nothing in
+# this script consumes them; the manifests reference context-engine:latest
+# directly. Wire them in (e.g. via kustomize `images:`) or drop the options.
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        -h|--help)
+            show_help
+            exit 0
+            ;;
+        -r|--registry)
+            require_value "$1" "${2:-}"
+            IMAGE_REGISTRY="$2"
+            shift 2
+            ;;
+        -t|--tag)
+            require_value "$1" "${2:-}"
+            IMAGE_TAG="$2"
+            shift 2
+            ;;
+        --skip-llamacpp)
+            SKIP_LLAMACPP=true
+            shift
+            ;;
+        --deploy-ingress)
+            DEPLOY_INGRESS=true
+            shift
+            ;;
+        --namespace)
+            require_value "$1" "${2:-}"
+            NAMESPACE="$2"
+            shift 2
+            ;;
+        *)
+            log_error "Unknown option: $1"
+            show_help
+            exit 1
+            ;;
+    esac
+done
+
+# Check if we're in the right directory
+if [[ ! 
-f "qdrant.yaml" ]]; then + log_error "Please run this script from the deploy/kubernetes directory" + exit 1 +fi + +# Run main deployment +main + diff --git a/deploy/kubernetes/indexer-services.yaml b/deploy/kubernetes/indexer-services.yaml new file mode 100644 index 00000000..110d22ce --- /dev/null +++ b/deploy/kubernetes/indexer-services.yaml @@ -0,0 +1,197 @@ +--- +# Watcher Deployment (File change monitoring and reindexing) +# This is a template - copy and customize for each repository +apiVersion: apps/v1 +kind: Deployment +metadata: + name: watcher + namespace: context-engine + labels: + app: context-engine + component: watcher +spec: + replicas: 1 + selector: + matchLabels: + app: context-engine + component: watcher + template: + metadata: + labels: + app: context-engine + component: watcher + spec: + containers: + - name: watcher + image: context-engine:latest + imagePullPolicy: IfNotPresent + command: ["python", "/app/scripts/watch_index.py"] + workingDir: /repos + env: + - name: QDRANT_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: QDRANT_URL + - name: COLLECTION_NAME + valueFrom: + configMapKeyRef: + name: context-engine-config + key: COLLECTION_NAME + - name: EMBEDDING_MODEL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: EMBEDDING_MODEL + - name: WATCH_ROOT + value: "/repos" + - name: QDRANT_TIMEOUT + valueFrom: + configMapKeyRef: + name: context-engine-config + key: QDRANT_TIMEOUT + - name: MAX_MICRO_CHUNKS_PER_FILE + valueFrom: + configMapKeyRef: + name: context-engine-config + key: MAX_MICRO_CHUNKS_PER_FILE + - name: INDEX_UPSERT_BATCH + valueFrom: + configMapKeyRef: + name: context-engine-config + key: INDEX_UPSERT_BATCH + - name: INDEX_UPSERT_RETRIES + valueFrom: + configMapKeyRef: + name: context-engine-config + key: INDEX_UPSERT_RETRIES + - name: WATCH_DEBOUNCE_SECS + valueFrom: + configMapKeyRef: + name: context-engine-config + key: WATCH_DEBOUNCE_SECS + resources: + requests: + memory: "512Mi" + 
cpu: "250m" + limits: + memory: "2Gi" + cpu: "1" + volumeMounts: + - name: repos + mountPath: /repos + volumes: + - name: repos + hostPath: + path: /tmp/context-engine-repos + type: DirectoryOrCreate + +--- +# Indexer Job (One-shot code indexing) +apiVersion: batch/v1 +kind: Job +metadata: + name: indexer-job + namespace: context-engine + labels: + app: context-engine + component: indexer +spec: + template: + metadata: + labels: + app: context-engine + component: indexer + spec: + restartPolicy: OnFailure + containers: + - name: indexer + image: context-engine:latest + imagePullPolicy: IfNotPresent + command: ["python", "/app/scripts/ingest_code.py"] + workingDir: /repos + env: + - name: QDRANT_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: QDRANT_URL + - name: COLLECTION_NAME + valueFrom: + configMapKeyRef: + name: context-engine-config + key: COLLECTION_NAME + - name: EMBEDDING_MODEL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: EMBEDDING_MODEL + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "2" + volumeMounts: + - name: repos + mountPath: /repos + readOnly: true + volumes: + - name: repos + hostPath: + path: /tmp/context-engine-repos + type: DirectoryOrCreate + +--- +# Index Initialization Job +apiVersion: batch/v1 +kind: Job +metadata: + name: init-payload + namespace: context-engine + labels: + app: context-engine + component: init +spec: + template: + metadata: + labels: + app: context-engine + component: init + spec: + restartPolicy: OnFailure + containers: + - name: init-payload + image: context-engine:latest + imagePullPolicy: IfNotPresent + command: ["python", "/app/scripts/create_indexes.py"] + workingDir: /repos + env: + - name: QDRANT_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: QDRANT_URL + - name: COLLECTION_NAME + valueFrom: + configMapKeyRef: + name: context-engine-config + key: COLLECTION_NAME + resources: + requests: + memory: 
"512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + volumeMounts: + - name: repos + mountPath: /repos + readOnly: true + volumes: + - name: repos + hostPath: + path: /tmp/context-engine-repos + type: DirectoryOrCreate + diff --git a/deploy/kubernetes/ingress.yaml b/deploy/kubernetes/ingress.yaml new file mode 100644 index 00000000..71524d08 --- /dev/null +++ b/deploy/kubernetes/ingress.yaml @@ -0,0 +1,86 @@ +--- +# Ingress for Context-Engine services +# Requires an Ingress controller (e.g., nginx-ingress, traefik) +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: context-engine-ingress + namespace: context-engine + labels: + app: context-engine + annotations: + # Nginx Ingress annotations + nginx.ingress.kubernetes.io/rewrite-target: /$2 + nginx.ingress.kubernetes.io/ssl-redirect: "true" + # Increase timeouts for SSE connections + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + # Enable CORS if needed + # nginx.ingress.kubernetes.io/enable-cors: "true" + # nginx.ingress.kubernetes.io/cors-allow-origin: "*" +spec: + ingressClassName: nginx # Adjust based on your ingress controller + rules: + - host: context-engine.example.com # Change to your domain + http: + paths: + # Qdrant + - path: /qdrant(/|$)(.*) + pathType: Prefix + backend: + service: + name: qdrant + port: + number: 6333 + + # MCP Memory (SSE) + - path: /mcp/memory(/|$)(.*) + pathType: Prefix + backend: + service: + name: mcp-memory + port: + number: 8000 + + # MCP Indexer (SSE) + - path: /mcp/indexer(/|$)(.*) + pathType: Prefix + backend: + service: + name: mcp-indexer + port: + number: 8001 + + # MCP Memory HTTP + - path: /mcp-http/memory(/|$)(.*) + pathType: Prefix + backend: + service: + name: mcp-memory-http + port: + number: 8002 + + # MCP Indexer HTTP + - path: /mcp-http/indexer(/|$)(.*) + pathType: Prefix + backend: + service: + name: mcp-indexer-http + port: + number: 8003 + + # Llama.cpp (optional) + - 
path: /llamacpp(/|$)(.*) + pathType: Prefix + backend: + service: + name: llamacpp + port: + number: 8080 + + # TLS configuration (optional) + # tls: + # - hosts: + # - context-engine.example.com + # secretName: context-engine-tls + diff --git a/deploy/kubernetes/kustomization.yaml b/deploy/kubernetes/kustomization.yaml new file mode 100644 index 00000000..e02aa13e --- /dev/null +++ b/deploy/kubernetes/kustomization.yaml @@ -0,0 +1,87 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +metadata: + name: context-engine + namespace: context-engine + +resources: + # Namespace and configuration + - namespace.yaml + - configmap.yaml + + # Core services + - qdrant.yaml + + # MCP servers + - mcp-memory.yaml + - mcp-indexer.yaml + - mcp-http.yaml + + # Indexer services + - indexer-services.yaml + + # Optional services + - llamacpp.yaml + - ingress.yaml + +# Common labels +commonLabels: + app.kubernetes.io/name: context-engine + app.kubernetes.io/component: kubernetes-deployment + app.kubernetes.io/managed-by: kustomize + +# Patches for production customization +patchesStrategicMerge: + # Uncomment and create patches for production + # - patches/production-storage.yaml + # - patches/production-resources.yaml + # - patches/production-ingress.yaml + +# ConfigMap generator (optional - for overrides) +configMapGenerator: + - name: context-engine-overrides + literals: + # Override specific values here + # COLLECTION_NAME=production-collection + # EMBEDDING_MODEL=BAAI/bge-large-en-v1.5 + +# Secret generator (optional - for sensitive data) +secretGenerator: + - name: context-engine-secrets + literals: + # Add secrets here (recommended to use existing secrets instead) + # QDRANT_API_KEY=your-api-key + +# Images configuration (customize for your registry) +images: + - name: context-engine + newTag: latest + # newTag: v1.0.0 + # newName: your-registry/context-engine + +# Namespace override +namespace: context-engine + +# Replicas configuration +replicas: + # Scale 
MCP servers for high availability + - name: mcp-memory + count: 1 # Set to 2+ for production + - name: mcp-indexer + count: 1 # Set to 2+ for production + +# Resource patches +patches: + # Example resource customization + - patch: |- + - op: replace + path: /spec/template/spec/containers/0/resources/requests/memory + value: "1Gi" + - op: replace + path: /spec/template/spec/containers/0/resources/limits/memory + value: "4Gi" + target: + kind: Deployment + name: mcp-memory + diff --git a/deploy/kubernetes/llamacpp.yaml b/deploy/kubernetes/llamacpp.yaml new file mode 100644 index 00000000..f851996d --- /dev/null +++ b/deploy/kubernetes/llamacpp.yaml @@ -0,0 +1,171 @@ +--- +# Llama.cpp Deployment (Optional - for text generation) +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llamacpp + namespace: context-engine + labels: + app: context-engine + component: llamacpp +spec: + replicas: 1 + selector: + matchLabels: + app: context-engine + component: llamacpp + template: + metadata: + labels: + app: context-engine + component: llamacpp + spec: + # Init container to download model if not present + initContainers: + - name: model-downloader + image: curlimages/curl:latest + env: + - name: LLAMACPP_MODEL_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: LLAMACPP_MODEL_URL + - name: LLAMACPP_MODEL_NAME + valueFrom: + configMapKeyRef: + name: context-engine-config + key: LLAMACPP_MODEL_NAME + command: + - sh + - -c + - | + MODEL_PATH="/models/${LLAMACPP_MODEL_NAME}" + + if [ -f "$MODEL_PATH" ]; then + echo "Model already exists at $MODEL_PATH" + ls -lh "$MODEL_PATH" + exit 0 + fi + + echo "Downloading model from ${LLAMACPP_MODEL_URL}..." + echo "Target: $MODEL_PATH" + + curl -L --progress-bar -o "$MODEL_PATH.tmp" "${LLAMACPP_MODEL_URL}" + + if [ $? 
-eq 0 ]; then + mv "$MODEL_PATH.tmp" "$MODEL_PATH" + echo "Model downloaded successfully" + ls -lh "$MODEL_PATH" + else + echo "Failed to download model" + rm -f "$MODEL_PATH.tmp" + exit 1 + fi + volumeMounts: + - name: models + mountPath: /models + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" + + containers: + - name: llamacpp + image: ghcr.io/ggerganov/llama.cpp:server + imagePullPolicy: IfNotPresent + env: + - name: LLAMACPP_MODEL_NAME + valueFrom: + configMapKeyRef: + name: context-engine-config + key: LLAMACPP_MODEL_NAME + ports: + - name: http + containerPort: 8080 + protocol: TCP + command: + - sh + - -c + args: + - | + exec /llama-server \ + --host 0.0.0.0 \ + --port 8080 \ + --model "/models/${LLAMACPP_MODEL_NAME}" \ + --ctx-size 4096 \ + --n-gpu-layers 0 + resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "8Gi" + cpu: "4" + volumeMounts: + - name: models + mountPath: /models + readOnly: false + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 60 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 5 + volumes: + - name: models + hostPath: + path: /tmp/context-engine-models + type: DirectoryOrCreate + +--- +# Llama.cpp Service +apiVersion: v1 +kind: Service +metadata: + name: llamacpp + namespace: context-engine + labels: + app: context-engine + component: llamacpp +spec: + type: ClusterIP + ports: + - name: http + port: 8080 + targetPort: http + protocol: TCP + selector: + app: context-engine + component: llamacpp + +--- +# Optional: Llama.cpp External Service +apiVersion: v1 +kind: Service +metadata: + name: llamacpp-external + namespace: context-engine + labels: + app: context-engine + component: llamacpp +spec: + type: NodePort + ports: + - name: http + port: 8080 + targetPort: http + nodePort: 30808 + protocol: TCP + selector: + app: context-engine + component: llamacpp + diff --git 
a/deploy/kubernetes/mcp-http.yaml b/deploy/kubernetes/mcp-http.yaml new file mode 100644 index 00000000..7586f022 --- /dev/null +++ b/deploy/kubernetes/mcp-http.yaml @@ -0,0 +1,323 @@ +--- +# MCP Memory HTTP Deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: mcp-memory-http + namespace: context-engine + labels: + app: context-engine + component: mcp-memory-http +spec: + replicas: 1 + selector: + matchLabels: + app: context-engine + component: mcp-memory-http + template: + metadata: + labels: + app: context-engine + component: mcp-memory-http + spec: + containers: + - name: mcp-memory-http + image: context-engine:latest + imagePullPolicy: IfNotPresent + command: ["python", "/app/scripts/mcp_memory_server.py"] + ports: + - name: http + containerPort: 8002 + protocol: TCP + - name: health + containerPort: 18002 + protocol: TCP + env: + - name: FASTMCP_TRANSPORT + value: "streamable-http" + - name: FASTMCP_HOST + valueFrom: + configMapKeyRef: + name: context-engine-config + key: FASTMCP_HOST + - name: FASTMCP_PORT + value: "8002" + - name: FASTMCP_HEALTH_PORT + value: "18002" + - name: QDRANT_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: QDRANT_URL + - name: COLLECTION_NAME + valueFrom: + configMapKeyRef: + name: context-engine-config + key: COLLECTION_NAME + - name: EMBEDDING_MODEL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: EMBEDDING_MODEL + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "2" + volumeMounts: + - name: repos + mountPath: /repos + readOnly: true + livenessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 10 + periodSeconds: 5 + volumes: + - name: repos + hostPath: + path: /tmp/context-engine-repos + type: DirectoryOrCreate + +--- +# MCP Memory HTTP Service +apiVersion: v1 +kind: Service +metadata: + name: mcp-memory-http 
+ namespace: context-engine + labels: + app: context-engine + component: mcp-memory-http +spec: + type: ClusterIP + ports: + - name: http + port: 8002 + targetPort: http + protocol: TCP + - name: health + port: 18002 + targetPort: health + protocol: TCP + selector: + app: context-engine + component: mcp-memory-http + +--- +# Optional: MCP Memory HTTP External Service +apiVersion: v1 +kind: Service +metadata: + name: mcp-memory-http-external + namespace: context-engine + labels: + app: context-engine + component: mcp-memory-http +spec: + type: NodePort + ports: + - name: http + port: 8002 + targetPort: http + nodePort: 30804 + protocol: TCP + - name: health + port: 18002 + targetPort: health + nodePort: 30805 + protocol: TCP + selector: + app: context-engine + component: mcp-memory-http + +--- +# MCP Indexer HTTP Deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: mcp-indexer-http + namespace: context-engine + labels: + app: context-engine + component: mcp-indexer-http +spec: + replicas: 1 + selector: + matchLabels: + app: context-engine + component: mcp-indexer-http + template: + metadata: + labels: + app: context-engine + component: mcp-indexer-http + spec: + containers: + - name: mcp-indexer-http + image: context-engine:latest + imagePullPolicy: IfNotPresent + command: ["python", "/app/scripts/mcp_indexer_server.py"] + ports: + - name: http + containerPort: 8003 + protocol: TCP + - name: health + containerPort: 18003 + protocol: TCP + env: + - name: FASTMCP_TRANSPORT + value: "streamable-http" + - name: FASTMCP_HOST + valueFrom: + configMapKeyRef: + name: context-engine-config + key: FASTMCP_HOST + - name: FASTMCP_INDEXER_PORT + value: "8003" + - name: FASTMCP_HEALTH_PORT + value: "18003" + - name: QDRANT_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: QDRANT_URL + - name: COLLECTION_NAME + valueFrom: + configMapKeyRef: + name: context-engine-config + key: COLLECTION_NAME + - name: EMBEDDING_MODEL + valueFrom: + 
configMapKeyRef: + name: context-engine-config + key: EMBEDDING_MODEL + - name: INDEX_MICRO_CHUNKS + valueFrom: + configMapKeyRef: + name: context-engine-config + key: INDEX_MICRO_CHUNKS + - name: MAX_MICRO_CHUNKS_PER_FILE + valueFrom: + configMapKeyRef: + name: context-engine-config + key: MAX_MICRO_CHUNKS_PER_FILE + - name: REFRAG_MODE + valueFrom: + configMapKeyRef: + name: context-engine-config + key: REFRAG_MODE + - name: REFRAG_DECODER + valueFrom: + configMapKeyRef: + name: context-engine-config + key: REFRAG_DECODER + - name: LLAMACPP_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: LLAMACPP_URL + - name: MEMORY_SSE_ENABLED + valueFrom: + configMapKeyRef: + name: context-engine-config + key: MEMORY_SSE_ENABLED + - name: MEMORY_MCP_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: MEMORY_MCP_URL + - name: CTX_MULTI_COLLECTION + valueFrom: + configMapKeyRef: + name: context-engine-config + key: CTX_MULTI_COLLECTION + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "2" + volumeMounts: + - name: repos + mountPath: /repos + livenessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 10 + periodSeconds: 5 + volumes: + - name: repos + hostPath: + path: /tmp/context-engine-repos + type: DirectoryOrCreate + +--- +# MCP Indexer HTTP Service +apiVersion: v1 +kind: Service +metadata: + name: mcp-indexer-http + namespace: context-engine + labels: + app: context-engine + component: mcp-indexer-http +spec: + type: ClusterIP + ports: + - name: http + port: 8003 + targetPort: http + protocol: TCP + - name: health + port: 18003 + targetPort: health + protocol: TCP + selector: + app: context-engine + component: mcp-indexer-http + +--- +# Optional: MCP Indexer HTTP External Service +apiVersion: v1 +kind: Service +metadata: + name: mcp-indexer-http-external + namespace: 
context-engine + labels: + app: context-engine + component: mcp-indexer-http +spec: + type: NodePort + ports: + - name: http + port: 8003 + targetPort: http + nodePort: 30806 + protocol: TCP + - name: health + port: 18003 + targetPort: health + nodePort: 30807 + protocol: TCP + selector: + app: context-engine + component: mcp-indexer-http + diff --git a/deploy/kubernetes/mcp-indexer.yaml b/deploy/kubernetes/mcp-indexer.yaml new file mode 100644 index 00000000..5963a86a --- /dev/null +++ b/deploy/kubernetes/mcp-indexer.yaml @@ -0,0 +1,182 @@ +--- +# MCP Indexer Deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: mcp-indexer + namespace: context-engine + labels: + app: context-engine + component: mcp-indexer +spec: + replicas: 1 + selector: + matchLabels: + app: context-engine + component: mcp-indexer + template: + metadata: + labels: + app: context-engine + component: mcp-indexer + spec: + containers: + - name: mcp-indexer + image: context-engine:latest + imagePullPolicy: IfNotPresent + command: ["python", "/app/scripts/mcp_indexer_server.py"] + ports: + - name: sse + containerPort: 8001 + protocol: TCP + - name: health + containerPort: 18001 + protocol: TCP + env: + - name: FASTMCP_HOST + valueFrom: + configMapKeyRef: + name: context-engine-config + key: FASTMCP_HOST + - name: FASTMCP_INDEXER_PORT + valueFrom: + configMapKeyRef: + name: context-engine-config + key: FASTMCP_INDEXER_PORT + - name: FASTMCP_HEALTH_PORT + value: "18001" + - name: QDRANT_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: QDRANT_URL + - name: COLLECTION_NAME + valueFrom: + configMapKeyRef: + name: context-engine-config + key: COLLECTION_NAME + - name: EMBEDDING_MODEL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: EMBEDDING_MODEL + - name: INDEX_MICRO_CHUNKS + valueFrom: + configMapKeyRef: + name: context-engine-config + key: INDEX_MICRO_CHUNKS + - name: MAX_MICRO_CHUNKS_PER_FILE + valueFrom: + configMapKeyRef: + name: 
context-engine-config + key: MAX_MICRO_CHUNKS_PER_FILE + - name: REFRAG_MODE + valueFrom: + configMapKeyRef: + name: context-engine-config + key: REFRAG_MODE + - name: REFRAG_DECODER + valueFrom: + configMapKeyRef: + name: context-engine-config + key: REFRAG_DECODER + - name: LLAMACPP_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: LLAMACPP_URL + - name: MEMORY_SSE_ENABLED + valueFrom: + configMapKeyRef: + name: context-engine-config + key: MEMORY_SSE_ENABLED + - name: MEMORY_MCP_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: MEMORY_MCP_URL + - name: CTX_MULTI_COLLECTION + valueFrom: + configMapKeyRef: + name: context-engine-config + key: CTX_MULTI_COLLECTION + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "2" + volumeMounts: + - name: repos + mountPath: /repos + livenessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 10 + periodSeconds: 5 + volumes: + - name: repos + hostPath: + path: /tmp/context-engine-repos + type: DirectoryOrCreate + +--- +# MCP Indexer Service +apiVersion: v1 +kind: Service +metadata: + name: mcp-indexer + namespace: context-engine + labels: + app: context-engine + component: mcp-indexer +spec: + type: ClusterIP + ports: + - name: sse + port: 8001 + targetPort: sse + protocol: TCP + - name: health + port: 18001 + targetPort: health + protocol: TCP + selector: + app: context-engine + component: mcp-indexer + +--- +# Optional: MCP Indexer External Service +apiVersion: v1 +kind: Service +metadata: + name: mcp-indexer-external + namespace: context-engine + labels: + app: context-engine + component: mcp-indexer +spec: + type: NodePort + ports: + - name: sse + port: 8001 + targetPort: sse + nodePort: 30802 + protocol: TCP + - name: health + port: 18001 + targetPort: health + nodePort: 30803 + protocol: TCP + selector: + app: 
context-engine + component: mcp-indexer + diff --git a/deploy/kubernetes/mcp-memory.yaml b/deploy/kubernetes/mcp-memory.yaml new file mode 100644 index 00000000..6c22bfee --- /dev/null +++ b/deploy/kubernetes/mcp-memory.yaml @@ -0,0 +1,148 @@ +--- +# MCP Memory Deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: mcp-memory + namespace: context-engine + labels: + app: context-engine + component: mcp-memory +spec: + replicas: 1 + selector: + matchLabels: + app: context-engine + component: mcp-memory + template: + metadata: + labels: + app: context-engine + component: mcp-memory + spec: + containers: + - name: mcp-memory + image: context-engine:latest + imagePullPolicy: IfNotPresent + command: ["python", "/app/scripts/mcp_memory_server.py"] + ports: + - name: sse + containerPort: 8000 + protocol: TCP + - name: health + containerPort: 18000 + protocol: TCP + env: + - name: FASTMCP_HOST + valueFrom: + configMapKeyRef: + name: context-engine-config + key: FASTMCP_HOST + - name: FASTMCP_PORT + valueFrom: + configMapKeyRef: + name: context-engine-config + key: FASTMCP_PORT + - name: FASTMCP_HEALTH_PORT + value: "18000" + - name: QDRANT_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: QDRANT_URL + - name: COLLECTION_NAME + valueFrom: + configMapKeyRef: + name: context-engine-config + key: COLLECTION_NAME + - name: EMBEDDING_MODEL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: EMBEDDING_MODEL + - name: EMBEDDING_PROVIDER + valueFrom: + configMapKeyRef: + name: context-engine-config + key: EMBEDDING_PROVIDER + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "4Gi" + cpu: "2" + volumeMounts: + - name: repos + mountPath: /repos + readOnly: true + livenessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /readyz + port: health + initialDelaySeconds: 10 + periodSeconds: 5 + volumes: + - name: repos + hostPath: + 
path: /tmp/context-engine-repos + type: DirectoryOrCreate + +--- +# MCP Memory Service +apiVersion: v1 +kind: Service +metadata: + name: mcp-memory + namespace: context-engine + labels: + app: context-engine + component: mcp-memory +spec: + type: ClusterIP + ports: + - name: sse + port: 8000 + targetPort: sse + protocol: TCP + - name: health + port: 18000 + targetPort: health + protocol: TCP + selector: + app: context-engine + component: mcp-memory + +--- +# Optional: MCP Memory External Service +apiVersion: v1 +kind: Service +metadata: + name: mcp-memory-external + namespace: context-engine + labels: + app: context-engine + component: mcp-memory +spec: + type: NodePort + ports: + - name: sse + port: 8000 + targetPort: sse + nodePort: 30800 + protocol: TCP + - name: health + port: 18000 + targetPort: health + nodePort: 30801 + protocol: TCP + selector: + app: context-engine + component: mcp-memory + diff --git a/deploy/kubernetes/namespace.yaml b/deploy/kubernetes/namespace.yaml new file mode 100644 index 00000000..b972df16 --- /dev/null +++ b/deploy/kubernetes/namespace.yaml @@ -0,0 +1,7 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: context-engine + labels: + name: context-engine + app: context-engine diff --git a/deploy/kubernetes/qdrant.yaml b/deploy/kubernetes/qdrant.yaml new file mode 100644 index 00000000..3330bcf1 --- /dev/null +++ b/deploy/kubernetes/qdrant.yaml @@ -0,0 +1,126 @@ +--- +# Qdrant StatefulSet +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: qdrant + namespace: context-engine + labels: + app: context-engine + component: qdrant +spec: + serviceName: qdrant + replicas: 1 + selector: + matchLabels: + app: context-engine + component: qdrant + template: + metadata: + labels: + app: context-engine + component: qdrant + spec: + containers: + - name: qdrant + image: qdrant/qdrant:latest + imagePullPolicy: IfNotPresent + ports: + - name: http + containerPort: 6333 + protocol: TCP + - name: grpc + containerPort: 6334 + protocol: TCP + 
env: + - name: QDRANT__SERVICE__HTTP_PORT + value: "6333" + - name: QDRANT__SERVICE__GRPC_PORT + value: "6334" + resources: + requests: + memory: "2Gi" + cpu: "1" + limits: + memory: "8Gi" + cpu: "4" + volumeMounts: + - name: qdrant-storage + mountPath: /qdrant/storage + livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + volumeClaimTemplates: + - metadata: + name: qdrant-storage + labels: + app: context-engine + component: qdrant + spec: + accessModes: ["ReadWriteOnce"] + storageClassName: standard # Adjust based on your cluster + resources: + requests: + storage: 50Gi + +--- +# Qdrant Service +apiVersion: v1 +kind: Service +metadata: + name: qdrant + namespace: context-engine + labels: + app: context-engine + component: qdrant +spec: + type: ClusterIP + ports: + - name: http + port: 6333 + targetPort: http + protocol: TCP + - name: grpc + port: 6334 + targetPort: grpc + protocol: TCP + selector: + app: context-engine + component: qdrant + +--- +# Optional: Qdrant External Service (for direct access) +apiVersion: v1 +kind: Service +metadata: + name: qdrant-external + namespace: context-engine + labels: + app: context-engine + component: qdrant +spec: + type: NodePort + ports: + - name: http + port: 6333 + targetPort: http + nodePort: 30333 + protocol: TCP + - name: grpc + port: 6334 + targetPort: grpc + nodePort: 30334 + protocol: TCP + selector: + app: context-engine + component: qdrant + diff --git a/docs/MULTI_REPO_COLLECTIONS.md b/docs/MULTI_REPO_COLLECTIONS.md new file mode 100644 index 00000000..e43a5d60 --- /dev/null +++ b/docs/MULTI_REPO_COLLECTIONS.md @@ -0,0 +1,397 @@ +# Multi-Repository Collection Architecture + +## Overview + +Context Engine supports first-class multi-repository operation through a unified collection architecture. 
This enables: + +- **Single unified collection** (default: `codebase`) for seamless cross-repo search +- **Per-repo metadata** for filtering and isolation when needed +- **Remote deployment** on Kubernetes clusters with stronger hardware +- **Minimal code changes** - existing single-repo workflows remain unchanged + +## Architecture Principles + +### 1. Unified Collection Model + +All repositories index into a **single shared collection** by default (`codebase`). This provides: + +- **Seamless cross-repo search**: Query across all your code at once +- **Simplified management**: One collection to monitor and maintain +- **Efficient resource usage**: Shared HNSW index and vector storage + +### 2. Per-Repository Metadata + +Each indexed chunk includes repository identification in its payload: + +```json +{ + "metadata": { + "repo": "my-backend-service", + "path": "/work/src/api/handler.py", + "host_path": "/Users/john/projects/backend/src/api/handler.py", + "container_path": "/work/src/api/handler.py", + "language": "python", + "kind": "function", + "symbol": "handle_request", + ... + } +} +``` + +**Key metadata fields for multi-repo:** +- `metadata.repo`: Logical repository name (auto-detected from git or folder name) +- `metadata.path`: Container path (always starts with `/work`) +- `metadata.host_path`: Original host filesystem path +- `metadata.container_path`: Normalized container path for remote deployments + +### 3. 
Workspace State Management + +Each repository maintains its own `.codebase/state.json` file: + +```json +{ + "workspace_path": "/work", + "created_at": "2025-01-15T10:30:00", + "updated_at": "2025-01-15T14:22:00", + "qdrant_collection": "codebase", + "indexing_status": { + "state": "watching", + "started_at": "2025-01-15T14:20:00", + "progress": { + "files_processed": 1250, + "total_files": 1250 + } + }, + "last_activity": { + "timestamp": "2025-01-15T14:22:00", + "action": "indexed", + "file_path": "/work/src/main.py" + } +} +``` + +## Collection Naming Strategy + +### Default: Unified Collection + +**Recommended for most users:** +- Collection name: `codebase` (default) +- All repositories share this collection +- Filter by `metadata.repo` when you need repo-specific results + +**Benefits:** +- Cross-repo search works out of the box +- Simpler configuration +- Better for monorepos and related microservices + +### Optional: Per-Repository Collections + +**Use when you need strict isolation:** +- Set `COLLECTION_NAME=my-service-name` per repository +- Each repo gets its own collection +- Requires explicit collection parameter in MCP calls + +**Trade-offs:** +- More collections to manage +- Cross-repo search requires multiple queries +- Higher memory overhead (separate HNSW indexes) + +## Remote Deployment Architecture + +### Kubernetes Deployment Model + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Kubernetes Cluster │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Qdrant │ │ Memory MCP │ │ Indexer MCP │ │ +│ │ (StatefulSet)│ │ (Deployment) │ │ (Deployment) │ │ +│ │ Port: 6333 │ │ Port: 8000 │ │ Port: 8001 │ │ +│
└──────────────┘ └──────────────┘ └──────────────┘ │ +│ │ │ │ │ +│ └──────────────────┴──────────────────┘ │ +│ │ │ +│ ┌─────────────────────────┴────────────────────────────┐ │ +│ │ Persistent Volume (repos) │ │ +│ │ /repos/backend/ /repos/frontend/ /repos/ml/ │ │ +│ └──────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Watcher │ │ Watcher │ │ Watcher │ │ +│ │ (backend) │ │ (frontend) │ │ (ml) │ │ +│ │ (Deployment) │ │ (Deployment) │ │ (Deployment) │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────┐ │ +│ │ Uploader Pod (optional) │ │ +│ │ Accepts file uploads and writes to /repos volume │ │ +│ └──────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ + │ │ │ + │ │ │ +
┌────▼────┐ ┌────▼────┐ ┌────▼────┐ + │ Dev │ │ Dev │ │ Dev │ + │ Client │ │ Client │ │ Client │ + │ #1 │ │ #2 │ │ #3 │ + └─────────┘ └─────────┘ └─────────┘ +``` + +### Volume Structure + +``` +/repos/ +├── backend/ +│ ├── .codebase/ +│ │ └── state.json # Collection: codebase, repo: backend +│ ├── src/ +│ └── ... +├── frontend/ +│ ├── .codebase/ +│ │ └── state.json # Collection: codebase, repo: frontend +│ ├── src/ +│ └── ... +└── ml-service/ + ├── .codebase/ + │ └── state.json # Collection: codebase, repo: ml-service + ├── models/ + └── ... +``` + +## MCP Tool Collection Support + +All MCP tools accept an optional `collection` parameter: + +### Search Tools + +```python +# Search across all repos in the unified collection +await repo_search( + query="authentication handler", + limit=10 +) + +# Filter to specific repo +await repo_search( + query="authentication handler", + limit=10, + # Use metadata filter (not collection param) for repo filtering + # Collection param is for switching between different Qdrant collections +) + +# Search in a different collection (if using per-repo collections) +await repo_search( + query="authentication handler", + collection="backend-service", + limit=10 +) +``` + +### Memory Tools + +```python +# Store memory in default collection +await memory_store( + information="Use JWT tokens for API authentication", + metadata={"kind": "memory", "topic": "auth", "repo": "backend"} +) + +# Store in specific collection +await memory_store( + information="Frontend uses OAuth2 flow", + metadata={"kind": "memory", "topic": "auth", "repo": "frontend"}, + collection="codebase" +) +``` + +### Indexing Tools + +```python +# Index a specific workspace into the unified collection +await qdrant_index_root( +
collection="codebase" # Optional, defaults to workspace state +) + +# Index with explicit collection override +await qdrant_index( + subdir="", + recreate=False, + collection="my-custom-collection" +) +``` + +## Filtering by Repository + +Use Qdrant's payload filters to scope searches to specific repositories: + +```python +# In hybrid_search.py or via MCP tools +results = hybrid_search( + queries=["authentication"], + collection="codebase", + # Add repo filter via metadata + # (Implementation detail: tools should support repo= parameter) +) +``` + +**Recommended enhancement:** Add `repo` parameter to search tools that translates to a payload filter on `metadata.repo`. + +## Workspace Discovery + +The `list_workspaces` function scans for all `.codebase/state.json` files: + +```python +from scripts.workspace_state import list_workspaces + +workspaces = list_workspaces(search_root="/repos") +# Returns: +# [ +# { +# "workspace_path": "/repos/backend", +# "collection_name": "codebase", +# "last_updated": "2025-01-15T14:22:00", +# "indexing_state": "watching" +# }, +# { +# "workspace_path": "/repos/frontend", +# "collection_name": "codebase", +# "last_updated": "2025-01-15T14:20:00", +# "indexing_state": "idle" +# } +# ] +``` + +## Migration Guide + +### From Single-Repo to Multi-Repo + +**No migration needed!** The default unified collection model works automatically: + +1. **Keep using `codebase` collection** (default) +2. **Index additional repos** - they'll share the same collection +3. **Filter by repo name** when you need repo-specific results + +### From Per-Repo Collections to Unified + +If you previously used separate collections per repo: + +1. **Create new unified collection:** + ```bash + COLLECTION_NAME=codebase make reindex + ``` + +2. **Reindex all repositories** into the unified collection: + ```bash + for repo in backend frontend ml-service; do + HOST_INDEX_PATH=/path/to/$repo COLLECTION_NAME=codebase make index + done + ``` + +3. 
**Update MCP client configs** to use `codebase` collection + +4. **Optional:** Delete old per-repo collections via Qdrant API + +## Best Practices + +### 1. Use Unified Collection by Default + +- Simplifies cross-repo search +- Reduces operational overhead +- Better for related codebases + +### 2. Set Meaningful Repo Names + +- Use `REPO_NAME` env var or rely on git repo name +- Keep names consistent across environments +- Use kebab-case: `backend-api`, `frontend-web`, `ml-training` + +### 3. Leverage Payload Indexes + +The indexer creates payload indexes on `metadata.repo` for efficient filtering: + +```python +# Fast repo-scoped search (uses payload index) +results = client.search( + collection_name="codebase", + query_vector=embedding, + query_filter=models.Filter( + must=[ + models.FieldCondition( + key="metadata.repo", + match=models.MatchValue(value="backend-api") + ) + ] + ) +) +``` + +### 4. Monitor Collection Health + +```bash +# Check collection status +make qdrant-status + +# List all collections +make qdrant-list + +# Prune stale points +make prune +``` + +### 5. 
Use Watchers Per Repository + +Deploy one watcher per repository in Kubernetes: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: watcher-backend +spec: + replicas: 1 + template: + spec: + containers: + - name: watcher + image: context-engine-indexer:latest + command: ["python", "/app/scripts/watch_index.py"] + env: + - name: WATCH_ROOT + value: "/repos/backend" + - name: COLLECTION_NAME + value: "codebase" + - name: REPO_NAME + value: "backend" + volumeMounts: + - name: repos + mountPath: /repos/backend + subPath: backend +``` + +## Compatibility + +### Backward Compatibility + +All existing single-repo workflows continue to work: + +- Default collection name: `codebase` +- Workspace state auto-created if missing +- Collection parameter optional in all MCP tools +- Existing Docker Compose setup unchanged + +### Forward Compatibility + +The architecture supports future enhancements: + +- Multi-collection queries (search across multiple collections) +- Collection-level access control +- Collection-specific embedding models +- Cross-collection deduplication + +## See Also + +- [Kubernetes Deployment Guide](../deploy/kubernetes/README.md) +- [MCP API Reference](MCP_API.md) +- [Architecture Overview](ARCHITECTURE.md) +- [Development Guide](DEVELOPMENT.md) + From f6ccd611b208244c873c6c88a2190763af7f42ea Mon Sep 17 00:00:00 2001 From: john donalson Date: Sun, 2 Nov 2025 21:59:27 -0500 Subject: [PATCH 3/5] Update kustomization.yaml --- deploy/kubernetes/kustomization.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/deploy/kubernetes/kustomization.yaml b/deploy/kubernetes/kustomization.yaml index e02aa13e..c9f3f016 100644 --- a/deploy/kubernetes/kustomization.yaml +++ b/deploy/kubernetes/kustomization.yaml @@ -26,10 +26,11 @@ resources: - ingress.yaml # Common labels -commonLabels: - app.kubernetes.io/name: context-engine - app.kubernetes.io/component: kubernetes-deployment - app.kubernetes.io/managed-by: kustomize +labels: + -
pairs: + app.kubernetes.io/name: context-engine + app.kubernetes.io/component: kubernetes-deployment + app.kubernetes.io/managed-by: kustomize # Patches for production customization patchesStrategicMerge: From 188855d4c852a65a0bcf1362f1f138a5ba9c4493 Mon Sep 17 00:00:00 2001 From: john donalson Date: Sun, 2 Nov 2025 22:08:07 -0500 Subject: [PATCH 4/5] fix --- deploy/kubernetes/indexer-services.yaml | 26 +- deploy/kubernetes/mcp-http.yaml | 16 +- deploy/kubernetes/mcp-indexer.yaml | 8 +- deploy/kubernetes/mcp-memory.yaml | 8 +- scripts/k8s_uploader.py | 307 ++++++++++++++++++++++++ scripts/mcp_indexer_server.py | 15 +- 6 files changed, 341 insertions(+), 39 deletions(-) create mode 100755 scripts/k8s_uploader.py diff --git a/deploy/kubernetes/indexer-services.yaml b/deploy/kubernetes/indexer-services.yaml index 110d22ce..72e3920a 100644 --- a/deploy/kubernetes/indexer-services.yaml +++ b/deploy/kubernetes/indexer-services.yaml @@ -44,7 +44,7 @@ spec: name: context-engine-config key: EMBEDDING_MODEL - name: WATCH_ROOT - value: "/repos" + value: "/work" - name: QDRANT_TIMEOUT valueFrom: configMapKeyRef: @@ -78,12 +78,12 @@ spec: memory: "2Gi" cpu: "1" volumeMounts: - - name: repos - mountPath: /repos + - name: work + mountPath: /work volumes: - - name: repos + - name: work hostPath: - path: /tmp/context-engine-repos + path: /tmp/context-engine-work type: DirectoryOrCreate --- @@ -134,13 +134,13 @@ spec: memory: "4Gi" cpu: "2" volumeMounts: - - name: repos - mountPath: /repos + - name: work + mountPath: /work readOnly: true volumes: - - name: repos + - name: work hostPath: - path: /tmp/context-engine-repos + path: /tmp/context-engine-work type: DirectoryOrCreate --- @@ -186,12 +186,12 @@ spec: memory: "1Gi" cpu: "500m" volumeMounts: - - name: repos - mountPath: /repos + - name: work + mountPath: /work readOnly: true volumes: - - name: repos + - name: work hostPath: - path: /tmp/context-engine-repos + path: /tmp/context-engine-work type: DirectoryOrCreate diff --git 
a/deploy/kubernetes/mcp-http.yaml b/deploy/kubernetes/mcp-http.yaml index 7586f022..5abbe9d9 100644 --- a/deploy/kubernetes/mcp-http.yaml +++ b/deploy/kubernetes/mcp-http.yaml @@ -67,8 +67,8 @@ spec: memory: "4Gi" cpu: "2" volumeMounts: - - name: repos - mountPath: /repos + - name: work + mountPath: /work readOnly: true livenessProbe: httpGet: @@ -83,9 +83,9 @@ spec: initialDelaySeconds: 10 periodSeconds: 5 volumes: - - name: repos + - name: work hostPath: - path: /tmp/context-engine-repos + path: /tmp/context-engine-work type: DirectoryOrCreate --- @@ -249,8 +249,8 @@ spec: memory: "4Gi" cpu: "2" volumeMounts: - - name: repos - mountPath: /repos + - name: work + mountPath: /work livenessProbe: httpGet: path: /readyz @@ -264,9 +264,9 @@ spec: initialDelaySeconds: 10 periodSeconds: 5 volumes: - - name: repos + - name: work hostPath: - path: /tmp/context-engine-repos + path: /tmp/context-engine-work type: DirectoryOrCreate --- diff --git a/deploy/kubernetes/mcp-indexer.yaml b/deploy/kubernetes/mcp-indexer.yaml index 5963a86a..c219df31 100644 --- a/deploy/kubernetes/mcp-indexer.yaml +++ b/deploy/kubernetes/mcp-indexer.yaml @@ -108,8 +108,8 @@ spec: memory: "4Gi" cpu: "2" volumeMounts: - - name: repos - mountPath: /repos + - name: work + mountPath: /work livenessProbe: httpGet: path: /readyz @@ -123,9 +123,9 @@ spec: initialDelaySeconds: 10 periodSeconds: 5 volumes: - - name: repos + - name: work hostPath: - path: /tmp/context-engine-repos + path: /tmp/context-engine-work type: DirectoryOrCreate --- diff --git a/deploy/kubernetes/mcp-memory.yaml b/deploy/kubernetes/mcp-memory.yaml index 6c22bfee..af213f24 100644 --- a/deploy/kubernetes/mcp-memory.yaml +++ b/deploy/kubernetes/mcp-memory.yaml @@ -73,8 +73,8 @@ spec: memory: "4Gi" cpu: "2" volumeMounts: - - name: repos - mountPath: /repos + - name: work + mountPath: /work readOnly: true livenessProbe: httpGet: @@ -89,9 +89,9 @@ spec: initialDelaySeconds: 10 periodSeconds: 5 volumes: - - name: repos + - name: work 
hostPath: - path: /tmp/context-engine-repos + path: /tmp/context-engine-work type: DirectoryOrCreate --- diff --git a/scripts/k8s_uploader.py b/scripts/k8s_uploader.py new file mode 100755 index 00000000..d4b1c7d3 --- /dev/null +++ b/scripts/k8s_uploader.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 +""" +Kubernetes Repository Uploader + +Upload local repositories to a Kubernetes cluster running Context Engine +and trigger indexing via the MCP Indexer API. + +Usage: + python scripts/k8s_uploader.py /path/to/repo --namespace context-engine + python scripts/k8s_uploader.py /path/to/repo --collection my-project --recreate + python scripts/k8s_uploader.py /path/to/repo --pod mcp-indexer-abc123 --skip-index +""" + +import argparse +import json +import os +import subprocess +import sys +import tarfile +import tempfile +import time +from pathlib import Path +from typing import Optional, Dict, Any, List + + +def run_command(cmd: List[str], check: bool = True, capture: bool = True) -> subprocess.CompletedProcess: + """Run a shell command and return the result.""" + print(f"Running: {' '.join(cmd)}") + try: + result = subprocess.run( + cmd, + check=check, + capture_output=capture, + text=True + ) + return result + except subprocess.CalledProcessError as e: + print(f"Error running command: {e}") + if capture and e.stderr: + print(f"stderr: {e.stderr}") + raise + + +def get_indexer_pod(namespace: str, pod_name: Optional[str] = None) -> str: + """Get the name of an MCP indexer pod.""" + if pod_name: + return pod_name + + # Find a running mcp-indexer pod + result = run_command([ + "kubectl", "get", "pods", + "-n", namespace, + "-l", "component=mcp-indexer", + "-o", "jsonpath={.items[0].metadata.name}" + ]) + + pod = result.stdout.strip() + if not pod: + raise RuntimeError(f"No mcp-indexer pod found in namespace {namespace}") + + print(f"Using pod: {pod}") + return pod + + +def create_tar_archive(source_path: Path, exclude_patterns: Optional[List[str]] = None) -> Path: + """Create 
a tar.gz archive of the source directory.""" + if not source_path.exists(): + raise FileNotFoundError(f"Source path does not exist: {source_path}") + + if not source_path.is_dir(): + raise ValueError(f"Source path must be a directory: {source_path}") + + # Default exclusions + if exclude_patterns is None: + exclude_patterns = [ + ".git", + ".codebase", + "__pycache__", + "*.pyc", + ".DS_Store", + "node_modules", + ".venv", + "venv", + ".env", + "*.log" + ] + + # Create temporary tar file + temp_dir = Path(tempfile.mkdtemp()) + tar_path = temp_dir / f"{source_path.name}.tar.gz" + + print(f"Creating archive: {tar_path}") + print(f"Source: {source_path}") + print(f"Excluding: {', '.join(exclude_patterns)}") + + def should_exclude(path: Path) -> bool: + """Check if a path should be excluded.""" + for pattern in exclude_patterns: + if pattern.startswith("*."): + # File extension pattern + if path.suffix == pattern[1:]: + return True + elif path.name == pattern: + return True + elif pattern in str(path): + return True + return False + + with tarfile.open(tar_path, "w:gz") as tar: + for item in source_path.rglob("*"): + if should_exclude(item): + continue + + arcname = item.relative_to(source_path.parent) + try: + tar.add(item, arcname=arcname) + except Exception as e: + print(f"Warning: Could not add {item}: {e}") + + size_mb = tar_path.stat().st_size / (1024 * 1024) + print(f"Archive created: {tar_path} ({size_mb:.2f} MB)") + + return tar_path + + +def upload_to_pod(tar_path: Path, namespace: str, pod_name: str, target_dir: str = "/work") -> str: + """Upload tar archive to a pod and extract it.""" + repo_name = tar_path.stem.replace(".tar", "") + target_path = f"{target_dir}/{repo_name}" + + print(f"Uploading to pod {pod_name}:{target_path}") + + # Create target directory in pod + run_command([ + "kubectl", "exec", "-n", namespace, pod_name, "--", + "mkdir", "-p", target_path + ]) + + # Copy tar file to pod + temp_tar = f"/tmp/{tar_path.name}" + run_command([ + 
"kubectl", "cp", str(tar_path), + f"{namespace}/{pod_name}:{temp_tar}" + ]) + + # Extract in pod + print(f"Extracting archive in pod...") + run_command([ + "kubectl", "exec", "-n", namespace, pod_name, "--", + "tar", "-xzf", temp_tar, "-C", target_dir + ]) + + # Clean up temp tar in pod + run_command([ + "kubectl", "exec", "-n", namespace, pod_name, "--", + "rm", temp_tar + ], check=False) + + print(f"Upload complete: {target_path}") + return target_path + + +def trigger_indexing( + namespace: str, + pod_name: str, + repo_path: str, + collection: Optional[str] = None, + recreate: bool = False +) -> Dict[str, Any]: + """Trigger indexing via the MCP indexer server.""" + print(f"Triggering indexing for {repo_path}") + + # Build Python command to call qdrant_index_root via MCP server + python_cmd = f""" +import sys +sys.path.insert(0, '/app') +from scripts.mcp_indexer_server import qdrant_index_root +import asyncio +import json +import os + +# Set working directory to the uploaded repo +os.chdir('{repo_path}') + +# Call indexing +result = asyncio.run(qdrant_index_root( + recreate={str(recreate)}, + collection={repr(collection) if collection else 'None'} +)) +print(json.dumps(result, indent=2)) +""" + + # Execute in pod + result = run_command([ + "kubectl", "exec", "-n", namespace, pod_name, "--", + "python", "-c", python_cmd + ], check=False) + + # Parse result + stdout = result.stdout + stderr = result.stderr + returncode = result.returncode + + # Extract return code from output + for line in stdout.split("\n"): + if line.startswith("RETURNCODE:"): + try: + returncode = int(line.split(":", 1)[1].strip()) + except: + pass + + return { + "ok": returncode == 0, + "code": returncode, + "stdout": stdout, + "stderr": stderr, + "collection": collection or "codebase", + "repo_path": repo_path + } + + +def main(): + parser = argparse.ArgumentParser( + description="Upload repositories to Kubernetes Context Engine cluster", + formatter_class=argparse.RawDescriptionHelpFormatter, 
+ epilog=""" +Examples: + # Upload and index a repository + python scripts/k8s_uploader.py /path/to/my-repo + + # Upload to specific namespace and collection + python scripts/k8s_uploader.py /path/to/my-repo --namespace prod --collection my-project + + # Upload and recreate collection (drops existing data) + python scripts/k8s_uploader.py /path/to/my-repo --recreate + + # Upload only (skip indexing) + python scripts/k8s_uploader.py /path/to/my-repo --skip-index + + # Upload to specific pod + python scripts/k8s_uploader.py /path/to/my-repo --pod mcp-indexer-abc123 + """ + ) + + parser.add_argument("source", type=str, help="Path to repository to upload") + parser.add_argument("--namespace", "-n", default="context-engine", help="Kubernetes namespace (default: context-engine)") + parser.add_argument("--pod", "-p", help="Specific pod name (default: auto-detect mcp-indexer pod)") + parser.add_argument("--collection", "-c", help="Qdrant collection name (default: codebase)") + parser.add_argument("--target-dir", default="/work", help="Target directory in pod (default: /work)") + parser.add_argument("--recreate", action="store_true", help="Recreate collection (drops existing data)") + parser.add_argument("--skip-index", action="store_true", help="Skip indexing after upload") + parser.add_argument("--exclude", action="append", help="Additional exclude patterns") + parser.add_argument("--keep-archive", action="store_true", help="Keep temporary archive file") + + args = parser.parse_args() + + source_path = Path(args.source).resolve() + + try: + # Get target pod + pod_name = get_indexer_pod(args.namespace, args.pod) + + # Create archive + tar_path = create_tar_archive(source_path, args.exclude) + + # Upload to pod + repo_path = upload_to_pod(tar_path, args.namespace, pod_name, args.target_dir) + + # Trigger indexing + if not args.skip_index: + result = trigger_indexing( + args.namespace, + pod_name, + repo_path, + args.collection, + args.recreate + ) + + print("\n" + "="*60) + 
print("INDEXING RESULT:") + print("="*60) + print(json.dumps(result, indent=2)) + + if result.get("ok") and result.get("code") == 0: + print("\n[SUCCESS] Upload and indexing completed successfully!") + else: + print("\n[WARNING] Indexing completed with warnings or errors") + sys.exit(1) + else: + print("\n[SUCCESS] Upload completed successfully (indexing skipped)") + + # Clean up + if not args.keep_archive: + tar_path.unlink() + tar_path.parent.rmdir() + print(f"Cleaned up temporary archive") + else: + print(f"Archive kept at: {tar_path}") + + except Exception as e: + print(f"\n[ERROR] {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() + diff --git a/scripts/mcp_indexer_server.py b/scripts/mcp_indexer_server.py index 54eb9692..76a34173 100644 --- a/scripts/mcp_indexer_server.py +++ b/scripts/mcp_indexer_server.py @@ -290,17 +290,12 @@ def _default_collection() -> str: def _work_script(name: str) -> str: - """Return path to a script under /work if present, else local ./scripts. - Keeps Docker/default behavior but works in local dev without /work mount. + """Return path to a script under /app/scripts (container installation). + + Scripts are always installed at /app/scripts in the container. + This is independent of where user repositories are mounted. 
""" - try: - p = os.path.join("/work", "scripts", name) - if os.path.exists(p): - return p - except Exception as e: - logger.debug(f"Failed to locate script {name}: {e}") - pass - return os.path.join(os.getcwd(), "scripts", name) + return os.path.join("/app", "scripts", name) # Invalidate router scratchpad after reindex to avoid stale state reuse From 9647f82011ec5f887dd281c076a887915c654efd Mon Sep 17 00:00:00 2001 From: john donalson Date: Sun, 2 Nov 2025 22:13:55 -0500 Subject: [PATCH 5/5] Update k8s_uploader.py --- scripts/k8s_uploader.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/scripts/k8s_uploader.py b/scripts/k8s_uploader.py index d4b1c7d3..3f712682 100755 --- a/scripts/k8s_uploader.py +++ b/scripts/k8s_uploader.py @@ -170,20 +170,25 @@ def trigger_indexing( """Trigger indexing via the MCP indexer server.""" print(f"Triggering indexing for {repo_path}") - # Build Python command to call qdrant_index_root via MCP server + # Build Python command to call qdrant_index via MCP server + # Use qdrant_index with subdir parameter to index specific repo python_cmd = f""" import sys sys.path.insert(0, '/app') -from scripts.mcp_indexer_server import qdrant_index_root +from scripts.mcp_indexer_server import qdrant_index import asyncio import json -import os -# Set working directory to the uploaded repo -os.chdir('{repo_path}') +# Extract subdir from repo_path (e.g., /work/test-repo -> test-repo) +repo_path = '{repo_path}' +if repo_path.startswith('/work/'): + subdir = repo_path[6:] # Remove '/work/' prefix +else: + subdir = repo_path # Call indexing -result = asyncio.run(qdrant_index_root( +result = asyncio.run(qdrant_index( + subdir=subdir, recreate={str(recreate)}, collection={repr(collection) if collection else 'None'} ))