diff --git a/deploy/kubernetes/README.md b/deploy/kubernetes/README.md index 5adfeb8c..a84050e8 100644 --- a/deploy/kubernetes/README.md +++ b/deploy/kubernetes/README.md @@ -199,6 +199,28 @@ MAX_MICRO_CHUNKS_PER_FILE: "200" WATCH_DEBOUNCE_SECS: "1.5" ``` +#### Syncing `configmap.yaml` from `.env` + +If you treat a `.env` file as the source of truth for configuration, you can use the helper script `scripts/sync_env_to_k8s.py` to keep `deploy/kubernetes/configmap.yaml` and the workloads in sync: + +```bash +cd /path/to/Context-Engine +python3 scripts/sync_env_to_k8s.py --env-file .env --k8s-dir deploy/kubernetes +``` + +This will: + +- Regenerate `deploy/kubernetes/configmap.yaml` so its `data:` keys match the provided `.env` (excluding sensitive keys such as `GLM_API_KEY` by default). +- Ensure all Deployments and Jobs in `deploy/kubernetes/` include: + + ```yaml + envFrom: + - configMapRef: + name: context-engine-config + ``` + +In CI (for example Bamboo), you can run the same script against the workspace copy of the manifests before `kustomize build . | kubectl apply -f -`, and then provide any sensitive values (such as `GLM_API_KEY`) via Kubernetes `Secret` resources or per-environment overrides instead of committing them to git. + ### Persistent Volumes The deployment uses HostPath volumes for simplicity (suitable for single-node clusters like minikube): diff --git a/deploy/kubernetes/configmap.yaml b/deploy/kubernetes/configmap.yaml index 903e4d76..b3b9b8f0 100644 --- a/deploy/kubernetes/configmap.yaml +++ b/deploy/kubernetes/configmap.yaml @@ -7,110 +7,103 @@ metadata: app: context-engine component: configuration data: - COLLECTION_NAME: "codebase" - EMBEDDING_MODEL: "BAAI/bge-base-en-v1.5" - EMBEDDING_PROVIDER: "fastembed" - - FASTMCP_HOST: "0.0.0.0" - FASTMCP_PORT: "8000" - FASTMCP_INDEXER_PORT: "8001" - - TOOL_STORE_DESCRIPTION: "Store reusable code snippets for later retrieval. 
The 'information' is a clear NL description; include the actual code in 'metadata.code' and add 'metadata.language' (e.g., python, typescript) and 'metadata.path' when known. Use this whenever you generate or refine a code snippet." - TOOL_FIND_DESCRIPTION: "Search for relevant code snippets using multiple phrasings of the query (multi-query). Prefer results where metadata.language matches the target file and metadata.path is relevant. You may pass optional filters (language, path_prefix, kind) which the server applies server-side. Include 'metadata.code', 'metadata.path', and 'metadata.language' in responses." - - RERANKER_ENABLED: "1" - RERANKER_TOPN: "100" - RERANKER_RETURN_M: "20" - RERANKER_TIMEOUT_MS: "3000" - RERANK_TIMEOUT_FLOOR_MS: "1000" - - EMBEDDING_WARMUP: "0" - RERANK_WARMUP: "0" - - HYBRID_IN_PROCESS: "1" - RERANK_IN_PROCESS: "1" - - USE_TREE_SITTER: "1" - - HYBRID_EXPAND: "1" - HYBRID_PER_PATH: "1" - HYBRID_SYMBOL_BOOST: "0.35" - HYBRID_RECENCY_WEIGHT: "0.1" - RERANK_EXPAND: "1" - - INDEX_SEMANTIC_CHUNKS: "0" - - MEMORY_SSE_ENABLED: "true" - MEMORY_MCP_URL: "http://mcp:8000/sse" - MEMORY_MCP_TIMEOUT: "6" - - LLM_PROVIDER: "ollama" - OLLAMA_HOST: "http://ollama:11434" - LLM_EXPAND_MODEL: "phi3:mini" - LLM_EXPAND_MAX: "4" - PRF_ENABLED: "1" - - REFRAG_MODE: "1" - MINI_VECTOR_NAME: "mini" - MINI_VEC_DIM: "64" - MINI_VEC_SEED: "1337" - HYBRID_MINI_WEIGHT: "1.0" - - INDEX_MICRO_CHUNKS: "1" - MICRO_CHUNK_TOKENS: "16" - MICRO_CHUNK_STRIDE: "8" - REFRAG_GATE_FIRST: "1" - REFRAG_CANDIDATES: "200" - - MICRO_OUT_MAX_SPANS: "3" - MICRO_MERGE_LINES: "4" - MICRO_BUDGET_TOKENS: "512" - MICRO_TOKENS_PER_LINE: "32" - - CTX_SUMMARY_CHARS: "0" - - REFRAG_DECODER: "1" - REFRAG_RUNTIME: "llamacpp" - REFRAG_ENCODER_MODEL: "BAAI/bge-base-en-v1.5" - REFRAG_PHI_PATH: "/work/models/refrag_phi_768_to_dmodel.bin" - REFRAG_SENSE: "heuristic" - - LLAMACPP_URL: "http://llamacpp:8080" - LLAMACPP_TIMEOUT_SEC: "180" - DECODER_MAX_TOKENS: "4000" - REFRAG_DECODER_MODE: "prompt" - 
REFRAG_SOFT_SCALE: "1.0" - - MAX_MICRO_CHUNKS_PER_FILE: "200" - QDRANT_TIMEOUT: "60" - MEMORY_AUTODETECT: "1" - MEMORY_COLLECTION_TTL_SECS: "300" - - FASTMCP_HTTP_TRANSPORT: "http" - FASTMCP_HTTP_PORT: "8002" - FASTMCP_HTTP_HEALTH_PORT: "18002" - FASTMCP_INDEXER_HTTP_PORT: "8003" - FASTMCP_INDEXER_HTTP_HEALTH_PORT: "18003" - - WATCH_DEBOUNCE_SECS: "1.5" - INDEX_UPSERT_BATCH: "128" - INDEX_UPSERT_RETRIES: "5" - - QDRANT_URL: "http://qdrant:6333" - - QDRANT_API_KEY: "" - REPO_NAME: "workspace" - FASTMCP_SERVER_NAME: "qdrant-mcp" - HOST_INDEX_PATH: "/work" - - INDEX_CHUNK_LINES: "120" - INDEX_CHUNK_OVERLAP: "20" - INDEX_BATCH_SIZE: "64" - INDEX_UPSERT_BACKOFF: "0.5" - FASTMCP_HEALTH_PORT: "18000" - CTX_MULTI_COLLECTION: "1" - CTX_DOC_PASS: "1" - DEBUG_CONTEXT_ANSWER: "0" - TOKENIZER_JSON: "/app/models/tokenizer.json" - LLAMACPP_MODEL_URL: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q8_0.gguf" - LLAMACPP_MODEL_NAME: "qwen2.5-1.5b-instruct-q8_0.gguf" + COLLECTION_NAME: codebase + CTX_SNIPPET_CHARS: '400' + CTX_SUMMARY_CHARS: '0' + DECODER_MAX_TOKENS: '4000' + EMBEDDING_MODEL: BAAI/bge-base-en-v1.5 + EMBEDDING_PROVIDER: fastembed + EMBEDDING_WARMUP: '0' + FASTMCP_HOST: 0.0.0.0 + FASTMCP_HTTP_HEALTH_PORT: '18002' + FASTMCP_HTTP_PORT: '8002' + FASTMCP_HTTP_TRANSPORT: http + FASTMCP_INDEXER_HTTP_HEALTH_PORT: '18003' + FASTMCP_INDEXER_HTTP_PORT: '8003' + FASTMCP_INDEXER_PORT: '8001' + FASTMCP_PORT: '8000' + GLM_API_BASE: https://api.z.ai/api/coding/paas/v4/ + GLM_MODEL: glm-4.6 + HOST_INDEX_PATH: ./dev-workspace + HYBRID_EXPAND: '0' + HYBRID_IN_PROCESS: '1' + HYBRID_MINI_WEIGHT: '1.0' + HYBRID_PER_PATH: '1' + HYBRID_RECENCY_WEIGHT: '0.1' + HYBRID_RESULTS_CACHE: '128' + HYBRID_RESULTS_CACHE_ENABLED: '1' + HYBRID_SYMBOL_BOOST: '0.35' + INDEX_CHUNK_LINES: '60' + INDEX_CHUNK_OVERLAP: '10' + INDEX_MICRO_CHUNKS: '0' + INDEX_SEMANTIC_CHUNKS: '1' + LLAMACPP_EXTRA_ARGS: '' + LLAMACPP_GPU_LAYERS: '32' + LLAMACPP_GPU_SPLIT: '' + 
LLAMACPP_THREADS: '6' + LLAMACPP_TIMEOUT_SEC: '300' + LLAMACPP_URL: http://host.docker.internal:8081 + LLAMACPP_USE_GPU: '1' + LLM_EXPAND_MAX: '0' + LLM_EXPAND_MODEL: phi3:mini + LLM_PROVIDER: ollama + MAX_CHANGED_SYMBOLS_RATIO: '0.6 # If >60% of symbols changed, do full reprocessing' + MAX_EMBED_CACHE: '16384' + MAX_MICRO_CHUNKS_PER_FILE: '500' + MCP_INDEXER_URL: http://localhost:8003/mcp + MEMORY_AUTODETECT: '1' + MEMORY_COLLECTION_TTL_SECS: '300' + MEMORY_MCP_TIMEOUT: '6' + MEMORY_MCP_URL: http://mcp:8000/sse + MEMORY_SSE_ENABLED: 'true' + MICRO_BUDGET_TOKENS: '1500' + MICRO_CHUNK_STRIDE: '48' + MICRO_CHUNK_TOKENS: '24' + MICRO_MERGE_LINES: '4' + MICRO_OUT_MAX_SPANS: '3' + MICRO_TOKENS_PER_LINE: '32' + MINI_VECTOR_NAME: mini + MINI_VEC_DIM: '64' + MINI_VEC_SEED: '1337' + MULTI_REPO_MODE: '1' + OLLAMA_HOST: http://host.docker.internal:11434 + PRF_ENABLED: '1' + QDRANT_TIMEOUT: '20' + QDRANT_URL: http://qdrant:6333 + REFRAG_CANDIDATES: '200' + REFRAG_COMMIT_DESCRIBE: '1' + REFRAG_DECODER: '1' + REFRAG_DECODER_MODE: 'prompt # prompt|soft' + REFRAG_ENCODER_MODEL: BAAI/bge-base-en-v1.5 + REFRAG_GATE_FIRST: '1' + REFRAG_MODE: '1' + REFRAG_PHI_PATH: /work/models/refrag_phi_768_to_dmodel.bin + REFRAG_PSEUDO_DESCRIBE: '1' + REFRAG_RUNTIME: glm + REFRAG_SENSE: heuristic + REFRAG_SOFT_SCALE: '1.0' + REMOTE_UPLOAD_GIT_MAX_COMMITS: '500' + RERANKER_ENABLED: '1' + RERANKER_ONNX_PATH: /work/models/model_qint8_avx512_vnni.onnx + RERANKER_RETURN_M: '20' + RERANKER_TIMEOUT_MS: '3000' + RERANKER_TOKENIZER_PATH: /work/models/tokenizer.json + RERANKER_TOPN: '100' + RERANK_EXPAND: '1' + RERANK_IN_PROCESS: '1' + RERANK_TIMEOUT_FLOOR_MS: '1000' + RERANK_WARMUP: '0' + SMART_SYMBOL_REINDEXING: '1' + STRICT_MEMORY_RESTORE: '1' + TOOL_FIND_DESCRIPTION: Search for relevant code snippets using multiple phrasings + of the query (multi-query). Prefer results where metadata.language matches the + target file and metadata.path is relevant. 
You may pass optional filters (language, + path_prefix, kind) which the server applies server-side. Include 'metadata.code', + 'metadata.path', and 'metadata.language' in responses. + TOOL_STORE_DESCRIPTION: Store reusable code snippets for later retrieval. The 'information' + is a clear NL description; include the actual code in 'metadata.code' and add + 'metadata.language' (e.g., python, typescript) and 'metadata.path' when known. + Use this whenever you generate or refine a code snippet. + USE_GPU_DECODER: '0' + USE_TREE_SITTER: '1' + WATCH_DEBOUNCE_SECS: '4' diff --git a/deploy/kubernetes/indexer-services.yaml b/deploy/kubernetes/indexer-services.yaml index a8351c35..f176e615 100644 --- a/deploy/kubernetes/indexer-services.yaml +++ b/deploy/kubernetes/indexer-services.yaml @@ -1,6 +1,3 @@ ---- -# Indexer Service Deployment (file change monitoring and reindexing) -# This is a template - copy and customize for each repository apiVersion: apps/v1 kind: Deployment metadata: @@ -26,7 +23,9 @@ spec: - name: watcher image: context-engine-indexer-service imagePullPolicy: IfNotPresent - command: ["python", "/app/scripts/watch_index.py"] + command: + - python + - /app/scripts/watch_index.py workingDir: /work env: - name: QDRANT_URL @@ -45,7 +44,7 @@ spec: name: context-engine-config key: EMBEDDING_MODEL - name: WATCH_ROOT - value: "/work" + value: /work - name: QDRANT_TIMEOUT valueFrom: configMapKeyRef: @@ -73,17 +72,20 @@ spec: key: WATCH_DEBOUNCE_SECS resources: requests: - memory: "512Mi" - cpu: "250m" + memory: 512Mi + cpu: 250m limits: - memory: "2Gi" - cpu: "1000m" + memory: 2Gi + cpu: 1000m volumeMounts: - name: work-volume mountPath: /work readOnly: true - name: metadata-volume mountPath: /work/.codebase + envFrom: + - configMapRef: + name: context-engine-config volumes: - name: work-volume persistentVolumeClaim: @@ -91,9 +93,7 @@ spec: - name: metadata-volume persistentVolumeClaim: claimName: code-metadata-pvc - --- -# Indexer Job (One-shot code indexing) 
apiVersion: batch/v1 kind: Job metadata: @@ -115,7 +115,9 @@ spec: - name: indexer image: context-engine-indexer-service imagePullPolicy: IfNotPresent - command: ["python", "/app/scripts/ingest_code.py"] + command: + - python + - /app/scripts/ingest_code.py workingDir: /work env: - name: QDRANT_URL @@ -135,17 +137,20 @@ spec: key: EMBEDDING_MODEL resources: requests: - memory: "1Gi" - cpu: "500m" + memory: 1Gi + cpu: 500m limits: - memory: "4Gi" - cpu: "2000m" + memory: 4Gi + cpu: 2000m volumeMounts: - name: work-volume mountPath: /work readOnly: true - name: metadata-volume mountPath: /work/.codebase + envFrom: + - configMapRef: + name: context-engine-config volumes: - name: work-volume persistentVolumeClaim: @@ -153,9 +158,7 @@ spec: - name: metadata-volume persistentVolumeClaim: claimName: code-metadata-pvc - --- -# Index Initialization Job apiVersion: batch/v1 kind: Job metadata: @@ -177,7 +180,9 @@ spec: - name: init-payload image: context-engine-indexer-service imagePullPolicy: IfNotPresent - command: ["python", "/app/scripts/create_indexes.py"] + command: + - python + - /app/scripts/create_indexes.py workingDir: /work env: - name: QDRANT_URL @@ -192,17 +197,20 @@ spec: key: COLLECTION_NAME resources: requests: - memory: "512Mi" - cpu: "250m" + memory: 512Mi + cpu: 250m limits: - memory: "1Gi" - cpu: "500m" + memory: 1Gi + cpu: 500m volumeMounts: - name: work-volume mountPath: /work readOnly: true - name: metadata-volume mountPath: /work/.codebase + envFrom: + - configMapRef: + name: context-engine-config volumes: - name: work-volume persistentVolumeClaim: diff --git a/deploy/kubernetes/llamacpp.yaml b/deploy/kubernetes/llamacpp.yaml index 331bdabe..398771a4 100644 --- a/deploy/kubernetes/llamacpp.yaml +++ b/deploy/kubernetes/llamacpp.yaml @@ -1,5 +1,3 @@ ---- -# Optional Llama.cpp Service (Text Generation) apiVersion: apps/v1 kind: Deployment metadata: @@ -9,7 +7,7 @@ metadata: app: context-engine component: llamacpp spec: - replicas: 1 # Set to 0 if not 
needed + replicas: 1 selector: matchLabels: app: context-engine @@ -20,7 +18,6 @@ spec: app: context-engine component: llamacpp spec: - # Init container to download model if not present initContainers: - name: model-downloader image: curlimages/curl:latest @@ -36,42 +33,26 @@ spec: name: context-engine-config key: LLAMACPP_MODEL_NAME command: - - sh - - -c - - | - MODEL_PATH="/models/${LLAMACPP_MODEL_NAME}" - - if [ -f "$MODEL_PATH" ]; then - echo "Model already exists at $MODEL_PATH" - ls -lh "$MODEL_PATH" - exit 0 - fi - - echo "Downloading model from ${LLAMACPP_MODEL_URL}..." - echo "Target: $MODEL_PATH" - - curl -L --progress-bar -o "$MODEL_PATH.tmp" "${LLAMACPP_MODEL_URL}" - - if [ $? -eq 0 ]; then - mv "$MODEL_PATH.tmp" "$MODEL_PATH" - echo "Model downloaded successfully" - ls -lh "$MODEL_PATH" - else - echo "Failed to download model" - rm -f "$MODEL_PATH.tmp" - exit 1 - fi + - sh + - -c + - "MODEL_PATH=\"/models/${LLAMACPP_MODEL_NAME}\"\n\nif [ -f \"$MODEL_PATH\"\ + \ ]; then\n echo \"Model already exists at $MODEL_PATH\"\n ls -lh \"$MODEL_PATH\"\ + \n exit 0\nfi\n\necho \"Downloading model from ${LLAMACPP_MODEL_URL}...\"\ + \necho \"Target: $MODEL_PATH\"\n\ncurl -L --progress-bar -o \"$MODEL_PATH.tmp\"\ + \ \"${LLAMACPP_MODEL_URL}\"\n\nif [ $? 
-eq 0 ]; then\n mv \"$MODEL_PATH.tmp\"\ + \ \"$MODEL_PATH\"\n echo \"Model downloaded successfully\"\n ls -lh \"\ + $MODEL_PATH\"\nelse\n echo \"Failed to download model\"\n rm -f \"$MODEL_PATH.tmp\"\ + \n exit 1\nfi\n" volumeMounts: - name: models mountPath: /models resources: requests: - memory: "512Mi" - cpu: "100m" + memory: 512Mi + cpu: 100m limits: - memory: "2Gi" - cpu: "500m" - + memory: 2Gi + cpu: 500m containers: - name: llamacpp image: ghcr.io/ggerganov/llama.cpp:server @@ -83,35 +64,36 @@ spec: name: context-engine-config key: LLAMACPP_MODEL_NAME - name: LLAMA_ARG_MODEL - value: "/models/model.gguf" + value: /models/model.gguf - name: LLAMA_ARG_CTX_SIZE - value: "8192" + value: '8192' - name: LLAMA_ARG_HOST - value: "0.0.0.0" + value: 0.0.0.0 - name: LLAMA_ARG_PORT - value: "8080" + value: '8080' ports: - name: http containerPort: 8080 protocol: TCP - command: ["llama-server"] + command: + - llama-server args: - - "--model" - - "/models/model.gguf" - - "--host" - - "0.0.0.0" - - "--port" - - "8080" - - "--ctx-size" - - "8192" - - "--no-warmup" + - --model + - /models/model.gguf + - --host + - 0.0.0.0 + - --port + - '8080' + - --ctx-size + - '8192' + - --no-warmup resources: requests: - memory: "2Gi" - cpu: "1000m" + memory: 2Gi + cpu: 1000m limits: - memory: "8Gi" - cpu: "4000m" + memory: 8Gi + cpu: 4000m volumeMounts: - name: models mountPath: /models @@ -130,13 +112,14 @@ spec: initialDelaySeconds: 30 periodSeconds: 10 timeoutSeconds: 5 + envFrom: + - configMapRef: + name: context-engine-config volumes: - name: models persistentVolumeClaim: claimName: code-models-pvc - --- -# Llama.cpp Service apiVersion: v1 kind: Service metadata: @@ -146,18 +129,16 @@ metadata: app: context-engine component: llamacpp spec: - type: ClusterIP # Change to LoadBalancer for external access + type: ClusterIP ports: - name: http port: 8080 targetPort: http - # nodePort: 30808 # Optional: specify node port protocol: TCP selector: app: context-engine component: llamacpp --- 
-# Optional: Llama.cpp External Service apiVersion: v1 kind: Service metadata: diff --git a/deploy/kubernetes/mcp-http.yaml b/deploy/kubernetes/mcp-http.yaml index e48c29df..19c2894f 100644 --- a/deploy/kubernetes/mcp-http.yaml +++ b/deploy/kubernetes/mcp-http.yaml @@ -1,5 +1,3 @@ ---- -# MCP Memory Server (HTTP) Deployment apiVersion: apps/v1 kind: Deployment metadata: @@ -25,8 +23,20 @@ spec: - name: mcp-memory-http image: context-engine-memory imagePullPolicy: IfNotPresent - command: ["python", "-m", "mcp.server.fastmcp"] - args: ["--server-name", "context-engine-http", "--host", "0.0.0.0", "--port", "8000", "--transport", "http", "/app/scripts/memory_server.py"] + command: + - python + - -m + - mcp.server.fastmcp + args: + - --server-name + - context-engine-http + - --host + - 0.0.0.0 + - --port + - '8000' + - --transport + - http + - /app/scripts/memory_server.py ports: - name: http containerPort: 8000 @@ -71,21 +81,21 @@ spec: name: context-engine-config key: FASTMCP_HOST - name: FASTMCP_PORT - value: "8000" + value: '8000' - name: FASTMCP_TRANSPORT valueFrom: configMapKeyRef: name: context-engine-config key: FASTMCP_HTTP_TRANSPORT - name: FASTMCP_HEALTH_PORT - value: "18000" + value: '18000' resources: requests: - memory: "512Mi" - cpu: "250m" + memory: 512Mi + cpu: 250m limits: - memory: "2Gi" - cpu: "1000m" + memory: 2Gi + cpu: 1000m volumeMounts: - name: work-volume mountPath: /work @@ -102,13 +112,14 @@ spec: port: health initialDelaySeconds: 10 periodSeconds: 5 + envFrom: + - configMapRef: + name: context-engine-config volumes: - name: work-volume persistentVolumeClaim: claimName: code-repos-pvc - --- -# MCP Memory Server (HTTP) Service apiVersion: v1 kind: Service metadata: @@ -118,7 +129,7 @@ metadata: app: context-engine component: mcp-memory-http spec: - type: ClusterIP # Change to LoadBalancer for external access + type: ClusterIP ports: - name: http port: 8002 @@ -131,9 +142,7 @@ spec: selector: app: context-engine component: mcp-memory-http - --- 
-# Optional: MCP Memory HTTP External Service apiVersion: v1 kind: Service metadata: @@ -158,9 +167,7 @@ spec: selector: app: context-engine component: mcp-memory-http - --- -# MCP Indexer Server (HTTP) Deployment apiVersion: apps/v1 kind: Deployment metadata: @@ -186,8 +193,20 @@ spec: - name: mcp-indexer-http image: context-engine-indexer imagePullPolicy: IfNotPresent - command: ["python", "-m", "mcp.server.fastmcp"] - args: ["--server-name", "context-engine-indexer-http", "--host", "0.0.0.0", "--port", "8001", "--transport", "http", "/app/scripts/indexer_server.py"] + command: + - python + - -m + - mcp.server.fastmcp + args: + - --server-name + - context-engine-indexer-http + - --host + - 0.0.0.0 + - --port + - '8001' + - --transport + - http + - /app/scripts/indexer_server.py ports: - name: http containerPort: 8001 @@ -257,21 +276,21 @@ spec: name: context-engine-config key: FASTMCP_HOST - name: FASTMCP_INDEXER_PORT - value: "8001" + value: '8001' - name: FASTMCP_TRANSPORT valueFrom: configMapKeyRef: name: context-engine-config key: FASTMCP_HTTP_TRANSPORT - name: FASTMCP_HEALTH_PORT - value: "18001" + value: '18001' resources: requests: - memory: "512Mi" - cpu: "250m" + memory: 512Mi + cpu: 250m limits: - memory: "2Gi" - cpu: "1000m" + memory: 2Gi + cpu: 1000m volumeMounts: - name: work-volume mountPath: /work @@ -289,6 +308,9 @@ spec: port: health initialDelaySeconds: 10 periodSeconds: 5 + envFrom: + - configMapRef: + name: context-engine-config volumes: - name: work-volume persistentVolumeClaim: @@ -296,9 +318,7 @@ spec: - name: codebase-volume persistentVolumeClaim: claimName: code-metadata-pvc - --- -# MCP Indexer Server (HTTP) Service apiVersion: v1 kind: Service metadata: @@ -308,7 +328,7 @@ metadata: app: context-engine component: mcp-indexer-http spec: - type: ClusterIP # Change to LoadBalancer for external access + type: ClusterIP ports: - name: http port: 8003 @@ -321,9 +341,7 @@ spec: selector: app: context-engine component: mcp-indexer-http - --- -# 
Optional: MCP Indexer HTTP External Service apiVersion: v1 kind: Service metadata: diff --git a/deploy/kubernetes/mcp-indexer.yaml b/deploy/kubernetes/mcp-indexer.yaml index bdc1103e..39089e2b 100644 --- a/deploy/kubernetes/mcp-indexer.yaml +++ b/deploy/kubernetes/mcp-indexer.yaml @@ -1,5 +1,3 @@ ---- -# MCP Indexer Server (SSE) Deployment apiVersion: apps/v1 kind: Deployment metadata: @@ -25,7 +23,9 @@ spec: - name: mcp-indexer image: context-engine-indexer imagePullPolicy: IfNotPresent - command: ["python", "/app/scripts/mcp_indexer_server.py"] + command: + - python + - /app/scripts/mcp_indexer_server.py ports: - name: sse containerPort: 8001 @@ -45,9 +45,9 @@ spec: name: context-engine-config key: FASTMCP_INDEXER_PORT - name: FASTMCP_HEALTH_PORT - value: "18001" + value: '18001' - name: FASTMCP_TRANSPORT - value: "sse" + value: sse - name: QDRANT_URL valueFrom: configMapKeyRef: @@ -70,11 +70,11 @@ spec: key: CTX_MULTI_COLLECTION resources: requests: - memory: "512Mi" - cpu: "250m" + memory: 512Mi + cpu: 250m limits: - memory: "2Gi" - cpu: "1000m" + memory: 2Gi + cpu: 1000m volumeMounts: - name: work-volume mountPath: /work @@ -92,6 +92,9 @@ spec: port: health initialDelaySeconds: 10 periodSeconds: 5 + envFrom: + - configMapRef: + name: context-engine-config volumes: - name: work-volume persistentVolumeClaim: @@ -99,9 +102,7 @@ spec: - name: codebase-volume persistentVolumeClaim: claimName: code-metadata-pvc - --- -# MCP Indexer Server (SSE) Service apiVersion: v1 kind: Service metadata: @@ -111,7 +112,7 @@ metadata: app: context-engine component: mcp-indexer spec: - type: ClusterIP # Change to LoadBalancer for external access + type: ClusterIP ports: - name: sse port: 8001 @@ -124,9 +125,7 @@ spec: selector: app: context-engine component: mcp-indexer - --- -# Optional: MCP Indexer External Service apiVersion: v1 kind: Service metadata: diff --git a/deploy/kubernetes/mcp-memory.yaml b/deploy/kubernetes/mcp-memory.yaml index bac047e7..f971108a 100644 --- 
a/deploy/kubernetes/mcp-memory.yaml +++ b/deploy/kubernetes/mcp-memory.yaml @@ -1,5 +1,3 @@ ---- -# MCP Memory Server (SSE) Deployment apiVersion: apps/v1 kind: Deployment metadata: @@ -25,7 +23,9 @@ spec: - name: mcp-memory image: context-engine-memory imagePullPolicy: IfNotPresent - command: ["python", "/app/scripts/mcp_memory_server.py"] + command: + - python + - /app/scripts/mcp_memory_server.py ports: - name: sse containerPort: 8000 @@ -45,9 +45,9 @@ spec: name: context-engine-config key: FASTMCP_PORT - name: FASTMCP_HEALTH_PORT - value: "18000" + value: '18000' - name: FASTMCP_TRANSPORT - value: "sse" + value: sse - name: QDRANT_URL valueFrom: configMapKeyRef: @@ -65,11 +65,11 @@ spec: key: EMBEDDING_MODEL resources: requests: - memory: "1Gi" - cpu: "500m" + memory: 1Gi + cpu: 500m limits: - memory: "4Gi" - cpu: "2" + memory: 4Gi + cpu: '2' volumeMounts: - name: work-volume mountPath: /work @@ -86,13 +86,14 @@ spec: port: health initialDelaySeconds: 10 periodSeconds: 5 + envFrom: + - configMapRef: + name: context-engine-config volumes: - name: work-volume persistentVolumeClaim: claimName: code-repos-pvc - --- -# MCP Memory Server (SSE) Service apiVersion: v1 kind: Service metadata: @@ -102,7 +103,7 @@ metadata: app: context-engine component: mcp-memory spec: - type: ClusterIP # Change to LoadBalancer for external access + type: ClusterIP ports: - name: sse port: 8000 @@ -115,9 +116,7 @@ spec: selector: app: context-engine component: mcp-memory - --- -# Optional: MCP Memory External Service apiVersion: v1 kind: Service metadata: @@ -142,4 +141,3 @@ spec: selector: app: context-engine component: mcp-memory - diff --git a/deploy/kubernetes/upload-service.yaml b/deploy/kubernetes/upload-service.yaml index 1379ab7c..db827dc3 100644 --- a/deploy/kubernetes/upload-service.yaml +++ b/deploy/kubernetes/upload-service.yaml @@ -1,5 +1,3 @@ ---- -# Delta Upload Service Deployment apiVersion: apps/v1 kind: Deployment metadata: @@ -26,9 +24,11 @@ spec: fsGroup: 1000 
#!/usr/bin/env python3
"""Sync .env into Kubernetes configmap.yaml and inject envFrom into workloads.

Usage (from repo root):

    python scripts/sync_env_to_k8s.py

This will:
- Read .env at the repo root.
- Regenerate deploy/kubernetes/configmap.yaml so its data matches .env.
- Add envFrom: configMapRef: context-engine-config to every Deployment/Job
  container in deploy/kubernetes/*.yaml (if not already present).

Caveats:
- Manifests that need the envFrom injection are re-serialised with PyYAML,
  which drops YAML comments and normalises quoting/indentation.
- Keys that exist in the current ConfigMap but not in .env are removed; a
  warning lists them because workloads may still reference them through
  ``configMapKeyRef``.

Requires: PyYAML (pip install pyyaml)
"""

import argparse
import re
from pathlib import Path
from typing import Dict, Iterable, List, Optional

try:
    import yaml  # type: ignore
except ImportError:  # pragma: no cover
    # Deferred: the pure helpers (parse_env_file, ensure_envfrom_in_doc) do not
    # need PyYAML, so the hard failure happens in _require_yaml() at first use.
    yaml = None

# An inline comment in an unquoted .env value starts at a '#' that is preceded
# by whitespace; a bare '#' (e.g. a URL fragment) is part of the value.
_INLINE_COMMENT = re.compile(r"\s#")
# Optional shell-style "export " prefix in front of the key.
_EXPORT_PREFIX = re.compile(r"^export\s+")


def _require_yaml():
    """Return the PyYAML module, or exit with an install hint if it is missing."""
    if yaml is None:
        raise SystemExit("This script requires PyYAML. Install with 'pip install pyyaml'.")
    return yaml


def repo_root() -> Path:
    """Return the repo root (one level above scripts/)."""
    return Path(__file__).resolve().parents[1]


def _clean_env_value(raw: str) -> str:
    """Normalise the text to the right of '=' in a .env line.

    - ``"value"`` / ``'value'``, optionally followed by ``# comment``, yields
      the text between the quotes ('#' inside the quotes is kept).
    - For unquoted values, everything from the first whitespace-preceded '#'
      onwards is dropped as an inline comment.
    """
    value = raw.strip()
    if len(value) >= 2 and value[0] in ("'", '"'):
        quote = value[0]
        closing = value.find(quote, 1)
        if closing != -1:
            trailing = value[closing + 1:].lstrip()
            if not trailing or trailing.startswith("#"):
                return value[1:closing]
        # Legacy rule: a value that merely starts and ends with the same quote
        # character is unwrapped as a whole (e.g. '"a"b"' -> 'a"b').
        if value.endswith(quote):
            return value[1:-1]
    # Search the unstripped text so that "KEY= # note" yields an empty value.
    comment = _INLINE_COMMENT.search(raw)
    if comment:
        value = raw[:comment.start()].strip()
    return value


def parse_env_file(env_path: Path) -> Dict[str, str]:
    """Parse a simple KEY=VALUE .env file into a dict of strings.

    - Ignores blank lines, lines starting with '#', and lines without '='.
    - Splits on the first '='; an optional leading ``export`` is dropped.
    - Strips surrounding quotes and trailing inline comments from values.
    - Later occurrences of a key override earlier ones.

    Args:
        env_path: Path to the .env file (UTF-8, optional BOM).

    Returns:
        Mapping of variable name to its string value.
    """
    data: Dict[str, str] = {}
    with env_path.open("r", encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            key, sep, raw_value = line.partition("=")
            if not sep:
                continue
            key = _EXPORT_PREFIX.sub("", key.strip()).strip()
            if not key:
                continue
            data[key] = _clean_env_value(raw_value)
    return data


def update_configmap(
    configmap_path: Path,
    env_data: dict,
    name: str,
    namespace: str,
    exclude_keys: Optional[Iterable[str]] = None,
) -> None:
    """Regenerate configmap.yaml so that data matches env_data.

    ``metadata.name`` is always set to ``name`` so it stays consistent with the
    ``envFrom`` references injected by :func:`update_workloads`. An existing
    ``metadata.namespace`` is preserved (``namespace`` is only used when the
    file has none), as are any existing labels/annotations.

    Optionally excludes specific keys (e.g. sensitive secrets like GLM_API_KEY)
    from the generated ConfigMap data. Keys present in the previous file but
    absent from the new data are reported on stdout.

    Args:
        configmap_path: File to (over)write.
        env_data: Parsed .env mapping.
        name: ConfigMap name.
        namespace: Namespace used when the existing file does not declare one.
        exclude_keys: Keys to leave out of ``data``.
    """
    yaml_mod = _require_yaml()

    existing_meta: dict = {}
    existing_data: dict = {}
    if configmap_path.exists():
        with configmap_path.open("r", encoding="utf-8") as f:
            docs = list(yaml_mod.safe_load_all(f))
        if docs and isinstance(docs[0], dict):
            meta = docs[0].get("metadata")
            if isinstance(meta, dict):
                existing_meta = meta
            previous = docs[0].get("data")
            if isinstance(previous, dict):
                existing_data = previous

    metadata = dict(existing_meta)
    metadata["name"] = name
    metadata.setdefault("namespace", namespace)

    excluded = set(exclude_keys or ())
    data = {k: str(v) for k, v in sorted(env_data.items()) if k not in excluded}

    # Workloads may still read these through configMapKeyRef; losing them
    # silently would only surface later as a failed container start.
    dropped = sorted(str(k) for k in existing_data if k not in data)
    if dropped:
        print(
            "Warning: keys present in the previous ConfigMap but not in the new data "
            f"were removed: {', '.join(dropped)}"
        )

    cm = {
        "apiVersion": "v1",
        "kind": "ConfigMap",
        "metadata": metadata,
        "data": data,
    }

    with configmap_path.open("w", encoding="utf-8") as f:
        yaml_mod.safe_dump(cm, f, default_flow_style=False, sort_keys=False)


def ensure_envfrom_in_doc(doc: dict, configmap_name: str) -> bool:
    """Ensure every container in a Deployment/Job has envFrom for the given ConfigMap.

    Only ``spec.template.spec.containers`` is touched (not ``initContainers``).
    A non-list ``envFrom`` is normalised to a list. Entries that are not
    mappings are skipped.

    Args:
        doc: One parsed manifest document; modified in place.
        configmap_name: Name to reference via ``configMapRef``.

    Returns:
        True if the document was modified.
    """
    if doc.get("kind") not in {"Deployment", "Job"}:
        return False

    # Walk doc.spec.template.spec, bailing out on anything that is not a mapping.
    pod_spec = doc
    for key in ("spec", "template", "spec"):
        pod_spec = pod_spec.get(key)
        if not isinstance(pod_spec, dict):
            return False

    containers = pod_spec.get("containers")
    if not isinstance(containers, list) or not containers:
        return False

    wanted = {"configMapRef": {"name": configmap_name}}
    changed = False

    for container in containers:
        if not isinstance(container, dict):
            continue

        env_from = container.get("envFrom") or []
        if not isinstance(env_from, list):
            # Normalising is itself a modification that must be written back.
            env_from = [env_from]
            container["envFrom"] = env_from
            changed = True

        already_present = any(
            isinstance(entry, dict)
            and isinstance(entry.get("configMapRef"), dict)
            and entry["configMapRef"].get("name") == configmap_name
            for entry in env_from
        )
        if not already_present:
            env_from.append(dict(wanted, configMapRef=dict(wanted["configMapRef"])))
            container["envFrom"] = env_from
            changed = True

    return changed


def _block_style_dumper(yaml_mod):
    """Build a SafeDumper subclass that writes multi-line strings as '|' blocks."""

    class _BlockStyleDumper(yaml_mod.SafeDumper):
        """SafeDumper variant keeping embedded scripts readable."""

    def _represent_str(dumper, value):
        style = "|" if "\n" in value else None
        return dumper.represent_scalar("tag:yaml.org,2002:str", value, style=style)

    # add_representer copies the representer table onto the subclass, so the
    # shared SafeDumper is left untouched.
    _BlockStyleDumper.add_representer(str, _represent_str)
    return _BlockStyleDumper


def update_workloads(k8s_dir: Path, configmap_name: str) -> None:
    """Walk deploy/kubernetes and inject envFrom into all Deployments/Jobs.

    Only files in which at least one document changed are rewritten. Rewritten
    files lose their YAML comments (PyYAML does not round-trip them); empty
    documents are not written back.

    Args:
        k8s_dir: Directory whose ``*.yaml`` files are scanned (non-recursive).
        configmap_name: ConfigMap name to reference.
    """
    yaml_mod = _require_yaml()
    dumper = _block_style_dumper(yaml_mod)

    for path in sorted(k8s_dir.glob("*.yaml")):
        with path.open("r", encoding="utf-8") as f:
            docs = list(yaml_mod.safe_load_all(f))

        changed = False
        for doc in docs:
            if isinstance(doc, dict) and ensure_envfrom_in_doc(doc, configmap_name):
                changed = True
        if not changed:
            continue

        to_write: List[object] = [doc for doc in docs if doc is not None]
        with path.open("w", encoding="utf-8") as f:
            yaml_mod.dump_all(
                to_write,
                f,
                Dumper=dumper,
                default_flow_style=False,
                sort_keys=False,
            )
        print(f"Updated envFrom in {path}")


def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: If the .env file or manifests directory is missing, or
            PyYAML is not installed.
    """
    root = repo_root()

    parser = argparse.ArgumentParser(
        description="Sync .env into configmap.yaml and inject envFrom into Kubernetes workloads.",
    )
    parser.add_argument(
        "--env-file",
        default=str(root / ".env"),
        help="Path to .env file (default: repo_root/.env)",
    )
    parser.add_argument(
        "--k8s-dir",
        default=str(root / "deploy" / "kubernetes"),
        help="Path to Kubernetes manifests directory (default: deploy/kubernetes)",
    )
    parser.add_argument(
        "--configmap-name",
        default="context-engine-config",
        help="Name of the ConfigMap to update (default: context-engine-config)",
    )
    parser.add_argument(
        "--namespace",
        default="context-engine",
        help="Namespace for the ConfigMap (default: context-engine)",
    )
    parser.add_argument(
        "--exclude-key",
        action="append",
        default=None,
        help=(
            "Environment key to exclude from the generated ConfigMap data. "
            "May be passed multiple times. Defaults to ['GLM_API_KEY'] if not provided."
        ),
    )

    args = parser.parse_args(argv)

    env_path = Path(args.env_file)
    k8s_dir = Path(args.k8s_dir)

    if not env_path.is_file():
        raise SystemExit(f".env file not found at {env_path}")
    if not k8s_dir.is_dir():
        raise SystemExit(f"Kubernetes directory not found at {k8s_dir}")
    # Fail before touching anything if the YAML backend is unavailable.
    _require_yaml()

    print(f"Loading .env from {env_path}...")
    env_data = parse_env_file(env_path)
    print(f"Loaded {len(env_data)} keys from .env")

    configmap_path = k8s_dir / "configmap.yaml"
    print(f"Updating ConfigMap at {configmap_path}...")
    exclude_keys = args.exclude_key if args.exclude_key is not None else ["GLM_API_KEY"]
    if exclude_keys:
        print(f"Excluding keys from ConfigMap: {', '.join(sorted(set(exclude_keys)))}")
    update_configmap(configmap_path, env_data, args.configmap_name, args.namespace, exclude_keys)

    print(f"Injecting envFrom: configMapRef: {args.configmap_name} into workloads under {k8s_dir}...")
    update_workloads(k8s_dir, args.configmap_name)

    print("Done. Review and commit the updated YAMLs if they look correct.")


if __name__ == "__main__":  # pragma: no cover
    main()