Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions deploy/kubernetes/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,28 @@ MAX_MICRO_CHUNKS_PER_FILE: "200"
WATCH_DEBOUNCE_SECS: "1.5"
```

#### Syncing `configmap.yaml` from `.env`

If you treat a `.env` file as the source of truth for configuration, you can use the helper script `scripts/sync_env_to_k8s.py` to keep `deploy/kubernetes/configmap.yaml` and the workloads in sync:

```bash
cd /path/to/Context-Engine
python3 scripts/sync_env_to_k8s.py --env-file .env --k8s-dir deploy/kubernetes
```

This will:

- Regenerate `deploy/kubernetes/configmap.yaml` so its `data:` keys match the provided `.env` (excluding sensitive keys such as `GLM_API_KEY` by default).
- Ensure all Deployments and Jobs in `deploy/kubernetes/` include:

```yaml
envFrom:
- configMapRef:
name: context-engine-config
```

In CI (for example Bamboo), you can run the same script against the workspace copy of the manifests before `kustomize build . | kubectl apply -f -`, and then provide any sensitive values (such as `GLM_API_KEY`) via Kubernetes `Secret` resources or per-environment overrides instead of committing them to git.

### Persistent Volumes

The deployment uses HostPath volumes for simplicity (suitable for single-node clusters like minikube):
Expand Down
207 changes: 100 additions & 107 deletions deploy/kubernetes/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,110 +7,103 @@ metadata:
app: context-engine
component: configuration
data:
COLLECTION_NAME: "codebase"
EMBEDDING_MODEL: "BAAI/bge-base-en-v1.5"
EMBEDDING_PROVIDER: "fastembed"

FASTMCP_HOST: "0.0.0.0"
FASTMCP_PORT: "8000"
FASTMCP_INDEXER_PORT: "8001"

TOOL_STORE_DESCRIPTION: "Store reusable code snippets for later retrieval. The 'information' is a clear NL description; include the actual code in 'metadata.code' and add 'metadata.language' (e.g., python, typescript) and 'metadata.path' when known. Use this whenever you generate or refine a code snippet."
TOOL_FIND_DESCRIPTION: "Search for relevant code snippets using multiple phrasings of the query (multi-query). Prefer results where metadata.language matches the target file and metadata.path is relevant. You may pass optional filters (language, path_prefix, kind) which the server applies server-side. Include 'metadata.code', 'metadata.path', and 'metadata.language' in responses."

RERANKER_ENABLED: "1"
RERANKER_TOPN: "100"
RERANKER_RETURN_M: "20"
RERANKER_TIMEOUT_MS: "3000"
RERANK_TIMEOUT_FLOOR_MS: "1000"

EMBEDDING_WARMUP: "0"
RERANK_WARMUP: "0"

HYBRID_IN_PROCESS: "1"
RERANK_IN_PROCESS: "1"

USE_TREE_SITTER: "1"

HYBRID_EXPAND: "1"
HYBRID_PER_PATH: "1"
HYBRID_SYMBOL_BOOST: "0.35"
HYBRID_RECENCY_WEIGHT: "0.1"
RERANK_EXPAND: "1"

INDEX_SEMANTIC_CHUNKS: "0"

MEMORY_SSE_ENABLED: "true"
MEMORY_MCP_URL: "http://mcp:8000/sse"
MEMORY_MCP_TIMEOUT: "6"

LLM_PROVIDER: "ollama"
OLLAMA_HOST: "http://ollama:11434"
LLM_EXPAND_MODEL: "phi3:mini"
LLM_EXPAND_MAX: "4"
PRF_ENABLED: "1"

REFRAG_MODE: "1"
MINI_VECTOR_NAME: "mini"
MINI_VEC_DIM: "64"
MINI_VEC_SEED: "1337"
HYBRID_MINI_WEIGHT: "1.0"

INDEX_MICRO_CHUNKS: "1"
MICRO_CHUNK_TOKENS: "16"
MICRO_CHUNK_STRIDE: "8"
REFRAG_GATE_FIRST: "1"
REFRAG_CANDIDATES: "200"

MICRO_OUT_MAX_SPANS: "3"
MICRO_MERGE_LINES: "4"
MICRO_BUDGET_TOKENS: "512"
MICRO_TOKENS_PER_LINE: "32"

CTX_SUMMARY_CHARS: "0"

REFRAG_DECODER: "1"
REFRAG_RUNTIME: "llamacpp"
REFRAG_ENCODER_MODEL: "BAAI/bge-base-en-v1.5"
REFRAG_PHI_PATH: "/work/models/refrag_phi_768_to_dmodel.bin"
REFRAG_SENSE: "heuristic"

LLAMACPP_URL: "http://llamacpp:8080"
LLAMACPP_TIMEOUT_SEC: "180"
DECODER_MAX_TOKENS: "4000"
REFRAG_DECODER_MODE: "prompt"
REFRAG_SOFT_SCALE: "1.0"

MAX_MICRO_CHUNKS_PER_FILE: "200"
QDRANT_TIMEOUT: "60"
MEMORY_AUTODETECT: "1"
MEMORY_COLLECTION_TTL_SECS: "300"

FASTMCP_HTTP_TRANSPORT: "http"
FASTMCP_HTTP_PORT: "8002"
FASTMCP_HTTP_HEALTH_PORT: "18002"
FASTMCP_INDEXER_HTTP_PORT: "8003"
FASTMCP_INDEXER_HTTP_HEALTH_PORT: "18003"

WATCH_DEBOUNCE_SECS: "1.5"
INDEX_UPSERT_BATCH: "128"
INDEX_UPSERT_RETRIES: "5"

QDRANT_URL: "http://qdrant:6333"

QDRANT_API_KEY: ""
REPO_NAME: "workspace"
FASTMCP_SERVER_NAME: "qdrant-mcp"
HOST_INDEX_PATH: "/work"

INDEX_CHUNK_LINES: "120"
INDEX_CHUNK_OVERLAP: "20"
INDEX_BATCH_SIZE: "64"
INDEX_UPSERT_BACKOFF: "0.5"
FASTMCP_HEALTH_PORT: "18000"
CTX_MULTI_COLLECTION: "1"
CTX_DOC_PASS: "1"
DEBUG_CONTEXT_ANSWER: "0"
TOKENIZER_JSON: "/app/models/tokenizer.json"
LLAMACPP_MODEL_URL: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q8_0.gguf"
LLAMACPP_MODEL_NAME: "qwen2.5-1.5b-instruct-q8_0.gguf"
COLLECTION_NAME: codebase
CTX_SNIPPET_CHARS: '400'
CTX_SUMMARY_CHARS: '0'
DECODER_MAX_TOKENS: '4000'
EMBEDDING_MODEL: BAAI/bge-base-en-v1.5
EMBEDDING_PROVIDER: fastembed
EMBEDDING_WARMUP: '0'
FASTMCP_HOST: 0.0.0.0
FASTMCP_HTTP_HEALTH_PORT: '18002'
FASTMCP_HTTP_PORT: '8002'
FASTMCP_HTTP_TRANSPORT: http
FASTMCP_INDEXER_HTTP_HEALTH_PORT: '18003'
FASTMCP_INDEXER_HTTP_PORT: '8003'
FASTMCP_INDEXER_PORT: '8001'
FASTMCP_PORT: '8000'
GLM_API_BASE: https://api.z.ai/api/coding/paas/v4/
GLM_MODEL: glm-4.6
HOST_INDEX_PATH: ./dev-workspace
HYBRID_EXPAND: '0'
HYBRID_IN_PROCESS: '1'
HYBRID_MINI_WEIGHT: '1.0'
HYBRID_PER_PATH: '1'
HYBRID_RECENCY_WEIGHT: '0.1'
HYBRID_RESULTS_CACHE: '128'
HYBRID_RESULTS_CACHE_ENABLED: '1'
HYBRID_SYMBOL_BOOST: '0.35'
INDEX_CHUNK_LINES: '60'
INDEX_CHUNK_OVERLAP: '10'
INDEX_MICRO_CHUNKS: '0'
INDEX_SEMANTIC_CHUNKS: '1'
LLAMACPP_EXTRA_ARGS: ''
LLAMACPP_GPU_LAYERS: '32'
LLAMACPP_GPU_SPLIT: ''
LLAMACPP_THREADS: '6'
LLAMACPP_TIMEOUT_SEC: '300'
LLAMACPP_URL: http://host.docker.internal:8081
LLAMACPP_USE_GPU: '1'
LLM_EXPAND_MAX: '0'
LLM_EXPAND_MODEL: phi3:mini
LLM_PROVIDER: ollama
MAX_CHANGED_SYMBOLS_RATIO: '0.6'  # If >60% of symbols changed, do full reprocessing
MAX_EMBED_CACHE: '16384'
MAX_MICRO_CHUNKS_PER_FILE: '500'
MCP_INDEXER_URL: http://localhost:8003/mcp
MEMORY_AUTODETECT: '1'
MEMORY_COLLECTION_TTL_SECS: '300'
MEMORY_MCP_TIMEOUT: '6'
MEMORY_MCP_URL: http://mcp:8000/sse
MEMORY_SSE_ENABLED: 'true'
MICRO_BUDGET_TOKENS: '1500'
MICRO_CHUNK_STRIDE: '48'
MICRO_CHUNK_TOKENS: '24'
MICRO_MERGE_LINES: '4'
MICRO_OUT_MAX_SPANS: '3'
MICRO_TOKENS_PER_LINE: '32'
MINI_VECTOR_NAME: mini
MINI_VEC_DIM: '64'
MINI_VEC_SEED: '1337'
MULTI_REPO_MODE: '1'
OLLAMA_HOST: http://host.docker.internal:11434
PRF_ENABLED: '1'
QDRANT_TIMEOUT: '20'
QDRANT_URL: http://qdrant:6333
REFRAG_CANDIDATES: '200'
REFRAG_COMMIT_DESCRIBE: '1'
REFRAG_DECODER: '1'
REFRAG_DECODER_MODE: prompt  # prompt|soft
REFRAG_ENCODER_MODEL: BAAI/bge-base-en-v1.5
REFRAG_GATE_FIRST: '1'
REFRAG_MODE: '1'
REFRAG_PHI_PATH: /work/models/refrag_phi_768_to_dmodel.bin
REFRAG_PSEUDO_DESCRIBE: '1'
REFRAG_RUNTIME: glm
REFRAG_SENSE: heuristic
REFRAG_SOFT_SCALE: '1.0'
REMOTE_UPLOAD_GIT_MAX_COMMITS: '500'
RERANKER_ENABLED: '1'
RERANKER_ONNX_PATH: /work/models/model_qint8_avx512_vnni.onnx
RERANKER_RETURN_M: '20'
RERANKER_TIMEOUT_MS: '3000'
RERANKER_TOKENIZER_PATH: /work/models/tokenizer.json
RERANKER_TOPN: '100'
RERANK_EXPAND: '1'
RERANK_IN_PROCESS: '1'
RERANK_TIMEOUT_FLOOR_MS: '1000'
RERANK_WARMUP: '0'
SMART_SYMBOL_REINDEXING: '1'
STRICT_MEMORY_RESTORE: '1'
TOOL_FIND_DESCRIPTION: Search for relevant code snippets using multiple phrasings
of the query (multi-query). Prefer results where metadata.language matches the
target file and metadata.path is relevant. You may pass optional filters (language,
path_prefix, kind) which the server applies server-side. Include 'metadata.code',
'metadata.path', and 'metadata.language' in responses.
TOOL_STORE_DESCRIPTION: Store reusable code snippets for later retrieval. The 'information'
is a clear NL description; include the actual code in 'metadata.code' and add
'metadata.language' (e.g., python, typescript) and 'metadata.path' when known.
Use this whenever you generate or refine a code snippet.
USE_GPU_DECODER: '0'
USE_TREE_SITTER: '1'
WATCH_DEBOUNCE_SECS: '4'
54 changes: 31 additions & 23 deletions deploy/kubernetes/indexer-services.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
---
# Indexer Service Deployment (file change monitoring and reindexing)
# This is a template - copy and customize for each repository
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -26,7 +23,9 @@ spec:
- name: watcher
image: context-engine-indexer-service
imagePullPolicy: IfNotPresent
command: ["python", "/app/scripts/watch_index.py"]
command:
- python
- /app/scripts/watch_index.py
workingDir: /work
env:
- name: QDRANT_URL
Expand All @@ -45,7 +44,7 @@ spec:
name: context-engine-config
key: EMBEDDING_MODEL
- name: WATCH_ROOT
value: "/work"
value: /work
- name: QDRANT_TIMEOUT
valueFrom:
configMapKeyRef:
Expand Down Expand Up @@ -73,27 +72,28 @@ spec:
key: WATCH_DEBOUNCE_SECS
resources:
requests:
memory: "512Mi"
cpu: "250m"
memory: 512Mi
cpu: 250m
limits:
memory: "2Gi"
cpu: "1000m"
memory: 2Gi
cpu: 1000m
volumeMounts:
- name: work-volume
mountPath: /work
readOnly: true
- name: metadata-volume
mountPath: /work/.codebase
envFrom:
- configMapRef:
name: context-engine-config
volumes:
- name: work-volume
persistentVolumeClaim:
claimName: code-repos-pvc
- name: metadata-volume
persistentVolumeClaim:
claimName: code-metadata-pvc

---
# Indexer Job (One-shot code indexing)
apiVersion: batch/v1
kind: Job
metadata:
Expand All @@ -115,7 +115,9 @@ spec:
- name: indexer
image: context-engine-indexer-service
imagePullPolicy: IfNotPresent
command: ["python", "/app/scripts/ingest_code.py"]
command:
- python
- /app/scripts/ingest_code.py
workingDir: /work
env:
- name: QDRANT_URL
Expand All @@ -135,27 +137,28 @@ spec:
key: EMBEDDING_MODEL
resources:
requests:
memory: "1Gi"
cpu: "500m"
memory: 1Gi
cpu: 500m
limits:
memory: "4Gi"
cpu: "2000m"
memory: 4Gi
cpu: 2000m
volumeMounts:
- name: work-volume
mountPath: /work
readOnly: true
- name: metadata-volume
mountPath: /work/.codebase
envFrom:
- configMapRef:
name: context-engine-config
volumes:
- name: work-volume
persistentVolumeClaim:
claimName: code-repos-pvc
- name: metadata-volume
persistentVolumeClaim:
claimName: code-metadata-pvc

---
# Index Initialization Job
apiVersion: batch/v1
kind: Job
metadata:
Expand All @@ -177,7 +180,9 @@ spec:
- name: init-payload
image: context-engine-indexer-service
imagePullPolicy: IfNotPresent
command: ["python", "/app/scripts/create_indexes.py"]
command:
- python
- /app/scripts/create_indexes.py
workingDir: /work
env:
- name: QDRANT_URL
Expand All @@ -192,17 +197,20 @@ spec:
key: COLLECTION_NAME
resources:
requests:
memory: "512Mi"
cpu: "250m"
memory: 512Mi
cpu: 250m
limits:
memory: "1Gi"
cpu: "500m"
memory: 1Gi
cpu: 500m
volumeMounts:
- name: work-volume
mountPath: /work
readOnly: true
- name: metadata-volume
mountPath: /work/.codebase
envFrom:
- configMapRef:
name: context-engine-config
volumes:
- name: work-volume
persistentVolumeClaim:
Expand Down
Loading
Loading