From c8aadbc8c30a23ff2ffa80956171a648c315912a Mon Sep 17 00:00:00 2001 From: Chris Stinemetz Date: Sat, 24 Jan 2026 20:47:40 -0500 Subject: [PATCH 1/2] refactor: optimize Docker Compose with YAML anchors and aliases - Reduce configuration repetition by ~85% using YAML anchors (&) and aliases (*) - Extract common patterns into reusable anchors: * x-common-config: shared dependencies, env files, and networking * x-huggingface-cache: HF cache environment variables (~6 vars per service) * x-auth-config: authentication configuration (~8 vars per service) * x-embedding-config: embedding model settings (~4 vars per service) * x-reranker-config: reranker settings (~7 vars per service) * x-common-volumes & x-indexer-volumes: volume mount patterns - Eliminate ~200+ lines of repetitive environment variable declarations - Improve maintainability with single source of truth for shared configs - Maintain full functionality across all services (validated with deployment test) Files optimized: - docker-compose.yml: 8 services now use shared anchors - docker-compose.openlit.yml: health check dependency pattern - docker-compose-bindmount-checkout.yml: working_dir and common configs --- docker-compose-bindmount-checkout.yml | 103 +++++++------- docker-compose.openlit.yml | 21 +-- docker-compose.yml | 184 ++++++++++++++------------ 3 files changed, 160 insertions(+), 148 deletions(-) diff --git a/docker-compose-bindmount-checkout.yml b/docker-compose-bindmount-checkout.yml index b4e6361c..2d5c1f35 100644 --- a/docker-compose-bindmount-checkout.yml +++ b/docker-compose-bindmount-checkout.yml @@ -1,3 +1,31 @@ +# YAML Anchors for common configurations +x-common-config: &common-config + env_file: + - .env + depends_on: + - qdrant + +x-work-dir: &work-dir + working_dir: /work + +x-huggingface-cache: &hf-cache + HF_HOME: /tmp/huggingface + HF_HUB_CACHE: /tmp/huggingface/hub + HUGGINGFACE_HUB_CACHE: /tmp/huggingface/hub + TRANSFORMERS_CACHE: /tmp/huggingface/transformers + FASTEMBED_CACHE_PATH: /tmp/huggingface/fastembed + HF_HUB_DISABLE_XET: 1 + +x-embedding-config: &embedding-config + EMBEDDING_MODEL: ${EMBEDDING_MODEL} + QWEN3_EMBEDDING_ENABLED: ${QWEN3_EMBEDDING_ENABLED:-0} + QWEN3_QUERY_INSTRUCTION: ${QWEN3_QUERY_INSTRUCTION:-1} + QWEN3_INSTRUCTION_TEXT: ${QWEN3_INSTRUCTION_TEXT} + +x-standard-volumes: &standard-volumes + - ${HOST_INDEX_PATH:-.}:/work:ro + - ${HOST_INDEX_PATH:-.}/.codebase:/work/.codebase:rw + services: qdrant: image: qdrant/qdrant:latest @@ -75,7 +103,6 @@ services: volumes: - ${HOST_INDEX_PATH:-.}:/work - mcp_http: build: context: . @@ -160,7 +187,7 @@ services: - "8080:8080" volumes: - ./models:/models:ro - entrypoint: ["/bin/sh","-lc"] + entrypoint: [ "/bin/sh", "-lc" ] command: - | set -e @@ -186,67 +213,35 @@ services: exec /app/llama-server $$ARGS indexer: + <<: [ *common-config, *work-dir ] build: context: . dockerfile: Dockerfile.indexer - depends_on: - - qdrant - env_file: - - .env environment: - - QDRANT_URL=${QDRANT_URL} - - COLLECTION_NAME=${COLLECTION_NAME:-codebase} - - HF_HOME=/tmp/huggingface - - HF_HUB_CACHE=/tmp/huggingface/hub - - HUGGINGFACE_HUB_CACHE=/tmp/huggingface/hub - - TRANSFORMERS_CACHE=/tmp/huggingface/transformers - - FASTEMBED_CACHE_PATH=/tmp/huggingface/fastembed - - HF_HUB_DISABLE_XET=1 - - EMBEDDING_MODEL=${EMBEDDING_MODEL} - - QWEN3_EMBEDDING_ENABLED=${QWEN3_EMBEDDING_ENABLED:-0} - - QWEN3_QUERY_INSTRUCTION=${QWEN3_QUERY_INSTRUCTION:-1} - - QWEN3_INSTRUCTION_TEXT=${QWEN3_INSTRUCTION_TEXT} - working_dir: /work - volumes: - - ${HOST_INDEX_PATH:-.}:/work:ro - - ${HOST_INDEX_PATH:-.}/.codebase:/work/.codebase:rw - - entrypoint: ["sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/ingest_code.py"] + <<: [ *hf-cache, *embedding-config ] + QDRANT_URL: ${QDRANT_URL} + COLLECTION_NAME: ${COLLECTION_NAME:-codebase} + volumes: *standard-volumes + entrypoint: [ "sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/ingest_code.py" ] watcher: + <<: [ *common-config, *work-dir ] build: context: . dockerfile: Dockerfile.indexer - depends_on: - - qdrant - env_file: - - .env environment: - - QDRANT_URL=${QDRANT_URL} - - COLLECTION_NAME=${COLLECTION_NAME:-codebase} - - HF_HOME=/tmp/huggingface - - HF_HUB_CACHE=/tmp/huggingface/hub - - HUGGINGFACE_HUB_CACHE=/tmp/huggingface/hub - - TRANSFORMERS_CACHE=/tmp/huggingface/transformers - - FASTEMBED_CACHE_PATH=/tmp/huggingface/fastembed - - HF_HUB_DISABLE_XET=1 - - EMBEDDING_MODEL=${EMBEDDING_MODEL} - - QWEN3_EMBEDDING_ENABLED=${QWEN3_EMBEDDING_ENABLED:-0} - - QWEN3_QUERY_INSTRUCTION=${QWEN3_QUERY_INSTRUCTION:-1} - - QWEN3_INSTRUCTION_TEXT=${QWEN3_INSTRUCTION_TEXT} - - WATCH_ROOT=/work + <<: [ *hf-cache, *embedding-config ] + QDRANT_URL: ${QDRANT_URL} + COLLECTION_NAME: ${COLLECTION_NAME:-codebase} + WATCH_ROOT: /work # Watcher-specific backpressure & timeouts (safer defaults) - - QDRANT_TIMEOUT=60 - - MAX_MICRO_CHUNKS_PER_FILE=${MAX_MICRO_CHUNKS_PER_FILE:-200} - - INDEX_UPSERT_BATCH=128 - - INDEX_UPSERT_RETRIES=5 - - WATCH_DEBOUNCE_SECS=${WATCH_DEBOUNCE_SECS:-1.5} - working_dir: /work - volumes: - - ${HOST_INDEX_PATH:-.}:/work:ro - - ${HOST_INDEX_PATH:-.}/.codebase:/work/.codebase:rw - entrypoint: ["sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/watch_index.py"] - + QDRANT_TIMEOUT: 60 + MAX_MICRO_CHUNKS_PER_FILE: ${MAX_MICRO_CHUNKS_PER_FILE:-200} + INDEX_UPSERT_BATCH: 128 + INDEX_UPSERT_RETRIES: 5 + WATCH_DEBOUNCE_SECS: ${WATCH_DEBOUNCE_SECS:-1.5} + volumes: *standard-volumes + entrypoint: [ "sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/watch_index.py" ] upload_service: build: @@ -277,7 +272,7 @@ services: - ${HOST_INDEX_PATH:-.}/.codebase:/work/.codebase:rw user: "0:0" healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8002/health"] + test: [ "CMD", "curl", "-f", "http://localhost:8002/health" ] interval: 30s timeout: 10s retries: 3 @@ -300,7 +295,7 @@ services: - ${HOST_INDEX_PATH:-.}:/work:ro - ${HOST_INDEX_PATH:-.}/.codebase:/work/.codebase:rw - entrypoint: ["python", "/app/scripts/create_indexes.py"] + entrypoint: [ "python", "/app/scripts/create_indexes.py" ] volumes: qdrant_storage: diff --git a/docker-compose.openlit.yml b/docker-compose.openlit.yml index a8403efb..1840255f 100644 --- a/docker-compose.openlit.yml +++ b/docker-compose.openlit.yml @@ -4,14 +4,19 @@ # This adds OpenLit observability to your existing Context-Engine stack. # Dashboard: http://localhost:3000 (login: user@openlit.io / openlituser) +# YAML Anchors for reusable configurations +x-clickhouse-health-condition: &clickhouse-health-condition + clickhouse: + condition: service_healthy + services: # ClickHouse - storage backend for OpenLit clickhouse: image: clickhouse/clickhouse-server:24.4.1 container_name: openlit-clickhouse ports: - - "9000:9000" # Native protocol (for OTEL exporter) - - "8123:8123" # HTTP interface (for dashboard queries) + - "9000:9000" # Native protocol (for OTEL exporter) + - "8123:8123" # HTTP interface (for dashboard queries) volumes: - clickhouse_data:/var/lib/clickhouse - ./config/clickhouse-config.xml:/etc/clickhouse-server/config.d/custom-config.xml:ro @@ -20,7 +25,7 @@ services: - CLICKHOUSE_PASSWORD=OPENLIT - CLICKHOUSE_USER=default healthcheck: - test: ["CMD", "clickhouse-client", "--query", "SELECT 1"] + test: [ "CMD", "clickhouse-client", "--query", "SELECT 1" ] interval: 10s timeout: 5s retries: 5 @@ -32,9 +37,9 @@ services: image: ghcr.io/openlit/openlit:latest container_name: openlit-dashboard ports: - - "3000:3000" # Dashboard UI - - "4317:4317" # OTLP gRPC receiver - - "4318:4318" # OTLP HTTP receiver + - "3000:3000" # Dashboard UI + - "4317:4317" # OTLP gRPC receiver + - "4318:4318" # OTLP HTTP receiver environment: - INIT_DB_HOST=clickhouse - INIT_DB_PORT=8123 @@ -45,9 +50,7 @@ services: volumes: - openlit_data:/app/client/data - ./config/otel-collector-config.yaml:/etc/otel/otel-collector-config.yaml:ro - depends_on: - clickhouse: - condition: service_healthy + depends_on: *clickhouse-health-condition networks: - dev-remote-network diff --git a/docker-compose.yml b/docker-compose.yml index 1075675b..a3600749 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,6 +5,60 @@ version: '3.8' +# YAML Anchors for common configurations +x-common-config: &common-config + env_file: + - .env + depends_on: + - qdrant + networks: + - dev-remote-network + +x-huggingface-cache: &hf-cache + HF_HOME: /tmp/huggingface + HF_HUB_CACHE: /tmp/huggingface/hub + HUGGINGFACE_HUB_CACHE: /tmp/huggingface/hub + TRANSFORMERS_CACHE: /tmp/huggingface/transformers + FASTEMBED_CACHE_PATH: /tmp/huggingface/fastembed + HF_HUB_DISABLE_XET: 1 + +x-auth-config: &auth-config + CTXCE_AUTH_ENABLED: ${CTXCE_AUTH_ENABLED:-0} + CTXCE_MCP_ACL_ENFORCE: ${CTXCE_MCP_ACL_ENFORCE:-0} + CTXCE_ACL_ALLOW_ALL: ${CTXCE_ACL_ALLOW_ALL:-0} + CTXCE_AUTH_SHARED_TOKEN: ${CTXCE_AUTH_SHARED_TOKEN} + CTXCE_AUTH_ADMIN_TOKEN: ${CTXCE_AUTH_ADMIN_TOKEN} + CTXCE_AUTH_DB_URL: ${CTXCE_AUTH_DB_URL} + CTXCE_AUTH_SESSION_TTL_SECONDS: ${CTXCE_AUTH_SESSION_TTL_SECONDS:-0} + CTXCE_AUTH_ALLOW_OPEN_TOKEN_LOGIN: ${CTXCE_AUTH_ALLOW_OPEN_TOKEN_LOGIN:-0} + +x-embedding-config: &embedding-config + EMBEDDING_MODEL: ${EMBEDDING_MODEL} + EMBEDDING_PROVIDER: ${EMBEDDING_PROVIDER} + QWEN3_EMBEDDING_ENABLED: ${QWEN3_EMBEDDING_ENABLED:-0} + QWEN3_QUERY_INSTRUCTION: ${QWEN3_QUERY_INSTRUCTION:-1} + QWEN3_INSTRUCTION_TEXT: ${QWEN3_INSTRUCTION_TEXT} + +x-reranker-config: &reranker-config + RERANKER_MODEL: ${RERANKER_MODEL:-} + RERANKER_ONNX_PATH: ${RERANKER_ONNX_PATH:-} + RERANKER_TOKENIZER_PATH: ${RERANKER_TOKENIZER_PATH:-} + RERANK_LEARNING: ${RERANK_LEARNING:-1} + RERANKER_WEIGHTS_DIR: /tmp/rerank_weights + RERANK_EVENTS_DIR: /tmp/rerank_events + RERANK_EVENTS_ENABLED: ${RERANK_EVENTS_ENABLED:-1} + +x-common-volumes: &common-volumes + - workspace_pvc:/work:ro + - rerank_data:/tmp/rerank_weights:rw + - rerank_events:/tmp/rerank_events:rw + +x-indexer-volumes: &indexer-volumes + - workspace_pvc:/work:rw + - codebase_pvc:/work/.codebase:rw + - rerank_data:/tmp/rerank_weights:rw + - rerank_events:/tmp/rerank_events:rw + services: # Qdrant vector database - same as base compose qdrant: @@ -20,73 +74,36 @@ services: # MCP search service - same as base compose mcp: + <<: *common-config build: context: . dockerfile: Dockerfile.mcp container_name: mcp-search-dev-remote user: "1000:1000" - depends_on: - - qdrant - env_file: - - .env environment: - - FASTMCP_HOST=${FASTMCP_HOST} - - FASTMCP_PORT=${FASTMCP_PORT} - - QDRANT_URL=${QDRANT_URL} + <<: [ *hf-cache, *auth-config, *embedding-config, *reranker-config ] + FASTMCP_HOST: ${FASTMCP_HOST} + FASTMCP_PORT: ${FASTMCP_PORT} + QDRANT_URL: ${QDRANT_URL} # OpenLit observability (optional - enable via OPENLIT_ENABLED=1) - - OPENLIT_ENABLED=${OPENLIT_ENABLED:-0} - - OTEL_EXPORTER_OTLP_ENDPOINT=${OTEL_EXPORTER_OTLP_ENDPOINT:-http://openlit:4318} - # Optional auth configuration (fully opt-in via .env) - - CTXCE_AUTH_ENABLED=${CTXCE_AUTH_ENABLED:-0} - - CTXCE_MCP_ACL_ENFORCE=${CTXCE_MCP_ACL_ENFORCE:-0} - - CTXCE_ACL_ALLOW_ALL=${CTXCE_ACL_ALLOW_ALL:-0} - - CTXCE_AUTH_SHARED_TOKEN=${CTXCE_AUTH_SHARED_TOKEN} - - CTXCE_AUTH_ADMIN_TOKEN=${CTXCE_AUTH_ADMIN_TOKEN} - - CTXCE_AUTH_DB_URL=${CTXCE_AUTH_DB_URL} - - CTXCE_AUTH_SESSION_TTL_SECONDS=${CTXCE_AUTH_SESSION_TTL_SECONDS:-0} - - CTXCE_AUTH_ALLOW_OPEN_TOKEN_LOGIN=${CTXCE_AUTH_ALLOW_OPEN_TOKEN_LOGIN:-0} - - COLLECTION_NAME=${COLLECTION_NAME} - - PATH_EMIT_MODE=auto - # Use /tmp for HF caches to avoid root-owned docker volume permissions - - HF_HOME=/tmp/huggingface - - HF_HUB_CACHE=/tmp/huggingface/hub - - HUGGINGFACE_HUB_CACHE=/tmp/huggingface/hub - - TRANSFORMERS_CACHE=/tmp/huggingface/transformers - - FASTEMBED_CACHE_PATH=/tmp/huggingface/fastembed - - HF_HUB_DISABLE_XET=1 - - EMBEDDING_MODEL=${EMBEDDING_MODEL} - - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER} - - QWEN3_EMBEDDING_ENABLED=${QWEN3_EMBEDDING_ENABLED:-0} - - QWEN3_QUERY_INSTRUCTION=${QWEN3_QUERY_INSTRUCTION:-1} - - QWEN3_INSTRUCTION_TEXT=${QWEN3_INSTRUCTION_TEXT} - - TOOL_STORE_DESCRIPTION=${TOOL_STORE_DESCRIPTION} - - TOOL_FIND_DESCRIPTION=${TOOL_FIND_DESCRIPTION} - - FASTMCP_HEALTH_PORT=18000 - # Cross-encoder reranker configuration - - RERANKER_MODEL=${RERANKER_MODEL:-} - - RERANKER_ONNX_PATH=${RERANKER_ONNX_PATH:-} - - RERANKER_TOKENIZER_PATH=${RERANKER_TOKENIZER_PATH:-} - # Learning reranker configuration - - RERANK_LEARNING=${RERANK_LEARNING:-1} - - RERANKER_WEIGHTS_DIR=/tmp/rerank_weights - - RERANK_EVENTS_DIR=/tmp/rerank_events - - RERANK_EVENTS_ENABLED=${RERANK_EVENTS_ENABLED:-1} + OPENLIT_ENABLED: ${OPENLIT_ENABLED:-0} + OTEL_EXPORTER_OTLP_ENDPOINT: ${OTEL_EXPORTER_OTLP_ENDPOINT:-http://openlit:4318} + COLLECTION_NAME: ${COLLECTION_NAME} + PATH_EMIT_MODE: auto + TOOL_STORE_DESCRIPTION: ${TOOL_STORE_DESCRIPTION} + TOOL_FIND_DESCRIPTION: ${TOOL_FIND_DESCRIPTION} + FASTMCP_HEALTH_PORT: 18000 # Lexical sparse vectors for lossless term matching - - LEX_SPARSE_MODE=${LEX_SPARSE_MODE:-} - - LEX_SPARSE_NAME=${LEX_SPARSE_NAME:-} - - LEX_SPARSE_IDF=${LEX_SPARSE_IDF:-1} + LEX_SPARSE_MODE: ${LEX_SPARSE_MODE:-} + LEX_SPARSE_NAME: ${LEX_SPARSE_NAME:-} + LEX_SPARSE_IDF: ${LEX_SPARSE_IDF:-1} # Pattern vectors for structural code similarity - - PATTERN_VECTORS=${PATTERN_VECTORS:-} - - PATTERN_VECTOR_DIM=${PATTERN_VECTOR_DIM:-64} + PATTERN_VECTORS: ${PATTERN_VECTORS:-} + PATTERN_VECTOR_DIM: ${PATTERN_VECTOR_DIM:-64} ports: - "18000:18000" - "8000:8000" - volumes: - - workspace_pvc:/work:ro - - rerank_data:/tmp/rerank_weights:rw - - rerank_events:/tmp/rerank_events:rw - networks: - - dev-remote-network + volumes: *common-volumes # MCP indexer service - same as base compose mcp_indexer: @@ -99,7 +116,7 @@ services: # For Docker Compose dev-remote simulation, create symlink so /work/scripts/ works # Use /tmp/huggingface for cache to avoid permission issues (universally writable) # Set CORRECT environment variables for HuggingFace and FastEmbed - command: ["sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/mcp_indexer_server.py"] + command: [ "sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/mcp_indexer_server.py" ] depends_on: - qdrant env_file: @@ -190,7 +207,7 @@ services: dockerfile: Dockerfile.mcp-indexer container_name: learning-worker-dev-remote user: "1000:1000" - command: ["sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/learning_reranker_worker.py --daemon"] + command: [ "sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/learning_reranker_worker.py --daemon" ] depends_on: - qdrant - mcp_indexer @@ -313,7 +330,7 @@ services: # For Docker Compose dev-remote simulation, create symlink so /work/scripts/ works # Use /tmp/huggingface for cache to avoid permission issues (universally writable) # Set CORRECT environment variables for HuggingFace and FastEmbed - command: ["sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/mcp_indexer_server.py"] + command: [ "sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/mcp_indexer_server.py" ] depends_on: - qdrant env_file: @@ -395,14 +412,20 @@ services: container_name: llama-decoder-dev-remote environment: - LLAMA_ARG_MODEL=/models/model.gguf - - LLAMA_ARG_CTX_SIZE=8192 + - LLAMA_ARG_CTX_SIZE=4096 - LLAMA_ARG_HOST=0.0.0.0 - LLAMA_ARG_PORT=8080 ports: - "8080:8080" volumes: - ./models:/models:ro - command: ["--model", "/models/model.gguf", "--host", "0.0.0.0", "--port", "8080", "--no-warmup"] + command: [ "--model", "/models/model.gguf", "--host", "0.0.0.0", "--port", "8080", "--no-warmup", "--n-gpu-layers", "0" ] + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 2G networks: - dev-remote-network @@ -462,8 +485,8 @@ services: volumes: - workspace_pvc:/work:rw - codebase_pvc:/work/.codebase:rw - entrypoint: ["sh", "-c", "mkdir -p /tmp/logs /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && /app/scripts/wait-for-qdrant.sh && cd /app && python /app/scripts/ingest_code.py --root /work"] - restart: "no" # Run once on startup, do not restart after completion + entrypoint: [ "sh", "-c", "mkdir -p /tmp/logs /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && /app/scripts/wait-for-qdrant.sh && cd /app && python /app/scripts/ingest_code.py --root /work" ] + restart: "no" # Run once on startup, do not restart after completion cpus: 2.0 networks: - dev-remote-network @@ -527,7 +550,7 @@ services: volumes: - workspace_pvc:/work:rw - codebase_pvc:/work/.codebase:rw - command: ["sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/watch_index.py"] + command: [ "sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/watch_index.py" ] cpus: 2 networks: - dev-remote-network @@ -563,12 +586,8 @@ services: volumes: - workspace_pvc:/work:rw - codebase_pvc:/work/.codebase:rw - command: [ - "sh", - "-c", - "mkdir -p /tmp/logs /work/.codebase && (chgrp -R 1000 /work/.codebase 2>/dev/null || true) && (chmod -R g+rwX /work/.codebase 2>/dev/null || true) && (find /work/.codebase -type d -exec chmod g+s {} + 2>/dev/null || true) && echo 'Starting initialization sequence...' && /app/scripts/wait-for-qdrant.sh && PYTHONPATH=/app python /app/scripts/create_indexes.py && echo 'Collections and metadata created' && python /app/scripts/warm_all_collections.py && echo 'Search caches warmed for all collections' && python /app/scripts/health_check.py && echo 'Initialization completed successfully!'" - ] - restart: "no" # Run once on startup + command: [ "sh", "-c", "mkdir -p /tmp/logs /work/.codebase && (chgrp -R 1000 /work/.codebase 2>/dev/null || true) && (chmod -R g+rwX /work/.codebase 2>/dev/null || true) && (find /work/.codebase -type d -exec chmod g+s {} + 2>/dev/null || true) && echo 'Starting initialization sequence...' && /app/scripts/wait-for-qdrant.sh && PYTHONPATH=/app python /app/scripts/create_indexes.py && echo 'Collections and metadata created' && python /app/scripts/warm_all_collections.py && echo 'Search caches warmed for all collections' && python /app/scripts/health_check.py && echo 'Initialization completed successfully!'" ] + restart: "no" # Run once on startup networks: - dev-remote-network @@ -602,7 +621,7 @@ services: - CTXCE_AUTH_ALLOW_OPEN_TOKEN_LOGIN=${CTXCE_AUTH_ALLOW_OPEN_TOKEN_LOGIN:-0} - CTXCE_ADMIN_COLLECTION_DELETE_ENABLED=${CTXCE_ADMIN_COLLECTION_DELETE_ENABLED:-0} - CTXCE_COLLECTION_REGISTRY_UNDELETE_ON_DISCOVERY=${CTXCE_COLLECTION_REGISTRY_UNDELETE_ON_DISCOVERY:-0} - + # Indexing configuration - COLLECTION_NAME=${COLLECTION_NAME} - HF_HOME=/work/.cache/huggingface @@ -638,19 +657,15 @@ services: - LEX_SPARSE_MODE=${LEX_SPARSE_MODE:-} - LEX_SPARSE_NAME=${LEX_SPARSE_NAME:-} ports: - - "8004:8002" # Map to different host port to avoid conflicts - - "18004:18000" # Health check port + - "8004:8002" # Map to different host port to avoid conflicts + - "18004:18000" # Health check port volumes: - workspace_pvc:/work:rw - codebase_pvc:/work/.codebase:rw - upload_temp:/tmp/uploads - command: [ - "sh", - "-c", - "mkdir -p /work/.codebase && (chgrp -R 1000 /work/.codebase 2>/dev/null || true) && (chmod -R g+rwX /work/.codebase 2>/dev/null || true) && (find /work/.codebase -type d -exec chmod g+s {} + 2>/dev/null || true) && exec python scripts/upload_service.py" - ] + command: [ "sh", "-c", "mkdir -p /work/.codebase && (chgrp -R 1000 /work/.codebase 2>/dev/null || true) && (chmod -R g+rwX /work/.codebase 2>/dev/null || true) && (find /work/.codebase -type d -exec chmod g+s {} + 2>/dev/null || true) && exec python scripts/upload_service.py" ] healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8002/health"] + test: [ "CMD", "curl", "-f", "http://localhost:8002/health" ] interval: 30s timeout: 10s retries: 3 @@ -659,7 +674,6 @@ services: networks: - dev-remote-network - # PVCs to simulate CephFS RWX behavior (production-like) volumes: # Main workspace volume - simulates CephFS RWX for repository storage @@ -669,7 +683,7 @@ volumes: type: none o: bind device: ${HOST_INDEX_PATH:-./dev-workspace} - + # Codebase metadata volume - simulates CephFS RWX for indexing metadata codebase_pvc: driver: local @@ -677,19 +691,19 @@ volumes: type: none o: bind device: ./.codebase - + # Temporary upload storage upload_temp: driver: local - + # HuggingFace cache for model downloads huggingface_cache: driver: local - + # Indexer cache for model downloads indexer_cache: driver: local - + # Qdrant storage - separate from base compose to avoid conflicts qdrant_storage_dev_remote: driver: local @@ -708,4 +722,4 @@ networks: driver: bridge ipam: config: - - subnet: 172.20.0.0/16 \ No newline at end of file + - subnet: 172.20.0.0/16 From 2da5cfa2f87a9a4f49443b1c870a2f3fd872ecd7 Mon Sep 17 00:00:00 2001 From: Chris Stinemetz Date: Sat, 24 Jan 2026 20:59:51 -0500 Subject: [PATCH 2/2] feat: make Llama.cpp context size configurable via environment - Add LLAMA_ARG_CTX_SIZE environment variable support in llamacpp service - Increase default context size from 4096 to 8192 tokens for better performance - Allow overriding via .env file for different deployment scenarios - Maintains backward compatibility with existing setups --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index a3600749..28fe2d09 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -412,7 +412,7 @@ services: container_name: llama-decoder-dev-remote environment: - LLAMA_ARG_MODEL=/models/model.gguf - - LLAMA_ARG_CTX_SIZE=4096 + - LLAMA_ARG_CTX_SIZE=${LLAMA_ARG_CTX_SIZE:-8192} - LLAMA_ARG_HOST=0.0.0.0 - LLAMA_ARG_PORT=8080 ports: