diff --git a/.env b/.env index 3d34067c..b26ac1a1 100644 --- a/.env +++ b/.env @@ -3,6 +3,11 @@ QDRANT_URL=http://qdrant:6333 # QDRANT_API_KEY= # not needed for local +# Repository mode: 0=single-repo (default), 1=multi-repo +# Single-repo: All files go into one collection (COLLECTION_NAME) +# Multi-repo: Each subdirectory gets its own collection +MULTI_REPO_MODE=0 + # Single unified collection for seamless cross-repo search # Default: "codebase" - all your code in one collection for unified search # This enables searching across multiple repos/workspaces without fragmentation @@ -144,7 +149,7 @@ MEMORY_COLLECTION_TTL_SECS=300 # INDEX_UPSERT_BATCH=128 # INDEX_UPSERT_RETRIES=5 # INDEX_UPSERT_BACKOFF=0.5 - WATCH_DEBOUNCE_SECS=4 +WATCH_DEBOUNCE_SECS=4 # Duplicate Streamable HTTP MCP instances (run alongside SSE) @@ -161,3 +166,6 @@ HYBRID_RESULTS_CACHE_ENABLED=1 INDEX_CHUNK_LINES=60 INDEX_CHUNK_OVERLAP=10 USE_GPU_DECODER=0 + +# Development Remote Upload Configuration +HOST_INDEX_PATH=./dev-workspace diff --git a/.env.example b/.env.example index 87c7e330..5a80abea 100644 --- a/.env.example +++ b/.env.example @@ -1,10 +1,16 @@ # Qdrant connection QDRANT_URL=http://localhost:6333 QDRANT_API_KEY= + +# Repository mode: 0=single-repo (default), 1=multi-repo +# Single-repo: All files go into one collection (COLLECTION_NAME) +# Multi-repo: Each subdirectory gets its own collection +MULTI_REPO_MODE=0 + # Single unified collection for seamless cross-repo search (default: "codebase") # Leave unset or use "codebase" for unified search across all your code COLLECTION_NAME=codebase # Embeddings EMBEDDING_MODEL=BAAI/bge-base-en-v1.5 EMBEDDING_PROVIDER=fastembed diff --git a/Dockerfile.mcp b/Dockerfile.mcp index ef40683b..22524111 100644 --- a/Dockerfile.mcp +++ b/Dockerfile.mcp @@
-3,11 +3,16 @@ FROM python:3.11-slim ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ - WORK_ROOTS="/work,/app" + WORK_ROOTS="/work,/app" \ + HF_HOME=/tmp/cache \ + TRANSFORMERS_CACHE=/tmp/cache # Install latest FastMCP with Streamable HTTP (RMCP) support + deps RUN pip install --no-cache-dir --upgrade mcp fastmcp qdrant-client fastembed +# Create cache directory with proper permissions +RUN mkdir -p /tmp/cache && chmod 755 /tmp/cache + # Bake scripts into image so server can run even when /work points elsewhere COPY scripts /app/scripts diff --git a/Dockerfile.upload-service b/Dockerfile.upload-service new file mode 100644 index 00000000..ef6d4538 --- /dev/null +++ b/Dockerfile.upload-service @@ -0,0 +1,56 @@ +# Dockerfile for Context-Engine Delta Upload Service +FROM python:3.11-slim + +# Set environment variables +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PYTHONPATH=/app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + git \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Create app directory +WORKDIR /app + +# Copy requirements first for better caching +COPY requirements.txt . + +# Install Python dependencies +RUN pip install --upgrade pip && \ + pip install -r requirements.txt + +# Copy application code +COPY scripts/ ./scripts/ +COPY . . 
+ +# Create work directory for repositories +RUN mkdir -p /work && \ + chmod 755 /work + +# Create non-root user for security +RUN useradd --create-home --shell /bin/bash app && \ + chown -R app:app /app /work +USER app + +# Expose port +EXPOSE 8002 + +# Health check +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:8002/health || exit 1 + +# Default environment variables +ENV UPLOAD_SERVICE_HOST=0.0.0.0 \ + UPLOAD_SERVICE_PORT=8002 \ + QDRANT_URL=http://qdrant:6333 \ + WORK_DIR=/work \ + MAX_BUNDLE_SIZE_MB=100 \ + UPLOAD_TIMEOUT_SECS=300 + +# Run the upload service +CMD ["python", "scripts/upload_service.py"] \ No newline at end of file diff --git a/Makefile b/Makefile index 95ea9e6d..8d598bf9 100644 --- a/Makefile +++ b/Makefile @@ -4,8 +4,8 @@ SHELL := /bin/bash # An empty export forces docker to use its default context/socket. export DOCKER_HOST = -.PHONY: help up down logs ps restart rebuild index reindex watch env hybrid bootstrap history rerank-local setup-reranker prune warm health -.PHONY: venv venv-install +.PHONY: help up down logs ps restart rebuild index reindex watch watch-remote env hybrid bootstrap history rerank-local setup-reranker prune warm health test-e2e +.PHONY: venv venv-install dev-remote-up dev-remote-down dev-remote-logs dev-remote-restart dev-remote-bootstrap dev-remote-test dev-remote-client dev-remote-clean .PHONY: qdrant-status qdrant-list qdrant-prune qdrant-index-root @@ -77,6 +77,23 @@ index-here: ## index the current directory: make index-here [RECREATE=1] [REPO_N watch: ## watch mode: reindex changed files on save (Ctrl+C to stop) docker compose run --rm --entrypoint python indexer /work/scripts/watch_index.py +watch-remote: ## remote watch mode: upload delta bundles to remote server (Ctrl+C to stop) + @echo "Starting remote watch mode..." 
+ @if [ -z "$(REMOTE_UPLOAD_ENDPOINT)" ]; then \ + echo "Error: REMOTE_UPLOAD_ENDPOINT is required"; \ + echo "Usage: make watch-remote REMOTE_UPLOAD_ENDPOINT=http://your-server:8080 [REMOTE_UPLOAD_MAX_RETRIES=3] [REMOTE_UPLOAD_TIMEOUT=30]"; \ + exit 1; \ + fi + @echo "Remote upload endpoint: $(REMOTE_UPLOAD_ENDPOINT)" + @echo "Max retries: $${REMOTE_UPLOAD_MAX_RETRIES:-3}" + @echo "Timeout: $${REMOTE_UPLOAD_TIMEOUT:-30} seconds" + docker compose run --rm --entrypoint python \ + -e REMOTE_UPLOAD_ENABLED=1 \ + -e REMOTE_UPLOAD_ENDPOINT=$(REMOTE_UPLOAD_ENDPOINT) \ + -e REMOTE_UPLOAD_MAX_RETRIES=$${REMOTE_UPLOAD_MAX_RETRIES:-3} \ + -e REMOTE_UPLOAD_TIMEOUT=$${REMOTE_UPLOAD_TIMEOUT:-30} \ + indexer /work/scripts/watch_index.py + rerank: ## multi-query re-ranker helper example docker compose run --rm --entrypoint python indexer /work/scripts/rerank_query.py \ --query "chunk code by lines with overlap for indexing" \ @@ -216,12 +233,54 @@ llamacpp-build-image: ## build custom llama.cpp image with baked model (override # Download a tokenizer.json for micro-chunking (default: BAAI/bge-base-en-v1.5) TOKENIZER_URL ?= https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/tokenizer.json TOKENIZER_PATH ?= models/tokenizer.json - tokenizer: ## download tokenizer.json to models/tokenizer.json (override with TOKENIZER_URL/TOKENIZER_PATH) @mkdir -p $(dir $(TOKENIZER_PATH)) @echo "Downloading: $(TOKENIZER_URL) -> $(TOKENIZER_PATH)" && \ curl -L --fail --retry 3 -C - "$(TOKENIZER_URL)" -o "$(TOKENIZER_PATH)" +# --- Development Remote Upload System Targets --- + +dev-remote-up: ## start dev-remote stack with upload service + @echo "Starting development remote upload system..." + @mkdir -p dev-workspace/.codebase + docker compose -f docker-compose.dev-remote.yml up -d --build + +dev-remote-down: ## stop dev-remote stack + @echo "Stopping development remote upload system..." 
+ docker compose -f docker-compose.dev-remote.yml down + +dev-remote-logs: ## follow logs for dev-remote stack + docker compose -f docker-compose.dev-remote.yml logs -f --tail=100 + +dev-remote-restart: ## restart dev-remote stack (rebuild) + docker compose -f docker-compose.dev-remote.yml down && docker compose -f docker-compose.dev-remote.yml up -d --build + +dev-remote-bootstrap: env dev-remote-up ## bootstrap dev-remote: up -> wait -> init -> index -> warm + @echo "Bootstrapping development remote upload system..." + ./scripts/wait-for-qdrant.sh + docker compose -f docker-compose.dev-remote.yml run --rm init_payload || true + $(MAKE) tokenizer + docker compose -f docker-compose.dev-remote.yml run --rm indexer --root /work --recreate + $(MAKE) warm || true + $(MAKE) health + +dev-remote-test: ## test remote upload workflow + @echo "Testing remote upload workflow..." + @echo "Upload service should be accessible at http://localhost:8004" + @echo "Health check: curl http://localhost:8004/health" + @echo "Status check: curl 'http://localhost:8004/api/v1/delta/status?workspace_path=/work/test-repo'" + @echo "Test upload: curl -X POST -F 'bundle=@test-bundle.tar.gz' -F 'workspace_path=/work/test-repo' http://localhost:8004/api/v1/delta/upload" + +dev-remote-client: ## start remote upload client for testing + @echo "Starting remote upload client..." + docker compose -f docker-compose.dev-remote.yml --profile client up -d remote_upload_client + +dev-remote-clean: ## clean up dev-remote volumes and containers + @echo "Cleaning up development remote upload system..." + docker compose -f docker-compose.dev-remote.yml down -v + docker volume rm context-engine_shared_workspace context-engine_shared_codebase context-engine_upload_temp context-engine_qdrant_storage_dev_remote 2>/dev/null || true + rm -rf dev-workspace + # Router helpers Q ?= what is hybrid search? 
diff --git a/README.md b/README.md index 5b4a5f80..607bbcc7 100644 --- a/README.md +++ b/README.md @@ -730,6 +730,8 @@ Indexer/Search MCP (8001 SSE, 8003 RMCP): - search_callers_for — intent wrapper for probable callers/usages - search_importers_for — intent wrapper for files importing a module/symbol - change_history_for_path(path) — summarize recent changes using stored metadata +- collection_map - return collection↔repo mappings +- default_collection - set the collection to use for the session Notes: - Most search tools accept filters like language, under, path_glob, kind, symbol, ext. @@ -888,11 +890,25 @@ For production-grade backup/migration strategies, see the official Qdrant docume Operational notes: - Collection name comes from `COLLECTION_NAME` (see .env). This stack defaults to a single collection for both code and memories; filtering uses `metadata.kind`. -- If you switch to a dedicated memory collection, update the MCP Memory server and the Indexer’s memory blending env to point at it. +- If you switch to a dedicated memory collection, update the MCP Memory server and the Indexer's memory blending env to point at it. - Consider pruning expired memories by filtering `expires_at < now`. - Call `context_search` on :8001 (SSE) or :8003 (RMCP) with `{ "include_memories": true }` to return both memory and code results. 
+### Collection Naming Strategies + +Different hash lengths are used for different workspace types: + +**Local Workspaces:** `repo-name-8charhash` +- Example: `Anesidara-e8d0f5fc` +- Used by local indexer/watcher +- Assumes unique repo names within workspace + +**Remote Uploads:** `folder-name-16charhash-8charhash` +- Example: `testupload2-04e680d5939dd035-b8b8d4cc` +- Collision avoidance for duplicate folder names for different codebases +- 16-char hash identifies workspace, 8-char hash identifies collection + ### Enable memory blending (for context_search) diff --git a/build-images.sh b/build-images.sh new file mode 100644 index 00000000..2cc3cd3a --- /dev/null +++ b/build-images.sh @@ -0,0 +1,226 @@ +#!/bin/bash +# Docker Build Script for Context-Engine +# Builds all service images with custom registry tagging + +set -euo pipefail + +# Configuration +REGISTRY="192.168.96.61:30009/library" +PROJECT_NAME="context-engine" +TAG="${TAG:-latest}" + +# Service mapping (service_name:dockerfile:final_image_name) +declare -A SERVICES=( + ["memory"]="Dockerfile.mcp:${PROJECT_NAME}-memory" + ["indexer"]="Dockerfile.mcp-indexer:${PROJECT_NAME}-indexer" + ["indexer-service"]="Dockerfile.indexer:${PROJECT_NAME}-indexer-service" + ["llamacpp"]="Dockerfile.llamacpp:${PROJECT_NAME}-llamacpp" +) + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Logging functions write to stderr so $(build_image ...) captures only the image name +log_info() { echo -e "${GREEN}[INFO]${NC} $1" >&2; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $1" >&2; } +log_error() { echo -e "${RED}[ERROR]${NC} $1" >&2; } + +# Build function +build_image() { + local service=$1 + local dockerfile=$2 + local image_name=$3 + local full_image="${REGISTRY}/${image_name}:${TAG}" + + log_info "Building ${service} service..." + log_info "Dockerfile: ${dockerfile}" + log_info "Image: ${full_image}" + + if !
docker build \ + -f "${dockerfile}" \ + -t "${full_image}" \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + .; then + log_error "Failed to build ${service} image" + return 1 + fi + + log_info "Successfully built ${service} image: ${full_image}" + + # Push if registry is accessible + if [[ "${PUSH_IMAGES:-false}" == "true" ]]; then + log_info "Pushing ${service} image..." + if ! docker push "${full_image}"; then + log_warn "Failed to push ${service} image (registry may be inaccessible)" + return 1 + fi + log_info "Successfully pushed ${service} image" + fi + + echo "${full_image}" +} + +# Main build process +main() { + log_info "Starting Context-Engine Docker build process..." + log_info "Registry: ${REGISTRY}" + log_info "Tag: ${TAG}" + log_info "Push enabled: ${PUSH_IMAGES:-false}" + echo + + # Check if Docker is running + if ! docker info >/dev/null 2>&1; then + log_error "Docker is not running or not accessible" + exit 1 + fi + + # Check if Dockerfiles exist + for service in "${!SERVICES[@]}"; do + IFS=':' read -r dockerfile image_name <<< "${SERVICES[$service]}" + if [[ ! -f "${dockerfile}" ]]; then + log_error "Dockerfile not found: ${dockerfile}" + exit 1 + fi + done + + local built_images=() + local failed_services=() + + # Build each service + for service in "${!SERVICES[@]}"; do + IFS=':' read -r dockerfile image_name <<< "${SERVICES[$service]}" + + if built_image=$(build_image "$service" "$dockerfile" "$image_name"); then + built_images+=("$built_image") + else + failed_services+=("$service") + fi + echo + done + + # Summary + log_info "Build Summary:" + log_info "Successfully built: ${#built_images[@]} images" + for img in "${built_images[@]}"; do + log_info " ✓ ${img}" + done + + if [[ ${#failed_services[@]} -gt 0 ]]; then + log_error "Failed to build: ${#failed_services[@]} services" + for service in "${failed_services[@]}"; do + log_error " ✗ ${service}" + done + exit 1 + fi + + log_info "All images built successfully!" 
+ + # Generate updated kustomization.yaml + cat > "deploy/kubernetes/kustomization-images.yaml" << 'EOF' +# Image overrides for Context-Engine Kubernetes deployment +# Use this with: kustomize build . --load-restrictor=LoadRestrictionsNone | kubectl apply -f - +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - namespace.yaml + - configmap.yaml + - qdrant.yaml + - mcp-memory.yaml + - mcp-indexer.yaml + - mcp-http.yaml + - indexer-services.yaml + - llamacpp.yaml + - ingress.yaml + +images: +EOF + + # Add images to kustomization + for service in "${!SERVICES[@]}"; do + IFS=':' read -r dockerfile image_name <<< "${SERVICES[$service]}" + full_image="${REGISTRY}/${image_name}:${TAG}" + cat >> "deploy/kubernetes/kustomization-images.yaml" << EOF + - name: ${image_name} + newName: ${full_image%:*} # Remove tag + newTag: ${TAG} +EOF + done + + cat >> "deploy/kubernetes/kustomization-images.yaml" << 'EOF' + +# Common labels +commonLabels: + app.kubernetes.io/name: context-engine + app.kubernetes.io/component: kubernetes-deployment + app.kubernetes.io/managed-by: kustomize + +# Namespace override +namespace: context-engine +EOF + + log_info "Generated deploy/kubernetes/kustomization-images.yaml" + log_info "To deploy: kustomize build deploy/kubernetes/ | kubectl apply -f -" +} + +# Help function +show_help() { + cat << EOF +Context-Engine Docker Build Script + +Usage: $0 [OPTIONS] + +Options: + -t, --tag TAG Set image tag (default: latest) + -p, --push Push images to registry after build + -h, --help Show this help message + +Examples: + $0 # Build with default tag + $0 -t v1.0.0 # Build with custom tag + $0 --push # Build and push to registry + TAG=dev-branch $0 # Build using environment variable + +Environment Variables: + TAG Image tag to use + PUSH_IMAGES Set to 'true' to push after build + +Registry Configuration: + Current registry: ${REGISTRY} + To change: modify REGISTRY variable in script + +Generated Files: + - 
deploy/kubernetes/kustomization-images.yaml + Contains image references for Kubernetes deployment + +EOF +} + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + -t|--tag) + TAG="$2" + shift 2 + ;; + -p|--push) + export PUSH_IMAGES=true + shift + ;; + -h|--help) + show_help + exit 0 + ;; + *) + log_error "Unknown option: $1" + show_help + exit 1 + ;; + esac +done + +# Run main function +main "$@" \ No newline at end of file diff --git a/deploy/kubernetes/Makefile b/deploy/kubernetes/Makefile index 8307bbbe..76d9df7d 100644 --- a/deploy/kubernetes/Makefile +++ b/deploy/kubernetes/Makefile @@ -53,7 +53,6 @@ kustomize-apply: check-kubectl ## Apply manifests with Kustomize .PHONY: kustomize-delete kustomize-delete: check-kubectl ## Delete manifests with Kustomize kustomize build . | kubectl delete -f - - # Management targets .PHONY: status status: check-kubectl ## Show deployment status @@ -79,9 +78,18 @@ status: check-kubectl ## Show deployment status kubectl get jobs -n $(NAMESPACE) || echo "No jobs found" .PHONY: logs -logs: check-kubectl ## Show logs for all services +logs: check-kubectl ## Show logs for core services (tail 100) @echo "=== Qdrant Logs ===" - kubectl logs -f statefulset/qdrant -n $(NAMESPACE) --tail=50 || echo "Qdrant logs not available" + kubectl logs -f statefulset/qdrant -n $(NAMESPACE) --tail=100 || echo "Qdrant logs not available" + @echo "" + @echo "=== MCP Memory Logs ===" + kubectl logs -f deployment/mcp-memory -n $(NAMESPACE) --tail=100 || echo "MCP Memory logs not available" + @echo "" + @echo "=== MCP Indexer Logs ===" + kubectl logs -f deployment/mcp-indexer -n $(NAMESPACE) --tail=100 || echo "MCP Indexer logs not available" + @echo "" + @echo "=== Watcher Logs ===" + kubectl logs -f deployment/watcher -n $(NAMESPACE) --tail=100 || echo "Watcher logs not available" .PHONY: logs-service logs-service: check-kubectl ## Show logs for specific service (usage: make logs-service SERVICE=mcp-memory) @@ -136,9 +144,8 @@ 
port-forward-service: check-kubectl ## Port forward specific service (usage: mak stop-port-forward: ## Stop all port forwards pkill -f "kubectl port-forward" || echo "No port forwards found" -# Build and push targets .PHONY: build-image -build-image: ## Build Docker image +build-image: ## Build Docker image (requires Docker) docker build -t $(IMAGE_REGISTRY)/context-engine:$(IMAGE_TAG) ../../ .PHONY: push-image @@ -152,9 +159,9 @@ test-connection: check-kubectl ## Test connectivity to all services @echo "Qdrant:" @kubectl run qdrant-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://qdrant.$(NAMESPACE).svc.cluster.local:6333/health || echo "Qdrant test failed" @echo "MCP Memory:" - @kubectl run memory-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://mcp-memory.$(NAMESPACE).svc.cluster.local:18000/health || echo "MCP Memory test failed" + @kubectl run memory-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://mcp-memory.$(NAMESPACE).svc.cluster.local:8000/health || echo "MCP Memory test failed" @echo "MCP Indexer:" - @kubectl run indexer-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://mcp-indexer.$(NAMESPACE).svc.cluster.local:18001/health || echo "MCP Indexer test failed" + @kubectl run indexer-test --image=curlimages/curl --rm -i --restart=Never -n $(NAMESPACE) -- curl -f http://mcp-indexer.$(NAMESPACE).svc.cluster.local:8001/health || echo "MCP Indexer test failed" # Configuration targets .PHONY: show-config @@ -167,7 +174,7 @@ show-config: ## Show current configuration @echo "Quick start commands:" @echo " make deploy # Deploy all services" @echo " make status # Show deployment status" - @echo " make logs-service SERVICE=mcp-memory # Show logs" + @echo " make logs # Show logs" @echo " make cleanup # Remove everything" .PHONY: show-urls @@ -196,4 +203,3 @@ describe-service: check-kubectl ## Describe a service (usage: 
make describe-serv .PHONY: events events: check-kubectl ## Show recent events kubectl get events -n $(NAMESPACE) --sort-by=.metadata.creationTimestamp - diff --git a/deploy/kubernetes/cleanup.sh b/deploy/kubernetes/cleanup.sh index 2ce5d64d..dadfa6ec 100755 --- a/deploy/kubernetes/cleanup.sh +++ b/deploy/kubernetes/cleanup.sh @@ -48,60 +48,150 @@ check_kubectl() { log_success "Kubernetes connection verified" } -# Confirm cleanup -confirm_cleanup() { - if [[ "$FORCE" != "true" ]]; then - log_warning "This will delete all Context-Engine resources in namespace: $NAMESPACE" - read -p "Are you sure you want to continue? (yes/no): " -r +# Check if namespace exists +check_namespace() { + if ! kubectl get namespace $NAMESPACE &> /dev/null; then + log_warning "Namespace $NAMESPACE does not exist" + return 1 + fi + return 0 +} + +# Show what will be deleted +show_deletion_plan() { + log_info "The following resources will be deleted:" + echo + + # Show current resources + echo "Pods:" + kubectl get pods -n $NAMESPACE 2>/dev/null || echo " No pods found" + echo + echo "Services:" + kubectl get services -n $NAMESPACE 2>/dev/null || echo " No services found" + echo + echo "Deployments:" + kubectl get deployments -n $NAMESPACE 2>/dev/null || echo " No deployments found" + echo + echo "StatefulSets:" + kubectl get statefulsets -n $NAMESPACE 2>/dev/null || echo " No statefulsets found" + echo + echo "Jobs:" + kubectl get jobs -n $NAMESPACE 2>/dev/null || echo " No jobs found" + echo + echo "PersistentVolumeClaims:" + kubectl get pvc -n $NAMESPACE 2>/dev/null || echo " No PVCs found" + echo + echo "ConfigMaps:" + kubectl get configmaps -n $NAMESPACE 2>/dev/null || echo " No configmaps found" + echo + if kubectl get ingress -n $NAMESPACE &> /dev/null; then + echo "Ingress:" + kubectl get ingress -n $NAMESPACE echo - if [[ ! 
$REPLY =~ ^[Yy][Ee][Ss]$ ]]; then - log_info "Cleanup cancelled" - exit 0 - fi fi + + log_warning "This will permanently delete all data in Qdrant and any other persistent storage!" +} + +confirm_cleanup() { + if [[ "$FORCE" == "true" ]]; then + return 0 + fi + read -p "Are you sure you want to delete all Context-Engine resources? (yes/no): " -r + echo + if [[ ! $REPLY =~ ^[Yy][Ee][Ss]$ ]]; then + log_info "Cleanup cancelled" + exit 0 + fi +} + +# Delete namespace and all resources +delete_namespace() { + log_info "Deleting namespace: $NAMESPACE" + kubectl delete namespace $NAMESPACE --ignore-not-found=true + log_success "Namespace deleted" +} + +# Wait for namespace deletion +wait_for_deletion() { + log_info "Waiting for namespace deletion to complete..." + + local timeout=60 + local count=0 + + while kubectl get namespace $NAMESPACE &> /dev/null; do + if [[ $count -ge $timeout ]]; then + log_warning "Namespace deletion is taking longer than expected" + log_info "You may need to manually delete remaining resources" + return 1 + fi + + echo -n "." + sleep 1 + ((count++)) + done + + echo + log_success "Namespace deletion completed" } -# Delete resources -cleanup_resources() { - log_info "Cleaning up Context-Engine resources..." +# Force delete if needed +force_delete() { + log_warning "Attempting to force delete remaining resources..." + + # Force delete any remaining pods + kubectl delete pods --all -n $NAMESPACE --grace-period=0 --force 2>/dev/null || true + + # Force delete any remaining PVCs + kubectl delete pvc --all -n $NAMESPACE --grace-period=0 --force 2>/dev/null || true - # Delete deployments - log_info "Deleting deployments..." - kubectl delete deployment --all -n $NAMESPACE --ignore-not-found=true + log_success "Force delete completed" +} + +# Verify cleanup +verify_cleanup() { + log_info "Verifying cleanup..." - # Delete statefulsets - log_info "Deleting statefulsets..." 
- kubectl delete statefulset --all -n $NAMESPACE --ignore-not-found=true + if kubectl get namespace $NAMESPACE &> /dev/null; then + log_error "Namespace $NAMESPACE still exists" + return 1 + fi - # Delete jobs - log_info "Deleting jobs..." - kubectl delete job --all -n $NAMESPACE --ignore-not-found=true + log_success "Cleanup completed successfully" +} - # Delete services - log_info "Deleting services..." - kubectl delete service --all -n $NAMESPACE --ignore-not-found=true +# Main cleanup function +main() { + log_info "Starting Context-Engine Kubernetes cleanup" - # Delete ingress - log_info "Deleting ingress..." - kubectl delete ingress --all -n $NAMESPACE --ignore-not-found=true + # Check prerequisites + check_kubectl - # Delete configmaps - log_info "Deleting configmaps..." - kubectl delete configmap --all -n $NAMESPACE --ignore-not-found=true + # Check if namespace exists + if ! check_namespace; then + log_success "Nothing to clean up - namespace $NAMESPACE does not exist" + exit 0 + fi - # Delete secrets - log_info "Deleting secrets..." - kubectl delete secret --all -n $NAMESPACE --ignore-not-found=true + # Show what will be deleted + show_deletion_plan - # Delete PVCs - log_info "Deleting persistent volume claims..." - kubectl delete pvc --all -n $NAMESPACE --ignore-not-found=true + # Ask for confirmation (unless forced) + confirm_cleanup # Delete namespace - log_info "Deleting namespace..." - kubectl delete namespace $NAMESPACE --ignore-not-found=true + delete_namespace - log_success "Cleanup complete!" + # Wait for deletion + if ! wait_for_deletion; then + log_warning "Standard deletion incomplete, attempting force delete..." + force_delete + fi + + # Verify cleanup + verify_cleanup + + log_success "Context-Engine cleanup completed!" 
} # Help function @@ -111,14 +201,17 @@ show_help() { echo "Usage: $0 [OPTIONS]" echo echo "Options:" - echo " -h, --help Show this help message" - echo " --namespace NAMESPACE Kubernetes namespace (default: context-engine)" - echo " --force Skip confirmation prompt" + echo " -h, --help Show this help message" + echo " -n, --namespace NAMESPACE Kubernetes namespace (default: context-engine)" + echo " -f, --force Skip confirmation prompt" + echo + echo "Environment variables:" + echo " NAMESPACE=context-engine Kubernetes namespace" echo echo "Examples:" - echo " $0 # Interactive cleanup" - echo " $0 --force # Force cleanup without confirmation" - echo " $0 --namespace my-ns # Cleanup specific namespace" + echo " $0 # Interactive cleanup with confirmation" + echo " $0 --force # Cleanup without confirmation" + echo " $0 -n my-namespace # Cleanup different namespace" } # Parse command line arguments @@ -128,11 +221,11 @@ while [[ $# -gt 0 ]]; do show_help exit 0 ;; - --namespace) + -n|--namespace) NAMESPACE="$2" shift 2 ;; - --force) + -f|--force|--force=true) FORCE=true shift ;; @@ -144,20 +237,11 @@ while [[ $# -gt 0 ]]; do esac done -# Main cleanup function -main() { - log_info "Starting Context-Engine Kubernetes cleanup" - - # Check prerequisites - check_kubectl - - # Confirm cleanup - confirm_cleanup - - # Cleanup resources - cleanup_resources -} +# Check if we're in the right directory +if [[ ! 
-f "qdrant.yaml" ]]; then + log_error "Please run this script from the deploy/kubernetes directory" + exit 1 +fi # Run main cleanup main - diff --git a/deploy/kubernetes/code-models-pvc.yaml b/deploy/kubernetes/code-models-pvc.yaml new file mode 100644 index 00000000..c69358d5 --- /dev/null +++ b/deploy/kubernetes/code-models-pvc.yaml @@ -0,0 +1,18 @@ +--- +# Persistent Volume Claim for model storage (CephFS RWX) +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: code-models-pvc + namespace: context-engine + labels: + app: context-engine + component: llamacpp + type: storage +spec: + accessModes: + - ReadWriteMany # CephFS supports RWX for multiple pods + storageClassName: rook-cephfs # Adjust based on your storage class + resources: + requests: + storage: 20Gi # Adjust size based on expected model footprint diff --git a/deploy/kubernetes/configmap.yaml b/deploy/kubernetes/configmap.yaml index 0c514bc2..1c73f60c 100644 --- a/deploy/kubernetes/configmap.yaml +++ b/deploy/kubernetes/configmap.yaml @@ -5,71 +5,112 @@ metadata: namespace: context-engine labels: app: context-engine +component: configuration data: - # Core Configuration COLLECTION_NAME: "codebase" EMBEDDING_MODEL: "BAAI/bge-base-en-v1.5" EMBEDDING_PROVIDER: "fastembed" - - # Qdrant Configuration - QDRANT_URL: "http://qdrant:6333" - QDRANT_TIMEOUT: "60" - - # Indexing Configuration - INDEX_MICRO_CHUNKS: "1" - MAX_MICRO_CHUNKS_PER_FILE: "200" - INDEX_CHUNK_LINES: "120" - INDEX_CHUNK_OVERLAP: "20" - INDEX_BATCH_SIZE: "64" - INDEX_UPSERT_BATCH: "128" - INDEX_UPSERT_RETRIES: "5" - INDEX_UPSERT_BACKOFF: "0.5" - - # Watcher Configuration - WATCH_DEBOUNCE_SECS: "1.5" - - # ReFRAG Configuration + + FASTMCP_HOST: "0.0.0.0" + FASTMCP_PORT: "8000" + FASTMCP_INDEXER_PORT: "8001" + + TOOL_STORE_DESCRIPTION: "Store reusable code snippets for later retrieval. 
The 'information' is a clear NL description; include the actual code in 'metadata.code' and add 'metadata.language' (e.g., python, typescript) and 'metadata.path' when known. Use this whenever you generate or refine a code snippet." + TOOL_FIND_DESCRIPTION: "Search for relevant code snippets using multiple phrasings of the query (multi-query). Prefer results where metadata.language matches the target file and metadata.path is relevant. You may pass optional filters (language, path_prefix, kind) which the server applies server-side. Include 'metadata.code', 'metadata.path', and 'metadata.language' in responses." + + RERANKER_ENABLED: "1" + RERANKER_TOPN: "100" + RERANKER_RETURN_M: "20" + RERANKER_TIMEOUT_MS: "3000" + RERANK_TIMEOUT_FLOOR_MS: "1000" + + EMBEDDING_WARMUP: "0" + RERANK_WARMUP: "0" + + HYBRID_IN_PROCESS: "1" + RERANK_IN_PROCESS: "1" + + USE_TREE_SITTER: "1" + + HYBRID_EXPAND: "1" + HYBRID_PER_PATH: "1" + HYBRID_SYMBOL_BOOST: "0.35" + HYBRID_RECENCY_WEIGHT: "0.1" + RERANK_EXPAND: "1" + + INDEX_SEMANTIC_CHUNKS: "0" + + MEMORY_SSE_ENABLED: "true" + MEMORY_MCP_URL: "http://mcp:8000/sse" + MEMORY_MCP_TIMEOUT: "6" + + LLM_PROVIDER: "ollama" + OLLAMA_HOST: "http://ollama:11434" + LLM_EXPAND_MODEL: "phi3:mini" + LLM_EXPAND_MAX: "4" + PRF_ENABLED: "1" + REFRAG_MODE: "1" - REFRAG_GATE_FIRST: "1" - REFRAG_CANDIDATES: "200" + MINI_VECTOR_NAME: "mini" + MINI_VEC_DIM: "64" + MINI_VEC_SEED: "1337" + HYBRID_MINI_WEIGHT: "1.0" + + INDEX_MICRO_CHUNKS: "1" MICRO_CHUNK_TOKENS: "16" MICRO_CHUNK_STRIDE: "8" + REFRAG_GATE_FIRST: "1" + REFRAG_CANDIDATES: "200" + MICRO_OUT_MAX_SPANS: "3" MICRO_MERGE_LINES: "4" MICRO_BUDGET_TOKENS: "512" MICRO_TOKENS_PER_LINE: "32" - - # Decoder Configuration (optional) + + CTX_SUMMARY_CHARS: "0" + REFRAG_DECODER: "1" REFRAG_RUNTIME: "llamacpp" + REFRAG_ENCODER_MODEL: "BAAI/bge-base-en-v1.5" + REFRAG_PHI_PATH: "/work/models/refrag_phi_768_to_dmodel.bin" + REFRAG_SENSE: "heuristic" + LLAMACPP_URL: "http://llamacpp:8080" LLAMACPP_TIMEOUT_SEC: 
"180" DECODER_MAX_TOKENS: "4000" + REFRAG_DECODER_MODE: "prompt" + REFRAG_SOFT_SCALE: "1.0" - # Model download configuration (for init container) - LLAMACPP_MODEL_URL: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q8_0.gguf" - LLAMACPP_MODEL_NAME: "qwen2.5-1.5b-instruct-q8_0.gguf" - - # Reranker Configuration - RERANKER_ENABLED: "1" - - # MCP Configuration - FASTMCP_HOST: "0.0.0.0" - FASTMCP_PORT: "8000" - FASTMCP_INDEXER_PORT: "8001" + MAX_MICRO_CHUNKS_PER_FILE: "200" + QDRANT_TIMEOUT: "60" + MEMORY_AUTODETECT: "1" + MEMORY_COLLECTION_TTL_SECS: "300" + + FASTMCP_HTTP_TRANSPORT: "http" + FASTMCP_HTTP_PORT: "8002" + FASTMCP_HTTP_HEALTH_PORT: "18002" + FASTMCP_INDEXER_HTTP_PORT: "8003" + FASTMCP_INDEXER_HTTP_HEALTH_PORT: "18003" + + WATCH_DEBOUNCE_SECS: "1.5" + INDEX_UPSERT_BATCH: "128" + INDEX_UPSERT_RETRIES: "5" + + QDRANT_URL: "http://qdrant:6333" + + QDRANT_API_KEY: "" + REPO_NAME: "workspace" + FASTMCP_SERVER_NAME: "qdrant-mcp" + HOST_INDEX_PATH: "/work" + + INDEX_CHUNK_LINES: "120" + INDEX_CHUNK_OVERLAP: "20" + INDEX_BATCH_SIZE: "64" + INDEX_UPSERT_BACKOFF: "0.5" FASTMCP_HEALTH_PORT: "18000" - - # Memory Configuration - MEMORY_SSE_ENABLED: "true" - MEMORY_MCP_URL: "http://mcp-memory:8000/sse" - MEMORY_MCP_TIMEOUT: "6" - - # Multi-collection Configuration CTX_MULTI_COLLECTION: "1" CTX_DOC_PASS: "1" - - # Logging DEBUG_CONTEXT_ANSWER: "0" - - # Tokenizer TOKENIZER_JSON: "/app/models/tokenizer.json" + LLAMACPP_MODEL_URL: "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q8_0.gguf" + LLAMACPP_MODEL_NAME: "qwen2.5-1.5b-instruct-q8_0.gguf" diff --git a/deploy/kubernetes/deploy.sh b/deploy/kubernetes/deploy.sh index 156f0e68..61fdf1c6 100755 --- a/deploy/kubernetes/deploy.sh +++ b/deploy/kubernetes/deploy.sh @@ -7,8 +7,9 @@ set -e # Configuration NAMESPACE="context-engine" -IMAGE_REGISTRY="context-engine" +IMAGE_REGISTRY="context-engine" # Change to your registry if needed 
IMAGE_TAG="latest" +USE_KUSTOMIZE=${USE_KUSTOMIZE:-"false"} # Colors for output RED='\033[0;31m' @@ -73,7 +74,7 @@ deploy_core() { # Wait for Qdrant to be ready log_info "Waiting for Qdrant to be ready..." - kubectl wait --for=condition=ready pod -l component=qdrant -n $NAMESPACE --timeout=300s || log_warning "Qdrant may not be ready yet" + kubectl wait --for=condition=ready pod -l component=qdrant -n "$NAMESPACE" --timeout=300s log_success "Core services deployed" } @@ -88,8 +89,8 @@ deploy_mcp_servers() { # Wait for MCP servers to be ready log_info "Waiting for MCP servers to be ready..." - kubectl wait --for=condition=ready pod -l component=mcp-memory -n $NAMESPACE --timeout=300s || log_warning "MCP Memory may not be ready yet" - kubectl wait --for=condition=ready pod -l component=mcp-indexer -n $NAMESPACE --timeout=300s || log_warning "MCP Indexer may not be ready yet" + kubectl wait --for=condition=ready pod -l component=mcp-memory -n "$NAMESPACE" --timeout=300s + kubectl wait --for=condition=ready pod -l component=mcp-indexer -n "$NAMESPACE" --timeout=300s log_success "MCP servers deployed" } @@ -99,9 +100,9 @@ deploy_http_servers() { log_info "Deploying HTTP servers (optional)" kubectl apply -f mcp-http.yaml - # Wait for HTTP servers to be ready - kubectl wait --for=condition=ready pod -l component=mcp-memory-http -n $NAMESPACE --timeout=300s || log_warning "MCP Memory HTTP may not be ready yet" - kubectl wait --for=condition=ready pod -l component=mcp-indexer-http -n $NAMESPACE --timeout=300s || log_warning "MCP Indexer HTTP may not be ready yet" + log_info "Waiting for HTTP servers to be ready..." 
+ kubectl wait --for=condition=ready pod -l component=mcp-memory-http -n "$NAMESPACE" --timeout=300s + kubectl wait --for=condition=ready pod -l component=mcp-indexer-http -n "$NAMESPACE" --timeout=300s log_success "HTTP servers deployed" } @@ -132,7 +133,7 @@ deploy_ingress() { kubectl apply -f ingress.yaml log_success "Ingress deployed" else - log_warning "Skipping Ingress deployment (set --deploy-ingress to enable)" + log_warning "Skipping Ingress deployment (set DEPLOY_INGRESS=true or pass --deploy-ingress to enable)" fi } @@ -148,6 +149,9 @@ show_status() { echo "Services:" kubectl get services -n $NAMESPACE echo + echo "Persistent Volumes:" + kubectl get pvc -n $NAMESPACE || echo "No PVCs found" + echo log_success "Deployment complete!" echo @@ -162,7 +166,6 @@ show_status() { fi } - # Patch images to the chosen registry:tag and refresh jobs set_images() { local full="${IMAGE_REGISTRY}:${IMAGE_TAG}" @@ -234,7 +237,6 @@ apply_with_kustomize() { rm -rf "${tmp_dir}" } - # Main deployment function main() { log_info "Starting Context-Engine Kubernetes deployment" @@ -329,6 +331,4 @@ if [[ ! 
-f "qdrant.yaml" ]]; then exit 1 fi -# Run main deployment main - diff --git a/deploy/kubernetes/indexer-services.yaml b/deploy/kubernetes/indexer-services.yaml index a2695dc3..a8351c35 100644 --- a/deploy/kubernetes/indexer-services.yaml +++ b/deploy/kubernetes/indexer-services.yaml @@ -1,5 +1,5 @@ --- -# Watcher Deployment (File change monitoring and reindexing) +# Indexer Service Deployment (file change monitoring and reindexing) # This is a template - copy and customize for each repository apiVersion: apps/v1 kind: Deployment @@ -8,24 +8,23 @@ metadata: namespace: context-engine labels: app: context-engine - component: watcher + component: indexer-service spec: replicas: 1 selector: matchLabels: app: context-engine - component: watcher + component: indexer-service template: metadata: labels: app: context-engine - component: watcher + component: indexer-service spec: serviceAccountName: context-engine - containers: - name: watcher - image: context-engine:latest + image: context-engine-indexer-service imagePullPolicy: IfNotPresent command: ["python", "/app/scripts/watch_index.py"] workingDir: /work @@ -78,15 +77,20 @@ spec: cpu: "250m" limits: memory: "2Gi" - cpu: "1" + cpu: "1000m" volumeMounts: - - name: work + - name: work-volume mountPath: /work + readOnly: true + - name: metadata-volume + mountPath: /work/.codebase volumes: - - name: work - hostPath: - path: /tmp/context-engine-work - type: DirectoryOrCreate + - name: work-volume + persistentVolumeClaim: + claimName: code-repos-pvc + - name: metadata-volume + persistentVolumeClaim: + claimName: code-metadata-pvc --- # Indexer Job (One-shot code indexing) @@ -109,7 +113,7 @@ spec: restartPolicy: OnFailure containers: - name: indexer - image: context-engine:latest + image: context-engine-indexer-service imagePullPolicy: IfNotPresent command: ["python", "/app/scripts/ingest_code.py"] workingDir: /work @@ -135,16 +139,20 @@ spec: cpu: "500m" limits: memory: "4Gi" - cpu: "2" + cpu: "2000m" volumeMounts: - - name: 
work + - name: work-volume mountPath: /work readOnly: true + - name: metadata-volume + mountPath: /work/.codebase volumes: - - name: work - hostPath: - path: /tmp/context-engine-work - type: DirectoryOrCreate + - name: work-volume + persistentVolumeClaim: + claimName: code-repos-pvc + - name: metadata-volume + persistentVolumeClaim: + claimName: code-metadata-pvc --- # Index Initialization Job @@ -167,7 +175,7 @@ spec: restartPolicy: OnFailure containers: - name: init-payload - image: context-engine:latest + image: context-engine-indexer-service imagePullPolicy: IfNotPresent command: ["python", "/app/scripts/create_indexes.py"] workingDir: /work @@ -190,12 +198,15 @@ spec: memory: "1Gi" cpu: "500m" volumeMounts: - - name: work + - name: work-volume mountPath: /work readOnly: true + - name: metadata-volume + mountPath: /work/.codebase volumes: - - name: work - hostPath: - path: /tmp/context-engine-work - type: DirectoryOrCreate - + - name: work-volume + persistentVolumeClaim: + claimName: code-repos-pvc + - name: metadata-volume + persistentVolumeClaim: + claimName: code-metadata-pvc diff --git a/deploy/kubernetes/ingress.yaml b/deploy/kubernetes/ingress.yaml index a415be73..99c1b5a6 100644 --- a/deploy/kubernetes/ingress.yaml +++ b/deploy/kubernetes/ingress.yaml @@ -1,5 +1,5 @@ --- -# Ingress for Context-Engine services +# Ingress for Context-Engine services (optional) # Requires an Ingress controller (e.g., nginx-ingress, traefik) apiVersion: networking.k8s.io/v1 kind: Ingress @@ -8,15 +8,14 @@ metadata: namespace: context-engine labels: app: context-engine + component: ingress annotations: - # Nginx Ingress annotations nginx.ingress.kubernetes.io/use-regex: "true" nginx.ingress.kubernetes.io/rewrite-target: /$2 - nginx.ingress.kubernetes.io/ssl-redirect: "true" - # Increase timeouts for SSE connections + nginx.ingress.kubernetes.io/ssl-redirect: "false" + nginx.ingress.kubernetes.io/proxy-body-size: "100m" nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" 
nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" - # Enable CORS if needed # nginx.ingress.kubernetes.io/enable-cors: "true" # nginx.ingress.kubernetes.io/cors-allow-origin: "*" spec: @@ -25,7 +24,6 @@ spec: - host: context-engine.example.com # Change to your domain http: paths: - # Qdrant - path: /qdrant(/|$)(.*) pathType: ImplementationSpecific backend: @@ -33,8 +31,6 @@ spec: name: qdrant port: number: 6333 - - # MCP Memory (SSE) - path: /mcp/memory(/|$)(.*) pathType: ImplementationSpecific backend: @@ -42,8 +38,6 @@ spec: name: mcp-memory port: number: 8000 - - # MCP Indexer (SSE) - path: /mcp/indexer(/|$)(.*) pathType: ImplementationSpecific backend: @@ -51,8 +45,6 @@ spec: name: mcp-indexer port: number: 8001 - - # MCP Memory HTTP - path: /mcp-http/memory(/|$)(.*) pathType: ImplementationSpecific backend: @@ -60,8 +52,6 @@ spec: name: mcp-memory-http port: number: 8002 - - # MCP Indexer HTTP - path: /mcp-http/indexer(/|$)(.*) pathType: ImplementationSpecific backend: @@ -69,8 +59,6 @@ spec: name: mcp-indexer-http port: number: 8003 - - # Llama.cpp (optional) - path: /llamacpp(/|$)(.*) pathType: ImplementationSpecific backend: @@ -78,10 +66,9 @@ spec: name: llamacpp port: number: 8080 - + # TLS configuration (optional) # tls: # - hosts: # - context-engine.example.com # secretName: context-engine-tls - diff --git a/deploy/kubernetes/kustomization.yaml b/deploy/kubernetes/kustomization.yaml index 908af460..10944621 100644 --- a/deploy/kubernetes/kustomization.yaml +++ b/deploy/kubernetes/kustomization.yaml @@ -12,6 +12,7 @@ resources: # Core services - qdrant.yaml + - code-models-pvc.yaml # MCP servers - mcp-memory.yaml @@ -22,20 +23,17 @@ resources: - indexer-services.yaml - rbac.yaml - networkpolicy.yaml - - hpa.yaml - # Optional services - llamacpp.yaml - ingress.yaml # Common labels -labels: - - pairs: - app.kubernetes.io/name: context-engine - app.kubernetes.io/component: kubernetes-deployment - app.kubernetes.io/managed-by: kustomize +commonLabels: + 
app.kubernetes.io/name: context-engine + app.kubernetes.io/component: kubernetes-deployment + app.kubernetes.io/managed-by: kustomize # Patches for production customization patchesStrategicMerge: [] @@ -81,4 +79,3 @@ patches: target: kind: Deployment name: mcp-memory - diff --git a/deploy/kubernetes/llamacpp.yaml b/deploy/kubernetes/llamacpp.yaml index aec6fcfe..695e3770 100644 --- a/deploy/kubernetes/llamacpp.yaml +++ b/deploy/kubernetes/llamacpp.yaml @@ -1,5 +1,5 @@ --- -# Llama.cpp Deployment (Optional - for text generation) +# Optional Llama.cpp Service (Text Generation) apiVersion: apps/v1 kind: Deployment metadata: @@ -9,7 +9,7 @@ metadata: app: context-engine component: llamacpp spec: - replicas: 1 + replicas: 1 # Set to 0 if not needed selector: matchLabels: app: context-engine @@ -86,26 +86,33 @@ spec: - name: http containerPort: 8080 protocol: TCP - command: - - /app/llama-server + env: + - name: LLAMA_ARG_MODEL + value: "/models/model.gguf" + - name: LLAMA_ARG_CTX_SIZE + value: "8192" + - name: LLAMA_ARG_HOST + value: "0.0.0.0" + - name: LLAMA_ARG_PORT + value: "8080" + command: ["llama-server"] args: - - --host - - "0.0.0.0" - - --port - - "8080" - - --model - - /models/qwen2.5-1.5b-instruct-q8_0.gguf - - --ctx-size - - "4096" - - --n-gpu-layers - - "0" + - "--model" + - "/models/model.gguf" + - "--host" + - "0.0.0.0" + - "--port" + - "8080" + - "--ctx-size" + - "8192" + - "--no-warmup" resources: requests: memory: "2Gi" - cpu: "1" + cpu: "1000m" limits: memory: "8Gi" - cpu: "4" + cpu: "4000m" volumeMounts: - name: models mountPath: /models @@ -115,18 +122,19 @@ spec: path: /health port: http initialDelaySeconds: 60 - periodSeconds: 10 + periodSeconds: 30 + timeoutSeconds: 10 readinessProbe: httpGet: path: /health port: http initialDelaySeconds: 30 - periodSeconds: 5 + periodSeconds: 10 + timeoutSeconds: 5 volumes: - name: models - hostPath: - path: /tmp/context-engine-models - type: DirectoryOrCreate + persistentVolumeClaim: + claimName: 
code-models-pvc --- # Llama.cpp Service @@ -139,16 +147,16 @@ metadata: app: context-engine component: llamacpp spec: - type: ClusterIP + type: NodePort # Change to LoadBalancer for external access ports: - name: http port: 8080 targetPort: http + nodePort: 30808 # Optional: specify node port protocol: TCP selector: app: context-engine component: llamacpp - --- # Optional: Llama.cpp External Service apiVersion: v1 @@ -170,4 +178,3 @@ spec: selector: app: context-engine component: llamacpp - diff --git a/deploy/kubernetes/mcp-http.yaml b/deploy/kubernetes/mcp-http.yaml index 8ecd7b55..9829b085 100644 --- a/deploy/kubernetes/mcp-http.yaml +++ b/deploy/kubernetes/mcp-http.yaml @@ -1,5 +1,5 @@ --- -# MCP Memory HTTP Deployment +# MCP Memory Server (HTTP) Deployment apiVersion: apps/v1 kind: Deployment metadata: @@ -21,31 +21,20 @@ spec: component: mcp-memory-http spec: serviceAccountName: context-engine - containers: - name: mcp-memory-http - image: context-engine:latest + image: context-engine-memory imagePullPolicy: IfNotPresent - command: ["python", "/app/scripts/mcp_memory_server.py"] + command: ["python", "-m", "mcp.server.fastmcp"] + args: ["--server-name", "context-engine-http", "--host", "0.0.0.0", "--port", "8000", "--transport", "http", "/app/scripts/memory_server.py"] ports: - name: http - containerPort: 8002 + containerPort: 8000 protocol: TCP - name: health - containerPort: 18002 + containerPort: 18000 protocol: TCP env: - - name: FASTMCP_TRANSPORT - value: "streamable-http" - - name: FASTMCP_HOST - valueFrom: - configMapKeyRef: - name: context-engine-config - key: FASTMCP_HOST - - name: FASTMCP_PORT - value: "8002" - - name: FASTMCP_HEALTH_PORT - value: "18002" - name: QDRANT_URL valueFrom: configMapKeyRef: @@ -61,15 +50,44 @@ spec: configMapKeyRef: name: context-engine-config key: EMBEDDING_MODEL + - name: EMBEDDING_PROVIDER + valueFrom: + configMapKeyRef: + name: context-engine-config + key: EMBEDDING_PROVIDER + - name: TOOL_STORE_DESCRIPTION + 
valueFrom: + configMapKeyRef: + name: context-engine-config + key: TOOL_STORE_DESCRIPTION + - name: TOOL_FIND_DESCRIPTION + valueFrom: + configMapKeyRef: + name: context-engine-config + key: TOOL_FIND_DESCRIPTION + - name: FASTMCP_HOST + valueFrom: + configMapKeyRef: + name: context-engine-config + key: FASTMCP_HOST + - name: FASTMCP_PORT + value: "8000" + - name: FASTMCP_TRANSPORT + valueFrom: + configMapKeyRef: + name: context-engine-config + key: FASTMCP_HTTP_TRANSPORT + - name: FASTMCP_HEALTH_PORT + value: "18000" resources: requests: - memory: "1Gi" - cpu: "500m" + memory: "512Mi" + cpu: "250m" limits: - memory: "4Gi" - cpu: "2" + memory: "2Gi" + cpu: "1000m" volumeMounts: - - name: work + - name: work-volume mountPath: /work readOnly: true livenessProbe: @@ -85,13 +103,12 @@ spec: initialDelaySeconds: 10 periodSeconds: 5 volumes: - - name: work - hostPath: - path: /tmp/context-engine-work - type: DirectoryOrCreate + - name: work-volume + persistentVolumeClaim: + claimName: code-repos-pvc --- -# MCP Memory HTTP Service +# MCP Memory Server (HTTP) Service apiVersion: v1 kind: Service metadata: @@ -101,15 +118,17 @@ metadata: app: context-engine component: mcp-memory-http spec: - type: ClusterIP + type: NodePort # Change to LoadBalancer for external access ports: - name: http port: 8002 targetPort: http + nodePort: 30804 # Optional: specify node port protocol: TCP - name: health port: 18002 targetPort: health + nodePort: 30805 # Optional: specify node port protocol: TCP selector: app: context-engine @@ -143,7 +162,7 @@ spec: component: mcp-memory-http --- -# MCP Indexer HTTP Deployment +# MCP Indexer Server (HTTP) Deployment apiVersion: apps/v1 kind: Deployment metadata: @@ -167,28 +186,18 @@ spec: serviceAccountName: context-engine containers: - name: mcp-indexer-http - image: context-engine:latest + image: context-engine-indexer imagePullPolicy: IfNotPresent - command: ["python", "/app/scripts/mcp_indexer_server.py"] + command: ["python", "-m", 
"mcp.server.fastmcp"] + args: ["--server-name", "context-engine-indexer-http", "--host", "0.0.0.0", "--port", "8001", "--transport", "http", "/app/scripts/indexer_server.py"] ports: - name: http - containerPort: 8003 + containerPort: 8001 protocol: TCP - name: health - containerPort: 18003 + containerPort: 18001 protocol: TCP env: - - name: FASTMCP_TRANSPORT - value: "streamable-http" - - name: FASTMCP_HOST - valueFrom: - configMapKeyRef: - name: context-engine-config - key: FASTMCP_HOST - - name: FASTMCP_INDEXER_PORT - value: "8003" - - name: FASTMCP_HEALTH_PORT - value: "18003" - name: QDRANT_URL valueFrom: configMapKeyRef: @@ -244,16 +253,32 @@ spec: configMapKeyRef: name: context-engine-config key: CTX_MULTI_COLLECTION + - name: FASTMCP_HOST + valueFrom: + configMapKeyRef: + name: context-engine-config + key: FASTMCP_HOST + - name: FASTMCP_INDEXER_PORT + value: "8001" + - name: FASTMCP_TRANSPORT + valueFrom: + configMapKeyRef: + name: context-engine-config + key: FASTMCP_HTTP_TRANSPORT + - name: FASTMCP_HEALTH_PORT + value: "18001" resources: requests: - memory: "1Gi" - cpu: "500m" + memory: "512Mi" + cpu: "250m" limits: - memory: "4Gi" - cpu: "2" + memory: "2Gi" + cpu: "1000m" volumeMounts: - - name: work + - name: work-volume mountPath: /work + - name: codebase-volume + mountPath: /work/.codebase livenessProbe: httpGet: path: /readyz @@ -267,13 +292,15 @@ spec: initialDelaySeconds: 10 periodSeconds: 5 volumes: - - name: work - hostPath: - path: /tmp/context-engine-work - type: DirectoryOrCreate + - name: work-volume + persistentVolumeClaim: + claimName: code-repos-pvc + - name: codebase-volume + persistentVolumeClaim: + claimName: code-metadata-pvc --- -# MCP Indexer HTTP Service +# MCP Indexer Server (HTTP) Service apiVersion: v1 kind: Service metadata: @@ -283,15 +310,17 @@ metadata: app: context-engine component: mcp-indexer-http spec: - type: ClusterIP + type: NodePort # Change to LoadBalancer for external access ports: - name: http port: 8003 targetPort: 
http + nodePort: 30806 # Optional: specify node port protocol: TCP - name: health port: 18003 targetPort: health + nodePort: 30807 # Optional: specify node port protocol: TCP selector: app: context-engine @@ -323,4 +352,3 @@ spec: selector: app: context-engine component: mcp-indexer-http - diff --git a/deploy/kubernetes/mcp-indexer.yaml b/deploy/kubernetes/mcp-indexer.yaml index a11bff6b..389316f3 100644 --- a/deploy/kubernetes/mcp-indexer.yaml +++ b/deploy/kubernetes/mcp-indexer.yaml @@ -1,5 +1,5 @@ --- -# MCP Indexer Deployment +# MCP Indexer Server (SSE) Deployment apiVersion: apps/v1 kind: Deployment metadata: @@ -21,10 +21,9 @@ spec: component: mcp-indexer spec: serviceAccountName: context-engine - containers: - name: mcp-indexer - image: context-engine:latest + image: context-engine-indexer imagePullPolicy: IfNotPresent command: ["python", "/app/scripts/mcp_indexer_server.py"] ports: @@ -47,6 +46,8 @@ spec: key: FASTMCP_INDEXER_PORT - name: FASTMCP_HEALTH_PORT value: "18001" + - name: FASTMCP_TRANSPORT + value: "sse" - name: QDRANT_URL valueFrom: configMapKeyRef: @@ -62,41 +63,6 @@ spec: configMapKeyRef: name: context-engine-config key: EMBEDDING_MODEL - - name: INDEX_MICRO_CHUNKS - valueFrom: - configMapKeyRef: - name: context-engine-config - key: INDEX_MICRO_CHUNKS - - name: MAX_MICRO_CHUNKS_PER_FILE - valueFrom: - configMapKeyRef: - name: context-engine-config - key: MAX_MICRO_CHUNKS_PER_FILE - - name: REFRAG_MODE - valueFrom: - configMapKeyRef: - name: context-engine-config - key: REFRAG_MODE - - name: REFRAG_DECODER - valueFrom: - configMapKeyRef: - name: context-engine-config - key: REFRAG_DECODER - - name: LLAMACPP_URL - valueFrom: - configMapKeyRef: - name: context-engine-config - key: LLAMACPP_URL - - name: MEMORY_SSE_ENABLED - valueFrom: - configMapKeyRef: - name: context-engine-config - key: MEMORY_SSE_ENABLED - - name: MEMORY_MCP_URL - valueFrom: - configMapKeyRef: - name: context-engine-config - key: MEMORY_MCP_URL - name: CTX_MULTI_COLLECTION 
valueFrom: configMapKeyRef: @@ -104,14 +70,16 @@ spec: key: CTX_MULTI_COLLECTION resources: requests: - memory: "1Gi" - cpu: "500m" + memory: "512Mi" + cpu: "250m" limits: - memory: "4Gi" - cpu: "2" + memory: "2Gi" + cpu: "1000m" volumeMounts: - - name: work + - name: work-volume mountPath: /work + - name: codebase-volume + mountPath: /work/.codebase livenessProbe: httpGet: path: /readyz @@ -125,13 +93,15 @@ spec: initialDelaySeconds: 10 periodSeconds: 5 volumes: - - name: work - hostPath: - path: /tmp/context-engine-work - type: DirectoryOrCreate + - name: work-volume + persistentVolumeClaim: + claimName: code-repos-pvc + - name: codebase-volume + persistentVolumeClaim: + claimName: code-metadata-pvc --- -# MCP Indexer Service +# MCP Indexer Server (SSE) Service apiVersion: v1 kind: Service metadata: @@ -141,15 +111,17 @@ metadata: app: context-engine component: mcp-indexer spec: - type: ClusterIP + type: NodePort # Change to LoadBalancer for external access ports: - name: sse port: 8001 targetPort: sse + nodePort: 30802 # Optional: specify node port protocol: TCP - name: health port: 18001 targetPort: health + nodePort: 30803 # Optional: specify node port protocol: TCP selector: app: context-engine @@ -181,4 +153,3 @@ spec: selector: app: context-engine component: mcp-indexer - diff --git a/deploy/kubernetes/mcp-memory.yaml b/deploy/kubernetes/mcp-memory.yaml index aca68cb0..891ce15e 100644 --- a/deploy/kubernetes/mcp-memory.yaml +++ b/deploy/kubernetes/mcp-memory.yaml @@ -1,5 +1,5 @@ --- -# MCP Memory Deployment +# MCP Memory Server (SSE) Deployment apiVersion: apps/v1 kind: Deployment metadata: @@ -21,10 +21,9 @@ spec: component: mcp-memory spec: serviceAccountName: context-engine - containers: - name: mcp-memory - image: context-engine:latest + image: context-engine-memory imagePullPolicy: IfNotPresent command: ["python", "/app/scripts/mcp_memory_server.py"] ports: @@ -47,6 +46,8 @@ spec: key: FASTMCP_PORT - name: FASTMCP_HEALTH_PORT value: "18000" + - name: 
FASTMCP_TRANSPORT + value: "sse" - name: QDRANT_URL valueFrom: configMapKeyRef: @@ -62,11 +63,6 @@ spec: configMapKeyRef: name: context-engine-config key: EMBEDDING_MODEL - - name: EMBEDDING_PROVIDER - valueFrom: - configMapKeyRef: - name: context-engine-config - key: EMBEDDING_PROVIDER resources: requests: memory: "1Gi" @@ -75,7 +71,7 @@ spec: memory: "4Gi" cpu: "2" volumeMounts: - - name: work + - name: work-volume mountPath: /work readOnly: true livenessProbe: @@ -91,13 +87,12 @@ spec: initialDelaySeconds: 10 periodSeconds: 5 volumes: - - name: work - hostPath: - path: /tmp/context-engine-work - type: DirectoryOrCreate + - name: work-volume + persistentVolumeClaim: + claimName: code-repos-pvc --- -# MCP Memory Service +# MCP Memory Server (SSE) Service apiVersion: v1 kind: Service metadata: @@ -107,15 +102,17 @@ metadata: app: context-engine component: mcp-memory spec: - type: ClusterIP + type: NodePort # Change to LoadBalancer for external access ports: - name: sse port: 8000 targetPort: sse + nodePort: 30800 # Optional: specify node port protocol: TCP - name: health port: 18000 targetPort: health + nodePort: 30801 # Optional: specify node port protocol: TCP selector: app: context-engine diff --git a/deploy/kubernetes/namespace.yaml b/deploy/kubernetes/namespace.yaml index b972df16..0f0cecad 100644 --- a/deploy/kubernetes/namespace.yaml +++ b/deploy/kubernetes/namespace.yaml @@ -3,5 +3,6 @@ kind: Namespace metadata: name: context-engine labels: - name: context-engine +name: context-engine app: context-engine + component: infrastructure diff --git a/deploy/kubernetes/qdrant.yaml b/deploy/kubernetes/qdrant.yaml index 503af2f8..191041b1 100644 --- a/deploy/kubernetes/qdrant.yaml +++ b/deploy/kubernetes/qdrant.yaml @@ -39,7 +39,7 @@ spec: value: "6334" resources: requests: - memory: "2Gi" +memory: "2Gi" cpu: "1" limits: memory: "8Gi" @@ -49,13 +49,13 @@ spec: mountPath: /qdrant/storage livenessProbe: httpGet: - path: /healthz +path: /healthz port: http 
initialDelaySeconds: 30 periodSeconds: 10 readinessProbe: httpGet: - path: /readyz +path: /readyz port: http initialDelaySeconds: 5 periodSeconds: 5 @@ -67,10 +67,10 @@ spec: component: qdrant spec: accessModes: ["ReadWriteOnce"] - storageClassName: standard # Adjust based on your cluster +# storageClassName: "" # Uncomment and set if you want to specify a storage class resources: requests: - storage: 50Gi + storage: 20Gi --- # Qdrant Service @@ -108,19 +108,18 @@ metadata: app: context-engine component: qdrant spec: - type: NodePort +type: NodePort # Change to LoadBalancer if your cluster supports it ports: - name: http port: 6333 targetPort: http - nodePort: 30333 +nodePort: 30333 # Optional: specify node port protocol: TCP - name: grpc port: 6334 targetPort: grpc - nodePort: 30334 +nodePort: 30334 # Optional: specify node port protocol: TCP selector: app: context-engine component: qdrant - diff --git a/deploy/kubernetes/upload-codebase-pvc.yaml b/deploy/kubernetes/upload-codebase-pvc.yaml new file mode 100644 index 00000000..cd6d07a9 --- /dev/null +++ b/deploy/kubernetes/upload-codebase-pvc.yaml @@ -0,0 +1,23 @@ +--- +# Persistent Volume Claim for codebase metadata storage (CephFS RWX) +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: upload-codebase-pvc + namespace: context-engine + labels: + app: context-engine + component: upload-service + type: storage +spec: + accessModes: + - ReadWriteMany # CephFS supports RWX for multiple pods + storageClassName: rook-cephfs # Adjust based on your CephFS storage class + resources: + requests: + storage: 5Gi # Smaller size for metadata/cache + # Optional: selector for specific PV + # selector: + # matchLabels: + # app: context-engine + # component: upload-codebase \ No newline at end of file diff --git a/deploy/kubernetes/upload-pvc.yaml b/deploy/kubernetes/upload-pvc.yaml new file mode 100644 index 00000000..8e4487dd --- /dev/null +++ b/deploy/kubernetes/upload-pvc.yaml @@ -0,0 +1,47 @@ +--- +# Persistent 
Volume Claim for code repositories storage (CephFS RWX) +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: code-repos-pvc + namespace: context-engine + labels: + app: context-engine + component: upload-service + type: storage +spec: + accessModes: + - ReadWriteMany # CephFS supports RWX for multiple pods + storageClassName: rook-cephfs # Adjust based on your CephFS storage class + resources: + requests: + storage: 10Gi # Adjust size based on your needs + # Optional: selector for specific PV + # selector: + # matchLabels: + # app: context-engine + # component: code-repos + +--- +# Persistent Volume Claim for code metadata storage (CephFS RWX) +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: code-metadata-pvc + namespace: context-engine + labels: + app: context-engine + component: upload-service + type: storage +spec: + accessModes: + - ReadWriteMany # CephFS supports RWX for multiple pods + storageClassName: rook-cephfs # Adjust based on your CephFS storage class + resources: + requests: + storage: 5Gi # Smaller size for metadata/cache + # Optional: selector for specific PV + # selector: + # matchLabels: + # app: context-engine + # component: code-metadata diff --git a/deploy/kubernetes/upload-service.yaml b/deploy/kubernetes/upload-service.yaml new file mode 100644 index 00000000..189a35b1 --- /dev/null +++ b/deploy/kubernetes/upload-service.yaml @@ -0,0 +1,129 @@ +--- +# Delta Upload Service Deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: upload-service + namespace: context-engine + labels: + app: context-engine + component: upload-service +spec: + replicas: 1 + selector: + matchLabels: + app: context-engine + component: upload-service + template: + metadata: + labels: + app: context-engine + component: upload-service + spec: + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + containers: + - name: upload-service + image: context-engine-upload-service # Use service-specific image name + 
imagePullPolicy: IfNotPresent + command: ["python", "scripts/upload_service.py"] + ports: + - name: http + containerPort: 8002 + protocol: TCP + env: + - name: QDRANT_URL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: QDRANT_URL + - name: COLLECTION_NAME + valueFrom: + configMapKeyRef: + name: context-engine-config + key: COLLECTION_NAME + - name: UPLOAD_SERVICE_HOST + value: "0.0.0.0" + - name: UPLOAD_SERVICE_PORT + value: "8002" + - name: WORK_DIR + value: "/work" + - name: MAX_BUNDLE_SIZE_MB + value: "100" + - name: UPLOAD_TIMEOUT_SECS + value: "300" + - name: EMBEDDING_MODEL + valueFrom: + configMapKeyRef: + name: context-engine-config + key: EMBEDDING_MODEL + - name: USE_TREE_SITTER + valueFrom: + configMapKeyRef: + name: context-engine-config + key: USE_TREE_SITTER + - name: INDEX_SEMANTIC_CHUNKS + valueFrom: + configMapKeyRef: + name: context-engine-config + key: INDEX_SEMANTIC_CHUNKS + - name: INDEX_MICRO_CHUNKS + valueFrom: + configMapKeyRef: + name: context-engine-config + key: INDEX_MICRO_CHUNKS + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "2Gi" + cpu: "1000m" + volumeMounts: + - name: work-volume + mountPath: /work + - name: codebase-volume + mountPath: /work/.codebase + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + periodSeconds: 5 + volumes: + - name: work-volume + persistentVolumeClaim: + claimName: upload-work-pvc + - name: codebase-volume + persistentVolumeClaim: + claimName: upload-codebase-pvc + +--- +# Delta Upload Service Service +apiVersion: v1 +kind: Service +metadata: + name: upload-service + namespace: context-engine + labels: + app: context-engine + component: upload-service +spec: + type: NodePort # Change to LoadBalancer for external access + ports: + - name: http + port: 8002 + targetPort: http + nodePort: 30804 # Optional: specify node port + 
protocol: TCP + selector: + app: context-engine + component: upload-service \ No newline at end of file diff --git a/docker-compose.dev-remote.yml b/docker-compose.dev-remote.yml new file mode 100644 index 00000000..27a2a4ca --- /dev/null +++ b/docker-compose.dev-remote.yml @@ -0,0 +1,404 @@ +# Development Docker Compose for Remote Upload System Testing +# This file simulates Kubernetes environment with shared volumes that simulate the Kubernetes CephFS RWX PVC behavior. +# Repos stored in /work/ (which is project root - avoiding docker volumes) and metadata are stored in /work/.codebase/repos (project root/.codebase) +# Updated to use separate PVCs for workspace and codebase to eliminate circular dependencies + +version: '3.8' + +services: + # Qdrant vector database - same as base compose + qdrant: + image: qdrant/qdrant:latest + container_name: qdrant-db-dev-remote + ports: + - "6333:6333" + - "6334:6334" + volumes: + - qdrant_storage_dev_remote:/qdrant/storage + networks: + - dev-remote-network + + # MCP search service - same as base compose + mcp: + build: + context: . 
+ dockerfile: Dockerfile.mcp + container_name: mcp-search-dev-remote + user: "1000:1000" + depends_on: + - qdrant + env_file: + - .env + environment: + - FASTMCP_HOST=${FASTMCP_HOST} + - FASTMCP_PORT=${FASTMCP_PORT} + - QDRANT_URL=${QDRANT_URL} + - COLLECTION_NAME=${COLLECTION_NAME} + - PATH_EMIT_MODE=container + - HF_HOME=/work/.cache/huggingface + - TRANSFORMERS_CACHE=/work/.cache/huggingface + - HUGGINGFACE_HUB_CACHE=/work/.cache/huggingface + - EMBEDDING_MODEL=${EMBEDDING_MODEL} + - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER} + - TOOL_STORE_DESCRIPTION=${TOOL_STORE_DESCRIPTION} + - TOOL_FIND_DESCRIPTION=${TOOL_FIND_DESCRIPTION} + - FASTMCP_HEALTH_PORT=18000 + - HF_HOME=/home/user/.cache + - TRANSFORMERS_CACHE=/home/user/.cache + ports: + - "18000:18000" + - "8000:8000" + volumes: + - workspace_pvc:/work:ro + - huggingface_cache:/home/user/.cache + networks: + - dev-remote-network + + # MCP indexer service - same as base compose + mcp_indexer: + build: + context: . + dockerfile: Dockerfile.mcp-indexer + container_name: mcp-indexer-dev-remote + user: "1000:1000" + # In K8s, scripts would be accessed directly at /app/scripts/ or via proper initContainer + # For Docker Compose dev-remote simulation, create symlink so /work/scripts/ works + # Use /tmp/huggingface for cache to avoid permission issues (universally writable) + # Set CORRECT environment variables for HuggingFace and FastEmbed + command: ["sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/mcp_indexer_server.py"] + depends_on: + - qdrant + env_file: + - .env + environment: + - FASTMCP_HEALTH_PORT=18001 + - FASTMCP_HOST=${FASTMCP_HOST} + - FASTMCP_INDEXER_PORT=${FASTMCP_INDEXER_PORT} + - QDRANT_URL=${QDRANT_URL} + - COLLECTION_NAME=${COLLECTION_NAME} + - PATH_EMIT_MODE=container + - HF_HOME=/tmp/huggingface + - HF_HUB_CACHE=/tmp/huggingface/hub + - TRANSFORMERS_CACHE=/tmp/huggingface/transformers + - 
FASTEMBED_CACHE_PATH=/tmp/huggingface/fastembed + - EMBEDDING_MODEL=${EMBEDDING_MODEL} + - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER} + - QDRANT_TIMEOUT=${QDRANT_TIMEOUT:-60} + - INDEX_SEMANTIC_CHUNKS=${INDEX_SEMANTIC_CHUNKS:-0} + - INDEX_MICRO_CHUNKS=${INDEX_MICRO_CHUNKS:-0} + - INDEX_UPSERT_BATCH=${INDEX_UPSERT_BATCH:-512} + - INDEX_UPSERT_RETRIES=${INDEX_UPSERT_RETRIES:-5} + - MAX_MICRO_CHUNKS_PER_FILE=${MAX_MICRO_CHUNKS_PER_FILE:-200} + ports: + - "${FASTMCP_INDEXER_PORT:-8001}:8001" + - "18001:18001" + volumes: + - workspace_pvc:/work:rw + - codebase_pvc:/work/.codebase:rw + networks: + - dev-remote-network + + # MCP HTTP search service - same as base compose + mcp_http: + build: + context: . + dockerfile: Dockerfile.mcp + container_name: mcp-search-http-dev-remote + user: "1000:1000" + depends_on: + - qdrant + env_file: + - .env + environment: + - FASTMCP_HOST=${FASTMCP_HOST} + - FASTMCP_PORT=8000 + - FASTMCP_TRANSPORT=${FASTMCP_HTTP_TRANSPORT} + - QDRANT_URL=${QDRANT_URL} + - COLLECTION_NAME=${COLLECTION_NAME} + - PATH_EMIT_MODE=container + - HF_HOME=/work/.cache/huggingface + - TRANSFORMERS_CACHE=/work/.cache/huggingface + - HUGGINGFACE_HUB_CACHE=/work/.cache/huggingface + - EMBEDDING_MODEL=${EMBEDDING_MODEL} + - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER} + - TOOL_STORE_DESCRIPTION=${TOOL_STORE_DESCRIPTION} + - TOOL_FIND_DESCRIPTION=${TOOL_FIND_DESCRIPTION} + - FASTMCP_HEALTH_PORT=18000 + - HF_HOME=/home/user/.cache + - TRANSFORMERS_CACHE=/home/user/.cache + ports: + - "${FASTMCP_HTTP_HEALTH_PORT:-18002}:18000" + - "${FASTMCP_HTTP_PORT:-8002}:8000" + volumes: + - workspace_pvc:/work:ro + - huggingface_cache:/home/user/.cache + networks: + - dev-remote-network + + # MCP HTTP indexer service - same as base compose + mcp_indexer_http: + build: + context: . 
+ dockerfile: Dockerfile.mcp-indexer + container_name: mcp-indexer-http-dev-remote + user: "1000:1000" + # In K8s, scripts would be accessed directly at /app/scripts/ or via proper initContainer + # For Docker Compose dev-remote simulation, create symlink so /work/scripts/ works + # Use /tmp/huggingface for cache to avoid permission issues (universally writable) + # Set CORRECT environment variables for HuggingFace and FastEmbed + command: ["sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/mcp_indexer_server.py"] + depends_on: + - qdrant + env_file: + - .env + environment: + - FASTMCP_HOST=${FASTMCP_HOST} + - FASTMCP_INDEXER_PORT=8001 + - FASTMCP_TRANSPORT=${FASTMCP_HTTP_TRANSPORT} + - QDRANT_URL=${QDRANT_URL} + - FASTMCP_HEALTH_PORT=18001 + - COLLECTION_NAME=${COLLECTION_NAME} + - PATH_EMIT_MODE=container + - HF_HOME=/tmp/huggingface + - HF_HUB_CACHE=/tmp/huggingface/hub + - TRANSFORMERS_CACHE=/tmp/huggingface/transformers + - FASTEMBED_CACHE_PATH=/tmp/huggingface/fastembed + - EMBEDDING_MODEL=${EMBEDDING_MODEL} + - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER} + - QDRANT_TIMEOUT=${QDRANT_TIMEOUT:-60} + - INDEX_SEMANTIC_CHUNKS=${INDEX_SEMANTIC_CHUNKS:-0} + - INDEX_MICRO_CHUNKS=${INDEX_MICRO_CHUNKS:-0} + - INDEX_UPSERT_BATCH=${INDEX_UPSERT_BATCH:-512} + - INDEX_UPSERT_RETRIES=${INDEX_UPSERT_RETRIES:-5} + - MAX_MICRO_CHUNKS_PER_FILE=${MAX_MICRO_CHUNKS_PER_FILE:-200} + ports: + - "${FASTMCP_INDEXER_HTTP_PORT:-8003}:8001" + - "${FASTMCP_INDEXER_HTTP_HEALTH_PORT:-18003}:18001" + volumes: + - workspace_pvc:/work:rw + - codebase_pvc:/work/.codebase:rw + networks: + - dev-remote-network + + # Llama.cpp decoder service - same as base compose + llamacpp: + image: ghcr.io/ggerganov/llama.cpp:server + container_name: llama-decoder-dev-remote + environment: + - LLAMA_ARG_MODEL=/models/model.gguf + - LLAMA_ARG_CTX_SIZE=8192 + - LLAMA_ARG_HOST=0.0.0.0 + - LLAMA_ARG_PORT=8080 + ports: + - "8080:8080" + 
volumes: + - ./models:/models:ro + command: ["--model", "/models/model.gguf", "--host", "0.0.0.0", "--port", "8080", "--no-warmup"] + networks: + - dev-remote-network + + # Indexer service - modified for PVC volumes + indexer: + build: + context: . + dockerfile: Dockerfile.indexer + container_name: indexer-dev-remote + depends_on: + - qdrant + env_file: + - .env + environment: + - QDRANT_URL=${QDRANT_URL} + - COLLECTION_NAME=${COLLECTION_NAME} + - HF_HOME=/work/.cache/huggingface + - TRANSFORMERS_CACHE=/work/.cache/huggingface + - HUGGINGFACE_HUB_CACHE=/work/.cache/huggingface + - EMBEDDING_MODEL=${EMBEDDING_MODEL} + - HF_HOME=/home/user/.cache + - HOST_INDEX_PATH=/work + - TRANSFORMERS_CACHE=/home/user/.cache + - QDRANT_TIMEOUT=${QDRANT_TIMEOUT:-60} + - INDEX_SEMANTIC_CHUNKS=${INDEX_SEMANTIC_CHUNKS:-0} + - INDEX_MICRO_CHUNKS=${INDEX_MICRO_CHUNKS:-0} + - INDEX_UPSERT_BATCH=${INDEX_UPSERT_BATCH:-512} + - INDEX_UPSERT_RETRIES=${INDEX_UPSERT_RETRIES:-5} + - MAX_MICRO_CHUNKS_PER_FILE=${MAX_MICRO_CHUNKS_PER_FILE:-200} + volumes: + - workspace_pvc:/work:rw + - codebase_pvc:/work/.codebase:rw + - huggingface_cache:/home/user/.cache + entrypoint: ["sh", "-c", "mkdir -p /tmp/logs && /app/scripts/wait-for-qdrant.sh && cd /app && python /app/scripts/ingest_code.py --root /work"] + restart: "no" # Run once on startup, do not restart after completion + networks: + - dev-remote-network + + # Watcher service - modified for PVC volumes + watcher: + build: + context: . 
+ dockerfile: Dockerfile.indexer + container_name: watcher-dev-remote + user: "1000:1000" + depends_on: + - qdrant + env_file: + - .env + environment: + - QDRANT_URL=${QDRANT_URL} + - COLLECTION_NAME=${COLLECTION_NAME} + - HF_HOME=/tmp/huggingface + - HF_HUB_CACHE=/tmp/huggingface/hub + - TRANSFORMERS_CACHE=/tmp/huggingface/transformers + - FASTEMBED_CACHE_PATH=/tmp/huggingface/fastembed + - EMBEDDING_MODEL=${EMBEDDING_MODEL} + - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER} + - WATCH_ROOT=${WATCH_ROOT:-/work} + - HOST_INDEX_PATH=/work + - QDRANT_TIMEOUT=${QDRANT_TIMEOUT:-60} + - INDEX_SEMANTIC_CHUNKS=${INDEX_SEMANTIC_CHUNKS:-0} + - INDEX_MICRO_CHUNKS=${INDEX_MICRO_CHUNKS:-0} + - INDEX_UPSERT_BATCH=${INDEX_UPSERT_BATCH:-512} + - INDEX_UPSERT_RETRIES=${INDEX_UPSERT_RETRIES:-5} + - MAX_MICRO_CHUNKS_PER_FILE=${MAX_MICRO_CHUNKS_PER_FILE:-200} + - WATCH_DEBOUNCE_SECS=${WATCH_DEBOUNCE_SECS:-1.5} + - REMOTE_UPLOAD_ENABLED=${REMOTE_UPLOAD_ENABLED:-0} + volumes: + - workspace_pvc:/work:rw + - codebase_pvc:/work/.codebase:rw + command: ["sh", "-c", "mkdir -p /tmp/huggingface/hub /tmp/huggingface/transformers /tmp/huggingface/fastembed && exec python /app/scripts/watch_index.py"] + networks: + - dev-remote-network + + # Init payload service - modified for PVC volumes with complete bootstrap + init_payload: + build: + context: . 
+ dockerfile: Dockerfile.indexer + container_name: init-payload-dev-remote + user: "1000:1000" + depends_on: + - qdrant + env_file: + - .env + environment: + - QDRANT_URL=${QDRANT_URL} + - COLLECTION_NAME=${COLLECTION_NAME} + - HF_HOME=/work/.cache/huggingface + - TRANSFORMERS_CACHE=/work/.cache/huggingface + - HUGGINGFACE_HUB_CACHE=/work/.cache/huggingface + - WORKDIR=/work + - TOKENIZER_URL=${TOKENIZER_URL:-https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/tokenizer.json} + - TOKENIZER_PATH=${TOKENIZER_PATH:-/work/models/tokenizer.json} + - EMBEDDING_MODEL=${EMBEDDING_MODEL} + volumes: + - workspace_pvc:/work:rw + - codebase_pvc:/work/.codebase:rw + command: [ + "sh", + "-c", + "mkdir -p /tmp/logs && echo 'Starting initialization sequence...' && /app/scripts/wait-for-qdrant.sh && PYTHONPATH=/app python /app/scripts/create_indexes.py && echo 'Collections and metadata created' && python /app/scripts/warm_all_collections.py && echo 'Search caches warmed for all collections' && python /app/scripts/health_check.py && echo 'Initialization completed successfully!'" + ] + restart: "no" # Run once on startup + networks: + - dev-remote-network + + # NEW: Upload Service for Remote Upload System + upload_service: + build: + context: . 
+ dockerfile: Dockerfile.upload-service + container_name: upload-service-dev-remote + user: "1000:1000" + depends_on: + - qdrant + env_file: + - .env + environment: + # Upload service configuration + - UPLOAD_SERVICE_HOST=0.0.0.0 + - UPLOAD_SERVICE_PORT=8002 + - QDRANT_URL=${QDRANT_URL} + - WORKDIR=/work + - MAX_BUNDLE_SIZE_MB=100 + - UPLOAD_TIMEOUT_SECS=300 + + # Indexing configuration + - COLLECTION_NAME=${COLLECTION_NAME} + - HF_HOME=/work/.cache/huggingface + - TRANSFORMERS_CACHE=/work/.cache/huggingface + - HUGGINGFACE_HUB_CACHE=/work/.cache/huggingface + - EMBEDDING_MODEL=${EMBEDDING_MODEL} + - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER} + - USE_TREE_SITTER=${USE_TREE_SITTER} + - INDEX_SEMANTIC_CHUNKS=${INDEX_SEMANTIC_CHUNKS} + - INDEX_MICRO_CHUNKS=${INDEX_MICRO_CHUNKS} + + # Remote upload mode configuration + - REMOTE_UPLOAD_ENABLED=1 + - REMOTE_UPLOAD_MODE=development + - REMOTE_UPLOAD_DEBUG=1 + + # Qdrant configuration + - QDRANT_TIMEOUT=${QDRANT_TIMEOUT} + - MAX_MICRO_CHUNKS_PER_FILE=${MAX_MICRO_CHUNKS_PER_FILE} + - INDEX_UPSERT_BATCH=${INDEX_UPSERT_BATCH} + - INDEX_UPSERT_RETRIES=${INDEX_UPSERT_RETRIES} + ports: + - "8004:8002" # Map to different host port to avoid conflicts + - "18004:18000" # Health check port + volumes: + - workspace_pvc:/work:rw + - codebase_pvc:/work/.codebase:rw + - upload_temp:/tmp/uploads + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8002/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + restart: unless-stopped + networks: + - dev-remote-network + + +# PVCs to simulate CephFS RWX behavior (production-like) +volumes: + # Main workspace volume - simulates CephFS RWX for repository storage + workspace_pvc: + driver: local + driver_opts: + type: none + o: bind + device: ${HOST_INDEX_PATH:-./dev-workspace} + + # Codebase metadata volume - simulates CephFS RWX for indexing metadata + codebase_pvc: + driver: local + driver_opts: + type: none + o: bind + device: ./.codebase + + # Temporary upload 
storage + upload_temp: + driver: local + + # HuggingFace cache for model downloads + huggingface_cache: + driver: local + + # Indexer cache for model downloads + indexer_cache: + driver: local + + # Qdrant storage - separate from base compose to avoid conflicts + qdrant_storage_dev_remote: + driver: local + +# Custom network for service discovery +networks: + dev-remote-network: + driver: bridge + ipam: + config: + - subnet: 172.20.0.0/16 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 0d1e3698..d6c0f21e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,9 @@ tree_sitter>=0.25.2 tree_sitter_languages; python_version < "3.13" mcp==1.17.0 fastmcp==2.12.4 +fastapi +uvicorn[standard] +python-multipart openai>=1.0 # Test-only diff --git a/scripts/create_indexes.py b/scripts/create_indexes.py index c0f3ff62..970f5374 100644 --- a/scripts/create_indexes.py +++ b/scripts/create_indexes.py @@ -4,21 +4,36 @@ QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") from datetime import datetime +# Import critical functions first try: - from scripts.workspace_state import update_workspace_state, update_last_activity, get_collection_name + from scripts.workspace_state import get_collection_name, is_multi_repo_mode except Exception: - update_workspace_state = None # type: ignore - update_last_activity = None # type: ignore get_collection_name = None # type: ignore + is_multi_repo_mode = None # type: ignore + +# Import other optional functions +try: + from scripts.workspace_state import log_activity +except Exception: + log_activity = None # type: ignore COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") # Discover workspace path for state updates (allows subdir indexing) WS_PATH = os.environ.get("INDEX_ROOT") or os.environ.get("WORKSPACE_PATH") or "/work" -# Use workspace state to get collection name (defaults to "codebase") +# Skip creating root collection in multi-repo mode when indexing entire /work tree +if 
is_multi_repo_mode and is_multi_repo_mode() and WS_PATH == "/work": + print("Multi-repo mode enabled - skipping root collection creation for /work") + exit(0) + +# Prefer workspace-derived collection names when env value is a placeholder if 'get_collection_name' in globals() and get_collection_name: try: - COLLECTION = get_collection_name(WS_PATH) + resolved = get_collection_name(None) + if resolved: + placeholders = {"", "default-collection", "my-collection", "codebase"} + if COLLECTION in placeholders: + COLLECTION = resolved except Exception: pass @@ -37,19 +52,14 @@ field_schema=models.PayloadSchemaType.KEYWORD, ) -# Update workspace state to record collection and activity +# Log activity using cleaned workspace_state function try: - if update_workspace_state: - update_workspace_state(WS_PATH, {"qdrant_collection": COLLECTION}) - if update_last_activity: - update_last_activity( - WS_PATH, - { - "timestamp": datetime.now().isoformat(), - "action": "initialized", - "file_path": "", - "details": {"created_indexes": ["metadata.language", "metadata.path_prefix"]}, - }, + if log_activity: + log_activity( + repo_name=None, + action="initialized", + file_path="", + details={"created_indexes": ["metadata.language", "metadata.path_prefix"]}, ) except Exception: pass diff --git a/scripts/dev-setup.sh b/scripts/dev-setup.sh new file mode 100755 index 00000000..1a7c5553 --- /dev/null +++ b/scripts/dev-setup.sh @@ -0,0 +1,169 @@ +#!/bin/bash + +# Development Environment Setup Script for Remote Upload System +# This script sets up the development environment for testing the remote upload workflow + +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Configuration +DEV_WORKSPACE="${DEV_WORKSPACE:-./dev-workspace}" + +# Functions +log_info() { + echo -e "${BLUE}[INFO]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[SUCCESS]${NC} $1" +} + +log_warning() { + echo -e "${YELLOW}[WARNING]${NC} 
$1" +} + +log_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if Docker is running +check_docker() { + log_info "Checking Docker installation..." + if ! command -v docker &> /dev/null; then + log_error "Docker is not installed or not in PATH" + exit 1 + fi + + if ! docker info &> /dev/null; then + log_error "Docker daemon is not running" + exit 1 + fi + + log_success "Docker is available and running" +} + +# Check if Docker Compose is available +check_docker_compose() { + log_info "Checking Docker Compose installation..." + if ! command -v docker-compose &> /dev/null && ! docker compose version &> /dev/null; then + log_error "Docker Compose is not installed" + exit 1 + fi + + log_success "Docker Compose is available" +} + +# Create development workspace directory structure +setup_workspace() { + log_info "Setting up development workspace..." + + # Create main workspace directory + mkdir -p "$DEV_WORKSPACE" + mkdir -p "$DEV_WORKSPACE/.codebase" + + log_success "Development workspace created at $DEV_WORKSPACE" + log_info "You can mount your existing repositories here for testing" +} + +# Create environment file +create_env_file() { + log_info "Creating environment configuration..." + + if [ ! -f ".env" ]; then + cp .env.example .env + log_success "Created .env from .env.example" + else + log_warning ".env file already exists, skipping creation" + fi + + # Add dev-remote specific configurations if not already present + if ! 
grep -q "HOST_INDEX_PATH=./dev-workspace" .env; then + cat >> .env << 'EOF' + +# Development Remote Upload Configuration +HOST_INDEX_PATH=./dev-workspace +DEV_REMOTE_MODE=1 +DEV_REMOTE_DEBUG=1 + +# Upload Service Configuration (Development) +UPLOAD_SERVICE_HOST=0.0.0.0 +UPLOAD_SERVICE_PORT=8002 +UPLOAD_SERVICE_DEBUG=1 + +# Remote Upload Client Configuration +REMOTE_UPLOAD_ENABLED=1 +REMOTE_UPLOAD_ENDPOINT=http://upload_service:8002 +REMOTE_UPLOAD_MAX_RETRIES=3 +REMOTE_UPLOAD_TIMEOUT=30 +REMOTE_UPLOAD_DEBUG=1 + +# Development-specific settings +QDRANT_TIMEOUT=60 +MAX_MICRO_CHUNKS_PER_FILE=200 +INDEX_UPSERT_BATCH=128 +INDEX_UPSERT_RETRIES=5 +WATCH_DEBOUNCE_SECS=1.5 +EOF + log_success "Added dev-remote configuration to .env" + else + log_warning "Dev-remote configuration already exists in .env" + fi +} + +# Print usage information +print_usage() { + log_info "Development environment setup complete!" + echo + echo "Quick Start:" + echo " 1. Copy your repository to dev-workspace/your-repo-name" + echo " 2. Run: make dev-remote-bootstrap" + echo " 3. Test with: make dev-remote-test" + echo + echo "Available commands:" + echo " make dev-remote-up - Start the dev-remote stack" + echo " make dev-remote-down - Stop the dev-remote stack" + echo " make dev-remote-bootstrap - Bootstrap the complete system" + echo " make dev-remote-test - Test the remote upload workflow" + echo " make dev-remote-client - Start remote upload client" + echo " make dev-remote-clean - Clean up all dev-remote resources" + echo + echo "Service URLs:" + echo " Upload Service: http://localhost:8004" + echo " Qdrant Dashboard: http://localhost:6333" + echo " MCP Search: http://localhost:8000" + echo " MCP Indexer: http://localhost:8001" + echo + echo "Testing Workflow:" + echo " 1. Place your code in: $DEV_WORKSPACE/your-repo" + echo " 2. Start the stack: make dev-remote-bootstrap" + echo " 3. Test upload: curl http://localhost:8004/health" + echo " 4. 
Check status: curl 'http://localhost:8004/api/v1/delta/status?workspace_path=/work/your-repo'" + echo + echo "For remote upload testing:" + echo " 1. Set REMOTE_UPLOAD_ENDPOINT=http://localhost:8004" + echo " 2. Run: make watch-remote REMOTE_UPLOAD_ENDPOINT=http://localhost:8004" + echo + log_success "Ready to test the remote upload system!" +} + +# Main execution +main() { + log_info "Setting up development environment for remote upload system..." + + check_docker + check_docker_compose + setup_workspace + create_env_file + print_usage + + log_success "Development environment setup completed successfully!" +} + +# Run main function +main "$@" \ No newline at end of file diff --git a/scripts/health_check.py b/scripts/health_check.py index 3a0137e7..67e80e2e 100644 --- a/scripts/health_check.py +++ b/scripts/health_check.py @@ -39,83 +39,101 @@ def main(): client = QdrantClient(url=qdrant_url, api_key=api_key or None) - # 1) Collection exists and has expected named vector/dimension - info = client.get_collection(collection) - cfg = info.config.params.vectors - if isinstance(cfg, dict): - present_names = list(cfg.keys()) - assert_true(len(present_names) >= 1, "Collection has at least one named vector") - assert_true( - vec_name_expect in present_names, - f"Expected vector name present: {vec_name_expect} in {present_names}", - ) - got_dim = cfg[vec_name_expect].size - else: - present_names = [""] - got_dim = cfg.size - assert_true( - got_dim == dim, f"Vector dimension matches embedding ({got_dim} == {dim})" - ) - - # 2) HNSW tuned params (best effort; allow >= thresholds) - hcfg = info.config.hnsw_config + # Get all collections and check each one try: - m = getattr(hcfg, "m", None) - efc = getattr(hcfg, "ef_construct", None) - assert_true(m is None or m >= 16, f"HNSW m>=16 (got {m})") - assert_true(efc is None or efc >= 256, f"HNSW ef_construct>=256 (got {efc})") - except Exception: - print("[WARN] Could not read HNSW config; continuing") - - # 3) Payload indexes 
created (language, path_prefix, repo, kind, symbol) - # Not all clients expose schema listing; we validate by running filtered queries - probe_text = "split code into overlapping line chunks" - probe_vec = next(model.embed([probe_text])).tolist() - - # Unfiltered query - qp = client.query_points( - collection_name=collection, - query=probe_vec, - using=vec_name_expect, - limit=3, - with_payload=True, - search_params=models.SearchParams(hnsw_ef=128), - ) - res_points = getattr(qp, "points", qp) - assert_true(isinstance(res_points, list), "query_points returns a list of points") - - # Filtered by language + kind (should not error; may return 0 results if dataset sparse) - flt = models.Filter( - must=[ - models.FieldCondition( - key="metadata.language", match=models.MatchValue(value="python") - ), - models.FieldCondition( - key="metadata.kind", match=models.MatchValue(value="function") - ), - ] - ) - qp2 = client.query_points( - collection_name=collection, - query=probe_vec, - using=vec_name_expect, - query_filter=flt, - limit=3, - with_payload=True, - ) - res2 = getattr(qp2, "points", qp2) or [] - # If results exist, ensure payload has kind/symbol keys - if res2: - md: Dict[str, Any] = (res2[0].payload or {}).get("metadata") or {} + collections_response = client.get_collections() + collections = [c.name for c in collections_response.collections] + print(f"Found collections: {collections}") + except Exception as e: + print(f"Error getting collections: {e}") + sys.exit(1) + + if not collections: + print("No collections found - nothing to health check") + return + + # Check each collection + for collection_name in collections: + print(f"Checking collection: {collection_name}") + + # 1) Collection exists and has expected named vector/dimension + info = client.get_collection(collection_name) + cfg = info.config.params.vectors + if isinstance(cfg, dict): + present_names = list(cfg.keys()) + assert_true(len(present_names) >= 1, "Collection has at least one named vector") + 
assert_true( + vec_name_expect in present_names, + f"Expected vector name present: {vec_name_expect} in {present_names}", + ) + got_dim = cfg[vec_name_expect].size + else: + present_names = [""] + got_dim = cfg.size assert_true( - "kind" in md and "symbol" in md, - "payload includes metadata.kind and metadata.symbol", + got_dim == dim, f"Vector dimension matches embedding ({got_dim} == {dim})" ) - else: - print("[OK] Filtered query ran (no results is acceptable depending on data)") - print("All checks passed.") + # 2) HNSW tuned params (best effort; allow >= thresholds) + hcfg = info.config.hnsw_config + try: + m = getattr(hcfg, "m", None) + efc = getattr(hcfg, "ef_construct", None) + assert_true(m is None or m >= 16, f"HNSW m>=16 (got {m})") + assert_true(efc is None or efc >= 256, f"HNSW ef_construct>=256 (got {efc})") + except Exception: + print("[WARN] Could not read HNSW config; continuing") + + # 3) Test queries on this collection + probe_text = "split code into overlapping line chunks" + probe_vec = next(model.embed([probe_text])).tolist() + + # Unfiltered query + qp = client.query_points( + collection_name=collection_name, + query=probe_vec, + using=vec_name_expect, + limit=3, + with_payload=True, + search_params=models.SearchParams(hnsw_ef=128), + ) + res_points = getattr(qp, "points", qp) + assert_true(isinstance(res_points, list), "query_points returns a list of points") + + # Filtered by language + kind (should not error; may return 0 results if dataset sparse) + flt = models.Filter( + must=[ + models.FieldCondition( + key="metadata.language", match=models.MatchValue(value="python") + ), + models.FieldCondition( + key="metadata.kind", match=models.MatchValue(value="function") + ), + ] + ) + qp2 = client.query_points( + collection_name=collection_name, + query=probe_vec, + using=vec_name_expect, + query_filter=flt, + limit=3, + with_payload=True, + ) + res2 = getattr(qp2, "points", qp2) or [] + # If results exist, ensure payload has kind/symbol keys + if 
res2: + md: Dict[str, Any] = (res2[0].payload or {}).get("metadata") or {} + assert_true( + "kind" in md and "symbol" in md, + "payload includes metadata.kind and metadata.symbol", + ) + else: + print("[OK] Filtered query ran (no results is acceptable depending on data)") + + print(f"[OK] Collection {collection_name} health check passed") + + print(f"[OK] All {len(collections)} collections passed health check") if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/scripts/hybrid_search.py b/scripts/hybrid_search.py index 6dd1b25d..d2fe5af8 100644 --- a/scripts/hybrid_search.py +++ b/scripts/hybrid_search.py @@ -41,8 +41,17 @@ logger = logging.getLogger("hybrid_search") -def _collection() -> str: - return os.environ.get("COLLECTION_NAME", "codebase") +def _collection(collection_name: str | None = None) -> str: + """Determine collection name with priority: CLI arg > env > default.""" + + if collection_name and collection_name.strip(): + return collection_name.strip() + + env_coll = os.environ.get("COLLECTION_NAME", "").strip() + if env_coll: + return env_coll + + return "codebase" MODEL_NAME = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") @@ -120,6 +129,29 @@ def _coerce_points(result: Any) -> List[Any]: return [result] + +def _legacy_vector_search( + client: QdrantClient, + collection: str, + vec_name: str, + vector: List[float], + per_query: int, + flt, +) -> List[Any]: + """Fallback to legacy client.search when query_points is unavailable.""" + + try: + result = client.search( + collection_name=collection, + query_vector={"name": vec_name, "vector": vector}, + limit=per_query, + with_payload=True, + query_filter=flt, + ) + return _coerce_points(getattr(result, "points", result)) + except Exception: + return [] + + def _embed_queries_cached( model: TextEmbedding, queries: List[str] ) -> List[List[float]]: @@ -1159,14 +1191,15 @@ def _sanitize_filter_obj(flt): return None -def lex_query(client: QdrantClient, v: 
List[float], flt, per_query: int) -> List[Any]: +def lex_query(client: QdrantClient, v: List[float], flt, per_query: int, collection_name: str | None = None) -> List[Any]: ef = max(EF_SEARCH, 32 + 4 * int(per_query)) flt = _sanitize_filter_obj(flt) + collection = _collection(collection_name) # Prefer modern API; handle kwarg rename between client versions (query_filter -> filter) try: qp = client.query_points( - collection_name=_collection(), + collection_name=collection, query=v, using=LEX_VECTOR_NAME, query_filter=flt, @@ -1180,7 +1213,7 @@ def lex_query(client: QdrantClient, v: List[float], flt, per_query: int) -> List if os.environ.get("DEBUG_HYBRID_SEARCH"): logger.debug("QP_FILTER_KWARG_SWITCH", extra={"using": LEX_VECTOR_NAME}) qp = client.query_points( - collection_name=_collection(), + collection_name=collection, query=v, using=LEX_VECTOR_NAME, filter=flt, @@ -1189,6 +1222,8 @@ def lex_query(client: QdrantClient, v: List[float], flt, per_query: int) -> List with_payload=True, ) return _coerce_points(getattr(qp, "points", qp)) + except AttributeError: + return _legacy_vector_search(client, collection, LEX_VECTOR_NAME, v, per_query, flt) except Exception as e: # Retry without a filter at all (handles servers that reject certain filter shapes) if os.environ.get("DEBUG_HYBRID_SEARCH"): @@ -1198,7 +1233,7 @@ def lex_query(client: QdrantClient, v: List[float], flt, per_query: int) -> List pass try: qp = client.query_points( - collection_name=_collection(), + collection_name=collection, query=v, using=LEX_VECTOR_NAME, query_filter=None, @@ -1209,7 +1244,7 @@ def lex_query(client: QdrantClient, v: List[float], flt, per_query: int) -> List return _coerce_points(getattr(qp, "points", qp)) except TypeError: qp = client.query_points( - collection_name=_collection(), + collection_name=collection, query=v, using=LEX_VECTOR_NAME, filter=None, @@ -1224,18 +1259,19 @@ def lex_query(client: QdrantClient, v: List[float], flt, per_query: int) -> List 
logger.debug("QP_FILTER_DROP_FAILED", extra={"using": LEX_VECTOR_NAME, "reason": str(e2)[:200]}) except Exception: pass - return [] + return _legacy_vector_search(client, collection, LEX_VECTOR_NAME, v, per_query, flt) def dense_query( - client: QdrantClient, vec_name: str, v: List[float], flt, per_query: int + client: QdrantClient, vec_name: str, v: List[float], flt, per_query: int, collection_name: str | None = None ) -> List[Any]: ef = max(EF_SEARCH, 32 + 4 * int(per_query)) flt = _sanitize_filter_obj(flt) + collection = _collection(collection_name) try: qp = client.query_points( - collection_name=_collection(), + collection_name=collection, query=v, using=vec_name, query_filter=flt, @@ -1248,7 +1284,7 @@ def dense_query( if os.environ.get("DEBUG_HYBRID_SEARCH"): logger.debug("QP_FILTER_KWARG_SWITCH", extra={"using": vec_name}) qp = client.query_points( - collection_name=_collection(), + collection_name=collection, query=v, using=vec_name, filter=flt, @@ -1278,7 +1314,7 @@ def dense_query( except TypeError: try: qp = client.query_points( - collection_name=_collection(), + collection_name=collection, query=v, using=vec_name, filter=None, @@ -1293,7 +1329,7 @@ def dense_query( logger.debug("QP_FILTER_DROP_FAILED", extra={"using": vec_name, "reason": str(e2)[:200]}) except Exception: pass - return [] + return _legacy_vector_search(client, collection, vec_name, v, per_query, flt) # In-process API: run hybrid search and return structured items list @@ -1316,6 +1352,7 @@ def run_hybrid_search( not_glob: str | list[str] | None = None, expand: bool = True, model: TextEmbedding | None = None, + collection: str | None = None, ) -> List[Dict[str, Any]]: client = QdrantClient(url=os.environ.get("QDRANT_URL", QDRANT_URL), api_key=API_KEY) model_name = os.environ.get("EMBEDDING_MODEL", MODEL_NAME) @@ -1622,7 +1659,7 @@ def _bn(p: str) -> str: score_map: Dict[str, Dict[str, Any]] = {} try: lex_vec = lex_hash_vector(qlist) - lex_results = lex_query(client, lex_vec, flt, max(24, 
limit)) + lex_results = lex_query(client, lex_vec, flt, max(24, limit), collection) except Exception: lex_results = [] @@ -1664,7 +1701,7 @@ def _bn(p: str) -> str: try: if embedded: dim = len(embedded[0]) - _ensure_collection(client, _collection(), dim, vec_name) + _ensure_collection(client, _collection(collection), dim, vec_name) except Exception: pass # Optional gate-first using mini vectors to restrict dense search to candidates @@ -1721,7 +1758,7 @@ def _bn(p: str) -> str: # Get top candidates using MINI vectors (fast prefilter) candidate_ids = set() for mv in mini_queries: - mini_results = dense_query(client, MINI_VECTOR_NAME, mv, flt, cand_n) + mini_results = dense_query(client, MINI_VECTOR_NAME, mv, flt, cand_n, collection) for result in mini_results: if hasattr(result, 'id'): candidate_ids.add(result.id) @@ -1775,7 +1812,7 @@ def _bn(p: str) -> str: flt_gated = _sanitize_filter_obj(flt_gated) result_sets: List[List[Any]] = [ - dense_query(client, vec_name, v, flt_gated, max(24, limit)) for v in embedded + dense_query(client, vec_name, v, flt_gated, max(24, limit), collection) for v in embedded ] if os.environ.get("DEBUG_HYBRID_SEARCH"): total_dense_results = sum(len(rs) for rs in result_sets) @@ -1792,7 +1829,7 @@ def _bn(p: str) -> str: try: mini_queries = [_project_mini(list(v), MINI_VEC_DIM) for v in embedded] mini_sets: List[List[Any]] = [ - dense_query(client, MINI_VECTOR_NAME, mv, flt, max(24, limit)) + dense_query(client, MINI_VECTOR_NAME, mv, flt, max(24, limit), collection) for mv in mini_queries ] for res in mini_sets: @@ -1947,7 +1984,7 @@ def _bn(p: str) -> str: try: lex_vec2 = lex_hash_vector(prf_qs) lex_results2 = lex_query( - client, lex_vec2, flt, max(12, limit // 2 or 6) + client, lex_vec2, flt, max(12, limit // 2 or 6), collection ) except Exception: lex_results2 = [] @@ -1976,7 +2013,7 @@ def _bn(p: str) -> str: try: embedded2 = _embed_queries_cached(_model, prf_qs) result_sets2: List[List[Any]] = [ - dense_query(client, vec_name, v, 
flt, max(12, limit // 2 or 6)) + dense_query(client, vec_name, v, flt, max(12, limit // 2 or 6), collection) for v in embedded2 ] for res2 in result_sets2: @@ -2695,6 +2732,8 @@ def main(): # Structured filters to mirror MCP tool fields ap.add_argument("--ext", type=str, default=None) ap.add_argument("--not", dest="not_filter", type=str, default=None) + ap.add_argument("--collection", type=str, default=None, + help="Target collection name") ap.add_argument( "--case", type=str, @@ -2707,6 +2746,9 @@ def main(): args = ap.parse_args() + # Resolve effective collection early to avoid variable usage errors + eff_collection = args.collection or os.environ.get("COLLECTION_NAME", "codebase") + model = TextEmbedding(model_name=MODEL_NAME) vec_name = _sanitize_vector_name(MODEL_NAME) client = QdrantClient(url=QDRANT_URL, api_key=API_KEY or None) @@ -2715,7 +2757,7 @@ def main(): try: first_vec = next(model.embed(["__dim__warmup__"])) dim = len(first_vec.tolist()) - _ensure_collection(client, _collection(), dim, vec_name) + _ensure_collection(client, _collection(eff_collection), dim, vec_name) except Exception: pass @@ -2822,7 +2864,7 @@ def _norm_under(u: str | None) -> str | None: # Server-side lexical vector search (hashing) as an additional ranked list try: lex_vec = lex_hash_vector(queries) - lex_results = lex_query(client, lex_vec, flt, args.per_query) + lex_results = lex_query(client, lex_vec, flt, args.per_query, eff_collection) except Exception: lex_results = [] @@ -2874,7 +2916,7 @@ def _norm_under(u: str | None) -> str | None: embedded = _embed_queries_cached(model, queries) result_sets: List[List[Any]] = [ - dense_query(client, vec_name, v, flt, args.per_query) for v in embedded + dense_query(client, vec_name, v, flt, args.per_query, eff_collection) for v in embedded ] # RRF fusion (weighted) diff --git a/scripts/ingest_code.py b/scripts/ingest_code.py index f3289c49..9434794b 100644 --- a/scripts/ingest_code.py +++ b/scripts/ingest_code.py @@ -1,38 +1,15 @@ 
from __future__ import annotations -# Helper: detect repository name automatically (no REPO_NAME env needed) +# Import repository detection from workspace_state to avoid duplication def _detect_repo_name_from_path(path: Path) -> str: + """Wrapper function to use workspace_state repository detection.""" try: - import subprocess, os as _os - - base = path if path.is_dir() else path.parent - r = subprocess.run( - ["git", "-C", str(base), "rev-parse", "--show-toplevel"], - capture_output=True, - text=True, - ) - top = r.stdout.strip() - if r.returncode == 0 and top: - return Path(top).name or "workspace" - except Exception: - pass - # Fallback: walk up to find a .git folder - try: - cur = path if path.is_dir() else path.parent - for p in [cur] + list(cur.parents): - try: - if (p / ".git").exists(): - return p.name or "workspace" - except Exception: - continue - except Exception: - pass - # Last resort: directory name - try: - return (path if path.is_dir() else path.parent).name or "workspace" - except Exception: - return "workspace" + from scripts.workspace_state import _extract_repo_name_from_path as _ws_detect + return _ws_detect(str(path)) + except ImportError: + # Fallback for when workspace_state is not available + return path.name if path.is_dir() else path.parent.name #!/usr/bin/env python3 @@ -56,27 +33,43 @@ def _detect_repo_name_from_path(path: Path) -> str: from fastembed import TextEmbedding - from datetime import datetime + +# Import critical multi-repo functions first try: from scripts.workspace_state import ( - update_indexing_status, - update_last_activity, - update_workspace_state, + is_multi_repo_mode, get_collection_name, + ) +except ImportError: + is_multi_repo_mode = None # type: ignore + get_collection_name = None # type: ignore + +# Import watcher's repo detection for surgical fix +try: + from scripts.watch_index import _detect_repo_for_file, _get_collection_for_file +except ImportError: + _detect_repo_for_file = None # type: ignore + 
_get_collection_for_file = None # type: ignore + +# Import other workspace state functions (optional) +try: + from scripts.workspace_state import ( + log_activity, get_cached_file_hash, set_cached_file_hash, remove_cached_file, + update_indexing_status, + update_workspace_state, ) -except Exception: +except ImportError: # State integration is optional; continue if not available - update_indexing_status = None # type: ignore - update_last_activity = None # type: ignore - update_workspace_state = None # type: ignore - get_collection_name = None # type: ignore + log_activity = None # type: ignore get_cached_file_hash = None # type: ignore set_cached_file_hash = None # type: ignore remove_cached_file = None # type: ignore + update_indexing_status = None # type: ignore + update_workspace_state = None # type: ignore # Optional Tree-sitter import (graceful fallback) try: @@ -463,7 +456,6 @@ def chunk_semantic( n = len(lines) - # Extract symbols with line ranges symbols = _extract_symbols(language, text) if not symbols: @@ -524,7 +516,6 @@ def chunk_by_tokens( Tokenizer = None # type: ignore - try: k = int(os.environ.get("MICRO_CHUNK_TOKENS", str(k_tokens or 16)) or 16) except Exception: @@ -688,23 +679,21 @@ def ensure_collection(client: QdrantClient, name: str, dim: int, vector_name: st """ try: info = client.get_collection(name) - # Ensure HNSW tuned params even if the collection already existed - try: - client.update_collection( - collection_name=name, - hnsw_config=models.HnswConfigDiff(m=16, ef_construct=256), - ) - except Exception: - pass - # Schema repair: add missing named vectors on existing collections + # Prevent I/O storm - only update vectors if they actually don't exist try: cfg = getattr(info.config.params, "vectors", None) if isinstance(cfg, dict): + # Check if collection already has required vectors before updating + has_lex = LEX_VECTOR_NAME in cfg + has_mini = MINI_VECTOR_NAME in cfg + + # Only add to missing if vector doesn't already exist missing = 
{} - if LEX_VECTOR_NAME not in cfg: + if not has_lex: missing[LEX_VECTOR_NAME] = models.VectorParams( size=LEX_VECTOR_DIM, distance=models.Distance.COSINE ) + try: refrag_on = os.environ.get("REFRAG_MODE", "").strip().lower() in { "1", @@ -714,13 +703,17 @@ def ensure_collection(client: QdrantClient, name: str, dim: int, vector_name: st } except Exception: refrag_on = False - if refrag_on and MINI_VECTOR_NAME not in cfg: + + if refrag_on and not has_mini: missing[MINI_VECTOR_NAME] = models.VectorParams( size=int( os.environ.get("MINI_VEC_DIM", MINI_VEC_DIM) or MINI_VEC_DIM ), distance=models.Distance.COSINE, ) + + # Only update collection if vectors are actually missing + # Previous behavior: always called update_collection() causing I/O storms if missing: try: client.update_collection( @@ -729,10 +722,13 @@ def ensure_collection(client: QdrantClient, name: str, dim: int, vector_name: st except Exception: # Best-effort; if server doesn't support adding vectors, leave to recreate path pass - except Exception: + except Exception as e: + print(f"[COLLECTION_ERROR] Failed to update collection {name}: {e}") pass return - except Exception: + except Exception as e: + # Collection doesn't exist - proceed to create it + print(f"[COLLECTION_INFO] Creating new collection {name}: {type(e).__name__}") pass vectors_cfg = { vector_name: models.VectorParams(size=dim, distance=models.Distance.COSINE), @@ -1199,7 +1195,6 @@ def _extract_symbols_java(text: str) -> List[_Sym]: return syms - def _extract_symbols_csharp(text: str) -> List[_Sym]: lines = text.splitlines() syms: List[_Sym] = [] @@ -1263,7 +1258,6 @@ def _extract_symbols_php(text: str) -> List[_Sym]: return syms - def _extract_symbols_shell(text: str) -> List[_Sym]: lines = text.splitlines() syms: List[_Sym] = [] @@ -1667,8 +1661,8 @@ def index_single_file( ws_path = os.environ.get("WATCH_ROOT") or os.environ.get("WORKSPACE_PATH") or "/work" try: if get_cached_file_hash: - prev_local = get_cached_file_hash(ws_path, 
str(file_path)) - if prev_local and prev_local == file_hash: + prev_local = get_cached_file_hash(str(file_path), repo_tag) + if prev_local and file_hash and prev_local == file_hash: print(f"Skipping unchanged file (cache): {file_path}") return False except Exception: @@ -1855,13 +1849,13 @@ def make_point(pid, dense_vec, lex_vec, payload): try: ws = os.environ.get("WATCH_ROOT") or os.environ.get("WORKSPACE_PATH") or "/work" if set_cached_file_hash: - set_cached_file_hash(ws, str(file_path), file_hash) + file_repo_tag = _detect_repo_name_from_path(file_path) + set_cached_file_hash(str(file_path), file_hash, file_repo_tag) except Exception: pass return True return False - def index_repo( root: Path, qdrant_url: str, @@ -1911,35 +1905,61 @@ def index_repo( if vector_name is None: vector_name = _sanitize_vector_name(model_name) - # Workspace state: use single unified collection for seamless cross-repo search + use_per_repo_collections = False + + # Workspace state: derive collection and persist metadata try: ws_path = str(root) - # Always use the unified collection (default: "codebase") - if 'get_collection_name' in globals() and get_collection_name: - collection = get_collection_name(ws_path) - if update_workspace_state: - update_workspace_state(ws_path, {"qdrant_collection": collection}) - if update_indexing_status: + repo_tag = _detect_repo_name_from_path(root) if _detect_repo_name_from_path else None + + is_multi_repo = bool(is_multi_repo_mode and is_multi_repo_mode()) + use_per_repo_collections = bool(is_multi_repo and _get_collection_for_file) + + if use_per_repo_collections: + collection = None # Determined per file later + print("[multi_repo] Using per-repo collections for root") + else: + if 'get_collection_name' in globals() and get_collection_name: + try: + resolved = get_collection_name(ws_path) + placeholders = {"", "default-collection", "my-collection", "codebase"} + if resolved and collection in placeholders: + collection = resolved + except Exception: + 
pass + + if update_workspace_state and not use_per_repo_collections: + update_workspace_state( + workspace_path=ws_path, + updates={"qdrant_collection": collection}, + repo_name=repo_tag, + ) + if update_indexing_status and repo_tag: update_indexing_status( - ws_path, - { + workspace_path=ws_path, + status={ "state": "indexing", "started_at": datetime.now().isoformat(), "progress": {"files_processed": 0, "total_files": None}, }, + repo_name=repo_tag, ) - except Exception: - pass + except Exception as e: + # Log state update errors instead of silent failure + import traceback + print(f"[ERROR] Failed to update workspace state during indexing: {e}") + print(f"[ERROR] Traceback: {traceback.format_exc()}") print( f"Indexing root={root} -> {qdrant_url} collection={collection} model={model_name} recreate={recreate}" ) - # Health check: detect cache/collection sync issues before indexing - if not recreate and skip_unchanged: + # Health check: detect cache/collection sync issues before indexing (single-collection mode only) + if not recreate and skip_unchanged and not use_per_repo_collections and collection: try: from scripts.collection_health import auto_heal_if_needed + print("[health_check] Checking collection health...") heal_result = auto_heal_if_needed(str(root), collection, qdrant_url, dry_run=False) if heal_result["action_taken"] == "cleared_cache": @@ -1951,15 +1971,21 @@ def index_repo( except Exception as e: print(f"[health_check] Warning: health check failed: {e}") - if recreate: - recreate_collection(client, collection, dim, vector_name) + # Skip single collection setup in multi-repo mode + if not use_per_repo_collections: + if recreate: + recreate_collection(client, collection, dim, vector_name) + else: + ensure_collection(client, collection, dim, vector_name) + # Ensure useful payload indexes exist (idempotent) + ensure_payload_indexes(client, collection) else: - ensure_collection(client, collection, dim, vector_name) - - # Ensure useful payload indexes 
exist (idempotent) - ensure_payload_indexes(client, collection) + print("[multi_repo] Skipping single collection setup - will create per-repo collections during indexing") # Repo tag for filtering: auto-detect from git or folder name repo_tag = _detect_repo_name_from_path(root) + workspace_root = os.environ.get("WATCH_ROOT") or os.environ.get("WORKSPACE_PATH") or "/work" + touched_repos: set[str] = set() + repo_roots: dict[str, str] = {} # Batch and scaling config (env/CLI overridable) batch_texts: list[str] = [] @@ -2010,6 +2036,18 @@ def make_point(pid, dense_vec, lex_vec, payload): for file_path in iter_files(root): files_seen += 1 + + # Determine collection per-file in multi-repo mode (use watcher's exact logic) + current_collection = collection + if use_per_repo_collections: + if _get_collection_for_file: + current_collection = _get_collection_for_file(file_path) + # Ensure collection exists on first use + ensure_collection(client, current_collection, dim, vector_name) + ensure_payload_indexes(client, current_collection) + else: + current_collection = get_collection_name(ws_path) if get_collection_name else "default-collection" + try: text = file_path.read_text(encoding="utf-8", errors="ignore") except Exception as e: @@ -2018,20 +2056,38 @@ def make_point(pid, dense_vec, lex_vec, payload): language = detect_language(file_path) file_hash = hashlib.sha1(text.encode("utf-8", errors="ignore")).hexdigest() + per_file_repo = ( + _detect_repo_name_from_path(file_path) + if _detect_repo_name_from_path + else repo_tag + ) + if per_file_repo: + touched_repos.add(per_file_repo) + repo_roots.setdefault( + per_file_repo, + str(Path(workspace_root).resolve() / per_file_repo), + ) + # Skip unchanged files if enabled (default) if skip_unchanged: # Prefer local workspace cache to avoid Qdrant lookups try: if get_cached_file_hash: - prev_local = get_cached_file_hash(ws_path, str(file_path)) - if prev_local and prev_local == file_hash: + prev_local = 
get_cached_file_hash(str(file_path), per_file_repo) + if prev_local and file_hash and prev_local == file_hash: if PROGRESS_EVERY <= 0 and files_seen % 50 == 0: print(f"... processed {files_seen} files (skipping unchanged, cache)") try: if update_indexing_status: + target_workspace = ( + ws_path if not use_per_repo_collections else str(file_path.parent) + ) + target_repo = ( + repo_tag if not use_per_repo_collections else per_file_repo + ) update_indexing_status( - ws_path, - { + workspace_path=target_workspace, + status={ "state": "indexing", "progress": { "files_processed": files_seen, @@ -2039,6 +2095,7 @@ def make_point(pid, dense_vec, lex_vec, payload): "current_file": str(file_path), }, }, + repo_name=target_repo, ) except Exception: pass @@ -2047,16 +2104,28 @@ def make_point(pid, dense_vec, lex_vec, payload): continue except Exception: pass - prev = get_indexed_file_hash(client, collection, str(file_path)) - if prev and prev == file_hash: + prev = get_indexed_file_hash(client, current_collection, str(file_path)) + if prev and file_hash and prev == file_hash: + # File exists in Qdrant with same hash - cache it locally for next time + try: + if set_cached_file_hash: + set_cached_file_hash(str(file_path), file_hash, per_file_repo) + except Exception: + pass if PROGRESS_EVERY <= 0 and files_seen % 50 == 0: # minor heartbeat when no progress cadence configured print(f"... 
processed {files_seen} files (skipping unchanged)") try: if update_indexing_status: + target_workspace = ( + ws_path if not use_per_repo_collections else str(file_path.parent) + ) + target_repo = ( + repo_tag if not use_per_repo_collections else per_file_repo + ) update_indexing_status( - ws_path, - { + workspace_path=target_workspace, + status={ "state": "indexing", "progress": { "files_processed": files_seen, @@ -2064,6 +2133,7 @@ def make_point(pid, dense_vec, lex_vec, payload): "current_file": str(file_path), }, }, + repo_name=target_repo, ) except Exception: pass @@ -2073,7 +2143,7 @@ def make_point(pid, dense_vec, lex_vec, payload): # Dedupe per-file by deleting previous points for this path (default) if dedupe: - delete_points_by_path(client, collection, str(file_path)) + delete_points_by_path(client, current_collection, str(file_path)) files_indexed += 1 symbols = _extract_symbols(language, text) @@ -2168,7 +2238,7 @@ def make_point(pid, dense_vec, lex_vec, payload): "kind": kind, "symbol": sym, "symbol_path": sym_path or "", - "repo": repo_tag, + "repo": per_file_repo, "start_line": ch["start"], "end_line": ch["end"], "code": ch["text"], @@ -2220,14 +2290,22 @@ def make_point(pid, dense_vec, lex_vec, payload): make_point(i, v, lx, m) for i, v, lx, m in zip(batch_ids, vectors, batch_lex, batch_meta) ] - upsert_points(client, collection, points) + upsert_points(client, current_collection, points) # Update local file-hash cache for any files that had chunks in this flush try: if set_cached_file_hash: for _p, _h in list(batch_file_hashes.items()): try: if _p and _h: - set_cached_file_hash(ws_path, _p, _h) + file_repo_tag = _detect_repo_name_from_path(Path(_p)) + repos_touched_name = file_repo_tag or per_file_repo + if repos_touched_name: + touched_repos.add(repos_touched_name) + repo_roots.setdefault( + repos_touched_name, + str(Path(workspace_root).resolve() / repos_touched_name), + ) + set_cached_file_hash(_p, _h, file_repo_tag) except Exception: continue 
except Exception: @@ -2241,19 +2319,25 @@ def make_point(pid, dense_vec, lex_vec, payload): ) try: if update_indexing_status: - update_indexing_status( - ws_path, - { - "state": "indexing", - "progress": { - "files_processed": files_seen, - "total_files": None, - "current_file": str(file_path), + per_file_repo = _detect_repo_name_from_path(file_path) if _detect_repo_name_from_path else repo_tag + if per_file_repo: + update_indexing_status( + workspace_path=str(file_path.parent), + status={ + "state": "indexing", + "progress": { + "files_processed": repo_progress.get(per_file_repo, 0), + "total_files": repo_total.get(per_file_repo, None), + "current_file": str(file_path), + }, }, - }, - ) - except Exception: - pass + repo_name=per_file_repo, + ) + except Exception as e: + # Log progress update errors instead of silent failure + import traceback + print(f"[ERROR] Failed to update indexing progress: {e}") + print(f"[ERROR] Traceback: {traceback.format_exc()}") if batch_texts: vectors = embed_batch(model, batch_texts) @@ -2267,14 +2351,16 @@ def make_point(pid, dense_vec, lex_vec, payload): make_point(i, v, lx, m) for i, v, lx, m in zip(batch_ids, vectors, batch_lex, batch_meta) ] - upsert_points(client, collection, points) + upsert_points(client, current_collection, points) # Update local file-hash cache for any files that had chunks during this run (final flush) try: if set_cached_file_hash: for _p, _h in list(batch_file_hashes.items()): try: if _p and _h: - set_cached_file_hash(ws_path, _p, _h) + per_file_repo = _detect_repo_name_from_path(Path(_p)) + if per_file_repo: + set_cached_file_hash(_p, _h, per_file_repo) except Exception: continue except Exception: @@ -2286,30 +2372,43 @@ def make_point(pid, dense_vec, lex_vec, payload): # Workspace state: mark completion try: - if update_last_activity: - update_last_activity( - ws_path, - { - "timestamp": datetime.now().isoformat(), - "action": "scan-completed", - "file_path": "", - "details": { - "files_seen": 
files_seen, - "files_indexed": files_indexed, - "chunks_indexed": points_indexed, - }, + if log_activity: + # Extract repo name from workspace path for log_activity + repo_name = None + if use_per_repo_collections: + # In multi-repo mode, we need to determine which repo this activity belongs to + # For scan completion, we use the workspace path as the repo identifier + repo_name = _detect_repo_name_from_path(Path(ws_path)) + + log_activity( + repo_name=repo_name, + action="scan-completed", + file_path="", + details={ + "files_seen": files_seen, + "files_indexed": files_indexed, + "chunks_indexed": points_indexed, }, ) if update_indexing_status: - update_indexing_status( - ws_path, - { - "state": "idle", - "progress": {"files_processed": files_indexed, "total_files": None}, - }, - ) - except Exception: - pass + for repo_name in touched_repos or ({repo_tag} if repo_tag else set()): + try: + target_ws = repo_roots.get(repo_name) or ws_path + update_indexing_status( + workspace_path=target_ws, + status={ + "state": "idle", + "progress": {"files_processed": files_indexed, "total_files": None}, + }, + repo_name=repo_name, + ) + except Exception: + continue + except Exception as e: + # Log the error instead of silently swallowing it + import traceback + print(f"[ERROR] Failed to update workspace state after indexing completion: {e}") + print(f"[ERROR] Traceback: {traceback.format_exc()}") def main(): @@ -2401,9 +2500,29 @@ def main(): qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333") api_key = os.environ.get("QDRANT_API_KEY") - collection = os.environ.get("COLLECTION_NAME", "codebase") + collection = os.environ.get("COLLECTION_NAME") or os.environ.get("DEFAULT_COLLECTION") or "codebase" model_name = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") + # Resolve collection name based on multi-repo mode + multi_repo = bool(is_multi_repo_mode and is_multi_repo_mode()) + if multi_repo: + # Multi-repo mode: pass collection=None to trigger per-repo 
collection resolution + collection = None + print("[multi_repo] Multi-repo mode enabled - will create separate collections per repository") + else: + # Single-repo mode: use environment variable + if 'get_collection_name' in globals() and get_collection_name: + try: + resolved = get_collection_name(str(Path(args.root).resolve())) + placeholders = {"", "default-collection", "my-collection", "codebase"} + if resolved and collection in placeholders: + collection = resolved + except Exception: + pass + if not collection: + collection = os.environ.get("COLLECTION_NAME", "codebase") + print(f"[single_repo] Single-repo mode enabled - using collection: {collection}") + index_repo( Path(args.root).resolve(), qdrant_url, diff --git a/scripts/mcp_indexer_server.py b/scripts/mcp_indexer_server.py index 76a34173..95b047be 100644 --- a/scripts/mcp_indexer_server.py +++ b/scripts/mcp_indexer_server.py @@ -50,6 +50,14 @@ # Cache for memory collection autodetection (name + timestamp) _MEM_COLL_CACHE = {"name": None, "ts": 0.0} +# Session defaults map (token -> defaults). Guarded for concurrency. 
+_SESSION_LOCK = threading.Lock() +SESSION_DEFAULTS: Dict[str, Dict[str, Any]] = {} +# Per-connection defaults keyed by ctx.session (no token required) +from weakref import WeakKeyDictionary +_SESSION_CTX_LOCK = threading.Lock() +SESSION_DEFAULTS_BY_SESSION: "WeakKeyDictionary[Any, Dict[str, Any]]" = WeakKeyDictionary() + _roots = [p.strip() for p in _roots_env.split(",") if p.strip()] or ["/work", "/app"] try: @@ -142,8 +150,9 @@ def _highlight_snippet(snippet, tokens): # type: ignore try: # Official MCP Python SDK (FastMCP convenience server) - from mcp.server.fastmcp import FastMCP + from mcp.server.fastmcp import FastMCP, Context # type: ignore except Exception as e: # pragma: no cover + # Keep FastMCP import error loud; Context is for type hints only raise SystemExit("mcp package is required inside the container: pip install mcp") APP_NAME = os.environ.get("FASTMCP_SERVER_NAME", "qdrant-indexer-mcp") @@ -225,7 +234,22 @@ def _score(token: str) -> int: QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") -DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") +DEFAULT_COLLECTION = ( + os.environ.get("DEFAULT_COLLECTION") + or os.environ.get("COLLECTION_NAME") + or "my-collection" +) +try: + from scripts.workspace_state import get_collection_name as _ws_get_collection_name # type: ignore + + if DEFAULT_COLLECTION in {"", "default-collection", "my-collection", "codebase"}: + workspace_path = os.environ.get("WATCH_ROOT", "/work") + resolved = _ws_get_collection_name(workspace_path) + if resolved: + DEFAULT_COLLECTION = resolved +except Exception: + pass + MAX_LOG_TAIL = safe_int( os.environ.get("MCP_MAX_LOG_TAIL", "4000"), default=4000, @@ -258,11 +282,23 @@ def _score(token: str) -> int: # --- Workspace state integration helpers --- def _state_file_path(ws_path: str = "/work") -> str: + """Locate workspace state using centralized metadata helpers when available.""" try: - return os.path.join(ws_path, ".codebase", "state.json") - except 
Exception as e: - logger.warning(f"State file path construction failed, using fallback: {e}") - return "/work/.codebase/state.json" + from scripts.workspace_state import ( + _extract_repo_name_from_path, + _state_file_path as _ws_state_file_path, + ) + + repo_name = _extract_repo_name_from_path(ws_path) + return str(_ws_state_file_path(workspace_path=None, repo_name=repo_name)) + except Exception: + try: + from scripts.workspace_state import _state_file_path as _ws_state_file_path + + return str(_ws_state_file_path(workspace_path=ws_path, repo_name=None)) + except Exception as exc: + logger.warning(f"State file path construction failed, using fallback: {exc}") + return os.path.join(ws_path, ".codebase", "state.json") def _read_ws_state(ws_path: str = "/work") -> Optional[Dict[str, Any]]: @@ -279,38 +315,33 @@ def _read_ws_state(ws_path: str = "/work") -> Optional[Dict[str, Any]]: def _default_collection() -> str: + env_coll = (os.environ.get("DEFAULT_COLLECTION") or os.environ.get("COLLECTION_NAME") or "").strip() + if env_coll: + return env_coll st = _read_ws_state("/work") if st: coll = st.get("qdrant_collection") if isinstance(coll, str) and coll.strip(): return coll.strip() - # Fall back to current environment rather than module-load default so tests - # and dynamic collection switching work correctly. - return os.environ.get("COLLECTION_NAME", DEFAULT_COLLECTION) - + return DEFAULT_COLLECTION def _work_script(name: str) -> str: - """Return path to a script under /app/scripts (container installation). - - Scripts are always installed at /app/scripts in the container. - This is independent of where user repositories are mounted. 
- """ - return os.path.join("/app", "scripts", name) - - -# Invalidate router scratchpad after reindex to avoid stale state reuse -_def_ws = "/work" - + """Return path to script respecting bind mounts first, then /app, then local fallback.""" + try: + work_path = os.path.join("/work", "scripts", name) + if os.path.exists(work_path): + return work_path + except Exception: + pass -def _invalidate_router_scratchpad(ws_path: str = _def_ws) -> bool: try: - p = os.path.join(ws_path, ".codebase", "router_scratchpad.json") - if os.path.exists(p): - os.remove(p) - return True + app_path = os.path.join("/app", "scripts", name) + if os.path.exists(app_path): + return app_path except Exception: pass - return False + + return os.path.join(os.getcwd(), "scripts", name) mcp = FastMCP(APP_NAME) @@ -517,7 +548,6 @@ def _cap_tail(s: str) -> str: except Exception as e: return {"ok": False, "code": -2, "stdout": "", "stderr": str(e)} finally: - # Explicitly close pipes to avoid unraisable warnings on transport GC try: if proc is not None: if proc.stdout is not None: @@ -648,28 +678,90 @@ def _to_str_list_relaxed(x: _Any) -> list[str]: if x is None: return [] if isinstance(x, (list, tuple)): - return [str(e) for e in x if str(e).strip()] + flat: list[str] = [] + for item in x: + flat.extend(_to_str_list_relaxed(item)) + return [t for t in flat if t.strip()] if isinstance(x, str): s = x.strip() if not s: return [] - # Try JSON array or Python literal list - if s.startswith("[") and s.endswith("]"): - try: - arr = json.loads(s) - if isinstance(arr, list): - return [str(e) for e in arr if str(e).strip()] - except json.JSONDecodeError: - try: - arr = _ast.literal_eval(s) - if isinstance(arr, (list, tuple)): - return [str(e) for e in arr if str(e).strip()] - except (ValueError, SyntaxError): - pass - # Comma-separated fallback - if "," in s: - return [t.strip() for t in s.split(",") if t.strip()] - return [s] + + def _normalize_tokens(val: _Any, depth: int = 0) -> list[str]: + if depth > 
10: + text = str(val).strip() + return [text] if text else [] + if isinstance(val, (list, tuple)): + tokens: list[str] = [] + for item in val: + tokens.extend(_normalize_tokens(item, depth + 1)) + return tokens + + text = str(val).strip() + if not text: + return [] + + seen: set[str] = set() + current = text + while True: + if not current: + return [] + key = f"{depth}:{current}" + if key in seen: + return [current] + seen.add(key) + + if len(current) >= 2 and current[0] == current[-1] and current[0] in {'"', "'"}: + current = current[1:-1].strip() + continue + + changed = False + if current.startswith('/"'): + current = current[2:].strip() + changed = True + if current.endswith('"/'): + current = current[:-2].strip() + changed = True + if current.endswith('/"'): + current = current[:-2].strip() + changed = True + if changed: + continue + + parsed = None + for parser in (json.loads, _ast.literal_eval): + try: + parsed = parser(current) + except Exception: + continue + else: + break + if isinstance(parsed, (list, tuple)): + tokens: list[str] = [] + for item in parsed: + tokens.extend(_normalize_tokens(item, depth + 1)) + return tokens + if isinstance(parsed, str): + current = parsed.strip() + continue + if parsed is not None: + current = str(parsed).strip() + continue + + maybe = current.replace('\\"', '"').replace("\\'", "'") + if maybe != current: + current = maybe.strip() + continue + + if ',' in current: + tokens: list[str] = [] + for part in current.split(','): + tokens.extend(_normalize_tokens(part, depth + 1)) + return tokens + + return [current] + + return [t for t in _normalize_tokens(s) if t.strip()] return [str(x)] @@ -834,9 +926,13 @@ async def qdrant_index_root( try: from scripts.workspace_state import ( get_collection_name as _ws_get_collection_name, + is_multi_repo_mode as _ws_is_multi_repo_mode, ) # type: ignore - coll = _ws_get_collection_name("/work") + if _ws_is_multi_repo_mode(): + coll = _ws_get_collection_name("/work") or _default_collection() 
+ else: + coll = _ws_get_collection_name(None) or _default_collection() except Exception: coll = _default_collection() @@ -906,9 +1002,12 @@ async def workspace_info( - {"workspace_path": str, "default_collection": str, "source": "state_file"|"env", "state": dict} """ ws_path = (workspace_path or "/work").strip() or "/work" + + st = _read_ws_state(ws_path) or {} coll = ( (st.get("qdrant_collection") if isinstance(st, dict) else None) + or os.environ.get("DEFAULT_COLLECTION") or os.environ.get("COLLECTION_NAME") or DEFAULT_COLLECTION ) @@ -943,28 +1042,233 @@ async def list_workspaces(search_root: Optional[str] = None) -> Dict[str, Any]: @mcp.tool() -async def memory_store( - information: str, - metadata: Optional[Dict[str, Any]] = None, +async def collection_map( + search_root: Optional[str] = None, collection: Optional[str] = None, + repo_name: Optional[str] = None, + include_samples: Optional[bool] = None, + limit: Optional[int] = None, ) -> Dict[str, Any]: - """Store a free-form memory entry in Qdrant (no code path metadata). + """Return collection↔repo mappings with optional Qdrant payload samples.""" - What it does: - - Embeds the text and upserts a payload with {"information", "metadata"} - - Uses named vectors (dense + lexical; mini when enabled) - - Enables context_search(include_memories=true) to surface it alongside code + def _norm_str(val: Any) -> Optional[str]: + if val is None: + return None + try: + s = str(val).strip() + except Exception: + return None + return s or None - When to use: - - Save preferences, decisions, or notes to retrieve later with code context + collection_filter = _norm_str(collection) + repo_filter = _norm_str(repo_name) + sample_flag = _coerce_bool(include_samples, False) - Parameters: - - information: str. Required text to remember. - - metadata: dict (optional). Tags like {"kind": "preference", "source": "memory"}. - - collection: str (optional). Defaults to workspace/env COLLECTION_NAME. 
+ max_entries: Optional[int] = None + if limit is not None: + try: + max_entries = max(1, int(limit)) + except Exception: + max_entries = None - Returns: - - {"ok": true, "id": str, "collection": str} or {"error": "..."} + state_entries: List[Dict[str, Any]] = [] + state_error: Optional[str] = None + + try: + from scripts.workspace_state import get_collection_mappings as _get_collection_mappings # type: ignore + + try: + state_entries = await asyncio.to_thread( + lambda: _get_collection_mappings(search_root) + ) + except Exception as exc: + state_error = str(exc) + state_entries = [] + except Exception as exc: # pragma: no cover + state_error = f"workspace_state unavailable: {exc}" + state_entries = [] + + if repo_filter: + state_entries = [ + entry for entry in state_entries if _norm_str(entry.get("repo_name")) == repo_filter + ] + if collection_filter: + state_entries = [ + entry + for entry in state_entries + if _norm_str(entry.get("collection_name")) == collection_filter + ] + + results: List[Dict[str, Any]] = [] + seen_collections: set[str] = set() + + for entry in state_entries: + item = dict(entry) + item["source"] = "state" + results.append(item) + coll = _norm_str(entry.get("collection_name")) + if coll: + seen_collections.add(coll) + + # Qdrant helpers ----------------------------------------------------- + sample_cache: Dict[str, Tuple[Optional[Dict[str, Any]], Optional[str]]] = {} + qdrant_error: Optional[str] = None + qdrant_used = False + client = None + + def _ensure_qdrant_client(): + nonlocal client, qdrant_error, qdrant_used + if client is not None or qdrant_error: + return client + try: + from qdrant_client import QdrantClient # type: ignore + except Exception as exc: # pragma: no cover + qdrant_error = f"qdrant_client unavailable: {exc}" + return None + + try: + qdrant_used = True + return QdrantClient( + url=QDRANT_URL, + api_key=os.environ.get("QDRANT_API_KEY"), + timeout=float(os.environ.get("QDRANT_TIMEOUT", "20") or 20), + ) + except 
Exception as exc: # pragma: no cover + qdrant_error = str(exc) + return None + + async def _sample_payload(coll_name: Optional[str]) -> Tuple[Optional[Dict[str, Any]], Optional[str]]: + key = _norm_str(coll_name) or "" + if not key: + return None, "missing_collection" + if key in sample_cache: + return sample_cache[key] + + cli = _ensure_qdrant_client() + if cli is None: + sample_cache[key] = (None, qdrant_error) + return sample_cache[key] + + def _scroll_one(): + try: + points, _ = cli.scroll( + collection_name=key, + limit=1, + with_payload=True, + with_vectors=False, + ) + return points + except Exception as exc: # pragma: no cover + raise exc + + try: + points = await asyncio.to_thread(_scroll_one) + except Exception as exc: # pragma: no cover + err = str(exc) + sample_cache[key] = (None, err) + return sample_cache[key] + + if not points: + sample_cache[key] = (None, None) + return sample_cache[key] + + payload = points[0].payload or {} + metadata = payload.get("metadata") or {} + sample = { + "host_path": metadata.get("host_path"), + "container_path": metadata.get("container_path"), + "path": metadata.get("path") or payload.get("path"), + "start_line": metadata.get("start_line"), + "end_line": metadata.get("end_line"), + } + sample_cache[key] = (sample, None) + return sample_cache[key] + + # Attach samples to state-backed entries when requested + if sample_flag and results: + for entry in results: + coll_name = entry.get("collection_name") + sample, err = await _sample_payload(coll_name) + if sample: + entry["sample"] = sample + if err: + entry.setdefault("warnings", []).append(err) + + # If no state entries (or explicit collection filtered out), fall back to Qdrant listings + fallback_entries: List[Dict[str, Any]] = [] + need_qdrant_listing = not results + + if need_qdrant_listing: + cli = _ensure_qdrant_client() + if cli is not None: + def _list_collections(): + info = cli.get_collections() + return [c.name for c in info.collections] + + try: + 
collection_names = await asyncio.to_thread(_list_collections) + except Exception as exc: # pragma: no cover + qdrant_error = str(exc) + collection_names = [] + + if collection_filter: + collection_names = [ + name for name in collection_names if _norm_str(name) == collection_filter + ] + + count = 0 + for name in collection_names: + if name in seen_collections: + continue + entry: Dict[str, Any] = { + "collection_name": name, + "source": "qdrant", + } + sample, err = await _sample_payload(name) if sample_flag else (None, None) + if sample: + entry["sample"] = sample + if err: + entry.setdefault("warnings", []).append(err) + fallback_entries.append(entry) + count += 1 + if max_entries is not None and count >= max_entries: + break + + entries = results + fallback_entries + + return { + "results": entries, + "counts": { + "state": len(state_entries), + "returned": len(entries), + "fallback": len(fallback_entries), + }, + "errors": { + "state": state_error, + "qdrant": qdrant_error, + }, + "qdrant_used": qdrant_used, + "filters": { + "collection": collection_filter, + "repo_name": repo_filter, + "search_root": search_root, + "include_samples": sample_flag, + "limit": max_entries, + }, + } + + +@mcp.tool() +async def memory_store( + information: str, + metadata: Optional[Dict[str, Any]] = None, + collection: Optional[str] = None, +) -> Dict[str, Any]: + """Store a free-form memory entry in Qdrant using the active collection. + + - Embeds the text and writes both dense and lexical vectors (plus mini vector in ReFRAG mode). + - Honors explicit collection overrides; otherwise falls back to workspace/env defaults. + - Returns a payload compatible with context-aware tools. 
""" try: from qdrant_client import QdrantClient, models # type: ignore @@ -972,6 +1276,8 @@ async def memory_store( import time, hashlib, re, math from scripts.utils import sanitize_vector_name from scripts.ingest_code import ensure_collection as _ensure_collection # type: ignore + + from scripts.ingest_code import project_mini as _project_mini # type: ignore except Exception as e: # pragma: no cover @@ -1248,9 +1554,13 @@ async def qdrant_index( try: from scripts.workspace_state import ( get_collection_name as _ws_get_collection_name, + is_multi_repo_mode as _ws_is_multi_repo_mode, ) # type: ignore - coll = _ws_get_collection_name("/work") + if _ws_is_multi_repo_mode(): + coll = _ws_get_collection_name(root) or _default_collection() + else: + coll = _ws_get_collection_name(None) or _default_collection() except Exception: coll = _default_collection() @@ -1279,17 +1589,69 @@ async def qdrant_index( @mcp.tool() -async def qdrant_prune(kwargs: Any = None) -> Dict[str, Any]: - """Remove stale points for /work (files deleted/moved but still in the index). +async def set_session_defaults( + collection: Any = None, + session: Any = None, + ctx: Context = None, + **kwargs, +) -> Dict[str, Any]: + """Set defaults (e.g., collection) for subsequent calls. - When to use: - - After large deletes/moves when watcher/indexer may not have cleaned up + Behavior: + - If request Context is available, persist defaults per-connection so later calls on + the same MCP session automatically use them (no token required). + - Optionally also stores token-scoped defaults for cross-connection reuse. 
+ """ + try: + _extra = _extract_kwargs_payload(kwargs) + if _extra: + if (collection is None or (isinstance(collection, str) and collection.strip() == "")) and _extra.get("collection") is not None: + collection = _extra.get("collection") + if (session is None or (isinstance(session, str) and str(session).strip() == "")) and _extra.get("session") is not None: + session = _extra.get("session") + except Exception: + pass - Parameters: - - (none). Operates on the current collection for /work. + defaults: Dict[str, Any] = {} + if isinstance(collection, str) and collection.strip(): + defaults["collection"] = str(collection).strip() - Returns: - - Subprocess result from prune.py; on success code==0. + # Per-connection storage (preferred) + try: + if ctx is not None and getattr(ctx, "session", None) is not None and defaults: + with _SESSION_CTX_LOCK: + existing2 = SESSION_DEFAULTS_BY_SESSION.get(ctx.session) or {} + existing2.update(defaults) + SESSION_DEFAULTS_BY_SESSION[ctx.session] = existing2 + except Exception: + pass + + # Optional token storage + sid = str(session).strip() if session is not None else "" + if not sid: + sid = uuid.uuid4().hex[:12] + try: + if defaults: + with _SESSION_LOCK: + existing = SESSION_DEFAULTS.get(sid) or {} + existing.update(defaults) + SESSION_DEFAULTS[sid] = existing + except Exception: + pass + + return { + "ok": True, + "session": sid, + "defaults": SESSION_DEFAULTS.get(sid, {}), + "applied": ("connection" if (ctx is not None and getattr(ctx, "session", None) is not None) else "token"), + } + +@mcp.tool() +async def qdrant_prune(kwargs: Any = None, **ignored: Any) -> Dict[str, Any]: + """Remove stale points for /work (files deleted/moved but still in the index). + + Extra arguments are accepted for forward compatibility but ignored. + Returns the subprocess result from ``prune.py`` with status information. 
""" env = os.environ.copy() env["PRUNE_ROOT"] = "/work" @@ -1314,6 +1676,11 @@ async def repo_search( highlight_snippet: Any = None, collection: Any = None, workspace_path: Any = None, + + + session: Any = None, + ctx: Context = None, + # Structured filters (optional; mirrors hybrid_search flags) language: Any = None, under: Any = None, @@ -1340,7 +1707,8 @@ async def repo_search( - query: str or list[str]. Multiple queries are fused; accepts "queries" alias. - limit: int (default 10). Total results across files. - per_path: int (default 2). Max results per file. - - include_snippet: bool. If true, returns a short snippet near the hit; control length with context_lines. + - include_snippet/context_lines: return inline snippets near hits when true. + - rerank_*: optional ONNX reranker toggles; timeouts fall back to hybrid output. - collection: str. Target collection; defaults to workspace state or env COLLECTION_NAME. - Filters (optional): language, under (path prefix), kind, symbol, ext, path_regex, path_glob (str or list[str]), not_glob (str or list[str]), not_ (negative text), case. 
@@ -1416,6 +1784,12 @@ async def repo_search( or (isinstance(collection, str) and collection.strip() == "") ) and _extra.get("collection"): collection = _extra.get("collection") + # Optional session token for session-scoped defaults + if ( + (session is None) or (isinstance(session, str) and str(session).strip() == "") + ) and _extra.get("session") is not None: + session = _extra.get("session") + # Optional workspace_path routing if ( (workspace_path is None) @@ -1425,6 +1799,7 @@ async def repo_search( ) ) and _extra.get("workspace_path") is not None: workspace_path = _extra.get("workspace_path") + if ( language is None or (isinstance(language, str) and language.strip() == "") @@ -1489,6 +1864,10 @@ def _to_bool(x, default): return False return default + # Session token (top-level or parsed from nested kwargs above) + sid = (str(session).strip() if session is not None else "") + + def _to_str(x, default=""): if x is None: return default @@ -1515,17 +1894,39 @@ def _to_str(x, default=""): ) highlight_snippet = _to_bool(highlight_snippet, True) - # Resolve collection: explicit > workspace_path state > default - ws_hint = _to_str(workspace_path, "").strip() + # Resolve collection precedence: explicit > per-connection defaults > token defaults > env default coll_hint = _to_str(collection, "").strip() - if not coll_hint and ws_hint: + + # 1) Per-connection defaults via ctx (no token required) + if (not coll_hint) and ctx is not None and getattr(ctx, "session", None) is not None: + try: + with _SESSION_CTX_LOCK: + _d2 = SESSION_DEFAULTS_BY_SESSION.get(ctx.session) or {} + _sc2 = str((_d2.get("collection") or "")).strip() + if _sc2: + coll_hint = _sc2 + except Exception: + pass + + # 2) Legacy token-based defaults + if (not coll_hint) and sid: try: - st = _read_ws_state(ws_hint) - if st and isinstance(st.get("qdrant_collection"), str): - coll_hint = st.get("qdrant_collection").strip() + with _SESSION_LOCK: + _d = SESSION_DEFAULTS.get(sid) or {} + _sc = 
str((_d.get("collection") or "")).strip() + if _sc: + coll_hint = _sc except Exception: pass - collection = coll_hint or _default_collection() + + # 3) Environment default + env_coll = (os.environ.get("DEFAULT_COLLECTION") or os.environ.get("COLLECTION_NAME") or "").strip() + if (not coll_hint) and env_coll: + coll_hint = env_coll + + # Final fallback + env_fallback = (os.environ.get("DEFAULT_COLLECTION") or os.environ.get("COLLECTION_NAME") or "my-collection").strip() + collection = coll_hint or env_fallback language = _to_str(language, "").strip() under = _to_str(under, "").strip() @@ -1624,7 +2025,7 @@ def _to_str_list(x): path_regex=path_regex or None, path_glob=(path_globs or None), not_glob=(not_globs or None), - expand=str(os.environ.get("HYBRID_EXPAND", "0")).strip().lower() + expand=str(os.environ.get("HYBRID_EXPAND", "1")).strip().lower() in {"1", "true", "yes", "on"}, model=model, ) @@ -1675,6 +2076,8 @@ def _to_str_list(x): cmd += ["--not-glob", g] for q in queries: cmd += ["--query", q] + if collection: + cmd += ["--collection", str(collection)] res = await _run_async(cmd, env=env) for line in (res.get("stdout") or "").splitlines(): @@ -2090,6 +2493,8 @@ async def repo_search_compat(**arguments) -> Dict[str, Any]: "rerank_timeout_ms": args.get("rerank_timeout_ms"), "highlight_snippet": args.get("highlight_snippet"), "collection": args.get("collection"), + "session": args.get("session"), + "workspace_path": args.get("workspace_path"), "language": args.get("language"), "under": args.get("under"), "kind": args.get("kind"), @@ -3261,9 +3666,9 @@ def _poll_ready(): if tool_name: qtext = " ".join([q for q in queries if q]).strip() or queries[0] arg_variants: List[Dict[str, Any]] = [ - {"query": qtext, "limit": mem_limit}, - {"q": qtext, "limit": mem_limit}, - {"text": qtext, "limit": mem_limit}, + {"query": qtext, "limit": mem_limit, "collection": mcoll}, + {"q": qtext, "limit": mem_limit, "collection": mcoll}, + {"text": qtext, "limit": mem_limit, 
"collection": mcoll}, ] res_obj = None for args in arg_variants: @@ -6342,7 +6747,6 @@ def _k(s: Dict[str, Any]): include_snippet=bool(include_snippet), queries=queries, ) - # Debug: log span details if os.environ.get("DEBUG_CONTEXT_ANSWER"): logger.debug( diff --git a/scripts/mcp_memory_server.py b/scripts/mcp_memory_server.py index 5a782d82..6777f16a 100644 --- a/scripts/mcp_memory_server.py +++ b/scripts/mcp_memory_server.py @@ -2,14 +2,26 @@ from typing import Any, Dict, Optional, List import json import threading +from weakref import WeakKeyDictionary -from mcp.server.fastmcp import FastMCP +# FastMCP server and request Context (ctx) for per-connection state +try: + from mcp.server.fastmcp import FastMCP, Context # type: ignore +except Exception: + # Fallback: keep FastMCP import; treat Context as Any for type hints + from mcp.server.fastmcp import FastMCP # type: ignore + Context = Any # type: ignore + from qdrant_client import QdrantClient, models # Env QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") -DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") +DEFAULT_COLLECTION = ( + os.environ.get("DEFAULT_COLLECTION") + or os.environ.get("COLLECTION_NAME") + or "my-collection" +) LEX_VECTOR_NAME = os.environ.get("LEX_VECTOR_NAME", "lex") LEX_VECTOR_DIM = int(os.environ.get("LEX_VECTOR_DIM", "4096") or 4096) EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") @@ -37,8 +49,6 @@ def _get_embedding_model(): _EMBED_MODEL = m return m - - # Ensure repo roots are importable so 'scripts' resolves inside container import sys as _sys _roots_env = os.environ.get("WORK_ROOTS", "") @@ -59,6 +69,51 @@ def _get_embedding_model(): VECTOR_NAME = _sanitize_vector_name(EMBEDDING_MODEL) +# I/O-safety knobs for memory server behavior +# These env vars allow tuning startup latency vs. first-call latency, especially important +# on slow storage backends (e.g., Ceph + HDD). See comments below for rationale. 
+MEMORY_ENSURE_ON_START = str(os.environ.get("MEMORY_ENSURE_ON_START", "1")).strip().lower() in {"1", "true", "yes", "on"} +MEMORY_COLD_SKIP_DENSE = str(os.environ.get("MEMORY_COLD_SKIP_DENSE", "0")).strip().lower() in {"1", "true", "yes", "on"} +MEMORY_PROBE_EMBED_DIM = str(os.environ.get("MEMORY_PROBE_EMBED_DIM", "1")).strip().lower() in {"1", "true", "yes", "on"} +try: + MEMORY_VECTOR_DIM = int(os.environ.get("MEMORY_VECTOR_DIM") or os.environ.get("EMBED_DIM") or "768") +except Exception: + MEMORY_VECTOR_DIM = 768 + +# Lazy embedding model cache with double-checked locking. +# RATIONALE: Avoid loading the embedding model (100–500 MB) on module import. +# On slow storage (Ceph + HDD), eager loading can cause 30–60s startup delays. +# Instead, load on first tool call (store/find). Subsequent calls reuse cached instance. +_EMBED_MODEL_CACHE: Dict[str, Any] = {} +_EMBED_MODEL_LOCK = threading.Lock() + +def _get_embedding_model(): + """Lazily load and cache the embedding model to avoid startup I/O.""" + from fastembed import TextEmbedding + m = _EMBED_MODEL_CACHE.get(EMBEDDING_MODEL) + if m is None: + with _EMBED_MODEL_LOCK: + m = _EMBED_MODEL_CACHE.get(EMBEDDING_MODEL) + if m is None: + m = TextEmbedding(model_name=EMBEDDING_MODEL) + _EMBED_MODEL_CACHE[EMBEDDING_MODEL] = m + return m + +# Track ensured collections to reduce redundant ensure calls. +# RATIONALE: Avoid repeated Qdrant network calls for the same collection. 
+_ENSURED = set() + +def _ensure_once(name: str) -> bool: + """Ensure collection exists, but only once per process (cached result).""" + if name in _ENSURED: + return True + try: + _ensure_collection(name) + _ENSURED.add(name) + return True + except Exception: + return False + mcp = FastMCP(name="memory-server") # Capture tool registry automatically by wrapping the decorator once @@ -90,6 +145,13 @@ def _inner(fn): except Exception: HEALTH_PORT = 18000 +# In-memory session defaults (legacy token-based) +_SESSION_LOCK = threading.Lock() +SESSION_DEFAULTS: Dict[str, Dict[str, Any]] = {} +# In-memory per-connection defaults keyed by ctx.session (no token required) +_SESSION_CTX_LOCK = threading.Lock() +SESSION_DEFAULTS_BY_SESSION: "WeakKeyDictionary[Any, Dict[str, Any]]" = WeakKeyDictionary() + def _start_readyz_server(): try: @@ -137,35 +199,129 @@ def log_message(self, *args, **kwargs): def _ensure_collection(name: str): + """Create collection if missing. + + Default behavior mirrors the original implementation for PR compatibility: + - Probe the embedding model to detect the dense vector dimension (MEMORY_PROBE_EMBED_DIM=1) + - Eager ensure on startup (MEMORY_ENSURE_ON_START=1) + + For slow storage backends (e.g., Ceph + HDD), set the following in your env: + - MEMORY_PROBE_EMBED_DIM=0 -> skip model probing; use MEMORY_VECTOR_DIM/EMBED_DIM + - MEMORY_ENSURE_ON_START=0 -> ensure lazily on first tool call + """ try: - info = client.get_collection(name) + client.get_collection(name) return True except Exception: pass - # Derive dense vector dimension from embedding model to avoid mismatch - # Derive dense vector dimension from embedding model to avoid mismatch - try: - _model_probe = TextEmbedding(model_name=EMBEDDING_MODEL) - _dense_vec = next(_model_probe.embed(["probe"])) - _dense_dim = len(getattr(_dense_vec, "tolist", lambda: _dense_vec)()) if hasattr(_dense_vec, "tolist") else len(_dense_vec) - except Exception: + + # Choose dense dimension based on config: 
probe (default) vs env-configured + if MEMORY_PROBE_EMBED_DIM: try: - _dense_dim = int(os.environ.get("EMBED_DIM", "768") or 768) + from fastembed import TextEmbedding + _model_probe = TextEmbedding(model_name=EMBEDDING_MODEL) + _dense_vec = next(_model_probe.embed(["probe"])) + if hasattr(_dense_vec, "tolist"): + dense_dim = len(_dense_vec.tolist()) + else: + try: + dense_dim = len(_dense_vec) + except Exception: + dense_dim = int(os.environ.get("MEMORY_VECTOR_DIM") or os.environ.get("EMBED_DIM") or "768") except Exception: - _dense_dim = 768 + # Fallback to env-configured dimension if probing fails + try: + dense_dim = int(os.environ.get("MEMORY_VECTOR_DIM") or os.environ.get("EMBED_DIM") or "768") + except Exception: + dense_dim = 768 + else: + dense_dim = int(MEMORY_VECTOR_DIM or 768) + vectors_cfg = { - VECTOR_NAME: models.VectorParams( - size=int(_dense_dim or 768), distance=models.Distance.COSINE - ), - LEX_VECTOR_NAME: models.VectorParams( - size=LEX_VECTOR_DIM, distance=models.Distance.COSINE - ), + VECTOR_NAME: models.VectorParams(size=int(dense_dim or 768), distance=models.Distance.COSINE), + LEX_VECTOR_NAME: models.VectorParams(size=LEX_VECTOR_DIM, distance=models.Distance.COSINE), } client.create_collection(collection_name=name, vectors_config=vectors_cfg) return True -_ensure_collection(DEFAULT_COLLECTION) +# Optional eager collection ensure on startup (enabled by default for backward compatibility). +# Set MEMORY_ENSURE_ON_START=0 to defer ensure to first tool call (recommended on slow storage). +if MEMORY_ENSURE_ON_START: + try: + _ensure_collection(DEFAULT_COLLECTION) + except Exception: + pass + +@mcp.tool() +def set_session_defaults( + collection: Optional[str] = None, + session: Optional[str] = None, + ctx: Context = None, + **kwargs: Any, +) -> Dict[str, Any]: + """Set defaults (e.g., collection) for subsequent calls. 
+ + Behavior: + - If a request Context is provided (normal with FastMCP), store defaults per-connection + so subsequent calls on the same MCP session automatically use them (no token needed). + - Optionally, also supports a lightweight token for clients that prefer cross-connection reuse. + + Precedence everywhere: explicit collection > per-connection defaults > token defaults > env default. + """ + try: + _extra = kwargs or {} + if isinstance(_extra, dict) and "kwargs" in _extra: + inner = _extra.get("kwargs") + if isinstance(inner, dict): + _extra = inner + elif isinstance(inner, str): + try: + _extra = json.loads(inner) + except Exception: + _extra = {} + if (not collection) and isinstance(_extra, dict) and _extra.get("collection") is not None: + collection = _extra.get("collection") + if (not session) and isinstance(_extra, dict) and _extra.get("session") is not None: + session = _extra.get("session") + except Exception: + pass + + # Prepare defaults payload + defaults: Dict[str, Any] = {} + if isinstance(collection, str) and collection.strip(): + defaults["collection"] = collection.strip() + + # Store per-connection (preferred, no token required) + try: + if ctx is not None and getattr(ctx, "session", None) is not None and defaults: + with _SESSION_CTX_LOCK: + existing = SESSION_DEFAULTS_BY_SESSION.get(ctx.session) or {} + existing.update(defaults) + SESSION_DEFAULTS_BY_SESSION[ctx.session] = existing + except Exception: + pass + + # Optional: also support legacy token + sid = (str(session).strip() if session is not None else "") or None + if not sid: + import uuid as _uuid + sid = _uuid.uuid4().hex[:12] + try: + if defaults: + with _SESSION_LOCK: + existing = SESSION_DEFAULTS.get(sid) or {} + existing.update(defaults) + SESSION_DEFAULTS[sid] = existing + except Exception: + pass + + return { + "ok": True, + "session": sid, + "defaults": (SESSION_DEFAULTS.get(sid, {}) if sid else {}), + "applied": ("connection" if (ctx is not None and getattr(ctx, "session", 
None) is not None) else "token"), + } @mcp.tool() @@ -173,9 +329,16 @@ def store( information: str, metadata: Optional[Dict[str, Any]] = None, collection: Optional[str] = None, + session: Optional[str] = None, + ctx: Context = None, + **kwargs: Any, ) -> Dict[str, Any]: - """Store a memory entry into Qdrant (dual vectors consistent with indexer).""" - coll = collection or DEFAULT_COLLECTION + """Store a memory entry into Qdrant (dual vectors consistent with indexer). + + First call may be slower because the embedding model loads lazily. + """ + coll = _resolve_collection(collection, session=session, ctx=ctx, extra_kwargs=kwargs) + _ensure_once(coll) model = _get_embedding_model() dense = next(model.embed([str(information)])).tolist() lex = _lex_hash_vector_text(str(information), LEX_VECTOR_DIM) @@ -199,33 +362,51 @@ def find( limit: Optional[int] = None, collection: Optional[str] = None, top_k: Optional[int] = None, + session: Optional[str] = None, + ctx: Context = None, + **kwargs: Any, ) -> Dict[str, Any]: - """Find memory-like entries by vector similarity (dense + lexical fusion).""" - coll = collection or DEFAULT_COLLECTION - model = _get_embedding_model() - dense = next(model.embed([str(query)])).tolist() + """Find memory-like entries by vector similarity (dense + lexical fusion). + + Cold-start option: set MEMORY_COLD_SKIP_DENSE=1 to skip dense embedding until the + model is cached (useful on slow storage). 
+ """ + coll = _resolve_collection(collection, session=session, ctx=ctx, extra_kwargs=kwargs) + _ensure_once(coll) + + use_dense = True + if MEMORY_COLD_SKIP_DENSE and EMBEDDING_MODEL not in _EMBED_MODEL_CACHE: + use_dense = False + if use_dense: + model = _get_embedding_model() + dense = next(model.embed([str(query)])).tolist() + else: + dense = None lex = _lex_hash_vector_text(str(query), LEX_VECTOR_DIM) # Harmonize alias: top_k -> limit lim = int(limit if limit is not None else (top_k if top_k is not None else 5)) # Two searches (prefer query_points) then simple RRF-like merge - try: - qp_dense = client.query_points( - collection_name=coll, - query=dense, - using=VECTOR_NAME, - limit=max(10, lim), - with_payload=True, - ) - res_dense = getattr(qp_dense, "points", qp_dense) - except AttributeError: - res_dense = client.search( - collection_name=coll, - query_vector=(VECTOR_NAME, dense), - limit=max(10, lim), - with_payload=True, - ) + if use_dense: + try: + qp_dense = client.query_points( + collection_name=coll, + query=dense, + using=VECTOR_NAME, + limit=max(10, lim), + with_payload=True, + ) + res_dense = getattr(qp_dense, "points", qp_dense) + except AttributeError: + res_dense = client.search( + collection_name=coll, + query_vector=(VECTOR_NAME, dense), + limit=max(10, lim), + with_payload=True, + ) + else: + res_dense = [] try: qp_lex = client.query_points( @@ -287,6 +468,65 @@ def add_hits(hits, weight: float): return {"ok": True, "results": ordered, "count": len(ordered)} +def _resolve_collection( + collection: Optional[str], + session: Optional[str] = None, + ctx: Context = None, + extra_kwargs: Any = None, +) -> str: + """Resolve the collection name honoring explicit args, session defaults, and env fallbacks.""" + coll = (collection or "").strip() + sid: Optional[str] = None + + # Extract overrides from nested kwargs payloads some clients send + try: + payload = extra_kwargs or {} + if isinstance(payload, dict) and "kwargs" in payload: + payload = 
payload.get("kwargs") + if isinstance(payload, str): + try: + payload = json.loads(payload) + except Exception: + payload = {} + if not coll and isinstance(payload, dict) and payload.get("collection") is not None: + coll = str(payload.get("collection")).strip() + if isinstance(payload, dict) and payload.get("session") is not None: + sid = str(payload.get("session")).strip() + except Exception: + pass + + # Explicit session parameter wins over payload session + try: + if session is not None and str(session).strip(): + sid = str(session).strip() + except Exception: + pass + + # Per-connection defaults via Context session + if not coll and ctx is not None and getattr(ctx, "session", None) is not None: + try: + with _SESSION_CTX_LOCK: + defaults = SESSION_DEFAULTS_BY_SESSION.get(ctx.session) or {} + candidate = str(defaults.get("collection") or "").strip() + if candidate: + coll = candidate + except Exception: + pass + + # Legacy token-based session defaults + if not coll and sid: + try: + with _SESSION_LOCK: + defaults = SESSION_DEFAULTS.get(sid) or {} + candidate = str(defaults.get("collection") or "").strip() + if candidate: + coll = candidate + except Exception: + pass + + return coll or DEFAULT_COLLECTION + + if __name__ == "__main__": transport = os.environ.get("FASTMCP_TRANSPORT", "sse").strip().lower() # Start lightweight /readyz health endpoint in background (best-effort) diff --git a/scripts/memory_backup.py b/scripts/memory_backup.py new file mode 100644 index 00000000..410ed90a --- /dev/null +++ b/scripts/memory_backup.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python3 +""" +Memory Backup Utility for Qdrant Collections + +Exports memories (non-code points) from Qdrant collections to JSON for backup purposes. +Memories are identified as points without file path metadata - typically user-added notes, +context, or other information that's not tied to specific code files. 
+ +Usage: + python scripts/memory_backup.py --collection test-repo-58ecbbc8 --output memories_backup.json + python scripts/memory_backup.py --collection test-repo-58ecbbc8 --output memories_backup_$(date +%Y%m%d).json +""" + +import os +import sys +import json +import argparse +from datetime import datetime +from typing import List, Dict, Any, Optional +from pathlib import Path + +# Add project root to path for imports +ROOT_DIR = Path(__file__).resolve().parent.parent +if str(ROOT_DIR) not in sys.path: + sys.path.insert(0, str(ROOT_DIR)) + +try: + from qdrant_client import QdrantClient + from qdrant_client.models import Filter, FieldCondition, MatchValue +except ImportError: + print("ERROR: qdrant-client not installed. Install with: pip install qdrant-client") + sys.exit(1) + + +def get_qdrant_client() -> QdrantClient: + """Initialize Qdrant client with environment configuration.""" + qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333") + api_key = os.environ.get("QDRANT_API_KEY") + + return QdrantClient(url=qdrant_url, api_key=api_key or None) + + +def is_memory_point(payload: Dict[str, Any]) -> bool: + """ + Determine if a point is a memory (user-added) rather than code-indexed content. 
+ + Memory points typically: + - Have no 'path' in metadata (not tied to a file) + - May have 'source' set to 'memory' + - Have 'content' field that's not extracted from code + + Args: + payload: Point payload from Qdrant + + Returns: + True if this appears to be a memory point, False if it's code content + """ + if not payload: + return False + + metadata = payload.get("metadata", {}) + + # Primary indicator: no file path means it's likely a memory + if not metadata.get("path"): + return True + + # Secondary indicator: explicit source marking + if metadata.get("source") == "memory": + return True + + # Tertiary: content-based heuristics + content = payload.get("information", "") + if content and not metadata.get("language") and not metadata.get("kind"): + # Content without language/kind metadata is likely user-added + return True + + return False + + +def export_memories( + collection_name: str, + output_file: str, + client: Optional[QdrantClient] = None, + include_vectors: bool = True, + batch_size: int = 1000 +) -> Dict[str, Any]: + """ + Export memories from a Qdrant collection to JSON. 
+ + Args: + collection_name: Qdrant collection name + output_file: Output JSON file path + client: Qdrant client instance (will create if None) + include_vectors: Whether to include vector embeddings in backup + batch_size: Number of points to fetch per request + + Returns: + Dict with backup statistics + """ + if client is None: + client = get_qdrant_client() + + # Verify collection exists + try: + collections = client.get_collections().collections + if collection_name not in [c.name for c in collections]: + raise ValueError(f"Collection '{collection_name}' not found") + except Exception as e: + raise RuntimeError(f"Failed to access Qdrant: {e}") + + print(f"Exporting memories from collection: {collection_name}") + print(f"Output file: {output_file}") + + # Get all points from collection + all_points = [] + total_count = 0 + memory_count = 0 + + # Use scroll to get all points efficiently + next_page_offset = None + while True: + points, next_page_offset = client.scroll( + collection_name=collection_name, + offset=next_page_offset, + limit=batch_size, + with_payload=True, + with_vectors=include_vectors + ) + + if not points: + break + + all_points.extend(points) + total_count += len(points) + + # Filter for memory points + memory_points = [] + for point in points: + if is_memory_point(point.payload or {}): + memory_points.append(point) + memory_count += 1 + + print(f"Fetched {len(points)} points (total: {total_count}), found {len(memory_points)} memories (total: {memory_count})") + + if next_page_offset is None: + break + + if memory_count == 0: + print("No memories found in collection!") + return { + "collection": collection_name, + "total_points": total_count, + "memory_count": 0, + "backup_file": output_file, + "success": True + } + + # Prepare backup data + backup_data = { + "backup_info": { + "collection_name": collection_name, + "export_date": datetime.now().isoformat(), + "total_points_exported": total_count, + "memory_points_found": memory_count, + 
"include_vectors": include_vectors, + "vector_dimension": None # Will be set if vectors included + }, + "memories": [] + } + + # Process memory points + for point in all_points: + if not is_memory_point(point.payload or {}): + continue + + payload = point.payload or {} + memory_entry = { + "id": str(point.id), + "content": payload.get("information", ""), + "metadata": payload.get("metadata", {}), + } + + # Include vector if requested + if include_vectors and point.vector: + if hasattr(point.vector, 'tolist'): + memory_entry["vector"] = point.vector.tolist() + else: + memory_entry["vector"] = point.vector + + # Set vector dimension from first memory + if backup_data["backup_info"]["vector_dimension"] is None: + vector_data = memory_entry["vector"] + if isinstance(vector_data, dict): + # Named vector format: {"memory": [values]} + first_vector = next(iter(vector_data.values())) + backup_data["backup_info"]["vector_dimension"] = len(first_vector) + else: + # Direct vector list format + backup_data["backup_info"]["vector_dimension"] = len(vector_data) + + backup_data["memories"].append(memory_entry) + + # Write backup file + output_path = Path(output_file) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, 'w') as f: + json.dump(backup_data, f, indent=2) + + print(f"✅ Backup completed successfully!") + print(f" Total points processed: {total_count}") + print(f" Memory points exported: {memory_count}") + print(f" Backup file: {output_path}") + print(f" File size: {output_path.stat().st_size / 1024:.1f} KB") + + return { + "collection": collection_name, + "total_points": total_count, + "memory_count": memory_count, + "backup_file": str(output_path), + "file_size": output_path.stat().st_size, + "success": True + } + + +def list_collections() -> None: + """List all available Qdrant collections.""" + client = get_qdrant_client() + + try: + collections = client.get_collections().collections + print("Available collections:") + for collection in 
collections: + info = client.get_collection(collection.name) + point_count = info.points_count + print(f" - {collection.name} ({point_count:,} points)") + except Exception as e: + print(f"Error listing collections: {e}") + + +def main(): + parser = argparse.ArgumentParser( + description="Backup memories (non-code points) from Qdrant collections", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s --collection test-repo-58ecbbc8 --output memories_backup.json + %(prog)s --list-collections + %(prog)s --collection test-repo-58ecbbc8 --output backup_$(date +%Y%m%d_%H%M%S).json --no-vectors + """ + ) + + parser.add_argument( + "--collection", "-c", + required=False, + help="Qdrant collection name to backup memories from" + ) + + parser.add_argument( + "--output", "-o", + help="Output JSON file path for backup" + ) + + parser.add_argument( + "--list-collections", "-l", + action="store_true", + help="List all available collections" + ) + + parser.add_argument( + "--no-vectors", + action="store_true", + help="Don't include vector embeddings in backup (smaller file, requires re-embedding)" + ) + + parser.add_argument( + "--batch-size", + type=int, + default=1000, + help="Number of points to fetch per request (default: 1000)" + ) + + args = parser.parse_args() + + if args.list_collections: + list_collections() + return + + if not args.collection: + parser.error("--collection required unless using --list-collections") + + if not args.output: + # Generate default filename with timestamp + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + args.output = f"{args.collection}_memories_{timestamp}.json" + + try: + result = export_memories( + collection_name=args.collection, + output_file=args.output, + include_vectors=not args.no_vectors, + batch_size=args.batch_size + ) + + if result["success"]: + print(f"\n🎉 Memory backup completed successfully!") + if result["memory_count"] == 0: + print(" (No memories found to backup)") + else: + 
print(f"\n❌ Memory backup failed!") + sys.exit(1) + + except Exception as e: + print(f"\n❌ Error during backup: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/memory_restore.py b/scripts/memory_restore.py new file mode 100644 index 00000000..cacddeda --- /dev/null +++ b/scripts/memory_restore.py @@ -0,0 +1,379 @@ +#!/usr/bin/env python3 +""" +Memory Restore Utility for Qdrant Collections + +Imports previously backed up memories into Qdrant collections. +Can restore to existing collections (append) or new ones. +Supports re-embedding memories if vectors were not included in backup. + +Usage: + python scripts/memory_restore.py --backup memories_backup.json --collection test-repo-58ecbbc8 + python scripts/memory_restore.py --backup memories_backup.json --collection new-test-repo --embedding-model BAAI/bge-large-en-v1.5 + python scripts/memory_restore.py --backup memories_backup.json --collection new-collection --new-collection +""" + +import os +import sys +import json +import argparse +from datetime import datetime +from typing import List, Dict, Any, Optional +from pathlib import Path + +# Add project root to path for imports +ROOT_DIR = Path(__file__).resolve().parent.parent +if str(ROOT_DIR) not in sys.path: + sys.path.insert(0, str(ROOT_DIR)) + +try: + from qdrant_client import QdrantClient + from qdrant_client.models import VectorParams, Distance + from fastembed import TextEmbedding +except ImportError as e: + print(f"ERROR: Missing required dependency: {e}") + print("Install with: pip install qdrant-client fastembed") + sys.exit(1) + + +def get_qdrant_client() -> QdrantClient: + """Initialize Qdrant client with environment configuration.""" + qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333") + api_key = os.environ.get("QDRANT_API_KEY") + + return QdrantClient(url=qdrant_url, api_key=api_key or None) + + +def get_embedding_model(model_name: str): + """Initialize embedding model with 
def ensure_collection_exists(
    client: QdrantClient,
    collection_name: str,
    vector_dimension: int,
    vector_name: str = "memory"
) -> None:
    """
    Ensure the target collection exists with appropriate vector configuration.

    Args:
        client: Qdrant client instance
        collection_name: Collection name
        vector_dimension: Vector dimensions for memories
        vector_name: Name for the memory vector

    Raises:
        RuntimeError: If the collection is missing and cannot be created.
    """
    try:
        # Check if collection exists
        collections = client.get_collections().collections
        if collection_name in [c.name for c in collections]:
            print(f"Collection '{collection_name}' already exists")
            return
    except Exception as e:
        # Best-effort check: fall through and attempt creation anyway.
        print(f"Warning: Could not check collection existence: {e}")

    # Create collection with a single named memory vector
    try:
        client.create_collection(
            collection_name=collection_name,
            vectors_config={
                vector_name: VectorParams(
                    size=vector_dimension,
                    distance=Distance.COSINE
                )
            }
        )
        print(f"✅ Created collection '{collection_name}' with {vector_dimension}-dim vectors")
    except Exception as e:
        raise RuntimeError(f"Failed to create collection '{collection_name}': {e}") from e


def restore_memories(
    backup_file: str,
    collection_name: str,
    client: Optional[QdrantClient] = None,
    embedding_model_name: Optional[str] = None,
    vector_name: str = "memory",
    batch_size: int = 100,
    skip_existing: bool = True
) -> Dict[str, Any]:
    """
    Restore memories from backup file to Qdrant collection.

    Args:
        backup_file: Path to backup JSON file
        collection_name: Target collection name
        client: Qdrant client instance (will create if None)
        embedding_model_name: Model name for re-embedding (if vectors not in backup)
        vector_name: Name for the memory vector in collection
        batch_size: Number of memories to upload per batch
        skip_existing: Skip memories that already exist in collection

    Returns:
        Dict with restore statistics

    Raises:
        FileNotFoundError: If the backup file does not exist.
        ValueError: If the backup file is unreadable or malformed.
    """
    if client is None:
        client = get_qdrant_client()

    # Load backup file
    backup_path = Path(backup_file)
    if not backup_path.exists():
        raise FileNotFoundError(f"Backup file not found: {backup_file}")

    try:
        with open(backup_path, 'r') as f:
            backup_data = json.load(f)
    except Exception as e:
        raise ValueError(f"Invalid backup file format: {e}") from e

    # Validate backup structure
    if "memories" not in backup_data:
        raise ValueError("Invalid backup file: missing 'memories' section")

    memories = backup_data["memories"]
    backup_info = backup_data.get("backup_info", {})

    print(f"Restoring memories from: {backup_file}")
    print(f"Target collection: {collection_name}")
    print(f"Memories in backup: {len(memories)}")

    if backup_info:
        print(f"Original collection: {backup_info.get('collection_name', 'unknown')}")
        print(f"Backup date: {backup_info.get('export_date', 'unknown')}")
        print(f"Vector dimension: {backup_info.get('vector_dimension', 'unknown')}")

    # Determine vector configuration: vectors are usable only if the backup
    # claims to include them AND the first memory actually carries one.
    vectors_included = backup_info.get("include_vectors", True) and memories and "vector" in memories[0]

    if not vectors_included:
        if not embedding_model_name:
            # Use default model
            embedding_model_name = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5")

        print(f"Vectors not included in backup, will re-embed with: {embedding_model_name}")
        embedding_model = get_embedding_model(embedding_model_name)

        # Probe the model once to learn its output dimensionality.
        test_vector = next(embedding_model.embed(["test"])).tolist()
        vector_dimension = len(test_vector)
        print(f"Embedding model vector dimension: {vector_dimension}")
    else:
        # Use dimension from backup
        vector_dimension = backup_info.get("vector_dimension", len(memories[0]["vector"]))
        embedding_model = None
        print(f"Using vectors from backup, dimension: {vector_dimension}")

    # Ensure collection exists
    ensure_collection_exists(client, collection_name, vector_dimension, vector_name)

    # Check for existing memories if skip_existing is True.
    # BUGFIX: scroll() returns a single page; the old code passed limit=None
    # (not a valid page size) and never paginated, so only the first page of
    # existing IDs was seen. Paginate with the returned offset until exhausted.
    existing_ids = set()
    if skip_existing:
        try:
            offset = None
            while True:
                page, offset = client.scroll(
                    collection_name=collection_name,
                    limit=1000,
                    offset=offset,
                    with_payload=False,
                    with_vectors=False
                )
                existing_ids.update(str(point.id) for point in page)
                if offset is None:
                    break
            print(f"Found {len(existing_ids)} existing points in collection")
        except Exception as e:
            print(f"Warning: Could not check existing points: {e}")
            skip_existing = False

    # Process and upload memories in batches
    restored_count = 0
    skipped_count = 0
    error_count = 0

    for i in range(0, len(memories), batch_size):
        batch = memories[i:i + batch_size]
        batch_points = []

        for memory in batch:
            memory_id = memory.get("id", "")

            # Skip if already exists
            if skip_existing and memory_id in existing_ids:
                skipped_count += 1
                continue

            try:
                # Prepare vector
                if vectors_included:
                    vector = memory.get("vector")
                    if not vector:
                        raise ValueError("Memory missing vector data")
                    # BUGFIX: backups may store either the named form
                    # {"memory": [...]} or a bare list (memory_backup copies
                    # point.vector verbatim). Normalize bare lists to the named
                    # form the collection's vector config expects.
                    if isinstance(vector, list):
                        vector = {vector_name: vector}
                else:
                    # Re-embed content
                    content = memory.get("content", "")
                    if not content:
                        raise ValueError("Memory missing content for embedding")

                    vector = next(embedding_model.embed([content])).tolist()
                    # For re-embedded vectors, structure them with the vector name
                    vector = {vector_name: vector}

                # Prepare point data
                point_data = {
                    "id": memory_id,
                    "vector": vector,
                    "payload": {
                        "information": memory.get("content", ""),
                        "metadata": memory.get("metadata", {})
                    }
                }

                batch_points.append(point_data)

            except Exception as e:
                print(f"Error processing memory {memory_id}: {e}")
                error_count += 1
                continue

        # Upload batch
        if batch_points:
            try:
                client.upsert(collection_name=collection_name, points=batch_points)
                restored_count += len(batch_points)
                print(f" Uploaded batch {i//batch_size + 1}: +{len(batch_points)} memories (total: {restored_count})")
            except Exception as e:
                print(f"Error uploading batch {i//batch_size + 1}: {e}")
                error_count += len(batch_points)

    # Final statistics
    print(f"\n✅ Memory restore completed!")
    print(f" Total memories in backup: {len(memories)}")
    print(f" Successfully restored: {restored_count}")
    print(f" Skipped (already exists): {skipped_count}")
    print(f" Errors: {error_count}")
    print(f" Target collection: {collection_name}")

    # Verify final count (best effort)
    try:
        final_count = client.count(collection_name).count
        print(f" Final collection size: {final_count:,} points")
    except Exception as e:
        print(f" Warning: Could not get final count: {e}")

    return {
        "collection": collection_name,
        "backup_file": backup_file,
        "total_memories": len(memories),
        "restored": restored_count,
        "skipped": skipped_count,
        "errors": error_count,
        "success": True
    }
Qdrant collection name" + ) + + parser.add_argument( + "--embedding-model", "-m", + help="Embedding model for re-embedding (if vectors not in backup)" + ) + + parser.add_argument( + "--vector-name", + default="memory", + help="Name for the memory vector in collection (default: memory)" + ) + + parser.add_argument( + "--batch-size", + type=int, + default=100, + help="Number of memories to upload per batch (default: 100)" + ) + + parser.add_argument( + "--no-skip-existing", + action="store_true", + help="Don't skip memories that already exist in collection" + ) + + parser.add_argument( + "--list-backup-info", + action="store_true", + help="Show backup file information without restoring" + ) + + args = parser.parse_args() + + try: + # Load backup to show info + with open(args.backup, 'r') as f: + backup_data = json.load(f) + + if args.list_backup_info: + print("Backup Information:") + print("=" * 50) + backup_info = backup_data.get("backup_info", {}) + for key, value in backup_info.items(): + print(f" {key}: {value}") + + memories = backup_data.get("memories", []) + print(f" Memory count: {len(memories)}") + + if memories: + sample = memories[0] + has_vector = "vector" in sample + print(f" Has vectors: {has_vector}") + if has_vector: + vector_dim = len(sample["vector"]) + print(f" Vector dimension: {vector_dim}") + + return + + # Restore memories + result = restore_memories( + backup_file=args.backup, + collection_name=args.collection, + embedding_model_name=args.embedding_model, + vector_name=args.vector_name, + batch_size=args.batch_size, + skip_existing=not args.no_skip_existing + ) + + if result["success"]: + print(f"\n🎉 Memory restoration completed successfully!") + else: + print(f"\n❌ Memory restoration failed!") + sys.exit(1) + + except Exception as e: + print(f"\n❌ Error during restoration: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/remote_upload_client.py b/scripts/remote_upload_client.py new 
file mode 100644 index 00000000..aac98034 --- /dev/null +++ b/scripts/remote_upload_client.py @@ -0,0 +1,1111 @@ +#!/usr/bin/env python3 +""" +Remote upload client for delta bundles in Context-Engine. + +This module provides functionality to create and upload delta bundles to a remote +server, enabling real-time code synchronization across distributed environments. + +Example usage: + export HOST_ROOT="/tmp/testupload" && export CONTAINER_ROOT="/work" && export + PYTHONPATH="/home/coder/project/Context-Engine:$PYTHONPATH" && python3 + scripts/remote_upload_client.py --path /tmp/testupload) +""" + +import os +import json +import time +import uuid +import hashlib +import tarfile +import tempfile +import logging +import argparse +from pathlib import Path +from typing import Dict, List, Any, Optional, Tuple +from datetime import datetime +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Import existing workspace state functions +from scripts.workspace_state import ( + get_cached_file_hash, + set_cached_file_hash, + get_collection_name, + _extract_repo_name_from_path, +) + +# Import existing hash function +import scripts.ingest_code as idx + + +class RemoteUploadClient: + """Client for uploading delta bundles to remote server.""" + + def _translate_to_container_path(self, host_path: str) -> str: + """Translate host path to container path for API communication.""" + # Use environment variable for path mapping if available + host_root = os.environ.get("HOST_ROOT", "/home/coder/project/Context-Engine/dev-workspace") + container_root = os.environ.get("CONTAINER_ROOT", "/work") + + if host_path.startswith(host_root): + return host_path.replace(host_root, container_root) + else: + # Fallback: if path doesn't match expected pattern, use as-is + return host_path + + def __init__(self, upload_endpoint: str, workspace_path: 
str, collection_name: str, + max_retries: int = 3, timeout: int = 30, metadata_path: Optional[str] = None): + """Initialize remote upload client.""" + self.upload_endpoint = upload_endpoint.rstrip('/') + self.workspace_path = workspace_path + self.collection_name = collection_name + self.max_retries = max_retries + self.timeout = timeout + self.temp_dir = None + + # Set environment variables for cache functions + os.environ["WORKSPACE_PATH"] = workspace_path + + # Get repo name for cache operations + try: + from scripts.workspace_state import _extract_repo_name_from_path + self.repo_name = _extract_repo_name_from_path(workspace_path) + # Fallback to directory name if repo detection fails (for non-git repos) + if not self.repo_name: + self.repo_name = Path(workspace_path).name + except ImportError: + self.repo_name = Path(workspace_path).name + + # Setup HTTP session with simple retry + self.session = requests.Session() + retry_strategy = Retry(total=max_retries, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]) + adapter = HTTPAdapter(max_retries=retry_strategy) + self.session.mount("http://", adapter) + self.session.mount("https://", adapter) + + def __enter__(self): + """Context manager entry.""" + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit with cleanup.""" + self.cleanup() + + def cleanup(self): + """Clean up temporary directories.""" + if self.temp_dir and os.path.exists(self.temp_dir): + try: + import shutil + shutil.rmtree(self.temp_dir) + logger.debug(f"[remote_upload] Cleaned up temporary directory: {self.temp_dir}") + except Exception as e: + logger.warning(f"[remote_upload] Failed to cleanup temp directory {self.temp_dir}: {e}") + finally: + self.temp_dir = None + + def get_mapping_summary(self) -> Dict[str, Any]: + """Return derived collection mapping details.""" + container_path = self._translate_to_container_path(self.workspace_path) + return { + "repo_name": self.repo_name, + "collection_name": 
self.collection_name, + "source_path": self.workspace_path, + "container_path": container_path, + "upload_endpoint": self.upload_endpoint, + } + + def log_mapping_summary(self) -> None: + """Log mapping summary for user visibility.""" + info = self.get_mapping_summary() + logger.info("[remote_upload] Collection mapping:") + logger.info(f" repo_name: {info['repo_name']}") + logger.info(f" collection_name: {info['collection_name']}") + logger.info(f" source_path: {info['source_path']}") + logger.info(f" container_path: {info['container_path']}") + + def _get_temp_bundle_dir(self) -> Path: + """Get or create temporary directory for bundle creation.""" + if not self.temp_dir: + self.temp_dir = tempfile.mkdtemp(prefix="delta_bundle_") + return Path(self.temp_dir) + # CLI is stateless - sequence tracking is handled by server + + def detect_file_changes(self, changed_paths: List[Path]) -> Dict[str, List]: + """ + Detect what type of changes occurred for each file path. + + Args: + changed_paths: List of changed file paths + + Returns: + Dictionary with change types: created, updated, deleted, moved, unchanged + """ + changes = { + "created": [], + "updated": [], + "deleted": [], + "moved": [], + "unchanged": [] + } + + for path in changed_paths: + abs_path = str(path.resolve()) + cached_hash = get_cached_file_hash(abs_path, self.repo_name) + + if not path.exists(): + # File was deleted + if cached_hash: + changes["deleted"].append(path) + else: + # File exists - calculate current hash + try: + with open(path, 'rb') as f: + content = f.read() + current_hash = hashlib.sha1(content).hexdigest() + + if not cached_hash: + # New file + changes["created"].append(path) + elif cached_hash != current_hash: + # Modified file + changes["updated"].append(path) + else: + # Unchanged (might be a move detection candidate) + changes["unchanged"].append(path) + + # Update cache + set_cached_file_hash(abs_path, current_hash, self.repo_name) + except Exception: + # Skip files that can't be 
read + continue + + # Detect moves by looking for files with same content hash + # but different paths (requires additional tracking) + changes["moved"] = self._detect_moves(changes["created"], changes["deleted"]) + + return changes + + def _detect_moves(self, created_files: List[Path], deleted_files: List[Path]) -> List[Tuple[Path, Path]]: + """ + Detect file moves by matching content hashes between created and deleted files. + + Args: + created_files: List of newly created files + deleted_files: List of deleted files + + Returns: + List of (source, destination) path tuples for detected moves + """ + moves = [] + deleted_hashes = {} + + # Build hash map for deleted files + for deleted_path in deleted_files: + try: + # Try to get cached hash first, fallback to file content + cached_hash = get_cached_file_hash(str(deleted_path), self.repo_name) + if cached_hash: + deleted_hashes[cached_hash] = deleted_path + continue + + # If no cached hash, try to read from file if it still exists + if deleted_path.exists(): + with open(deleted_path, 'rb') as f: + content = f.read() + file_hash = hashlib.sha1(content).hexdigest() + deleted_hashes[file_hash] = deleted_path + except Exception: + continue + + # Match created files with deleted files by hash + for created_path in created_files: + try: + with open(created_path, 'rb') as f: + content = f.read() + file_hash = hashlib.sha1(content).hexdigest() + + if file_hash in deleted_hashes: + source_path = deleted_hashes[file_hash] + moves.append((source_path, created_path)) + # Remove from consideration + del deleted_hashes[file_hash] + except Exception: + continue + + return moves + + def create_delta_bundle(self, changes: Dict[str, List]) -> Tuple[str, Dict[str, Any]]: + """ + Create a delta bundle from detected changes. 
+ + Args: + changes: Dictionary of file changes by type + + Returns: + Tuple of (bundle_path, manifest_metadata) + """ + bundle_id = str(uuid.uuid4()) + # CLI is stateless - server handles sequence numbers + created_at = datetime.now().isoformat() + + # Create temporary directory for bundle + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create directory structure + files_dir = temp_path / "files" + metadata_dir = temp_path / "metadata" + files_dir.mkdir() + metadata_dir.mkdir() + + # Create subdirectories + (files_dir / "created").mkdir() + (files_dir / "updated").mkdir() + (files_dir / "moved").mkdir() + + operations = [] + total_size = 0 + file_hashes = {} + + # Process created files + for path in changes["created"]: + rel_path = str(path.relative_to(Path(self.workspace_path))) + try: + with open(path, 'rb') as f: + content = f.read() + file_hash = hashlib.sha1(content).hexdigest() + content_hash = f"sha1:{file_hash}" + + # Write file to bundle + bundle_file_path = files_dir / "created" / rel_path + bundle_file_path.parent.mkdir(parents=True, exist_ok=True) + bundle_file_path.write_bytes(content) + + # Get file info + stat = path.stat() + language = idx.CODE_EXTS.get(path.suffix.lower(), "unknown") + + operation = { + "operation": "created", + "path": rel_path, + "relative_path": rel_path, + "absolute_path": str(path.resolve()), + "size_bytes": stat.st_size, + "content_hash": content_hash, + "file_hash": f"sha1:{idx.hash_id(content.decode('utf-8', errors='ignore'), rel_path, 1, len(content.splitlines()))}", + "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(), + "language": language + } + operations.append(operation) + file_hashes[rel_path] = f"sha1:{file_hash}" + total_size += stat.st_size + + except Exception as e: + print(f"[bundle_create] Error processing created file {path}: {e}") + continue + + # Process updated files + for path in changes["updated"]: + rel_path = 
str(path.relative_to(Path(self.workspace_path))) + try: + with open(path, 'rb') as f: + content = f.read() + file_hash = hashlib.sha1(content).hexdigest() + content_hash = f"sha1:{file_hash}" + previous_hash = get_cached_file_hash(str(path.resolve()), self.repo_name) + + # Write file to bundle + bundle_file_path = files_dir / "updated" / rel_path + bundle_file_path.parent.mkdir(parents=True, exist_ok=True) + bundle_file_path.write_bytes(content) + + # Get file info + stat = path.stat() + language = idx.CODE_EXTS.get(path.suffix.lower(), "unknown") + + operation = { + "operation": "updated", + "path": rel_path, + "relative_path": rel_path, + "absolute_path": str(path.resolve()), + "size_bytes": stat.st_size, + "content_hash": content_hash, + "previous_hash": f"sha1:{previous_hash}" if previous_hash else None, + "file_hash": f"sha1:{idx.hash_id(content.decode('utf-8', errors='ignore'), rel_path, 1, len(content.splitlines()))}", + "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(), + "language": language + } + operations.append(operation) + file_hashes[rel_path] = f"sha1:{file_hash}" + total_size += stat.st_size + + except Exception as e: + print(f"[bundle_create] Error processing updated file {path}: {e}") + continue + + # Process moved files + for source_path, dest_path in changes["moved"]: + dest_rel_path = str(dest_path.relative_to(Path(self.workspace_path))) + source_rel_path = str(source_path.relative_to(Path(self.workspace_path))) + try: + with open(dest_path, 'rb') as f: + content = f.read() + file_hash = hashlib.sha1(content).hexdigest() + content_hash = f"sha1:{file_hash}" + + # Write file to bundle + bundle_file_path = files_dir / "moved" / dest_rel_path + bundle_file_path.parent.mkdir(parents=True, exist_ok=True) + bundle_file_path.write_bytes(content) + + # Get file info + stat = dest_path.stat() + language = idx.CODE_EXTS.get(dest_path.suffix.lower(), "unknown") + + operation = { + "operation": "moved", + "path": dest_rel_path, + 
"relative_path": dest_rel_path, + "absolute_path": str(dest_path.resolve()), + "source_path": source_rel_path, + "source_relative_path": source_rel_path, + "source_absolute_path": str(source_path.resolve()), + "size_bytes": stat.st_size, + "content_hash": content_hash, + "file_hash": f"sha1:{idx.hash_id(content.decode('utf-8', errors='ignore'), dest_rel_path, 1, len(content.splitlines()))}", + "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(), + "language": language + } + operations.append(operation) + file_hashes[dest_rel_path] = f"sha1:{file_hash}" + total_size += stat.st_size + + except Exception as e: + print(f"[bundle_create] Error processing moved file {source_path} -> {dest_path}: {e}") + continue + + # Process deleted files + for path in changes["deleted"]: + rel_path = str(path.relative_to(Path(self.workspace_path))) + try: + previous_hash = get_cached_file_hash(str(path.resolve()), self.repo_name) + + operation = { + "operation": "deleted", + "path": rel_path, + "relative_path": rel_path, + "absolute_path": str(path.resolve()), + "previous_hash": f"sha1:{previous_hash}" if previous_hash else None, + "file_hash": None, + "modified_time": datetime.now().isoformat(), + "language": idx.CODE_EXTS.get(path.suffix.lower(), "unknown") + } + operations.append(operation) + + except Exception as e: + print(f"[bundle_create] Error processing deleted file {path}: {e}") + continue + + # Create manifest + manifest = { + "version": "1.0", + "bundle_id": bundle_id, + "workspace_path": self.workspace_path, + "collection_name": self.collection_name, + "created_at": created_at, + # CLI is stateless - server will assign sequence numbers + "sequence_number": None, # Server will assign + "parent_sequence": None, # Server will determine + "operations": { + "created": len(changes["created"]), + "updated": len(changes["updated"]), + "deleted": len(changes["deleted"]), + "moved": len(changes["moved"]) + }, + "total_files": len(operations), + "total_size_bytes": 
total_size, + "compression": "gzip", + "encoding": "utf-8" + } + + # Write manifest + (temp_path / "manifest.json").write_text(json.dumps(manifest, indent=2)) + + # Write operations metadata + operations_metadata = { + "operations": operations + } + (metadata_dir / "operations.json").write_text(json.dumps(operations_metadata, indent=2)) + + # Write hashes + hashes_metadata = { + "workspace_path": self.workspace_path, + "updated_at": created_at, + "file_hashes": file_hashes + } + (metadata_dir / "hashes.json").write_text(json.dumps(hashes_metadata, indent=2)) + + # Create tarball in temporary directory + temp_bundle_dir = self._get_temp_bundle_dir() + bundle_path = temp_bundle_dir / f"{bundle_id}.tar.gz" + with tarfile.open(bundle_path, "w:gz") as tar: + tar.add(temp_path, arcname=f"{bundle_id}") + + return str(bundle_path), manifest + + def upload_bundle(self, bundle_path: str, manifest: Dict[str, Any]) -> Dict[str, Any]: + """ + Upload delta bundle to remote server with exponential backoff retry. + + Args: + bundle_path: Path to the bundle tarball + manifest: Bundle manifest metadata + + Returns: + Server response dictionary + """ + last_error = None + + for attempt in range(self.max_retries + 1): + try: + # Simple exponential backoff + if attempt > 0: + delay = min(2 ** (attempt - 1), 30) # 1, 2, 4, 8... 
capped at 30s + logger.info(f"[remote_upload] Retry attempt {attempt + 1}/{self.max_retries + 1} after {delay}s delay") + time.sleep(delay) + + # Verify bundle exists + if not os.path.exists(bundle_path): + return {"success": False, "error": {"code": "BUNDLE_NOT_FOUND", "message": f"Bundle not found: {bundle_path}"}} + + # Check bundle size (100MB limit) + bundle_size = os.path.getsize(bundle_path) + if bundle_size > 100 * 1024 * 1024: + return {"success": False, "error": {"code": "BUNDLE_TOO_LARGE", "message": f"Bundle too large: {bundle_size} bytes"}} + + with open(bundle_path, 'rb') as bundle_file: + files = { + 'bundle': (f"{manifest['bundle_id']}.tar.gz", bundle_file, 'application/gzip') + } + + data = { + 'workspace_path': self._translate_to_container_path(self.workspace_path), + 'collection_name': self.collection_name, + # CLI is stateless - server handles sequence numbers + 'force': 'false', + 'source_path': self.workspace_path, + } + + logger.info(f"[remote_upload] Uploading bundle {manifest['bundle_id']} (size: {bundle_size} bytes)") + + response = self.session.post( + f"{self.upload_endpoint}/api/v1/delta/upload", + files=files, + data=data, + timeout=self.timeout + ) + + if response.status_code == 200: + result = response.json() + logger.info(f"[remote_upload] Successfully uploaded bundle {manifest['bundle_id']}") + return result + + # Handle error + error_msg = f"Upload failed with status {response.status_code}" + try: + error_detail = response.json() + error_detail_msg = error_detail.get('error', {}).get('message', 'Unknown error') + error_msg += f": {error_detail_msg}" + error_code = error_detail.get('error', {}).get('code', 'HTTP_ERROR') + except: + error_msg += f": {response.text[:200]}" + error_code = "HTTP_ERROR" + + last_error = {"success": False, "error": {"code": error_code, "message": error_msg, "status_code": response.status_code}} + + # Don't retry on client errors (except 429) + if 400 <= response.status_code < 500 and response.status_code 
!= 429: + return last_error + + logger.warning(f"[remote_upload] Upload attempt {attempt + 1} failed: {error_msg}") + + except requests.exceptions.Timeout as e: + last_error = {"success": False, "error": {"code": "TIMEOUT_ERROR", "message": f"Upload timeout: {str(e)}"}} + logger.warning(f"[remote_upload] Upload timeout on attempt {attempt + 1}: {e}") + + except requests.exceptions.ConnectionError as e: + last_error = {"success": False, "error": {"code": "CONNECTION_ERROR", "message": f"Connection error: {str(e)}"}} + logger.warning(f"[remote_upload] Connection error on attempt {attempt + 1}: {e}") + + except requests.exceptions.RequestException as e: + last_error = {"success": False, "error": {"code": "NETWORK_ERROR", "message": f"Network error: {str(e)}"}} + logger.warning(f"[remote_upload] Network error on attempt {attempt + 1}: {e}") + + except Exception as e: + last_error = {"success": False, "error": {"code": "UPLOAD_ERROR", "message": f"Upload error: {str(e)}"}} + logger.error(f"[remote_upload] Unexpected error on attempt {attempt + 1}: {e}") + + # All retries exhausted + logger.error(f"[remote_upload] All {self.max_retries + 1} upload attempts failed for bundle {manifest.get('bundle_id', 'unknown')}") + return last_error or { + "success": False, + "error": { + "code": "MAX_RETRIES_EXCEEDED", + "message": f"Upload failed after {self.max_retries + 1} attempts" + } + } + + def get_server_status(self) -> Dict[str, Any]: + """Get server status with simplified error handling.""" + try: + container_workspace_path = self._translate_to_container_path(self.workspace_path) + + response = self.session.get( + f"{self.upload_endpoint}/api/v1/delta/status", + params={'workspace_path': container_workspace_path}, + timeout=min(self.timeout, 10) + ) + + if response.status_code == 200: + return response.json() + + # Handle error response + error_msg = f"Status check failed with HTTP {response.status_code}" + try: + error_detail = response.json() + error_msg += f": 
{error_detail.get('error', {}).get('message', 'Unknown error')}" + except: + error_msg += f": {response.text[:100]}" + + return {"success": False, "error": {"code": "STATUS_ERROR", "message": error_msg}} + + except requests.exceptions.Timeout: + return {"success": False, "error": {"code": "STATUS_TIMEOUT", "message": "Status check timeout"}} + except requests.exceptions.ConnectionError: + return {"success": False, "error": {"code": "CONNECTION_ERROR", "message": f"Cannot connect to server"}} + except Exception as e: + return {"success": False, "error": {"code": "STATUS_CHECK_ERROR", "message": f"Status check error: {str(e)}"}} + + def has_meaningful_changes(self, changes: Dict[str, List]) -> bool: + """Check if changes warrant a delta upload.""" + total_changes = sum(len(files) for op, files in changes.items() if op != "unchanged") + return total_changes > 0 + + def process_changes_and_upload(self, changes: Dict[str, List]) -> bool: + """ + Process pre-computed changes and upload delta bundle. + Includes comprehensive error handling and graceful fallback. 
+ + Args: + changes: Dictionary of file changes by type + + Returns: + True if upload was successful, False otherwise + """ + try: + logger.info(f"[remote_upload] Processing pre-computed changes") + + # Validate input + if not changes: + logger.info("[remote_upload] No changes provided") + return True + + if not self.has_meaningful_changes(changes): + logger.info("[remote_upload] No meaningful changes detected, skipping upload") + return True + + # Log change summary + total_changes = sum(len(files) for op, files in changes.items() if op != "unchanged") + logger.info(f"[remote_upload] Detected {total_changes} meaningful changes: " + f"{len(changes['created'])} created, {len(changes['updated'])} updated, " + f"{len(changes['deleted'])} deleted, {len(changes['moved'])} moved") + + # Create delta bundle + bundle_path = None + try: + bundle_path, manifest = self.create_delta_bundle(changes) + logger.info(f"[remote_upload] Created delta bundle: {manifest['bundle_id']} " + f"(size: {manifest['total_size_bytes']} bytes)") + + # Validate bundle was created successfully + if not bundle_path or not os.path.exists(bundle_path): + raise RuntimeError(f"Failed to create bundle at {bundle_path}") + + except Exception as e: + logger.error(f"[remote_upload] Error creating delta bundle: {e}") + # Clean up any temporary files on failure + self.cleanup() + return False + + # Upload bundle with retry logic + try: + response = self.upload_bundle(bundle_path, manifest) + + if response.get("success", False): + processed_ops = response.get('processed_operations', {}) + logger.info(f"[remote_upload] Successfully uploaded bundle {manifest['bundle_id']}") + logger.info(f"[remote_upload] Processed operations: {processed_ops}") + + # Clean up temporary bundle after successful upload + try: + if os.path.exists(bundle_path): + os.remove(bundle_path) + logger.debug(f"[remote_upload] Cleaned up temporary bundle: {bundle_path}") + # Also clean up the entire temp directory if this is the last bundle 
+ self.cleanup() + except Exception as cleanup_error: + logger.warning(f"[remote_upload] Failed to cleanup bundle {bundle_path}: {cleanup_error}") + + return True + else: + error_msg = response.get('error', {}).get('message', 'Unknown upload error') + logger.error(f"[remote_upload] Upload failed: {error_msg}") + return False + + except Exception as e: + logger.error(f"[remote_upload] Error uploading bundle: {e}") + return False + + except Exception as e: + logger.error(f"[remote_upload] Unexpected error in process_changes_and_upload: {e}") + return False + + def get_all_code_files(self) -> List[Path]: + """Get all code files in the workspace.""" + all_files = [] + try: + workspace_path = Path(self.workspace_path) + for ext in idx.CODE_EXTS: + all_files.extend(workspace_path.rglob(f"*{ext}")) + + # Filter out directories and hidden files + all_files = [ + f for f in all_files + if f.is_file() + and not any(part.startswith('.') for part in f.parts) + and '.codebase' not in str(f) + ] + except Exception as e: + logger.error(f"[watch] Error scanning files: {e}") + + return all_files + + def watch_loop(self, interval: int = 5): + """Main file watching loop using existing detection and upload methods.""" + logger.info(f"[watch] Starting file monitoring (interval: {interval}s)") + logger.info(f"[watch] Monitoring: {self.workspace_path}") + logger.info(f"[watch] Press Ctrl+C to stop") + + try: + while True: + try: + # Use existing change detection (get all files in workspace) + all_files = self.get_all_code_files() + changes = self.detect_file_changes(all_files) + + # Count only meaningful changes (exclude unchanged) + meaningful_changes = len(changes.get("created", [])) + len(changes.get("updated", [])) + len(changes.get("deleted", [])) + len(changes.get("moved", [])) + + if meaningful_changes > 0: + logger.info(f"[watch] Detected {meaningful_changes} changes: { {k: len(v) for k, v in changes.items() if k != 'unchanged'} }") + + # Use existing upload method + success = 
self.process_changes_and_upload(changes) + + if success: + logger.info(f"[watch] Successfully uploaded changes") + else: + logger.error(f"[watch] Failed to upload changes") + else: + logger.debug(f"[watch] No changes detected") # Debug level to avoid spam + + # Sleep until next check + time.sleep(interval) + + except KeyboardInterrupt: + logger.info(f"[watch] Received interrupt signal, stopping...") + break + except Exception as e: + logger.error(f"[watch] Error in watch loop: {e}") + time.sleep(interval) # Continue even after errors + + except KeyboardInterrupt: + logger.info(f"[watch] File monitoring stopped by user") + + def process_and_upload_changes(self, changed_paths: List[Path]) -> bool: + """ + Process changed paths and upload delta bundle if meaningful changes exist. + Includes comprehensive error handling and graceful fallback. + + Args: + changed_paths: List of changed file paths + + Returns: + True if upload was successful, False otherwise + """ + try: + logger.info(f"[remote_upload] Processing {len(changed_paths)} changed paths") + + # Validate input + if not changed_paths: + logger.info("[remote_upload] No changed paths provided") + return True + + # Detect changes + try: + changes = self.detect_file_changes(changed_paths) + except Exception as e: + logger.error(f"[remote_upload] Error detecting file changes: {e}") + return False + + if not self.has_meaningful_changes(changes): + logger.info("[remote_upload] No meaningful changes detected, skipping upload") + return True + + # Log change summary + total_changes = sum(len(files) for op, files in changes.items() if op != "unchanged") + logger.info(f"[remote_upload] Detected {total_changes} meaningful changes: " + f"{len(changes['created'])} created, {len(changes['updated'])} updated, " + f"{len(changes['deleted'])} deleted, {len(changes['moved'])} moved") + + # Create delta bundle + bundle_path = None + try: + bundle_path, manifest = self.create_delta_bundle(changes) + logger.info(f"[remote_upload] 
Created delta bundle: {manifest['bundle_id']} " + f"(size: {manifest['total_size_bytes']} bytes)") + + # Validate bundle was created successfully + if not bundle_path or not os.path.exists(bundle_path): + raise RuntimeError(f"Failed to create bundle at {bundle_path}") + + except Exception as e: + logger.error(f"[remote_upload] Error creating delta bundle: {e}") + # Clean up any temporary files on failure + self.cleanup() + return False + + # Upload bundle with retry logic + try: + response = self.upload_bundle(bundle_path, manifest) + + if response.get("success", False): + processed_ops = response.get('processed_operations', {}) + logger.info(f"[remote_upload] Successfully uploaded bundle {manifest['bundle_id']}") + logger.info(f"[remote_upload] Processed operations: {processed_ops}") + + # Clean up temporary bundle after successful upload + try: + if os.path.exists(bundle_path): + os.remove(bundle_path) + logger.debug(f"[remote_upload] Cleaned up temporary bundle: {bundle_path}") + # Also clean up the entire temp directory if this is the last bundle + self.cleanup() + except Exception as cleanup_error: + logger.warning(f"[remote_upload] Failed to cleanup bundle {bundle_path}: {cleanup_error}") + + return True + else: + error = response.get("error", {}) + error_code = error.get("code", "UNKNOWN") + error_msg = error.get("message", "Unknown error") + + logger.error(f"[remote_upload] Upload failed: {error_msg}") + + # Handle specific error types + # CLI is stateless - server handles sequence management + if error_code in ["BUNDLE_TOO_LARGE", "BUNDLE_NOT_FOUND"]: + # These are unrecoverable errors + logger.error(f"[remote_upload] Unrecoverable error ({error_code}): {error_msg}") + return False + elif error_code in ["TIMEOUT_ERROR", "CONNECTION_ERROR", "NETWORK_ERROR"]: + # These might be temporary, suggest fallback + logger.warning(f"[remote_upload] Network-related error ({error_code}): {error_msg}") + logger.warning("[remote_upload] Consider falling back to local 
mode if this persists") + return False + else: + # Other errors + logger.error(f"[remote_upload] Upload error ({error_code}): {error_msg}") + return False + + except Exception as e: + logger.error(f"[remote_upload] Unexpected error during upload: {e}") + return False + + except Exception as e: + logger.error(f"[remote_upload] Critical error in process_and_upload_changes: {e}") + logger.exception("[remote_upload] Full traceback:") + return False + +def get_remote_config(cli_path: Optional[str] = None) -> Dict[str, str]: + """Get remote upload configuration from environment variables and command-line arguments.""" + # Use command-line path if provided, otherwise fall back to environment variables + if cli_path: + workspace_path = cli_path + else: + workspace_path = os.environ.get("WATCH_ROOT", os.environ.get("WORKSPACE_PATH", "/work")) + + # Use auto-generated collection name based on repo name + repo_name = _extract_repo_name_from_path(workspace_path) + # Fallback to directory name if repo detection fails + if not repo_name: + repo_name = Path(workspace_path).name + collection_name = get_collection_name(repo_name) + + return { + "upload_endpoint": os.environ.get("REMOTE_UPLOAD_ENDPOINT", "http://localhost:8080"), + "workspace_path": workspace_path, + "collection_name": collection_name, + "max_retries": int(os.environ.get("REMOTE_UPLOAD_MAX_RETRIES", "3")), + "timeout": int(os.environ.get("REMOTE_UPLOAD_TIMEOUT", "30")) + } + + +def main(): + """Main entry point for the remote upload client.""" + parser = argparse.ArgumentParser( + description="Remote upload client for delta bundles in Context-Engine", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Upload from current directory or environment variables + python remote_upload_client.py + + # Upload from specific directory + python remote_upload_client.py --path /path/to/repo + + # Upload from specific directory with custom endpoint + python remote_upload_client.py --path 
/path/to/repo --endpoint http://remote-server:8080 + + # Watch for file changes and upload automatically + python remote_upload_client.py --path /path/to/repo --watch + + # Watch with custom interval (check every 3 seconds) + python remote_upload_client.py --path /path/to/repo --watch --interval 3 + """ + ) + + parser.add_argument( + "--path", + type=str, + help="Path to the directory to upload (overrides WATCH_ROOT/WORKSPACE_PATH environment variables)" + ) + + parser.add_argument( + "--endpoint", + type=str, + help="Remote upload endpoint (overrides REMOTE_UPLOAD_ENDPOINT environment variable)" + ) + + parser.add_argument( + "--max-retries", + type=int, + help="Maximum number of upload retries (overrides REMOTE_UPLOAD_MAX_RETRIES environment variable)" + ) + + parser.add_argument( + "--timeout", + type=int, + help="Request timeout in seconds (overrides REMOTE_UPLOAD_TIMEOUT environment variable)" + ) + + parser.add_argument( + "--force", + action="store_true", + help="Force upload of all files (ignore cached state and treat all files as new)" + ) + + parser.add_argument( + "--show-mapping", + action="store_true", + help="Print collection↔workspace mapping information and exit" + ) + + parser.add_argument( + "--watch", "-w", + action="store_true", + help="Watch for file changes and upload automatically (continuous mode)" + ) + + parser.add_argument( + "--interval", "-i", + type=int, + default=5, + help="Watch interval in seconds (default: 5)" + ) + + args = parser.parse_args() + + # Validate path if provided + if args.path: + if not os.path.exists(args.path): + logger.error(f"Path does not exist: {args.path}") + return 1 + + if not os.path.isdir(args.path): + logger.error(f"Path is not a directory: {args.path}") + return 1 + + args.path = os.path.abspath(args.path) + logger.info(f"Using specified path: {args.path}") + + # Get configuration + config = get_remote_config(args.path) + + # Override with command-line arguments + if args.endpoint: + 
config["upload_endpoint"] = args.endpoint + if args.max_retries is not None: + config["max_retries"] = args.max_retries + if args.timeout is not None: + config["timeout"] = args.timeout + + logger.info(f"Workspace path: {config['workspace_path']}") + logger.info(f"Collection name: {config['collection_name']}") + logger.info(f"Upload endpoint: {config['upload_endpoint']}") + + if args.show_mapping: + with RemoteUploadClient( + upload_endpoint=config["upload_endpoint"], + workspace_path=config["workspace_path"], + collection_name=config["collection_name"], + max_retries=config["max_retries"], + timeout=config["timeout"], + ) as client: + client.log_mapping_summary() + return 0 + + # Handle watch mode + if args.watch: + logger.info("Starting watch mode for continuous file monitoring") + try: + with RemoteUploadClient( + upload_endpoint=config["upload_endpoint"], + workspace_path=config["workspace_path"], + collection_name=config["collection_name"], + max_retries=config["max_retries"], + timeout=config["timeout"] + ) as client: + + logger.info("Remote upload client initialized successfully") + client.log_mapping_summary() + + # Test server connection first + logger.info("Checking server status...") + status = client.get_server_status() + is_success = ( + isinstance(status, dict) and + 'workspace_path' in status and + 'collection_name' in status and + status.get('status') == 'ready' + ) + if not is_success: + error = status.get("error", {}) + logger.error(f"Cannot connect to server: {error.get('message', 'Unknown error')}") + return 1 + + logger.info("Server connection successful") + logger.info(f"Starting file monitoring with {args.interval}s interval") + + # Start the watch loop + client.watch_loop(interval=args.interval) + + return 0 + + except KeyboardInterrupt: + logger.info("Watch mode stopped by user") + return 0 + except Exception as e: + logger.error(f"Watch mode failed: {e}") + return 1 + + # Initialize client with context manager for cleanup + try: + with 
RemoteUploadClient( + upload_endpoint=config["upload_endpoint"], + workspace_path=config["workspace_path"], + collection_name=config["collection_name"], + max_retries=config["max_retries"], + timeout=config["timeout"] + ) as client: + + logger.info("Remote upload client initialized successfully") + + client.log_mapping_summary() + + # Test server connection + logger.info("Checking server status...") + status = client.get_server_status() + # For delta endpoint, success is indicated by having expected fields (not a "success" boolean) + is_success = ( + isinstance(status, dict) and + 'workspace_path' in status and + 'collection_name' in status and + status.get('status') == 'ready' + ) + if not is_success: + error = status.get("error", {}) + logger.error(f"Cannot connect to server: {error.get('message', 'Unknown error')}") + return 1 + + logger.info("Server connection successful") + + # Scan repository and upload files + logger.info("Scanning repository for files...") + workspace_path = Path(config['workspace_path']) + + # Find all files in the repository + all_files = [] + for file_path in workspace_path.rglob('*'): + if file_path.is_file() and not file_path.name.startswith('.'): + rel_path = file_path.relative_to(workspace_path) + # Skip .codebase directory and other metadata + if not str(rel_path).startswith('.codebase'): + all_files.append(file_path) + + logger.info(f"Found {len(all_files)} files to upload") + + if not all_files: + logger.warning("No files found to upload") + return 0 + + # Detect changes (treat all files as changes for initial upload) + if args.force: + # Force mode: treat all files as created + changes = {"created": all_files, "updated": [], "deleted": [], "moved": [], "unchanged": []} + else: + changes = client.detect_file_changes(all_files) + + if not client.has_meaningful_changes(changes): + logger.info("No meaningful changes to upload") + return 0 + + logger.info(f"Changes detected: {len(changes.get('created', []))} created, 
{len(changes.get('updated', []))} updated, {len(changes.get('deleted', []))} deleted") + + # Process and upload changes + logger.info("Uploading files to remote server...") + success = client.process_changes_and_upload(changes) + + if success: + logger.info("Repository upload completed successfully!") + logger.info(f"Collection name: {config['collection_name']}") + logger.info(f"Files uploaded: {len(all_files)}") + else: + logger.error("Repository upload failed!") + return 1 + + return 0 + + except Exception as e: + logger.error(f"Failed to initialize remote upload client: {e}") + return 1 + + +if __name__ == "__main__": + import sys + sys.exit(main()) diff --git a/scripts/standalone_upload_client.py b/scripts/standalone_upload_client.py new file mode 100644 index 00000000..76a42432 --- /dev/null +++ b/scripts/standalone_upload_client.py @@ -0,0 +1,1259 @@ +#!/usr/bin/env python3 +""" +Standalone Remote Upload Client for Context-Engine. + +This is a self-contained version of the remote upload client that doesn't require +the full Context-Engine repository. It includes only the essential functions +needed for delta bundle creation and upload. 
+ +Example usage: + python3 standalone_upload_client.py --path /path/to/your/project --server https://your-server.com +""" + +import os +import json +import time +import uuid +import hashlib +import tarfile +import tempfile +import logging +import argparse +from pathlib import Path +from typing import Dict, List, Any, Optional, Tuple +from datetime import datetime +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# ============================================================================= +# EMBEDDED DEPENDENCIES (Extracted from Context-Engine) +# ============================================================================= + +# Language detection mapping (from ingest_code.py) +CODE_EXTS = { + ".py": "python", + ".js": "javascript", + ".ts": "typescript", + ".tsx": "typescript", + ".jsx": "javascript", + ".java": "java", + ".go": "go", + ".rs": "rust", + ".rb": "ruby", + ".php": "php", + ".c": "c", + ".h": "c", + ".cpp": "cpp", + ".cc": "cpp", + ".hpp": "cpp", + ".cs": "csharp", + ".kt": "kotlin", + ".swift": "swift", + ".scala": "scala", + ".sh": "shell", + ".ps1": "powershell", + ".psm1": "powershell", + ".psd1": "powershell", + ".sql": "sql", + ".md": "markdown", + ".yml": "yaml", + ".yaml": "yaml", + ".toml": "toml", + ".ini": "ini", + ".cfg": "ini", + ".conf": "ini", + ".xml": "xml", + ".html": "html", + ".htm": "html", + ".css": "css", + ".scss": "scss", + ".sass": "sass", + ".less": "less", + ".json": "json", + "Dockerfile": "dockerfile", + "Makefile": "makefile", + ".tf": "terraform", + ".tfvars": "terraform", + ".hcl": "terraform", + ".vue": "vue", + ".svelte": "svelte", + ".elm": "elm", + ".dart": "dart", + ".lua": "lua", + ".r": "r", + ".R": "r", + ".m": "matlab", + ".pl": "perl", + ".swift": "swift", + ".kt": "kotlin", + ".cljs": "clojure", + ".clj": "clojure", + ".hs": "haskell", + ".ml": "ocaml", + 
".zig": "zig", + ".nim": "nim", + ".v": "verilog", + ".sv": "verilog", + ".vhdl": "vhdl", + ".asm": "assembly", + ".s": "assembly", + ". Dockerfile": "dockerfile", +} + +def hash_id(text: str, path: str, start: int, end: int) -> str: + """Generate hash ID for content (from ingest_code.py).""" + h = hashlib.sha1( + f"{path}:{start}-{end}\n{text}".encode("utf-8", errors="ignore") + ).hexdigest() + return h[:16] + +def get_collection_name(repo_name: Optional[str] = None) -> str: + """Generate collection name with 8-char hash for local workspaces. + + Simplified version from workspace_state.py. + """ + if not repo_name: + return "default-collection" + hash_obj = hashlib.sha256(repo_name.encode()) + short_hash = hash_obj.hexdigest()[:8] + return f"{repo_name}-{short_hash}" + +def _extract_repo_name_from_path(workspace_path: str) -> str: + """Extract repository name from workspace path. + + Simplified version from workspace_state.py. + """ + try: + path = Path(workspace_path).resolve() + # Get the directory name as repo name + return path.name + except Exception: + return "unknown-repo" + +# Simple file-based hash cache (simplified from workspace_state.py) +class SimpleHashCache: + """Simple file-based hash cache for tracking file changes.""" + + def __init__(self, workspace_path: str, repo_name: str): + self.workspace_path = Path(workspace_path).resolve() + self.repo_name = repo_name + self.cache_dir = self.workspace_path / ".context-engine" + self.cache_file = self.cache_dir / "file_cache.json" + self.cache_dir.mkdir(exist_ok=True) + + def _load_cache(self) -> Dict[str, str]: + """Load cache from disk.""" + if not self.cache_file.exists(): + return {} + try: + with open(self.cache_file, 'r', encoding='utf-8') as f: + data = json.load(f) + return data.get("file_hashes", {}) + except Exception: + return {} + + def _save_cache(self, file_hashes: Dict[str, str]): + """Save cache to disk.""" + try: + data = { + "file_hashes": file_hashes, + "updated_at": 
datetime.now().isoformat()
            }
            with open(self.cache_file, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
        except Exception:
            # NOTE(review): write failures are swallowed silently — the cache
            # is best-effort, but a debug log here would aid troubleshooting.
            pass

    def get_hash(self, file_path: str) -> str:
        """Return the cached hash for *file_path* ("" if not cached)."""
        file_hashes = self._load_cache()
        abs_path = str(Path(file_path).resolve())
        return file_hashes.get(abs_path, "")

    def set_hash(self, file_path: str, file_hash: str):
        """Record *file_hash* for *file_path* (full load/save round-trip per call)."""
        file_hashes = self._load_cache()
        abs_path = str(Path(file_path).resolve())
        file_hashes[abs_path] = file_hash
        self._save_cache(file_hashes)

# Create global cache instance (will be initialized in RemoteUploadClient)
# NOTE(review): module-global state — assumes one client per process; a second
# RemoteUploadClient would silently repoint the shared cache. Confirm intended.
_hash_cache: Optional[SimpleHashCache] = None

def get_cached_file_hash(file_path: str, repo_name: Optional[str] = None) -> str:
    """Get cached file hash for tracking changes ("" when cache uninitialized)."""
    global _hash_cache
    if _hash_cache:
        return _hash_cache.get_hash(file_path)
    return ""

def set_cached_file_hash(file_path: str, file_hash: str, repo_name: Optional[str] = None):
    """Set cached file hash for tracking changes (no-op when cache uninitialized)."""
    global _hash_cache
    if _hash_cache:
        _hash_cache.set_hash(file_path, file_hash)


class RemoteUploadClient:
    """Client for uploading delta bundles to remote server."""

    def _translate_to_container_path(self, host_path: str) -> str:
        """Translate host path to container path for API communication."""
        # Use environment variable for path mapping if available
        # NOTE(review): the HOST_ROOT default is a developer-machine-specific
        # absolute path; deployments should set HOST_ROOT/CONTAINER_ROOT
        # explicitly — confirm this default is intentional.
        host_root = os.environ.get("HOST_ROOT", "/home/coder/project/Context-Engine/dev-workspace")
        container_root = os.environ.get("CONTAINER_ROOT", "/work")

        if host_path.startswith(host_root):
            return host_path.replace(host_root, container_root)
        else:
            # Fallback: if path doesn't match expected pattern, use as-is
            return host_path

    def __init__(self, upload_endpoint: str, workspace_path: str, collection_name: str,
                 max_retries: int = 3, timeout: int = 30, metadata_path: Optional[str] = None):
        """Initialize remote upload client.

        Args:
            upload_endpoint: Base URL of the delta-upload server (trailing '/' stripped).
            workspace_path: Local workspace root to upload from.
            collection_name: Target collection on the server.
            max_retries: HTTP retry budget (also used by the session's Retry adapter).
            timeout: Per-request timeout in seconds.
            metadata_path: Unused here; accepted for interface compatibility.
        """
        self.upload_endpoint = upload_endpoint.rstrip('/')
        self.workspace_path = workspace_path
        self.collection_name = collection_name
        self.max_retries = max_retries
        self.timeout = timeout
        self.temp_dir = None

        # Set environment variables for cache functions
        os.environ["WORKSPACE_PATH"] = workspace_path

        # Store repo name and initialize hash cache
        self.repo_name = _extract_repo_name_from_path(workspace_path)
        # Fallback to directory name if repo detection fails (for non-git repos)
        if not self.repo_name:
            self.repo_name = Path(workspace_path).name
        # Rebind the module-global cache to this client's workspace.
        global _hash_cache
        _hash_cache = SimpleHashCache(workspace_path, self.repo_name)

        # Setup HTTP session with simple retry
        self.session = requests.Session()
        retry_strategy = Retry(total=max_retries, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit with cleanup."""
        self.cleanup()

    def cleanup(self):
        """Clean up temporary directories (idempotent; never raises)."""
        if self.temp_dir and os.path.exists(self.temp_dir):
            try:
                import shutil
                shutil.rmtree(self.temp_dir)
                logger.debug(f"[remote_upload] Cleaned up temporary directory: {self.temp_dir}")
            except Exception as e:
                logger.warning(f"[remote_upload] Failed to cleanup temp directory {self.temp_dir}: {e}")
            finally:
                self.temp_dir = None

    def get_mapping_summary(self) -> Dict[str, Any]:
        """Return derived collection mapping details."""
        container_path = self._translate_to_container_path(self.workspace_path)
        return {
            "repo_name": self.repo_name,
            "collection_name": self.collection_name,
            "source_path": self.workspace_path,
            "container_path": container_path,
            "upload_endpoint": self.upload_endpoint,
        }

    def log_mapping_summary(self) -> None:
        """Log
mapping summary for user visibility."""
        info = self.get_mapping_summary()
        logger.info("[remote_upload] Collection mapping:")
        logger.info(f"  repo_name: {info['repo_name']}")
        logger.info(f"  collection_name: {info['collection_name']}")
        logger.info(f"  source_path: {info['source_path']}")
        logger.info(f"  container_path: {info['container_path']}")

    def _get_temp_bundle_dir(self) -> Path:
        """Get or create temporary directory for bundle creation."""
        if not self.temp_dir:
            self.temp_dir = tempfile.mkdtemp(prefix="delta_bundle_")
        return Path(self.temp_dir)
    # CLI is stateless - sequence tracking is handled by server

    def detect_file_changes(self, changed_paths: List[Path]) -> Dict[str, List]:
        """
        Detect what type of changes occurred for each file path.

        Args:
            changed_paths: List of changed file paths

        Returns:
            Dictionary with change types: created, updated, deleted, moved, unchanged
        """
        changes = {
            "created": [],
            "updated": [],
            "deleted": [],
            "moved": [],
            "unchanged": []
        }

        for path in changed_paths:
            abs_path = str(path.resolve())
            cached_hash = get_cached_file_hash(abs_path, self.repo_name)

            if not path.exists():
                # File was deleted
                if cached_hash:
                    changes["deleted"].append(path)
            else:
                # File exists - calculate current hash
                try:
                    with open(path, 'rb') as f:
                        content = f.read()
                        current_hash = hashlib.sha1(content).hexdigest()

                    if not cached_hash:
                        # New file
                        changes["created"].append(path)
                    elif cached_hash != current_hash:
                        # Modified file
                        changes["updated"].append(path)
                    else:
                        # Unchanged (might be a move detection candidate)
                        changes["unchanged"].append(path)

                    # Update cache
                    # NOTE(review): the cache is updated here, during detection —
                    # before any bundle is built or upload confirmed. A later
                    # "previous hash" lookup therefore sees the NEW hash, and a
                    # failed upload means the change is invisible on the next
                    # scan. Confirm this ordering is intentional.
                    set_cached_file_hash(abs_path, current_hash, self.repo_name)
                except Exception:
                    # Skip files that can't be read
                    continue

        # Detect moves by looking for files with same content hash
        # but different paths (requires additional tracking)
        changes["moved"] = self._detect_moves(changes["created"], changes["deleted"])

        return changes

    def _detect_moves(self, created_files: List[Path], deleted_files: List[Path]) -> List[Tuple[Path, Path]]:
        """
        Detect file moves by matching content hashes between created and deleted files.

        Args:
            created_files: List of newly created files
            deleted_files: List of deleted files

        Returns:
            List of (source, destination) path tuples for detected moves
        """
        moves = []
        deleted_hashes = {}

        # Build hash map for deleted files
        for deleted_path in deleted_files:
            try:
                # Try to get cached hash first, fallback to file content
                cached_hash = get_cached_file_hash(str(deleted_path), self.repo_name)
                if cached_hash:
                    deleted_hashes[cached_hash] = deleted_path
                    continue

                # If no cached hash, try to read from file if it still exists
                if deleted_path.exists():
                    with open(deleted_path, 'rb') as f:
                        content = f.read()
                    file_hash = hashlib.sha1(content).hexdigest()
                    deleted_hashes[file_hash] = deleted_path
            except Exception:
                continue

        # Match created files with deleted files by hash
        for created_path in created_files:
            try:
                with open(created_path, 'rb') as f:
                    content = f.read()
                file_hash = hashlib.sha1(content).hexdigest()

                if file_hash in deleted_hashes:
                    source_path = deleted_hashes[file_hash]
                    moves.append((source_path, created_path))
                    # Remove from consideration so one source maps to one dest
                    del deleted_hashes[file_hash]
            except Exception:
                continue

        return moves

    def create_delta_bundle(self, changes: Dict[str, List]) -> Tuple[str, Dict[str, Any]]:
        """
        Create a delta bundle from detected changes.
+ + Args: + changes: Dictionary of file changes by type + + Returns: + Tuple of (bundle_path, manifest_metadata) + """ + bundle_id = str(uuid.uuid4()) + # CLI is stateless - server handles sequence numbers + created_at = datetime.now().isoformat() + + # Create temporary directory for bundle + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create directory structure + files_dir = temp_path / "files" + metadata_dir = temp_path / "metadata" + files_dir.mkdir() + metadata_dir.mkdir() + + # Create subdirectories + (files_dir / "created").mkdir() + (files_dir / "updated").mkdir() + (files_dir / "moved").mkdir() + + operations = [] + total_size = 0 + file_hashes = {} + + # Process created files + for path in changes["created"]: + rel_path = str(path.relative_to(Path(self.workspace_path))) + try: + with open(path, 'rb') as f: + content = f.read() + file_hash = hashlib.sha1(content).hexdigest() + content_hash = f"sha1:{file_hash}" + + # Write file to bundle + bundle_file_path = files_dir / "created" / rel_path + bundle_file_path.parent.mkdir(parents=True, exist_ok=True) + bundle_file_path.write_bytes(content) + + # Get file info + stat = path.stat() + language = CODE_EXTS.get(path.suffix.lower(), "unknown") + + operation = { + "operation": "created", + "path": rel_path, + "relative_path": rel_path, + "absolute_path": str(path.resolve()), + "size_bytes": stat.st_size, + "content_hash": content_hash, + "file_hash": f"sha1:{hash_id(content.decode('utf-8', errors='ignore'), rel_path, 1, len(content.splitlines()))}", + "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(), + "language": language + } + operations.append(operation) + file_hashes[rel_path] = f"sha1:{file_hash}" + total_size += stat.st_size + + except Exception as e: + print(f"[bundle_create] Error processing created file {path}: {e}") + continue + + # Process updated files + for path in changes["updated"]: + rel_path = str(path.relative_to(Path(self.workspace_path))) 
+ try: + with open(path, 'rb') as f: + content = f.read() + file_hash = hashlib.sha1(content).hexdigest() + content_hash = f"sha1:{file_hash}" + previous_hash = get_cached_file_hash(str(path.resolve()), self.repo_name) + + # Write file to bundle + bundle_file_path = files_dir / "updated" / rel_path + bundle_file_path.parent.mkdir(parents=True, exist_ok=True) + bundle_file_path.write_bytes(content) + + # Get file info + stat = path.stat() + language = CODE_EXTS.get(path.suffix.lower(), "unknown") + + operation = { + "operation": "updated", + "path": rel_path, + "relative_path": rel_path, + "absolute_path": str(path.resolve()), + "size_bytes": stat.st_size, + "content_hash": content_hash, + "previous_hash": f"sha1:{previous_hash}" if previous_hash else None, + "file_hash": f"sha1:{hash_id(content.decode('utf-8', errors='ignore'), rel_path, 1, len(content.splitlines()))}", + "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(), + "language": language + } + operations.append(operation) + file_hashes[rel_path] = f"sha1:{file_hash}" + total_size += stat.st_size + + except Exception as e: + print(f"[bundle_create] Error processing updated file {path}: {e}") + continue + + # Process moved files + for source_path, dest_path in changes["moved"]: + dest_rel_path = str(dest_path.relative_to(Path(self.workspace_path))) + source_rel_path = str(source_path.relative_to(Path(self.workspace_path))) + try: + with open(dest_path, 'rb') as f: + content = f.read() + file_hash = hashlib.sha1(content).hexdigest() + content_hash = f"sha1:{file_hash}" + + # Write file to bundle + bundle_file_path = files_dir / "moved" / dest_rel_path + bundle_file_path.parent.mkdir(parents=True, exist_ok=True) + bundle_file_path.write_bytes(content) + + # Get file info + stat = dest_path.stat() + language = CODE_EXTS.get(dest_path.suffix.lower(), "unknown") + + operation = { + "operation": "moved", + "path": dest_rel_path, + "relative_path": dest_rel_path, + "absolute_path": 
str(dest_path.resolve()), + "source_path": source_rel_path, + "source_relative_path": source_rel_path, + "source_absolute_path": str(source_path.resolve()), + "size_bytes": stat.st_size, + "content_hash": content_hash, + "file_hash": f"sha1:{idx.hash_id(content.decode('utf-8', errors='ignore'), dest_rel_path, 1, len(content.splitlines()))}", + "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(), + "language": language + } + operations.append(operation) + file_hashes[dest_rel_path] = f"sha1:{file_hash}" + total_size += stat.st_size + + except Exception as e: + print(f"[bundle_create] Error processing moved file {source_path} -> {dest_path}: {e}") + continue + + # Process deleted files + for path in changes["deleted"]: + rel_path = str(path.relative_to(Path(self.workspace_path))) + try: + previous_hash = get_cached_file_hash(str(path.resolve()), self.repo_name) + + operation = { + "operation": "deleted", + "path": rel_path, + "relative_path": rel_path, + "absolute_path": str(path.resolve()), + "previous_hash": f"sha1:{previous_hash}" if previous_hash else None, + "file_hash": None, + "modified_time": datetime.now().isoformat(), + "language": idx.CODE_EXTS.get(path.suffix.lower(), "unknown") + } + operations.append(operation) + + except Exception as e: + print(f"[bundle_create] Error processing deleted file {path}: {e}") + continue + + # Create manifest + manifest = { + "version": "1.0", + "bundle_id": bundle_id, + "workspace_path": self.workspace_path, + "collection_name": self.collection_name, + "created_at": created_at, + # CLI is stateless - server will assign sequence numbers + "sequence_number": None, # Server will assign + "parent_sequence": None, # Server will determine + "operations": { + "created": len(changes["created"]), + "updated": len(changes["updated"]), + "deleted": len(changes["deleted"]), + "moved": len(changes["moved"]) + }, + "total_files": len(operations), + "total_size_bytes": total_size, + "compression": "gzip", + "encoding": 
"utf-8" + } + + # Write manifest + (temp_path / "manifest.json").write_text(json.dumps(manifest, indent=2)) + + # Write operations metadata + operations_metadata = { + "operations": operations + } + (metadata_dir / "operations.json").write_text(json.dumps(operations_metadata, indent=2)) + + # Write hashes + hashes_metadata = { + "workspace_path": self.workspace_path, + "updated_at": created_at, + "file_hashes": file_hashes + } + (metadata_dir / "hashes.json").write_text(json.dumps(hashes_metadata, indent=2)) + + # Create tarball in temporary directory + temp_bundle_dir = self._get_temp_bundle_dir() + bundle_path = temp_bundle_dir / f"{bundle_id}.tar.gz" + with tarfile.open(bundle_path, "w:gz") as tar: + tar.add(temp_path, arcname=f"{bundle_id}") + + return str(bundle_path), manifest + + def upload_bundle(self, bundle_path: str, manifest: Dict[str, Any]) -> Dict[str, Any]: + """ + Upload delta bundle to remote server with exponential backoff retry. + + Args: + bundle_path: Path to the bundle tarball + manifest: Bundle manifest metadata + + Returns: + Server response dictionary + """ + last_error = None + + for attempt in range(self.max_retries + 1): + try: + # Simple exponential backoff + if attempt > 0: + delay = min(2 ** (attempt - 1), 30) # 1, 2, 4, 8... 
capped at 30s + logger.info(f"[remote_upload] Retry attempt {attempt + 1}/{self.max_retries + 1} after {delay}s delay") + time.sleep(delay) + + # Verify bundle exists + if not os.path.exists(bundle_path): + return {"success": False, "error": {"code": "BUNDLE_NOT_FOUND", "message": f"Bundle not found: {bundle_path}"}} + + # Check bundle size (100MB limit) + bundle_size = os.path.getsize(bundle_path) + if bundle_size > 100 * 1024 * 1024: + return {"success": False, "error": {"code": "BUNDLE_TOO_LARGE", "message": f"Bundle too large: {bundle_size} bytes"}} + + with open(bundle_path, 'rb') as bundle_file: + files = { + 'bundle': (f"{manifest['bundle_id']}.tar.gz", bundle_file, 'application/gzip') + } + + data = { + 'workspace_path': self._translate_to_container_path(self.workspace_path), + 'collection_name': self.collection_name, + # CLI is stateless - server handles sequence numbers + 'force': 'false', + 'source_path': self.workspace_path, + } + + logger.info(f"[remote_upload] Uploading bundle {manifest['bundle_id']} (size: {bundle_size} bytes)") + + response = self.session.post( + f"{self.upload_endpoint}/api/v1/delta/upload", + files=files, + data=data, + timeout=self.timeout + ) + + if response.status_code == 200: + result = response.json() + logger.info(f"[remote_upload] Successfully uploaded bundle {manifest['bundle_id']}") + return result + # Handle error + error_msg = f"Upload failed with status {response.status_code}" + try: + error_detail = response.json() + error_detail_msg = error_detail.get('error', {}).get('message', 'Unknown error') + error_msg += f": {error_detail_msg}" + error_code = error_detail.get('error', {}).get('code', 'HTTP_ERROR') + except: + error_msg += f": {response.text[:200]}" + error_code = "HTTP_ERROR" + + last_error = {"success": False, "error": {"code": error_code, "message": error_msg, "status_code": response.status_code}} + + # Don't retry on client errors (except 429) + if 400 <= response.status_code < 500 and response.status_code 
!= 429: + return last_error + + logger.warning(f"[remote_upload] Upload attempt {attempt + 1} failed: {error_msg}") + + except requests.exceptions.Timeout as e: + last_error = {"success": False, "error": {"code": "TIMEOUT_ERROR", "message": f"Upload timeout: {str(e)}"}} + logger.warning(f"[remote_upload] Upload timeout on attempt {attempt + 1}: {e}") + + except requests.exceptions.ConnectionError as e: + last_error = {"success": False, "error": {"code": "CONNECTION_ERROR", "message": f"Connection error: {str(e)}"}} + logger.warning(f"[remote_upload] Connection error on attempt {attempt + 1}: {e}") + + except requests.exceptions.RequestException as e: + last_error = {"success": False, "error": {"code": "NETWORK_ERROR", "message": f"Network error: {str(e)}"}} + logger.warning(f"[remote_upload] Network error on attempt {attempt + 1}: {e}") + + except Exception as e: + last_error = {"success": False, "error": {"code": "UPLOAD_ERROR", "message": f"Upload error: {str(e)}"}} + logger.error(f"[remote_upload] Unexpected error on attempt {attempt + 1}: {e}") + + # All retries exhausted + logger.error(f"[remote_upload] All {self.max_retries + 1} upload attempts failed for bundle {manifest.get('bundle_id', 'unknown')}") + return last_error or { + "success": False, + "error": { + "code": "MAX_RETRIES_EXCEEDED", + "message": f"Upload failed after {self.max_retries + 1} attempts" + } + } + + def get_server_status(self) -> Dict[str, Any]: + """Get server status with simplified error handling.""" + try: + container_workspace_path = self._translate_to_container_path(self.workspace_path) + + response = self.session.get( + f"{self.upload_endpoint}/api/v1/delta/status", + params={'workspace_path': container_workspace_path}, + timeout=min(self.timeout, 10) + ) + + if response.status_code == 200: + return response.json() + + # Handle error response + error_msg = f"Status check failed with HTTP {response.status_code}" + try: + error_detail = response.json() + error_msg += f": 
def has_meaningful_changes(self, changes: Dict[str, List]) -> bool:
    """Return True when the change set contains anything other than 'unchanged' entries."""
    total_changes = sum(len(files) for op, files in changes.items() if op != "unchanged")
    return total_changes > 0


def process_changes_and_upload(self, changes: Dict[str, List]) -> bool:
    """Process pre-computed changes and upload a delta bundle.

    Method of RemoteUploadClient.

    Args:
        changes: Dictionary of file changes keyed by operation type
            ("created"/"updated"/"deleted"/"moved"/"unchanged").

    Returns:
        True if the upload succeeded (or there was nothing to upload),
        False otherwise.
    """
    try:
        logger.info("[remote_upload] Processing pre-computed changes")

        # Validate input.
        if not changes:
            logger.info("[remote_upload] No changes provided")
            return True

        if not self.has_meaningful_changes(changes):
            logger.info("[remote_upload] No meaningful changes detected, skipping upload")
            return True

        # Log change summary.
        total_changes = sum(len(files) for op, files in changes.items() if op != "unchanged")
        # FIX: use .get() so a partially-populated change dict (which the
        # guards above tolerate) cannot raise KeyError in the log line.
        logger.info(f"[remote_upload] Detected {total_changes} meaningful changes: "
                    f"{len(changes.get('created', []))} created, {len(changes.get('updated', []))} updated, "
                    f"{len(changes.get('deleted', []))} deleted, {len(changes.get('moved', []))} moved")

        # Create delta bundle.
        bundle_path = None
        try:
            bundle_path, manifest = self.create_delta_bundle(changes)
            logger.info(f"[remote_upload] Created delta bundle: {manifest['bundle_id']} "
                        f"(size: {manifest['total_size_bytes']} bytes)")
            if not bundle_path or not os.path.exists(bundle_path):
                raise RuntimeError(f"Failed to create bundle at {bundle_path}")
        except Exception as e:
            logger.error(f"[remote_upload] Error creating delta bundle: {e}")
            # Clean up any temporary files on failure.
            self.cleanup()
            return False

        # Upload bundle (upload_bundle implements the retry logic).
        try:
            response = self.upload_bundle(bundle_path, manifest)

            if response.get("success", False):
                processed_ops = response.get('processed_operations', {})
                logger.info(f"[remote_upload] Successfully uploaded bundle {manifest['bundle_id']}")
                logger.info(f"[remote_upload] Processed operations: {processed_ops}")

                # Clean up the temporary bundle after a successful upload.
                try:
                    if os.path.exists(bundle_path):
                        os.remove(bundle_path)
                        logger.debug(f"[remote_upload] Cleaned up temporary bundle: {bundle_path}")
                    # Also clean up the entire temp directory.
                    self.cleanup()
                except Exception as cleanup_error:
                    logger.warning(f"[remote_upload] Failed to cleanup bundle {bundle_path}: {cleanup_error}")

                return True

            error_msg = response.get('error', {}).get('message', 'Unknown upload error')
            logger.error(f"[remote_upload] Upload failed: {error_msg}")
            return False

        except Exception as e:
            logger.error(f"[remote_upload] Error uploading bundle: {e}")
            return False

    except Exception as e:
        logger.error(f"[remote_upload] Unexpected error in process_changes_and_upload: {e}")
        return False


def watch_loop(self, interval: int = 5):
    """Poll the workspace every `interval` seconds and upload detected changes.

    Method of RemoteUploadClient.  Runs until KeyboardInterrupt.
    """
    logger.info(f"[watch] Starting file monitoring (interval: {interval}s)")
    logger.info(f"[watch] Monitoring: {self.workspace_path}")
    logger.info("[watch] Press Ctrl+C to stop")

    try:
        while True:
            try:
                # Rescan the workspace and diff against cached state.
                all_files = self.get_all_code_files()
                changes = self.detect_file_changes(all_files)

                # Count only meaningful changes (exclude unchanged).
                meaningful_changes = (len(changes.get("created", [])) + len(changes.get("updated", []))
                                      + len(changes.get("deleted", [])) + len(changes.get("moved", [])))

                if meaningful_changes > 0:
                    logger.info(f"[watch] Detected {meaningful_changes} changes: { {k: len(v) for k, v in changes.items() if k != 'unchanged'} }")
                    success = self.process_changes_and_upload(changes)
                    if success:
                        logger.info("[watch] Successfully uploaded changes")
                    else:
                        logger.error("[watch] Failed to upload changes")
                else:
                    logger.debug("[watch] No changes detected")  # debug level to avoid log spam

                time.sleep(interval)

            except KeyboardInterrupt:
                logger.info("[watch] Received interrupt signal, stopping...")
                break
            except Exception as e:
                logger.error(f"[watch] Error in watch loop: {e}")
                time.sleep(interval)  # keep watching even after errors

    except KeyboardInterrupt:
        logger.info("[watch] File monitoring stopped by user")
def get_all_code_files(self) -> List[Path]:
    """Return all indexable code files under the workspace.

    Method of RemoteUploadClient.

    FIX: hidden-file filtering is applied to path components *relative to the
    workspace root*.  The previous implementation inspected absolute-path
    components, so a dotted directory anywhere above the workspace (e.g.
    /home/user/.local/ws) silently excluded every file.  Results are also
    deduplicated in case extension globs overlap.
    """
    all_files: List[Path] = []
    try:
        workspace_path = Path(self.workspace_path)
        candidates = set()
        for ext in CODE_EXTS:
            candidates.update(workspace_path.rglob(f"*{ext}"))

        for f in sorted(candidates):
            if not f.is_file():
                continue
            rel_parts = f.relative_to(workspace_path).parts
            # Skip hidden files/directories inside the workspace.
            if any(part.startswith('.') for part in rel_parts):
                continue
            # Skip our own metadata directory wherever it appears.
            if '.context-engine' in str(f):
                continue
            all_files.append(f)
    except Exception as e:
        logger.error(f"[watch] Error scanning files: {e}")

    return all_files


def process_and_upload_changes(self, changed_paths: List[Path]) -> bool:
    """Detect changes among `changed_paths` and upload a delta bundle.

    Method of RemoteUploadClient.

    Args:
        changed_paths: List of changed file paths.

    Returns:
        True if the upload succeeded (or there was nothing to upload),
        False otherwise.
    """
    try:
        logger.info(f"[remote_upload] Processing {len(changed_paths)} changed paths")

        if not changed_paths:
            logger.info("[remote_upload] No changed paths provided")
            return True

        # Detect changes.
        try:
            changes = self.detect_file_changes(changed_paths)
        except Exception as e:
            logger.error(f"[remote_upload] Error detecting file changes: {e}")
            return False

        if not self.has_meaningful_changes(changes):
            logger.info("[remote_upload] No meaningful changes detected, skipping upload")
            return True

        # Log change summary.
        total_changes = sum(len(files) for op, files in changes.items() if op != "unchanged")
        # FIX: .get() guards against partially-populated change dicts.
        logger.info(f"[remote_upload] Detected {total_changes} meaningful changes: "
                    f"{len(changes.get('created', []))} created, {len(changes.get('updated', []))} updated, "
                    f"{len(changes.get('deleted', []))} deleted, {len(changes.get('moved', []))} moved")

        # Create delta bundle.
        bundle_path = None
        try:
            bundle_path, manifest = self.create_delta_bundle(changes)
            logger.info(f"[remote_upload] Created delta bundle: {manifest['bundle_id']} "
                        f"(size: {manifest['total_size_bytes']} bytes)")
            if not bundle_path or not os.path.exists(bundle_path):
                raise RuntimeError(f"Failed to create bundle at {bundle_path}")
        except Exception as e:
            logger.error(f"[remote_upload] Error creating delta bundle: {e}")
            self.cleanup()  # drop any temp files from the failed build
            return False

        # Upload bundle (retry logic lives in upload_bundle).
        try:
            response = self.upload_bundle(bundle_path, manifest)

            if response.get("success", False):
                processed_ops = response.get('processed_operations', {})
                logger.info(f"[remote_upload] Successfully uploaded bundle {manifest['bundle_id']}")
                logger.info(f"[remote_upload] Processed operations: {processed_ops}")

                # Clean up temporary bundle after successful upload.
                try:
                    if os.path.exists(bundle_path):
                        os.remove(bundle_path)
                        logger.debug(f"[remote_upload] Cleaned up temporary bundle: {bundle_path}")
                    self.cleanup()
                except Exception as cleanup_error:
                    logger.warning(f"[remote_upload] Failed to cleanup bundle {bundle_path}: {cleanup_error}")

                return True

            error = response.get("error", {})
            error_code = error.get("code", "UNKNOWN")
            error_msg = error.get("message", "Unknown error")
            logger.error(f"[remote_upload] Upload failed: {error_msg}")

            # Classify the failure for diagnostics; all paths return False.
            # CLI is stateless - server handles sequence management.
            if error_code in ["BUNDLE_TOO_LARGE", "BUNDLE_NOT_FOUND"]:
                logger.error(f"[remote_upload] Unrecoverable error ({error_code}): {error_msg}")
            elif error_code in ["TIMEOUT_ERROR", "CONNECTION_ERROR", "NETWORK_ERROR"]:
                logger.warning(f"[remote_upload] Network-related error ({error_code}): {error_msg}")
                logger.warning("[remote_upload] Consider falling back to local mode if this persists")
            else:
                logger.error(f"[remote_upload] Upload error ({error_code}): {error_msg}")
            return False

        except Exception as e:
            logger.error(f"[remote_upload] Unexpected error during upload: {e}")
            return False

    except Exception as e:
        logger.error(f"[remote_upload] Critical error in process_and_upload_changes: {e}")
        logger.exception("[remote_upload] Full traceback:")
        return False
mode if this persists") + return False + else: + # Other errors + logger.error(f"[remote_upload] Upload error ({error_code}): {error_msg}") + return False + + except Exception as e: + logger.error(f"[remote_upload] Unexpected error during upload: {e}") + return False + + except Exception as e: + logger.error(f"[remote_upload] Critical error in process_and_upload_changes: {e}") + logger.exception("[remote_upload] Full traceback:") + return False + +def get_remote_config(cli_path: Optional[str] = None) -> Dict[str, str]: + """Get remote upload configuration from environment variables and command-line arguments.""" + # Use command-line path if provided, otherwise fall back to environment variables + if cli_path: + workspace_path = cli_path + else: + workspace_path = os.environ.get("WATCH_ROOT", os.environ.get("WORKSPACE_PATH", "/work")) + + # Use auto-generated collection name based on repo name + repo_name = _extract_repo_name_from_path(workspace_path) + # Fallback to directory name if repo detection fails + if not repo_name: + repo_name = Path(workspace_path).name + collection_name = get_collection_name(repo_name) + + return { + "upload_endpoint": os.environ.get("REMOTE_UPLOAD_ENDPOINT", "http://localhost:8080"), + "workspace_path": workspace_path, + "collection_name": collection_name, + "max_retries": int(os.environ.get("REMOTE_UPLOAD_MAX_RETRIES", "3")), + "timeout": int(os.environ.get("REMOTE_UPLOAD_TIMEOUT", "30")) + } + + +def main(): + """Main entry point for the remote upload client.""" + parser = argparse.ArgumentParser( + description="Remote upload client for delta bundles in Context-Engine", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Upload from current directory or environment variables + python remote_upload_client.py + + # Upload from specific directory + python remote_upload_client.py --path /path/to/repo + + # Upload from specific directory with custom endpoint + python remote_upload_client.py --path 
/path/to/repo --endpoint http://remote-server:8080 + """ + ) + + parser.add_argument( + "--path", + type=str, + help="Path to the directory to upload (overrides WATCH_ROOT/WORKSPACE_PATH environment variables)" + ) + + parser.add_argument( + "--endpoint", + type=str, + help="Remote upload endpoint (overrides REMOTE_UPLOAD_ENDPOINT environment variable)" + ) + + parser.add_argument( + "--max-retries", + type=int, + help="Maximum number of upload retries (overrides REMOTE_UPLOAD_MAX_RETRIES environment variable)" + ) + + parser.add_argument( + "--timeout", + type=int, + help="Request timeout in seconds (overrides REMOTE_UPLOAD_TIMEOUT environment variable)" + ) + + parser.add_argument( + "--force", + action="store_true", + help="Force upload of all files (ignore cached state and treat all files as new)" + ) + + parser.add_argument( + "--show-mapping", + action="store_true", + help="Print collection↔workspace mapping information and exit" + ) + + parser.add_argument( + "--watch", "-w", + action="store_true", + help="Watch for file changes and upload automatically (continuous mode)" + ) + + parser.add_argument( + "--interval", "-i", + type=int, + default=5, + help="Watch interval in seconds (default: 5)" + ) + + args = parser.parse_args() + + # Validate path if provided + if args.path: + if not os.path.exists(args.path): + logger.error(f"Path does not exist: {args.path}") + return 1 + + if not os.path.isdir(args.path): + logger.error(f"Path is not a directory: {args.path}") + return 1 + + args.path = os.path.abspath(args.path) + logger.info(f"Using specified path: {args.path}") + + # Get configuration + config = get_remote_config(args.path) + + # Override config with command-line arguments if provided + if args.endpoint: + config["upload_endpoint"] = args.endpoint + if args.max_retries is not None: + config["max_retries"] = args.max_retries + if args.timeout is not None: + config["timeout"] = args.timeout + + logger.info(f"Workspace path: {config['workspace_path']}") + 
logger.info(f"Collection name: {config['collection_name']}") + logger.info(f"Upload endpoint: {config['upload_endpoint']}") + + if args.show_mapping: + with RemoteUploadClient( + upload_endpoint=config["upload_endpoint"], + workspace_path=config["workspace_path"], + collection_name=config["collection_name"], + max_retries=config["max_retries"], + timeout=config["timeout"], + ) as client: + client.log_mapping_summary() + return 0 + + # Handle watch mode + if args.watch: + logger.info("Starting watch mode for continuous file monitoring") + try: + with RemoteUploadClient( + upload_endpoint=config["upload_endpoint"], + workspace_path=config["workspace_path"], + collection_name=config["collection_name"], + max_retries=config["max_retries"], + timeout=config["timeout"] + ) as client: + + logger.info("Remote upload client initialized successfully") + client.log_mapping_summary() + + # Test server connection first + logger.info("Checking server status...") + status = client.get_server_status() + is_success = ( + isinstance(status, dict) and + 'workspace_path' in status and + 'collection_name' in status and + status.get('status') == 'ready' + ) + if not is_success: + error = status.get("error", {}) + logger.error(f"Cannot connect to server: {error.get('message', 'Unknown error')}") + return 1 + + logger.info("Server connection successful") + logger.info(f"Starting file monitoring with {args.interval}s interval") + + # Start the watch loop + client.watch_loop(interval=args.interval) + + return 0 + + except KeyboardInterrupt: + logger.info("Watch mode stopped by user") + return 0 + except Exception as e: + logger.error(f"Watch mode failed: {e}") + return 1 + + # Single upload mode (original logic) + # Initialize client with context manager for cleanup + try: + with RemoteUploadClient( + upload_endpoint=config["upload_endpoint"], + workspace_path=config["workspace_path"], + collection_name=config["collection_name"], + max_retries=config["max_retries"], + 
timeout=config["timeout"] + ) as client: + + logger.info("Remote upload client initialized successfully") + + client.log_mapping_summary() + + # Test server connection + logger.info("Checking server status...") + status = client.get_server_status() + # For delta endpoint, success is indicated by having expected fields (not a "success" boolean) + is_success = ( + isinstance(status, dict) and + 'workspace_path' in status and + 'collection_name' in status and + status.get('status') == 'ready' + ) + if not is_success: + error = status.get("error", {}) + logger.error(f"Cannot connect to server: {error.get('message', 'Unknown error')}") + return 1 + + logger.info("Server connection successful") + + # Scan repository and upload files + logger.info("Scanning repository for files...") + workspace_path = Path(config['workspace_path']) + + # Find all files in the repository + all_files = [] + for file_path in workspace_path.rglob('*'): + if file_path.is_file() and not file_path.name.startswith('.'): + rel_path = file_path.relative_to(workspace_path) + # Skip .codebase directory and other metadata + if not str(rel_path).startswith('.codebase'): + all_files.append(file_path) + + logger.info(f"Found {len(all_files)} files to upload") + + if not all_files: + logger.warning("No files found to upload") + return 0 + + # Detect changes (treat all files as changes for initial upload) + if args.force: + # Force mode: treat all files as created + changes = {"created": all_files, "updated": [], "deleted": [], "moved": [], "unchanged": []} + else: + changes = client.detect_file_changes(all_files) + + if not client.has_meaningful_changes(changes): + logger.info("No meaningful changes to upload") + return 0 + + logger.info(f"Changes detected: {len(changes.get('created', []))} created, {len(changes.get('updated', []))} updated, {len(changes.get('deleted', []))} deleted") + + # Process and upload changes + logger.info("Uploading files to remote server...") + success = 
client.process_changes_and_upload(changes) + + if success: + logger.info("Repository upload completed successfully!") + logger.info(f"Collection name: {config['collection_name']}") + logger.info(f"Files uploaded: {len(all_files)}") + else: + logger.error("Repository upload failed!") + return 1 + + return 0 + + except Exception as e: + logger.error(f"Failed to initialize remote upload client: {e}") + return 1 + + +if __name__ == "__main__": + import sys + sys.exit(main()) diff --git a/scripts/upload_service.py b/scripts/upload_service.py new file mode 100644 index 00000000..0b5c1589 --- /dev/null +++ b/scripts/upload_service.py @@ -0,0 +1,526 @@ +#!/usr/bin/env python3 +""" +HTTP Upload Service for Delta Bundles in Context-Engine. + +This FastAPI service receives delta bundles from remote upload clients, +processes them, and integrates with the existing indexing pipeline. +""" + +import os +import json +import tarfile +import tempfile +import hashlib +import asyncio +import logging +from pathlib import Path +from typing import Dict, Any, Optional, List +from datetime import datetime + +import uvicorn +from fastapi import FastAPI, File, UploadFile, Form, HTTPException, Request, status +from fastapi.responses import JSONResponse +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field + +# Import existing workspace state and indexing functions +try: + from scripts.workspace_state import ( + log_activity, + get_collection_name, + get_cached_file_hash, + set_cached_file_hash, + _extract_repo_name_from_path, + update_repo_origin, + get_collection_mappings, + ) +except ImportError: + # Fallback for testing without full environment + log_activity = None + get_collection_name = None + get_cached_file_hash = None + set_cached_file_hash = None + _extract_repo_name_from_path = None + update_repo_origin = None + get_collection_mappings = None + + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - 
%(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + +# Configuration from environment +QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") +DEFAULT_COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") +WORK_DIR = os.environ.get("WORK_DIR", "/work") +MAX_BUNDLE_SIZE_MB = int(os.environ.get("MAX_BUNDLE_SIZE_MB", "100")) +UPLOAD_TIMEOUT_SECS = int(os.environ.get("UPLOAD_TIMEOUT_SECS", "300")) + +# FastAPI app +app = FastAPI( + title="Context-Engine Delta Upload Service", + description="HTTP service for receiving and processing delta bundles", + version="1.0.0" +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# In-memory sequence tracking (in production, use persistent storage) +_sequence_tracker: Dict[str, int] = {} + +class UploadResponse(BaseModel): + success: bool + bundle_id: Optional[str] = None + sequence_number: Optional[int] = None + processed_operations: Optional[Dict[str, int]] = None + processing_time_ms: Optional[int] = None + next_sequence: Optional[int] = None + error: Optional[Dict[str, Any]] = None + +class StatusResponse(BaseModel): + workspace_path: str + collection_name: str + last_sequence: int + last_upload: Optional[str] = None + pending_operations: int + status: str + server_info: Dict[str, Any] + +class HealthResponse(BaseModel): + status: str + timestamp: str + version: str + qdrant_url: str + work_dir: str + +def get_workspace_key(workspace_path: str) -> str: + """Generate 16-char hash for collision avoidance in remote uploads. + + Remote uploads may have identical folder names from different users, + so uses longer hash than local indexing (8-chars) to ensure uniqueness. + + Both host paths (/home/user/project/repo) and container paths (/work/repo) + should generate the same key for the same repository. 
+ """ + repo_name = Path(workspace_path).name + return hashlib.sha256(repo_name.encode('utf-8')).hexdigest()[:16] + +def get_next_sequence(workspace_path: str) -> int: + """Get next sequence number for workspace.""" + key = get_workspace_key(workspace_path) + current = _sequence_tracker.get(key, 0) + next_seq = current + 1 + _sequence_tracker[key] = next_seq + return next_seq + +def get_last_sequence(workspace_path: str) -> int: + """Get last sequence number for workspace.""" + key = get_workspace_key(workspace_path) + return _sequence_tracker.get(key, 0) + +def validate_bundle_format(bundle_path: Path) -> Dict[str, Any]: + """Validate delta bundle format and return manifest.""" + try: + with tarfile.open(bundle_path, "r:gz") as tar: + # Check for required files + required_files = ["manifest.json", "metadata/operations.json", "metadata/hashes.json"] + members = tar.getnames() + + for req_file in required_files: + if not any(req_file in member for member in members): + raise ValueError(f"Missing required file: {req_file}") + + # Extract and validate manifest + manifest_member = None + for member in members: + if member.endswith("manifest.json"): + manifest_member = member + break + + if not manifest_member: + raise ValueError("manifest.json not found in bundle") + + manifest_file = tar.extractfile(manifest_member) + if not manifest_file: + raise ValueError("Cannot extract manifest.json") + + manifest = json.loads(manifest_file.read().decode('utf-8')) + + # Validate manifest structure + required_fields = ["version", "bundle_id", "workspace_path", "created_at", "sequence_number"] + for field in required_fields: + if field not in manifest: + raise ValueError(f"Missing required field in manifest: {field}") + + return manifest + + except Exception as e: + raise ValueError(f"Invalid bundle format: {str(e)}") + +async def process_delta_bundle(workspace_path: str, bundle_path: Path, manifest: Dict[str, Any]) -> Dict[str, int]: + """Process delta bundle and return operation 
async def process_delta_bundle(workspace_path: str, bundle_path: Path, manifest: Dict[str, Any]) -> Dict[str, int]:
    """Apply a delta bundle to the server-side workspace copy.

    Args:
        workspace_path: Source workspace path reported by the client.
        bundle_path: Path to the uploaded .tar.gz bundle.
        manifest: Parsed bundle manifest (currently unused here; validated upstream).

    Returns:
        Counts per operation outcome: created/updated/deleted/moved/skipped/failed.
    """
    operations_count = {
        "created": 0,
        "updated": 0,
        "deleted": 0,
        "moved": 0,
        "skipped": 0,
        "failed": 0,
    }

    try:
        # Extract repo name and create the workspace under WORK_DIR.
        # (Previously the source workspace_path was used directly, extracting
        # files outside /work so the watcher service never saw uploads.)
        if _extract_repo_name_from_path:
            repo_name = _extract_repo_name_from_path(workspace_path)
            if not repo_name:
                repo_name = Path(workspace_path).name
        else:
            repo_name = Path(workspace_path).name

        workspace_key = get_workspace_key(workspace_path)
        workspace = Path(WORK_DIR) / f"{repo_name}-{workspace_key}"
        workspace.mkdir(parents=True, exist_ok=True)
        workspace_root = workspace.resolve()

        with tarfile.open(bundle_path, "r:gz") as tar:
            # PERF FIX: tar.getnames() was called inside the per-operation loop,
            # making processing O(operations * members); scan the archive once.
            members = tar.getnames()

            ops_member = next((m for m in members if m.endswith("metadata/operations.json")), None)
            if not ops_member:
                raise ValueError("operations.json not found in bundle")

            ops_file = tar.extractfile(ops_member)
            if not ops_file:
                raise ValueError("Cannot extract operations.json")

            operations = json.loads(ops_file.read().decode('utf-8')).get("operations", [])

            def _find_member(subdir: str, rel: str) -> Optional[str]:
                """Locate the bundle member for files/<subdir>/<rel> by suffix."""
                suffix = f"files/{subdir}/{rel}"
                return next((m for m in members if m.endswith(suffix)), None)

            def _extract_to(member_name: str, target: Path) -> bool:
                """Write one bundle member to `target`; True on success."""
                fobj = tar.extractfile(member_name)
                if not fobj:
                    return False
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_bytes(fobj.read())
                return True

            for operation in operations:
                op_type = operation.get("operation")
                rel_path = operation.get("path")

                if not rel_path:
                    operations_count["skipped"] += 1
                    continue

                target_path = workspace / rel_path

                # SECURITY FIX: reject relative paths that escape the workspace
                # (e.g. "../../etc/passwd" in an untrusted uploaded bundle).
                try:
                    target_resolved = target_path.resolve()
                    if workspace_root not in target_resolved.parents and target_resolved != workspace_root:
                        logger.error(f"Rejecting path escaping workspace: {rel_path}")
                        operations_count["failed"] += 1
                        continue
                except Exception:
                    operations_count["failed"] += 1
                    continue

                try:
                    if op_type in ("created", "updated", "moved"):
                        # The three write operations only differ in which
                        # files/<op>/ subtree holds the payload.
                        member_name = _find_member(op_type, rel_path)
                        if member_name and _extract_to(member_name, target_path):
                            operations_count[op_type] += 1
                        else:
                            operations_count["failed"] += 1

                    elif op_type == "deleted":
                        if target_path.exists():
                            target_path.unlink()
                            operations_count["deleted"] += 1
                        else:
                            operations_count["skipped"] += 1

                    else:
                        operations_count["skipped"] += 1

                except Exception as e:
                    logger.error(f"Error processing operation {op_type} for {rel_path}: {e}")
                    operations_count["failed"] += 1

        return operations_count

    except Exception as e:
        logger.error(f"Error processing delta bundle: {e}")
        raise


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Liveness endpoint reporting service configuration."""
    return HealthResponse(
        status="healthy",
        timestamp=datetime.now().isoformat(),
        version="1.0.0",
        qdrant_url=QDRANT_URL,
        work_dir=WORK_DIR,
    )
get_status(workspace_path: str): + """Get upload status for workspace.""" + try: + # Get collection name + if get_collection_name: + repo_name = _extract_repo_name_from_path(workspace_path) if _extract_repo_name_from_path else None + collection_name = get_collection_name(repo_name) + else: + collection_name = DEFAULT_COLLECTION + + # Get last sequence + last_sequence = get_last_sequence(workspace_path) + + last_upload = None + + return StatusResponse( + workspace_path=workspace_path, + collection_name=collection_name, + last_sequence=last_sequence, + last_upload=last_upload, + pending_operations=0, + status="ready", + server_info={ + "version": "1.0.0", + "max_bundle_size_mb": MAX_BUNDLE_SIZE_MB, + "supported_formats": ["tar.gz"] + } + ) + + except Exception as e: + logger.error(f"Error getting status: {e}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/api/v1/delta/upload", response_model=UploadResponse) +async def upload_delta_bundle( + request: Request, + bundle: UploadFile = File(...), + workspace_path: str = Form(...), + collection_name: Optional[str] = Form(None), + sequence_number: Optional[int] = Form(None), + force: Optional[bool] = Form(False), + source_path: Optional[str] = Form(None), +): + """Upload and process delta bundle.""" + start_time = datetime.now() + + try: + # Validate workspace path + workspace = Path(workspace_path) + if not workspace.is_absolute(): + workspace = Path(WORK_DIR) / workspace + + workspace_path = str(workspace.resolve()) + + # Get collection name + if not collection_name: + if get_collection_name: + repo_name = _extract_repo_name_from_path(workspace_path) if _extract_repo_name_from_path else None + # Fallback to directory name if repo detection fails + if not repo_name: + repo_name = Path(workspace_path).name + collection_name = get_collection_name(repo_name) + else: + collection_name = DEFAULT_COLLECTION + + # Persist origin metadata for remote lookups + try: + if update_repo_origin and repo_name: + 
workspace_key = get_workspace_key(workspace_path) + container_workspace = str(Path(WORK_DIR) / f"{repo_name}-{workspace_key}") + update_repo_origin( + workspace_path=container_workspace, + repo_name=repo_name, + container_path=container_workspace, + source_path=source_path or workspace_path, + collection_name=collection_name, + ) + except Exception as origin_err: + logger.debug(f"[upload_service] Failed to persist origin info: {origin_err}") + + # Validate bundle size + if bundle.size and bundle.size > MAX_BUNDLE_SIZE_MB * 1024 * 1024: + raise HTTPException( + status_code=413, + detail=f"Bundle too large. Max size: {MAX_BUNDLE_SIZE_MB}MB" + ) + + # Save bundle to temporary file + with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete=False) as temp_file: + bundle_path = Path(temp_file.name) + + # Stream upload to file + content = await bundle.read() + bundle_path.write_bytes(content) + + try: + # Validate bundle format + manifest = validate_bundle_format(bundle_path) + bundle_id = manifest.get("bundle_id") + manifest_sequence = manifest.get("sequence_number") + + # Check sequence number + if sequence_number is None: + sequence_number = manifest_sequence + + if not force and sequence_number is not None: + last_sequence = get_last_sequence(workspace_path) + if sequence_number != last_sequence + 1: + return UploadResponse( + success=False, + error={ + "code": "SEQUENCE_MISMATCH", + "message": f"Expected sequence {last_sequence + 1}, got {sequence_number}", + "expected_sequence": last_sequence + 1, + "received_sequence": sequence_number, + "retry_after": 5000 + } + ) + + # Process delta bundle + operations_count = await process_delta_bundle(workspace_path, bundle_path, manifest) + + + # Update sequence tracking + if sequence_number is not None: + key = get_workspace_key(workspace_path) + _sequence_tracker[key] = sequence_number + + # Log activity using cleaned workspace_state function + if log_activity: + log_activity( + 
repo_name=_extract_repo_name_from_path(workspace_path) if _extract_repo_name_from_path else None, + action="uploaded", + file_path=bundle_id, + details={ + "bundle_id": bundle_id, + "operations": operations_count, + "source": "delta_upload" + } + ) + + # Calculate processing time + processing_time = (datetime.now() - start_time).total_seconds() * 1000 + + return UploadResponse( + success=True, + bundle_id=bundle_id, + sequence_number=sequence_number, + processed_operations=operations_count, + processing_time_ms=int(processing_time), + next_sequence=sequence_number + 1 if sequence_number else None + ) + + finally: + # Clean up temporary file + try: + bundle_path.unlink() + except Exception: + pass + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error processing upload: {e}") + return UploadResponse( + success=False, + error={ + "code": "PROCESSING_ERROR", + "message": f"Error processing bundle: {str(e)}" + } + ) + +@app.exception_handler(Exception) +async def global_exception_handler(request: Request, exc: Exception): + """Global exception handler.""" + logger.error(f"Unhandled exception: {exc}") + return JSONResponse( + status_code=500, + content={ + "success": False, + "error": { + "code": "INTERNAL_ERROR", + "message": "Internal server error" + } + } + ) + +def main(): + """Main entry point for the upload service.""" + host = os.environ.get("UPLOAD_SERVICE_HOST", "0.0.0.0") + port = int(os.environ.get("UPLOAD_SERVICE_PORT", "8002")) + + logger.info(f"Starting upload service on {host}:{port}") + logger.info(f"Qdrant URL: {QDRANT_URL}") + logger.info(f"Work directory: {WORK_DIR}") + logger.info(f"Max bundle size: {MAX_BUNDLE_SIZE_MB}MB") + + uvicorn.run( + app, + host=host, + port=port, + log_level="info", + access_log=True + ) + +if __name__ == "__main__": + main() diff --git a/scripts/wait-for-qdrant.sh b/scripts/wait-for-qdrant.sh index 98f9e859..e26c73cf 100755 --- a/scripts/wait-for-qdrant.sh +++ b/scripts/wait-for-qdrant.sh @@ -1,6 
+1,18 @@ #!/usr/bin/env bash set -euo pipefail -until curl -fsS "${QDRANT_URL:-http://localhost:6333}/" >/dev/null; do +# Use Python stdlib to avoid curl dependency in the container +until python - <<'PY' +import os, sys, urllib.request +url = os.environ.get("QDRANT_URL", "http://localhost:6333") +if not url.endswith("/"): + url += "/" +try: + with urllib.request.urlopen(url, timeout=2) as r: + sys.exit(0 if getattr(r, "status", 200) < 500 else 1) +except Exception: + sys.exit(1) +PY +do echo "Waiting for Qdrant at ${QDRANT_URL:-http://localhost:6333} ..." sleep 1 done diff --git a/scripts/warm_all_collections.py b/scripts/warm_all_collections.py new file mode 100644 index 00000000..0344da82 --- /dev/null +++ b/scripts/warm_all_collections.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +""" +Script to warm all collections in Qdrant +""" +import os +import sys +import subprocess +from qdrant_client import QdrantClient + +def main(): + # Get configuration from environment + qdrant_url = os.environ.get("QDRANT_URL", "http://qdrant:6333") + ef = os.environ.get("EF", "256") + limit = os.environ.get("LIMIT", "3") + + print(f"Connecting to Qdrant at {qdrant_url}") + + # Connect to Qdrant + client = QdrantClient(url=qdrant_url) + + # Get all collections + try: + collections_response = client.get_collections() + collections = [c.name for c in collections_response.collections] + print(f"Found collections: {collections}") + except Exception as e: + print(f"Error getting collections: {e}") + sys.exit(1) + + # Warm each collection + for collection_name in collections: + print(f"Warming collection: {collection_name}") + try: + # Set environment variable for the collection name + env = os.environ.copy() + env["COLLECTION_NAME"] = collection_name + + result = subprocess.run( + [ + "python", + "/app/scripts/warm_start.py", + "--ef", ef, + "--limit", limit + ], + capture_output=True, + text=True, + check=True, + env=env + ) + print(f"Successfully warmed {collection_name}") + except 
subprocess.CalledProcessError as e: + print(f"Error warming {collection_name}: {e}") + print(f"stdout: {e.stdout}") + print(f"stderr: {e.stderr}") + sys.exit(1) + + print("All collections warmed successfully") + +if __name__ == "__main__": + main() diff --git a/scripts/watch_index.py b/scripts/watch_index.py index ab503f61..c9e94c57 100644 --- a/scripts/watch_index.py +++ b/scripts/watch_index.py @@ -3,7 +3,7 @@ import time import threading from pathlib import Path -from typing import Set +from typing import Optional, Set from qdrant_client import QdrantClient, models from fastembed import TextEmbedding @@ -20,28 +20,72 @@ sys.path.insert(0, str(ROOT_DIR)) from scripts.workspace_state import ( - get_workspace_state, - update_indexing_status, - update_last_activity, - update_workspace_state, + _extract_repo_name_from_path, + get_collection_name, + _get_global_state_dir, + is_multi_repo_mode, get_cached_file_hash, set_cached_file_hash, remove_cached_file, + update_indexing_status, + update_workspace_state, ) import hashlib from datetime import datetime import scripts.ingest_code as idx +from scripts.logger import get_logger + + +try: + logger = get_logger(__name__) +except Exception: # pragma: no cover - fallback for logger import issues + import logging + + logger = logging.getLogger(__name__) QDRANT_URL = os.environ.get("QDRANT_URL", "http://qdrant:6333") -COLLECTION = os.environ.get("COLLECTION_NAME", "codebase") MODEL = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") ROOT = Path(os.environ.get("WATCH_ROOT", "/work")).resolve() +# Back-compat: legacy modules/tests expect a module-level COLLECTION constant. +# It will be updated in main() once the resolved collection is known. 
+COLLECTION = os.environ.get("COLLECTION_NAME", "my-collection") + # Debounce interval DELAY_SECS = float(os.environ.get("WATCH_DEBOUNCE_SECS", "1.0")) +def _detect_repo_for_file(file_path: Path) -> Optional[Path]: + """Detect repository root for a file under WATCH root.""" + try: + rel_path = file_path.resolve().relative_to(ROOT.resolve()) + except Exception: + return None + if not rel_path.parts: + return ROOT + return ROOT / rel_path.parts[0] + + +def _get_collection_for_repo(repo_path: Path) -> str: + try: + repo_name = _extract_repo_name_from_path(str(repo_path)) + if repo_name: + return get_collection_name(repo_name) + except Exception: + pass + return os.environ.get("COLLECTION_NAME", "my-collection") + + +def _get_collection_for_file(file_path: Path) -> str: + if not is_multi_repo_mode(): + return os.environ.get("COLLECTION_NAME", "my-collection") + repo_path = _detect_repo_for_file(file_path) + if repo_path is not None: + return _get_collection_for_repo(repo_path) + return os.environ.get("COLLECTION_NAME", "my-collection") + + class ChangeQueue: def __init__(self, process_cb): self._lock = threading.Lock() @@ -59,7 +103,10 @@ def add(self, p: Path): try: self._timer.cancel() except Exception as e: - logger.error(f"Failed to cancel timer in ChangeQueue.add: {e}") + logger.error( + "Failed to cancel timer in ChangeQueue.add", + extra={"error": str(e)}, + ) self._timer = threading.Timer(DELAY_SECS, self._flush) self._timer.daemon = True self._timer.start() @@ -88,9 +135,10 @@ def _flush(self): except Exception as e: try: print(f"[watcher_error] processing batch failed: {e}") - except Exception as inner_e: + except Exception as inner_e: # pragma: no cover - logging fallback logger.error( - f"Exception in ChangeQueue._flush during batch processing: {inner_e}" + "Exception in ChangeQueue._flush during batch processing", + extra={"error": str(inner_e)}, ) # drain any pending accumulated during processing with self._lock: @@ -104,25 +152,40 @@ def _flush(self): 
class IndexHandler(FileSystemEventHandler): def __init__( - self, root: Path, queue: ChangeQueue, client: QdrantClient, collection: str + self, + root: Path, + queue: ChangeQueue, + client: Optional[QdrantClient], + default_collection: Optional[str] = None, + *, + collection: Optional[str] = None, ): super().__init__() self.root = root self.queue = queue self.client = client - self.collection = collection + resolved_collection = collection if collection is not None else default_collection + self.default_collection = resolved_collection + self.collection = resolved_collection self.excl = idx._Excluder(root) # Track ignore file for live reloads try: ig_name = os.environ.get("QDRANT_IGNORE_FILE", ".qdrantignore") self._ignore_path = (self.root / ig_name).resolve() - except Exception: + except (OSError, ValueError) as e: + try: + print(f"[ignore_file] Could not resolve ignore file path: {e}") + except Exception: + pass self._ignore_path = None - self._ignore_mtime = ( - self._ignore_path.stat().st_mtime - if self._ignore_path and self._ignore_path.exists() - else 0.0 - ) + try: + self._ignore_mtime = ( + self._ignore_path.stat().st_mtime + if self._ignore_path and self._ignore_path.exists() + else 0.0 + ) + except Exception: + self._ignore_mtime = 0.0 def _maybe_reload_excluder(self): try: @@ -146,7 +209,6 @@ def _maybe_enqueue(self, src_path: str): self._maybe_reload_excluder() p = Path(src_path) try: - # normalize to absolute within root p = p.resolve() except Exception: return @@ -158,6 +220,17 @@ def _maybe_enqueue(self, src_path: str): rel = p.resolve().relative_to(self.root.resolve()) except ValueError: return + + try: + if _get_global_state_dir is not None: + global_state_dir = _get_global_state_dir() + if p.is_relative_to(global_state_dir): + return + except (OSError, ValueError): + pass + + if any(part == ".codebase" for part in p.parts): + return # directory-level excludes (parent dir) rel_dir = "/" + str(rel.parent).replace(os.sep, "/") if rel_dir == "/.": 
@@ -191,19 +264,30 @@ def on_deleted(self, event): # Only attempt deletion for code files we would have indexed if p.suffix.lower() not in idx.CODE_EXTS: return - try: - idx.delete_points_by_path(self.client, self.collection, str(p)) - print(f"[deleted] {p}") - # Drop local cache entry + if self.client is not None: try: - remove_cached_file(str(self.root), str(p)) + collection = self.collection or _get_collection_for_file(p) + idx.delete_points_by_path(self.client, collection, str(p)) + print(f"[deleted] {p} -> {collection}") except Exception: pass + else: + print(f"File deletion detected: {p}") - try: - _log_activity(str(self.root), "deleted", p) - except Exception: - pass + try: + repo_path = _detect_repo_for_file(p) + if repo_path: + repo_name = _extract_repo_name_from_path(str(repo_path)) + remove_cached_file(str(p), repo_name) + else: + root_repo_name = _extract_repo_name_from_path(str(self.root)) + remove_cached_file(str(p), root_repo_name) + except Exception: + pass + + try: + repo_path = _detect_repo_for_file(p) or self.root + _log_activity(str(repo_path), "deleted", p) except Exception as e: try: print(f"[delete_error] {p}: {e}") @@ -240,7 +324,13 @@ def on_moved(self, event): ) print(f"[moved:ignored_dest_deleted_src] {src} -> {dest}") try: - remove_cached_file(str(self.root), str(src)) + src_repo_path = _detect_repo_for_file(src) + src_repo_name = ( + _extract_repo_name_from_path(str(src_repo_path)) + if src_repo_path is not None + else None + ) + remove_cached_file(str(src), src_repo_name) except Exception: pass @@ -249,35 +339,53 @@ def on_moved(self, event): return except Exception: pass - # Try in-place rename (preserve vectors) + src_collection = _get_collection_for_file(src) + dest_collection = _get_collection_for_file(dest) + is_cross_collection = src_collection != dest_collection + if is_cross_collection: + print(f"[cross_collection_move] {src} -> {dest}") + moved_count = -1 - try: - moved_count = _rename_in_store(self.client, self.collection, 
src, dest) - except Exception: - moved_count = -1 + renamed_hash: str | None = None + if self.client is not None: + try: + moved_count, renamed_hash = _rename_in_store( + self.client, src_collection, src, dest, dest_collection + ) + except Exception: + moved_count, renamed_hash = -1, None if moved_count and moved_count > 0: try: - print(f"[moved] {src} -> {dest} ({moved_count} chunk(s) relinked)") - # Update local cache: carry hash from src to dest if present - prev_hash = None - try: - prev_hash = get_cached_file_hash(str(self.root), str(src)) - except Exception: - prev_hash = None - if prev_hash: - try: - set_cached_file_hash(str(self.root), str(dest), prev_hash) - except Exception: - pass - try: - remove_cached_file(str(self.root), str(src)) - except Exception: - pass + print( + f"[moved] {src} -> {dest} ({moved_count} chunk(s) relinked)" + ) + src_repo_path = _detect_repo_for_file(src) + dest_repo_path = _detect_repo_for_file(dest) + src_repo_name = ( + _extract_repo_name_from_path(str(src_repo_path)) + if src_repo_path is not None + else None + ) + dest_repo_name = ( + _extract_repo_name_from_path(str(dest_repo_path)) + if dest_repo_path is not None + else None + ) + src_hash = "" + if src_repo_name: + src_hash = get_cached_file_hash(str(src), src_repo_name) + remove_cached_file(str(src), src_repo_name) + if not src_hash and renamed_hash: + src_hash = renamed_hash + if dest_repo_name and src_hash: + set_cached_file_hash( + str(dest), src_hash, dest_repo_name + ) except Exception: pass try: _log_activity( - str(self.root), + str(dest_repo_path or self.root), "moved", dest, {"from": str(src), "chunks": int(moved_count)}, @@ -285,13 +393,22 @@ def on_moved(self, event): except Exception: pass return - # Fallback: delete old then index new destination - try: - if src.suffix.lower() in idx.CODE_EXTS: - idx.delete_points_by_path(self.client, self.collection, str(src)) - print(f"[moved:deleted_src] {src}") - except Exception: - pass + if self.client is not None: + 
try: + if src.suffix.lower() in idx.CODE_EXTS: + try: + idx.delete_points_by_path(self.client, src_collection, str(src)) + except Exception: + idx.delete_points_by_path( + self.client, + self.collection or src_collection, + str(src), + ) + print(f"[moved:deleted_src] {src}") + except Exception: + pass + else: + print(f"[remote_mode] Move detected: {src} -> {dest}") try: self._maybe_enqueue(str(dest)) except Exception: @@ -301,9 +418,10 @@ def on_moved(self, event): # --- Workspace state helpers --- def _set_status_indexing(workspace_path: str, total_files: int) -> None: try: + repo_name = _extract_repo_name_from_path(workspace_path) update_indexing_status( - workspace_path, - { + repo_name=repo_name, + status={ "state": "indexing", "started_at": datetime.now().isoformat(), "progress": {"files_processed": 0, "total_files": int(total_files)}, @@ -321,9 +439,10 @@ def _update_progress( current_file: Path | None, ) -> None: try: + repo_name = _extract_repo_name_from_path(workspace_path) update_indexing_status( - workspace_path, - { + repo_name=repo_name, + status={ "state": "indexing", "started_at": started_at, "progress": { @@ -341,14 +460,18 @@ def _log_activity( workspace_path: str, action: str, file_path: Path, details: dict | None = None ) -> None: try: - update_last_activity( - workspace_path, - { - "timestamp": datetime.now().isoformat(), - "action": action, - "file_path": str(file_path), - "details": details or {}, - }, + repo_name = _extract_repo_name_from_path(workspace_path) + from scripts.workspace_state import log_activity + + valid_actions = {"indexed", "deleted", "skipped", "scan-completed", "initialized", "moved"} + if action not in valid_actions: + action = "indexed" + + log_activity( + repo_name=repo_name, + action=action, # type: ignore[arg-type] + file_path=str(file_path), + details=details, ) except Exception: pass @@ -356,13 +479,19 @@ def _log_activity( # --- Move/Rename optimization: reuse vectors when file content unchanged --- def 
_rename_in_store( - client: QdrantClient, collection: str, src: Path, dest: Path -) -> int: + client: QdrantClient, + src_collection: str, + src: Path, + dest: Path, + dest_collection: Optional[str] = None, +) -> tuple[int, str | None]: """Best-effort: if dest content hash matches previously indexed src hash, update points in-place to the new path without re-embedding. Returns number of points moved, or -1 if not applicable/failure. """ + if dest_collection is None: + dest_collection = src_collection try: if not dest.exists() or dest.is_dir(): return -1 @@ -371,9 +500,16 @@ def _rename_in_store( except Exception: return -1 dest_hash = hashlib.sha1(text.encode("utf-8", errors="ignore")).hexdigest() - prev = idx.get_indexed_file_hash(client, collection, str(src)) + prev = idx.get_indexed_file_hash(client, src_collection, str(src)) + logger.debug( + "rename fast-path candidate src=%s dest=%s prev_hash=%s dest_hash=%s", + str(src), + str(dest), + prev, + dest_hash, + ) if not prev or prev != dest_hash: - return -1 + return -1, prev if prev else None moved = 0 next_offset = None @@ -386,7 +522,7 @@ def _rename_in_store( ] ) points, next_offset = client.scroll( - collection_name=collection, + collection_name=src_collection, scroll_filter=filt, with_payload=True, with_vectors=True, @@ -445,16 +581,34 @@ def _rename_in_store( except Exception: continue if new_points: - idx.upsert_points(client, collection, new_points) + logger.debug( + "rename fast-path upserting %d chunk(s) %s -> %s into %s", + len(new_points), + str(src), + str(dest), + dest_collection, + ) + idx.upsert_points(client, dest_collection, new_points) moved += len(new_points) + if next_offset is None: + break try: - idx.delete_points_by_path(client, collection, str(src)) + idx.delete_points_by_path(client, src_collection, str(src)) except Exception: pass - return moved - except Exception: - return -1 + return moved, dest_hash + except Exception as exc: + try: + logger.warning( + "[rename_debug] rename failed 
for %s -> %s: %s", + str(src), + str(dest), + exc, + ) + except Exception: + pass + return -1, None def main(): @@ -463,26 +617,47 @@ def main(): from scripts.workspace_state import get_collection_name as _get_coll except Exception: _get_coll = None - global COLLECTION + + multi_repo_enabled = False try: - if _get_coll: - COLLECTION = _get_coll(str(ROOT)) + multi_repo_enabled = bool(is_multi_repo_mode()) except Exception: - pass + multi_repo_enabled = False + + default_collection = os.environ.get("COLLECTION_NAME", "my-collection") + if _get_coll: + try: + resolved = _get_coll(str(ROOT)) + if resolved: + default_collection = resolved + except Exception: + pass + if multi_repo_enabled: + print("[multi_repo] Multi-repo mode enabled - per-repo collections in use") + else: + print("[single_repo] Single-repo mode enabled - using single collection") + + global COLLECTION + COLLECTION = default_collection print( - f"Watch mode: root={ROOT} qdrant={QDRANT_URL} collection={COLLECTION} model={MODEL}" + f"Watch mode: root={ROOT} qdrant={QDRANT_URL} collection={default_collection} model={MODEL}" ) # Health check: detect and auto-heal cache/collection sync issues try: from scripts.collection_health import auto_heal_if_needed + print("[health_check] Checking collection health...") - heal_result = auto_heal_if_needed(str(ROOT), COLLECTION, QDRANT_URL, dry_run=False) - if heal_result["action_taken"] == "cleared_cache": + heal_result = auto_heal_if_needed( + str(ROOT), default_collection, QDRANT_URL, dry_run=False + ) + if heal_result.get("action_taken") == "cleared_cache": print("[health_check] Cache cleared due to sync issue - files will be reindexed") - elif not heal_result["health_check"]["healthy"]: - print(f"[health_check] Issue detected: {heal_result['health_check']['issue']}") + elif not heal_result.get("health_check", {}).get("healthy", True): + print( + f"[health_check] Issue detected: {heal_result['health_check'].get('issue', 'unknown')}" + ) else: print("[health_check] 
Collection health OK") except Exception as e: @@ -492,23 +667,19 @@ def main(): url=QDRANT_URL, timeout=int(os.environ.get("QDRANT_TIMEOUT", "20") or 20) ) - # Compute embedding dimension first (for deterministic dense vector selection) model = TextEmbedding(model_name=MODEL) - dim = len(next(model.embed(["dimension probe"]))) + model_dim = len(next(model.embed(["dimension probe"]))) - # Determine dense vector name deterministically try: - info = client.get_collection(COLLECTION) + info = client.get_collection(default_collection) cfg = info.config.params.vectors if isinstance(cfg, dict) and cfg: - # Prefer vector whose size matches embedding dim vector_name = None for name, params in cfg.items(): psize = getattr(params, "size", None) or getattr(params, "dim", None) - if psize and int(psize) == int(dim): + if psize and int(psize) == int(model_dim): vector_name = name break - # If LEX vector exists, pick a different name as dense if vector_name is None and getattr(idx, "LEX_VECTOR_NAME", None) in cfg: for name in cfg.keys(): if name != idx.LEX_VECTOR_NAME: @@ -521,24 +692,43 @@ def main(): except Exception: vector_name = idx._sanitize_vector_name(MODEL) - # Ensure collection + payload indexes exist try: - idx.ensure_collection(client, COLLECTION, dim, vector_name) + idx.ensure_collection(client, default_collection, model_dim, vector_name) except Exception: pass - idx.ensure_payload_indexes(client, COLLECTION) + idx.ensure_payload_indexes(client, default_collection) - # Ensure workspace state exists and set collection try: - update_workspace_state(str(ROOT), {"qdrant_collection": COLLECTION}) - update_indexing_status(str(ROOT), {"state": "watching"}) - except Exception: - pass + if multi_repo_enabled: + root_repo_name = _extract_repo_name_from_path(str(ROOT)) + if root_repo_name: + root_collection = get_collection_name(root_repo_name) + update_indexing_status( + repo_name=root_repo_name, + status={"state": "watching"}, + ) + print( + f"[workspace_state] Initialized 
repo state: {root_repo_name} -> {root_collection}" + ) + else: + print( + "[workspace_state] Multi-repo: root path is not a repo; skipping state initialization" + ) + else: + update_workspace_state( + workspace_path=str(ROOT), + updates={"qdrant_collection": default_collection}, + ) + update_indexing_status(status={"state": "watching"}) + except Exception as e: + print(f"[workspace_state] Error initializing workspace state: {e}") q = ChangeQueue( - lambda paths: _process_paths(paths, client, model, vector_name, str(ROOT)) + lambda paths: _process_paths( + paths, client, model, vector_name, model_dim, str(ROOT) + ) ) - handler = IndexHandler(ROOT, q, client, COLLECTION) + handler = IndexHandler(ROOT, q, client, default_collection) obs = Observer() obs.schedule(handler, str(ROOT), recursive=True) @@ -554,58 +744,86 @@ def main(): obs.join() -def _process_paths(paths, client, model, vector_name: str, workspace_path: str): - # Prepare progress +def _process_paths(paths, client, model, vector_name: str, model_dim: int, workspace_path: str): unique_paths = sorted(set(Path(x) for x in paths)) - total = len(unique_paths) + if not unique_paths: + return + started_at = datetime.now().isoformat() - try: - update_indexing_status( - workspace_path, - { - "state": "indexing", - "started_at": started_at, - "progress": {"files_processed": 0, "total_files": total}, - }, - ) - except Exception: - pass - processed = 0 - try: - for p in unique_paths: - current = p - if not p.exists(): - # File was removed; ensure its points and cache are deleted - try: - idx.delete_points_by_path(client, COLLECTION, str(p)) - print(f"[deleted] {p}") - except Exception: - pass + repo_groups: dict[str, list[Path]] = {} + for p in unique_paths: + repo_path = _detect_repo_for_file(p) or Path(workspace_path) + repo_groups.setdefault(str(repo_path), []).append(p) + + for repo_path, repo_files in repo_groups.items(): + try: + repo_name = _extract_repo_name_from_path(repo_path) + update_indexing_status( + 
repo_name=repo_name, + status={ + "state": "indexing", + "started_at": started_at, + "progress": { + "files_processed": 0, + "total_files": len(repo_files), + }, + }, + ) + except Exception: + pass + + repo_progress: dict[str, int] = {key: 0 for key in repo_groups.keys()} + + for p in unique_paths: + repo_path = _detect_repo_for_file(p) or Path(workspace_path) + repo_key = str(repo_path) + repo_files = repo_groups.get(repo_key, []) + repo_name = _extract_repo_name_from_path(repo_key) + collection = _get_collection_for_file(p) + + if not p.exists(): + if client is not None: try: - remove_cached_file(workspace_path, str(p)) + idx.delete_points_by_path(client, collection, str(p)) + print(f"[deleted] {p} -> {collection}") except Exception: pass - _log_activity(workspace_path, "deleted", p) - processed += 1 - _update_progress(workspace_path, started_at, processed, total, current) - continue - # Lazily instantiate model if needed - if model is None: - from fastembed import TextEmbedding - - mname = os.environ.get("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") - model = TextEmbedding(model_name=mname) + try: + remove_cached_file(str(p), repo_name) + except Exception: + pass + _log_activity(repo_key, "deleted", p) + repo_progress[repo_key] = repo_progress.get(repo_key, 0) + 1 + try: + _update_progress( + repo_key, + started_at, + repo_progress[repo_key], + len(repo_files), + p, + ) + except Exception: + pass + continue + + if client is not None and model is not None: + try: + idx.ensure_collection(client, collection, model_dim, vector_name) + idx.ensure_payload_indexes(client, collection) + except Exception: + pass + ok = False try: ok = idx.index_single_file( client, model, - COLLECTION, + collection, vector_name, p, dedupe=True, - skip_unchanged=True, + skip_unchanged=False, ) except Exception as e: try: @@ -614,23 +832,40 @@ def _process_paths(paths, client, model, vector_name: str, workspace_path: str): pass ok = False status = "indexed" if ok else "skipped" - 
print(f"[{status}] {p}") + print(f"[{status}] {p} -> {collection}") if ok: try: size = int(p.stat().st_size) except Exception: size = None - _log_activity(workspace_path, "indexed", p, {"file_size": size}) + _log_activity(repo_key, "indexed", p, {"file_size": size}) else: _log_activity( - workspace_path, "skipped", p, {"reason": "no-change-or-error"} + repo_key, "skipped", p, {"reason": "no-change-or-error"} ) - processed += 1 - _update_progress(workspace_path, started_at, processed, total, current) - finally: - # Always return to watching state even if processing raised + else: + print(f"Not processing locally: {p}") + _log_activity(repo_key, "skipped", p, {"reason": "remote-mode"}) + + repo_progress[repo_key] = repo_progress.get(repo_key, 0) + 1 try: - update_indexing_status(workspace_path, {"state": "watching"}) + _update_progress( + repo_key, + started_at, + repo_progress[repo_key], + len(repo_files), + p, + ) + except Exception: + pass + + for repo_path in repo_groups.keys(): + try: + repo_name = _extract_repo_name_from_path(repo_path) + update_indexing_status( + repo_name=repo_name, + status={"state": "watching"}, + ) except Exception: pass diff --git a/scripts/workspace_state.py b/scripts/workspace_state.py index dfa6b4fb..e05f80b5 100644 --- a/scripts/workspace_state.py +++ b/scripts/workspace_state.py @@ -6,26 +6,30 @@ - Collection information and indexing status - Progress tracking during indexing operations - Activity logging with structured metadata -- Multi-project support with per-workspace state files - -Based on the codebase-index-cli workspace state pattern but adapted for our Python ecosystem. 
+- Multi-repo support with per-repo state files """ import json import os -import uuid import re -import hashlib +import uuid import subprocess +import hashlib from datetime import datetime from pathlib import Path from typing import Dict, Any, Optional, List, Literal, TypedDict import threading import time -# Type definitions matching codebase-index-cli patterns +# Type definitions IndexingState = Literal['idle', 'initializing', 'scanning', 'indexing', 'watching', 'error'] ActivityAction = Literal['indexed', 'deleted', 'skipped', 'scan-completed', 'initialized', 'moved'] +# Constants +STATE_DIRNAME = ".codebase" +STATE_FILENAME = "state.json" +CACHE_FILENAME = "cache.json" +PLACEHOLDER_COLLECTION_NAMES = {"", "default-collection", "my-collection"} + class IndexingProgress(TypedDict, total=False): files_processed: int total_files: Optional[int] @@ -53,40 +57,77 @@ class LastActivity(TypedDict, total=False): file_path: Optional[str] details: Optional[ActivityDetails] -class QdrantStats(TypedDict, total=False): - total_vectors: int - unique_files: int - vector_dimension: int - last_updated: str - collection_name: str +class OriginInfo(TypedDict, total=False): + repo_name: Optional[str] + container_path: Optional[str] + source_path: Optional[str] + collection_name: Optional[str] + updated_at: Optional[str] + class WorkspaceState(TypedDict, total=False): - workspace_path: str created_at: str updated_at: str qdrant_collection: str indexing_status: Optional[IndexingStatus] last_activity: Optional[LastActivity] - qdrant_stats: Optional[QdrantStats] + qdrant_stats: Optional[Dict[str, Any]] + origin: Optional[OriginInfo] -# Constants -STATE_DIRNAME = ".codebase" -STATE_FILENAME = "state.json" +def is_multi_repo_mode() -> bool: + """Check if multi-repo mode is enabled.""" + return os.environ.get("MULTI_REPO_MODE", "0").strip().lower() in { + "1", "true", "yes", "on" + } -# Thread-safe state management -# Use re-entrant locks to avoid deadlocks when helper functions call each 
other -_state_locks: Dict[str, threading.RLock] = {} _state_lock = threading.Lock() # Track last-used timestamps for cleanup of idle workspace locks +_state_locks: Dict[str, threading.RLock] = {} _state_lock_last_used: Dict[str, float] = {} -def _get_state_lock(workspace_path: str) -> threading.RLock: - """Get or create a thread-safe lock for a specific workspace and record last-used time.""" +def _resolve_workspace_root() -> str: + """Determine the default workspace root path.""" + return os.environ.get("WORKSPACE_PATH") or os.environ.get("WATCH_ROOT") or "/work" + +def _resolve_repo_context( + workspace_path: Optional[str] = None, + repo_name: Optional[str] = None, +) -> tuple[str, Optional[str]]: + """Normalize workspace/repo context, ensuring multi-repo callers map to repo state.""" + resolved_workspace = workspace_path or _resolve_workspace_root() + + if is_multi_repo_mode(): + if repo_name: + return resolved_workspace, repo_name + + if workspace_path: + detected = _detect_repo_name_from_path(Path(workspace_path)) + if detected: + return resolved_workspace, detected + + return resolved_workspace, None + + return resolved_workspace, repo_name + +def _get_state_lock(workspace_path: Optional[str] = None, repo_name: Optional[str] = None) -> threading.RLock: + """Get or create a lock for the workspace or repo state and track usage.""" + if repo_name and is_multi_repo_mode(): + key = f"repo::{repo_name}" + else: + key = str(Path(workspace_path or _resolve_workspace_root()).resolve()) + with _state_lock: - if workspace_path not in _state_locks: - _state_locks[workspace_path] = threading.RLock() - _state_lock_last_used[workspace_path] = time.time() - return _state_locks[workspace_path] + if key not in _state_locks: + _state_locks[key] = threading.RLock() + _state_lock_last_used[key] = time.time() + return _state_locks[key] + +def _get_repo_state_dir(repo_name: str) -> Path: + """Get the state directory for a repository.""" + base_dir = 
Path(os.environ.get("WORKSPACE_PATH") or os.environ.get("WATCH_ROOT") or "/work") + if is_multi_repo_mode(): + return base_dir / STATE_DIRNAME / "repos" / repo_name + return base_dir / STATE_DIRNAME def _get_state_path(workspace_path: str) -> Path: """Get the path to the state.json file for a workspace.""" @@ -94,6 +135,13 @@ def _get_state_path(workspace_path: str) -> Path: state_dir = workspace / STATE_DIRNAME return state_dir / STATE_FILENAME + +def _get_global_state_dir(workspace_path: Optional[str] = None) -> Path: + """Return the root .codebase directory used for workspace metadata.""" + + base_dir = Path(workspace_path or _resolve_workspace_root()).resolve() + return base_dir / STATE_DIRNAME + def _ensure_state_dir(workspace_path: str) -> Path: """Ensure the .codebase directory exists and return the state file path.""" workspace = Path(workspace_path).resolve() @@ -122,13 +170,33 @@ def _sanitize_name(s: str, max_len: int = 64) -> str: def _cross_process_lock(lock_path: Path): """Advisory cross-process exclusive lock using a companion .lock file. Safe across container/process boundaries; pairs with atomic rename writes. + Ensures group-writable permissions so non-root indexers/watchers can operate. 
""" - lock_path.parent.mkdir(exist_ok=True) - f = open(lock_path, "a+") + + lock_path.parent.mkdir(parents=True, exist_ok=True) + + lock_file = None + fd = None + try: + fd = os.open(lock_path, os.O_CREAT | os.O_RDWR, 0o664) + lock_file = os.fdopen(fd, "a+") + except PermissionError: + # If we cannot create or open the requested lock, fall back to /tmp (permissive) + tmp_path = Path("/tmp") / (lock_path.name) + tmp_path.parent.mkdir(parents=True, exist_ok=True) + fd = os.open(tmp_path, os.O_CREAT | os.O_RDWR, 0o664) + lock_file = os.fdopen(fd, "a+") + lock_path = tmp_path + try: + try: + os.chmod(lock_path, 0o664) + except PermissionError: + pass + if fcntl is not None: try: - fcntl.flock(f.fileno(), fcntl.LOCK_EX) + fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX) except Exception: pass yield @@ -136,12 +204,12 @@ def _cross_process_lock(lock_path: Path): try: if fcntl is not None: try: - fcntl.flock(f.fileno(), fcntl.LOCK_UN) + fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN) except Exception: pass finally: try: - f.close() + lock_file.close() except Exception: pass @@ -192,145 +260,324 @@ def _atomic_write_state(state_path: Path, state: WorkspaceState) -> None: pass raise -def get_workspace_state(workspace_path: str) -> WorkspaceState: - """Get the current workspace state, creating it if it doesn't exist. +def get_workspace_state( + workspace_path: Optional[str] = None, repo_name: Optional[str] = None +) -> WorkspaceState: + """Get the current workspace state, creating it if it doesn't exist.""" - Uses a cross-process lock to avoid concurrent read-modify-write races across - multiple containers/processes. 
- """ - lock = _get_state_lock(workspace_path) + workspace_path, repo_name = _resolve_repo_context(workspace_path, repo_name) + + if is_multi_repo_mode() and repo_name is None: + print( + f"[workspace_state] Multi-repo: Skipping state read for workspace={workspace_path} without repo_name" + ) + return {} + + lock = _get_state_lock(workspace_path, repo_name) with lock: - state_path = _get_state_path(workspace_path) - lock_path = state_path.with_suffix(state_path.suffix + ".lock") + state_path: Path + lock_scope_path: Path + + if is_multi_repo_mode() and repo_name: + state_dir = _get_repo_state_dir(repo_name) + state_dir.mkdir(parents=True, exist_ok=True) + state_path = state_dir / STATE_FILENAME + lock_scope_path = state_dir + else: + try: + state_path = _ensure_state_dir(workspace_path) + lock_scope_path = state_path.parent + except PermissionError: + lock_scope_path = _get_global_state_dir(workspace_path) + lock_scope_path.mkdir(parents=True, exist_ok=True) + state_path = lock_scope_path / STATE_FILENAME + + lock_path = lock_scope_path / (STATE_FILENAME + ".lock") with _cross_process_lock(lock_path): if state_path.exists(): try: - with open(state_path, 'r', encoding='utf-8') as f: + with open(state_path, "r", encoding="utf-8") as f: state = json.load(f) - # Ensure required fields exist - if not isinstance(state, dict): - raise ValueError("Invalid state format") - return state + if isinstance(state, dict): + return state except (json.JSONDecodeError, ValueError, OSError): - # Corrupted or invalid state file, recreate pass - # Create new state now = datetime.now().isoformat() - env_coll = os.environ.get("COLLECTION_NAME", "").strip() - # Use env var if set, otherwise default to "codebase" - collection_name = env_coll if env_coll else "codebase" + collection_name = get_collection_name(repo_name) state: WorkspaceState = { - "workspace_path": str(Path(workspace_path).resolve()), + "workspace_path": str(Path(workspace_path or _resolve_workspace_root()).resolve()), 
"created_at": now, "updated_at": now, "qdrant_collection": collection_name, - "indexing_status": { - "state": "idle" - } + "indexing_status": {"state": "idle"}, } - # Ensure directory exists and write state - state_path = _ensure_state_dir(workspace_path) _atomic_write_state(state_path, state) return state -def update_workspace_state(workspace_path: str, updates: Dict[str, Any]) -> WorkspaceState: - """Update workspace state with the given changes. - Cross-process safe using an advisory lock file. - """ - lock = _get_state_lock(workspace_path) +def update_workspace_state( + workspace_path: Optional[str] = None, + updates: Optional[Dict[str, Any]] = None, + repo_name: Optional[str] = None, +) -> WorkspaceState: + """Update workspace state with the given changes.""" + + workspace_path, repo_name = _resolve_repo_context(workspace_path, repo_name) + updates = updates or {} + + if is_multi_repo_mode() and repo_name is None: + print( + f"[workspace_state] Multi-repo: Skipping state update for workspace={workspace_path} without repo_name" + ) + return {} + + lock = _get_state_lock(workspace_path, repo_name) with lock: - state_path = _ensure_state_dir(workspace_path) - lock_path = state_path.with_suffix(state_path.suffix + ".lock") + state = get_workspace_state(workspace_path, repo_name) + for key, value in updates.items(): + if key in state or key in WorkspaceState.__annotations__: + state[key] = value + + state["updated_at"] = datetime.now().isoformat() + + if is_multi_repo_mode() and repo_name: + state_dir = _get_repo_state_dir(repo_name) + state_dir.mkdir(parents=True, exist_ok=True) + state_path = state_dir / STATE_FILENAME + else: + try: + state_path = _ensure_state_dir(workspace_path) + except PermissionError: + state_dir = _get_global_state_dir(workspace_path) + state_dir.mkdir(parents=True, exist_ok=True) + state_path = state_dir / STATE_FILENAME + + _atomic_write_state(state_path, state) + return state + +def update_indexing_status( + workspace_path: 
Optional[str] = None, + status: Optional[IndexingStatus] = None, + repo_name: Optional[str] = None, +) -> WorkspaceState: + """Update indexing status in workspace state.""" + workspace_path, repo_name = _resolve_repo_context(workspace_path, repo_name) + + if is_multi_repo_mode() and repo_name is None: + print( + f"[workspace_state] Multi-repo: Skipping indexing status update for workspace={workspace_path} without repo_name" + ) + return {} + + if status is None: + status = {"state": "idle"} + + return update_workspace_state( + workspace_path=workspace_path, + updates={"indexing_status": status}, + repo_name=repo_name, + ) + + +def update_repo_origin( + workspace_path: Optional[str] = None, + repo_name: Optional[str] = None, + *, + container_path: Optional[str] = None, + source_path: Optional[str] = None, + collection_name: Optional[str] = None, +) -> WorkspaceState: + """Update origin metadata for a repository/workspace.""" + + resolved_workspace, resolved_repo = _resolve_repo_context(workspace_path, repo_name) + + if is_multi_repo_mode() and resolved_repo is None: + return {} + + state = get_workspace_state(resolved_workspace, resolved_repo) + if not state: + state = {} + + origin: OriginInfo = dict(state.get("origin", {})) # type: ignore[arg-type] + if resolved_repo: + origin["repo_name"] = resolved_repo + if container_path or workspace_path: + origin["container_path"] = container_path or workspace_path + if source_path: + origin["source_path"] = source_path + if collection_name: + origin["collection_name"] = collection_name + origin["updated_at"] = datetime.now().isoformat() + + updates: Dict[str, Any] = {"origin": origin} + if collection_name: + updates.setdefault("qdrant_collection", collection_name) + + return update_workspace_state( + workspace_path=resolved_workspace, + updates=updates, + repo_name=resolved_repo, + ) + + +def log_activity( + repo_name: Optional[str] = None, + action: Optional[ActivityAction] = None, + file_path: Optional[str] = None, + 
details: Optional[ActivityDetails] = None, + workspace_path: Optional[str] = None, +) -> None: + """Log activity to workspace state.""" + + if not action: + return + + activity = { + "timestamp": datetime.now().isoformat(), + "action": action, + "file_path": file_path, + "details": details or {}, + } + + resolved_workspace = workspace_path or _resolve_workspace_root() + + if is_multi_repo_mode() and repo_name: + state_dir = _get_repo_state_dir(repo_name) + state_dir.mkdir(parents=True, exist_ok=True) + state_path = state_dir / STATE_FILENAME + lock_path = state_path.with_suffix(".lock") + with _cross_process_lock(lock_path): - # Read current state (best-effort) try: - with open(state_path, 'r', encoding='utf-8') as f: - state = json.load(f) - if not isinstance(state, dict): - state = {} + if state_path.exists(): + with open(state_path, "r", encoding="utf-8") as f: + state = json.load(f) + else: + state = {"created_at": datetime.now().isoformat()} except Exception: - state = {} - - # Apply updates (preserve prior behavior: only known or existing keys) - for key, value in updates.items(): - if key in state or key in WorkspaceState.__annotations__: - state[key] = value + state = {"created_at": datetime.now().isoformat()} - # Always update timestamp + state["last_activity"] = activity state["updated_at"] = datetime.now().isoformat() - - # Write back to file atomically _atomic_write_state(state_path, state) - return state + else: + update_workspace_state( + workspace_path=resolved_workspace, + updates={"last_activity": activity}, + repo_name=repo_name, + ) -def update_indexing_status(workspace_path: str, status: IndexingStatus) -> WorkspaceState: - """Update the indexing status in workspace state.""" - return update_workspace_state(workspace_path, {"indexing_status": status}) -def update_last_activity(workspace_path: str, activity: LastActivity) -> WorkspaceState: - """Update the last activity in workspace state.""" - return update_workspace_state(workspace_path, 
{"last_activity": activity}) +def _generate_collection_name_from_repo(repo_name: str) -> str: + """Generate collection name with 8-char hash for local workspaces. -def update_qdrant_stats(workspace_path: str, stats: QdrantStats) -> WorkspaceState: - """Update Qdrant statistics in workspace state.""" - stats["last_updated"] = datetime.now().isoformat() - return update_workspace_state(workspace_path, {"qdrant_stats": stats}) + Used by local indexer/watcher. Remote uploads use 16+8 char pattern + for collision avoidance when folder names may be identical. + """ + hash_obj = hashlib.sha256(repo_name.encode()) + short_hash = hash_obj.hexdigest()[:8] + return f"{repo_name}-{short_hash}" -def get_collection_name(workspace_path: str) -> str: - """Get the Qdrant collection name for a workspace. +def get_collection_name(repo_name: Optional[str] = None) -> str: + """Get collection name for repository or workspace.""" + # In multi-repo mode, prioritize repo-specific collection names + if is_multi_repo_mode() and repo_name: + return _generate_collection_name_from_repo(repo_name) - Seamless single-collection mode: - - Defaults to "codebase" for unified cross-repo search - - All your code goes into one collection - - Override via COLLECTION_NAME env var if you need isolation - """ + # Check environment for single-repo mode or fallback env_coll = os.environ.get("COLLECTION_NAME", "").strip() + if env_coll and env_coll not in PLACEHOLDER_COLLECTION_NAMES: + return env_coll - # Use env var if set, otherwise default to unified "codebase" collection - coll = env_coll if env_coll else "codebase" + # Use repo name if provided (for single-repo mode with repo name) + if repo_name: + return _generate_collection_name_from_repo(repo_name) - # Persist to state for consistency - update_workspace_state(workspace_path, {"qdrant_collection": coll}) - return coll + # Default fallback + return "global-collection" -# --- Persistent file-hash cache (.codebase/cache.json) --- -CACHE_FILENAME = 
"cache.json" +def _detect_repo_name_from_path(path: Path) -> str: + """Detect repository name from path. Clean, robust implementation.""" + try: + resolved_path = path.resolve() + except Exception: + return None + + candidate_roots: List[Path] = [] + for root_str in ( + os.environ.get("WATCH_ROOT"), + os.environ.get("WORKSPACE_PATH"), + "/work", + os.environ.get("HOST_ROOT"), + "/home/coder/project/Context-Engine/dev-workspace", + ): + if not root_str: + continue + try: + root_path = Path(root_str).resolve() + except Exception: + continue + if root_path not in candidate_roots: + candidate_roots.append(root_path) + + for base in candidate_roots: + try: + rel_path = resolved_path.relative_to(base) + except ValueError: + continue + + if not rel_path.parts: + continue + + repo_name = rel_path.parts[0] + if repo_name in (".codebase", ".git", "__pycache__"): + continue + repo_path = base / repo_name + if repo_path.exists() or str(resolved_path).startswith(str(repo_path) + os.sep): + return repo_name + return None + +def _extract_repo_name_from_path(workspace_path: str) -> str: + """Extract repository name from workspace path.""" + return _detect_repo_name_from_path(Path(workspace_path)) + +# Cache functions for file hash tracking def _get_cache_path(workspace_path: str) -> Path: - ws = Path(workspace_path).resolve() - return ws / STATE_DIRNAME / CACHE_FILENAME + """Get the path to the cache.json file.""" + workspace = Path(workspace_path).resolve() + return workspace / STATE_DIRNAME / CACHE_FILENAME def _read_cache(workspace_path: str) -> Dict[str, Any]: - """Best-effort load of the workspace cache (file hashes keyed by absolute path).""" + """Read cache file, return empty dict if it doesn't exist or is invalid.""" + + cache_path = _get_cache_path(workspace_path) + if not cache_path.exists(): + return {"file_hashes": {}, "updated_at": datetime.now().isoformat()} + try: - p = _get_cache_path(workspace_path) - if not p.exists(): - return {"file_hashes": {}, "updated_at": 
datetime.now().isoformat()} - with open(p, "r", encoding="utf-8") as f: + with open(cache_path, "r", encoding="utf-8") as f: obj = json.load(f) if isinstance(obj, dict) and isinstance(obj.get("file_hashes"), dict): return obj - return {"file_hashes": {}, "updated_at": datetime.now().isoformat()} except Exception: - return {"file_hashes": {}, "updated_at": datetime.now().isoformat()} + pass + + return {"file_hashes": {}, "updated_at": datetime.now().isoformat()} def _write_cache(workspace_path: str, cache: Dict[str, Any]) -> None: - """Atomic write of cache file to avoid corruption under concurrency. + """Atomic write of cache file with cross-process locking.""" - Uses both an in-process lock and a cross-process lock file to serialize writers. - """ lock = _get_state_lock(workspace_path) with lock: - state_dir = Path(workspace_path).resolve() / STATE_DIRNAME - state_dir.mkdir(exist_ok=True) cache_path = _get_cache_path(workspace_path) + cache_path.parent.mkdir(parents=True, exist_ok=True) lock_path = cache_path.with_suffix(cache_path.suffix + ".lock") with _cross_process_lock(lock_path): tmp = cache_path.with_suffix(f".tmp.{uuid.uuid4().hex[:8]}") @@ -345,70 +592,90 @@ def _write_cache(workspace_path: str, cache: Dict[str, Any]) -> None: pass -def get_cached_file_hash(workspace_path: str, file_path: str) -> str: - """Return cached content hash for an absolute file path, or empty string.""" - cache = _read_cache(workspace_path) - try: - return str((cache.get("file_hashes") or {}).get(str(Path(file_path).resolve()), "")) - except Exception: - return "" +def get_cached_file_hash(file_path: str, repo_name: Optional[str] = None) -> str: + """Get cached file hash for tracking changes.""" + if is_multi_repo_mode() and repo_name: + state_dir = _get_repo_state_dir(repo_name) + cache_path = state_dir / CACHE_FILENAME + if cache_path.exists(): + try: + with open(cache_path, 'r', encoding='utf-8') as f: + cache = json.load(f) + file_hashes = cache.get("file_hashes", {}) + 
return file_hashes.get(str(Path(file_path).resolve()), "") + except Exception: + pass + else: + cache = _read_cache(_resolve_workspace_root()) + return cache.get("file_hashes", {}).get(str(Path(file_path).resolve()), "") -def set_cached_file_hash(workspace_path: str, file_path: str, file_hash: str) -> None: - """Set cached content hash for an absolute file path and persist immediately.""" - lock = _get_state_lock(workspace_path) - with lock: - cache = _read_cache(workspace_path) - fh = cache.setdefault("file_hashes", {}) - fh[str(Path(file_path).resolve())] = str(file_hash) - cache["updated_at"] = datetime.now().isoformat() - _write_cache(workspace_path, cache) + return "" -def remove_cached_file(workspace_path: str, file_path: str) -> None: - """Remove a file entry from the cache and persist.""" - lock = _get_state_lock(workspace_path) - with lock: - cache = _read_cache(workspace_path) - fh = cache.setdefault("file_hashes", {}) +def set_cached_file_hash(file_path: str, file_hash: str, repo_name: Optional[str] = None) -> None: + """Set cached file hash for tracking changes.""" + + fp = str(Path(file_path).resolve()) + + if is_multi_repo_mode() and repo_name: + state_dir = _get_repo_state_dir(repo_name) + cache_path = state_dir / CACHE_FILENAME + state_dir.mkdir(parents=True, exist_ok=True) + try: - fp = str(Path(file_path).resolve()) - except Exception: - fp = str(file_path) - if fp in fh: - fh.pop(fp, None) + if cache_path.exists(): + with open(cache_path, "r", encoding="utf-8") as f: + cache = json.load(f) + else: + cache = {"file_hashes": {}, "created_at": datetime.now().isoformat()} + + cache.setdefault("file_hashes", {})[fp] = file_hash cache["updated_at"] = datetime.now().isoformat() - _write_cache(workspace_path, cache) -def list_workspaces(search_root: Optional[str] = None) -> List[Dict[str, Any]]: - """Find all workspaces with .codebase/state.json files.""" - if search_root is None: - search_root = os.getcwd() + _atomic_write_state(cache_path, cache) # 
reuse atomic writer for files + except Exception: + pass + return + + cache = _read_cache(_resolve_workspace_root()) + cache.setdefault("file_hashes", {})[fp] = file_hash + cache["updated_at"] = datetime.now().isoformat() + _write_cache(_resolve_workspace_root(), cache) - workspaces = [] - search_path = Path(search_root).resolve() - # Search for .codebase directories - for state_dir in search_path.rglob(STATE_DIRNAME): - state_file = state_dir / STATE_FILENAME - if state_file.exists(): +def remove_cached_file(file_path: str, repo_name: Optional[str] = None) -> None: + """Remove file entry from cache.""" + if is_multi_repo_mode() and repo_name: + state_dir = _get_repo_state_dir(repo_name) + cache_path = state_dir / CACHE_FILENAME + + if cache_path.exists(): try: - workspace_path = str(state_dir.parent) - state = get_workspace_state(workspace_path) - workspaces.append({ - "workspace_path": workspace_path, - "collection_name": state.get("qdrant_collection"), - "last_updated": state.get("updated_at"), - "indexing_state": state.get("indexing_status", {}).get("state", "unknown") - }) + with open(cache_path, 'r', encoding='utf-8') as f: + cache = json.load(f) + file_hashes = cache.get("file_hashes", {}) + + fp = str(Path(file_path).resolve()) + if fp in file_hashes: + file_hashes.pop(fp, None) + cache["updated_at"] = datetime.now().isoformat() + + _atomic_write_state(cache_path, cache) except Exception: - # Skip corrupted state files - continue + pass + return + + cache = _read_cache(_resolve_workspace_root()) + fp = str(Path(file_path).resolve()) + if fp in cache.get("file_hashes", {}): + cache["file_hashes"].pop(fp, None) + cache["updated_at"] = datetime.now().isoformat() + _write_cache(_resolve_workspace_root(), cache) - return sorted(workspaces, key=lambda x: x.get("last_updated", ""), reverse=True) -def cleanup_old_state_locks(max_idle_seconds: int = 900) -> int: - """Best-effort cleanup of idle workspace locks. 
+def cleanup_old_cache_locks(max_idle_seconds: int = 900) -> int: + """Best-effort cleanup of idle cache locks. Removes locks that have been idle (not requested via _get_state_lock) for longer than max_idle_seconds and whose lock can be acquired without blocking (i.e., not held). @@ -446,14 +713,67 @@ def cleanup_old_state_locks(max_idle_seconds: int = 900) -> int: removed += 1 return removed -if __name__ == "__main__": - # Simple CLI for testing - import sys - if len(sys.argv) > 1: - workspace = sys.argv[1] - state = get_workspace_state(workspace) - print(json.dumps(state, indent=2)) - else: - workspaces = list_workspaces() - for ws in workspaces: - print(f"{ws['workspace_path']}: {ws['collection_name']} ({ws['indexing_state']})") + +def get_collection_mappings(search_root: Optional[str] = None) -> List[Dict[str, Any]]: + """Enumerate collection mappings with origin metadata.""" + + root_path = Path(search_root or _resolve_workspace_root()).resolve() + mappings: List[Dict[str, Any]] = [] + + try: + if is_multi_repo_mode(): + repos_root = root_path / STATE_DIRNAME / "repos" + if repos_root.exists(): + for repo_dir in sorted(p for p in repos_root.iterdir() if p.is_dir()): + repo_name = repo_dir.name + state_path = repo_dir / STATE_FILENAME + if not state_path.exists(): + continue + try: + with open(state_path, "r", encoding="utf-8") as f: + state = json.load(f) or {} + except Exception: + continue + + origin = state.get("origin", {}) or {} + mappings.append( + { + "repo_name": repo_name, + "collection_name": state.get("qdrant_collection") + or get_collection_name(repo_name), + "container_path": origin.get("container_path") + or str((Path(_resolve_workspace_root()) / repo_name).resolve()), + "source_path": origin.get("source_path"), + "state_file": str(state_path), + "updated_at": state.get("updated_at"), + } + ) + else: + state_path = root_path / STATE_DIRNAME / STATE_FILENAME + if state_path.exists(): + try: + with open(state_path, "r", encoding="utf-8") as f: + 
state = json.load(f) or {} + except Exception: + state = {} + + origin = state.get("origin", {}) or {} + repo_name = origin.get("repo_name") or Path(root_path).name + mappings.append( + { + "repo_name": repo_name, + "collection_name": state.get("qdrant_collection") + or get_collection_name(repo_name), + "container_path": origin.get("container_path") + or str(root_path), + "source_path": origin.get("source_path"), + "state_file": str(state_path), + "updated_at": state.get("updated_at"), + } + ) + except Exception: + return mappings + + return mappings + +# Add missing functions that callers expect (already defined above) \ No newline at end of file diff --git a/tests/test_change_history_for_path.py b/tests/test_change_history_for_path.py index a0f9c046..52be7592 100644 --- a/tests/test_change_history_for_path.py +++ b/tests/test_change_history_for_path.py @@ -16,7 +16,13 @@ def _decorator(fn): return fn return _decorator +class _Context: + def __init__(self, *args, **kwargs): + # Tests only access .session when present; keep permissive defaults + self.session = kwargs.get("session") + setattr(fastmcp_pkg, "FastMCP", _FastMCP) +setattr(fastmcp_pkg, "Context", _Context) sys.modules.setdefault("mcp", mcp_pkg) sys.modules.setdefault("mcp.server", server_pkg) sys.modules.setdefault("mcp.server.fastmcp", fastmcp_pkg) diff --git a/tests/test_hybrid_cli_json.py b/tests/test_hybrid_cli_json.py index 5fb5be79..4a3cf480 100644 --- a/tests/test_hybrid_cli_json.py +++ b/tests/test_hybrid_cli_json.py @@ -28,7 +28,7 @@ def __init__(self, *args, **kwargs): self.args = args self.kwargs = kwargs - def fake_dense_query(client, vec_name, vector, flt, per_query): + def fake_dense_query(client, vec_name, vector, flt, per_query, collection_name=None): md = { "path": "/work/pkg/a.py", "symbol": "foo", diff --git a/tests/test_reranker_verification.py b/tests/test_reranker_verification.py index b7c24123..2642c65f 100644 --- a/tests/test_reranker_verification.py +++ 
b/tests/test_reranker_verification.py @@ -17,7 +17,12 @@ def _decorator(fn): return fn return _decorator +class _Context: + def __init__(self, *args, **kwargs): + self.session = kwargs.get("session") + setattr(fastmcp_pkg, "FastMCP", _FastMCP) +setattr(fastmcp_pkg, "Context", _Context) sys.modules.setdefault("mcp", mcp_pkg) sys.modules.setdefault("mcp.server", server_pkg) sys.modules.setdefault("mcp.server.fastmcp", fastmcp_pkg) @@ -25,6 +30,20 @@ def _decorator(fn): srv = importlib.import_module("scripts.mcp_indexer_server") +def _make_hybrid_stub(fake_run): + mod = types.ModuleType("scripts.hybrid_search") + mod.run_hybrid_search = fake_run + mod.lang_matches_path = lambda path, lang=None: True + mod._merge_and_budget_spans = lambda spans, *args, **kwargs: spans + mod.TextEmbedding = object + mod.QdrantClient = object + return mod + + +def _fake_embedding_model(*args, **kwargs): + return object() + + @pytest.mark.service @pytest.mark.anyio async def test_rerank_inproc_changes_order(monkeypatch): @@ -58,17 +77,32 @@ def fake_rerank_local(pairs): # Patch hybrid and rerank monkeypatch.setenv("EMBEDDING_MODEL", "BAAI/bge-base-en-v1.5") + monkeypatch.setitem(sys.modules, "scripts.hybrid_search", _make_hybrid_stub(fake_run_hybrid_search)) + monkeypatch.delitem(sys.modules, "scripts.mcp_indexer_server", raising=False) + server = importlib.import_module("scripts.mcp_indexer_server") + monkeypatch.setattr( + server, + "_get_embedding_model", + _fake_embedding_model, + ) + monkeypatch.setattr( + server, + "run_hybrid_search", + fake_run_hybrid_search, + raising=False, + ) monkeypatch.setattr( - importlib.import_module("scripts.hybrid_search"), "run_hybrid_search", fake_run_hybrid_search + importlib.import_module("scripts.rerank_local"), + "rerank_local", + fake_rerank_local, ) - monkeypatch.setattr(importlib.import_module("scripts.rerank_local"), "rerank_local", fake_rerank_local) # Baseline (rerank disabled) preserves hybrid order A then B - base = await 
srv.repo_search(query="q", limit=2, per_path=2, rerank_enabled=False, compact=True) + base = await server.repo_search(query="q", limit=2, per_path=2, rerank_enabled=False, compact=True) assert [r["path"] for r in base["results"]] == ["/work/a.py", "/work/b.py"] # With rerank enabled, order should flip to B then A; counters should show inproc_hybrid - rr = await srv.repo_search(query="q", limit=2, per_path=2, rerank_enabled=True, compact=True) + rr = await server.repo_search(query="q", limit=2, per_path=2, rerank_enabled=True, compact=True) assert rr.get("used_rerank") is True assert rr.get("rerank_counters", {}).get("inproc_hybrid", 0) >= 1 assert [r["path"] for r in rr["results"]] == ["/work/b.py", "/work/a.py"] @@ -91,12 +125,19 @@ async def fake_run_async(cmd, env=None, timeout=None): # Simulate subprocess reranker timing out return {"ok": False, "code": -1, "stdout": "", "stderr": f"Command timed out after {timeout}s"} + monkeypatch.setitem(sys.modules, "scripts.hybrid_search", _make_hybrid_stub(fake_run_hybrid_search)) + monkeypatch.delitem(sys.modules, "scripts.mcp_indexer_server", raising=False) + server = importlib.import_module("scripts.mcp_indexer_server") monkeypatch.setattr( - importlib.import_module("scripts.hybrid_search"), "run_hybrid_search", fake_run_hybrid_search + server, + "run_hybrid_search", + fake_run_hybrid_search, + raising=False, ) - monkeypatch.setattr(srv, "_run_async", fake_run_async) + monkeypatch.setattr(server, "_get_embedding_model", _fake_embedding_model) + monkeypatch.setattr(server, "_run_async", fake_run_async) - rr = await srv.repo_search(query="q", limit=2, per_path=2, rerank_enabled=True, compact=True) + rr = await server.repo_search(query="q", limit=2, per_path=2, rerank_enabled=True, compact=True) # Fallback should keep original order from hybrid; timeout counter incremented assert rr.get("used_rerank") is False assert rr.get("rerank_counters", {}).get("timeout", 0) >= 1