Merged
Commits
20 commits
0bd6838
feat(vscode-ext): add MCP config writing for Claude Code and Windsurf
voarsh Nov 22, 2025
441211b
feat(vscode-ext): add Claude Code hook auto-configuration and bundle …
voarsh Nov 22, 2025
14cef19
feat(remote): enhance upload reliability and path resolution across e…
voarsh Nov 24, 2025
4d6839f
feat(ctx-hook): Enhance Claude prompt with context from a Qdrant coll…
voarsh Nov 25, 2025
bd0399c
feat(ctx): Load .env from workspace if available, improve path handli…
voarsh Nov 25, 2025
4d9574f
feat(vscode-extension): Bundle additional scripts and improve configu…
voarsh Nov 25, 2025
c8d4b88
chore(config): remove duplicate MULTI_REPO_MODE configuration in .env…
voarsh Nov 25, 2025
d91254a
Merge branch 'test' into vscode-extension-mcp-config-writer
voarsh Nov 25, 2025
9829f92
Updates Claude hook configuration logic
voarsh Nov 25, 2025
7237a12
Improves context scaffolding and file scanning
voarsh Nov 25, 2025
6165417
Updates documentation for Prompt+ and commands
voarsh Nov 25, 2025
e9d511a
Remove duplicate ctx script in extension - bundled at build time - ct…
voarsh Nov 25, 2025
89291e9
upload_client: Improves file change detection
voarsh Nov 25, 2025
c2e6b63
chore(ctx_config.example.json): Adds rewrite_max_tokens config option
voarsh Nov 25, 2025
685e023
feat(ingest_code): Adds GLM runtime support for pseudo tags
voarsh Nov 25, 2025
2f3b6ea
wire env-gated pseudo tag boost into hybrid_search and document REFRA…
voarsh Nov 25, 2025
cd2f2b1
Updates extension version
voarsh Nov 25, 2025
1ef692d
vscodeext: Surfaces Qdrant collection hint via config
voarsh Nov 25, 2025
95a8746
Adds AI agent usage guide for MCP Qdrant-Indexer
voarsh Nov 25, 2025
eb412ac
docs(claude example): Clarifies context_answer usage and query tips
voarsh Nov 25, 2025
5 changes: 4 additions & 1 deletion .env.example
@@ -1,6 +1,6 @@
# Qdrant connection
QDRANT_URL=http://localhost:6333
QDRANT_API_KEY=
# QDRANT_API_KEY=

# Multi-repo mode: 0=single-repo (default), 1=multi-repo
# Single-repo: All files go into one collection (COLLECTION_NAME)
@@ -117,6 +117,9 @@ REFRAG_ENCODER_MODEL=BAAI/bge-base-en-v1.5
REFRAG_PHI_PATH=/work/models/refrag_phi_768_to_dmodel.json
REFRAG_SENSE=heuristic

# Enable index-time pseudo descriptions for micro-chunks (requires REFRAG_DECODER)
# REFRAG_PSEUDO_DESCRIBE=1

# Llama.cpp sidecar (optional)
# Docker CPU-only (stable): http://llamacpp:8080
# Native GPU-accelerated (fast): http://localhost:8081
193 changes: 166 additions & 27 deletions ctx-hook-simple.sh
@@ -6,41 +6,108 @@
# Read JSON input from stdin
INPUT=$(cat)

# Extract the user message using jq
# Extract the prompt text from Claude's JSON payload
if command -v jq >/dev/null 2>&1; then
USER_MESSAGE=$(echo "$INPUT" | jq -r '.user_message')
USER_MESSAGE=$(echo "$INPUT" | jq -r '.prompt')
USER_CWD=$(echo "$INPUT" | jq -r '.cwd // empty')
else
echo "$INPUT"
exit 0
# Fallback: treat entire input as the prompt text
USER_MESSAGE="$INPUT"
fi

# Skip if empty message
if [ -z "$USER_MESSAGE" ] || [ "$USER_MESSAGE" = "null" ]; then
echo "$INPUT"
exit 0
fi

# Easy bypass patterns - any of these will skip ctx enhancement
if [[ "$USER_MESSAGE" =~ ^(noctx|raw|bypass|skip|no-enhance): ]] || \
[[ "$USER_MESSAGE" =~ ^\\ ]] || \
[[ "$USER_MESSAGE" =~ ^\< ]] || \
[[ "$USER_MESSAGE" =~ ^(/help|/clear|/exit|/quit) ]] || \
[[ "$USER_MESSAGE" =~ ^\?\s*$ ]] || \
[ ${#USER_MESSAGE} -lt 12 ]; then
echo "$INPUT"
exit 0
echo "$INPUT"
exit 0
fi

# Set working directory to where the hook script is located
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Determine workspace directory:
# - If CTX_WORKSPACE_DIR is already set, honor it.
# - If running from an embedded extension under ~/.windsurf-server/extensions,
# default to the caller's CWD (Claude/VS Code workspace root).
# - Otherwise (repo-local hook), default to the script directory so it works
# even when Claude runs from a parent folder.
if [ -n "${CTX_WORKSPACE_DIR:-}" ]; then
WORKSPACE_DIR="$CTX_WORKSPACE_DIR"
elif [[ "$SCRIPT_DIR" == */.windsurf-server/extensions/* ]]; then
WORKSPACE_DIR="$PWD"
else
WORKSPACE_DIR="$SCRIPT_DIR"
fi
export CTX_WORKSPACE_DIR="$WORKSPACE_DIR"

# If the workspace root does not contain ctx_config.json, but exactly one
# direct child directory does, treat that child directory as the effective
# workspace. This supports multi-repo workspaces where the ctx-enabled repo
# (with ctx_config.json and .env) lives one level below the VS Code root.
if [ ! -f "$WORKSPACE_DIR/ctx_config.json" ]; then
FOUND_SUBDIR=""
for candidate in "$WORKSPACE_DIR"/*; do
if [ -d "$candidate" ] && [ -f "$candidate/ctx_config.json" ]; then
if [ -z "$FOUND_SUBDIR" ]; then
FOUND_SUBDIR="$candidate"
else
# More than one candidate; ambiguous, keep original WORKSPACE_DIR
FOUND_SUBDIR=""
break
fi
fi
done
if [ -n "$FOUND_SUBDIR" ]; then
WORKSPACE_DIR="$FOUND_SUBDIR"
export CTX_WORKSPACE_DIR="$WORKSPACE_DIR"
fi
fi

# Prefer workspace-level ctx_config.json, fall back to one next to the script
if [ -f "$WORKSPACE_DIR/ctx_config.json" ]; then
CONFIG_FILE="$WORKSPACE_DIR/ctx_config.json"
elif [ -f "$SCRIPT_DIR/ctx_config.json" ]; then
CONFIG_FILE="$SCRIPT_DIR/ctx_config.json"
else
CONFIG_FILE=""
fi

# Optional: enable file logging when CTX_HOOK_LOG=1 or a .ctx_hook_log marker
# file exists in the workspace. When disabled, no log file is written.
if [ "${CTX_HOOK_LOG:-0}" = "1" ] || [ -f "$WORKSPACE_DIR/.ctx_hook_log" ]; then
LOG_FILE="$WORKSPACE_DIR/ctx-hook.log"
LOG_ENABLED=1
else
LOG_ENABLED=0
fi

cd "$SCRIPT_DIR"

# Optional: enable extra debug information in the JSON payload
# when CTX_HOOK_DEBUG=1 is set in the environment, or when a
# .ctx_hook_debug marker file exists in the workspace.
CTX_HOOK_DEBUG="${CTX_HOOK_DEBUG:-}"
if [ -z "$CTX_HOOK_DEBUG" ] && [ -f "$WORKSPACE_DIR/.ctx_hook_debug" ]; then
CTX_HOOK_DEBUG="1"
fi

# Log the incoming payload when logging is enabled
if [ "$LOG_ENABLED" = "1" ]; then
{
echo "[$(date -Iseconds)] HOOK INVOKED"
echo "PWD = $PWD"
echo "WORKSPACE_DIR = $WORKSPACE_DIR"
echo "INPUT = <<EOF"
echo "$INPUT"
echo "EOF"
echo
} >> "$LOG_FILE"
fi

# Read all settings from ctx_config.json
CONFIG_FILE="ctx_config.json"
if [ -f "$CONFIG_FILE" ]; then
CTX_COLLECTION=$(grep -o '"default_collection"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"default_collection"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
if [ -n "$CONFIG_FILE" ] && [ -f "$CONFIG_FILE" ]; then
CTX_COLLECTION=$(grep -o '"default_collection"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"default_collection"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' )
REFRAG_RUNTIME=$(grep -o '"refrag_runtime"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"refrag_runtime"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' || echo "glm")
GLM_API_KEY=$(grep -o '"glm_api_key"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"glm_api_key"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
GLM_API_KEY=$(grep -o '"glm_api_key"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"glm_api_key"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' )
GLM_API_BASE=$(grep -o '"glm_api_base"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"glm_api_base"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
GLM_MODEL=$(grep -o '"glm_model"[[:space:]]*:[[:space:]]*"[^\"]*"' "$CONFIG_FILE" | sed 's/.*"glm_model"[[:space:]]*:[[:space:]]*"\([^\"]*\)".*/\1/' || echo "glm-4.6")
CTX_DEFAULT_MODE=$(grep -o '"default_mode"[[:space:]]*:[[:space:]]*"[^\"]*"' "$CONFIG_FILE" | sed 's/.*"default_mode"[[:space:]]*:[[:space:]]*"\([^\"]*\)".*/\1/')
@@ -65,8 +132,39 @@ CTX_REWRITE_MAX_TOKENS=${CTX_REWRITE_MAX_TOKENS:-320}
# Export GLM/context environment variables from config
export REFRAG_RUNTIME GLM_API_KEY GLM_API_BASE GLM_MODEL CTX_REQUIRE_CONTEXT CTX_RELEVANCE_GATE CTX_MIN_RELEVANCE CTX_REWRITE_MAX_TOKENS

# Build ctx command with optional mode flag
CTX_CMD=(python3 scripts/ctx.py)
# Easy bypass patterns - any of these will skip ctx enhancement
BYPASS_REASON=""
if [[ "$USER_MESSAGE" =~ ^(noctx|raw|bypass|skip|no-enhance): ]]; then
BYPASS_REASON="prefix_tag"
elif [[ "$USER_MESSAGE" =~ ^\\ ]]; then
BYPASS_REASON="leading_backslash"
elif [[ "$USER_MESSAGE" =~ ^\< ]]; then
BYPASS_REASON="leading_angle_bracket"
elif [[ "$USER_MESSAGE" =~ ^(/help|/clear|/exit|/quit) ]]; then
BYPASS_REASON="slash_command"
elif [[ "$USER_MESSAGE" =~ ^\?\s*$ ]]; then
BYPASS_REASON="short_question_mark"
elif [ ${#USER_MESSAGE} -lt 12 ]; then
BYPASS_REASON="too_short"
fi

if [ -n "$BYPASS_REASON" ]; then
if [ "$CTX_HOOK_DEBUG" = "1" ]; then
echo "[ctx_debug status=bypassed reason=$BYPASS_REASON script_dir=$SCRIPT_DIR workspace_dir=$WORKSPACE_DIR config_file=$CONFIG_FILE] $USER_MESSAGE"
else
echo "$USER_MESSAGE"
fi
exit 0
fi

# Build ctx command with optional unicorn flag
if [ -f "$SCRIPT_DIR/ctx.py" ]; then
# Use embedded ctx.py when running from the packaged extension
CTX_CMD=(python3 "$SCRIPT_DIR/ctx.py")
else
# Fallback for repo-local usage
CTX_CMD=(python3 scripts/ctx.py)
fi
case "${CTX_DEFAULT_MODE,,}" in
unicorn)
CTX_CMD+=("--unicorn")
@@ -77,8 +175,49 @@ case "${CTX_DEFAULT_MODE,,}" in
esac
CTX_CMD+=("$USER_MESSAGE" --collection "$CTX_COLLECTION")

# Run ctx with collection (extended timeout for multi-pass unicorn mode)
ENHANCED=$(timeout 60 "${CTX_CMD[@]}" 2>/dev/null || echo "$USER_MESSAGE")
# Run ctx with collection
# When CTX_DEBUG_PATHS is enabled, preserve stderr so path-level debug from ctx.py is visible
if [ -n "${CTX_DEBUG_PATHS:-}" ]; then
ENHANCED=$(timeout 120s "${CTX_CMD[@]}" 2>&1 || echo "$USER_MESSAGE")
else
ENHANCED=$(timeout 120s "${CTX_CMD[@]}" 2>/dev/null || echo "$USER_MESSAGE")
fi

if [ -n "$WORKSPACE_DIR" ] && [ "${CTX_ROOT_HINT:-1}" != "0" ]; then
HINT="The user's project root directory is \"$WORKSPACE_DIR\" (WORKSPACE_DIR)."
if [ "${CTX_SURFACE_COLLECTION_HINT:-0}" = "1" ] && [ -n "$CTX_COLLECTION" ]; then
HINT="$HINT The Qdrant collection name for this workspace is \"$CTX_COLLECTION\". Specify this collection when using memory or qdrant-indexer MCP tool (if available)."
fi
if [ -n "${USER_CWD:-}" ]; then
HINT="$HINT Claude's current working directory is \"$USER_CWD\" (user_cwd). \
When using tools like Read, Search, or Bash, treat WORKSPACE_DIR as the root \
for repository files. If WORKSPACE_DIR and user_cwd differ, do not assume \
files live under user_cwd; use the full paths under WORKSPACE_DIR or the \
project-relative paths shown above."
fi
ENHANCED="$HINT

$ENHANCED"
fi

# Log ctx output when logging is enabled
if [ "$LOG_ENABLED" = "1" ]; then
{
echo "[$(date -Iseconds)] CTX_OUTPUT"
echo "PROMPT = $USER_MESSAGE"
echo "ENHANCED = <<EOF"
echo "$ENHANCED"
echo "EOF"
echo
} >> "$LOG_FILE"
fi

# Replace user message with enhanced version using jq
echo "$INPUT" | jq --arg enhanced "$ENHANCED" '.user_message = $enhanced'
if [ "$CTX_HOOK_DEBUG" = "1" ]; then
HOOK_STATUS="unchanged"
if [ "$ENHANCED" != "$USER_MESSAGE" ]; then
HOOK_STATUS="enhanced"
fi
echo "[ctx_debug status=$HOOK_STATUS script_dir=$SCRIPT_DIR workspace_dir=$WORKSPACE_DIR config_file=$CONFIG_FILE] $ENHANCED"
else
echo "$ENHANCED"
fi
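The bypass check added in this diff can be exercised standalone. A minimal bash sketch, with the patterns copied from the hook above and an illustrative prompt string:

```shell
# Standalone sketch of the hook's bypass detection (patterns mirror the
# diff above; USER_MESSAGE is an illustrative example, not real input).
USER_MESSAGE="noctx: just answer directly, no retrieval"
BYPASS_REASON=""
if [[ "$USER_MESSAGE" =~ ^(noctx|raw|bypass|skip|no-enhance): ]]; then
  BYPASS_REASON="prefix_tag"
elif [[ "$USER_MESSAGE" =~ ^\\ ]]; then
  BYPASS_REASON="leading_backslash"
elif [ ${#USER_MESSAGE} -lt 12 ]; then
  BYPASS_REASON="too_short"
fi
# Prints "prefix_tag" for this message, empty for a normal long prompt
echo "$BYPASS_REASON"
```

A `noctx:` prefix therefore skips enhancement entirely, while ordinary prompts of twelve characters or more fall through to the ctx pipeline.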
5 changes: 4 additions & 1 deletion ctx_config.example.json
@@ -11,6 +11,9 @@
"streaming": true,
"require_context": true,
"relevance_gate_enabled": false,
"min_relevance": 0.1
"min_relevance": 0.1,
"rewrite_max_tokens": 420,
"surface_qdrant_collection_hint": true

}

3 changes: 3 additions & 0 deletions docker-compose.dev-remote.yml
@@ -349,6 +349,9 @@ services:
- REMOTE_UPLOAD_ENABLED=1
- REMOTE_UPLOAD_MODE=development
- REMOTE_UPLOAD_DEBUG=1
- REMOTE_UPLOAD_TIMEOUT=300
- REMOTE_UPLOAD_MAX_RETRIES=5
- MAX_BUNDLE_SIZE_MB=256

# Qdrant configuration
- QDRANT_TIMEOUT=${QDRANT_TIMEOUT}
128 changes: 128 additions & 0 deletions docs/CLAUDE.example.md
@@ -0,0 +1,128 @@
This file is intended for AI agents (Claude, etc.) using the Context‑Engine Qdrant‑Indexer and Memory MCP tools. It encodes project‑specific best practices; adapt it per‑repo.


Agentic AI Project Rules: When to Use MCP Qdrant-Indexer vs Grep

Core Decision Rules (for AI agents)

- Use MCP Qdrant-Indexer when:
- You are exploring or don't know exact strings/symbols.
- You need semantic or cross-file understanding (relationships, patterns, architecture).
- You want ranked results with surrounding context, not just line hits.

- Use grep when:
- You know the exact string/function/variable or error message.
- You need fast literal search or are extremely token/latency constrained.

Quick Heuristics:

- If you know the exact string → start with grep, then switch to MCP for broader context.
- If the question is conceptual/architectural → start with MCP.
- If you need rich context/snippets around matches → MCP.
- If you just need to confirm existence/location → grep.

Grep Anti-Patterns:

# DON'T - Wasteful when semantic search needed
grep -r "auth" . # → Use MCP: "authentication mechanisms"
grep -r "cache" . # → Use MCP: "caching strategies"
grep -r "error" . # → Use MCP: "error handling patterns"
grep -r "database" . # → Use MCP: "database operations"

# DO - Efficient for exact matches
grep -rn "UserAlreadyExists" . # Specific error class
grep -rn "def authenticate_user" . # Exact function name
grep -rn "REDIS_HOST" . # Exact environment variable

MCP Tool Patterns:

# DO - Use concept/keyword-style queries (short natural-language fragments)
"input validation mechanisms"
"database connection handling"
"performance bottlenecks in request path"
"places where user sessions are managed"
"logging and error reporting patterns"

MCP Qdrant-Indexer Specific Knobs

Essential Parameters:

- limit: Control result count (3-8 for efficiency)
- per_path: Limit results per file (1-2 prevents redundancy)
- compact=true: Reduces token usage by 60-80%
- include_snippet=false: Headers only when speed matters
- collection: Target specific codebases for precision
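Put together, a discovery-pass call shaped by these knobs might look like the following. The field names come from the list above; the exact request schema depends on your MCP client, so treat this as a sketch:

```shell
# Hypothetical repo_search parameters for a cheap first pass:
# few results, one hit per file, compact output, no snippets.
PARAMS='{
  "query": "authentication mechanisms",
  "collection": "my-repo",
  "limit": 3,
  "per_path": 1,
  "compact": true,
  "include_snippet": false
}'
echo "$PARAMS"
```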

Performance Optimization:

- Start with limit=3, compact=true for discovery
- Increase to limit=5, include_snippet=true for details
- Use language and under filters to narrow scope
- Set rerank_enabled=false for faster but less accurate results
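The escalation pattern above amounts to two calls. In this sketch, `send_mcp` is a placeholder for however your client issues MCP requests, not a real CLI, and the query, language, and path values are illustrative:

```shell
# Placeholder dispatcher - substitute your actual MCP client call.
send_mcp() { echo "repo_search $*"; }

# Pass 1: discovery - minimal tokens, headers only
send_mcp --query "caching strategies" --limit 3 --compact true --include_snippet false

# Pass 2: details - only after pass 1 surfaced promising files
send_mcp --query "caching strategies" --limit 5 --include_snippet true --language python --under "src/cache"
```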

When to Use Advanced Features:

- rerank_enabled=true: For complex queries needing best relevance
- context_lines=5+: When you need implementation details
- multiple collections: Cross-repo architectural analysis
- symbol filtering: When looking for specific function/class types

Anti-Patterns to Avoid:

- Don't use limit=20 with include_snippet=true (token waste)
- Don't search without collection specification (noise)
- Don't ignore per_path limits (duplicate results from same file)
- Don't use context lines for pure discovery (unnecessary tokens)

Tool Roles Cheat Sheet:

- repo_search / code_search:
- Use for: finding relevant files/spans and inspecting raw code.
- Think: "where is X implemented?", "show me usages of Y".
- context_search:
- Use for: combining code hits with memory/docs when both matter.
- Good for: "give me related code plus any notes/docs I wrote".
- context_answer:
- Use for: short natural-language summaries/explanations of specific modules or tools, grounded in code/docs with citations.
- Good for: "What does scripts/standalone_upload_client.py do at a high level?", "Summarize the remote upload client pipeline.".

Query Phrasing Tips for context_answer:

- Prefer behavior/architecture questions about a single module or tool:
- "What does scripts/standalone_upload_client.py do at a high level?"
- "Summarize how the remote upload client interacts with the indexer service."
- If you care about a specific file, mention it explicitly:
- "What does ingest_code.py do?", "Explain ensureIndexedWatcher in extension.js".
- Mentioning a specific filename can bias retrieval to that file; for cross-file wiring
questions, prefer behavior-describing queries without filenames.
- For very cross-file or multi-part questions, you can:
- First use repo_search to discover key files and read critical code directly,
- Then call context_answer to summarize behavior, using a behavior-focused question that doesn't over-specify filenames.
- Avoid using context_answer as a primary debugger for low-level helper/env behavior; prefer repo_search + direct code reading for detailed semantics.

Remember: the MCP tools themselves expose detailed descriptions and parameter docs.
Use those for exact knobs; this guide is about choosing the right tool and shaping good queries.

MCP Tool Families (for AI agents)

- Indexer / Qdrant tools:
- qdrant_index_root, qdrant_index, qdrant_prune
- qdrant_list, qdrant_status
- workspace_info, list_workspaces, collection_map
- set_session_defaults
- Search / QA tools:
- repo_search, code_search, context_search, context_answer
- search_tests_for, search_config_for, search_callers_for, search_importers_for
- change_history_for_path, expand_query
- Memory tools:
- memory.set_session_defaults, memory.store, memory.find

Additional behavioral tips:

- Call set_session_defaults (indexer and memory) early in a session so subsequent
calls inherit the right collection without repeating it in every request.
- Use context_search with include_memories and per_source_limits when you want
blended code + memory results instead of calling repo_search and memory.find
separately.
- Treat expand_query and the expand flag on context_answer as expensive options:
only use them after a normal search/answer attempt failed to find good context.
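For example, a session-setup call issued once up front might look like this. The tool name comes from the list above, but the payload shape is an assumption; check the tool's own parameter docs for the real schema:

```shell
# Hypothetical one-time session setup so later indexer/memory calls
# inherit the collection without repeating it per request.
DEFAULTS='{
  "tool": "set_session_defaults",
  "params": { "collection": "my-repo" }
}'
echo "$DEFAULTS"
```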