From 0bd6838b37510ee15509f346d77fc23961fc7bb1 Mon Sep 17 00:00:00 2001 From: Reese Date: Sat, 22 Nov 2025 14:43:07 +0000 Subject: [PATCH 01/19] feat(vscode-ext): add MCP config writing for Claude Code and Windsurf - Add `Write MCP Config (.mcp.json)` command to write project-local MCP server entries - Support both Claude Code (.mcp.json) and Windsurf (mcp_config.json) MCP configurations - Add settings for mcpIndexerUrl, mcpMemoryUrl, mcpClaudeEnabled, and mcpWindsurfEnabled - Auto-update MCP configs when relevant settings change - Handle Windows cmd wrapper for npx mcp-remote in Claude configs - Support custom Windsurf MCP path --- .../context-engine-uploader/README.md | 2 + .../context-engine-uploader/extension.js | 177 +++++++++++++++++- .../context-engine-uploader/package.json | 42 ++++- 3 files changed, 204 insertions(+), 17 deletions(-) diff --git a/vscode-extension/context-engine-uploader/README.md b/vscode-extension/context-engine-uploader/README.md index 65d9d5bb..a3decdc2 100644 --- a/vscode-extension/context-engine-uploader/README.md +++ b/vscode-extension/context-engine-uploader/README.md @@ -18,11 +18,13 @@ Configuration - `Target Path` is auto-filled from the workspace but can be overridden if you need to upload a different folder. - **Python dependencies:** the extension runs the standalone upload client via your configured `pythonPath`. Ensure the interpreter has `requests`, `urllib3`, and `charset_normalizer` installed. Run `python3 -m pip install requests urllib3 charset_normalizer` (or replace `python3` with your configured path) before starting the uploader. - **Path mapping:** `Host Root` + `Container Root` control how local paths are rewritten before reaching the remote service. By default the host root mirrors your `Target Path` and the container root is `/work`, which keeps Windows paths working without extra config. 
+ - **Claude Code MCP config:** `MCP Indexer Url` and `MCP Memory Url` control the URLs written into the project-local `.mcp.json` when you run the `Write MCP Config` command. This is only for configuring Claude Code MCP clients; other MCP integrations can be added separately later. Commands -------- - Command Palette → “Context Engine Uploader” to access Start/Stop/Restart/Index Codebase. - Status-bar button (`Index Codebase`) mirrors the same behavior and displays progress. + - `Context Engine Uploader: Write MCP Config (.mcp.json)` writes or updates a project-local `.mcp.json` with MCP server entries for the Qdrant indexer and memory/search endpoints, using the configured MCP URLs. Logs ---- diff --git a/vscode-extension/context-engine-uploader/extension.js b/vscode-extension/context-engine-uploader/extension.js index d6cd6119..f7fec6d4 100644 --- a/vscode-extension/context-engine-uploader/extension.js +++ b/vscode-extension/context-engine-uploader/extension.js @@ -2,6 +2,7 @@ const vscode = require('vscode'); const { spawn, spawnSync } = require('child_process'); const path = require('path'); const fs = require('fs'); +const os = require('os'); let outputChannel; let watchProcess; let forceProcess; @@ -33,6 +34,9 @@ function activate(context) { vscode.window.showInformationMessage('Context Engine indexing started.'); runSequence('force').catch(error => log(`Index failed: ${error instanceof Error ? error.message : String(error)}`)); }); + const mcpConfigDisposable = vscode.commands.registerCommand('contextEngineUploader.writeMcpConfig', () => { + writeMcpConfig().catch(error => log(`MCP config write failed: ${error instanceof Error ? error.message : String(error)}`)); + }); const configDisposable = vscode.workspace.onDidChangeConfiguration(event => { if (event.affectsConfiguration('contextEngineUploader') && watchProcess) { runSequence('auto').catch(error => log(`Auto-restart failed: ${error instanceof Error ? 
error.message : String(error)}`)); @@ -40,11 +44,22 @@ function activate(context) { if (event.affectsConfiguration('contextEngineUploader.targetPath')) { updateStatusBarTooltip(); } + if ( + event.affectsConfiguration('contextEngineUploader.mcpIndexerUrl') || + event.affectsConfiguration('contextEngineUploader.mcpMemoryUrl') || + event.affectsConfiguration('contextEngineUploader.mcpConfigEnabled') || + event.affectsConfiguration('contextEngineUploader.mcpClaudeEnabled') || + event.affectsConfiguration('contextEngineUploader.mcpWindsurfEnabled') || + event.affectsConfiguration('contextEngineUploader.windsurfMcpPath') + ) { + // Best-effort auto-update of project-local .mcp.json when MCP settings change + writeMcpConfig().catch(error => log(`Auto MCP config write failed: ${error instanceof Error ? error.message : String(error)}`)); + } }); const workspaceDisposable = vscode.workspace.onDidChangeWorkspaceFolders(() => { ensureTargetPathConfigured(); }); - context.subscriptions.push(startDisposable, stopDisposable, restartDisposable, indexDisposable, configDisposable, workspaceDisposable); + context.subscriptions.push(startDisposable, stopDisposable, restartDisposable, indexDisposable, mcpConfigDisposable, configDisposable, workspaceDisposable); const config = vscode.workspace.getConfiguration('contextEngineUploader'); ensureTargetPathConfigured(); if (config.get('runOnStartup')) { @@ -392,18 +407,50 @@ function buildChildEnv(options) { if (options.containerRoot) { env.CONTAINER_ROOT = options.containerRoot; } - try { - const libsPath = path.join(options.workingDirectory, 'python_libs'); - if (fs.existsSync(libsPath)) { - const existing = process.env.PYTHONPATH || ''; - env.PYTHONPATH = existing ? `${libsPath}${path.delimiter}${existing}` : libsPath; - log(`Detected bundled python_libs at ${libsPath}; setting PYTHONPATH for child process.`); - } - } catch (error) { - log(`Failed to configure PYTHONPATH for bundled deps: ${error instanceof Error ? 
error.message : String(error)}`); + try { + const libsPath = path.join(options.workingDirectory, 'python_libs'); + if (fs.existsSync(libsPath)) { + const existing = process.env.PYTHONPATH || ''; + env.PYTHONPATH = existing ? `${libsPath}${path.delimiter}${existing}` : libsPath; + log(`Detected bundled python_libs at ${libsPath}; setting PYTHONPATH for child process.`); + } + } catch (error) { + log(`Failed to configure PYTHONPATH for bundled deps: ${error instanceof Error ? error.message : String(error)}`); } return env; } +async function writeMcpConfig() { + const settings = vscode.workspace.getConfiguration('contextEngineUploader'); + const claudeSetting = settings.get('mcpClaudeEnabled'); + const legacySetting = settings.get('mcpConfigEnabled'); + const claudeEnabled = typeof claudeSetting === 'boolean' ? claudeSetting : legacySetting; + const windsurfEnabled = settings.get('mcpWindsurfEnabled', false); + if (!claudeEnabled && !windsurfEnabled) { + vscode.window.showInformationMessage('Context Engine Uploader: MCP config writing is disabled in settings.'); + return; + } + const indexerUrl = (settings.get('mcpIndexerUrl') || 'http://localhost:8001/sse').trim(); + const memoryUrl = (settings.get('mcpMemoryUrl') || 'http://localhost:8000/sse').trim(); + let wroteAny = false; + if (claudeEnabled) { + const root = getWorkspaceFolderPath(); + if (!root) { + vscode.window.showErrorMessage('Context Engine Uploader: open a folder before writing .mcp.json.'); + } else { + const result = await writeClaudeMcpServers(root, indexerUrl, memoryUrl); + wroteAny = wroteAny || result; + } + } + if (windsurfEnabled) { + const customPath = (settings.get('windsurfMcpPath') || '').trim(); + const windsPath = customPath || getDefaultWindsurfMcpPath(); + const result = await writeWindsurfMcpServers(windsPath, indexerUrl, memoryUrl); + wroteAny = wroteAny || result; + } + if (!wroteAny) { + log('Context Engine Uploader: MCP config write skipped (no targets succeeded).'); + } +} function 
deactivate() { return stopProcesses(); } @@ -411,3 +458,113 @@ module.exports = { activate, deactivate }; + +function getDefaultWindsurfMcpPath() { + return path.join(os.homedir(), '.codeium', 'windsurf', 'mcp_config.json'); +} + +async function writeClaudeMcpServers(root, indexerUrl, memoryUrl) { + const configPath = path.join(root, '.mcp.json'); + let config = { mcpServers: {} }; + if (fs.existsSync(configPath)) { + try { + const raw = fs.readFileSync(configPath, 'utf8'); + const parsed = JSON.parse(raw); + if (parsed && typeof parsed === 'object') { + config = parsed; + } + } catch (error) { + vscode.window.showErrorMessage('Context Engine Uploader: existing .mcp.json is invalid JSON; not modified.'); + log(`Failed to parse .mcp.json: ${error instanceof Error ? error.message : String(error)}`); + return false; + } + } + if (!config.mcpServers || typeof config.mcpServers !== 'object') { + config.mcpServers = {}; + } + log(`Preparing to write .mcp.json at ${configPath} with indexerUrl=${indexerUrl || '""'} memoryUrl=${memoryUrl || '""'}`); + const isWindows = process.platform === 'win32'; + const makeServer = url => { + if (isWindows) { + return { + command: 'cmd', + args: ['/c', 'npx', 'mcp-remote', url, '--transport', 'sse-only'], + env: {} + }; + } + return { + command: 'npx', + args: ['mcp-remote', url, '--transport', 'sse-only'], + env: {} + }; + }; + const servers = config.mcpServers; + if (indexerUrl) { + servers['qdrant-indexer'] = makeServer(indexerUrl); + } + if (memoryUrl) { + servers.memory = makeServer(memoryUrl); + } + try { + const json = JSON.stringify(config, null, 2) + '\n'; + fs.writeFileSync(configPath, json, 'utf8'); + vscode.window.showInformationMessage('Context Engine Uploader: .mcp.json updated for Context Engine MCP servers.'); + log(`Wrote .mcp.json at ${configPath}`); + return true; + } catch (error) { + vscode.window.showErrorMessage('Context Engine Uploader: failed to write .mcp.json.'); + log(`Failed to write .mcp.json: ${error 
instanceof Error ? error.message : String(error)}`); + return false; + } +} + +async function writeWindsurfMcpServers(configPath, indexerUrl, memoryUrl) { + try { + fs.mkdirSync(path.dirname(configPath), { recursive: true }); + } catch (error) { + log(`Failed to ensure Windsurf MCP directory: ${error instanceof Error ? error.message : String(error)}`); + vscode.window.showErrorMessage('Context Engine Uploader: failed to prepare Windsurf MCP directory.'); + return false; + } + let config = { mcpServers: {} }; + if (fs.existsSync(configPath)) { + try { + const raw = fs.readFileSync(configPath, 'utf8'); + const parsed = JSON.parse(raw); + if (parsed && typeof parsed === 'object') { + config = parsed; + } + } catch (error) { + vscode.window.showErrorMessage('Context Engine Uploader: existing Windsurf mcp_config.json is invalid JSON; not modified.'); + log(`Failed to parse Windsurf mcp_config.json: ${error instanceof Error ? error.message : String(error)}`); + return false; + } + } + if (!config.mcpServers || typeof config.mcpServers !== 'object') { + config.mcpServers = {}; + } + log(`Preparing to write Windsurf mcp_config.json at ${configPath} with indexerUrl=${indexerUrl || '""'} memoryUrl=${memoryUrl || '""'}`); + const makeServer = url => ({ + command: 'npx', + args: ['mcp-remote', url, '--transport', 'sse-only'], + env: {} + }); + const servers = config.mcpServers; + if (indexerUrl) { + servers['qdrant-indexer'] = makeServer(indexerUrl); + } + if (memoryUrl) { + servers.memory = makeServer(memoryUrl); + } + try { + const json = JSON.stringify(config, null, 2) + '\n'; + fs.writeFileSync(configPath, json, 'utf8'); + vscode.window.showInformationMessage(`Context Engine Uploader: Windsurf MCP config updated at ${configPath}.`); + log(`Wrote Windsurf mcp_config.json at ${configPath}`); + return true; + } catch (error) { + vscode.window.showErrorMessage('Context Engine Uploader: failed to write Windsurf mcp_config.json.'); + log(`Failed to write Windsurf 
mcp_config.json: ${error instanceof Error ? error.message : String(error)}`); + return false; + } +} diff --git a/vscode-extension/context-engine-uploader/package.json b/vscode-extension/context-engine-uploader/package.json index 4e4c2ac2..7dad0611 100644 --- a/vscode-extension/context-engine-uploader/package.json +++ b/vscode-extension/context-engine-uploader/package.json @@ -10,12 +10,6 @@ "categories": [ "Other" ], - "activationEvents": [ - "onStartupFinished", - "onCommand:contextEngineUploader.start", - "onCommand:contextEngineUploader.stop", - "onCommand:contextEngineUploader.restart" - ], "main": "./extension.js", "icon": "assets/icon.png", "contributes": { @@ -35,6 +29,10 @@ { "command": "contextEngineUploader.indexCodebase", "title": "Context Engine Uploader: Index Codebase" + }, + { + "command": "contextEngineUploader.writeMcpConfig", + "title": "Context Engine Uploader: Write MCP Config (.mcp.json)" } ], "configuration": { @@ -49,7 +47,7 @@ "contextEngineUploader.pythonPath": { "type": "string", "default": "python3", - "description": "Python executable used to run scripts.remote_upload_client." + "description": "Python executable used to run scripts.remote_upload_client. E.G. 'python' or 'python3'" }, "contextEngineUploader.scriptWorkingDirectory": { "type": "string", @@ -102,6 +100,36 @@ "type": "number", "default": 100, "description": "Maximum upload bundle size enforced by the Context Engine server (MB)." + }, + "contextEngineUploader.mcpConfigEnabled": { + "type": "boolean", + "default": true, + "description": "[Deprecated] Legacy toggle for Claude MCP config writing. Use mcpClaudeEnabled instead." + }, + "contextEngineUploader.mcpClaudeEnabled": { + "type": "boolean", + "default": true, + "description": "Enable writing the project-local .mcp.json used by Claude Code MCP clients." 
+ }, + "contextEngineUploader.mcpWindsurfEnabled": { + "type": "boolean", + "default": false, + "description": "Enable writing Windsurf's global MCP config (requires Windsurf or compatible clients)." + }, + "contextEngineUploader.mcpIndexerUrl": { + "type": "string", + "default": "http://localhost:8001/sse", + "description": "Claude Code MCP server URL for the Qdrant indexer. Used when writing the project-local .mcp.json via 'Write MCP Config'." + }, + "contextEngineUploader.mcpMemoryUrl": { + "type": "string", + "default": "http://localhost:8000/sse", + "description": "Claude Code MCP server URL for the memory/search MCP server. Used when writing the project-local .mcp.json via 'Write MCP Config'." + }, + "contextEngineUploader.windsurfMcpPath": { + "type": "string", + "default": "", + "description": "Optional override for Windsurf's mcp_config.json path. Defaults to %USERPROFILE%/.codeium/windsurf/mcp_config.json." } } } From 441211bd164e2585eb7e56c8f9e14d7d35375f6a Mon Sep 17 00:00:00 2001 From: Reese Date: Sat, 22 Nov 2025 15:32:54 +0000 Subject: [PATCH 02/19] feat(vscode-ext): add Claude Code hook auto-configuration and bundle hook scripts - Add `claudeHookEnabled` setting to enable automatic Claude Code hook configuration - Linux only - Bundle ctx-hook-simple.sh and ctx.py into VSIX for reference - Write .claude/settings.local.json with UserPromptSubmit hook when enabled (for CTX) - Auto-update hook config when claudeHookEnabled setting changes - Add onStartupFinished activation event for earlier extension initialization --- vscode-extension/build/build.bat | 6 ++ vscode-extension/build/build.sh | 11 +++ .../context-engine-uploader/extension.js | 82 +++++++++++++++++-- .../context-engine-uploader/package.json | 13 +-- 4 files changed, 99 insertions(+), 13 deletions(-) diff --git a/vscode-extension/build/build.bat b/vscode-extension/build/build.bat index 8696ca5d..24878f88 100644 --- a/vscode-extension/build/build.bat +++ b/vscode-extension/build/build.bat @@ 
-13,6 +13,8 @@ for %%I in ("..\..\scripts\standalone_upload_client.py") do set "SRC_SCRIPT=%%~f set "CLIENT=standalone_upload_client.py" set "STAGE_DIR=%OUT_DIR%\extension-stage" set "BUILD_RESULT=0" +for %%I in ("..\..\ctx-hook-simple.sh") do set "HOOK_SRC=%%~fI" +for %%I in ("..\..\scripts\ctx.py") do set "CTX_SRC=%%~fI" echo Building clean Context Engine Uploader extension... @@ -55,6 +57,10 @@ if errorlevel 1 ( goto cleanup ) +REM Bundle ctx hook script and ctx CLI into the staged extension for reference +if exist "%HOOK_SRC%" copy /Y "%HOOK_SRC%" "%STAGE_DIR%\ctx-hook-simple.sh" >nul +if exist "%CTX_SRC%" copy /Y "%CTX_SRC%" "%STAGE_DIR%\ctx.py" >nul + REM Optional: bundle Python dependencies into the staged extension when requested if "%BUNDLE_DEPS%"=="1" ( echo Bundling Python dependencies into staged extension using %PYTHON_BIN%... diff --git a/vscode-extension/build/build.sh b/vscode-extension/build/build.sh index 783832f3..c665fc69 100644 --- a/vscode-extension/build/build.sh +++ b/vscode-extension/build/build.sh @@ -9,6 +9,8 @@ CLIENT="standalone_upload_client.py" STAGE_DIR="$OUT_DIR/extension-stage" BUNDLE_DEPS="${1:-}" PYTHON_BIN="${PYTHON_BIN:-python3}" +HOOK_SRC="$SCRIPT_DIR/../../ctx-hook-simple.sh" +CTX_SRC="$SCRIPT_DIR/../../scripts/ctx.py" cleanup() { rm -rf "$STAGE_DIR" @@ -34,6 +36,15 @@ cp -a "$EXT_DIR/." "$STAGE_DIR/" cp "$OUT_DIR/$CLIENT" "$STAGE_DIR/$CLIENT" chmod +x "$STAGE_DIR/$CLIENT" +# Bundle ctx hook script and ctx CLI into the staged extension for reference +if [[ -f "$HOOK_SRC" ]]; then + cp "$HOOK_SRC" "$STAGE_DIR/ctx-hook-simple.sh" + chmod +x "$STAGE_DIR/ctx-hook-simple.sh" +fi +if [[ -f "$CTX_SRC" ]]; then + cp "$CTX_SRC" "$STAGE_DIR/ctx.py" +fi + # Optional: bundle Python deps into the staged extension when requested if [[ "$BUNDLE_DEPS" == "--bundle-deps" ]]; then echo "Bundling Python dependencies into staged extension using $PYTHON_BIN..." 
diff --git a/vscode-extension/context-engine-uploader/extension.js b/vscode-extension/context-engine-uploader/extension.js index f7fec6d4..5151c484 100644 --- a/vscode-extension/context-engine-uploader/extension.js +++ b/vscode-extension/context-engine-uploader/extension.js @@ -11,6 +11,7 @@ let statusBarItem; let statusMode = 'idle'; const REQUIRED_PYTHON_MODULES = ['requests', 'urllib3', 'charset_normalizer']; const DEFAULT_CONTAINER_ROOT = '/work'; +const CLAUDE_HOOK_COMMAND = '/home/coder/project/Context-Engine/ctx-hook-simple.sh'; function activate(context) { outputChannel = vscode.window.createOutputChannel('Context Engine Upload'); context.subscriptions.push(outputChannel); @@ -47,12 +48,12 @@ function activate(context) { if ( event.affectsConfiguration('contextEngineUploader.mcpIndexerUrl') || event.affectsConfiguration('contextEngineUploader.mcpMemoryUrl') || - event.affectsConfiguration('contextEngineUploader.mcpConfigEnabled') || event.affectsConfiguration('contextEngineUploader.mcpClaudeEnabled') || event.affectsConfiguration('contextEngineUploader.mcpWindsurfEnabled') || - event.affectsConfiguration('contextEngineUploader.windsurfMcpPath') + event.affectsConfiguration('contextEngineUploader.windsurfMcpPath') || + event.affectsConfiguration('contextEngineUploader.claudeHookEnabled') ) { - // Best-effort auto-update of project-local .mcp.json when MCP settings change + // Best-effort auto-update of MCP + hook configurations when settings change writeMcpConfig().catch(error => log(`Auto MCP config write failed: ${error instanceof Error ? error.message : String(error)}`)); } }); @@ -421,17 +422,18 @@ function buildChildEnv(options) { } async function writeMcpConfig() { const settings = vscode.workspace.getConfiguration('contextEngineUploader'); - const claudeSetting = settings.get('mcpClaudeEnabled'); - const legacySetting = settings.get('mcpConfigEnabled'); - const claudeEnabled = typeof claudeSetting === 'boolean' ? 
claudeSetting : legacySetting; + const claudeEnabled = settings.get('mcpClaudeEnabled', true); const windsurfEnabled = settings.get('mcpWindsurfEnabled', false); - if (!claudeEnabled && !windsurfEnabled) { + const claudeHookEnabled = settings.get('claudeHookEnabled', false); + const isLinux = process.platform === 'linux'; + if (!claudeEnabled && !windsurfEnabled && !claudeHookEnabled) { vscode.window.showInformationMessage('Context Engine Uploader: MCP config writing is disabled in settings.'); return; } const indexerUrl = (settings.get('mcpIndexerUrl') || 'http://localhost:8001/sse').trim(); const memoryUrl = (settings.get('mcpMemoryUrl') || 'http://localhost:8000/sse').trim(); let wroteAny = false; + let hookWrote = false; if (claudeEnabled) { const root = getWorkspaceFolderPath(); if (!root) { @@ -447,7 +449,18 @@ async function writeMcpConfig() { const result = await writeWindsurfMcpServers(windsPath, indexerUrl, memoryUrl); wroteAny = wroteAny || result; } - if (!wroteAny) { + if (claudeHookEnabled) { + const root = getWorkspaceFolderPath(); + if (!root) { + vscode.window.showErrorMessage('Context Engine Uploader: open a folder before writing Claude hook config.'); + } else if (!isLinux) { + vscode.window.showWarningMessage('Context Engine Uploader: Claude hook auto-config is only wired for Linux/dev-remote at this time.'); + } else { + const result = await writeClaudeHookConfig(root, CLAUDE_HOOK_COMMAND); + hookWrote = hookWrote || result; + } + } + if (!wroteAny && !hookWrote) { log('Context Engine Uploader: MCP config write skipped (no targets succeeded).'); } } @@ -568,3 +581,56 @@ async function writeWindsurfMcpServers(configPath, indexerUrl, memoryUrl) { return false; } } + +async function writeClaudeHookConfig(root, commandPath) { + try { + const claudeDir = path.join(root, '.claude'); + fs.mkdirSync(claudeDir, { recursive: true }); + const settingsPath = path.join(claudeDir, 'settings.local.json'); + let config = {}; + if (fs.existsSync(settingsPath)) 
{ + try { + const raw = fs.readFileSync(settingsPath, 'utf8'); + const parsed = JSON.parse(raw); + if (parsed && typeof parsed === 'object') { + config = parsed; + } + } catch (error) { + vscode.window.showErrorMessage('Context Engine Uploader: existing .claude/settings.local.json is invalid JSON; not modified.'); + log(`Failed to parse .claude/settings.local.json: ${error instanceof Error ? error.message : String(error)}`); + return false; + } + } + if (!config.permissions || typeof config.permissions !== 'object') { + config.permissions = { allow: [], deny: [], ask: [] }; + } else { + config.permissions.allow = config.permissions.allow || []; + config.permissions.deny = config.permissions.deny || []; + config.permissions.ask = config.permissions.ask || []; + } + if (!config.enabledMcpjsonServers) { + config.enabledMcpjsonServers = []; + } + if (!config.hooks || typeof config.hooks !== 'object') { + config.hooks = {}; + } + config.hooks['UserPromptSubmit'] = [ + { + hooks: [ + { + type: 'command', + command: commandPath + } + ] + } + ]; + fs.writeFileSync(settingsPath, JSON.stringify(config, null, 2) + '\n', 'utf8'); + vscode.window.showInformationMessage('Context Engine Uploader: .claude/settings.local.json updated with Claude hook.'); + log(`Wrote Claude hook config at ${settingsPath}`); + return true; + } catch (error) { + vscode.window.showErrorMessage('Context Engine Uploader: failed to write .claude/settings.local.json.'); + log(`Failed to write .claude/settings.local.json: ${error instanceof Error ? 
error.message : String(error)}`); + return false; + } +} diff --git a/vscode-extension/context-engine-uploader/package.json b/vscode-extension/context-engine-uploader/package.json index 7dad0611..2f064431 100644 --- a/vscode-extension/context-engine-uploader/package.json +++ b/vscode-extension/context-engine-uploader/package.json @@ -10,6 +10,9 @@ "categories": [ "Other" ], + "activationEvents": [ + "onStartupFinished" + ], "main": "./extension.js", "icon": "assets/icon.png", "contributes": { @@ -101,11 +104,6 @@ "default": 100, "description": "Maximum upload bundle size enforced by the Context Engine server (MB)." }, - "contextEngineUploader.mcpConfigEnabled": { - "type": "boolean", - "default": true, - "description": "[Deprecated] Legacy toggle for Claude MCP config writing. Use mcpClaudeEnabled instead." - }, "contextEngineUploader.mcpClaudeEnabled": { "type": "boolean", "default": true, @@ -130,6 +128,11 @@ "type": "string", "default": "", "description": "Optional override for Windsurf's mcp_config.json path. Defaults to %USERPROFILE%/.codeium/windsurf/mcp_config.json." + }, + "contextEngineUploader.claudeHookEnabled": { + "type": "boolean", + "default": false, + "description": "Enable writing the workspace .claude/settings.local.json hooks section for the Context Engine Claude Code hook." 
} } } From 14cef1933bf7aebf97804eb94d8a506f19ddde7a Mon Sep 17 00:00:00 2001 From: Reese Date: Mon, 24 Nov 2025 11:26:56 +0000 Subject: [PATCH 03/19] feat(remote): enhance upload reliability and path resolution across environments - Add configurable timeout and retry settings for remote uploads - Implement robust timeout handling with server polling after timeout - Enhance path resolution using origin metadata for accurate host/container mapping - Add background processing for delta bundles to improve responsiveness - Preserve dual-path metadata in search results for client preference - Normalize repo name handling for slugged collections - Remove client-side bundle size limits in favor of server-side enforcement - Add group-writable permissions for shared workspace state - Filter dev-workspace directories from file scanning in development mode --- docker-compose.dev-remote.yml | 3 + scripts/ingest_code.py | 77 +++++++++++++-- scripts/mcp_indexer_server.py | 14 +++ scripts/remote_upload_client.py | 146 +++++++++++++++++++++++++-- scripts/standalone_upload_client.py | 148 ++++++++++++++++++++++++++-- scripts/upload_service.py | 120 ++++++++++++++-------- scripts/workspace_state.py | 34 ++++++- 7 files changed, 475 insertions(+), 67 deletions(-) diff --git a/docker-compose.dev-remote.yml b/docker-compose.dev-remote.yml index 28440ebe..7b1630a4 100644 --- a/docker-compose.dev-remote.yml +++ b/docker-compose.dev-remote.yml @@ -349,6 +349,9 @@ services: - REMOTE_UPLOAD_ENABLED=1 - REMOTE_UPLOAD_MODE=development - REMOTE_UPLOAD_DEBUG=1 + - REMOTE_UPLOAD_TIMEOUT=300 + - REMOTE_UPLOAD_MAX_RETRIES=5 + - MAX_BUNDLE_SIZE_MB=256 # Qdrant configuration - QDRANT_TIMEOUT=${QDRANT_TIMEOUT} diff --git a/scripts/ingest_code.py b/scripts/ingest_code.py index 9434794b..f75c0d85 100644 --- a/scripts/ingest_code.py +++ b/scripts/ingest_code.py @@ -1634,6 +1634,18 @@ def pick(sym): return "", "", "" +def _get_host_path_from_origin(workspace_path: str, repo_name: str = None) -> 
Optional[str]: + """Get client host_path from origin source_path in workspace state.""" + try: + from scripts.workspace_state import get_workspace_state + state = get_workspace_state(workspace_path, repo_name) + if state and state.get("origin", {}).get("source_path"): + return state["origin"]["source_path"] + except Exception: + pass + return None + + def index_single_file( client: QdrantClient, model: TextEmbedding, @@ -1767,22 +1779,49 @@ def make_point(pid, dense_vec, lex_vec, payload): sym = ch.get("symbol") or sym if "symbol_path" in ch and ch.get("symbol_path"): sym_path = ch.get("symbol_path") or sym_path - # Track both container path (/work mirror) and original host path for clarity across environments _cur_path = str(file_path) _host_root = str(os.environ.get("HOST_INDEX_PATH") or "").strip().rstrip("/") _host_path = None _container_path = None + + # Try to get client workspace root from origin metadata first. + # upload_service writes origin.source_path from the client --path flag so we can + # reconstruct host paths even when indexing inside a slugged /work/ tree. + _origin_client_path = None + try: + # Get workspace path from file path for origin lookup + if _cur_path.startswith("/work/"): + # Extract workspace from container path + _parts = _cur_path[6:].split("/") # Remove "/work/" prefix + if len(_parts) >= 2: + _repo_name = _parts[0] # First part is repo name + _workspace_path = f"/work/{_repo_name}" + _origin_client_path = _get_host_path_from_origin(_workspace_path, _repo_name) + except Exception: + pass + try: - if _cur_path.startswith("/work/") and _host_root: + if _cur_path.startswith("/work/") and (_host_root or _origin_client_path): _rel = _cur_path[len("/work/"):] - _host_path = os.path.realpath(os.path.join(_host_root, _rel)) + # Prioritize client path from origin metadata over HOST_INDEX_PATH. + if _origin_client_path: + # Drop the leading repo slug (e.g. 
Context-Engine-) when mapping + # /work paths back to the client workspace root, so host_path is + # /home/.../Context-Engine/ instead of including + # the slug directory. + _parts = _rel.split("/", 1) + _tail = _parts[1] if len(_parts) > 1 else "" + _base = _origin_client_path.rstrip("/") + _host_path = os.path.realpath(os.path.join(_base, _tail)) if _tail else _base + else: + _host_path = os.path.realpath(os.path.join(_host_root, _rel)) _container_path = _cur_path else: # Likely indexing on the host directly _host_path = _cur_path - if _host_root and _cur_path.startswith((_host_root + "/")): - _rel = _cur_path[len(_host_root) + 1 :] + if (_host_root or _origin_client_path) and _cur_path.startswith(((_origin_client_path or _host_root) + "/")): + _rel = _cur_path[len((_origin_client_path or _host_root)) + 1 :] _container_path = "/work/" + _rel except Exception: _host_path = _cur_path @@ -2213,15 +2252,35 @@ def make_point(pid, dense_vec, lex_vec, payload): _host_root = str(os.environ.get("HOST_INDEX_PATH") or "").strip().rstrip("/") _host_path = None _container_path = None + + # Try to get client path from origin metadata first (from --path upload flag) + _origin_client_path = None + try: + # Get workspace path from file path for origin lookup + if _cur_path.startswith("/work/"): + # Extract workspace from container path + _parts = _cur_path[6:].split("/") # Remove "/work/" prefix + if len(_parts) >= 2: + _repo_name = _parts[0] # First part is repo name + _workspace_path = f"/work/{_repo_name}" + _origin_client_path = _get_host_path_from_origin(_workspace_path, _repo_name) + except Exception: + pass + try: - if _cur_path.startswith("/work/") and _host_root: + if _cur_path.startswith("/work/") and (_host_root or _origin_client_path): _rel = _cur_path[len("/work/"):] - _host_path = os.path.realpath(os.path.join(_host_root, _rel)) + # Prioritize client path from origin metadata over HOST_INDEX_PATH + if _origin_client_path: + _host_path = 
os.path.realpath(os.path.join(_origin_client_path, _rel)) + else: + _host_path = os.path.realpath(os.path.join(_host_root, _rel)) _container_path = _cur_path else: + # Likely indexing on the host directly _host_path = _cur_path - if _host_root and _cur_path.startswith((_host_root + "/")): - _rel = _cur_path[len(_host_root) + 1 :] + if (_host_root or _origin_client_path) and _cur_path.startswith(((_origin_client_path or _host_root) + "/")): + _rel = _cur_path[len((_origin_client_path or _host_root)) + 1 :] _container_path = "/work/" + _rel except Exception: _host_path = _cur_path diff --git a/scripts/mcp_indexer_server.py b/scripts/mcp_indexer_server.py index 263c336a..0acc9ec9 100644 --- a/scripts/mcp_indexer_server.py +++ b/scripts/mcp_indexer_server.py @@ -2201,6 +2201,13 @@ def _doc_for(obj: dict) -> str: "components": (obj.get("components") or {}) | {"rerank_onnx": float(s)}, } + # Preserve dual-path metadata when available so clients can prefer host paths + _hostp = obj.get("host_path") + _contp = obj.get("container_path") + if _hostp: + item["host_path"] = _hostp + if _contp: + item["container_path"] = _contp tmp.append(item) if tmp: results = tmp @@ -2324,6 +2331,13 @@ def _doc_for(obj: dict) -> str: "why": obj.get("why", []), "components": obj.get("components", {}), } + # Preserve dual-path metadata when available so clients can prefer host paths + _hostp = obj.get("host_path") + _contp = obj.get("container_path") + if _hostp: + item["host_path"] = _hostp + if _contp: + item["container_path"] = _contp # Pass-through optional relation hints if obj.get("relations"): item["relations"] = obj.get("relations") diff --git a/scripts/remote_upload_client.py b/scripts/remote_upload_client.py index 4755136e..82aa8a1b 100644 --- a/scripts/remote_upload_client.py +++ b/scripts/remote_upload_client.py @@ -501,10 +501,8 @@ def upload_bundle(self, bundle_path: str, manifest: Dict[str, Any]) -> Dict[str, if not os.path.exists(bundle_path): return {"success": False, "error": 
{"code": "BUNDLE_NOT_FOUND", "message": f"Bundle not found: {bundle_path}"}} - # Check bundle size (100MB limit) + # Check bundle size (server-side enforcement) bundle_size = os.path.getsize(bundle_path) - if bundle_size > 100 * 1024 * 1024: - return {"success": False, "error": {"code": "BUNDLE_TOO_LARGE", "message": f"Bundle too large: {bundle_size} bytes"}} with open(bundle_path, 'rb') as bundle_file: files = { @@ -525,12 +523,35 @@ def upload_bundle(self, bundle_path: str, manifest: Dict[str, Any]) -> Dict[str, f"{self.upload_endpoint}/api/v1/delta/upload", files=files, data=data, - timeout=self.timeout + timeout=(10, self.timeout) ) if response.status_code == 200: result = response.json() logger.info(f"[remote_upload] Successfully uploaded bundle {manifest['bundle_id']}") + + seq = None + try: + seq = result.get("sequence_number") + except Exception: + seq = None + if seq is not None: + try: + manifest["sequence"] = seq + except Exception: + pass + + poll_result = self._poll_after_timeout(manifest) + if poll_result.get("success"): + combined = dict(result) + for k, v in poll_result.items(): + if k in ("success", "error"): + continue + if k not in combined: + combined[k] = v + return combined + + logger.warning("[remote_upload] Upload accepted but polling did not confirm processing; returning original result") return result # Handle error @@ -552,9 +573,37 @@ def upload_bundle(self, bundle_path: str, manifest: Dict[str, Any]) -> Dict[str, logger.warning(f"[remote_upload] Upload attempt {attempt + 1} failed: {error_msg}") + except requests.exceptions.ConnectTimeout as e: + last_error = {"success": False, "error": {"code": "TIMEOUT_ERROR", "message": f"Upload timeout: {str(e)}"}} + logger.warning(f"[remote_upload] Upload timeout on attempt {attempt + 1}: {e}") + + except requests.exceptions.ReadTimeout as e: + last_error = {"success": False, "error": {"code": "TIMEOUT_ERROR", "message": f"Upload timeout: {str(e)}"}} + logger.warning(f"[remote_upload] Upload read 
timeout on attempt {attempt + 1}: {e}") + + # After read timeout, poll to check if server processed the bundle + logger.info(f"[remote_upload] Read timeout occurred, polling server to check if bundle was processed...") + poll_result = self._poll_after_timeout(manifest) + if poll_result.get("success"): + logger.info(f"[remote_upload] Server confirmed processing of bundle {manifest['bundle_id']} after timeout") + return poll_result + + logger.warning(f"[remote_upload] Server did not process bundle after timeout, proceeding with failure") + break + except requests.exceptions.Timeout as e: last_error = {"success": False, "error": {"code": "TIMEOUT_ERROR", "message": f"Upload timeout: {str(e)}"}} logger.warning(f"[remote_upload] Upload timeout on attempt {attempt + 1}: {e}") + + # For generic timeout, also try polling + logger.info(f"[remote_upload] Timeout occurred, polling server to check if bundle was processed...") + poll_result = self._poll_after_timeout(manifest) + if poll_result.get("success"): + logger.info(f"[remote_upload] Server confirmed processing of bundle {manifest['bundle_id']} after timeout") + return poll_result + + logger.warning(f"[remote_upload] Server did not process bundle after timeout, proceeding with failure") + break except requests.exceptions.ConnectionError as e: last_error = {"success": False, "error": {"code": "CONNECTION_ERROR", "message": f"Connection error: {str(e)}"}} @@ -578,6 +627,87 @@ def upload_bundle(self, bundle_path: str, manifest: Dict[str, Any]) -> Dict[str, } } + def _poll_after_timeout(self, manifest: Dict[str, Any]) -> Dict[str, Any]: + """ + Poll server status after a timeout to check if bundle was processed. 
+ + Args: + manifest: Bundle manifest containing sequence information + + Returns: + Dictionary indicating success if bundle was processed + """ + try: + # Get current server status to know the expected sequence + status = self.get_server_status() + if not status.get("success"): + return {"success": False, "error": status.get("error", {"code": "UNKNOWN", "message": "Failed to get status"})} + + current_sequence = status.get("last_sequence", 0) + expected_sequence = manifest.get("sequence", current_sequence + 1) + + logger.info(f"[remote_upload] Current server sequence: {current_sequence}, expected: {expected_sequence}") + + # If server is already at expected sequence, bundle was processed + if current_sequence >= expected_sequence: + return { + "success": True, + "message": f"Bundle processed (server at sequence {current_sequence})", + "sequence": current_sequence, + } + + # Poll window is configurable via REMOTE_UPLOAD_POLL_MAX_SECS (seconds). + # Values <= 0 mean "no timeout" (poll until success or process exit). + try: + max_poll_time = int(os.environ.get("REMOTE_UPLOAD_POLL_MAX_SECS", "300")) + except Exception: + max_poll_time = 300 + poll_interval = 5 + start_time = time.time() + + while True: + elapsed = time.time() - start_time + if max_poll_time > 0 and elapsed >= max_poll_time: + logger.warning( + f"[remote_upload] Polling timed out after {int(elapsed)}s (limit={max_poll_time}s), bundle was not confirmed as processed" + ) + return { + "success": False, + "error": { + "code": "POLL_TIMEOUT", + "message": f"Bundle not confirmed processed after polling for {int(elapsed)}s (limit={max_poll_time}s)", + }, + } + + logger.info( + f"[remote_upload] Polling server status... 
(elapsed: {int(elapsed)}s, limit={'no-limit' if max_poll_time <= 0 else max_poll_time}s)" + ) + time.sleep(poll_interval) + + status = self.get_server_status() + if status.get("success"): + new_sequence = status.get("last_sequence", 0) + if new_sequence >= expected_sequence: + logger.info( + f"[remote_upload] Server sequence advanced to {new_sequence}, bundle was processed!" + ) + return { + "success": True, + "message": f"Bundle processed after timeout (server at sequence {new_sequence})", + "sequence": new_sequence, + } + logger.debug( + f"[remote_upload] Server sequence still at {new_sequence}, continuing to poll..." + ) + else: + logger.warning( + f"[remote_upload] Failed to get server status during poll: {status.get('error', {}).get('message', 'Unknown')}" + ) + + except Exception as e: + logger.error(f"[remote_upload] Error during post-timeout polling: {e}") + return {"success": False, "error": {"code": "POLL_ERROR", "message": f"Polling error: {str(e)}"}} + def get_server_status(self) -> Dict[str, Any]: """Get server status with simplified error handling.""" try: @@ -702,11 +832,14 @@ def get_all_code_files(self) -> List[Path]: all_files.extend(workspace_path.rglob(f"*{ext}")) # Filter out directories and hidden files + dev_remote = os.environ.get("DEV_REMOTE_MODE") == "1" or os.environ.get("REMOTE_UPLOAD_MODE") == "development" + ignored_dirs = {"dev-workspace"} if dev_remote else set() all_files = [ f for f in all_files if f.is_file() and not any(part.startswith('.') for part in f.parts) and '.codebase' not in str(f) + and not any(part in ignored_dirs for part in f.parts) ] except Exception as e: logger.error(f"[watch] Error scanning files: {e}") @@ -879,8 +1012,9 @@ def get_remote_config(cli_path: Optional[str] = None) -> Dict[str, str]: "upload_endpoint": os.environ.get("REMOTE_UPLOAD_ENDPOINT", "http://localhost:8080"), "workspace_path": workspace_path, "collection_name": collection_name, - "max_retries": int(os.environ.get("REMOTE_UPLOAD_MAX_RETRIES", 
"3")), - "timeout": int(os.environ.get("REMOTE_UPLOAD_TIMEOUT", "30")) + # Use higher, more robust defaults but still allow env overrides + "max_retries": int(os.environ.get("REMOTE_UPLOAD_MAX_RETRIES", "5")), + "timeout": int(os.environ.get("REMOTE_UPLOAD_TIMEOUT", "1800")), } diff --git a/scripts/standalone_upload_client.py b/scripts/standalone_upload_client.py index dfc63c8b..3992a6e6 100644 --- a/scripts/standalone_upload_client.py +++ b/scripts/standalone_upload_client.py @@ -656,10 +656,8 @@ def upload_bundle(self, bundle_path: str, manifest: Dict[str, Any]) -> Dict[str, if not os.path.exists(bundle_path): return {"success": False, "error": {"code": "BUNDLE_NOT_FOUND", "message": f"Bundle not found: {bundle_path}"}} - # Check bundle size (100MB limit) + # Check bundle size (server-side enforcement) bundle_size = os.path.getsize(bundle_path) - if bundle_size > 100 * 1024 * 1024: - return {"success": False, "error": {"code": "BUNDLE_TOO_LARGE", "message": f"Bundle too large: {bundle_size} bytes"}} with open(bundle_path, 'rb') as bundle_file: files = { @@ -680,12 +678,35 @@ def upload_bundle(self, bundle_path: str, manifest: Dict[str, Any]) -> Dict[str, f"{self.upload_endpoint}/api/v1/delta/upload", files=files, data=data, - timeout=self.timeout + timeout=(10, self.timeout) ) if response.status_code == 200: result = response.json() logger.info(f"[remote_upload] Successfully uploaded bundle {manifest['bundle_id']}") + + seq = None + try: + seq = result.get("sequence_number") + except Exception: + seq = None + if seq is not None: + try: + manifest["sequence"] = seq + except Exception: + pass + + poll_result = self._poll_after_timeout(manifest) + if poll_result.get("success"): + combined = dict(result) + for k, v in poll_result.items(): + if k in ("success", "error"): + continue + if k not in combined: + combined[k] = v + return combined + + logger.warning("[remote_upload] Upload accepted but polling did not confirm processing; returning original result") return 
result # Handle error error_msg = f"Upload failed with status {response.status_code}" @@ -706,9 +727,37 @@ def upload_bundle(self, bundle_path: str, manifest: Dict[str, Any]) -> Dict[str, logger.warning(f"[remote_upload] Upload attempt {attempt + 1} failed: {error_msg}") + except requests.exceptions.ConnectTimeout as e: + last_error = {"success": False, "error": {"code": "TIMEOUT_ERROR", "message": f"Upload timeout: {str(e)}"}} + logger.warning(f"[remote_upload] Upload timeout on attempt {attempt + 1}: {e}") + + except requests.exceptions.ReadTimeout as e: + last_error = {"success": False, "error": {"code": "TIMEOUT_ERROR", "message": f"Upload timeout: {str(e)}"}} + logger.warning(f"[remote_upload] Upload read timeout on attempt {attempt + 1}: {e}") + + # After read timeout, poll to check if server processed the bundle + logger.info(f"[remote_upload] Read timeout occurred, polling server to check if bundle was processed...") + poll_result = self._poll_after_timeout(manifest) + if poll_result.get("success"): + logger.info(f"[remote_upload] Server confirmed processing of bundle {manifest['bundle_id']} after timeout") + return poll_result + + logger.warning(f"[remote_upload] Server did not process bundle after timeout, proceeding with failure") + break + except requests.exceptions.Timeout as e: last_error = {"success": False, "error": {"code": "TIMEOUT_ERROR", "message": f"Upload timeout: {str(e)}"}} logger.warning(f"[remote_upload] Upload timeout on attempt {attempt + 1}: {e}") + + # For generic timeout, also try polling + logger.info(f"[remote_upload] Timeout occurred, polling server to check if bundle was processed...") + poll_result = self._poll_after_timeout(manifest) + if poll_result.get("success"): + logger.info(f"[remote_upload] Server confirmed processing of bundle {manifest['bundle_id']} after timeout") + return poll_result + + logger.warning(f"[remote_upload] Server did not process bundle after timeout, proceeding with failure") + break except 
requests.exceptions.ConnectionError as e: last_error = {"success": False, "error": {"code": "CONNECTION_ERROR", "message": f"Connection error: {str(e)}"}} @@ -732,6 +781,87 @@ def upload_bundle(self, bundle_path: str, manifest: Dict[str, Any]) -> Dict[str, } } + def _poll_after_timeout(self, manifest: Dict[str, Any]) -> Dict[str, Any]: + """ + Poll server status after a timeout to check if bundle was processed. + + Args: + manifest: Bundle manifest containing sequence information + + Returns: + Dictionary indicating success if bundle was processed + """ + try: + # Get current server status to know the expected sequence + status = self.get_server_status() + if not status.get("success"): + return {"success": False, "error": status.get("error", {"code": "UNKNOWN", "message": "Failed to get status"})} + + current_sequence = status.get("last_sequence", 0) + expected_sequence = manifest.get("sequence", current_sequence + 1) + + logger.info(f"[remote_upload] Current server sequence: {current_sequence}, expected: {expected_sequence}") + + # If server is already at expected sequence, bundle was processed + if current_sequence >= expected_sequence: + return { + "success": True, + "message": f"Bundle processed (server at sequence {current_sequence})", + "sequence": current_sequence, + } + + # Poll window is configurable via REMOTE_UPLOAD_POLL_MAX_SECS (seconds). + # Values <= 0 mean "no timeout" (poll until success or process exit). 
+ try: + max_poll_time = int(os.environ.get("REMOTE_UPLOAD_POLL_MAX_SECS", "300")) + except Exception: + max_poll_time = 300 + poll_interval = 5 + start_time = time.time() + + while True: + elapsed = time.time() - start_time + if max_poll_time > 0 and elapsed >= max_poll_time: + logger.warning( + f"[remote_upload] Polling timed out after {int(elapsed)}s (limit={max_poll_time}s), bundle was not confirmed as processed" + ) + return { + "success": False, + "error": { + "code": "POLL_TIMEOUT", + "message": f"Bundle not confirmed processed after polling for {int(elapsed)}s (limit={max_poll_time}s)", + }, + } + + logger.info( + f"[remote_upload] Polling server status... (elapsed: {int(elapsed)}s, limit={'no-limit' if max_poll_time <= 0 else max_poll_time}s)" + ) + time.sleep(poll_interval) + + status = self.get_server_status() + if status.get("success"): + new_sequence = status.get("last_sequence", 0) + if new_sequence >= expected_sequence: + logger.info( + f"[remote_upload] Server sequence advanced to {new_sequence}, bundle was processed!" + ) + return { + "success": True, + "message": f"Bundle processed after timeout (server at sequence {new_sequence})", + "sequence": new_sequence, + } + logger.debug( + f"[remote_upload] Server sequence still at {new_sequence}, continuing to poll..." 
+ ) + else: + logger.warning( + f"[remote_upload] Failed to get server status during poll: {status.get('error', {}).get('message', 'Unknown')}" + ) + + except Exception as e: + logger.error(f"[remote_upload] Error during post-timeout polling: {e}") + return {"success": False, "error": {"code": "POLL_ERROR", "message": f"Polling error: {str(e)}"}} + def get_server_status(self) -> Dict[str, Any]: """Get server status with simplified error handling.""" try: @@ -898,11 +1028,14 @@ def get_all_code_files(self) -> List[Path]: all_files.extend(workspace_path.rglob(f"*{ext}")) # Filter out directories and hidden files + dev_remote = os.environ.get("DEV_REMOTE_MODE") == "1" or os.environ.get("REMOTE_UPLOAD_MODE") == "development" + ignored_dirs = {"dev-workspace"} if dev_remote else set() all_files = [ f for f in all_files if f.is_file() and not any(part.startswith('.') for part in f.parts) - and '.context-engine' not in str(f) + and '.codebase' not in str(f) + and not any(part in ignored_dirs for part in f.parts) ] except Exception as e: logger.error(f"[watch] Error scanning files: {e}") @@ -1033,8 +1166,9 @@ def get_remote_config(cli_path: Optional[str] = None) -> Dict[str, str]: "upload_endpoint": os.environ.get("REMOTE_UPLOAD_ENDPOINT", "http://localhost:8080"), "workspace_path": workspace_path, "collection_name": collection_name, - "max_retries": int(os.environ.get("REMOTE_UPLOAD_MAX_RETRIES", "3")), - "timeout": int(os.environ.get("REMOTE_UPLOAD_TIMEOUT", "30")) + # Use higher, more robust defaults but still allow env overrides + "max_retries": int(os.environ.get("REMOTE_UPLOAD_MAX_RETRIES", "5")), + "timeout": int(os.environ.get("REMOTE_UPLOAD_TIMEOUT", "1800")), } diff --git a/scripts/upload_service.py b/scripts/upload_service.py index 0b5c1589..a29ec357 100644 --- a/scripts/upload_service.py +++ b/scripts/upload_service.py @@ -167,7 +167,7 @@ def validate_bundle_format(bundle_path: Path) -> Dict[str, Any]: except Exception as e: raise ValueError(f"Invalid bundle 
format: {str(e)}") -async def process_delta_bundle(workspace_path: str, bundle_path: Path, manifest: Dict[str, Any]) -> Dict[str, int]: +def process_delta_bundle(workspace_path: str, bundle_path: Path, manifest: Dict[str, Any]) -> Dict[str, int]: """Process delta bundle and return operation counts.""" operations_count = { "created": 0, @@ -305,6 +305,49 @@ async def process_delta_bundle(workspace_path: str, bundle_path: Path, manifest: raise +async def _process_bundle_background( + workspace_path: str, + bundle_path: Path, + manifest: Dict[str, Any], + sequence_number: Optional[int], + bundle_id: Optional[str], +) -> None: + try: + start_time = datetime.now() + operations_count = await asyncio.to_thread( + process_delta_bundle, workspace_path, bundle_path, manifest + ) + if sequence_number is not None: + key = get_workspace_key(workspace_path) + _sequence_tracker[key] = sequence_number + if log_activity: + try: + repo = _extract_repo_name_from_path(workspace_path) if _extract_repo_name_from_path else None + log_activity( + repo_name=repo, + action="uploaded", + file_path=bundle_id, + details={ + "bundle_id": bundle_id, + "operations": operations_count, + "source": "delta_upload", + }, + ) + except Exception as activity_err: + logger.debug(f"[upload_service] Failed to log activity for bundle {bundle_id}: {activity_err}") + processing_time = (datetime.now() - start_time).total_seconds() * 1000 + logger.info( + f"[upload_service] Finished processing bundle {bundle_id} seq {sequence_number} in {int(processing_time)}ms" + ) + except Exception as e: + logger.error(f"[upload_service] Error in background processing for bundle {bundle_id}: {e}") + finally: + try: + bundle_path.unlink() + except Exception: + pass + + @app.get("/health", response_model=HealthResponse) async def health_check(): """Health check endpoint.""" @@ -371,25 +414,28 @@ async def upload_delta_bundle( workspace_path = str(workspace.resolve()) - # Get collection name + # Always derive repo_name from 
workspace_path for origin tracking + repo_name = _extract_repo_name_from_path(workspace_path) if _extract_repo_name_from_path else None + if not repo_name: + repo_name = Path(workspace_path).name + + # Get collection name (respect client-supplied name when provided) if not collection_name: - if get_collection_name: - repo_name = _extract_repo_name_from_path(workspace_path) if _extract_repo_name_from_path else None - # Fallback to directory name if repo detection fails - if not repo_name: - repo_name = Path(workspace_path).name + if get_collection_name and repo_name: collection_name = get_collection_name(repo_name) else: collection_name = DEFAULT_COLLECTION - # Persist origin metadata for remote lookups + # Persist origin metadata for remote lookups (including client source_path) + # Use slugged repo name (repo+16) for state so it matches ingest/watch_index usage try: if update_repo_origin and repo_name: workspace_key = get_workspace_key(workspace_path) - container_workspace = str(Path(WORK_DIR) / f"{repo_name}-{workspace_key}") + slug_repo_name = f"{repo_name}-{workspace_key}" + container_workspace = str(Path(WORK_DIR) / slug_repo_name) update_repo_origin( workspace_path=container_workspace, - repo_name=repo_name, + repo_name=slug_repo_name, container_path=container_workspace, source_path=source_path or workspace_path, collection_name=collection_name, @@ -412,6 +458,8 @@ async def upload_delta_bundle( content = await bundle.read() bundle_path.write_bytes(content) + handed_off = False + try: # Validate bundle format manifest = validate_bundle_format(bundle_path) @@ -419,11 +467,14 @@ async def upload_delta_bundle( manifest_sequence = manifest.get("sequence_number") # Check sequence number + last_sequence = get_last_sequence(workspace_path) if sequence_number is None: - sequence_number = manifest_sequence + if manifest_sequence is not None: + sequence_number = manifest_sequence + else: + sequence_number = last_sequence + 1 if not force and sequence_number is not 
None: - last_sequence = get_last_sequence(workspace_path) if sequence_number != last_sequence + 1: return UploadResponse( success=False, @@ -436,46 +487,33 @@ async def upload_delta_bundle( } ) - # Process delta bundle - operations_count = await process_delta_bundle(workspace_path, bundle_path, manifest) - + handed_off = True - # Update sequence tracking - if sequence_number is not None: - key = get_workspace_key(workspace_path) - _sequence_tracker[key] = sequence_number - - # Log activity using cleaned workspace_state function - if log_activity: - log_activity( - repo_name=_extract_repo_name_from_path(workspace_path) if _extract_repo_name_from_path else None, - action="uploaded", - file_path=bundle_id, - details={ - "bundle_id": bundle_id, - "operations": operations_count, - "source": "delta_upload" - } + asyncio.create_task( + _process_bundle_background( + workspace_path=workspace_path, + bundle_path=bundle_path, + manifest=manifest, + sequence_number=sequence_number, + bundle_id=bundle_id, ) - - # Calculate processing time - processing_time = (datetime.now() - start_time).total_seconds() * 1000 + ) return UploadResponse( success=True, bundle_id=bundle_id, sequence_number=sequence_number, - processed_operations=operations_count, - processing_time_ms=int(processing_time), + processed_operations=None, + processing_time_ms=None, next_sequence=sequence_number + 1 if sequence_number else None ) finally: - # Clean up temporary file - try: - bundle_path.unlink() - except Exception: - pass + if not handed_off: + try: + bundle_path.unlink() + except Exception: + pass except HTTPException: raise diff --git a/scripts/workspace_state.py b/scripts/workspace_state.py index e05f80b5..99209c50 100644 --- a/scripts/workspace_state.py +++ b/scripts/workspace_state.py @@ -281,6 +281,12 @@ def get_workspace_state( if is_multi_repo_mode() and repo_name: state_dir = _get_repo_state_dir(repo_name) state_dir.mkdir(parents=True, exist_ok=True) + # Ensure repo state dir is group-writable 
so root upload service and + # non-root watcher/indexer processes can both write state/cache files. + try: + os.chmod(state_dir, 0o775) + except Exception: + pass state_path = state_dir / STATE_FILENAME lock_scope_path = state_dir else: @@ -481,11 +487,31 @@ def _generate_collection_name_from_repo(repo_name: str) -> str: short_hash = hash_obj.hexdigest()[:8] return f"{repo_name}-{short_hash}" +def _normalize_repo_name_for_collection(repo_name: str) -> str: + """Normalize repo identifier to a stable base name for collection naming. + + In multi-repo remote mode, repo_name may be a slug like "name-<16hex>" used + for folder collision avoidance. For Qdrant collections we always want the + base repo directory name, so strip a trailing 16-hex segment when present. + """ + try: + m = re.match(r"^(.*)-([0-9a-f]{16})$", repo_name) + if m: + base = (m.group(1) or "").strip() + if base: + return base + except Exception: + pass + return repo_name + + def get_collection_name(repo_name: Optional[str] = None) -> str: """Get collection name for repository or workspace.""" + normalized = _normalize_repo_name_for_collection(repo_name) if repo_name else None + # In multi-repo mode, prioritize repo-specific collection names - if is_multi_repo_mode() and repo_name: - return _generate_collection_name_from_repo(repo_name) + if is_multi_repo_mode() and normalized: + return _generate_collection_name_from_repo(normalized) # Check environment for single-repo mode or fallback env_coll = os.environ.get("COLLECTION_NAME", "").strip() @@ -493,8 +519,8 @@ def get_collection_name(repo_name: Optional[str] = None) -> str: return env_coll # Use repo name if provided (for single-repo mode with repo name) - if repo_name: - return _generate_collection_name_from_repo(repo_name) + if normalized: + return _generate_collection_name_from_repo(normalized) # Default fallback return "global-collection" From 4d6839fc3808bcaa983bacf5524601178aca2455 Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 
10:08:12 +0000 Subject: [PATCH 04/19] feat(ctx-hook): Enhance Claude prompt with context from a Qdrant collection --- ctx-hook-simple.sh | 189 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 164 insertions(+), 25 deletions(-) diff --git a/ctx-hook-simple.sh b/ctx-hook-simple.sh index f21b8a0e..55e46d3f 100755 --- a/ctx-hook-simple.sh +++ b/ctx-hook-simple.sh @@ -6,41 +6,108 @@ # Read JSON input from stdin INPUT=$(cat) -# Extract the user message using jq +# Extract the prompt text from Claude's JSON payload if command -v jq >/dev/null 2>&1; then - USER_MESSAGE=$(echo "$INPUT" | jq -r '.user_message') + USER_MESSAGE=$(echo "$INPUT" | jq -r '.prompt') + USER_CWD=$(echo "$INPUT" | jq -r '.cwd // empty') else - echo "$INPUT" - exit 0 + # Fallback: treat entire input as the prompt text + USER_MESSAGE="$INPUT" fi # Skip if empty message if [ -z "$USER_MESSAGE" ] || [ "$USER_MESSAGE" = "null" ]; then - echo "$INPUT" - exit 0 -fi - -# Easy bypass patterns - any of these will skip ctx enhancement -if [[ "$USER_MESSAGE" =~ ^(noctx|raw|bypass|skip|no-enhance): ]] || \ - [[ "$USER_MESSAGE" =~ ^\\ ]] || \ - [[ "$USER_MESSAGE" =~ ^\< ]] || \ - [[ "$USER_MESSAGE" =~ ^(/help|/clear|/exit|/quit) ]] || \ - [[ "$USER_MESSAGE" =~ ^\?\s*$ ]] || \ - [ ${#USER_MESSAGE} -lt 12 ]; then - echo "$INPUT" - exit 0 + echo "$INPUT" + exit 0 fi # Set working directory to where the hook script is located SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Determine workspace directory: +# - If CTX_WORKSPACE_DIR is already set, honor it. +# - If running from an embedded extension under ~/.windsurf-server/extensions, +# default to the caller's CWD (Claude/VS Code workspace root). +# - Otherwise (repo-local hook), default to the script directory so it works +# even when Claude runs from a parent folder. 
+if [ -n "${CTX_WORKSPACE_DIR:-}" ]; then + WORKSPACE_DIR="$CTX_WORKSPACE_DIR" +elif [[ "$SCRIPT_DIR" == */.windsurf-server/extensions/* ]]; then + WORKSPACE_DIR="$PWD" +else + WORKSPACE_DIR="$SCRIPT_DIR" +fi +export CTX_WORKSPACE_DIR="$WORKSPACE_DIR" + +# If the workspace root does not contain ctx_config.json, but exactly one +# direct child directory does, treat that child directory as the effective +# workspace. This supports multi-repo workspaces where the ctx-enabled repo +# (with ctx_config.json and .env) lives one level below the VS Code root. +if [ ! -f "$WORKSPACE_DIR/ctx_config.json" ]; then + FOUND_SUBDIR="" + for candidate in "$WORKSPACE_DIR"/*; do + if [ -d "$candidate" ] && [ -f "$candidate/ctx_config.json" ]; then + if [ -z "$FOUND_SUBDIR" ]; then + FOUND_SUBDIR="$candidate" + else + # More than one candidate; ambiguous, keep original WORKSPACE_DIR + FOUND_SUBDIR="" + break + fi + fi + done + if [ -n "$FOUND_SUBDIR" ]; then + WORKSPACE_DIR="$FOUND_SUBDIR" + export CTX_WORKSPACE_DIR="$WORKSPACE_DIR" + fi +fi + +# Prefer workspace-level ctx_config.json, fall back to one next to the script +if [ -f "$WORKSPACE_DIR/ctx_config.json" ]; then + CONFIG_FILE="$WORKSPACE_DIR/ctx_config.json" +elif [ -f "$SCRIPT_DIR/ctx_config.json" ]; then + CONFIG_FILE="$SCRIPT_DIR/ctx_config.json" +else + CONFIG_FILE="" +fi + +# Optional: enable file logging when CTX_HOOK_LOG=1 or a .ctx_hook_log marker +# file exists in the workspace. When disabled, no log file is written. +if [ "${CTX_HOOK_LOG:-0}" = "1" ] || [ -f "$WORKSPACE_DIR/.ctx_hook_log" ]; then + LOG_FILE="$WORKSPACE_DIR/ctx-hook.log" + LOG_ENABLED=1 +else + LOG_ENABLED=0 +fi + cd "$SCRIPT_DIR" +# Optional: enable extra debug information in the JSON payload +# when CTX_HOOK_DEBUG=1 is set in the environment, or when a +# .ctx_hook_debug marker file exists in the workspace. 
+CTX_HOOK_DEBUG="${CTX_HOOK_DEBUG:-}" +if [ -z "$CTX_HOOK_DEBUG" ] && [ -f "$WORKSPACE_DIR/.ctx_hook_debug" ]; then + CTX_HOOK_DEBUG="1" +fi + +# Log the incoming payload when logging is enabled +if [ "$LOG_ENABLED" = "1" ]; then + { + echo "[$(date -Iseconds)] HOOK INVOKED" + echo "PWD = $PWD" + echo "WORKSPACE_DIR = $WORKSPACE_DIR" + echo "INPUT = <> "$LOG_FILE" +fi + # Read all settings from ctx_config.json -CONFIG_FILE="ctx_config.json" -if [ -f "$CONFIG_FILE" ]; then - CTX_COLLECTION=$(grep -o '"default_collection"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"default_collection"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') +if [ -n "$CONFIG_FILE" ] && [ -f "$CONFIG_FILE" ]; then + CTX_COLLECTION=$(grep -o '"default_collection"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"default_collection"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' ) REFRAG_RUNTIME=$(grep -o '"refrag_runtime"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"refrag_runtime"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' || echo "glm") - GLM_API_KEY=$(grep -o '"glm_api_key"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"glm_api_key"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') + GLM_API_KEY=$(grep -o '"glm_api_key"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"glm_api_key"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/' ) GLM_API_BASE=$(grep -o '"glm_api_base"[[:space:]]*:[[:space:]]*"[^"]*"' "$CONFIG_FILE" | sed 's/.*"glm_api_base"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/') GLM_MODEL=$(grep -o '"glm_model"[[:space:]]*:[[:space:]]*"[^\"]*"' "$CONFIG_FILE" | sed 's/.*"glm_model"[[:space:]]*:[[:space:]]*"\([^\"]*\)".*/\1/' || echo "glm-4.6") CTX_DEFAULT_MODE=$(grep -o '"default_mode"[[:space:]]*:[[:space:]]*"[^\"]*"' "$CONFIG_FILE" | sed 's/.*"default_mode"[[:space:]]*:[[:space:]]*"\([^\"]*\)".*/\1/') @@ -63,8 +130,39 @@ CTX_MIN_RELEVANCE=${CTX_MIN_RELEVANCE:-0.1} # Export GLM/context environment variables from config export REFRAG_RUNTIME 
GLM_API_KEY GLM_API_BASE GLM_MODEL CTX_REQUIRE_CONTEXT CTX_RELEVANCE_GATE CTX_MIN_RELEVANCE +# Easy bypass patterns - any of these will skip ctx enhancement +BYPASS_REASON="" +if [[ "$USER_MESSAGE" =~ ^(noctx|raw|bypass|skip|no-enhance): ]]; then + BYPASS_REASON="prefix_tag" +elif [[ "$USER_MESSAGE" =~ ^\\ ]]; then + BYPASS_REASON="leading_backslash" +elif [[ "$USER_MESSAGE" =~ ^\< ]]; then + BYPASS_REASON="leading_angle_bracket" +elif [[ "$USER_MESSAGE" =~ ^(/help|/clear|/exit|/quit) ]]; then + BYPASS_REASON="slash_command" +elif [[ "$USER_MESSAGE" =~ ^\?\s*$ ]]; then + BYPASS_REASON="short_question_mark" +elif [ ${#USER_MESSAGE} -lt 12 ]; then + BYPASS_REASON="too_short" +fi + +if [ -n "$BYPASS_REASON" ]; then + if [ "$CTX_HOOK_DEBUG" = "1" ]; then + echo "[ctx_debug status=bypassed reason=$BYPASS_REASON script_dir=$SCRIPT_DIR workspace_dir=$WORKSPACE_DIR config_file=$CONFIG_FILE] $USER_MESSAGE" + else + echo "$USER_MESSAGE" + fi + exit 0 +fi + # Build ctx command with optional unicorn flag -CTX_CMD=(python3 scripts/ctx.py) +if [ -f "$SCRIPT_DIR/ctx.py" ]; then + # Use embedded ctx.py when running from the packaged extension + CTX_CMD=(python3 "$SCRIPT_DIR/ctx.py") +else + # Fallback for repo-local usage + CTX_CMD=(python3 scripts/ctx.py) +fi case "${CTX_DEFAULT_MODE,,}" in unicorn) CTX_CMD+=("--unicorn") @@ -76,7 +174,48 @@ esac CTX_CMD+=("$USER_MESSAGE" --collection "$CTX_COLLECTION") # Run ctx with collection -ENHANCED=$(timeout 30s "${CTX_CMD[@]}" 2>/dev/null || echo "$USER_MESSAGE") +# When CTX_DEBUG_PATHS is enabled, preserve stderr so path-level debug from ctx.py is visible +if [ -n "${CTX_DEBUG_PATHS:-}" ]; then + ENHANCED=$(timeout 120s "${CTX_CMD[@]}" 2>&1 || echo "$USER_MESSAGE") +else + ENHANCED=$(timeout 120s "${CTX_CMD[@]}" 2>/dev/null || echo "$USER_MESSAGE") +fi + +if [ -n "$WORKSPACE_DIR" ] && [ "${CTX_ROOT_HINT:-1}" != "0" ]; then + HINT="The user's project root directory is \"$WORKSPACE_DIR\" (WORKSPACE_DIR)." 
+ if [ "${CTX_SURFACE_COLLECTION_HINT:-0}" = "1" ] && [ -n "$CTX_COLLECTION" ]; then + HINT="$HINT The Qdrant collection name for this workspace is \"$CTX_COLLECTION\". Specify this collection when using memory or qdrant-indexer MCP tool (if available)." + fi + if [ -n "${USER_CWD:-}" ]; then + HINT="$HINT Claude's current working directory is \"$USER_CWD\" (user_cwd). \ +When using tools like Read, Search, or Bash, treat WORKSPACE_DIR as the root \ +for repository files. If WORKSPACE_DIR and user_cwd differ, do not assume \ +files live under user_cwd; use the full paths under WORKSPACE_DIR or the \ +project-relative paths shown above." + fi + ENHANCED="$HINT + +$ENHANCED" +fi + +# Log ctx output when logging is enabled +if [ "$LOG_ENABLED" = "1" ]; then + { + echo "[$(date -Iseconds)] CTX_OUTPUT" + echo "PROMPT = $USER_MESSAGE" + echo "ENHANCED = <> "$LOG_FILE" +fi -# Replace user message with enhanced version using jq -echo "$INPUT" | jq --arg enhanced "$ENHANCED" '.user_message = $enhanced' \ No newline at end of file +if [ "$CTX_HOOK_DEBUG" = "1" ]; then + HOOK_STATUS="unchanged" + if [ "$ENHANCED" != "$USER_MESSAGE" ]; then + HOOK_STATUS="enhanced" + fi + echo "[ctx_debug status=$HOOK_STATUS script_dir=$SCRIPT_DIR workspace_dir=$WORKSPACE_DIR config_file=$CONFIG_FILE] $ENHANCED" +else + echo "$ENHANCED" +fi \ No newline at end of file From bd0399c95fd14c815da6ab9a8a7dbbf2f5be9e6d Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 10:08:13 +0000 Subject: [PATCH 05/19] feat(ctx): Load .env from workspace if available, improve path handling and sanitization --- scripts/ctx.py | 122 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 97 insertions(+), 25 deletions(-) diff --git a/scripts/ctx.py b/scripts/ctx.py index 271e36fb..f6a815b9 100755 --- a/scripts/ctx.py +++ b/scripts/ctx.py @@ -58,25 +58,39 @@ # Load .env file if it exists (for local CLI usage) def _load_env_file(): - """Load .env file from project root if it exists.""" - # Find 
project root (where .env should be) - script_dir = Path(__file__).resolve().parent - project_root = script_dir.parent - env_file = project_root / ".env" - - if env_file.exists(): - with open(env_file) as f: - for line in f: - line = line.strip() - if not line or line.startswith("#"): - continue - if "=" in line: - key, value = line.split("=", 1) - key = key.strip() - value = value.strip().strip('"').strip("'") - # Only set if not already in environment - if key and key not in os.environ: - os.environ[key] = value + """Load .env file from workspace (if provided) or project root if it exists.""" + # Prefer an explicit workspace root (set by the hook) when available, + # otherwise fall back to the original project-root behavior based on this file. + script_dir = Path(__file__).resolve().parent + candidates = [] + + workspace_dir = os.environ.get("CTX_WORKSPACE_DIR") + if workspace_dir: + try: + candidates.append(Path(workspace_dir) / ".env") + except Exception: + pass + + # Original project-root-based .env (for CLI / repo-local usage) + candidates.append(script_dir.parent / ".env") + + for env_file in candidates: + if not env_file.exists(): + continue + with open(env_file) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + if "=" in line: + key, value = line.split("=", 1) + key = key.strip() + value = value.strip().strip('"').strip("'") + # Only set if not already in environment + if key and key not in os.environ: + os.environ[key] = value + # Only load the first existing .env + break _load_env_file() @@ -261,7 +275,8 @@ def format_search_results(results: List[Dict[str, Any]], include_snippets: bool """ lines: List[str] = [] for hit in results: - path = hit.get("path", "unknown") + # Prefer client-facing host_path, fall back to container path + path = hit.get("host_path") or hit.get("path", "unknown") start = hit.get("start_line", "?") end = hit.get("end_line", "?") language = hit.get("language") or "" @@ -461,17 +476,48 
@@ def sanitize_citations(text: str, allowed_paths: Set[str]) -> str: return text from os.path import basename allowed_set = set(allowed_paths or set()) - allowed_basenames = {basename(p) for p in allowed_set} + basename_to_paths: Dict[str, Set[str]] = {} + for _p in allowed_set: + _b = basename(_p) + if _b: + basename_to_paths.setdefault(_b, set()).add(_p) + + root = (os.environ.get("CTX_WORKSPACE_DIR") or "").strip() + + def _to_display_path(full_path: str) -> str: + if not full_path: + return full_path + if not root: + return full_path + try: + root_norm = root.rstrip("/\\") + repo_name = os.path.basename(root_norm) if root_norm else "" + if full_path == root_norm: + return repo_name or "." + if full_path.startswith(root_norm + os.sep): + rel = os.path.relpath(full_path, root_norm) + if repo_name: + return repo_name + os.sep + (rel or "") + return rel or "." + except Exception: + return full_path + return full_path def _repl(m): p = m.group(0) - if p in allowed_set or basename(p) in allowed_basenames: + if p in allowed_set: + return _to_display_path(p) + b = basename(p) + paths = basename_to_paths.get(b) if b else None + if paths: + if len(paths) == 1: + return _to_display_path(next(iter(paths))) return p return "the referenced file" cleaned = re.sub(r"/path/to/[^\s]+", "the referenced file", text) # Simple path-like matcher: segments with a slash and a dot-ext - cleaned = re.sub(r"(? 
Tuple[str, str]: sys.stderr.write(f"[DEBUG] repo_search returned {len(hits)} hits (relevance={relevance:.3f})\n") sys.stderr.flush() + # Optional path-level debug: sample raw paths coming back from MCP + debug_paths_flag = os.environ.get("CTX_DEBUG_PATHS", "").strip().lower() + if debug_paths_flag in {"1", "true", "yes", "on"} and hits: + try: + sample = [ + { + "path": h.get("path"), + "host_path": h.get("host_path"), + "container_path": h.get("container_path"), + "start_line": h.get("start_line"), + "end_line": h.get("end_line"), + "symbol": h.get("symbol"), + } + for h in hits[:5] + ] + sys.stderr.write("[DEBUG] repo_search sample paths:\n" + json.dumps(sample, indent=2) + "\n") + sys.stderr.flush() + except Exception: + pass + gate_flag = os.environ.get("CTX_RELEVANCE_GATE", "").strip().lower() if hits and gate_flag in {"1", "true", "yes", "on"}: try: @@ -960,10 +1026,12 @@ def rewrite_prompt(original_prompt: str, context: str, note: str, max_tokens: Op else: policy_system = ( "If context is provided, use it to make the prompt more concrete by citing specific file paths, line ranges, and symbols that appear in the Context refs. " + "When you cite a file, use its full path exactly as it appears in the Context refs, including all directories and prefixes (for example, '/home/.../ctx.py'), rather than shortening it to just a filename. " "Never invent references - only cite what appears verbatim in the Context refs. " ) policy_user = ( "If the context above contains relevant references, cite concrete file paths, line ranges, and symbols in your rewrite. " + "When mentioning a file, use the full path exactly as shown in the Context refs (including directories), not a shortened form like 'ctx.py'. 
" ) # Detect if we have actual code context or just a diagnostic note @@ -1291,6 +1359,10 @@ def main(): else: context_text, context_note = fetch_context(args.query, **filters) + # Derive allowed paths from the formatted context so we can validate/normalize + # any file-like mentions in the final rewrite. + allowed_paths, _ = extract_allowed_citations(context_text) + require_ctx_flag = os.environ.get("CTX_REQUIRE_CONTEXT", "").strip().lower() if require_ctx_flag in {"1", "true", "yes", "on"}: has_real_context = bool((context_text or "").strip()) and not ( @@ -1304,10 +1376,10 @@ def main(): output = (args.query or "").strip() else: rewritten = rewrite_prompt(args.query, context_text, context_note, max_tokens=args.rewrite_max_tokens) - output = rewritten.strip() + output = sanitize_citations(rewritten.strip(), allowed_paths) else: rewritten = rewrite_prompt(args.query, context_text, context_note, max_tokens=args.rewrite_max_tokens) - output = rewritten.strip() + output = sanitize_citations(rewritten.strip(), allowed_paths) if args.cmd: subprocess.run(args.cmd, input=output.encode("utf-8"), shell=True, check=False) From 4d9574fd5fe1e63713b64ffe61c10a7ec28251f5 Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 10:08:14 +0000 Subject: [PATCH 06/19] feat(vscode-extension): Bundle additional scripts and improve configuration --- vscode-extension/build/build.bat | 8 + vscode-extension/build/build.sh | 13 + .../context-engine-uploader/README.md | 21 +- .../context-engine-uploader/extension.js | 490 +++++++++++++++++- .../context-engine-uploader/package.json | 46 +- 5 files changed, 557 insertions(+), 21 deletions(-) mode change 100644 => 100755 vscode-extension/build/build.sh diff --git a/vscode-extension/build/build.bat b/vscode-extension/build/build.bat index 24878f88..8db62e3f 100644 --- a/vscode-extension/build/build.bat +++ b/vscode-extension/build/build.bat @@ -15,6 +15,10 @@ set "STAGE_DIR=%OUT_DIR%\extension-stage" set "BUILD_RESULT=0" for %%I in 
("..\..\ctx-hook-simple.sh") do set "HOOK_SRC=%%~fI" for %%I in ("..\..\scripts\ctx.py") do set "CTX_SRC=%%~fI" +for %%I in ("..\..\scripts\mcp_router.py") do set "ROUTER_SRC=%%~fI" +for %%I in ("..\..\scripts\refrag_glm.py") do set "REFRAG_SRC=%%~fI" +for %%I in ("..\..\scripts\mcp_router.py") do set "ROUTER_SRC=%%~fI" +for %%I in ("..\..\.env.example") do set "ENV_EXAMPLE_SRC=%%~fI" echo Building clean Context Engine Uploader extension... @@ -60,6 +64,10 @@ if errorlevel 1 ( REM Bundle ctx hook script and ctx CLI into the staged extension for reference if exist "%HOOK_SRC%" copy /Y "%HOOK_SRC%" "%STAGE_DIR%\ctx-hook-simple.sh" >nul if exist "%CTX_SRC%" copy /Y "%CTX_SRC%" "%STAGE_DIR%\ctx.py" >nul +if exist "%ROUTER_SRC%" copy /Y "%ROUTER_SRC%" "%STAGE_DIR%\mcp_router.py" >nul +if exist "%REFRAG_SRC%" copy /Y "%REFRAG_SRC%" "%STAGE_DIR%\refrag_glm.py" >nul +if exist "%ROUTER_SRC%" copy /Y "%ROUTER_SRC%" "%STAGE_DIR%\mcp_router.py" >nul +if exist "%ENV_EXAMPLE_SRC%" copy /Y "%ENV_EXAMPLE_SRC%" "%STAGE_DIR%\env.example" >nul REM Optional: bundle Python dependencies into the staged extension when requested if "%BUNDLE_DEPS%"=="1" ( diff --git a/vscode-extension/build/build.sh b/vscode-extension/build/build.sh old mode 100644 new mode 100755 index c665fc69..5c50933f --- a/vscode-extension/build/build.sh +++ b/vscode-extension/build/build.sh @@ -11,6 +11,9 @@ BUNDLE_DEPS="${1:-}" PYTHON_BIN="${PYTHON_BIN:-python3}" HOOK_SRC="$SCRIPT_DIR/../../ctx-hook-simple.sh" CTX_SRC="$SCRIPT_DIR/../../scripts/ctx.py" +ROUTER_SRC="$SCRIPT_DIR/../../scripts/mcp_router.py" +REFRAG_SRC="$SCRIPT_DIR/../../scripts/refrag_glm.py" +ENV_EXAMPLE_SRC="$SCRIPT_DIR/../../.env.example" cleanup() { rm -rf "$STAGE_DIR" @@ -44,6 +47,16 @@ fi if [[ -f "$CTX_SRC" ]]; then cp "$CTX_SRC" "$STAGE_DIR/ctx.py" fi +if [[ -f "$ROUTER_SRC" ]]; then + cp "$ROUTER_SRC" "$STAGE_DIR/mcp_router.py" +fi +if [[ -f "$REFRAG_SRC" ]]; then + cp "$REFRAG_SRC" "$STAGE_DIR/refrag_glm.py" +fi + +if [[ -f 
"$ENV_EXAMPLE_SRC" ]]; then + cp "$ENV_EXAMPLE_SRC" "$STAGE_DIR/env.example" +fi # Optional: bundle Python deps into the staged extension when requested if [[ "$BUNDLE_DEPS" == "--bundle-deps" ]]; then diff --git a/vscode-extension/context-engine-uploader/README.md b/vscode-extension/context-engine-uploader/README.md index a3decdc2..2051d704 100644 --- a/vscode-extension/context-engine-uploader/README.md +++ b/vscode-extension/context-engine-uploader/README.md @@ -18,13 +18,30 @@ Configuration - `Target Path` is auto-filled from the workspace but can be overridden if you need to upload a different folder. - **Python dependencies:** the extension runs the standalone upload client via your configured `pythonPath`. Ensure the interpreter has `requests`, `urllib3`, and `charset_normalizer` installed. Run `python3 -m pip install requests urllib3 charset_normalizer` (or replace `python3` with your configured path) before starting the uploader. - **Path mapping:** `Host Root` + `Container Root` control how local paths are rewritten before reaching the remote service. By default the host root mirrors your `Target Path` and the container root is `/work`, which keeps Windows paths working without extra config. - - **Claude Code MCP config:** `MCP Indexer Url` and `MCP Memory Url` control the URLs written into the project-local `.mcp.json` when you run the `Write MCP Config` command. This is only for configuring Claude Code MCP clients; other MCP integrations can be added separately later. +- **Claude Code MCP config:** `MCP Indexer Url` and `MCP Memory Url` control the URLs written into the project-local `.mcp.json` when you run the `Write MCP Config` command. This is only for configuring Claude Code MCP clients; other MCP integrations can be added separately later. +- **CTX + GLM settings:** + - `contextEngineUploader.ctxIndexerUrl` is copied into `.env` (as `MCP_INDEXER_URL`) so the embedded `ctx.py` knows which MCP indexer to call when enhancing prompts. 
+ - `contextEngineUploader.glmApiKey`, `glmApiBase`, and `glmModel` are used when scaffolding `ctx_config.json`/`.env` to pre-fill GLM decoder options. Existing non-placeholder values are preserved, so you can override them in the files at any time. +- **Context scaffolding:** + - `contextEngineUploader.scaffoldCtxConfig` (default `true`) controls whether the extension keeps a minimal `ctx_config.json` + `.env` in sync with your workspace. When enabled, running `Write MCP Config` or `Write CTX Config` will seed the files from the bundled `env.example` and inferred collection name, only overwriting placeholder/empty values. + - `contextEngineUploader.surfaceQdrantCollectionHint` gates whether the Claude hook adds a hint line with the Qdrant collection ID when ctx is enhancing prompts. This setting is also respected when the extension writes `.claude/settings.local.json`. + +Workspace-level ctx integration +------------------------------- +- The VSIX bundles an `env.example` template plus the ctx hook/CLI so you can dogfood the workflow without copying files manually. +- When scaffolding is enabled (see above), running the `Context Engine Uploader: Write CTX Config (ctx_config.json/.env)` command will: + - Infer the collection name from the standalone upload client (`--show-mapping`). + - Create or update `ctx_config.json` with that collection and sensible defaults (GLM runtime, `default_mode`, `require_context`, etc.). + - Create or update `.env` from the bundled template, ensuring CTX-critical values such as `MULTI_REPO_MODE=1`, `REFRAG_RUNTIME=glm`, and `REFRAG_DECODER=1` are set. Non-placeholder values (e.g., a real `GLM_API_KEY`) are left alone. +- You still own the files: if you need a custom value, edit `.env` or `ctx_config.json` directly. The scaffolder only touches keys that are missing, empty, or obviously placeholders. +- The Claude hook + ctx prompt enhancement is currently wired for Linux/dev-remote environments only. 
On other platforms, MCP config and uploading still work, but the automatic prompt rewrite hook is disabled. Commands -------- - Command Palette → “Context Engine Uploader” to access Start/Stop/Restart/Index Codebase. - Status-bar button (`Index Codebase`) mirrors the same behavior and displays progress. - - `Context Engine Uploader: Write MCP Config (.mcp.json)` writes or updates a project-local `.mcp.json` with MCP server entries for the Qdrant indexer and memory/search endpoints, using the configured MCP URLs. +- `Context Engine Uploader: Write MCP Config (.mcp.json)` writes or updates a project-local `.mcp.json` with MCP server entries for the Qdrant indexer and memory/search endpoints, using the configured MCP URLs. +- `Context Engine Uploader: Write CTX Config (ctx_config.json/.env)` scaffolds the ctx config + env files as described above. This command runs automatically after `Write MCP Config` if scaffolding is enabled, but it is also exposed in the Command Palette for manual use. Logs ---- diff --git a/vscode-extension/context-engine-uploader/extension.js b/vscode-extension/context-engine-uploader/extension.js index 5151c484..8c26a3ef 100644 --- a/vscode-extension/context-engine-uploader/extension.js +++ b/vscode-extension/context-engine-uploader/extension.js @@ -11,7 +11,7 @@ let statusBarItem; let statusMode = 'idle'; const REQUIRED_PYTHON_MODULES = ['requests', 'urllib3', 'charset_normalizer']; const DEFAULT_CONTAINER_ROOT = '/work'; -const CLAUDE_HOOK_COMMAND = '/home/coder/project/Context-Engine/ctx-hook-simple.sh'; +// const CLAUDE_HOOK_COMMAND = '/home/coder/project/Context-Engine/ctx-hook-simple.sh'; function activate(context) { outputChannel = vscode.window.createOutputChannel('Context Engine Upload'); context.subscriptions.push(outputChannel); @@ -35,6 +35,9 @@ function activate(context) { vscode.window.showInformationMessage('Context Engine indexing started.'); runSequence('force').catch(error => log(`Index failed: ${error instanceof Error ? 
error.message : String(error)}`)); }); + const ctxConfigDisposable = vscode.commands.registerCommand('contextEngineUploader.writeCtxConfig', () => { + writeCtxConfig().catch(error => log(`CTX config write failed: ${error instanceof Error ? error.message : String(error)}`)); + }); const mcpConfigDisposable = vscode.commands.registerCommand('contextEngineUploader.writeMcpConfig', () => { writeMcpConfig().catch(error => log(`MCP config write failed: ${error instanceof Error ? error.message : String(error)}`)); }); @@ -51,7 +54,8 @@ function activate(context) { event.affectsConfiguration('contextEngineUploader.mcpClaudeEnabled') || event.affectsConfiguration('contextEngineUploader.mcpWindsurfEnabled') || event.affectsConfiguration('contextEngineUploader.windsurfMcpPath') || - event.affectsConfiguration('contextEngineUploader.claudeHookEnabled') + event.affectsConfiguration('contextEngineUploader.claudeHookEnabled') || + event.affectsConfiguration('contextEngineUploader.surfaceQdrantCollectionHint') ) { // Best-effort auto-update of MCP + hook configurations when settings change writeMcpConfig().catch(error => log(`Auto MCP config write failed: ${error instanceof Error ? error.message : String(error)}`)); @@ -60,12 +64,17 @@ function activate(context) { const workspaceDisposable = vscode.workspace.onDidChangeWorkspaceFolders(() => { ensureTargetPathConfigured(); }); - context.subscriptions.push(startDisposable, stopDisposable, restartDisposable, indexDisposable, mcpConfigDisposable, configDisposable, workspaceDisposable); + context.subscriptions.push(startDisposable, stopDisposable, restartDisposable, indexDisposable, mcpConfigDisposable, ctxConfigDisposable, configDisposable, workspaceDisposable); const config = vscode.workspace.getConfiguration('contextEngineUploader'); ensureTargetPathConfigured(); if (config.get('runOnStartup')) { runSequence('auto').catch(error => log(`Startup run failed: ${error instanceof Error ? 
error.message : String(error)}`)); } + + // When enabled, best-effort auto-scaffold ctx_config.json/.env for the current targetPath on activation + if (config.get('scaffoldCtxConfig', true)) { + writeCtxConfig().catch(error => log(`CTX config auto-scaffold on activation failed: ${error instanceof Error ? error.message : String(error)}`)); + } } async function runSequence(mode = 'auto') { const options = resolveOptions(); @@ -156,7 +165,29 @@ function resolveOptions() { }; } function getTargetPath(config) { + let inspected; + try { + if (typeof config.inspect === 'function') { + inspected = config.inspect('targetPath'); + } + } catch (error) { + inspected = undefined; + } let targetPath = (config.get('targetPath') || '').trim(); + if (inspected && targetPath) { + let sourceLabel = 'default'; + if (inspected.workspaceFolderValue !== undefined) { + sourceLabel = 'workspaceFolder'; + } else if (inspected.workspaceValue !== undefined) { + sourceLabel = 'workspace'; + } else if (inspected.globalValue !== undefined) { + sourceLabel = 'user'; + } + log(`Target path resolved to ${targetPath} (source: ${sourceLabel} settings)`); + if (inspected.globalValue !== undefined && inspected.workspaceValue !== undefined && inspected.globalValue !== inspected.workspaceValue) { + log('Target path has different user and workspace values; using workspace value. Update workspace settings (e.g. .vscode/settings.json) to change it.'); + } + } if (targetPath) { updateStatusBarTooltip(targetPath); return targetPath; @@ -402,6 +433,19 @@ function buildChildEnv(options) { WORKSPACE_PATH: options.targetPath, WATCH_ROOT: options.targetPath }; + try { + const settings = vscode.workspace.getConfiguration('contextEngineUploader'); + const devRemoteMode = settings.get('devRemoteMode', false); + if (devRemoteMode) { + // Enable dev-remote upload mode for the standalone upload client. 
+ // This causes standalone_upload_client.py to ignore any 'dev-workspace' + // directories when scanning for files to upload. + env.REMOTE_UPLOAD_MODE = 'development'; + env.DEV_REMOTE_MODE = '1'; + } + } catch (error) { + log(`Failed to read devRemoteMode setting: ${error instanceof Error ? error.message : String(error)}`); + } if (options.hostRoot) { env.HOST_ROOT = options.hostRoot; } @@ -456,13 +500,349 @@ async function writeMcpConfig() { } else if (!isLinux) { vscode.window.showWarningMessage('Context Engine Uploader: Claude hook auto-config is only wired for Linux/dev-remote at this time.'); } else { - const result = await writeClaudeHookConfig(root, CLAUDE_HOOK_COMMAND); - hookWrote = hookWrote || result; + const commandPath = getClaudeHookCommand(); + if (!commandPath) { + vscode.window.showErrorMessage('Context Engine Uploader: embedded Claude hook script not found in extension; .claude/settings.local.json was not updated.'); + log('Claude hook config skipped because embedded ctx-hook-simple.sh could not be resolved.'); + } else { + const result = await writeClaudeHookConfig(root, commandPath); + hookWrote = hookWrote || result; + } } } if (!wroteAny && !hookWrote) { log('Context Engine Uploader: MCP config write skipped (no targets succeeded).'); } + + // Optionally scaffold ctx_config.json and .env using the inferred collection + if (settings.get('scaffoldCtxConfig', true)) { + try { + await writeCtxConfig(); + } catch (error) { + log(`CTX config auto-scaffolding failed: ${error instanceof Error ? 
error.message : String(error)}`); + } + } +} + +async function writeCtxConfig() { + const settings = vscode.workspace.getConfiguration('contextEngineUploader'); + const enabled = settings.get('scaffoldCtxConfig', true); + if (!enabled) { + vscode.window.showInformationMessage('Context Engine Uploader: ctx_config/.env scaffolding is disabled (contextEngineUploader.scaffoldCtxConfig=false).'); + log('CTX config scaffolding skipped because scaffoldCtxConfig is false.'); + return; + } + const options = resolveOptions(); + if (!options) { + return; + } + const collectionName = inferCollectionFromUpload(options); + if (!collectionName) { + vscode.window.showErrorMessage('Context Engine Uploader: failed to infer collection name from upload client. Check the Output panel for details.'); + return; + } + await scaffoldCtxConfigFiles(options.targetPath, collectionName); +} + +function inferCollectionFromUpload(options) { + try { + const args = ['-u', options.scriptPath, '--path', options.targetPath, '--endpoint', options.endpoint, '--show-mapping']; + const result = spawnSync(options.pythonPath, args, { + cwd: options.workingDirectory, + env: buildChildEnv(options), + encoding: 'utf8' + }); + if (result.error) { + log(`Failed to run standalone_upload_client for collection inference: ${result.error.message || String(result.error)}`); + return undefined; + } + const stdout = result.stdout || ''; + const stderr = result.stderr || ''; + + if (stdout) { + log(`[ctx-config] upload client --show-mapping output:\n${stdout}`); + } + if (stderr) { + log(`[ctx-config] upload client stderr:\n${stderr}`); + } + + const combined = `${stdout}\n${stderr}`; + if (combined.trim()) { + const lines = combined.split(/\r?\n/); + for (const line of lines) { + const m = line.match(/collection_name:\s*(.+)$/); + if (m && m[1]) { + const name = m[1].trim(); + if (name) { + return name; + } + } + } + } + } catch (error) { + log(`Error inferring collection from upload client: ${error instanceof Error ? 
error.message : String(error)}`); + } + return undefined; +} + +async function scaffoldCtxConfigFiles(workspaceDir, collectionName) { + try { + const placeholders = new Set(['', 'default-collection', 'my-collection', 'codebase']); + + // Read GLM settings from extension configuration (with sane defaults) + let glmApiKey = ''; + let glmApiBase = 'https://api.z.ai/api/coding/paas/v4/'; + let glmModel = 'glm-4.6'; + try { + const settings = vscode.workspace.getConfiguration('contextEngineUploader'); + const cfgKey = (settings.get('glmApiKey') || '').trim(); + const cfgBase = (settings.get('glmApiBase') || '').trim(); + const cfgModel = (settings.get('glmModel') || '').trim(); + if (cfgKey) { + glmApiKey = cfgKey; + } + if (cfgBase) { + glmApiBase = cfgBase; + } + if (cfgModel) { + glmModel = cfgModel; + } + } catch (error) { + log(`Failed to read GLM settings from configuration: ${error instanceof Error ? error.message : String(error)}`); + } + + // ctx_config.json + const ctxConfigPath = path.join(workspaceDir, 'ctx_config.json'); + let ctxConfig = {}; + if (fs.existsSync(ctxConfigPath)) { + try { + const raw = fs.readFileSync(ctxConfigPath, 'utf8'); + const parsed = JSON.parse(raw); + if (parsed && typeof parsed === 'object') { + ctxConfig = parsed; + } + } catch (error) { + log(`Failed to parse existing ctx_config.json at ${ctxConfigPath}; overwriting with minimal config. Error: ${error instanceof Error ? error.message : String(error)}`); + ctxConfig = {}; + } + } + const currentDefault = typeof ctxConfig.default_collection === 'string' ? 
ctxConfig.default_collection.trim() : ''; + let ctxChanged = false; + let notifiedDefault = false; + if (!currentDefault || placeholders.has(currentDefault)) { + ctxConfig.default_collection = collectionName; + ctxChanged = true; + notifiedDefault = true; + } + if (ctxConfig.default_mode === undefined) { + ctxConfig.default_mode = 'default'; + ctxChanged = true; + } + if (ctxConfig.require_context === undefined) { + ctxConfig.require_context = true; + ctxChanged = true; + } + if (ctxConfig.refrag_runtime === undefined) { + ctxConfig.refrag_runtime = 'glm'; + ctxChanged = true; + } + if (ctxConfig.glm_api_base === undefined) { + ctxConfig.glm_api_base = glmApiBase; + ctxChanged = true; + } + if (ctxConfig.glm_model === undefined) { + ctxConfig.glm_model = glmModel; + ctxChanged = true; + } + const existingGlmKey = typeof ctxConfig.glm_api_key === 'string' ? ctxConfig.glm_api_key.trim() : ''; + if (glmApiKey) { + if (!existingGlmKey) { + ctxConfig.glm_api_key = glmApiKey; + ctxChanged = true; + } + } else if (ctxConfig.glm_api_key === undefined) { + ctxConfig.glm_api_key = ''; + ctxChanged = true; + } + if (ctxChanged) { + fs.writeFileSync(ctxConfigPath, JSON.stringify(ctxConfig, null, 2) + '\n', 'utf8'); + if (notifiedDefault) { + vscode.window.showInformationMessage(`Context Engine Uploader: ctx_config.json updated with default_collection=${collectionName}.`); + } else { + vscode.window.showInformationMessage('Context Engine Uploader: ctx_config.json refreshed with required defaults.'); + } + log(`Wrote ctx_config.json at ${ctxConfigPath}`); + } else { + log(`ctx_config.json at ${ctxConfigPath} already satisfied required values; not modified.`); + } + + // .env + const envPath = path.join(workspaceDir, '.env'); + let envContent = ''; + + // Seed from bundled env.example (extension root) when workspace .env is missing + const baseDir = extensionRoot || __dirname; + const envExamplePath = path.join(baseDir, 'env.example'); + if (fs.existsSync(envPath)) { + try { + 
envContent = fs.readFileSync(envPath, 'utf8'); + } catch (error) { + log(`Failed to read existing .env at ${envPath}; skipping .env update. Error: ${error instanceof Error ? error.message : String(error)}`); + return; + } + } else if (fs.existsSync(envExamplePath)) { + try { + envContent = fs.readFileSync(envExamplePath, 'utf8'); + log(`Seeding new .env for ${workspaceDir} from bundled env.example.`); + } catch (error) { + log(`Failed to read bundled env.example at ${envExamplePath}; starting with minimal .env. Error: ${error instanceof Error ? error.message : String(error)}`); + envContent = ''; + } + } + let envLines = envContent ? envContent.split(/\r?\n/) : []; + let envChanged = false; + let collectionUpdated = false; + + let idx = -1; + for (let i = 0; i < envLines.length; i++) { + if (envLines[i].trim().startsWith('COLLECTION_NAME=')) { + idx = i; + break; + } + } + let currentEnvVal = ''; + if (idx >= 0) { + const m = envLines[idx].match(/^COLLECTION_NAME=(.*)$/); + if (m) { + currentEnvVal = (m[1] || '').trim(); + } + } + if (idx === -1 || placeholders.has(currentEnvVal)) { + const newLine = `COLLECTION_NAME=${collectionName}`; + if (idx === -1) { + if (envLines.length && envLines[envLines.length - 1].trim() !== '') { + envLines.push(''); + } + envLines.push(newLine); + } else { + envLines[idx] = newLine; + } + envChanged = true; + collectionUpdated = true; + vscode.window.showInformationMessage(`Context Engine Uploader: .env updated with COLLECTION_NAME=${collectionName}.`); + log(`Updated .env at ${envPath}`); + } else { + log(`.env at ${envPath} already has non-placeholder COLLECTION_NAME; not modified.`); + } + + function getEnvEntry(key) { + for (let i = 0; i < envLines.length; i++) { + const line = envLines[i]; + if (!line || line.trim().startsWith('#')) { + continue; + } + const eqIndex = line.indexOf('='); + if (eqIndex === -1) { + continue; + } + const candidate = line.slice(0, eqIndex).trim(); + if (candidate === key) { + return { index: i, 
value: line.slice(eqIndex + 1) }; + } + } + return { index: -1, value: undefined }; + } + + function upsertEnv(key, desiredValue, options = {}) { + const { + overwrite = false, + treatEmptyAsUnset = false, + placeholderValues = [], + skipIfDesiredEmpty = false + } = options; + const desired = desiredValue ?? ''; + const desiredStr = String(desired); + if (!desiredStr && skipIfDesiredEmpty) { + return false; + } + const { index, value } = getEnvEntry(key); + const current = typeof value === 'string' ? value.trim() : ''; + const normalizedDesired = desiredStr.trim(); + const placeholderSet = new Set((placeholderValues || []).map(val => (val || '').trim().toLowerCase())); + let shouldUpdate = false; + + if (index === -1) { + shouldUpdate = true; + } else if (overwrite) { + if (current !== normalizedDesired) { + shouldUpdate = true; + } + } else if (treatEmptyAsUnset && !current) { + shouldUpdate = true; + } else if (placeholderSet.size && placeholderSet.has(current.toLowerCase())) { + shouldUpdate = true; + } + + if (!shouldUpdate) { + return false; + } + + const newLine = `${key}=${desiredStr}`; + if (index === -1) { + if (envLines.length && envLines[envLines.length - 1].trim() !== '') { + envLines.push(''); + } + envLines.push(newLine); + } else { + envLines[index] = newLine; + } + envChanged = true; + return true; + } + + // Force CTX-critical defaults regardless of template values + upsertEnv('MULTI_REPO_MODE', '1', { overwrite: true }); + upsertEnv('REFRAG_MODE', '1', { overwrite: true }); + upsertEnv('REFRAG_DECODER', '1', { overwrite: true }); + upsertEnv('REFRAG_RUNTIME', 'glm', { overwrite: true, placeholderValues: ['llamacpp'] }); + + // Ensure decoder/GLM env vars exist with sane defaults + upsertEnv('REFRAG_ENCODER_MODEL', 'BAAI/bge-base-en-v1.5', { treatEmptyAsUnset: true }); + upsertEnv('REFRAG_PHI_PATH', '/work/models/refrag_phi_768_to_dmodel.bin', { treatEmptyAsUnset: true }); + upsertEnv('REFRAG_SENSE', 'heuristic', { treatEmptyAsUnset: true }); + + 
const glmKeyPlaceholders = ['YOUR_GLM_API_KEY', '"YOUR_GLM_API_KEY"', "''", '""']; + if (glmApiKey) { + upsertEnv('GLM_API_KEY', glmApiKey, { + treatEmptyAsUnset: true, + placeholderValues: glmKeyPlaceholders + }); + } else { + upsertEnv('GLM_API_KEY', '', {}); + } + upsertEnv('GLM_API_BASE', glmApiBase, { treatEmptyAsUnset: true }); + upsertEnv('GLM_MODEL', glmModel, { treatEmptyAsUnset: true }); + + // Ensure MCP_INDEXER_URL is present based on extension setting (for ctx.py) + try { + const settings = vscode.workspace.getConfiguration('contextEngineUploader'); + const ctxIndexerUrl = (settings.get('ctxIndexerUrl') || 'http://localhost:8003/mcp').trim(); + if (ctxIndexerUrl) { + upsertEnv('MCP_INDEXER_URL', ctxIndexerUrl, { treatEmptyAsUnset: true }); + } + } catch (error) { + log(`Failed to read ctxIndexerUrl setting for MCP_INDEXER_URL: ${error instanceof Error ? error.message : String(error)}`); + } + + if (envChanged) { + fs.writeFileSync(envPath, envLines.join('\n') + '\n', 'utf8'); + log(`Ensured decoder/GLM/MCP settings in .env at ${envPath}`); + } else { + log(`.env at ${envPath} already satisfied CTX defaults; not modified.`); + } + } catch (error) { + log(`Error scaffolding ctx_config/.env: ${error instanceof Error ? 
error.message : String(error)}`); + } } function deactivate() { return stopProcesses(); @@ -472,6 +852,28 @@ module.exports = { deactivate }; +function getClaudeHookCommand() { + const isLinux = process.platform === 'linux'; + if (!isLinux) { + return ''; + } + if (!extensionRoot) { + log('Claude hook command resolution failed: extensionRoot is undefined.'); + return ''; + } + try { + const embeddedPath = path.join(extensionRoot, 'ctx-hook-simple.sh'); + if (fs.existsSync(embeddedPath)) { + log(`Using embedded Claude hook at ${embeddedPath}`); + return embeddedPath; + } + log(`Claude hook command resolution failed: ctx-hook-simple.sh not found at ${embeddedPath}`); + } catch (error) { + log(`Failed to resolve embedded Claude hook path: ${error instanceof Error ? error.message : String(error)}`); + } + return ''; +} + function getDefaultWindsurfMcpPath() { return path.join(os.homedir(), '.codeium', 'windsurf', 'mcp_config.json'); } @@ -614,16 +1016,78 @@ async function writeClaudeHookConfig(root, commandPath) { if (!config.hooks || typeof config.hooks !== 'object') { config.hooks = {}; } - config.hooks['UserPromptSubmit'] = [ - { - hooks: [ - { - type: 'command', - command: commandPath + // Derive CTX workspace directory and optional hint flags for the hook from extension settings + let hookEnv; + let surfaceHintEnabled = false; + try { + const uploaderConfig = vscode.workspace.getConfiguration('contextEngineUploader'); + const targetPath = (uploaderConfig.get('targetPath') || '').trim(); + if (targetPath) { + const resolvedTarget = path.resolve(targetPath); + hookEnv = { CTX_WORKSPACE_DIR: resolvedTarget }; + } + const surfaceHint = uploaderConfig.get('surfaceQdrantCollectionHint', true); + const claudeMcpEnabled = uploaderConfig.get('mcpClaudeEnabled', true); + surfaceHintEnabled = !!(surfaceHint && claudeMcpEnabled); + if (surfaceHintEnabled) { + if (!hookEnv) { + hookEnv = {}; + } + hookEnv.CTX_SURFACE_COLLECTION_HINT = '1'; + } + } catch (error) { + // 
Best-effort only; if anything fails, fall back to no extra env + hookEnv = undefined; + surfaceHintEnabled = false; + } + + const hook = { + type: 'command', + command: commandPath + }; + if (hookEnv) { + hook.env = hookEnv; + } + + // Append or update our hook under UserPromptSubmit without clobbering existing hooks + let userPromptHooks = config.hooks['UserPromptSubmit']; + if (!Array.isArray(userPromptHooks)) { + userPromptHooks = []; + } + + let found = false; + for (const entry of userPromptHooks) { + if (!entry || !Array.isArray(entry.hooks)) { + continue; + } + for (const existing of entry.hooks) { + if (existing && existing.type === 'command' && existing.command === commandPath) { + // Our hook is already present; optionally refresh env and toggle the collection hint flag + if (!existing.env) { + existing.env = {}; + } + if (hookEnv) { + existing.env = { ...existing.env, ...hookEnv }; + } + if (!surfaceHintEnabled && Object.prototype.hasOwnProperty.call(existing.env, 'CTX_SURFACE_COLLECTION_HINT')) { + delete existing.env.CTX_SURFACE_COLLECTION_HINT; + } else if (surfaceHintEnabled) { + existing.env.CTX_SURFACE_COLLECTION_HINT = '1'; } - ] + found = true; + break; + } + } + if (found) { + break; } - ]; + } + + if (!found) { + userPromptHooks.push({ hooks: [hook] }); + } + + config.hooks['UserPromptSubmit'] = userPromptHooks; fs.writeFileSync(settingsPath, JSON.stringify(config, null, 2) + '\n', 'utf8'); vscode.window.showInformationMessage('Context Engine Uploader: .claude/settings.local.json updated with Claude hook.'); log(`Wrote Claude hook config at ${settingsPath}`); diff --git a/vscode-extension/context-engine-uploader/package.json b/vscode-extension/context-engine-uploader/package.json index 2f064431..e6b02c1d 100644 --- a/vscode-extension/context-engine-uploader/package.json +++ b/vscode-extension/context-engine-uploader/package.json @@ -36,6 +36,10 @@ { "command": "contextEngineUploader.writeMcpConfig", "title": "Context Engine Uploader: Write MCP 
Config (.mcp.json)" + }, + { + "command": "contextEngineUploader.writeCtxConfig", + "title": "Context Engine Uploader: Write CTX Config (ctx_config.json/.env)" } ], "configuration": { @@ -64,7 +68,7 @@ }, "contextEngineUploader.endpoint": { "type": "string", - "default": "http://mcp.speramus.id:8004", + "default": "http://localhost:8004", "description": "Endpoint URL for the remote upload client." }, "contextEngineUploader.intervalSeconds": { @@ -99,11 +103,6 @@ "default": "/work", "description": "Container path that mirrors the host root on the upload server." }, - "contextEngineUploader.bundleSizeLimitMB": { - "type": "number", - "default": 100, - "description": "Maximum upload bundle size enforced by the Context Engine server (MB)." - }, "contextEngineUploader.mcpClaudeEnabled": { "type": "boolean", "default": true, @@ -124,6 +123,11 @@ "default": "http://localhost:8000/sse", "description": "Claude Code MCP server URL for the memory/search MCP server. Used when writing the project-local .mcp.json via 'Write MCP Config'." }, + "contextEngineUploader.ctxIndexerUrl": { + "type": "string", + "default": "http://localhost:8003/mcp", + "description": "CTX MCP indexer HTTP endpoint (MCP_INDEXER_URL) used by ctx.py when enhancing prompts via the Claude hook." + }, "contextEngineUploader.windsurfMcpPath": { "type": "string", "default": "", @@ -133,6 +137,36 @@ "type": "boolean", "default": false, "description": "Enable writing the workspace .claude/settings.local.json hooks section for the Context Engine Claude Code hook." + }, + "contextEngineUploader.surfaceQdrantCollectionHint": { + "type": "boolean", + "default": false, + "description": "When enabled and Claude MCP + hook configs are written, the CTX Claude hook adds a brief hint line with the Qdrant collection ID to enhanced prompts." + }, + "contextEngineUploader.devRemoteMode": { + "type": "boolean", + "default": false, + "description": "Enable dev-remote upload mode for the standalone upload client. 
When enabled, the extension sets REMOTE_UPLOAD_MODE=development and DEV_REMOTE_MODE=1 for the child process, causing standalone_upload_client.py to ignore any 'dev-workspace' directories when scanning for files to upload." + }, + "contextEngineUploader.scaffoldCtxConfig": { + "type": "boolean", + "default": true, + "description": "When enabled, the extension can create or update a minimal ctx_config.json and .env in the targetPath using the inferred collection name from the upload client." + }, + "contextEngineUploader.glmApiKey": { + "type": "string", + "default": "", + "description": "GLM API key used by ctx.py when REFRAG_RUNTIME=glm. When scaffolding, this value is written to glm_api_key in ctx_config.json and GLM_API_KEY in .env if those fields are missing." + }, + "contextEngineUploader.glmApiBase": { + "type": "string", + "default": "https://api.z.ai/api/coding/paas/v4/", + "description": "GLM API base URL (GLM_API_BASE) used by refrag_glm/ctx.py when REFRAG_RUNTIME=glm." + }, + "contextEngineUploader.glmModel": { + "type": "string", + "default": "glm-4.6", + "description": "GLM model name (GLM_MODEL) used by refrag_glm/ctx.py when REFRAG_RUNTIME=glm." 
} } } From c8d4b883bcc5e32995cc8c48a1f20d25af930800 Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 10:10:37 +0000 Subject: [PATCH 07/19] chore(config): remove duplicate MULTI_REPO_MODE configuration in .env.example --- .env.example | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.env.example b/.env.example index 48e82544..5abf546c 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,6 @@ # Qdrant connection QDRANT_URL=http://localhost:6333 -QDRANT_API_KEY= +# QDRANT_API_KEY= # Multi-repo mode: 0=single-repo (default), 1=multi-repo # Single-repo: All files go into one collection (COLLECTION_NAME) @@ -11,10 +11,6 @@ MULTI_REPO_MODE=0 # Leave unset or use "codebase" for unified search across all your code COLLECTION_NAME=codebase -# Repository mode: 0=single-repo (default), 1=multi-repo -# Single-repo: All files go into one collection (COLLECTION_NAME) -# Multi-repo: Each subdirectory gets its own collection -MULTI_REPO_MODE=0 # Embeddings EMBEDDING_MODEL=BAAI/bge-base-en-v1.5 From 9829f9290d212b9a46356f4b876159dd54d03f01 Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 13:38:21 +0000 Subject: [PATCH 08/19] Updates Claude hook configuration logic Improves the Claude hook configuration logic to correctly update or add the hook. Normalizes command paths to account for versioned directory names and deduplicates hook entries to avoid accidental double entries. Also ensures that only valid hook entries are persisted. --- .../context-engine-uploader/extension.js | 48 +++++++++++++++---- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/vscode-extension/context-engine-uploader/extension.js b/vscode-extension/context-engine-uploader/extension.js index 41d95499..b7f7e7f4 100644 --- a/vscode-extension/context-engine-uploader/extension.js +++ b/vscode-extension/context-engine-uploader/extension.js @@ -888,6 +888,7 @@ function buildChildEnv(options) { // directories when scanning for files to upload. 
env.REMOTE_UPLOAD_MODE = 'development'; env.DEV_REMOTE_MODE = '1'; + log('Context Engine Uploader: devRemoteMode enabled (REMOTE_UPLOAD_MODE=development, DEV_REMOTE_MODE=1).'); } } catch (error) { log(`Failed to read devRemoteMode setting: ${error instanceof Error ? error.message : String(error)}`); @@ -1502,14 +1503,26 @@ async function writeClaudeHookConfig(root, commandPath) { userPromptHooks = []; } - let found = false; + const normalizeCommand = value => { + if (!value) return ''; + const resolved = path.resolve(value); + return resolved.replace(/context-engine\.context-engine-uploader-[0-9.]+/, 'context-engine.context-engine-uploader'); + }; + + const normalizedNewCommand = normalizeCommand(commandPath); + let updated = false; + for (const entry of userPromptHooks) { if (!entry || !Array.isArray(entry.hooks)) { continue; } for (const existing of entry.hooks) { - if (existing && existing.type === 'command' && existing.command === commandPath) { - // Our hook is already present; optionally refresh env and toggle the collection hint flag + if (!existing || existing.type !== 'command') { + continue; + } + const normalizedExisting = normalizeCommand(existing.command); + if (normalizedExisting === normalizedNewCommand) { + existing.command = commandPath; if (!existing.env) { existing.env = {}; } @@ -1521,20 +1534,35 @@ async function writeClaudeHookConfig(root, commandPath) { } else if (surfaceHintEnabled) { existing.env.CTX_SURFACE_COLLECTION_HINT = '1'; } - found = true; - break; + updated = true; } } - if (found) { - break; - } } - if (!found) { + if (!updated) { userPromptHooks.push({ hooks: [hook] }); } - config.hooks['UserPromptSubmit'] = userPromptHooks; + // Deduplicate any accidental double entries for the same command + const seenCommands = new Set(); + for (const entry of userPromptHooks) { + if (!entry || !Array.isArray(entry.hooks)) { + continue; + } + entry.hooks = entry.hooks.filter(existing => { + if (!existing || existing.type !== 'command') { + 
return true; + } + const normalized = normalizeCommand(existing.command); + if (seenCommands.has(normalized)) { + return false; + } + seenCommands.add(normalized); + return true; + }); + } + + config.hooks['UserPromptSubmit'] = userPromptHooks.filter(entry => Array.isArray(entry.hooks) && entry.hooks.length); fs.writeFileSync(settingsPath, JSON.stringify(config, null, 2) + '\n', 'utf8'); vscode.window.showInformationMessage('Context Engine Uploader: .claude/settings.local.json updated with Claude hook.'); log(`Wrote Claude hook config at ${settingsPath}`); From 7237a12566234d60bade85880e1fb07963025c3c Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 14:25:00 +0000 Subject: [PATCH 09/19] Improves context scaffolding and file scanning - Refactors file scanning logic for improved performance and accuracy, mimicking the standalone client's pruning logic. - Standardizes decoder configuration, using a `decoderRuntime` setting instead of a boolean flag. - Improves ctx_config.json and .env scaffolding, preserving existing user values and enforcing defaults to ensure remote upload compatibility. 
--- scripts/remote_upload_client.py | 43 ++++-- scripts/standalone_upload_client.py | 2 +- .../context-engine-uploader/README.md | 5 +- .../context-engine-uploader/extension.js | 128 ++++++++++-------- .../context-engine-uploader/package.json | 15 +- 5 files changed, 115 insertions(+), 78 deletions(-) diff --git a/scripts/remote_upload_client.py b/scripts/remote_upload_client.py index 1be51735..908f6645 100644 --- a/scripts/remote_upload_client.py +++ b/scripts/remote_upload_client.py @@ -827,26 +827,43 @@ def process_changes_and_upload(self, changes: Dict[str, List]) -> bool: def get_all_code_files(self) -> List[Path]: """Get all code files in the workspace.""" - all_files = [] + files: List[Path] = [] try: workspace_path = Path(self.workspace_path) - for ext in idx.CODE_EXTS: - all_files.extend(workspace_path.rglob(f"*{ext}")) + if not workspace_path.exists(): + return files - # Filter out directories and hidden files + # Single walk with early pruning similar to standalone client + ext_suffixes = {str(ext).lower() for ext in idx.CODE_EXTS if str(ext).startswith('.')} + name_matches = {str(ext) for ext in idx.CODE_EXTS if not str(ext).startswith('.')} dev_remote = os.environ.get("DEV_REMOTE_MODE") == "1" or os.environ.get("REMOTE_UPLOAD_MODE") == "development" - ignored_dirs = {"dev-workspace"} if dev_remote else set() - all_files = [ - f for f in all_files - if f.is_file() - and not any(part.startswith('.') for part in f.parts) - and '.codebase' not in str(f) - and not any(part in ignored_dirs for part in f.parts) - ] + excluded = { + "node_modules", "vendor", "dist", "build", "target", "out", + ".git", ".hg", ".svn", ".vscode", ".idea", ".venv", "venv", + "__pycache__", ".pytest_cache", ".mypy_cache", ".cache", + ".context-engine", ".context-engine-uploader", ".codebase" + } + if dev_remote: + excluded.add("dev-workspace") + + seen = set() + for root, dirnames, filenames in os.walk(workspace_path): + dirnames[:] = [d for d in dirnames if d not in excluded and 
not d.startswith('.')] + + for filename in filenames: + if filename.startswith('.'): + continue + candidate = Path(root) / filename + suffix = candidate.suffix.lower() + if filename in name_matches or suffix in ext_suffixes: + resolved = candidate.resolve() + if resolved not in seen: + seen.add(resolved) + files.append(candidate) except Exception as e: logger.error(f"[watch] Error scanning files: {e}") - return all_files + return files def watch_loop(self, interval: int = 5): """Main file watching loop using existing detection and upload methods.""" diff --git a/scripts/standalone_upload_client.py b/scripts/standalone_upload_client.py index d0d75dc0..c6ae1163 100644 --- a/scripts/standalone_upload_client.py +++ b/scripts/standalone_upload_client.py @@ -1042,7 +1042,7 @@ def get_all_code_files(self) -> List[Path]: "__pycache__", ".pytest_cache", ".mypy_cache", ".cache", ".context-engine", ".context-engine-uploader", ".codebase" } - if not dev_remote: + if dev_remote: excluded.add("dev-workspace") seen = set() diff --git a/vscode-extension/context-engine-uploader/README.md b/vscode-extension/context-engine-uploader/README.md index 5e847f84..ddf24590 100644 --- a/vscode-extension/context-engine-uploader/README.md +++ b/vscode-extension/context-engine-uploader/README.md @@ -18,13 +18,14 @@ Configuration - `Target Path` is auto-filled from the workspace but can be overridden if you need to upload a different folder. - **Python dependencies:** the extension runs the standalone upload client via your configured `pythonPath`. Ensure the interpreter has `requests`, `urllib3`, and `charset_normalizer` installed. Run `python3 -m pip install requests urllib3 charset_normalizer` (or replace `python3` with your configured path) before starting the uploader. - **Path mapping:** `Host Root` + `Container Root` control how local paths are rewritten before reaching the remote service. 
By default the host root mirrors your `Target Path` and the container root is `/work`, which keeps Windows paths working without extra config. -- **Prompt+ decoder:** set `Context Engine Uploader: Decoder Url` (default `http://localhost:8081`, auto-appends `/completion`) to point at your local llama.cpp decoder. For Ollama, set it to `http://localhost:11434/api/chat`. Enable `Context Engine Uploader: Use Glm Decoder` to set `REFRAG_RUNTIME=glm` for GLM backends. Turn on `Use Gpu Decoder` to set `USE_GPU_DECODER=1` so ctx.py prefers the GPU llama.cpp sidecar. +- **Prompt+ decoder:** set `Context Engine Uploader: Decoder Url` (default `http://localhost:8081`, auto-appends `/completion`) to point at your local llama.cpp decoder. For Ollama, set it to `http://localhost:11434/api/chat`. Turn on `Use Gpu Decoder` to set `USE_GPU_DECODER=1` so ctx.py prefers the GPU llama.cpp sidecar. - **Claude Code MCP config:** `MCP Indexer Url` and `MCP Memory Url` control the URLs written into the project-local `.mcp.json` when you run the `Write MCP Config` command. This is only for configuring Claude Code MCP clients; other MCP integrations can be added separately later. - **CTX + GLM settings:** - `contextEngineUploader.ctxIndexerUrl` is copied into `.env` (as `MCP_INDEXER_URL`) so the embedded `ctx.py` knows which MCP indexer to call when enhancing prompts. - `contextEngineUploader.glmApiKey`, `glmApiBase`, and `glmModel` are used when scaffolding `ctx_config.json`/`.env` to pre-fill GLM decoder options. Existing non-placeholder values are preserved, so you can override them in the files at any time. - **Context scaffolding:** - - `contextEngineUploader.scaffoldCtxConfig` (default `true`) controls whether the extension keeps a minimal `ctx_config.json` + `.env` in sync with your workspace. When enabled, running `Write MCP Config` or `Write CTX Config` will seed the files from the bundled `env.example` and inferred collection name, only overwriting placeholder/empty values. 
+ - `contextEngineUploader.scaffoldCtxConfig` (default `true`) controls whether the extension keeps a minimal `ctx_config.json` + `.env` in sync with your workspace. When enabled, running `Write MCP Config` or `Write CTX Config` will reuse the workspace’s existing files (if present) and only backfill placeholder or missing values from the bundled `env.example` plus the inferred collection name. Existing custom values are preserved.
+ - The scaffolder also enforces CTX defaults (e.g., `MULTI_REPO_MODE=1`, `REFRAG_RUNTIME=glm`, `REFRAG_DECODER=1`) so the embedded `ctx.py` is ready for remote uploads; `REFRAG_RUNTIME` follows the configured `decoderRuntime` setting.
- `contextEngineUploader.surfaceQdrantCollectionHint` gates whether the Claude hook adds a hint line with the Qdrant collection ID when ctx is enhancing prompts. This setting is also respected when the extension writes `.claude/settings.local.json`.

Workspace-level ctx integration

diff --git a/vscode-extension/context-engine-uploader/extension.js b/vscode-extension/context-engine-uploader/extension.js
index b7f7e7f4..481c8753 100644
--- a/vscode-extension/context-engine-uploader/extension.js
+++ b/vscode-extension/context-engine-uploader/extension.js
@@ -687,10 +687,6 @@ async function enhanceSelectionWithUnicorn() {
     }
     try {
         const cfg = vscode.workspace.getConfiguration('contextEngineUploader');
-        const useGlmDecoder = cfg.get('useGlmDecoder', false);
-        if (useGlmDecoder) {
-            env.REFRAG_RUNTIME = 'glm';
-        }
         const useGpuDecoder = cfg.get('useGpuDecoder', false);
         if (useGpuDecoder) {
             env.USE_GPU_DECODER = '1';
@@ -1036,26 +1032,42 @@ async function scaffoldCtxConfigFiles(workspaceDir, collectionName) {
     try {
         const placeholders = new Set(['', 'default-collection', 'my-collection', 'codebase']);

-        // Read GLM settings from extension configuration (with sane defaults)
+        let uploaderSettings;
+        try {
+            uploaderSettings = vscode.workspace.getConfiguration('contextEngineUploader');
+        } catch (error) {
+            log(`Failed to read uploader
settings: ${error instanceof Error ? error.message : String(error)}`); + uploaderSettings = undefined; + } + + // Decoder/runtime settings from configuration + let decoderRuntime = 'glm'; + let useGpuDecoderSetting = false; let glmApiKey = ''; let glmApiBase = 'https://api.z.ai/api/coding/paas/v4/'; let glmModel = 'glm-4.6'; - try { - const settings = vscode.workspace.getConfiguration('contextEngineUploader'); - const cfgKey = (settings.get('glmApiKey') || '').trim(); - const cfgBase = (settings.get('glmApiBase') || '').trim(); - const cfgModel = (settings.get('glmModel') || '').trim(); - if (cfgKey) { - glmApiKey = cfgKey; - } - if (cfgBase) { - glmApiBase = cfgBase; - } - if (cfgModel) { - glmModel = cfgModel; + if (uploaderSettings) { + try { + const runtimeSetting = String(uploaderSettings.get('decoderRuntime') ?? 'glm').trim().toLowerCase(); + if (runtimeSetting === 'llamacpp') { + decoderRuntime = 'llamacpp'; + } + useGpuDecoderSetting = !!uploaderSettings.get('useGpuDecoder', false); + const cfgKey = (uploaderSettings.get('glmApiKey') || '').trim(); + const cfgBase = (uploaderSettings.get('glmApiBase') || '').trim(); + const cfgModel = (uploaderSettings.get('glmModel') || '').trim(); + if (cfgKey) { + glmApiKey = cfgKey; + } + if (cfgBase) { + glmApiBase = cfgBase; + } + if (cfgModel) { + glmModel = cfgModel; + } + } catch (error) { + log(`Failed to read decoder/GLM settings from configuration: ${error instanceof Error ? error.message : String(error)}`); } - } catch (error) { - log(`Failed to read GLM settings from configuration: ${error instanceof Error ? 
error.message : String(error)}`); } // ctx_config.json @@ -1089,27 +1101,29 @@ async function scaffoldCtxConfigFiles(workspaceDir, collectionName) { ctxConfig.require_context = true; ctxChanged = true; } - if (ctxConfig.refrag_runtime === undefined) { - ctxConfig.refrag_runtime = 'glm'; - ctxChanged = true; - } - if (ctxConfig.glm_api_base === undefined) { - ctxConfig.glm_api_base = glmApiBase; + if (ctxConfig.refrag_runtime !== decoderRuntime) { + ctxConfig.refrag_runtime = decoderRuntime; ctxChanged = true; } - if (ctxConfig.glm_model === undefined) { - ctxConfig.glm_model = glmModel; - ctxChanged = true; - } - const existingGlmKey = typeof ctxConfig.glm_api_key === 'string' ? ctxConfig.glm_api_key.trim() : ''; - if (glmApiKey) { - if (!existingGlmKey) { - ctxConfig.glm_api_key = glmApiKey; + if (decoderRuntime === 'glm') { + if (ctxConfig.glm_api_base === undefined) { + ctxConfig.glm_api_base = glmApiBase; + ctxChanged = true; + } + if (ctxConfig.glm_model === undefined) { + ctxConfig.glm_model = glmModel; + ctxChanged = true; + } + const existingGlmKey = typeof ctxConfig.glm_api_key === 'string' ? 
ctxConfig.glm_api_key.trim() : ''; + if (glmApiKey) { + if (!existingGlmKey) { + ctxConfig.glm_api_key = glmApiKey; + ctxChanged = true; + } + } else if (ctxConfig.glm_api_key === undefined) { + ctxConfig.glm_api_key = ''; ctxChanged = true; } - } else if (ctxConfig.glm_api_key === undefined) { - ctxConfig.glm_api_key = ''; - ctxChanged = true; } if (ctxChanged) { fs.writeFileSync(ctxConfigPath, JSON.stringify(ctxConfig, null, 2) + '\n', 'utf8'); @@ -1251,34 +1265,38 @@ async function scaffoldCtxConfigFiles(workspaceDir, collectionName) { upsertEnv('MULTI_REPO_MODE', '1', { overwrite: true }); upsertEnv('REFRAG_MODE', '1', { overwrite: true }); upsertEnv('REFRAG_DECODER', '1', { overwrite: true }); - upsertEnv('REFRAG_RUNTIME', 'glm', { overwrite: true, placeholderValues: ['llamacpp'] }); + upsertEnv('REFRAG_RUNTIME', decoderRuntime, { overwrite: true, placeholderValues: ['llamacpp', 'glm'] }); + upsertEnv('USE_GPU_DECODER', useGpuDecoderSetting ? '1' : '0', { overwrite: true }); // Ensure decoder/GLM env vars exist with sane defaults upsertEnv('REFRAG_ENCODER_MODEL', 'BAAI/bge-base-en-v1.5', { treatEmptyAsUnset: true }); upsertEnv('REFRAG_PHI_PATH', '/work/models/refrag_phi_768_to_dmodel.bin', { treatEmptyAsUnset: true }); upsertEnv('REFRAG_SENSE', 'heuristic', { treatEmptyAsUnset: true }); - const glmKeyPlaceholders = ['YOUR_GLM_API_KEY', '"YOUR_GLM_API_KEY"', "''", '""']; - if (glmApiKey) { - upsertEnv('GLM_API_KEY', glmApiKey, { - treatEmptyAsUnset: true, - placeholderValues: glmKeyPlaceholders - }); - } else { - upsertEnv('GLM_API_KEY', '', {}); + if (decoderRuntime === 'glm') { + const glmKeyPlaceholders = ['YOUR_GLM_API_KEY', '"YOUR_GLM_API_KEY"', "''", '""']; + if (glmApiKey) { + upsertEnv('GLM_API_KEY', glmApiKey, { + treatEmptyAsUnset: true, + placeholderValues: glmKeyPlaceholders + }); + } else { + upsertEnv('GLM_API_KEY', '', {}); + } + upsertEnv('GLM_API_BASE', glmApiBase, { treatEmptyAsUnset: true }); + upsertEnv('GLM_MODEL', glmModel, { 
treatEmptyAsUnset: true }); } - upsertEnv('GLM_API_BASE', glmApiBase, { treatEmptyAsUnset: true }); - upsertEnv('GLM_MODEL', glmModel, { treatEmptyAsUnset: true }); // Ensure MCP_INDEXER_URL is present based on extension setting (for ctx.py) - try { - const settings = vscode.workspace.getConfiguration('contextEngineUploader'); - const ctxIndexerUrl = (settings.get('ctxIndexerUrl') || 'http://localhost:8003/mcp').trim(); - if (ctxIndexerUrl) { - upsertEnv('MCP_INDEXER_URL', ctxIndexerUrl, { treatEmptyAsUnset: true }); + if (uploaderSettings) { + try { + const ctxIndexerUrl = (uploaderSettings.get('ctxIndexerUrl') || 'http://localhost:8003/mcp').trim(); + if (ctxIndexerUrl) { + upsertEnv('MCP_INDEXER_URL', ctxIndexerUrl, { treatEmptyAsUnset: true }); + } + } catch (error) { + log(`Failed to read ctxIndexerUrl setting for MCP_INDEXER_URL: ${error instanceof Error ? error.message : String(error)}`); } - } catch (error) { - log(`Failed to read ctxIndexerUrl setting for MCP_INDEXER_URL: ${error instanceof Error ? error.message : String(error)}`); } if (envChanged) { diff --git a/vscode-extension/context-engine-uploader/package.json b/vscode-extension/context-engine-uploader/package.json index 419f8860..c25d0af4 100644 --- a/vscode-extension/context-engine-uploader/package.json +++ b/vscode-extension/context-engine-uploader/package.json @@ -73,16 +73,17 @@ "default": "", "description": "Optional override for the folder that contains standalone_upload_client.py. Defaults to the extension install directory or the workspace ./scripts folder." }, + "contextEngineUploader.decoderRuntime": { + "type": "string", + "enum": ["glm", "llamacpp"], + "default": "glm", + "description": "Preferred decoder runtime for ctx.py scaffolding (controls REFRAG_RUNTIME in ctx_config.json/.env)." + }, "contextEngineUploader.decoderUrl": { "type": "string", "default": "http://localhost:8081", "description": "Override DECODER_URL when running Prompt+ (ctx.py unicorn mode). 
Defaults to local llama.cpp on :8081 (appends /completion automatically)." }, - "contextEngineUploader.useGlmDecoder": { - "type": "boolean", - "default": false, - "description": "Use GLM decoder for Prompt+ (sets REFRAG_RUNTIME=glm). Leave off to use Ollama/llama.cpp style decoder." - }, "contextEngineUploader.useGpuDecoder": { "type": "boolean", "default": false, @@ -132,8 +133,8 @@ }, "contextEngineUploader.autoTailUploadLogs": { "type": "boolean", - "default": true, - "description": "Automatically open a terminal and tail 'docker compose logs -f upload_service' when indexing starts." + "default": false, + "description": "Automatically open a terminal and tail 'docker compose logs -f upload_service' when indexing starts (for local docker setups)." }, "contextEngineUploader.startWatchAfterForce": { "type": "boolean", From 6165417a80b0c24dea3262d132b4aefba80e9f65 Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 14:36:54 +0000 Subject: [PATCH 10/19] Updates documentation for Prompt+ and commands Updates the README to reflect changes to Prompt+ usage, including the bundled ctx.py execution and command palette. Also clarifies the usage of command palette options and status bar buttons. --- vscode-extension/context-engine-uploader/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vscode-extension/context-engine-uploader/README.md b/vscode-extension/context-engine-uploader/README.md index ddf24590..356a3d84 100644 --- a/vscode-extension/context-engine-uploader/README.md +++ b/vscode-extension/context-engine-uploader/README.md @@ -18,7 +18,7 @@ Configuration - `Target Path` is auto-filled from the workspace but can be overridden if you need to upload a different folder. - **Python dependencies:** the extension runs the standalone upload client via your configured `pythonPath`. Ensure the interpreter has `requests`, `urllib3`, and `charset_normalizer` installed. 
Run `python3 -m pip install requests urllib3 charset_normalizer` (or replace `python3` with your configured path) before starting the uploader. - **Path mapping:** `Host Root` + `Container Root` control how local paths are rewritten before reaching the remote service. By default the host root mirrors your `Target Path` and the container root is `/work`, which keeps Windows paths working without extra config. -- **Prompt+ decoder:** set `Context Engine Uploader: Decoder Url` (default `http://localhost:8081`, auto-appends `/completion`) to point at your local llama.cpp decoder. For Ollama, set it to `http://localhost:11434/api/chat`. Turn on `Use Gpu Decoder` to set `USE_GPU_DECODER=1` so ctx.py prefers the GPU llama.cpp sidecar. +- **Prompt+ decoder:** set `Context Engine Uploader: Decoder Url` (default `http://localhost:8081`, auto-appends `/completion`) to point at your local llama.cpp decoder. For Ollama, set it to `http://localhost:11434/api/chat`. Turn on `Use Gpu Decoder` to set `USE_GPU_DECODER=1` so ctx.py prefers the GPU llama.cpp sidecar. Prompt+ automatically runs the bundled `scripts/ctx.py` when an embedded copy is available, falling back to the workspace version if not. - **Claude Code MCP config:** `MCP Indexer Url` and `MCP Memory Url` control the URLs written into the project-local `.mcp.json` when you run the `Write MCP Config` command. This is only for configuring Claude Code MCP clients; other MCP integrations can be added separately later. - **CTX + GLM settings:** - `contextEngineUploader.ctxIndexerUrl` is copied into `.env` (as `MCP_INDEXER_URL`) so the embedded `ctx.py` knows which MCP indexer to call when enhancing prompts. @@ -40,8 +40,8 @@ Workspace-level ctx integration Commands -------- -- Command Palette → “Context Engine Uploader” to access Start/Stop/Restart/Index Codebase. -- Status-bar button (`Index Codebase`) mirrors the same behavior and displays progress. 
+- Command Palette → “Context Engine Uploader” exposes Start/Stop/Restart/Index Codebase and Prompt+ (unicorn) rewrite commands. +- Status-bar button (`Index Codebase`) mirrors Start/Stop/Restart/Index status, while the `Prompt+` status button runs the ctx rewrite command on the current selection. - `Context Engine Uploader: Write MCP Config (.mcp.json)` writes or updates a project-local `.mcp.json` with MCP server entries for the Qdrant indexer and memory/search endpoints, using the configured MCP URLs. - `Context Engine Uploader: Write CTX Config (ctx_config.json/.env)` scaffolds the ctx config + env files as described above. This command runs automatically after `Write MCP Config` if scaffolding is enabled, but it is also exposed in the Command Palette for manual use. From e9d511aafd316c5b648447abb273db6f6db4f11b Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 15:19:49 +0000 Subject: [PATCH 11/19] Remove duplicate ctx script in extension - bundled at build time - ctx is available in-repo (scripts/ctx.py) --- .../context-engine-uploader/scripts/ctx.py | 1399 ----------------- 1 file changed, 1399 deletions(-) delete mode 100755 vscode-extension/context-engine-uploader/scripts/ctx.py diff --git a/vscode-extension/context-engine-uploader/scripts/ctx.py b/vscode-extension/context-engine-uploader/scripts/ctx.py deleted file mode 100755 index 29a0e65e..00000000 --- a/vscode-extension/context-engine-uploader/scripts/ctx.py +++ /dev/null @@ -1,1399 +0,0 @@ -#!/usr/bin/env python3 -import re -import difflib -""" -Context-aware prompt enhancer CLI. - -Retrieves relevant code context from the Context-Engine MCP server and enhances -your prompts with it using a local LLM decoder. Works with both questions and -commands/instructions. Outputs at least two detailed paragraphs. - -Usage: - ctx "how does hybrid search work?" 
# Question → enhanced question - ctx "refactor the caching logic" # Command → enhanced instructions - ctx --language python "explain the indexer" # Filter by language - ctx --detail "add error handling to ctx.py" # Include code snippets - -Examples: - # Enhance questions with context - ctx "how does the indexer work?" - # Output: Two detailed question paragraphs with file/line references - - # Enhance commands with specific details - ctx "refactor ctx.py to improve modularity" - # Output: Two detailed instruction paragraphs with concrete steps - - # Detail mode: include short code snippets (slower but richer) - ctx --detail "explain the caching logic" - - # Unicorn mode: staged 2-3 pass enhancement for best quality - ctx --unicorn "refactor ctx.py" - ctx --unicorn "what is ReFRAG and how does it work?" - - # Pipe to LLM - ctx "fix the bug in watcher.py" | llm - - # Filter by language and path - ctx --language python --under scripts/ "caching implementation" - -Environment: - MCP_INDEXER_URL - MCP indexer endpoint (default: http://localhost:8003/mcp) - CTX_LIMIT - Default result limit (default: 5) - CTX_CONTEXT_LINES - Context lines for snippets (default: 0) - CTX_REWRITE_MAX_TOKENS - Max tokens for LLM rewrite (default: 320) - DECODER_URL - Override decoder endpoint - USE_GPU_DECODER - Use GPU decoder on port 8081 (default: 0) -""" - -import sys -import json -import os -import argparse -import subprocess -from urllib import request -from urllib.parse import urlparse -from urllib.error import HTTPError, URLError -from typing import Dict, Any, List, Optional, Tuple -from pathlib import Path - -# Load .env file if it exists (for local CLI usage) -def _load_env_file(): - """Load .env file from project root if it exists.""" - # Find project root (where .env should be) - script_dir = Path(__file__).resolve().parent - project_root = script_dir.parent - env_file = project_root / ".env" - - if env_file.exists(): - with open(env_file) as f: - for line in f: - line = 
line.strip() - if not line or line.startswith("#"): - continue - if "=" in line: - key, value = line.split("=", 1) - key = key.strip() - value = value.strip().strip('"').strip("'") - # Only set if not already in environment - if key and key not in os.environ: - os.environ[key] = value - -_load_env_file() - -try: - from scripts.mcp_router import call_tool_http # type: ignore -except ModuleNotFoundError: # pragma: no cover - local execution fallback - try: - from mcp_router import call_tool_http # type: ignore - except ModuleNotFoundError: - # Lightweight HTTP-only fallback to avoid bundling the full router - def call_tool_http(base_url: str, tool_name: str, args: Dict[str, Any], timeout: float = 120.0) -> Dict[str, Any]: - headers = {"Content-Type": "application/json", "Accept": "application/json, text/event-stream"} - # Best-effort handshake to obtain mcp-session-id - init_payload = { - "jsonrpc": "2.0", - "id": 0, - "method": "initialize", - "params": { - "protocolVersion": "2024-11-05", - "capabilities": {}, - "clientInfo": {"name": "ctx-cli-lite", "version": "1.0.0"} - } - } - try: - req = request.Request(base_url, data=json.dumps(init_payload).encode("utf-8"), headers=headers) - with request.urlopen(req, timeout=min(timeout, 10.0)) as resp: - sid = resp.headers.get("mcp-session-id") or resp.headers.get("Mcp-Session-Id") - if sid: - headers["mcp-session-id"] = sid - # Drain body to keep connection healthy (ignore content) - try: - resp.read() - except Exception: - pass - except Exception: - pass # fall through; server may still accept calls without handshake - - payload = { - "jsonrpc": "2.0", - "id": 1, - "method": "tools/call", - "params": {"name": tool_name, "arguments": args}, - } - req = request.Request(base_url, data=json.dumps(payload).encode("utf-8"), headers=headers) - with request.urlopen(req, timeout=timeout) as resp: - raw = resp.read().decode("utf-8", errors="ignore").strip() - if raw.startswith("data:"): - try: - raw = raw.split("data:", 
1)[1].strip() - except Exception: - pass - try: - return json.loads(raw) - except Exception: - return {"result": {"content": [{"type": "text", "text": raw}]}} - -# Configuration from environment -MCP_URL = os.environ.get("MCP_INDEXER_URL", "http://localhost:8003/mcp") -DEFAULT_LIMIT = int(os.environ.get("CTX_LIMIT", "5")) -DEFAULT_CONTEXT_LINES = int(os.environ.get("CTX_CONTEXT_LINES", "0")) -DEFAULT_REWRITE_TOKENS = int(os.environ.get("CTX_REWRITE_MAX_TOKENS", "320")) -DEFAULT_PER_PATH = int(os.environ.get("CTX_PER_PATH", "2")) - -# User preferences config file -CTX_CONFIG_FILE = os.path.expanduser("~/.ctx_config.json") - -# Local decoder configuration (llama.cpp server) -def resolve_decoder_url() -> str: - """Resolve decoder endpoint, honoring USE_GPU_DECODER + overrides.""" - override = os.environ.get("DECODER_URL", "").strip() - if override: - base = override - else: - use_gpu = str(os.environ.get("USE_GPU_DECODER", "0")).strip().lower() - if use_gpu in {"1", "true", "yes", "on"}: - host = "host.docker.internal" if os.path.exists("/.dockerenv") else "localhost" - base = f"http://{host}:8081" - else: - base = os.environ.get("LLAMACPP_URL", "http://localhost:8080").strip() - base = base or "http://localhost:8080" - if base.endswith("/completion"): - return base - return base.rstrip("/") + "/completion" - - -DECODER_URL = resolve_decoder_url() -DECODER_TIMEOUT = int(os.environ.get("CTX_DECODER_TIMEOUT", "300")) - - -# Global session ID for MCP HTTP calls -_session_id: Optional[str] = None - - -def parse_sse_response(text: str) -> Dict[str, Any]: - """Parse SSE format response (event: message\\ndata: {...}).""" - for line in text.strip().split('\n'): - if line.startswith('data: '): - return json.loads(line[6:]) - raise ValueError("No data line found in SSE response") - - -def get_session_id(timeout: int = 10) -> str: - """Initialize MCP session and return session ID.""" - global _session_id - if _session_id: - return _session_id - - payload = { - "jsonrpc": "2.0", 
- "id": 0, - "method": "initialize", - "params": { - "protocolVersion": "2024-11-05", - "capabilities": {}, - "clientInfo": {"name": "ctx-cli", "version": "1.0.0"} - } - } - - try: - req = request.Request( - MCP_URL, - data=json.dumps(payload).encode(), - headers={ - "Content-Type": "application/json", - "Accept": "application/json, text/event-stream" - } - ) - with request.urlopen(req, timeout=timeout) as resp: - session_id = resp.headers.get("mcp-session-id") - if not session_id: - raise RuntimeError("Server did not return session ID") - # Read the initialization response to ensure session is fully established - init_response = resp.read().decode('utf-8') - # Wait a moment for session to be fully processed - import time - time.sleep(0.5) - _session_id = session_id - return session_id - except Exception as e: - raise RuntimeError(f"Failed to initialize MCP session: {e}") - - -def call_mcp_tool(tool_name: str, params: Dict[str, Any], timeout: int = 30) -> Dict[str, Any]: - """Call MCP tool via HTTP JSON-RPC with session management.""" - payload = { - "jsonrpc": "2.0", - "id": 1, - "method": "tools/call", - "params": {"name": tool_name, "arguments": params} - } - - # Debug output (opt-in to avoid leaking queries in normal use) - debug_flag = os.environ.get("CTX_DEBUG", "").strip().lower() - if debug_flag in {"1", "true", "yes", "on"}: - sys.stderr.write(f"[DEBUG] Calling MCP tool '{tool_name}' at {MCP_URL}\n") - sys.stderr.write(f"[DEBUG] Sending payload: {json.dumps(payload, indent=2)}\n") - sys.stderr.flush() - - try: - return call_tool_http(MCP_URL, tool_name, params, timeout=float(timeout)) - except Exception as e: - sys.stderr.write(f"[ERROR] MCP call to '{tool_name}' at {MCP_URL} failed: {type(e).__name__}: {e}\n") - sys.stderr.flush() - return {"error": f"Request failed: {str(e)}"} - - -def parse_mcp_response(result: Dict[str, Any]) -> Optional[Dict[str, Any]]: - """Parse MCP response and extract the actual result. 
- - Supports both text and json content items from FastMCP. - """ - if "error" in result: - return None - - # FastMCP typically wraps results in a content array - res = result.get("result", {}) - content = res.get("content", []) - - # Some servers may return a dict directly (no content array) - if isinstance(res, dict) and content == [] and any(k in res for k in ("results", "answer", "total")): - return res - - if not content: - return None - - item = content[0] or {} - - # Prefer typed JSON content - if isinstance(item, dict) and "json" in item: - return item.get("json") - - # Fallback: parse text as JSON or return raw text - text = item.get("text", "") if isinstance(item, dict) else "" - if not text: - return None - - try: - return json.loads(text) - except json.JSONDecodeError: - return {"raw": text} - - -def _compress_snippet(snippet: str, max_lines: int = 6) -> str: - """Compact, high-signal subset of a code snippet. - - Heuristics: prefer signatures, guards, returns/raises, asserts; fall back to head/tail. - """ - try: - raw_lines = [ln.rstrip() for ln in snippet.splitlines() if ln.strip()] - if not raw_lines: - return "" - keys = ("def ", "class ", "return", "raise", "assert", "if ", "except", "try:") - scored = [(sum(k in ln for k in keys), idx, ln) for idx, ln in enumerate(raw_lines)] - keep_idx = sorted({idx for _, idx, _ in sorted(scored, key=lambda t: (-t[0], t[1]))[:max_lines]}) - kept = [raw_lines[i] for i in keep_idx] - if not kept: - head = raw_lines[: max(1, max_lines // 2)] - tail = raw_lines[-(max_lines - len(head)) :] - kept = head + tail - return "\n".join(kept[:max_lines]) - except Exception: - return (snippet or "").splitlines()[0][:160] - - -def format_search_results(results: List[Dict[str, Any]], include_snippets: bool = False) -> str: - """Format search results succinctly for LLM rewrite. - - When include_snippets is False (default), only include headers with path and line ranges. 
- This keeps prompts small and fast for Granite via llama.cpp. - """ - lines: List[str] = [] - for hit in results: - path = hit.get("path", "unknown") - start = hit.get("start_line", "?") - end = hit.get("end_line", "?") - language = hit.get("language") or "" - symbol = hit.get("symbol") or "" - snippet = (hit.get("snippet") or "").strip() - - # Only include line ranges when both start and end are known - if start in (None, "?") or end in (None, "?"): - header = f"- {path}" - else: - header = f"- {path}:{start}-{end}" - meta: List[str] = [] - if language: - meta.append(language) - if symbol: - meta.append(f"{symbol}") - if meta: - header += f" ({', '.join(meta)})" - lines.append(header) - - if include_snippets and snippet: - compact = _compress_snippet(snippet, max_lines=6) - if compact: - for ln in compact.splitlines(): - # Inline compact snippet (no fences to keep token count small) - lines.append(f" {ln}") - - return "\n".join(lines) - - - -def _ensure_two_paragraph_questions(text: str) -> str: - """Normalize to at least two paragraphs. - - - Collapse excessive whitespace - - For questions: ensure each paragraph ends with '?' 
- - For commands/instructions: ensure proper punctuation - - If only one paragraph, split heuristically or add a generic follow-up - """ - if not text: - return "" - # Normalize whitespace/newlines - t = text.replace("\r\n", "\n").replace("\r", "\n").strip() - # Collapse triple+ newlines to double - while "\n\n\n" in t: - t = t.replace("\n\n\n", "\n\n") - raw_paras = [p.strip() for p in t.split("\n\n") if p.strip()] - - # Deduplicate paragraphs (case/whitespace insensitive, tolerance for near-duplicates) - paras: list[str] = [] - dedup_keys: list[str] = [] - for p in raw_paras: - key = re.sub(r"\s+", " ", p).strip().lower() - if any(difflib.SequenceMatcher(None, key, existing).ratio() >= 0.99 for existing in dedup_keys): - continue - dedup_keys.append(key) - paras.append(p) - - def normalize_paragraph(s: str) -> str: - """Ensure proper punctuation - keep questions as questions, commands as commands.""" - s = s.strip() - if not s: - return s - # If already ends with proper punctuation, keep as-is - if s[-1] in "?!.": - return s - # Check if it looks like a question (starts with question words or contains '?') - question_starters = ("what", "how", "why", "when", "where", "who", "which", "can", "could", "would", "should", "is", "are", "does", "do") - first_word = s.split()[0].lower() if s.split() else "" - if first_word in question_starters or "?" in s: - # It's a question - ensure it ends with '?' - if s[-1] in ".!:": - return s[:-1].rstrip() + "?" - return s + "?" - # It's a command/statement - ensure it ends with '.' - if s[-1] in ":": - return s[:-1].rstrip() + "." - return s + "." - - max_paragraphs = 3 - if len(paras) >= 2: - selected = [normalize_paragraph(p) for p in paras[:max_paragraphs]] - return "\n\n".join(selected) - - # Single paragraph: try to split by sentence boundary - p = paras[0] if paras else t - # Naive sentence split - sentences = [s.strip() for s in p.replace("?", ". ").replace("!", ". ").split(". 
") if s.strip()] - if len(sentences) > 1: - half = max(1, len(sentences) // 2) - p1 = ". ".join(sentences[:half]).strip() - p2 = ". ".join(sentences[half:]).strip() - else: - p1 = p.strip() - p2 = ( - "Detail the exact systems involved (e.g., files, classes, state machines), how data flows between them, and any validation before emitting updates." - ) - return normalize_paragraph(p1) + "\n\n" + normalize_paragraph(p2) - - -# --- Grounding helpers to reduce hallucinated paths/symbols -from typing import Set - -def extract_allowed_citations(context_text: str) -> tuple[Set[str], Set[str]]: - """Extract allowed file paths and symbols from formatted context lines. - - Parses lines produced by format_search_results. Returns (paths, symbols). - """ - allowed_paths: Set[str] = set() - allowed_symbols: Set[str] = set() - for raw in (context_text or "").splitlines(): - line = (raw or "").strip() - if not line: - continue - if line.startswith("- "): - header = line[2:].strip() - header_main = header.split(" (")[0] - path_part = header_main.split(":")[0] - if path_part: - allowed_paths.add(path_part) - # symbols are inside parens, after optional language - m = re.search(r"\(([^)]+)\)", header) - if m: - for part in m.group(1).split(","): - sym = part.strip() - if sym and sym.lower() not in { - "python", "typescript", "javascript", "go", "rust", "java", "c", "c++", "c#", "shell", "bash", "markdown", "json", "yaml", "toml" - }: - allowed_symbols.add(sym) - return allowed_paths, allowed_symbols - - -def build_refined_query(original_query: str, allowed_paths: Set[str], allowed_symbols: Set[str], max_terms: int = 6) -> str: - """Construct a grounded follow-up query using only known paths/symbols.""" - from os.path import basename - terms: list[str] = [] - for p in list(allowed_paths)[: max_terms // 2]: - base = basename(p) - if base and base not in terms: - terms.append(base) - for s in list(allowed_symbols)[: max_terms - len(terms)]: - if s and s not in terms: - terms.append(s) - 
return (original_query or "").strip() + (" " + " ".join(terms) if terms else "") - - -def _simple_tokenize(text: str) -> List[str]: - tokens = re.findall(r"[A-Za-z0-9_]+", text or "") - return [t.lower() for t in tokens if t] - - -def _token_overlap_ratio(a: str, b: str) -> float: - a_tokens = set(_simple_tokenize(a)) - b_tokens = set(_simple_tokenize(b)) - if not a_tokens or not b_tokens: - return 0.0 - inter = len(a_tokens & b_tokens) - union = len(a_tokens | b_tokens) - if not union: - return 0.0 - return inter / union - - -def _estimate_query_result_relevance(query: str, results: List[Dict[str, Any]]) -> float: - q_tokens = set(_simple_tokenize(query)) - if not q_tokens or not results: - return 0.0 - scores: List[float] = [] - for hit in results[:5]: - parts: List[str] = [] - for key in ("path", "symbol", "snippet"): - val = hit.get(key) - if isinstance(val, str): - parts.append(val) - if not parts: - continue - r_tokens = set() - for part in parts: - r_tokens.update(_simple_tokenize(part)) - if not r_tokens: - continue - inter = len(q_tokens & r_tokens) - union = len(q_tokens | r_tokens) - if union: - scores.append(inter / union) - if not scores: - return 0.0 - return sum(scores) / len(scores) - - -def sanitize_citations(text: str, allowed_paths: Set[str]) -> str: - """Replace path-like strings not present in allowed_paths with a neutral phrase. - - Keeps exact paths and basenames that appear in allowed_paths; replaces others. - """ - if not text: - return text - from os.path import basename - allowed_set = set(allowed_paths or set()) - allowed_basenames = {basename(p) for p in allowed_set} - - def _repl(m): - p = m.group(0) - if p in allowed_set or basename(p) in allowed_basenames: - return p - return "the referenced file" - - cleaned = re.sub(r"/path/to/[^\s]+", "the referenced file", text) - # Simple path-like matcher: segments with a slash and a dot-ext - cleaned = re.sub(r"(? dict: - """Load user preferences from ~/.ctx_config.json if it exists. 
- - Example config: - { - "always_include_tests": true, - "prefer_bullet_commands": true, - "extra_instructions": "Always include error handling considerations", - "default_mode": "unicorn", - "streaming": true - } - """ - if not os.path.exists(CTX_CONFIG_FILE): - return {} - try: - with open(CTX_CONFIG_FILE, 'r') as f: - return json.load(f) - except Exception: - return {} - - -def _apply_user_preferences(system_msg: str, user_msg: str, prefs: dict) -> tuple[str, str]: - """Apply user preferences to system and user messages. - - Allows personalization like: - - Always include test-plan paragraph - - Prefer bullet commands - - Custom instructions - """ - if not prefs: - return system_msg, user_msg - - # Add extra instructions to system message - if prefs.get("extra_instructions"): - system_msg += f"\n\nUser preference: {prefs['extra_instructions']}" - - # Modify user message based on preferences - if prefs.get("always_include_tests"): - user_msg += "\n\nAlways include a paragraph about testing considerations and test cases." - - if prefs.get("prefer_bullet_commands"): - user_msg += "\n\nFor commands/instructions, prefer bullet-point format for clarity." - - return system_msg, user_msg - - -def _adaptive_context_sizing(query: str, filters: dict) -> dict: - """Adaptively adjust limit and context_lines based on query characteristics. 
- - - Short/vague queries → increase limit and context for richer grounding - - Queries with file/function names → lighter settings for speed - """ - import re - adjusted = dict(filters) - - # Detect if query mentions specific files or functions - has_file_ref = bool(re.search(r'\b\w+\.(py|js|ts|go|rs|java|cpp|c|h)\b', query)) - has_function_ref = bool(re.search(r'\b(function|class|def|func|fn|method)\s+\w+', query)) - is_specific = has_file_ref or has_function_ref - - # Query length heuristic - word_count = len(query.split()) - is_short = word_count < 5 - - # Adaptive sizing - if is_short and not is_specific: - # Short, vague query → need more context - adjusted["limit"] = max(adjusted.get("limit", DEFAULT_LIMIT), 6) - if adjusted.get("with_snippets"): - adjusted["context_lines"] = max(adjusted.get("context_lines", DEFAULT_CONTEXT_LINES), 10) - elif is_specific: - # Specific query → can use lighter settings - adjusted["limit"] = min(adjusted.get("limit", DEFAULT_LIMIT), 4) - if adjusted.get("with_snippets"): - adjusted["context_lines"] = min(adjusted.get("context_lines", DEFAULT_CONTEXT_LINES) or 8, 6) - - return adjusted - - -def enhance_prompt(query: str, **filters) -> str: - """Retrieve context, invoke the LLM, and return a final enhanced prompt. - - Uses adaptive context sizing to balance quality and speed. 
- """ - # Apply adaptive sizing - filters = _adaptive_context_sizing(query, filters) - - context_text, context_note = fetch_context(query, **filters) - - require_ctx_flag = os.environ.get("CTX_REQUIRE_CONTEXT", "").strip().lower() - if require_ctx_flag in {"1", "true", "yes", "on"}: - has_real_context = bool((context_text or "").strip()) and not ( - context_note and ( - "failed" in context_note.lower() - or "no relevant" in context_note.lower() - or "no data" in context_note.lower() - ) - ) - if not has_real_context: - return (query or "").strip() - - rewrite_opts = filters.get("rewrite_options") or {} - rewritten = rewrite_prompt( - query, - context_text, - context_note, - max_tokens=rewrite_opts.get("max_tokens"), - ) - return rewritten.strip() - - -def _generate_plan(enhanced_prompt: str, context: str, note: str) -> str: - """Generate a step-by-step execution plan for a command/instruction. - - Uses the LLM to create a concrete action plan based on the enhanced prompt and code context. - Returns empty string if plan generation fails or is not applicable. - """ - import sys - - # Detect if we have actual code context - has_code_context = bool((context or "").strip() and not (note and ("failed" in note.lower() or "no relevant" in note.lower() or "no data" in note.lower()))) - - if not has_code_context: - # No code context - skip plan generation - return "" - - system_msg = ( - "You are a technical planning assistant. Your job is to create a step-by-step execution plan. " - "Given an enhanced prompt and code context, generate a numbered list of concrete steps to accomplish the task. " - "Each step should be specific and actionable. " - "Format: Start with 'EXECUTION PLAN:' followed by numbered steps (1., 2., 3., etc.). " - "Keep it concise - aim for 3-7 steps maximum. " - "Only reference files, functions, or code elements that appear in the provided context. " - "Do NOT invent file paths or function names. 
" - "Output format: plain text only, no markdown, no code fences." - ) - - user_msg = ( - f"Code context:\n{context}\n\n" - f"Enhanced prompt:\n{enhanced_prompt}\n\n" - "Generate a step-by-step execution plan to accomplish this task. " - "Use only the files and functions mentioned in the code context above. " - "Format as: EXECUTION PLAN: followed by numbered steps." - ) - - meta_prompt = ( - "<|start_of_role|>system<|end_of_role|>" + system_msg + "<|end_of_text|>\n" - "<|start_of_role|>user<|end_of_role|>" + user_msg + "<|end_of_text|>\n" - "<|start_of_role|>assistant<|end_of_role|>" - ) - - decoder_url = DECODER_URL - # Safety: restrict to local decoder hosts - parsed = urlparse(decoder_url) - if parsed.hostname not in {"localhost", "127.0.0.1", "host.docker.internal"}: - return "" - - payload = { - "prompt": meta_prompt, - "n_predict": 200, # Shorter for plan generation - "temperature": 0.3, # Lower temperature for more focused plans - "stream": False, # Silent plan generation - } - - try: - req = request.Request( - decoder_url, - data=json.dumps(payload).encode("utf-8"), - headers={"Content-Type": "application/json"}, - ) - - # Use shorter timeout for plan generation (60 seconds instead of 300) - plan_timeout = min(60, DECODER_TIMEOUT) - with request.urlopen(req, timeout=plan_timeout) as resp: - raw = resp.read().decode("utf-8", errors="ignore") - data = json.loads(raw) - - plan = ( - (data.get("content") if isinstance(data, dict) else None) - or ((data.get("choices") or [{}])[0].get("content") if isinstance(data, dict) else None) - or ((data.get("choices") or [{}])[0].get("text") if isinstance(data, dict) else None) - or (data.get("generated_text") if isinstance(data, dict) else None) - or (data.get("text") if isinstance(data, dict) else None) - or "" - ) - - plan = plan.strip() - - # Relaxed validation: return any non-empty plan; add header if missing - if not plan: - return "" - if "EXECUTION PLAN" not in plan.upper(): - plan = "EXECUTION PLAN:\n" + plan - 
return plan - - except Exception as e: - # Plan generation failed - not critical, just skip it - sys.stderr.write(f"[DEBUG] Plan generation failed: {type(e).__name__}: {e}\n") - sys.stderr.flush() - return "" - - -def _needs_polish(text: str) -> bool: - """Enhanced QA heuristic to decide if a third polishing pass is needed. - - Checks for: - - Too short output - - Generic/vague language - - Missing concrete details - - Lack of code-specific references - """ - if not text: - return True - t = text.strip() - - # Length check - if len(t) < 180: - return True - - # Generic language cues (expanded list) - generic_cues = ( - "overall structure", "consider ", "ensure ", "improve its", - "you should", "it is important", "make sure", "be sure to", - "in general", "typically", "usually", "often" - ) - generic_count = sum(1 for cue in generic_cues if cue in t.lower()) - if generic_count >= 3: - return True - - # Check for concrete details (file paths, line numbers, function names, etc.) - import re - has_file_ref = bool(re.search(r'\b\w+\.(py|js|ts|go|rs|java|cpp|c|h)\b', t)) - has_line_ref = bool(re.search(r'\bline[s]?\s+\d+', t, re.IGNORECASE)) - has_function_ref = bool(re.search(r'\b(function|class|method|def|fn)\s+\w+', t)) - has_concrete = has_file_ref or has_line_ref or has_function_ref - - # If no concrete references and has generic language, needs polish - if not has_concrete and generic_count >= 2: - return True - - # Check paragraph structure (should have at least 2 paragraphs) - paragraphs = [p.strip() for p in t.split('\n\n') if p.strip()] - if len(paragraphs) < 2: - return True - - return False - - -def _dedup_paragraphs(text: str, max_paragraphs: int = 3) -> str: - """Deterministic paragraph-level deduplication and truncation. 
- - - Split on double-newline boundaries - - Drop duplicate paragraphs beyond the first occurrence (case/whitespace insensitive) - - Cap total paragraphs to max_paragraphs - """ - if not text: - return "" - - # Normalize newlines and split into paragraphs - t = text.replace("\r\n", "\n").replace("\r", "\n").strip() - raw_paras = [p.strip() for p in t.split("\n\n") if p.strip()] - if not raw_paras: - return text.strip() - - seen_keys: set[str] = set() - out: list[str] = [] - for p in raw_paras: - key = re.sub(r"\s+", " ", p).strip().lower() - if key in seen_keys: - continue - seen_keys.add(key) - out.append(p) - if len(out) >= max_paragraphs: - break - - if not out: - return text.strip() - return "\n\n".join(out) - - -def enhance_unicorn(query: str, **filters) -> str: - """Multi-pass staged enhancement for higher quality with optional plan generation. - - Pass 1: rich snippets to draft sharper intent - Pass 2: refined retrieval using the draft, with even richer snippets to ground specifics - Pass 3: polish if output looks short/generic - Pass 4 (optional): generate execution plan if query is a command/instruction - - Falls back to single-pass enhance_prompt if no context is available. - Stops immediately when repo search returns no hits to avoid hallucinated references. 
- """ - # ---- Pass 1: draft (rich snippets for grounding) - f1 = dict(filters) - rewrite_opts = f1.get("rewrite_options") or {} - try: - max_budget = int(rewrite_opts.get("max_tokens", DEFAULT_REWRITE_TOKENS)) - except Exception: - max_budget = DEFAULT_REWRITE_TOKENS - f1.update({ - "with_snippets": True, - "limit": max(1, min(int(f1.get("limit", DEFAULT_LIMIT) or 3), 3)), - "per_path": 2, - "context_lines": 8, # Rich context for understanding - }) - ctx1, note1 = fetch_context(query, **f1) - - # Early exit: if first pass has no context AND note indicates failure/no results, fall back immediately - has_context1 = bool((ctx1 or "").strip()) - has_error1 = note1 and ("failed" in note1.lower() or "no relevant" in note1.lower() or "no data" in note1.lower()) - - if not has_context1: - # No context at all - fall back to single-pass with the diagnostic note - return enhance_prompt(query, **filters) - - # Pass 1: silent (no streaming) - draft = rewrite_prompt( - query, - ctx1, - note1, - max_tokens=min(180, max_budget), - citation_policy="snippets", - stream=False, - ) - - # Build a grounded follow-up query from original query + allowed paths/symbols - allowed_paths1, allowed_symbols1 = extract_allowed_citations(ctx1) - refined_query = build_refined_query(query, allowed_paths1, allowed_symbols1) - - overlap = _token_overlap_ratio(query, draft) - sys.stderr.write(f"[DEBUG] Unicorn draft similarity={overlap:.3f}\n") - sys.stderr.flush() - gate_flag = os.environ.get("CTX_DRAFT_SIM_GATE", "").strip().lower() - if gate_flag in {"1", "true", "yes", "on"}: - try: - min_sim = float(os.environ.get("CTX_MIN_DRAFT_SIM", "0.4")) - except Exception: - min_sim = 0.4 - if overlap < min_sim: - sys.stderr.write(f"[DEBUG] Draft similarity below threshold {min_sim:.3f}; reusing original query for pass2.\n") - sys.stderr.flush() - refined_query = query - - # ---- Pass 2: refine (even richer snippets, focused results) - f2 = dict(filters) - f2.update({ - "with_snippets": True, - "limit": 4, 
- "per_path": 1, - "context_lines": 12, # Very rich context for detailed grounding - }) - ctx2, note2 = fetch_context(refined_query, **f2) - - # Check if second pass has context - has_context2 = bool((ctx2 or "").strip()) - - # If second-pass retrieval is empty, reuse first-pass context to avoid invented refs - if not has_context2: - ctx2 = ctx1 - note2 = note1 - - # Pass 2: silent (no streaming). Use paths policy for clearer file/line anchoring. - final = rewrite_prompt( - draft, - ctx2, - note2, - max_tokens=min(300, max_budget), - citation_policy="paths", - stream=False, - ) - - # ---- Pass 3: polish if clearly needed (optional via CTX_UNICORN_POLISH) - polish_flag = os.environ.get("CTX_UNICORN_POLISH", "1").strip().lower() - if polish_flag in {"1", "true", "yes", "on"} and _needs_polish(final): - # Polish pass: silent (no streaming yet) - final = rewrite_prompt(final, ctx2, note2, max_tokens=140, citation_policy="snippets", stream=False) - - # ---- Pass 4: Generate execution plan if this is a command/instruction - plan = "" - is_command = not query.strip().endswith("?") - - # Only generate plan if we have actual code context (not just error notes) - has_real_context = has_context1 and bool((ctx2 or "").strip()) - - import sys as _sys - _sys.stderr.write(f"[DEBUG] Plan generation: is_command={is_command}, has_real_context={has_real_context}\n") - _sys.stderr.flush() - - if is_command and has_real_context: - # Generate a step-by-step execution plan based on code context - _sys.stderr.write("[DEBUG] Generating plan...\n") - _sys.stderr.flush() - plan = _generate_plan(final, ctx2, note2) - _sys.stderr.write(f"[DEBUG] Plan length: {len(plan)} chars\n") - _sys.stderr.flush() - - # Combine enhanced prompt with plan if available - if plan: - output = final + "\n\n" + plan - else: - output = final - - # Sanitize citations on the final output and return - allowed_paths2, _ = extract_allowed_citations(ctx2) - return sanitize_citations(output.strip(), 
allowed_paths1.union(allowed_paths2)) - - -def fetch_context(query: str, **filters) -> Tuple[str, str]: - """Fetch repository context text plus a note describing the status. - - Defaults to header-only refs for speed unless with_snippets=True is provided. - Falls back to context_search (with memories) if repo_search returns no hits. - """ - with_snippets = bool(filters.get("with_snippets", False)) - # Resolve collection: explicit filter wins, then env COLLECTION_NAME, then default "codebase" - collection_name = filters.get("collection") or os.environ.get("COLLECTION_NAME", "codebase") - - params = { - "query": query, - "limit": filters.get("limit", DEFAULT_LIMIT), - "include_snippet": with_snippets, - "context_lines": filters.get("context_lines", DEFAULT_CONTEXT_LINES), - "collection": collection_name, - } - for key in ["language", "under", "path_glob", "not_glob", "kind", "symbol", "ext"]: - if filters.get(key): - params[key] = filters[key] - - result = call_mcp_tool("repo_search", params) - if "error" in result: - error_msg = result.get('error', 'Unknown error') - sys.stderr.write(f"[DEBUG] repo_search error: {error_msg}\n") - sys.stderr.flush() - return "", f"Context retrieval failed: {error_msg}" - - data = parse_mcp_response(result) - if not data: - sys.stderr.write("[DEBUG] repo_search returned no data\n") - sys.stderr.flush() - return "", "Context retrieval returned no data." 
- - hits = data.get("results") or [] - relevance = _estimate_query_result_relevance(query, hits) - sys.stderr.write(f"[DEBUG] repo_search returned {len(hits)} hits (relevance={relevance:.3f})\n") - sys.stderr.flush() - - gate_flag = os.environ.get("CTX_RELEVANCE_GATE", "").strip().lower() - if hits and gate_flag in {"1", "true", "yes", "on"}: - try: - min_rel = float(os.environ.get("CTX_MIN_RELEVANCE", "0.15")) - except Exception: - min_rel = 0.15 - if relevance < min_rel: - sys.stderr.write(f"[DEBUG] Relevance below threshold {min_rel:.3f}; treating as no relevant context.\n") - sys.stderr.flush() - return "", "No relevant context found for the prompt (low retrieval relevance)." - - if not hits: - # Memory blending: try context_search with memories as fallback - memory_params = { - "query": query, - "limit": filters.get("limit", DEFAULT_LIMIT), - "include_memories": True, - "include_snippet": with_snippets, - "context_lines": filters.get("context_lines", DEFAULT_CONTEXT_LINES), - "collection": collection_name, - } - memory_result = call_mcp_tool("context_search", memory_params) - if "error" not in memory_result: - memory_data = parse_mcp_response(memory_result) - if memory_data: - memory_hits = memory_data.get("results") or [] - if memory_hits: - return format_search_results(memory_hits, include_snippets=with_snippets), "Using memories and design docs" - return "", "No relevant context found for the prompt." - - return format_search_results(hits, include_snippets=with_snippets), "" - - -def rewrite_prompt(original_prompt: str, context: str, note: str, max_tokens: Optional[int], citation_policy: str = "paths", stream: bool = True) -> str: - """Use the configured decoder (GLM or llama.cpp) to rewrite the prompt with repository context. - - Returns ONLY the improved prompt text. Raises exception if decoder fails. - If stream=True (default), prints tokens as they arrive for instant feedback. 
- """ - import sys - ctx = (context or "").strip() - nt = (note or "").strip() - effective_context = ctx if ctx else (nt or "No context available.") - - # Granite 4.0 chat template with explicit rewrite-only instruction - if (citation_policy or "paths") == "snippets": - policy_system = ( - "Use code snippets provided in Context refs to ground the rewrite. " - "Do NOT include file paths or line numbers. " - "You may quote very short code fragments directly from the snippets if essential, but never use markdown or code fences. " - "Never invent identifiers not present in the snippets. " - ) - policy_user = ( - "When relevant, reference concrete behaviors and small code fragments from the snippets above. " - "Do not mention file paths or line numbers. " - ) - else: - policy_system = ( - "If context is provided, use it to make the prompt more concrete by citing specific file paths, line ranges, and symbols that appear in the Context refs. " - "Never invent references - only cite what appears verbatim in the Context refs. " - ) - policy_user = ( - "If the context above contains relevant references, cite concrete file paths, line ranges, and symbols in your rewrite. " - ) - - # Detect if we have actual code context or just a diagnostic note - has_code_context = bool((ctx or "").strip() and not (nt and ("failed" in nt.lower() or "no relevant" in nt.lower() or "no data" in nt.lower()))) - - system_msg = ( - "You are a prompt rewriter. Your ONLY job is to rewrite prompts to be more specific and detailed. " - "CRITICAL: You must NEVER answer questions or execute commands. You must ONLY rewrite the prompt to be better and more specific. " - "ALWAYS enhance the prompt to be more detailed and actionable. " - + policy_system - ) - - if has_code_context: - # We have real code context - encourage using it - system_msg += ( - "Use the provided context to make the prompt more concrete and specific. 
" - "Your rewrite must be at least two short paragraphs separated by a single blank line. " - "For questions: rewrite as more specific questions. For commands/instructions: rewrite as more detailed, specific instructions with concrete targets. " - "Each paragraph should explore different aspects of the topic. " - "Output format: plain text only, no markdown, no code fences, no answers, no explanations." - ) - else: - # No code context - stay generic and don't invent details - system_msg += ( - "IMPORTANT: No code context is available for this query. " - "Do NOT invent file paths, line numbers, function names, or other specific code references. " - "Instead, rewrite the prompt to be more general and exploratory, asking about concepts, approaches, and best practices. " - "Your rewrite must be at least two short paragraphs separated by a single blank line. " - "For questions: expand into multiple related questions about the topic. For commands/instructions: expand into general guidance about the task. " - "Stay generic - do not hallucinate specific files, functions, or code locations. " - "Output format: plain text only, no markdown, no code fences, no answers, no explanations." - ) - - label = "with snippets" if "\n " in effective_context else "headers only" - user_msg = ( - f"Context refs ({label}):\n{effective_context}\n\n" - f"Original prompt: {(original_prompt or '').strip()}\n\n" - "Rewrite this as a more specific, detailed prompt using at least two short paragraphs separated by a blank line. " - + policy_user - ) - - if has_code_context: - user_msg += ( - "Use the context above to make the rewrite concrete and specific. " - "For questions: make them more specific and multi-faceted (each paragraph should be a question ending with '?'). " - "For commands/instructions: make them more detailed and concrete (specify exact functions, parameters, edge cases to handle). 
" - ) - else: - user_msg += ( - "Since no code context is available, keep the rewrite general and exploratory. " - "Do NOT invent specific file paths, line numbers, or function names. " - "For questions: expand into related conceptual questions. For commands/instructions: provide general guidance about the task. " - ) - - user_msg += ( - "Remember: ONLY rewrite the prompt - do NOT answer questions or execute commands. " - "Avoid generic phrasing. No markdown or code fences." - ) - - # Apply user preferences if config exists - prefs = _load_user_preferences() - system_msg, user_msg = _apply_user_preferences(system_msg, user_msg, prefs) - - # Override stream setting from preferences if specified - if prefs.get("streaming") is not None: - stream = prefs.get("streaming") - - # Check which decoder runtime to use - runtime_kind = str(os.environ.get("REFRAG_RUNTIME", "llamacpp")).strip().lower() - - if runtime_kind == "glm": - from refrag_glm import GLMRefragClient # type: ignore - client = GLMRefragClient() - - # GLM uses OpenAI-style chat completions, convert context to user prompt format - # Note: For GLM, we need to convert the meta_prompt format to simple user message - user_msg = ( - f"Context refs:\n{effective_context}\n\n" - f"Original prompt: {(original_prompt or '').strip()}\n\n" - "Rewrite this as a more specific, detailed prompt using at least two short paragraphs separated by a blank line. " - ) - - if has_code_context: - user_msg += ( - "Use the context above to make the rewrite concrete and specific. " - "For questions: make them more specific and multi-faceted (each paragraph should be a question ending with '?'). " - "For commands/instructions: make them more detailed and concrete (specify exact functions, parameters, edge cases to handle). " - ) - else: - user_msg += ( - "Since no code context is available, keep the rewrite general and exploratory. " - "Do NOT invent specific file paths, line numbers, or function names. 
" - "For questions: expand into related conceptual questions. For commands/instructions: provide general guidance about the task. " - ) - - # GLM API call - response = client.client.chat.completions.create( - model=os.environ.get("GLM_MODEL", "glm-4.6"), - messages=[ - {"role": "system", "content": system_msg}, - {"role": "user", "content": user_msg} - ], - max_tokens=int(max_tokens or DEFAULT_REWRITE_TOKENS), - temperature=0.45, - stream=stream - ) - - enhanced = "" - if stream: - # Streaming mode for GLM - for chunk in response: - if chunk.choices[0].delta.content: - token = chunk.choices[0].delta.content - sys.stdout.write(token) - sys.stdout.flush() - enhanced += token - sys.stdout.write("\n") - sys.stdout.flush() - else: - # Non-streaming mode for GLM - enhanced = response.choices[0].message.content - - else: - # Use llama.cpp decoder (original logic) - meta_prompt = ( - "<|start_of_role|>system<|end_of_role|>" + system_msg + "<|end_of_text|>\n" - "<|start_of_role|>user<|end_of_role|>" + user_msg + "<|end_of_text|>\n" - "<|start_of_role|>assistant<|end_of_role|>" - ) - - decoder_url = DECODER_URL - # Safety: only allow local decoder hosts - parsed = urlparse(decoder_url) - if parsed.hostname not in {"localhost", "127.0.0.1", "host.docker.internal"}: - raise ValueError(f"Unsafe decoder host: {parsed.hostname}") - payload = { - "prompt": meta_prompt, - "n_predict": int(max_tokens or DEFAULT_REWRITE_TOKENS), - "temperature": 0.45, - "stream": stream, - } - - req = request.Request( - decoder_url, - data=json.dumps(payload).encode("utf-8"), - headers={"Content-Type": "application/json"}, - ) - - enhanced = "" - try: - if stream: - # Streaming mode: print tokens as they arrive for instant feedback - with request.urlopen(req, timeout=DECODER_TIMEOUT) as resp: - for line in resp: - line_str = line.decode("utf-8", errors="ignore").strip() - if not line_str or line_str.startswith(":"): - continue - if line_str.startswith("data: "): - line_str = line_str[6:] - try: - 
chunk = json.loads(line_str) - token = chunk.get("content", "") - if token: - sys.stdout.write(token) - sys.stdout.flush() - enhanced += token - if chunk.get("stop", False): - break - except json.JSONDecodeError as e: - # Warn once per malformed line but keep streaming the final output only - sys.stderr.write(f"[WARN] decoder stream JSON decode failed: {str(e)}\n") - sys.stderr.flush() - continue - sys.stdout.write("\n") - sys.stdout.flush() - else: - # Non-streaming mode: wait for full response - with request.urlopen(req, timeout=DECODER_TIMEOUT) as resp: - raw = resp.read().decode("utf-8", errors="ignore") - data = json.loads(raw) - - # Extract content from llama.cpp response - enhanced = ( - (data.get("content") if isinstance(data, dict) else None) - or ((data.get("choices") or [{}])[0].get("content") if isinstance(data, dict) else None) - or ((data.get("choices") or [{}])[0].get("text") if isinstance(data, dict) else None) - or (data.get("generated_text") if isinstance(data, dict) else None) - or (data.get("text") if isinstance(data, dict) else None) - ) - except Exception as e: - sys.stderr.write(f"[ERROR] Decoder call to {decoder_url} failed: {type(e).__name__}: {e}\n") - sys.stderr.flush() - raise - - # Normalize and strip formatting / template artifacts from decoder output - enhanced = (enhanced or "") - enhanced = enhanced.replace("```", "").replace("`", "") - # Remove stray chat-template tags like <|user|>, <|assistant|>, etc. 
- enhanced = re.sub(r"<\|[^|>]+?\|>", "", enhanced) - enhanced = enhanced.strip() - - if not enhanced: - raise ValueError("Decoder returned empty response") - - # Enforce at least two question paragraphs, then deduplicate and cap paragraphs - enhanced = _ensure_two_paragraph_questions(enhanced) - enhanced = _dedup_paragraphs(enhanced, max_paragraphs=3) - return enhanced - - - - - -def build_final_output( - rewritten_prompt: str, context: str, note: str, include_context: bool -) -> str: - """Combine LLM rewrite with optional supporting context for downstream tools.""" - improved = rewritten_prompt.strip() or "No rewrite generated." - if not include_context: - return improved - - context_block = context.strip() if context.strip() else (note or "No supporting context.") - - return f"""# Improved Prompt -{improved} - ---- - -# Supporting Context -{context_block} -""" - - -def main(): - parser = argparse.ArgumentParser( - description="Context-aware prompt enhancer - rewrites questions and commands with codebase context", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=""" -Examples: - # Questions: enhanced with specific details - ctx "how does hybrid search work?" 
- - # Commands: enhanced with concrete implementation steps - ctx "refactor ctx.py to improve modularity" - - # Unicorn mode: staged 2–3 pass enhancement for best results - ctx --unicorn "refactor ctx.py" - - # Detail mode: include code snippets (slower but richer) - ctx --detail "explain the caching logic" - - # Pipe to LLM or clipboard - ctx --cmd llm "explain the caching logic" - ctx --cmd pbcopy --language python "fix bug in watcher" - """ - ) - - parser.add_argument("query", help="Your question or command to enhance") - - # Command execution - parser.add_argument("--cmd", "-c", help="Command to pipe enhanced prompt to (e.g., llm, pbcopy)") - parser.add_argument("--with-context", action="store_true", - help="Append supporting context after the improved prompt") - parser.add_argument("--unicorn", action="store_true", - help="One-size 'amazing' mode: staged 2–3 calls for best prompts (keeps defaults unchanged)") - - # Search filters - parser.add_argument("--language", "-l", help="Filter by language (e.g., python, typescript)") - parser.add_argument("--under", "-u", help="Filter by path prefix (e.g., scripts/)") - parser.add_argument("--path-glob", help="Filter by path glob pattern") - parser.add_argument("--not-glob", help="Exclude paths matching glob pattern") - parser.add_argument("--kind", help="Filter by symbol kind (e.g., function, class)") - parser.add_argument("--symbol", help="Filter by symbol name") - parser.add_argument("--ext", help="Filter by file extension") - parser.add_argument("--collection", help="Override collection name (default: env COLLECTION_NAME)") - - # Output control - parser.add_argument("--limit", type=int, default=DEFAULT_LIMIT, - help=f"Max results (default: {DEFAULT_LIMIT})") - parser.add_argument("--context-lines", type=int, default=DEFAULT_CONTEXT_LINES, - help=f"Context lines for snippets (default: {DEFAULT_CONTEXT_LINES})") - parser.add_argument("--per-path", type=int, - help="Limit results per file (default: server setting)") - 
parser.add_argument("--rewrite-max-tokens", type=int, default=DEFAULT_REWRITE_TOKENS, - help=f"Max tokens for LLM rewrite (default: {DEFAULT_REWRITE_TOKENS})") - - # Detail mode - parser.add_argument("--detail", action="store_true", - help="Include short code snippets for richer rewrites (slower but more specific; auto-clamps to limit=4, per_path=1)") - - args = parser.parse_args() - - # Build filter dict - filters = { - "limit": args.limit, - "context_lines": args.context_lines, - "language": args.language, - "under": args.under, - "path_glob": args.path_glob, - "not_glob": args.not_glob, - "kind": args.kind, - "symbol": args.symbol, - "ext": args.ext, - "collection": args.collection, - "per_path": args.per_path, - "with_snippets": args.detail, - "rewrite_options": { - "max_tokens": args.rewrite_max_tokens, - }, - } - - # If detail mode is on and context_lines equals the default (0), bump to 1 for a short snippet - if args.detail and args.context_lines == DEFAULT_CONTEXT_LINES: - filters["context_lines"] = 1 - # Clamp result counts in detail mode for latency - if args.detail: - try: - filters["limit"] = max(1, min(int(filters.get("limit", DEFAULT_LIMIT)), 4)) - except Exception: - filters["limit"] = 4 - filters["per_path"] = 1 - - # Remove None values - filters = {k: v for k, v in filters.items() if v is not None} - - try: - # Enhance prompt - if args.unicorn: - output = enhance_unicorn(args.query, **filters) - else: - context_text, context_note = fetch_context(args.query, **filters) - - require_ctx_flag = os.environ.get("CTX_REQUIRE_CONTEXT", "").strip().lower() - if require_ctx_flag in {"1", "true", "yes", "on"}: - has_real_context = bool((context_text or "").strip()) and not ( - context_note and ( - "failed" in context_note.lower() - or "no relevant" in context_note.lower() - or "no data" in context_note.lower() - ) - ) - if not has_real_context: - output = (args.query or "").strip() - else: - rewritten = rewrite_prompt(args.query, context_text, context_note, 
max_tokens=args.rewrite_max_tokens) - output = rewritten.strip() - else: - rewritten = rewrite_prompt(args.query, context_text, context_note, max_tokens=args.rewrite_max_tokens) - output = rewritten.strip() - - if args.cmd: - subprocess.run(args.cmd, input=output.encode("utf-8"), shell=True, check=False) - else: - print(output) - - except KeyboardInterrupt: - print("\nInterrupted.", file=sys.stderr) - sys.exit(130) - except Exception as e: - print(f"Error: {e}", file=sys.stderr) - sys.exit(1) - - -if __name__ == "__main__": - main() From 89291e969a43af618a8352e93e3e952138a4f4fb Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 16:20:46 +0000 Subject: [PATCH 12/19] upload_client: Improves file change detection Enhances file change detection by considering both the file system and the local cache. This ensures that deleted files, which are only present in the cache, are also detected as changes, preventing inconsistencies during remote uploads. Additionally, upon detecting and processing a file deletion, the corresponding cache entry is now removed to avoid repeated reporting of the same deletion in subsequent scans. --- scripts/remote_upload_client.py | 64 +++++++++++++++++++++++++++-- scripts/standalone_upload_client.py | 60 +++++++++++++++++++++++++-- 2 files changed, 118 insertions(+), 6 deletions(-) diff --git a/scripts/remote_upload_client.py b/scripts/remote_upload_client.py index 908f6645..449aa0bc 100644 --- a/scripts/remote_upload_client.py +++ b/scripts/remote_upload_client.py @@ -37,12 +37,43 @@ set_cached_file_hash, get_collection_name, _extract_repo_name_from_path, + remove_cached_file, ) # Import existing hash function import scripts.ingest_code as idx +def _load_local_cache_file_hashes(workspace_path: str, repo_name: Optional[str]) -> Dict[str, str]: + """Best-effort read of the local cache.json file_hashes map. + + This mirrors the layout used by workspace_state without introducing new + dependencies. 
It is used only to enumerate candidate paths; normal hash + lookups still go through get_cached_file_hash. + """ + try: + base = Path(os.environ.get("WORKSPACE_PATH") or workspace_path).resolve() + multi_repo = os.environ.get("MULTI_REPO_MODE", "0").strip().lower() in {"1", "true", "yes", "on"} + if multi_repo and repo_name: + cache_path = base / ".codebase" / "repos" / repo_name / "cache.json" + else: + cache_path = base / ".codebase" / "cache.json" + + if not cache_path.exists(): + return {} + + with open(cache_path, "r", encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, dict): + return {} + file_hashes = data.get("file_hashes", {}) + if not isinstance(file_hashes, dict): + return {} + return file_hashes + except Exception: + return {} + + class RemoteUploadClient: """Client for uploading delta bundles to remote server.""" @@ -425,6 +456,13 @@ def create_delta_bundle(self, changes: Dict[str, List]) -> Tuple[str, Dict[str, } operations.append(operation) + # Once a delete operation has been recorded, drop the cache entry + # so subsequent scans do not keep re-reporting the same deletion. 
+ try: + remove_cached_file(str(path.resolve()), self.repo_name) + except Exception: + pass + except Exception as e: print(f"[bundle_create] Error processing deleted file {path}: {e}") continue @@ -874,9 +912,29 @@ def watch_loop(self, interval: int = 5): try: while True: try: - # Use existing change detection (get all files in workspace) - all_files = self.get_all_code_files() - changes = self.detect_file_changes(all_files) + # Use existing change detection over both filesystem and cached registry + fs_files = self.get_all_code_files() + path_map = {} + for p in fs_files: + try: + resolved = p.resolve() + except Exception: + continue + path_map[resolved] = p + + # Include any paths that are only present in the local cache (deleted files) + cached_file_hashes = _load_local_cache_file_hashes(self.workspace_path, self.repo_name) + for cached_abs in cached_file_hashes.keys(): + try: + cached_path = Path(cached_abs) + resolved = cached_path.resolve() + except Exception: + continue + if resolved not in path_map: + path_map[resolved] = cached_path + + all_paths = list(path_map.values()) + changes = self.detect_file_changes(all_paths) # Count only meaningful changes (exclude unchanged) meaningful_changes = len(changes.get("created", [])) + len(changes.get("updated", [])) + len(changes.get("deleted", [])) + len(changes.get("moved", [])) diff --git a/scripts/standalone_upload_client.py b/scripts/standalone_upload_client.py index c6ae1163..d3e7a1ba 100644 --- a/scripts/standalone_upload_client.py +++ b/scripts/standalone_upload_client.py @@ -182,6 +182,19 @@ def set_hash(self, file_path: str, file_hash: str): file_hashes[abs_path] = file_hash self._save_cache(file_hashes) + def all_paths(self) -> List[str]: + """Return all cached absolute file paths.""" + file_hashes = self._load_cache() + return list(file_hashes.keys()) + + def remove_hash(self, file_path: str) -> None: + """Remove a cached file hash if present.""" + file_hashes = self._load_cache() + abs_path = 
str(Path(file_path).resolve()) + if abs_path in file_hashes: + file_hashes.pop(abs_path, None) + self._save_cache(file_hashes) + # Create global cache instance (will be initialized in RemoteUploadClient) _hash_cache: Optional[SimpleHashCache] = None @@ -199,6 +212,25 @@ def set_cached_file_hash(file_path: str, file_hash: str, repo_name: Optional[str _hash_cache.set_hash(file_path, file_hash) +def get_all_cached_paths(repo_name: Optional[str] = None) -> List[str]: + """Return all tracked file paths from the local cache. + + The repo_name parameter is accepted for API symmetry with the non-standalone + client but is not used here, since this cache is always per-workspace. + """ + global _hash_cache + if _hash_cache: + return _hash_cache.all_paths() + return [] + + +def remove_cached_file(file_path: str, repo_name: Optional[str] = None) -> None: + """Remove a file entry from the local cache if present.""" + global _hash_cache + if _hash_cache: + _hash_cache.remove_hash(file_path) + + class RemoteUploadClient: """Client for uploading delta bundles to remote server.""" @@ -582,6 +614,9 @@ def create_delta_bundle(self, changes: Dict[str, List]) -> Tuple[str, Dict[str, "language": CODE_EXTS.get(path.suffix.lower(), "unknown") } operations.append(operation) + # Once a delete operation has been recorded, drop the cache entry + # so subsequent scans do not keep re-reporting the same deletion. 
+ remove_cached_file(str(path.resolve()), self.repo_name) except Exception as e: print(f"[bundle_create] Error processing deleted file {path}: {e}") @@ -991,9 +1026,28 @@ def watch_loop(self, interval: int = 5): try: while True: try: - # Use existing change detection (get all files in workspace) - all_files = self.get_all_code_files() - changes = self.detect_file_changes(all_files) + # Use existing change detection over both filesystem and cached registry + fs_files = self.get_all_code_files() + path_map = {} + for p in fs_files: + try: + resolved = p.resolve() + except Exception: + continue + path_map[resolved] = p + + # Include any paths that are only present in the local cache (deleted files) + for cached_abs in get_all_cached_paths(self.repo_name): + try: + cached_path = Path(cached_abs) + resolved = cached_path.resolve() + except Exception: + continue + if resolved not in path_map: + path_map[resolved] = cached_path + + all_paths = list(path_map.values()) + changes = self.detect_file_changes(all_paths) # Count only meaningful changes (exclude unchanged) meaningful_changes = len(changes.get("created", [])) + len(changes.get("updated", [])) + len(changes.get("deleted", [])) + len(changes.get("moved", [])) From c2e6b633ade8908531d3a0062cb9ad2550688b25 Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 16:21:39 +0000 Subject: [PATCH 13/19] chore(ctx_config.example.json): Adds rewrite_max_tokens config option Adds a new configuration option, `rewrite_max_tokens`, to allow control over the maximum number of tokens used during content rewriting. 
--- ctx_config.example.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ctx_config.example.json b/ctx_config.example.json index 4c3efcb6..e4c97798 100644 --- a/ctx_config.example.json +++ b/ctx_config.example.json @@ -11,6 +11,7 @@ "streaming": true, "require_context": true, "relevance_gate_enabled": false, - "min_relevance": 0.1 + "min_relevance": 0.1, + "rewrite_max_tokens": 420 } From 685e023e7f08d6b56a71be6a73c2043d68b0a72c Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 18:23:50 +0000 Subject: [PATCH 14/19] feat(ingest_code): Adds GLM runtime support for pseudo tags Implements support for the GLM runtime when generating pseudo tags, including a JSON-only prompt mode for GLM. Also introduces command-line arguments for testing the pseudo tag generation with specific code snippets or files, facilitating debugging and validation. These test arguments will be removed once the GLM integration is stable. --- scripts/ingest_code.py | 118 +++++++++++++++++++++++++++++++++++------ scripts/refrag_glm.py | 27 ++++++---- 2 files changed, 120 insertions(+), 25 deletions(-) diff --git a/scripts/ingest_code.py b/scripts/ingest_code.py index f75c0d85..3bdcdcb9 100644 --- a/scripts/ingest_code.py +++ b/scripts/ingest_code.py @@ -637,24 +637,49 @@ def generate_pseudo_tags(text: str) -> tuple[str, list[str]]: if not _pseudo_describe_enabled() or not text.strip(): return pseudo, tags try: - from scripts.refrag_llamacpp import LlamaCppRefragClient, is_decoder_enabled # type: ignore + from scripts.refrag_llamacpp import ( # type: ignore + LlamaCppRefragClient, + is_decoder_enabled, + get_runtime_kind, + ) if not is_decoder_enabled(): return "", [] - # Keep decoding tight/fast – this is only enrichment for retrieval - prompt = ( - "You label code spans for search enrichment.\n" - "Return strictly JSON: {\"pseudo\": string (<=20 tokens), \"tags\": [3-6 short strings]}.\n" - "Code:\n" + text[:2000] - ) - client = LlamaCppRefragClient() - out = 
client.generate_with_soft_embeddings( - prompt=prompt, - max_tokens=int(os.environ.get("PSEUDO_MAX_TOKENS", "96") or 96), - temperature=float(os.environ.get("PSEUDO_TEMPERATURE", "0.10") or 0.10), - top_k=int(os.environ.get("PSEUDO_TOP_K", "30") or 30), - top_p=float(os.environ.get("PSEUDO_TOP_P", "0.9") or 0.9), - stop=["\n\n"], - ) + runtime = get_runtime_kind() + # Keep decoding tight/fast – this is only enrichment for retrieval. + # Preserve original llama.cpp prompt semantics, and use a stricter + # JSON-only prompt only for the GLM runtime. + if runtime == "glm": + prompt = ( + "You are a JSON-only function that labels code spans for search enrichment.\n" + "Respond with a single JSON object and nothing else (no prose, no markdown).\n" + "Exact format: {\"pseudo\": string (<=20 tokens), \"tags\": [3-6 short strings]}.\n" + "Code:\n" + text[:2000] + ) + from scripts.refrag_glm import GLMRefragClient # type: ignore + client = GLMRefragClient() + out = client.generate_with_soft_embeddings( + prompt=prompt, + max_tokens=int(os.environ.get("PSEUDO_MAX_TOKENS", "96") or 96), + temperature=float(os.environ.get("PSEUDO_TEMPERATURE", "0.10") or 0.10), + top_p=float(os.environ.get("PSEUDO_TOP_P", "0.9") or 0.9), + stop=["\n\n"], + force_json=True, + ) + else: + prompt = ( + "You label code spans for search enrichment.\n" + "Return strictly JSON: {\"pseudo\": string (<=20 tokens), \"tags\": [3-6 short strings]}.\n" + "Code:\n" + text[:2000] + ) + client = LlamaCppRefragClient() + out = client.generate_with_soft_embeddings( + prompt=prompt, + max_tokens=int(os.environ.get("PSEUDO_MAX_TOKENS", "96") or 96), + temperature=float(os.environ.get("PSEUDO_TEMPERATURE", "0.10") or 0.10), + top_k=int(os.environ.get("PSEUDO_TOP_K", "30") or 30), + top_p=float(os.environ.get("PSEUDO_TOP_P", "0.9") or 0.9), + stop=["\n\n"], + ) import json as _json try: obj = _json.loads(out) @@ -2533,6 +2558,20 @@ def main(): default=None, help="Print progress every N files (default 200; 0 
disables)", ) + # GLM psueo tag test - # TODO: Remove GLM psuedo tag test harness after confirming 100% stable and not needed + parser.add_argument( + "--test-pseudo", + type=str, + default=None, + help="Test generate_pseudo_tags on the given code snippet and print result, then exit", + ) + parser.add_argument( + "--test-pseudo-file", + type=str, + default=None, + help="Test generate_pseudo_tags on the contents of the given file and print result, then exit", + ) + # End args = parser.parse_args() @@ -2557,6 +2596,53 @@ def main(): if args.progress_every is not None: os.environ["INDEX_PROGRESS_EVERY"] = str(args.progress_every) + # TODO: Remove GLM psuedo tag test harness after confirming 100% stable and not needed + # # Optional test mode: exercise generate_pseudo_tags (including GLM runtime) and exit + if args.test_pseudo or args.test_pseudo_file: + import json as _json + + code_text = "" + if args.test_pseudo: + code_text = args.test_pseudo + if args.test_pseudo_file: + try: + code_text = Path(args.test_pseudo_file).read_text( + encoding="utf-8", errors="ignore" + ) + except Exception as e: + print(f"[TEST_PSEUDO] Failed to read file {args.test_pseudo_file}: {e}") + return + if not code_text.strip(): + print("[TEST_PSEUDO] No code text provided") + return + + # Use the normal generate_pseudo_tags path so behavior matches indexing. 
+ try: + from scripts.refrag_llamacpp import get_runtime_kind # type: ignore + + runtime = get_runtime_kind() + except Exception: + runtime = "unknown" + + pseudo, tags = "", [] + try: + pseudo, tags = generate_pseudo_tags(code_text) + except Exception as e: + print(f"[TEST_PSEUDO] Error while generating pseudo tags: {e}") + + print( + _json.dumps( + { + "runtime": runtime, + "pseudo": pseudo, + "tags": tags, + }, + ensure_ascii=False, + indent=2, + ) + ) + return + qdrant_url = os.environ.get("QDRANT_URL", "http://localhost:6333") api_key = os.environ.get("QDRANT_API_KEY") collection = os.environ.get("COLLECTION_NAME") or os.environ.get("DEFAULT_COLLECTION") or "codebase" diff --git a/scripts/refrag_glm.py b/scripts/refrag_glm.py index 7905c4b2..c04b7ff8 100644 --- a/scripts/refrag_glm.py +++ b/scripts/refrag_glm.py @@ -39,21 +39,30 @@ def generate_with_soft_embeddings( top_p = float(gen_kwargs.get("top_p", 0.95)) stop = gen_kwargs.get("stop") timeout = gen_kwargs.pop("timeout", None) + # Optional hint from callers that they want strict JSON output. + force_json = bool(gen_kwargs.pop("force_json", False)) try: timeout_val = float(timeout) if timeout is not None else None except Exception: timeout_val = None try: - response = self.client.chat.completions.create( - model=model, - messages=[{"role": "user", "content": prompt}], - max_tokens=int(gen_kwargs.get("max_tokens", max_tokens)), - temperature=temperature, - top_p=top_p, - stop=stop if stop else None, - timeout=timeout_val, - ) + create_kwargs: dict[str, Any] = { + "model": model, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": int(gen_kwargs.get("max_tokens", max_tokens)), + "temperature": temperature, + "top_p": top_p, + "stop": stop if stop else None, + "timeout": timeout_val, + } + # When explicitly requested and supported by the backend, ask for + # JSON-only responses. If the provider rejects this parameter, the + # API call will raise and the caller will handle the failure. 
+ if force_json: + create_kwargs["response_format"] = {"type": "json_object"} + + response = self.client.chat.completions.create(**create_kwargs) msg = response.choices[0].message # GLM-4.6 uses reasoning_content for thinking models content = getattr(msg, 'reasoning_content', None) or msg.content or "" From 2f3b6eab26e597ea26824cb00235702f7c967e31 Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 19:40:57 +0000 Subject: [PATCH 15/19] wire env-gated pseudo tag boost into hybrid_search and document REFRAG_PSEUDO_DESCRIBE toggle --- .env.example | 3 +++ scripts/hybrid_search.py | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 5abf546c..52f6b595 100644 --- a/.env.example +++ b/.env.example @@ -117,6 +117,9 @@ REFRAG_ENCODER_MODEL=BAAI/bge-base-en-v1.5 REFRAG_PHI_PATH=/work/models/refrag_phi_768_to_dmodel.json REFRAG_SENSE=heuristic +# Enable index-time pseudo descriptions for micro-chunks (requires REFRAG_DECODER) +# REFRAG_PSEUDO_DESCRIBE=1 + # Llama.cpp sidecar (optional) # Docker CPU-only (stable): http://llamacpp:8080 # Native GPU-accelerated (fast): http://localhost:8081 diff --git a/scripts/hybrid_search.py b/scripts/hybrid_search.py index d2fe5af8..21046ac5 100644 --- a/scripts/hybrid_search.py +++ b/scripts/hybrid_search.py @@ -294,6 +294,10 @@ def _embed_queries_cached( IMPLEMENTATION_BOOST = _safe_float(os.environ.get("HYBRID_IMPLEMENTATION_BOOST", "0.2"), 0.2) DOCUMENTATION_PENALTY = _safe_float(os.environ.get("HYBRID_DOCUMENTATION_PENALTY", "0.1"), 0.1) +# Modest boost for matches against pseudo/tags produced at index time. +# Default 0.0 = disabled; set HYBRID_PSEUDO_BOOST>0 to experiment. 
+PSEUDO_BOOST = _safe_float(os.environ.get("HYBRID_PSEUDO_BOOST", "0.0"), 0.0) + # Penalize comment-heavy snippets so code (not comments) ranks higher COMMENT_PENALTY = _safe_float(os.environ.get("HYBRID_COMMENT_PENALTY", "0.2"), 0.2) COMMENT_RATIO_THRESHOLD = _safe_float(os.environ.get("HYBRID_COMMENT_RATIO_THRESHOLD", "0.6"), 0.6) @@ -929,6 +933,13 @@ def lexical_score(phrases: List[str], md: Dict[str, Any], token_weights: Dict[st sym = str(md.get("symbol", "")).lower() symp = str(md.get("symbol_path", "")).lower() code = str(md.get("code", ""))[:2000].lower() + # Optional index-time pseudo/tags enrichment + pseudo = str(md.get("pseudo") or "").lower() + tags_val = md.get("tags") or [] + if isinstance(tags_val, list): + tags_text = " ".join(str(x) for x in tags_val).lower() + else: + tags_text = str(tags_val).lower() s = 0.0 for t in tokens: if not t: @@ -940,6 +951,12 @@ def lexical_score(phrases: List[str], md: Dict[str, Any], token_weights: Dict[st contrib += 0.6 if t in code: contrib += 1.0 + # Pseudo/tags signals: gentle, optional boost + if PSEUDO_BOOST > 0.0: + if pseudo and t in pseudo: + contrib += PSEUDO_BOOST + if tags_text and t in tags_text: + contrib += 0.5 * PSEUDO_BOOST if contrib > 0 and token_weights and bm25_weight: w = float(token_weights.get(t, 1.0) or 1.0) contrib *= (1.0 + float(bm25_weight) * (w - 1.0)) @@ -2068,7 +2085,16 @@ def _bn(p: str) -> str: # Lexical + boosts timestamps: List[int] = [] for pid, rec in list(score_map.items()): - md = (rec["pt"].payload or {}).get("metadata") or {} + payload = rec["pt"].payload or {} + base_md = payload.get("metadata") or {} + # Merge top-level pseudo/tags into the view passed to lexical_score so + # HYBRID_PSEUDO_BOOST can see index-time GLM/llamacpp labels. 
+ md = dict(base_md) + if "pseudo" in payload: + md["pseudo"] = payload["pseudo"] + if "tags" in payload: + md["tags"] = payload["tags"] + lx = (_AD_LEX_TEXT_W * lexical_score(qlist, md, token_weights=_bm25_tok_w, bm25_weight=_BM25_W)) if _USE_ADAPT else (LEXICAL_WEIGHT * lexical_score(qlist, md, token_weights=_bm25_tok_w, bm25_weight=_BM25_W)) rec["lx"] += lx rec["s"] += lx From cd2f2b1011a9c5934c42ed9556e8385ff6c7efac Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 19:47:49 +0000 Subject: [PATCH 16/19] Updates extension version Increments the extension version from 0.1.25 to 0.1.26. --- vscode-extension/context-engine-uploader/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vscode-extension/context-engine-uploader/package.json b/vscode-extension/context-engine-uploader/package.json index c25d0af4..47e29a64 100644 --- a/vscode-extension/context-engine-uploader/package.json +++ b/vscode-extension/context-engine-uploader/package.json @@ -2,7 +2,7 @@ "name": "context-engine-uploader", "displayName": "Context Engine Uploader", "description": "Runs the Context-Engine remote upload client with a force sync on startup followed by watch mode. Requires Python with pip install requests urllib3 charset_normalizer.", - "version": "0.1.25", + "version": "0.1.26", "publisher": "context-engine", "engines": { "vscode": "^1.85.0" From 1ef692db47360b6d1f68894d7e2dbfa84854bc9c Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 20:36:14 +0000 Subject: [PATCH 17/19] vscodeext: Surfaces Qdrant collection hint via config Drives Qdrant collection hint behavior via `ctx_config.json` instead of hook environment variables. This change ensures consistent configuration and removes dependency on potentially unreliable hook configurations. 
--- ctx_config.example.json | 4 ++- .../context-engine-uploader/extension.js | 31 +++++++++---------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/ctx_config.example.json b/ctx_config.example.json index e4c97798..81eb779e 100644 --- a/ctx_config.example.json +++ b/ctx_config.example.json @@ -12,6 +12,8 @@ "require_context": true, "relevance_gate_enabled": false, "min_relevance": 0.1, - "rewrite_max_tokens": 420 + "rewrite_max_tokens": 420, + "surface_qdrant_collection_hint": true + } diff --git a/vscode-extension/context-engine-uploader/extension.js b/vscode-extension/context-engine-uploader/extension.js index 481c8753..deb9d355 100644 --- a/vscode-extension/context-engine-uploader/extension.js +++ b/vscode-extension/context-engine-uploader/extension.js @@ -1101,6 +1101,18 @@ async function scaffoldCtxConfigFiles(workspaceDir, collectionName) { ctxConfig.require_context = true; ctxChanged = true; } + if (ctxConfig.surface_qdrant_collection_hint === undefined) { + let surfaceHintSetting = true; + if (uploaderSettings) { + try { + surfaceHintSetting = !!uploaderSettings.get('surfaceQdrantCollectionHint', true); + } catch (error) { + log(`Failed to read surfaceQdrantCollectionHint from configuration: ${error instanceof Error ? error.message : String(error)}`); + } + } + ctxConfig.surface_qdrant_collection_hint = surfaceHintSetting; + ctxChanged = true; + } if (ctxConfig.refrag_runtime !== decoderRuntime) { ctxConfig.refrag_runtime = decoderRuntime; ctxChanged = true; @@ -1482,9 +1494,9 @@ async function writeClaudeHookConfig(root, commandPath) { if (!config.hooks || typeof config.hooks !== 'object') { config.hooks = {}; } - // Derive CTX workspace directory and optional hint flags for the hook from extension settings + // Derive CTX workspace directory for the hook from extension settings. + // Collection hint behavior is now driven by ctx_config.json, not hook env. 
let hookEnv; - let surfaceHintEnabled = false; try { const uploaderConfig = vscode.workspace.getConfiguration('contextEngineUploader'); const targetPath = (uploaderConfig.get('targetPath') || '').trim(); @@ -1492,19 +1504,9 @@ async function writeClaudeHookConfig(root, commandPath) { const resolvedTarget = path.resolve(targetPath); hookEnv = { CTX_WORKSPACE_DIR: resolvedTarget }; } - const surfaceHint = uploaderConfig.get('surfaceQdrantCollectionHint', true); - const claudeMcpEnabled = uploaderConfig.get('mcpClaudeEnabled', true); - surfaceHintEnabled = !!(surfaceHint && claudeMcpEnabled); - if (surfaceHintEnabled) { - if (!hookEnv) { - hookEnv = {}; - } - hookEnv.CTX_SURFACE_COLLECTION_HINT = '1'; - } } catch (error) { // Best-effort only; if anything fails, fall back to no extra env hookEnv = undefined; - surfaceHintEnabled = false; } const hook = { @@ -1547,11 +1549,6 @@ async function writeClaudeHookConfig(root, commandPath) { if (hookEnv) { existing.env = { ...existing.env, ...hookEnv }; } - if (!surfaceHintEnabled && Object.prototype.hasOwnProperty.call(existing.env, 'CTX_SURFACE_COLLECTION_HINT')) { - delete existing.env.CTX_SURFACE_COLLECTION_HINT; - } else if (surfaceHintEnabled) { - existing.env.CTX_SURFACE_COLLECTION_HINT = '1'; - } updated = true; } } From 95a87467b5cc13c212d6096455e395e8821746e9 Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 21:58:20 +0000 Subject: [PATCH 18/19] Adds AI agent usage guide for MCP Qdrant-Indexer Introduces a guide for AI agents (like Claude) on using the MCP Qdrant-Indexer and Memory tools effectively. The guide outlines when to use the Qdrant-Indexer versus `grep`, emphasizing semantic understanding, ranked results, and contextual awareness. It provides practical tips, performance optimization strategies, and anti-patterns to avoid, ensuring efficient and accurate code exploration. 
--- docs/CLAUDE.example.md | 127 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 docs/CLAUDE.example.md diff --git a/docs/CLAUDE.example.md b/docs/CLAUDE.example.md new file mode 100644 index 00000000..5a95ce43 --- /dev/null +++ b/docs/CLAUDE.example.md @@ -0,0 +1,127 @@ +This file is intended for AI agents (Claude, etc.) using the Context‑Engine Qdrant‑Indexer and Memory MCP tools. It encodes project‑specific best practices; adapt it per‑repo. + + +Agentic AI Project Rules: When to Use MCP Qdrant-Indexer vs Grep + + Core Decision Rules (for AI agents) + + - Use MCP Qdrant-Indexer when: + - You are exploring or don't know exact strings/symbols. + - You need semantic or cross-file understanding (relationships, patterns, architecture). + - You want ranked results with surrounding context, not just line hits. + + - Use grep when: + - You know the exact string/function/variable or error message. + - You need fast literal search or are extremely token/latency constrained. + + Quick Heuristics: + + - If you know the exact string → start with grep, then switch to MCP for broader context. + - If the question is conceptual/architectural → start with MCP. + - If you need rich context/snippets around matches → MCP. + - If you just need to confirm existence/location → grep. + + Grep Anti-Patterns: + + # DON'T - Wasteful when semantic search needed + grep -r "auth" . # → Use MCP: "authentication mechanisms" + grep -r "cache" . # → Use MCP: "caching strategies" + grep -r "error" . # → Use MCP: "error handling patterns" + grep -r "database" . # → Use MCP: "database operations" + + # DO - Efficient for exact matches + grep -rn "UserAlreadyExists" . # Specific error class + grep -rn "def authenticate_user" . # Exact function name + grep -rn "REDIS_HOST" . 
# Exact environment variable + + MCP Tool Patterns: + + # DO - Use concept/keyword-style queries (short natural-language fragments) + "input validation mechanisms" + "database connection handling" + "performance bottlenecks in request path" + "places where user sessions are managed" + "logging and error reporting patterns" + + MCP Qdrant-Indexer Specific Knobs + + Essential Parameters: + + - limit: Control result count (3-8 for efficiency) + - per_path: Limit results per file (1-2 prevents redundancy) + - compact=true: Reduces token usage by 60-80% + - include_snippet=false: Headers only when speed matters + - collection: Target specific codebases for precision + + Performance Optimization: + + - Start with limit=3, compact=true for discovery + - Increase to limit=5, include_snippet=true for details + - Use language and under filters to narrow scope + - Set rerank_enabled=false for faster but less accurate results + + When to Use Advanced Features: + + - rerank_enabled=true: For complex queries needing best relevance + - context_lines=5+: When you need implementation details + - multiple collections: Cross-repo architectural analysis + - symbol filtering: When looking for specific function/class types + + Anti-Patterns to Avoid: + + - Don't use limit=20 with include_snippet=true (token waste) + - Don't search without collection specification (noise) + - Don't ignore per_path limits (duplicate results from same file) + - Don't use context lines for pure discovery (unnecessary tokens) + + Tool Roles Cheat Sheet: + + - repo_search / code_search: + - Use for: finding relevant files/spans and inspecting raw code. + - Think: "where is X implemented?", "show me usages of Y". + - context_search: + - Use for: combining code hits with memory/docs when both matter. + - Good for: "give me related code plus any notes/docs I wrote". + - context_answer: + - Use for: natural-language explanations grounded in code, with citations. 
+ - Good for: "how do uploads get triggered when files change?", "where is the watcher wired into the indexer?". + + Query Phrasing Tips for context_answer: + + - Prefer behavior/architecture questions: + - "How do uploads get triggered when files change?" + - "Where is the VS Code file watcher that triggers indexing uploads?" + - If you care about a specific file, mention it explicitly: + - "What does ingest_code.py do?", "Explain ensureIndexedWatcher in extension.js". + - Mentioning a specific filename can bias retrieval to that file; for cross-file wiring + questions, prefer behavior-describing queries without filenames. + - For very cross-file questions, you can: + - First use repo_search to discover key files, + - Then call context_answer with a behavior-focused question that doesn't over-specify filenames. + + Remember: the MCP tools themselves expose detailed descriptions and parameter docs. + Use those for exact knobs; this guide is about choosing the right tool and shaping good queries. + + MCP Tool Families (for AI agents) + + - Indexer / Qdrant tools: + - qdrant_index_root, qdrant_index, qdrant_prune + - qdrant_list, qdrant_status + - workspace_info, list_workspaces, collection_map + - set_session_defaults + - Search / QA tools: + - repo_search, code_search, context_search, context_answer + - search_tests_for, search_config_for, search_callers_for, search_importers_for + - change_history_for_path, expand_query + - Memory tools: + - memory.set_session_defaults, memory.store, memory.find + + Additional behavioral tips: + + - Call set_session_defaults (indexer and memory) early in a session so subsequent + calls inherit the right collection without repeating it in every request. + - Use context_search with include_memories and per_source_limits when you want + blended code + memory results instead of calling repo_search and memory.find + separately. 
+ - Treat expand_query and the expand flag on context_answer as expensive options: + only use them after a normal search/answer attempt failed to find good context. \ No newline at end of file From eb412acd67d52419b942728013fe75e7feee7b30 Mon Sep 17 00:00:00 2001 From: Reese Date: Tue, 25 Nov 2025 23:17:03 +0000 Subject: [PATCH 19/19] docs(claude example): Clarifies context_answer usage and query tips Refines the documentation for the `context_answer` tool within the MCP framework. It clarifies its intended use for summarizing modules/tools and answering architecture questions, emphasizing the importance of specifying modules when relevant. Also, provides guidance on formulating effective queries for better results and highlights the limitations of `context_answer` for low-level debugging, suggesting `repo_search` and direct code reading as alternatives. --- docs/CLAUDE.example.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/CLAUDE.example.md b/docs/CLAUDE.example.md index 5a95ce43..ce132d4e 100644 --- a/docs/CLAUDE.example.md +++ b/docs/CLAUDE.example.md @@ -83,21 +83,22 @@ Agentic AI Project Rules: When to Use MCP Qdrant-Indexer vs Grep - Use for: combining code hits with memory/docs when both matter. - Good for: "give me related code plus any notes/docs I wrote". - context_answer: - - Use for: natural-language explanations grounded in code, with citations. - - Good for: "how do uploads get triggered when files change?", "where is the watcher wired into the indexer?". + - Use for: short natural-language summaries/explanations of specific modules or tools, grounded in code/docs with citations. + - Good for: "What does scripts/standalone_upload_client.py do at a high level?", "Summarize the remote upload client pipeline.". Query Phrasing Tips for context_answer: - - Prefer behavior/architecture questions: - - "How do uploads get triggered when files change?" 
- - "Where is the VS Code file watcher that triggers indexing uploads?" + - Prefer behavior/architecture questions about a single module or tool: + - "What does scripts/standalone_upload_client.py do at a high level?" + - "Summarize how the remote upload client interacts with the indexer service." - If you care about a specific file, mention it explicitly: - "What does ingest_code.py do?", "Explain ensureIndexedWatcher in extension.js". - Mentioning a specific filename can bias retrieval to that file; for cross-file wiring questions, prefer behavior-describing queries without filenames. - - For very cross-file questions, you can: - - First use repo_search to discover key files, - - Then call context_answer with a behavior-focused question that doesn't over-specify filenames. + - For very cross-file or multi-part questions, you can: + - First use repo_search to discover key files and read critical code directly, + - Then call context_answer to summarize behavior, using a behavior-focused question that doesn't over-specify filenames. + - Avoid using context_answer as a primary debugger for low-level helper/env behavior; prefer repo_search + direct code reading for detailed semantics. Remember: the MCP tools themselves expose detailed descriptions and parameter docs. Use those for exact knobs; this guide is about choosing the right tool and shaping good queries.