diff --git a/.env.example b/.env.example index 644358f..6a85307 100644 --- a/.env.example +++ b/.env.example @@ -10,20 +10,66 @@ # Default: databricks MODEL_PROVIDER=ollama +# Default model to use (overrides provider-specific defaults) +# Without this, defaults to a Databricks Claude model regardless of MODEL_PROVIDER. +# Set this when using Ollama or other local providers to match your installed model. +# MODEL_DEFAULT=qwen2.5-coder:latest + +# Force server-side model configuration, ignoring client requests +# When enabled, the server will always use MODEL_DEFAULT regardless of what model the client requests +# Useful when you want to enforce a specific model (e.g., qwen/qwen3-coder-next) despite clients asking for Claude models +# Default: false (respect client model preferences) +# ENFORCE_SERVER_MODEL=false + # ============================================================================== # Ollama Configuration (Hybrid Routing) # ============================================================================== +# +# Three supported configurations: +# +# 1. LOCAL ONLY (free, runs on your machine): +# OLLAMA_ENDPOINT=http://localhost:11434 +# OLLAMA_MODEL=qwen2.5-coder:latest +# +# 2. CLOUD ONLY (no local Ollama needed): +# OLLAMA_CLOUD_ENDPOINT=https://ollama.com +# OLLAMA_MODEL=glm-4.7:cloud +# OLLAMA_API_KEY=your-ollama-cloud-api-key +# +# 3. MIXED (local chat + cloud tools, or vice versa): +# OLLAMA_ENDPOINT=http://192.168.100.201:11434 +# OLLAMA_MODEL=qwen3:0.6b +# OLLAMA_CLOUD_ENDPOINT=https://ollama.com +# OLLAMA_API_KEY=your-ollama-cloud-api-key +# TOOL_EXECUTION_MODEL=glm-4.7:cloud +# +# OLLAMA_MODEL is REQUIRED — there is no default. +# At least one of OLLAMA_ENDPOINT or OLLAMA_CLOUD_ENDPOINT is REQUIRED. +# Cloud models are identified by name: "model:cloud" or "model:tag-cloud". 
+# ============================================================================== # Enable Ollama preference for simple requests PREFER_OLLAMA=false -# Ollama model to use (must be compatible with tool calling) -# Options: qwen2.5-coder:latest, llama3.1, mistral-nemo, nemotron-3-nano:30b-cloud, etc. +# Ollama model to use (REQUIRED, no default) +# Options: qwen2.5-coder:latest, llama3.1, glm-4.7:cloud, etc. OLLAMA_MODEL=qwen2.5-coder:latest -# Ollama endpoint (default: http://localhost:11434) +# Ollama local endpoint (omit for cloud-only setups) OLLAMA_ENDPOINT=http://localhost:11434 +# Ollama cloud endpoint for cloud-hosted models +# Models with ":cloud" or "-cloud" in the name route here with Authorization header. +# OLLAMA_CLOUD_ENDPOINT=https://ollama.com +# OLLAMA_API_KEY=your-ollama-cloud-api-key + +# Ollama request timeout in milliseconds (default: 120000) +# OLLAMA_TIMEOUT_MS=120000 + +# Ollama keep_alive parameter - how long to keep model loaded in memory +# Values: -1 (forever), 0 (unload immediately), or duration string (e.g. "5m", "1h") +# OLLAMA_KEEP_ALIVE=-1 + # Ollama embeddings configuration (for Cursor @Codebase semantic search) # Embedding models for local, privacy-first semantic search # Popular models: @@ -203,7 +249,12 @@ WEB_SEARCH_ENDPOINT=http://localhost:8888/search # Policy Configuration POLICY_MAX_STEPS=20 POLICY_MAX_TOOL_CALLS=12 +# Max tool calls the model can make in a single LLM request (not per turn). +# Prevents runaway parallel tool calling within one response. +POLICY_MAX_TOOL_CALLS_PER_REQUEST=12 +# Max duration (ms) for a single agent loop turn. Increase for slow models. +POLICY_MAX_DURATION_MS=120000 # Tool loop guard - max tool results in conversation before force-terminating # Prevents infinite tool loops. Set higher for complex multi-step tasks. 
POLICY_TOOL_LOOP_THRESHOLD=10 @@ -217,6 +268,66 @@ WORKSPACE_INDEX_ENABLED=true # - client/passthrough: Return tool calls to CLI for local execution TOOL_EXECUTION_MODE=server +# ============================================================================== +# Tool Execution Provider Configuration +# ============================================================================== + +# Provider to use for tool calling decisions (optional) +# If set, tool-calling decisions will be routed to this provider +# while conversation stays with the main provider. +# This enables using cheap/fast/local models for chat while using +# reliable models (like Claude Sonnet) for tool calling. +# +# Options: databricks, azure-anthropic, openrouter, openai, bedrock, etc. +# Leave empty to use the main provider for both conversation and tools. +# TOOL_EXECUTION_PROVIDER= + +# Model to use for tool execution (optional) +# If not set, uses the provider's default model. +# Examples: +# - anthropic/claude-sonnet-4 (for OpenRouter) +# - claude-3-5-sonnet-20241022 (for OpenAI-compatible APIs) +# TOOL_EXECUTION_MODEL= + +# Enable comparison mode to call BOTH providers and compare their tool calls +# When enabled, both the conversation provider and tool execution provider +# will be called, and their tool calls will be compared. The better set +# of tool calls will be selected based on quality heuristics. +# Useful for evaluating which provider gives better tool calls. +# Default: false (only uses tool execution provider when configured) +# TOOL_EXECUTION_COMPARE_MODE=false + +# ============================================================================== +# Tool Execution Provider Examples +# ============================================================================== + +# Example 1: Use local Ollama for chat, Claude Sonnet for tool calling +# This provides fast, private conversation with reliable tool execution. 
+# +# MODEL_PROVIDER=ollama +# OLLAMA_MODEL=qwen3-coder-next +# TOOL_EXECUTION_PROVIDER=openrouter +# TOOL_EXECUTION_MODEL=anthropic/claude-sonnet-4 +# OPENROUTER_API_KEY=your-key-here + +# Example 2: Use qwen3-coder-next for conversation, compare with Claude for tools +# This lets you evaluate tool call quality differences between providers. +# +# MODEL_PROVIDER=ollama +# OLLAMA_MODEL=qwen3-coder-next +# TOOL_EXECUTION_PROVIDER=openrouter +# TOOL_EXECUTION_MODEL=anthropic/claude-sonnet-4 +# TOOL_EXECUTION_COMPARE_MODE=true +# OPENROUTER_API_KEY=your-key-here + +# Example 3: Use Databricks for chat, Bedrock Claude for tools +# This enables cost optimization between different cloud providers. +# +# MODEL_PROVIDER=databricks +# TOOL_EXECUTION_PROVIDER=bedrock +# TOOL_EXECUTION_MODEL=anthropic.claude-3-5-sonnet-20241022-v2:0 +# AWS_BEDROCK_API_KEY=your-key-here + # Suggestion mode model override # Controls which model handles suggestion mode (predicting next user input). # Values: @@ -225,10 +336,23 @@ TOOL_EXECUTION_MODE=server # - Use a specific model (e.g. "llama3.1" for a lighter model) SUGGESTION_MODE_MODEL=default +# Topic detection model override +# Redirects topic classification to a lighter/faster model to reduce GPU load. +# Values: +# default - Use the main model (no change) +# skip/none - Skip topic detection entirely (recommended for Ollama to avoid GPU contention) +# - Use a specific model (e.g. "llama3.2:1b" for a lightweight classifier) +TOPIC_DETECTION_MODEL=default + # Enable/disable automatic tool injection for local models INJECT_TOOLS_LLAMACPP=true INJECT_TOOLS_OLLAMA=true +# Aggressive tool patching: try ALL text-to-tool extraction strategies for any model +# When false (default), only model-specific strategies are used (e.g. GLM gets bullet-point +# and fenced-code-block extraction). When true, all strategies are tried for every model. 
+# AGGRESSIVE_TOOL_PATCHING=false + # ============================================================================== # Semantic Response Cache # ============================================================================== @@ -276,6 +400,24 @@ MEMORY_INJECTION_FORMAT=system # Enable automatic extraction MEMORY_EXTRACTION_ENABLED=true +# ============================================================================== +# TOON JSON→TOON Prompt Compression +# ============================================================================== + +# Enable TOON compression for large structured JSON contexts (opt-in) +# TOON provides 10-39% payload reduction for API calls with large tool outputs +# Safe fallback: if compression fails, original payload is sent +TOON_ENABLED=false + +# Minimum payload size in bytes before attempting TOON compression +TOON_MIN_BYTES=4096 + +# Fail open strategy: if compression fails, send original (true) or fail request (false) +TOON_FAIL_OPEN=true + +# Log compression statistics (useful for monitoring) +TOON_LOG_STATS=true + # ============================================================================== # Token Optimization Settings (60-80% Cost Reduction) # ============================================================================== @@ -294,12 +436,6 @@ TOKEN_BUDGET_WARNING=100000 TOKEN_BUDGET_MAX=180000 TOKEN_BUDGET_ENFORCEMENT=true -# TOON JSON->TOON prompt compression (opt-in; for large structured JSON context) -TOON_ENABLED=false -TOON_MIN_BYTES=4096 -TOON_FAIL_OPEN=true -TOON_LOG_STATS=true - # ============================================================================== # Smart Tool Selection (Advanced Token Optimization) # ============================================================================== @@ -310,6 +446,49 @@ SMART_TOOL_SELECTION_MODE=heuristic # Maximum token budget for tools per request SMART_TOOL_SELECTION_TOKEN_BUDGET=2500 +# ============================================================================== +# Tool Needs 
Classification (LLM-Based) +# ============================================================================== + +# Enable LLM-based tool needs classification +# When enabled, classifies requests as tool-needed or conversational before routing +# This reduces token overhead and latency for simple questions and greetings +# Default: false (disabled) +# TOOL_NEEDS_CLASSIFICATION_ENABLED=true + +# Model to use for classification (lightweight recommended) +# Options: qwen2.5:1b, llama3.2:1b, or "skip" to disable LLM (whitelist-only) +# Lightweight models are fast (~100-500ms) and accurate for yes/no classification +# Default: qwen2.5:1b +# TOOL_NEEDS_CLASSIFICATION_MODEL=qwen2.5:1b + +# Path to default whitelist file with known patterns +# Whitelist provides fast-path matching for common requests (no LLM call) +# Default: ./config/tool-whitelist.json +# TOOL_NEEDS_CLASSIFICATION_WHITELIST=./config/tool-whitelist.json + +# Path to user whitelist file (optional, extends default) +# Create your own patterns in a separate file to avoid conflicts on updates +# Example: ./config/tool-whitelist-user.json +# TOOL_NEEDS_CLASSIFICATION_USER_WHITELIST=./config/tool-whitelist-user.json + +# Custom shell commands that need tools (comma-separated) +# Automatically adds both "command" and "command *" patterns to whitelist +# Example: bd,mycommand,anothercmd +# Use this for project-specific CLIs like bd (beads), make, cargo, etc. 
+# Default: empty +# TOOL_NEEDS_CLASSIFICATION_CUSTOM_COMMANDS=bd + +# Enable result caching to avoid repeated classification +# Caches results by normalized message content +# Default: true +# TOOL_NEEDS_CLASSIFICATION_CACHE_ENABLED=true + +# Enable LLM fallback when whitelist doesn't match +# If false, defaults to "needs tools" when whitelist misses +# Default: true +# TOOL_NEEDS_CLASSIFICATION_LLM_ENABLED=true + # ============================================================================== # Performance & Security # ============================================================================== @@ -334,6 +513,37 @@ HOT_RELOAD_ENABLED=true # Debounce delay in ms (prevents rapid reloads) HOT_RELOAD_DEBOUNCE_MS=1000 +# ============================================================================== +# LLM Audit Logging +# ============================================================================== + +# Enable LLM audit logging (default: false) +# Logs all LLM requests/responses for debugging and analysis +LLM_AUDIT_ENABLED=false + +# Audit log file path +# LLM_AUDIT_LOG_FILE=./logs/llm-audit.log + +# Maximum content length per field (characters) - controls log file size +# LLM_AUDIT_MAX_CONTENT_LENGTH=5000 +# LLM_AUDIT_MAX_SYSTEM_LENGTH=2000 +# LLM_AUDIT_MAX_USER_LENGTH=3000 +# LLM_AUDIT_MAX_RESPONSE_LENGTH=3000 + +# Log rotation settings +# LLM_AUDIT_MAX_FILES=30 +# LLM_AUDIT_MAX_SIZE=100M + +# Include annotation metadata in audit logs (default: true) +# LLM_AUDIT_ANNOTATIONS=true + +# Deduplication - reduces log size by referencing repeated content +# LLM_AUDIT_DEDUP_ENABLED=true +# LLM_AUDIT_DEDUP_MIN_SIZE=500 +# LLM_AUDIT_DEDUP_CACHE_SIZE=100 +# LLM_AUDIT_DEDUP_SANITIZE=true +# LLM_AUDIT_DEDUP_SESSION_CACHE=true + # ============================================================================== # Quick Start Examples # ============================================================================== diff --git a/README.md b/README.md index 69d017a..358d93e 100644 
--- a/README.md +++ b/README.md @@ -287,6 +287,7 @@ Lynkr supports [ClawdBot](https://github.com/openclaw/openclaw) via its OpenAI-c - ✅ **Streaming Support** - Real-time token streaming for all providers - ✅ **Memory System** - Titans-inspired long-term memory with surprise-based filtering - ✅ **Tool Calling** - Full tool support with server and passthrough execution modes +- ✅ **Progress Reporting** - Real-time agent execution tracking with WebSocket broadcasting (port 8765) - ✅ **Production Ready** - Battle-tested with 400+ tests, observability, and error resilience - ✅ **Node 20-25 Support** - Works with latest Node.js versions including v25 - ✅ **Semantic Caching** - Cache responses for similar prompts (requires embeddings) @@ -318,6 +319,76 @@ OLLAMA_EMBEDDINGS_ENDPOINT=http://localhost:11434/api/embeddings --- +## Progress Reporting + +Lynkr emits **real-time progress events** throughout agent execution, enabling comprehensive monitoring of tool execution, model invocations, and reasoning steps. 
These events are: +- Emitted as Node.js events internally +- Automatically broadcasted via WebSocket (port 8765) for external clients +- Logged for observability + +**WebSocket Server (for External Clients):** +- **Port**: `8765` +- **Endpoint**: `ws://localhost:8765` +- **Required Dependency**: `ws` (auto-installed with `npm install`) + +**Events Emitted:** +- `agent_loop_started` / `agentLoopCompleted` - Agent execution lifecycle +- `agent_loop_step_started` - Individual step in agent reasoning +- `model_invocation_started` / `modelInvocationCompleted` - LLM calls with provider info +- `tool_execution_started` / `toolExecutionCompleted` - Tool execution with request/response previews + +**Built-in Progress Listener (Python):** + +Lynkr includes a ready-to-use Python client that connects to the WebSocket server and displays formatted progress updates: + +```bash +# Install Python dependencies (one-time) +pip install websockets + +# Run the listener in one terminal +python tools/progress-listener.py + +# In another terminal, run Lynkr and Claude Code +npm start +claude "Your prompt" +``` + +**Features:** +- 🎨 Color-coded output with timestamps +- 🔄 Real-time agent hierarchy tracking (shows parent/child agent relationships) +- ⏱️ Duration and token tracking for model invocations +- 🛠️ Tool execution details with request/response previews +- 🌐 Remote monitoring: `python tools/progress-listener.py --host 192.168.1.100` +- 🔧 Environment variables: `LYNKR_PROGRESS_HOST` and `LYNKR_PROGRESS_PORT` + +**Custom Python Client Example:** +```python +import json +import asyncio +import websockets + +async def monitor_progress(): + uri = "ws://localhost:8765" + async with websockets.connect(uri) as websocket: + while True: + event = await websocket.recv() + data = json.loads(event) + print(f"Event: {data['type']}") + print(f"Data: {json.dumps(data, indent=2)}") + +asyncio.run(monitor_progress()) +``` + +**Use Cases:** +- Monitor tool execution in real-time during Claude Code 
CLI runs +- Track agent reasoning steps and model invocations +- Build custom dashboards showing agent progress +- Debug multi-step agentic workflows +- Troubleshoot subagent spawning and routing +- Monitor remote Lynkr instances + +--- + ## Architecture ``` diff --git a/config/tool-whitelist-user.json.example b/config/tool-whitelist-user.json.example new file mode 100644 index 0000000..c34df82 --- /dev/null +++ b/config/tool-whitelist-user.json.example @@ -0,0 +1,16 @@ +{ + "version": "1.0", + "description": "User whitelist - extends default whitelist without modifying it. Copy to tool-whitelist-user.json and add your custom patterns.", + "needsTools": [ + "bd", + "bd *", + "make", + "make *", + "cargo", + "cargo *" + ], + "noTools": [ + "what does bd do", + "how does * work" + ] +} diff --git a/package-lock.json b/package-lock.json index 2befc91..38d1706 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,7 +12,6 @@ "@azure/openai": "^2.0.0", "@babel/parser": "^7.29.0", "@babel/traverse": "^7.29.0", - "@toon-format/toon": "^2.1.0", "compression": "^1.7.4", "diff": "^5.2.0", "dotenv": "^16.4.5", @@ -23,7 +22,8 @@ "openai": "^6.14.0", "pino": "^8.17.2", "pino-http": "^8.6.0", - "undici": "^6.22.0" + "undici": "^6.22.0", + "ws": "^8.19.0" }, "bin": { "lynkr": "bin/cli.js", @@ -664,12 +664,6 @@ "license": "BSD-3-Clause", "optional": true }, - "node_modules/@toon-format/toon": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/@toon-format/toon/-/toon-2.1.0.tgz", - "integrity": "sha512-JwWptdF5eOA0HaQxbKAzkpQtR4wSWTEfDlEy/y3/4okmOAX1qwnpLZMmtEWr+ncAhTTY1raCKH0kteHhSXnQqg==", - "license": "MIT" - }, "node_modules/@types/node": { "version": "25.2.2", "resolved": "https://registry.npmjs.org/@types/node/-/node-25.2.2.tgz", @@ -4430,6 +4424,27 @@ "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", "license": "ISC" }, + "node_modules/ws": { + "version": "8.19.0", + "resolved": 
"https://registry.npmjs.org/ws/-/ws-8.19.0.tgz", + "integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==", + "license": "MIT", + "engines": { + "node": ">=10.0.0" + }, + "peerDependencies": { + "bufferutil": "^4.0.1", + "utf-8-validate": ">=5.0.2" + }, + "peerDependenciesMeta": { + "bufferutil": { + "optional": true + }, + "utf-8-validate": { + "optional": true + } + } + }, "node_modules/y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", diff --git a/package.json b/package.json index 0590bab..b0f3a2b 100644 --- a/package.json +++ b/package.json @@ -14,12 +14,12 @@ "dev": "nodemon index.js", "lint": "eslint src index.js", "test": "npm run test:unit && npm run test:performance", - "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", - "test:memory": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", - "test:new-features": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js", - "test:performance": 
"DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js", - "test:benchmark": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-benchmark.js", - "test:quick": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js", + "test:unit": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js test/tool-execution-provider.test.js test/parsers.test.js test/qwen3-markdown-extraction.test.js test/tool-call-cleaning.test.js", + "test:memory": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", + "test:new-features": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js", + "test:performance": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && NODE_ENV=test DATABRICKS_API_KEY=test-key 
DATABRICKS_API_BASE=http://test.com node test/performance-tests.js", + "test:benchmark": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-benchmark.js", + "test:quick": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js", "test:all": "npm run test:unit && npm run test:performance && npm run test:benchmark" }, "keywords": [ @@ -47,7 +47,6 @@ "@azure/openai": "^2.0.0", "@babel/parser": "^7.29.0", "@babel/traverse": "^7.29.0", - "@toon-format/toon": "^2.1.0", "compression": "^1.7.4", "diff": "^5.2.0", "dotenv": "^16.4.5", @@ -58,7 +57,8 @@ "openai": "^6.14.0", "pino": "^8.17.2", "pino-http": "^8.6.0", - "undici": "^6.22.0" + "undici": "^6.22.0", + "ws": "^8.19.0" }, "optionalDependencies": { "better-sqlite3": "^12.6.2", diff --git a/src/agents/executor.js b/src/agents/executor.js index 39be0ab..8d79056 100644 --- a/src/agents/executor.js +++ b/src/agents/executor.js @@ -162,22 +162,14 @@ class SubagentExecutor { payload.tools = filteredTools; } - // Determine provider based on model family. - // Subagents should use the currently configured MODEL_PROVIDER and avoid - // hard-fallbacks to Azure when Azure is not selected/configured. - let forceProvider = null; - const modelLower = String(payload.model || "").toLowerCase(); - const isClaudeFamilyModel = - modelLower.includes("claude") || - modelLower.includes("sonnet") || - modelLower.includes("haiku") || - modelLower.includes("opus"); - const isGptFamilyModel = modelLower.includes("gpt"); - - if (isClaudeFamilyModel || isGptFamilyModel) { - const config = require('../config'); - // `type` is the canonical key; `provider` kept as legacy fallback. 
- forceProvider = config.modelProvider?.type || config.modelProvider?.provider || null; + // Subagents always use the same provider as the main session + const config = require('../config'); + const forceProvider = config.modelProvider?.type || null; + + // For Ollama: agent definitions use Claude names ("haiku" → "claude-3-haiku-20240307") + // which Ollama doesn't know. Override to use the configured Ollama model. + if (forceProvider === 'ollama' && config.ollama?.model) { + payload.model = config.ollama.model; } logger.debug({ diff --git a/src/agents/tool-agent-mapper.js b/src/agents/tool-agent-mapper.js new file mode 100644 index 0000000..c602c1f --- /dev/null +++ b/src/agents/tool-agent-mapper.js @@ -0,0 +1,64 @@ +/** + * Maps tool names from "Invoking tool(s):" text to appropriate subagent types. + * Used when models (e.g. GLM-4.7) output tool invocations as plain text + * instead of structured tool_calls — we auto-spawn a subagent to do the work. + */ + +// Tool name → agent type mapping +// "general-purpose" agents can read AND write; "Explore" agents are read-only. +const TOOL_TO_AGENT = { + Read: 'Explore', + Grep: 'Explore', + Glob: 'Explore', + workspace_search: 'Explore', + workspace_symbol_search: 'Explore', + Edit: 'general-purpose', + Write: 'general-purpose', + Bash: 'general-purpose', + // Unmapped tools default to 'Explore' (safe read-only fallback) +}; + +/** + * Determine which agent type to spawn based on tool names the model mentioned. + * Returns the "strongest" agent needed: + * - If ANY tool maps to 'general-purpose', return 'general-purpose' + * - Otherwise return 'Explore' + * + * @param {string[]} mentionedTools - e.g. 
["Read", "Read", "Grep"] + * @returns {string} Agent type name + */ +function mapToolsToAgentType(mentionedTools) { + if (!Array.isArray(mentionedTools) || mentionedTools.length === 0) { + return 'Explore'; // safe default + } + + for (const tool of mentionedTools) { + if (TOOL_TO_AGENT[tool] === 'general-purpose') { + return 'general-purpose'; + } + } + + return 'Explore'; +} + +/** + * Build a task prompt for the subagent that will fulfil the model's intent. + * + * @param {string} userText - The last user message (what they asked) + * @param {string} modelText - The model's raw text response (includes "Invoking tool(s):…") + * @param {string[]} mentionedTools - Parsed tool names from the model text + * @returns {string} Prompt to pass to spawnAgent() + */ +function buildSubagentPrompt(userText, modelText, mentionedTools) { + const toolList = [...new Set(mentionedTools)].join(', '); + return [ + `The user asked:\n${userText}`, + '', + `The model intended to use these tools: ${toolList}`, + '', + 'Complete this task using the tools listed above.', + 'Return a concise summary of your findings or actions.', + ].join('\n'); +} + +module.exports = { mapToolsToAgentType, buildSubagentPrompt, TOOL_TO_AGENT }; diff --git a/src/api/health.js b/src/api/health.js index a190b3d..8bfb55f 100644 --- a/src/api/health.js +++ b/src/api/health.js @@ -195,13 +195,16 @@ async function checkDatabricks() { */ async function checkOllama() { try { - if (!config.ollama?.endpoint) { + if (!config.ollama?.endpoint && !config.ollama?.cloudEndpoint) { return { healthy: true, note: "No Ollama endpoint configured" }; } - const endpoint = `${config.ollama.endpoint}/api/tags`; + const { getOllamaHeaders, getOllamaEndpointForModel } = require("../clients/ollama-utils"); + const baseEndpoint = config.ollama.endpoint || getOllamaEndpointForModel(config.ollama.model); + const endpoint = `${baseEndpoint}/api/tags`; const response = await fetch(endpoint, { method: "GET", + headers: getOllamaHeaders(), 
signal: AbortSignal.timeout(5000), }); diff --git a/src/api/middleware/logging.js b/src/api/middleware/logging.js index e53faee..dc72b03 100644 --- a/src/api/middleware/logging.js +++ b/src/api/middleware/logging.js @@ -12,26 +12,92 @@ function maskHeaders(headers = {}) { return clone; } -const loggingMiddleware = pinoHttp({ +const baseLoggingMiddleware = pinoHttp({ logger, - customProps: (req) => ({ + autoLogging: false, // Disable automatic logging so we can log manually with bodies + customProps: (req, res) => ({ sessionId: req.sessionId ?? null, }), - customLogLevel: (req, res, err) => { - if (err || res.statusCode >= 500) return "error"; - if (res.statusCode >= 400) return "warn"; - return "info"; - }, - wrapSerializers: true, - serializers: { - req(req) { - return { +}); + +// Wrapper middleware to capture and log full request/response bodies +function loggingMiddleware(req, res, next) { + const startTime = Date.now(); + + // Log request with full body immediately + logger.info({ + sessionId: req.sessionId ?? 
null, + req: { + method: req.method, + url: req.url, + headers: maskHeaders(req.headers), + }, + requestBody: req.body, // Full request body without truncation + }, 'request started'); + + // Intercept res.write for streaming responses + const originalWrite = res.write; + const chunks = []; + res.write = function (chunk) { + if (chunk) { + chunks.push(Buffer.from(chunk)); + } + return originalWrite.apply(this, arguments); + }; + + // Intercept res.send to capture the body + const originalSend = res.send; + res.send = function (body) { + res._capturedBody = body; + + // Parse if it's a JSON string for better logging + if (typeof body === 'string') { + try { + res._capturedBody = JSON.parse(body); + } catch (e) { + res._capturedBody = body; + } + } + + return originalSend.call(this, body); + }; + + // Log response when finished + res.on('finish', () => { + const responseTime = Date.now() - startTime; + + // Capture streaming body if not already captured via send() + if (chunks.length > 0 && !res._capturedBody) { + const fullBody = Buffer.concat(chunks).toString('utf8'); + res._capturedBody = { + type: 'stream', + contentType: res.getHeader('content-type'), + size: fullBody.length, + preview: fullBody.substring(0, 1000) + }; + } + + const logLevel = res.statusCode >= 500 ? 'error' : res.statusCode >= 400 ? 'warn' : 'info'; + + logger[logLevel]({ + sessionId: req.sessionId ?? null, + req: { method: req.method, url: req.url, headers: maskHeaders(req.headers), - }; - }, - }, -}); + }, + res: { + statusCode: res.statusCode, + headers: res.getHeaders ? 
res.getHeaders() : res.headers, + }, + requestBody: req.body, // Full request body without truncation + responseBody: res._capturedBody, // Full response body without truncation + responseTime, + }, 'request completed'); + }); + + // Still call base middleware to set up req.log + baseLoggingMiddleware(req, res, next); +} module.exports = loggingMiddleware; diff --git a/src/api/middleware/request-logging.js b/src/api/middleware/request-logging.js index 8352e1a..cf2709e 100644 --- a/src/api/middleware/request-logging.js +++ b/src/api/middleware/request-logging.js @@ -25,13 +25,14 @@ function requestLoggingMiddleware(req, res, next) { // Add to response headers res.setHeader("X-Request-ID", requestId); - // Log request start +// Log request start with full body logger.info( { requestId, method: req.method, path: req.path || req.url, query: req.query, + body: req.body, // Full request body without truncation ip: req.ip || req.socket.remoteAddress, userAgent: req.headers["user-agent"], }, @@ -43,7 +44,18 @@ function requestLoggingMiddleware(req, res, next) { res.send = function (body) { const duration = Date.now() - startTime; - // Log request completion + // Parse body if it's a string + let responseBody = body; + if (typeof body === 'string') { + try { + responseBody = JSON.parse(body); + } catch (e) { + // Keep as string if not JSON + responseBody = body; + } + } + + // Log request completion with full request and response bodies logger.info( { requestId, @@ -52,6 +64,8 @@ function requestLoggingMiddleware(req, res, next) { status: res.statusCode, duration, contentLength: res.getHeader("content-length"), + requestBody: req.body, // Full request body for reference + responseBody, // Full response body without truncation }, "Request completed" ); diff --git a/src/api/openai-router.js b/src/api/openai-router.js index ab283e7..c0a7f1b 100644 --- a/src/api/openai-router.js +++ b/src/api/openai-router.js @@ -727,11 +727,11 @@ function getConfiguredProviders() { } // Check 
Ollama - if (config.ollama?.endpoint) { + if (config.ollama?.endpoint || config.ollama?.cloudEndpoint) { providers.push({ name: "ollama", type: "ollama", - models: [config.ollama.model || "qwen2.5-coder:7b"] + models: [config.ollama.model || "unknown"] }); } @@ -1026,11 +1026,10 @@ async function generateOllamaEmbeddings(inputs, embeddingConfig) { const input = inputs[i]; try { + const { getOllamaHeaders } = require("../clients/ollama-utils"); const response = await fetch(endpoint, { method: "POST", - headers: { - "Content-Type": "application/json" - }, + headers: getOllamaHeaders(), body: JSON.stringify({ model: model, prompt: input diff --git a/src/api/providers-handler.js b/src/api/providers-handler.js index 9b85848..bb3174c 100644 --- a/src/api/providers-handler.js +++ b/src/api/providers-handler.js @@ -125,14 +125,14 @@ function getConfiguredProviders() { } // Check Ollama - if (config.ollama?.endpoint) { + if (config.ollama?.endpoint || config.ollama?.cloudEndpoint) { providers.push({ name: "ollama", type: "ollama", - baseUrl: config.ollama.endpoint, + baseUrl: config.ollama.endpoint || config.ollama.cloudEndpoint, enabled: true, models: [ - { id: config.ollama.model || "qwen2.5-coder:7b", name: "Configured Model" } + { id: config.ollama.model || "unknown", name: "Configured Model" } ] }); } diff --git a/src/api/router.js b/src/api/router.js index b3ed198..95565e1 100644 --- a/src/api/router.js +++ b/src/api/router.js @@ -2,11 +2,13 @@ const express = require("express"); const { processMessage } = require("../orchestrator"); const { getSession } = require("../sessions"); const metrics = require("../metrics"); +const config = require("../config"); const { createRateLimiter } = require("./middleware/rate-limiter"); const openaiRouter = require("./openai-router"); const providersRouter = require("./providers-handler"); const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing"); const { validateCwd } = require("../workspace"); +const 
logger = require("../logger"); const router = express.Router(); @@ -121,6 +123,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { const wantsStream = Boolean(req.query?.stream === 'true' || req.body?.stream); const hasTools = Array.isArray(req.body?.tools) && req.body.tools.length > 0; + logger.info({ + sessionId: req.headers['x-claude-session-id'], + wantsStream, + hasTools, + willUseStreamingPath: wantsStream || hasTools + }, "=== REQUEST ROUTING DECISION ==="); + // Analyze complexity for routing headers (Phase 3) const complexity = analyzeComplexity(req.body); const routingHeaders = getRoutingHeaders({ @@ -243,6 +252,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { const msg = result.body; + // Surface error responses (e.g. tool_call_loop) as visible text + if (msg.error && !msg.content) { + const errorText = msg.error.message || msg.message || JSON.stringify(msg.error); + msg.content = [{ type: "text", text: `⚠️ ${errorText}` }]; + msg.stop_reason = "end_turn"; + } + // 1. message_start res.write(`event: message_start\n`); res.write(`data: ${JSON.stringify({ @@ -338,6 +354,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Legacy streaming wrapper (for tool-based requests that requested streaming) if (wantsStream && hasTools) { + logger.info({ + sessionId: req.headers['x-claude-session-id'], + pathType: 'legacy_streaming_wrapper', + wantsStream, + hasTools + }, "=== USING LEGACY STREAMING WRAPPER (TOOL-BASED WITH STREAMING) ==="); + metrics.recordStreamingStart(); res.set({ "Content-Type": "text/event-stream", @@ -359,6 +382,20 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { // Use proper Anthropic SSE format const msg = result.body; + // Surface error responses (e.g. 
tool_call_loop) as visible text + if (msg.error && !msg.content) { + const errorText = msg.error.message || msg.message || JSON.stringify(msg.error); + msg.content = [{ type: "text", text: `⚠️ ${errorText}` }]; + msg.stop_reason = "end_turn"; + } + + logger.info({ + sessionId: req.headers['x-claude-session-id'], + eventType: 'message_start', + streamingWithTools: true, + hasContent: !!(msg.content && msg.content.length > 0) + }, "=== SENDING SSE MESSAGE_START ==="); + // 1. message_start res.write(`event: message_start\n`); res.write(`data: ${JSON.stringify({ @@ -419,9 +456,52 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { res.write(`event: content_block_stop\n`); res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + } else if (block.type === "tool_result") { + // === TOOL_RESULT SSE STREAMING - ENTERED === + logger.info({ + blockIndex: i, + blockType: block.type, + toolUseId: block.tool_use_id, + contentType: typeof block.content, + contentLength: typeof block.content === 'string' ? block.content.length : JSON.stringify(block.content).length + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - START ==="); + + // Stream tool_result blocks so CLI can display actual tool output + res.write(`event: content_block_start\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_start", + index: i, + content_block: { type: "tool_result", tool_use_id: block.tool_use_id, content: "" } + })}\n\n`); + + // Stream the actual content + const content = typeof block.content === 'string' + ? 
block.content + : JSON.stringify(block.content); + + logger.info({ + blockIndex: i, + contentLength: content.length, + contentPreview: content.substring(0, 200) + }, "=== SSE: STREAMING TOOL_RESULT CONTENT ==="); + + res.write(`event: content_block_delta\n`); + res.write(`data: ${JSON.stringify({ + type: "content_block_delta", + index: i, + delta: { type: "tool_result_delta", content: content } + })}\n\n`); + + res.write(`event: content_block_stop\n`); + res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`); + + // === TOOL_RESULT SSE STREAMING - COMPLETED === + logger.info({ + blockIndex: i, + toolUseId: block.tool_use_id + }, "=== SSE: STREAMING TOOL_RESULT BLOCK - END ==="); } } - // 3. message_delta with stop_reason res.write(`event: message_delta\n`); res.write(`data: ${JSON.stringify({ @@ -446,6 +526,26 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { } }); + // Add tool execution provider headers + if (config.toolExecutionProvider) { + res.setHeader('X-Tool-Execution-Provider', config.toolExecutionProvider); + if (config.toolExecutionModel) { + res.setHeader('X-Tool-Execution-Model', config.toolExecutionModel); + } + if (config.toolExecutionCompareMode) { + res.setHeader('X-Tool-Execution-Compare-Mode', 'true'); + } + } + + // Add tool call comparison headers if available + if (result.toolCallComparison) { + res.setHeader('X-Tool-Call-Selected-Provider', result.toolCallComparison.selectedProvider); + res.setHeader('X-Tool-Call-Selection-Reason', result.toolCallComparison.reason); + if (result.toolCallComparison.scores) { + res.setHeader('X-Tool-Call-Scores', JSON.stringify(result.toolCallComparison.scores)); + } + } + if (result.headers) { Object.entries(result.headers).forEach(([key, value]) => { if (value !== undefined) { @@ -454,6 +554,16 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => { }); } + + // DIAGNOSTIC: Log response being sent to client + logger.info({ + status: 
result.status, + hasBody: !!result.body, + bodyKeys: result.body ? Object.keys(result.body) : [], + bodyType: typeof result.body, + contentLength: result.body ? JSON.stringify(result.body).length : 0 + }, "=== SENDING RESPONSE TO CLIENT ==="); + metrics.recordResponse(result.status); res.status(result.status).send(result.body); } catch (error) { diff --git a/src/budget/index.js b/src/budget/index.js index ca7294e..ae1941e 100644 --- a/src/budget/index.js +++ b/src/budget/index.js @@ -11,14 +11,13 @@ const logger = require('../logger'); class BudgetManager { constructor(options = {}) { this.enabled = options.enabled !== false; - let dbPath = null; if (!this.enabled || !Database) { this.enabled = false; return; } + const dbPath = path.join(process.cwd(), 'data', 'budgets.db'); try { - dbPath = path.join(process.cwd(), 'data', 'budgets.db'); const dbDir = path.dirname(dbPath); if (!fs.existsSync(dbDir)) { @@ -27,14 +26,13 @@ class BudgetManager { this.db = new Database(dbPath); this.initDatabase(); - logger.info({ dbPath }, 'Budget manager initialized'); } catch (err) { logger.warn({ err: err.message }, "BudgetManager: better-sqlite3 not available"); this.enabled = false; return; } - + logger.info({ dbPath }, 'Budget manager initialized'); } initDatabase() { diff --git a/src/cache/embeddings.js b/src/cache/embeddings.js index 4edb974..3691aad 100644 --- a/src/cache/embeddings.js +++ b/src/cache/embeddings.js @@ -16,12 +16,13 @@ const logger = require('../logger'); * @returns {Promise} - Embedding vector */ async function generateOllamaEmbedding(text) { + const { getOllamaHeaders } = require("../clients/ollama-utils"); const endpoint = config.ollama?.embeddingsEndpoint || 'http://localhost:11434/api/embeddings'; const model = config.ollama?.embeddingsModel || 'nomic-embed-text'; const response = await fetch(endpoint, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: getOllamaHeaders(), body: JSON.stringify({ model, prompt: text, diff --git 
a/src/clients/databricks.js b/src/clients/databricks.js index 9b536cd..fe44927 100644 --- a/src/clients/databricks.js +++ b/src/clients/databricks.js @@ -11,6 +11,7 @@ const { convertAnthropicToolsToOpenRouter } = require("./openrouter-utils"); const { detectModelFamily } = require("./bedrock-utils"); +const { getContextWindow } = require("../providers/context-window"); @@ -181,7 +182,7 @@ async function invokeDatabricks(body) { const databricksBody = { ...body }; // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0)) { databricksBody.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -222,7 +223,7 @@ async function invokeAzureAnthropic(body) { } // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(body.tools) || body.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(body.tools) || body.tools.length === 0)) { body.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -244,14 +245,20 @@ async function invokeAzureAnthropic(body) { } async function invokeOllama(body) { - if (!config.ollama?.endpoint) { - throw new Error("Ollama endpoint is not configured."); + if (!config.ollama?.endpoint && !config.ollama?.cloudEndpoint) { + throw new Error("Ollama endpoint is not configured. 
Set OLLAMA_ENDPOINT or OLLAMA_CLOUD_ENDPOINT."); } - const { convertAnthropicToolsToOllama, checkOllamaToolSupport } = require("./ollama-utils"); + const { convertAnthropicToolsToOllama, checkOllamaToolSupport, getOllamaHeaders, getOllamaEndpointForModel } = require("./ollama-utils"); - const endpoint = `${config.ollama.endpoint}/api/chat`; - const headers = { "Content-Type": "application/json" }; + // Resolve the target model FIRST so we can derive the correct endpoint + headers + const resolvedModel = body._suggestionModeModel + || (body._requestMode === 'tool_execution' && body.model ? body.model : null) + || config.ollama.model; + + const baseEndpoint = getOllamaEndpointForModel(resolvedModel); + const endpoint = `${baseEndpoint}/api/chat`; + const headers = getOllamaHeaders(resolvedModel); // Convert Anthropic messages format to Ollama format // Ollama expects content as string, not content blocks array @@ -308,14 +315,20 @@ async function invokeOllama(body) { }, 'Ollama: Removed consecutive duplicate roles from message sequence'); } + // Get context window size so Ollama doesn't silently truncate at its default (2048-4096). + // Without this, system prompt + tool schemas can exceed the default and get silently dropped. + const ctxWindow = await getContextWindow(); + const numCtx = ctxWindow > 0 ? ctxWindow : 65536; + const ollamaBody = { - model: body._suggestionModeModel || config.ollama.model, + model: resolvedModel, messages: deduplicated, stream: false, // Force non-streaming for Ollama - streaming format conversion not yet implemented options: { temperature: body.temperature ?? 0.7, num_predict: body.max_tokens ?? 4096, top_p: body.top_p ?? 
1.0, + num_ctx: numCtx, }, }; @@ -331,7 +344,7 @@ async function invokeOllama(body) { } // Check if model supports tools FIRST (before wasteful injection) - const supportsTools = await checkOllamaToolSupport(config.ollama.model); + const supportsTools = await checkOllamaToolSupport(ollamaBody.model); // Inject standard tools if client didn't send any (passthrough mode) let toolsToSend = body.tools; @@ -342,7 +355,7 @@ async function invokeOllama(body) { if (!supportsTools) { // Model doesn't support tools - don't inject them toolsToSend = null; - } else if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { + } else if (injectToolsOllama && !body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Model supports tools and none provided - inject them toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -370,14 +383,72 @@ async function invokeOllama(body) { } logger.info({ - model: config.ollama.model, + model: resolvedModel, + endpoint: baseEndpoint, toolCount, toolsInjected, supportsTools, + num_ctx: numCtx, toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? 
toolsToSend.map(t => t.name) : [] - }, `=== Ollama STANDARD TOOLS INJECTION for ${config.ollama.model} === ${logMessage}`); + }, `=== Ollama STANDARD TOOLS INJECTION for ${resolvedModel} === ${logMessage}`); + + // Try the request - if it fails with model errors, try to load the model + try { + const result = await performJsonRequest(endpoint, { headers, body: ollamaBody }, "Ollama"); + + // Check for Ollama-specific errors in the response + if (!result.ok && result.json?.error) { + const errorMsg = result.json.error.toLowerCase(); + + // Check if it's a model-not-loaded error + if (errorMsg.includes('model') && (errorMsg.includes('not found') || errorMsg.includes('not loaded') || errorMsg.includes('unavailable'))) { + logger.warn({ + model: resolvedModel, + error: result.json.error + }, "Ollama model error detected, attempting on-demand load"); + + // Try to ensure model is ready + const { ensureModelReady } = require('./ollama-startup'); + const loadResult = await ensureModelReady( + baseEndpoint, + resolvedModel, + config.ollama.keepAlive, + false // on-demand, not startup + ); - return performJsonRequest(endpoint, { headers, body: ollamaBody }, "Ollama"); + if (!loadResult.ready) { + // Model load failed - enhance error message + const enhancedError = { + ...result, + json: { + error: loadResult.error || result.json.error, + type: 'model_unavailable', + originalError: result.json.error + } + }; + return enhancedError; + } + + // Model loaded successfully - retry the original request + logger.info({ model: resolvedModel }, "Model loaded, retrying request"); + return performJsonRequest(endpoint, { headers, body: ollamaBody }, "Ollama"); + } + } + + return result; + } catch (err) { + // Network/connection errors + if (err.code === 'ECONNREFUSED' || err.message.includes('ECONNREFUSED')) { + logger.error({ + endpoint: baseEndpoint, + error: err.message + }, "Ollama connection refused"); + + throw new Error(`Ollama service unreachable at ${baseEndpoint}. 
Is it running?`); + } + + throw err; + } } async function invokeOpenRouter(body) { @@ -422,7 +493,7 @@ async function invokeOpenRouter(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -503,7 +574,7 @@ async function invokeAzureOpenAI(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -854,7 +925,7 @@ async function invokeOpenAI(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; @@ -956,7 +1027,7 @@ async function invokeLlamaCpp(body) { let toolsInjected = false; const injectToolsLlamacpp = process.env.INJECT_TOOLS_LLAMACPP !== "false"; - if (injectToolsLlamacpp && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { + if (injectToolsLlamacpp && !body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1039,7 +1110,7 @@ async function invokeLMStudio(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && 
(!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1086,7 +1157,7 @@ async function invokeBedrock(body) { let toolsToSend = body.tools; let toolsInjected = false; - if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { + if (!body._noToolInjection && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.info({ @@ -1370,7 +1441,7 @@ async function invokeZai(body) { zaiBody.model = mappedModel; // Inject standard tools if client didn't send any (passthrough mode) - if (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) { + if (!body._noToolInjection && (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0)) { zaiBody.tools = STANDARD_TOOLS; logger.info({ injectedToolCount: STANDARD_TOOLS.length, @@ -1821,6 +1892,9 @@ async function invokeModel(body, options = {}) { const registry = getCircuitBreakerRegistry(); const healthTracker = getHealthTracker(); + // Extract call purpose from options (conversation or tool_execution) + const callPurpose = options.callPurpose || 'conversation'; + // Analyze complexity and determine provider const complexityAnalysis = analyzeComplexity(body); const initialProvider = options.forceProvider ?? 
determineProvider(body); @@ -1829,6 +1903,7 @@ async function invokeModel(body, options = {}) { // Build routing decision object for response headers const routingDecision = { provider: initialProvider, + callPurpose: callPurpose, score: complexityAnalysis.score, threshold: complexityAnalysis.threshold, mode: complexityAnalysis.mode, @@ -1912,6 +1987,7 @@ async function invokeModel(body, options = {}) { return { ...result, actualProvider: initialProvider, + callPurpose: callPurpose, routingDecision, }; @@ -2004,6 +2080,7 @@ async function invokeModel(body, options = {}) { return { ...fallbackResult, actualProvider: fallbackProvider, + callPurpose: callPurpose, routingDecision: { ...routingDecision, provider: fallbackProvider, diff --git a/src/clients/ollama-startup.js b/src/clients/ollama-startup.js index 4e3565f..9f04174 100644 --- a/src/clients/ollama-startup.js +++ b/src/clients/ollama-startup.js @@ -1,120 +1,393 @@ const config = require("../config"); const logger = require("../logger"); +const { getOllamaHeaders, isCloudModel, getOllamaEndpointForModel } = require("./ollama-utils"); const POLL_INTERVAL_MS = 5000; // 5 seconds -const MAX_WAIT_MS = 60000; // 60 seconds +const MAX_WAIT_MS = parseInt(process.env.OLLAMA_STARTUP_TIMEOUT_MS || "300000", 10); // 5 minutes default /** - * Wait for Ollama server to be ready and model to be loaded. - * Only runs when Ollama is the configured provider. 
- * - * @returns {Promise} true if Ollama is ready, false if timeout + * Check if Ollama server is reachable + * @returns {Promise} */ -async function waitForOllama() { - const endpoint = config.ollama?.endpoint; - const model = config.ollama?.model; +async function checkServerReachable(endpoint) { + try { + const response = await fetch(`${endpoint}/api/tags`, { + headers: getOllamaHeaders(), + signal: AbortSignal.timeout(5000) + }); + return response.ok; + } catch (err) { + return false; + } +} - if (!endpoint) { - return true; +/** + * Check if model exists locally (downloaded) + * @returns {Promise<{exists: boolean, models: string[]}>} + */ +async function checkModelExists(endpoint, model) { + try { + const response = await fetch(`${endpoint}/api/tags`, { + headers: getOllamaHeaders(), + signal: AbortSignal.timeout(5000) + }); + + if (!response.ok) { + return { exists: false, models: [] }; + } + + const data = await response.json(); + const models = data.models || []; + const modelNames = models.map(m => m.name); + + const exists = modelNames.some(name => + name === model || name.startsWith(`${model}:`) + ); + + return { exists, models: modelNames }; + } catch (err) { + logger.debug({ error: err.message }, "Failed to check model existence"); + return { exists: false, models: [] }; } +} - console.log(`[Ollama] Waiting for server at ${endpoint}...`); - console.log(`[Ollama] Model: ${model}`); +/** + * Check if model is currently loaded in memory + * @returns {Promise} + */ +async function checkModelLoaded(endpoint, model) { + try { + const response = await fetch(`${endpoint}/api/ps`, { + headers: getOllamaHeaders(), + signal: AbortSignal.timeout(5000) + }); + + if (!response.ok) { + return false; + } + + const data = await response.json(); + const loadedModels = data.models || []; + + return loadedModels.some(m => + m.name === model || m.name.startsWith(`${model}:`) + ); + } catch (err) { + logger.debug({ error: err.message }, "Failed to check if model is 
loaded"); + return false; + } +} + +/** + * Pull (download) a model from Ollama registry + * @returns {Promise<{success: boolean, error?: string}>} + */ +async function pullModel(endpoint, model) { + console.log(`[Ollama] Model "${model}" not found locally, pulling from registry...`); + logger.info({ model }, "Pulling Ollama model"); + + try { + const response = await fetch(`${endpoint}/api/pull`, { + method: "POST", + headers: getOllamaHeaders(), + body: JSON.stringify({ name: model, stream: false }), + signal: AbortSignal.timeout(300000) // 5 minutes for model download + }); + + if (!response.ok) { + const errorText = await response.text(); + let errorMsg = "Unknown error"; + + try { + const errorData = JSON.parse(errorText); + errorMsg = errorData.error || errorText; + } catch { + errorMsg = errorText; + } + + // Parse specific error cases + if (errorMsg.includes("not found") || errorMsg.includes("does not exist")) { + return { + success: false, + error: `Model "${model}" not found in Ollama registry. Check available models at: https://ollama.com/library` + }; + } else if (errorMsg.includes("connect") || errorMsg.includes("ECONNREFUSED")) { + return { + success: false, + error: "Cannot pull model: Ollama service unreachable" + }; + } else if (errorMsg.includes("disk") || errorMsg.includes("space")) { + return { + success: false, + error: "Model pull failed: insufficient disk space" + }; + } else if (errorMsg.includes("permission")) { + return { + success: false, + error: `Model pull failed: permission denied. Try: sudo ollama pull ${model}` + }; + } else if (errorMsg.includes("network") || errorMsg.includes("timeout")) { + return { + success: false, + error: "Model pull failed: network error. 
Check internet connection" + }; + } + + return { + success: false, + error: `Model pull failed: ${errorMsg}` + }; + } + + console.log(`[Ollama] Model "${model}" pulled successfully`); + logger.info({ model }, "Ollama model pulled successfully"); + return { success: true }; + + } catch (err) { + if (err.name === "AbortError") { + return { + success: false, + error: "Model pull timeout (5 minutes). Model may be too large or connection too slow." + }; + } + return { + success: false, + error: `Model pull failed: ${err.message}` + }; + } +} + +/** + * Load a model into memory by sending a simple message + * @returns {Promise<{success: boolean, error?: string}>} + */ +async function loadModel(endpoint, model, keepAlive) { + console.log(`[Ollama] Loading model "${model}" into memory...`); + logger.info({ model }, "Loading Ollama model"); + + try { + const body = { + model, + messages: [{ role: "user", content: "hi" }], + stream: false + }; + + // Use keep_alive setting if configured + if (keepAlive !== undefined) { + body.keep_alive = /^-?\d+$/.test(keepAlive) + ? parseInt(keepAlive, 10) + : keepAlive; + } + + const response = await fetch(`${endpoint}/api/chat`, { + method: "POST", + headers: getOllamaHeaders(model), + body: JSON.stringify(body), + signal: AbortSignal.timeout(120000) // 2 minutes for model load + }); + if (!response.ok) { + const errorText = await response.text(); + return { + success: false, + error: `Model load failed: ${errorText}` + }; + } + + console.log(`[Ollama] Model "${model}" loaded successfully`); + logger.info({ model }, "Ollama model loaded into memory"); + return { success: true }; + + } catch (err) { + if (err.name === "AbortError") { + return { + success: false, + error: "Model load timeout (2 minutes). Model may be very large." 
+ }; + } + return { + success: false, + error: `Model load failed: ${err.message}` + }; + } +} + +/** + * Ensure Ollama model is ready (exists, pulled if needed, loaded) + * Can be called at startup or on-demand + * + * @param {boolean} isStartup - true if called during server startup, false for on-demand + * @returns {Promise<{ready: boolean, error?: string}>} + */ +async function ensureModelReady(endpoint, model, keepAlive, isStartup = true) { const startTime = Date.now(); + const maxWaitMs = isStartup ? MAX_WAIT_MS : 180000; // 3 minutes for on-demand let attempt = 0; - while (Date.now() - startTime < MAX_WAIT_MS) { + const cloud = isCloudModel(model); + + while (Date.now() - startTime < maxWaitMs) { attempt++; const elapsed = Math.round((Date.now() - startTime) / 1000); - try { - // Check if server is reachable - const tagsResponse = await fetch(`${endpoint}/api/tags`, { - signal: AbortSignal.timeout(5000) - }); + // Cloud models skip local-only checks (server reachable, model exists/loaded, pull). + // Just send a test chat request to verify the cloud endpoint responds. 
+ if (cloud) { + logger.info({ model, endpoint, attempt }, "Cloud model detected, skipping local checks"); + const loadResult = await loadModel(endpoint, model, keepAlive); + if (loadResult.success) { + console.log(`[Ollama] Cloud model "${model}" ready (${elapsed}s)`); + return { ready: true }; + } + if (!isStartup) { + return { ready: false, error: loadResult.error }; + } + logger.debug({ error: loadResult.error }, "Cloud model load failed, will retry"); + await sleep(POLL_INTERVAL_MS); + continue; + } - if (!tagsResponse.ok) { - console.log(`[Ollama] Server not ready (${elapsed}s elapsed)...`); + // Step 1: Check if server is reachable + const serverReachable = await checkServerReachable(endpoint); + if (!serverReachable) { + if (isStartup) { + console.log(`[Ollama] Waiting for server (${elapsed}s elapsed)...`); await sleep(POLL_INTERVAL_MS); continue; + } else { + return { + ready: false, + error: `Ollama service unreachable at ${endpoint}. Is it running?` + }; } + } - const tagsData = await tagsResponse.json(); - const models = tagsData.models || []; - const modelNames = models.map(m => m.name); - - // Check if our model is available - const modelReady = modelNames.some(name => - name === model || name.startsWith(`${model}:`) - ); - - if (modelReady) { - console.log(`[Ollama] Server ready, model "${model}" available (${elapsed}s)`); - logger.info({ - endpoint, - model, - elapsedSeconds: elapsed, - attempts: attempt - }, "Ollama startup check passed"); - return true; + // Step 2: Check if model is loaded in memory + const loaded = await checkModelLoaded(endpoint, model); + if (loaded) { + if (isStartup) { + console.log(`[Ollama] Model "${model}" ready (${elapsed}s)`); } - - // Model not yet available - try to preload it - console.log(`[Ollama] Server up, loading model "${model}" (${elapsed}s elapsed)...`); logger.info({ endpoint, model, - availableModels: modelNames - }, "Ollama server up, preloading model"); + elapsedSeconds: elapsed, + attempts: attempt + }, 
"Ollama model ready"); + return { ready: true }; + } - // Preload model with empty generate request - try { - const preloadBody = { model, prompt: "", stream: false }; - - // Use keep_alive setting if configured - if (config.ollama.keepAlive !== undefined) { - const keepAlive = config.ollama.keepAlive; - preloadBody.keep_alive = /^-?\d+$/.test(keepAlive) - ? parseInt(keepAlive, 10) - : keepAlive; - } - - await fetch(`${endpoint}/api/generate`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(preloadBody), - signal: AbortSignal.timeout(30000) - }); - } catch (preloadErr) { - // Ignore preload errors, we'll check again on next iteration - logger.debug({ error: preloadErr.message }, "Ollama model preload request failed (will retry)"); + // Step 3: Check if model exists locally + const { exists, models } = await checkModelExists(endpoint, model); + + if (!exists) { + // Model not downloaded - try to pull it + console.log(`[Ollama] Model "${model}" not found locally (${elapsed}s elapsed)`); + logger.info({ + model, + availableModels: models + }, "Ollama model not found locally, attempting pull"); + + const pullResult = await pullModel(endpoint, model); + if (!pullResult.success) { + // Pull failed - return error immediately, don't keep retrying + return { + ready: false, + error: pullResult.error + }; } - } catch (err) { - console.log(`[Ollama] Waiting for server (${elapsed}s elapsed)...`); - logger.debug({ - error: err.message, - attempt, - elapsed - }, "Ollama server not yet reachable"); + // Pull succeeded - continue to load it + console.log(`[Ollama] Model pulled, now loading...`); } - await sleep(POLL_INTERVAL_MS); + // Step 4: Model exists but not loaded - load it + console.log(`[Ollama] Loading model "${model}" (${elapsed}s elapsed)...`); + const loadResult = await loadModel(endpoint, model, keepAlive); + + if (!loadResult.success) { + if (isStartup) { + logger.debug({ error: loadResult.error }, "Model load failed, 
will retry"); + await sleep(POLL_INTERVAL_MS); + continue; + } else { + return { + ready: false, + error: loadResult.error + }; + } + } + + // Load succeeded — the model responded to a chat request, so it's ready. + // Skip the /api/ps re-check: cloud models (e.g. ollama.com) may not + // appear in /api/ps, which would cause an infinite retry loop. + console.log(`[Ollama] Model "${model}" ready (${elapsed}s)`); + return { ready: true }; + } + + // Timeout + if (isStartup) { + console.error(`[Ollama] Timeout after ${Math.round(maxWaitMs/1000)}s - model not ready`); + console.error(`[Ollama] Continuing startup, but requests may fail`); + logger.warn({ + endpoint, + model, + maxWaitMs + }, "Ollama startup check timed out - continuing anyway"); + return { ready: false }; + } else { + return { + ready: false, + error: `Timeout after ${Math.round(maxWaitMs/1000)}s waiting for model "${model}" to load` + }; + } +} + +/** + * Wait for Ollama server to be ready and model to be loaded. + * Only runs when Ollama is the configured provider. + * + * @returns {Promise} true if Ollama is ready, false if timeout + */ +async function waitForOllama() { + const model = config.ollama?.model; + const keepAlive = config.ollama?.keepAlive; + const endpoint = getOllamaEndpointForModel(model); + + if (!config.ollama?.endpoint && !config.ollama?.cloudEndpoint) { + return true; + } + + console.log(`[Ollama] Waiting for server at ${endpoint}...`); + console.log(`[Ollama] Model: ${model}${isCloudModel(model) ? 
' (cloud)' : ''}`); + console.log(`[Ollama] Timeout: ${Math.round(MAX_WAIT_MS/1000)}s`); + + const result = await ensureModelReady(endpoint, model, keepAlive, true); + + // Also pre-load the tool execution model if it uses Ollama and is a different model + const toolProvider = config.toolExecutionProvider; + const toolModel = config.toolExecutionModel; + if (result.ready && toolProvider === 'ollama' && toolModel && toolModel !== model) { + const toolEndpoint = getOllamaEndpointForModel(toolModel); + console.log(`[Ollama] Also loading tool execution model: ${toolModel}${isCloudModel(toolModel) ? ' (cloud)' : ''}`); + const toolResult = await ensureModelReady(toolEndpoint, toolModel, keepAlive, true); + if (!toolResult.ready) { + console.warn(`[Ollama] Tool execution model "${toolModel}" failed to load - tool routing may fall back`); + } } - console.error(`[Ollama] Timeout after 60s - server or model not ready`); - console.error(`[Ollama] Continuing startup, but requests may fail`); - logger.warn({ - endpoint, - model, - maxWaitMs: MAX_WAIT_MS - }, "Ollama startup check timed out - continuing anyway"); - return false; + return result.ready; } function sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } -module.exports = { waitForOllama }; +module.exports = { + waitForOllama, + ensureModelReady, + checkModelLoaded, + checkModelExists, + pullModel, + loadModel +}; diff --git a/src/clients/ollama-utils.js b/src/clients/ollama-utils.js index 7582f05..8c5036c 100644 --- a/src/clients/ollama-utils.js +++ b/src/clients/ollama-utils.js @@ -4,6 +4,57 @@ const logger = require("../logger"); // Cache for model capabilities const modelCapabilitiesCache = new Map(); +/** + * Check if a model name indicates a cloud-hosted model. + * Cloud models follow Ollama's naming convention with "-cloud" in the tag + * (e.g. "deepseek-v3.1:671b-cloud", "nemotron-3-nano:30b-cloud"). 
+ */ +function isCloudModel(modelName) { + if (!modelName || typeof modelName !== 'string') return false; + const lower = modelName.toLowerCase(); + // Match cloud indicators in Ollama model naming: + // - Tag ends with "-cloud" (e.g., "deepseek-v3.1:671b-cloud") + // - Tag is exactly "cloud" (e.g., "glm-4.7:cloud") + return lower.endsWith('-cloud') || lower.endsWith(':cloud'); +} + +/** + * Get the correct Ollama endpoint for a given model. + * Cloud models route to OLLAMA_CLOUD_ENDPOINT; local models route to OLLAMA_ENDPOINT. + * Falls back to the standard endpoint if no cloud endpoint is configured. + */ +function getOllamaEndpointForModel(modelName) { + if (isCloudModel(modelName) && config.ollama?.cloudEndpoint) { + return config.ollama.cloudEndpoint; + } + if (config.ollama?.endpoint) { + return config.ollama.endpoint; + } + // Cloud-only mode: use cloud endpoint even for non-cloud-named models + if (config.ollama?.cloudEndpoint) { + return config.ollama.cloudEndpoint; + } + return 'http://localhost:11434'; +} + +/** + * Build standard headers for Ollama API requests. + * Includes Authorization header when OLLAMA_API_KEY is configured. + * When a cloud endpoint is configured, auth is only sent for cloud models + * (to avoid leaking keys to local endpoints). When no cloud endpoint is + * configured, auth is sent to all requests (legacy/single-endpoint behavior). 
+ */ +function getOllamaHeaders(modelName) { + const headers = { "Content-Type": "application/json" }; + if (config.ollama?.apiKey) { + // Send auth if: model is cloud, OR no cloud endpoint configured (legacy compat) + if (isCloudModel(modelName) || !config.ollama?.cloudEndpoint) { + headers["Authorization"] = `Bearer ${config.ollama.apiKey}`; + } + } + return headers; +} + /** * Known models with tool calling support */ @@ -15,7 +66,13 @@ const TOOL_CAPABLE_MODELS = new Set([ "mistral-nemo", "firefunction-v2", "kimi-k2.5", - "nemotron" + "nemotron", + "glm-4", + "glm4", + "qwen3", + "qwen3-coder", + "deepseek-v3", + "kimi-k2" ]); /** @@ -93,6 +150,30 @@ function convertAnthropicToolsToOllama(anthropicTools) { })); } +/** + * Extract tool calls from text using the per-model parser registry. + * + * Delegates to src/parsers/ — each model family has its own parser class. + * Falls back to GenericToolParser for unknown models. + * + * @param {string} text - Text content that may contain tool calls + * @param {string} [modelName] - Optional model name for model-specific strategies + * @returns {object[]|null} - Array of tool call objects in Ollama format, or null if none found + */ +function extractToolCallsFromText(text, modelName) { + if (!text || typeof text !== 'string') return null; + + const { getParserForModel } = require('../parsers'); + const parser = getParserForModel(modelName); + return parser.extractToolCallsFromText(text); +} + +// Backward-compatible wrapper — returns first match only +function extractToolCallFromText(text, modelName) { + const results = extractToolCallsFromText(text, modelName); + return results ? 
results[0] : null; +} + /** * Convert Ollama tool call response to Anthropic format * @@ -121,14 +202,34 @@ function convertAnthropicToolsToOllama(anthropicTools) { * stop_reason: "tool_use" * } */ -function convertOllamaToolCallsToAnthropic(ollamaResponse) { +function convertOllamaToolCallsToAnthropic(ollamaResponse, modelName = null) { const message = ollamaResponse?.message || {}; - const toolCalls = message.tool_calls || []; - const textContent = message.content || ""; + let toolCalls = message.tool_calls || []; + let textContent = message.content || ""; + let toolCallsWereExtracted = false; + + // FALLBACK: If no tool_calls but text contains tool calls, parse them + if (toolCalls.length === 0 && textContent) { + const extracted = extractToolCallsFromText(textContent, modelName); + if (extracted && extracted.length > 0) { + logger.info({ + extractedCount: extracted.length, + toolNames: extracted.map(tc => tc.function?.name), + modelName + }, "Using fallback text parsing for tool calls"); + toolCalls = extracted; + toolCallsWereExtracted = true; + + // Strip extracted tool calls from text content to prevent double-display + // This ensures tool results are shown instead of the command text + textContent = ""; + logger.debug("Stripped tool call text from response to allow tool results display"); + } + } const contentBlocks = []; - // Add text content if present + // Add text content if present (will be empty if tool calls were extracted) if (textContent && textContent.trim()) { contentBlocks.push({ type: "text", @@ -183,7 +284,7 @@ function convertOllamaToolCallsToAnthropic(ollamaResponse) { * Build complete Anthropic response from Ollama with tool calls */ function buildAnthropicResponseFromOllama(ollamaResponse, requestedModel) { - const { contentBlocks, stopReason } = convertOllamaToolCallsToAnthropic(ollamaResponse); + const { contentBlocks, stopReason } = convertOllamaToolCallsToAnthropic(ollamaResponse, requestedModel); // Ensure at least one content block 
const finalContent = contentBlocks.length > 0 @@ -211,10 +312,43 @@ function buildAnthropicResponseFromOllama(ollamaResponse, requestedModel) { }; } +/** + * Strip markdown code fences and prompt characters from a command string + * Exported for universal use in tool call cleaning. + * + * @param {string} command - Command that may contain markdown or prompt chars + * @returns {string} - Cleaned command + */ +function stripMarkdownFromCommand(command) { + if (!command || typeof command !== 'string') { + return command; + } + + let cleaned = command; + + // Check for code fence + const fenceRe = /```(?:bash|sh|shell|zsh|console|terminal)\s*\n([\s\S]*?)```/i; + const fenceMatch = command.match(fenceRe); + if (fenceMatch && fenceMatch[1]) { + cleaned = fenceMatch[1]; + } + + // Strip prompt characters from each line + cleaned = cleaned.replace(/^\s*[$#]\s+/gm, ''); + + return cleaned.trim(); +} + module.exports = { checkOllamaToolSupport, convertAnthropicToolsToOllama, convertOllamaToolCallsToAnthropic, buildAnthropicResponseFromOllama, modelNameSupportsTools, + extractToolCallFromText, + extractToolCallsFromText, + stripMarkdownFromCommand, + getOllamaHeaders, + isCloudModel, + getOllamaEndpointForModel, }; diff --git a/src/clients/standard-tools.js b/src/clients/standard-tools.js index 61ac791..b96f433 100644 --- a/src/clients/standard-tools.js +++ b/src/clients/standard-tools.js @@ -24,7 +24,7 @@ const STANDARD_TOOLS = [ }, { name: "Read", - description: "Reads a file from the local filesystem. You can access any file directly by using this tool. For files outside the workspace, the user must approve access first.", + description: "Reads a file from the local filesystem. You can access any file directly by using this tool.\n\nEXTERNAL FILE APPROVAL FLOW: When reading a file outside the workspace, the tool will return an [APPROVAL REQUIRED] message instead of the file content. 
When this happens you MUST: (1) Tell the user the file is outside the workspace and ask for permission. (2) If the user approves, call this tool again with the SAME file_path and set user_approved=true. (3) Only then will the file content be returned.", input_schema: { type: "object", properties: { @@ -74,6 +74,24 @@ const STANDARD_TOOLS = [ required: ["file_path", "old_string", "new_string"] } }, + { + name: "edit_patch", + description: "Apply unified diff patches to workspace files. Use this for complex multi-line edits or when you have a patch in unified diff format. For simple string replacements, use the Edit tool instead.", + input_schema: { + type: "object", + properties: { + file_path: { + type: "string", + description: "Relative path within workspace (e.g., 'src/main.js'). DO NOT use absolute paths." + }, + patch: { + type: "string", + description: "Unified diff patch to apply. Must be in standard diff format with @@ line numbers." + } + }, + required: ["file_path", "patch"] + } + }, { name: "Bash", description: "Executes a bash command in a persistent shell session. Use for terminal operations like git, npm, docker, etc. DO NOT use for file operations - use specialized tools instead.", diff --git a/src/config/index.js b/src/config/index.js index d75d045..ba071c4 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -1,7 +1,9 @@ const path = require("path"); const dotenv = require("dotenv"); -dotenv.config(); +// .env must be authoritative over shell env vars (e.g. stale exports in .bashrc). +// Skip override in test mode so tests can set process.env before requiring config. +dotenv.config({ override: process.env.NODE_ENV !== "test" }); function trimTrailingSlash(value) { if (typeof value !== "string") return value; @@ -83,12 +85,15 @@ const azureAnthropicEndpoint = process.env.AZURE_ANTHROPIC_ENDPOINT ?? null; const azureAnthropicApiKey = process.env.AZURE_ANTHROPIC_API_KEY ?? 
null; const azureAnthropicVersion = process.env.AZURE_ANTHROPIC_VERSION ?? "2023-06-01"; -const ollamaEndpoint = process.env.OLLAMA_ENDPOINT ?? "http://localhost:11434"; -const ollamaModel = process.env.OLLAMA_MODEL ?? "qwen2.5-coder:7b"; +const ollamaEndpoint = process.env.OLLAMA_ENDPOINT?.trim() || null; +const ollamaModel = process.env.OLLAMA_MODEL?.trim() || null; const ollamaTimeout = Number.parseInt(process.env.OLLAMA_TIMEOUT_MS ?? "120000", 10); const ollamaKeepAlive = process.env.OLLAMA_KEEP_ALIVE ?? undefined; +const ollamaApiKey = process.env.OLLAMA_API_KEY?.trim() || null; +const ollamaCloudEndpoint = process.env.OLLAMA_CLOUD_ENDPOINT?.trim() || null; // Accepts: duration strings ("10m", "24h"), numbers (seconds), -1 (permanent), 0 (immediate unload) -const ollamaEmbeddingsEndpoint = process.env.OLLAMA_EMBEDDINGS_ENDPOINT ?? `${ollamaEndpoint}/api/embeddings`; +const ollamaEmbeddingsEndpoint = process.env.OLLAMA_EMBEDDINGS_ENDPOINT + ?? (ollamaEndpoint ? `${ollamaEndpoint}/api/embeddings` : null); const ollamaEmbeddingsModel = process.env.OLLAMA_EMBEDDINGS_MODEL ?? "nomic-embed-text"; // OpenRouter configuration @@ -140,10 +145,23 @@ const vertexModel = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash"; // Values: "default" (use MODEL_DEFAULT), "none" (skip LLM call), or a model name const suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim(); +// Topic detection model override +// Values: "default" (use main model) or a model name to redirect topic detection to a lighter model +const topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim(); + // Hot reload configuration const hotReloadEnabled = process.env.HOT_RELOAD_ENABLED !== "false"; // default true const hotReloadDebounceMs = Number.parseInt(process.env.HOT_RELOAD_DEBOUNCE_MS ?? 
"1000", 10); +// Aggressive tool patching: try all text-to-tool extraction strategies for any model +const aggressiveToolPatching = process.env.AGGRESSIVE_TOOL_PATCHING === "true"; + +// Per-model tool parser configuration +// TOOL_PARSER_MODE: "parser" (default, use per-model parsers), "legacy" (old strategy registry), "compare" (run both, log diff) +const toolParserMode = (process.env.TOOL_PARSER_MODE ?? "parser").toLowerCase(); +// TOOL_PARSER_COMPARE_MODE: when true, logs comparison between parser and legacy paths +const toolParserCompareMode = process.env.TOOL_PARSER_COMPARE_MODE === "true"; + // Hybrid routing configuration const preferOllama = process.env.PREFER_OLLAMA === "true"; const fallbackEnabled = process.env.FALLBACK_ENABLED !== "false"; // default true @@ -176,6 +194,39 @@ if (!["server", "client", "passthrough"].includes(toolExecutionMode)) { "TOOL_EXECUTION_MODE must be one of: server, client, passthrough (default: server)" ); } +console.log(`[CONFIG] Tool execution mode: ${toolExecutionMode}`); + +// Tool execution provider configuration +// Enables routing tool-calling decisions to a dedicated model/provider +const toolExecutionProvider = (process.env.TOOL_EXECUTION_PROVIDER ?? "").toLowerCase().trim(); +const toolExecutionModel = (process.env.TOOL_EXECUTION_MODEL ?? "").trim(); +const toolExecutionCompareMode = process.env.TOOL_EXECUTION_COMPARE_MODE === "true"; + +// Validate tool execution provider if specified +if (toolExecutionProvider && !SUPPORTED_MODEL_PROVIDERS.has(toolExecutionProvider)) { + const supportedList = Array.from(SUPPORTED_MODEL_PROVIDERS).sort().join(", "); + throw new Error( + `Unsupported TOOL_EXECUTION_PROVIDER: "${toolExecutionProvider}". 
` + + `Valid options are: ${supportedList}` + ); +} + +// Log configuration +if (toolExecutionProvider) { + console.log(`[CONFIG] Tool execution provider: ${toolExecutionProvider}`); + if (toolExecutionModel) { + console.log(`[CONFIG] Tool execution model: ${toolExecutionModel}`); + } + if (toolExecutionCompareMode) { + console.log(`[CONFIG] Tool execution compare mode: enabled`); + } +} +if (suggestionModeModel.toLowerCase() !== "default") { + console.log(`[CONFIG] Suggestion mode model: ${suggestionModeModel}`); +} +if (topicDetectionModel.toLowerCase() !== "default") { + console.log(`[CONFIG] Topic detection model: ${topicDetectionModel}`); +} // Memory system configuration (Titans-inspired long-term memory) const memoryEnabled = process.env.MEMORY_ENABLED !== "false"; // default true @@ -217,6 +268,7 @@ const smartToolSelectionTokenBudget = Number.parseInt( 10 ); + // Headroom sidecar configuration const headroomEnabled = process.env.HEADROOM_ENABLED === "true"; const headroomEndpoint = process.env.HEADROOM_ENDPOINT?.trim() || "http://localhost:8787"; @@ -280,10 +332,27 @@ if (modelProvider === "openai" && !openAIApiKey) { } if (modelProvider === "ollama") { - try { - new URL(ollamaEndpoint); - } catch (err) { - throw new Error("OLLAMA_ENDPOINT must be a valid URL (default: http://localhost:11434)"); + if (!ollamaModel) { + throw new Error("OLLAMA_MODEL is required when MODEL_PROVIDER=ollama"); + } + if (!ollamaEndpoint && !ollamaCloudEndpoint) { + throw new Error( + "Set OLLAMA_ENDPOINT (local) or OLLAMA_CLOUD_ENDPOINT (cloud) when MODEL_PROVIDER=ollama" + ); + } + if (ollamaEndpoint) { + try { + new URL(ollamaEndpoint); + } catch { + throw new Error("OLLAMA_ENDPOINT must be a valid URL"); + } + } + if (ollamaCloudEndpoint) { + try { + new URL(ollamaCloudEndpoint); + } catch { + throw new Error("OLLAMA_CLOUD_ENDPOINT must be a valid URL"); + } } } @@ -313,8 +382,8 @@ if (modelProvider === "bedrock" && !bedrockApiKey) { // Validate hybrid routing configuration 
if (preferOllama) { - if (!ollamaEndpoint) { - throw new Error("PREFER_OLLAMA is set but OLLAMA_ENDPOINT is not configured"); + if (!ollamaEndpoint && !ollamaCloudEndpoint) { + throw new Error("PREFER_OLLAMA is set but neither OLLAMA_ENDPOINT nor OLLAMA_CLOUD_ENDPOINT is configured"); } if (fallbackEnabled && !SUPPORTED_MODEL_PROVIDERS.has(fallbackProvider)) { throw new Error( @@ -354,10 +423,16 @@ const databricksUrl = ? `${rawBaseUrl}${endpointPath.startsWith("/") ? "" : "/"}${endpointPath}` : null; +// Set MODEL_DEFAULT env var to use a specific model (e.g. "llama3.1" for Ollama). +// Without it, the default falls back to a Databricks Claude model regardless of MODEL_PROVIDER. const defaultModel = process.env.MODEL_DEFAULT ?? (modelProvider === "azure-anthropic" ? "claude-opus-4-5" : "databricks-claude-sonnet-4-5"); +// Force server-side model configuration, ignoring client requests +// Useful when you want to enforce a specific model regardless of what the client asks for +const enforceServerModel = process.env.ENFORCE_SERVER_MODEL?.toLowerCase() === "true"; + const port = Number.parseInt(process.env.PORT ?? "8080", 10); const sessionDbPath = process.env.SESSION_DB_PATH ?? path.join(process.cwd(), "data", "sessions.db"); @@ -393,6 +468,8 @@ const webSearchMaxRetries = Number.parseInt(process.env.WEB_SEARCH_MAX_RETRIES ? const policyMaxSteps = Number.parseInt(process.env.POLICY_MAX_STEPS ?? "8", 10); const policyMaxToolCalls = Number.parseInt(process.env.POLICY_MAX_TOOL_CALLS ?? "12", 10); +const policyMaxToolCallsPerRequest = Number.parseInt(process.env.POLICY_MAX_TOOL_CALLS_PER_REQUEST ?? "12", 10); +const policyMaxDurationMs = Number.parseInt(process.env.POLICY_MAX_DURATION_MS ?? "120000", 10); const policyToolLoopThreshold = Number.parseInt(process.env.POLICY_TOOL_LOOP_THRESHOLD ?? 
"10", 10); const policyDisallowedTools = process.env.POLICY_DISALLOWED_TOOLS?.split(",") @@ -498,6 +575,7 @@ const agentsMaxConcurrent = Number.parseInt(process.env.AGENTS_MAX_CONCURRENT ?? const agentsDefaultModel = process.env.AGENTS_DEFAULT_MODEL ?? "haiku"; const agentsMaxSteps = Number.parseInt(process.env.AGENTS_MAX_STEPS ?? "15", 10); const agentsTimeout = Number.parseInt(process.env.AGENTS_TIMEOUT ?? "120000", 10); +const agentsAutoSpawn = process.env.AGENTS_AUTO_SPAWN !== "false"; // default true when agents enabled // LLM Audit logging configuration const auditEnabled = process.env.LLM_AUDIT_ENABLED === "true"; // default false @@ -551,6 +629,8 @@ var config = { model: ollamaModel, timeout: Number.isNaN(ollamaTimeout) ? 120000 : ollamaTimeout, keepAlive: ollamaKeepAlive, + apiKey: ollamaApiKey, + cloudEndpoint: ollamaCloudEndpoint, embeddingsEndpoint: ollamaEmbeddingsEndpoint, embeddingsModel: ollamaEmbeddingsModel, }, @@ -606,7 +686,9 @@ var config = { modelProvider: { type: modelProvider, defaultModel, + enforceServerModel, suggestionModeModel, + topicDetectionModel, // Hybrid routing settings preferOllama, fallbackEnabled, @@ -614,7 +696,13 @@ var config = { openRouterMaxToolsForRouting, fallbackProvider, }, + aggressiveToolPatching, + toolParserMode, + toolParserCompareMode, toolExecutionMode, + toolExecutionProvider, + toolExecutionModel, + toolExecutionCompareMode, server: { jsonLimit: process.env.REQUEST_JSON_LIMIT ?? "1gb", }, @@ -647,6 +735,8 @@ var config = { policy: { maxStepsPerTurn: Number.isNaN(policyMaxSteps) ? 8 : policyMaxSteps, maxToolCallsPerTurn: Number.isNaN(policyMaxToolCalls) ? 12 : policyMaxToolCalls, + maxToolCallsPerRequest: Number.isNaN(policyMaxToolCallsPerRequest) ? 12 : policyMaxToolCallsPerRequest, + maxDurationMs: Number.isNaN(policyMaxDurationMs) ? 120000 : policyMaxDurationMs, toolLoopThreshold: Number.isNaN(policyToolLoopThreshold) ? 
10 : policyToolLoopThreshold, // Max tool results before force-terminating disallowedTools: policyDisallowedTools, git: { @@ -719,6 +809,7 @@ var config = { defaultModel: agentsDefaultModel, maxSteps: Number.isNaN(agentsMaxSteps) ? 15 : agentsMaxSteps, timeout: Number.isNaN(agentsTimeout) ? 120000 : agentsTimeout, + autoSpawn: agentsAutoSpawn, }, tests: { defaultCommand: testDefaultCommand ? testDefaultCommand.trim() : null, @@ -773,7 +864,7 @@ var config = { }, toon: { enabled: toonEnabled, - minBytes: Number.isNaN(toonMinBytes) ? 4096 : toonMinBytes, + minBytes: toonMinBytes, failOpen: toonFailOpen, logStats: toonLogStats, }, @@ -883,7 +974,8 @@ function reloadConfig() { // API keys and endpoints config.databricks.apiKey = process.env.DATABRICKS_API_KEY; config.azureAnthropic.apiKey = process.env.AZURE_ANTHROPIC_API_KEY ?? null; - config.ollama.model = process.env.OLLAMA_MODEL ?? "qwen2.5-coder:7b"; + config.ollama.model = process.env.OLLAMA_MODEL?.trim() || null; + config.ollama.cloudEndpoint = process.env.OLLAMA_CLOUD_ENDPOINT?.trim() || null; config.openrouter.apiKey = process.env.OPENROUTER_API_KEY ?? null; config.openrouter.model = process.env.OPENROUTER_MODEL ?? "openai/gpt-4o-mini"; config.azureOpenAI.apiKey = process.env.AZURE_OPENAI_API_KEY?.trim() || null; @@ -894,6 +986,13 @@ function reloadConfig() { config.vertex.apiKey = process.env.VERTEX_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || null; config.vertex.model = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash"; + // Aggressive tool patching + config.aggressiveToolPatching = process.env.AGGRESSIVE_TOOL_PATCHING === "true"; + + // Per-model tool parser + config.toolParserMode = (process.env.TOOL_PARSER_MODE ?? "parser").toLowerCase(); + config.toolParserCompareMode = process.env.TOOL_PARSER_COMPARE_MODE === "true"; + // Model provider settings const newProvider = (process.env.MODEL_PROVIDER ?? 
"databricks").toLowerCase(); if (SUPPORTED_MODEL_PROVIDERS.has(newProvider)) { @@ -903,12 +1002,7 @@ function reloadConfig() { config.modelProvider.fallbackEnabled = process.env.FALLBACK_ENABLED !== "false"; config.modelProvider.fallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase(); config.modelProvider.suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim(); - - config.toon.enabled = process.env.TOON_ENABLED === "true"; - const newToonMinBytes = Number.parseInt(process.env.TOON_MIN_BYTES ?? "4096", 10); - config.toon.minBytes = Number.isNaN(newToonMinBytes) ? 4096 : newToonMinBytes; - config.toon.failOpen = process.env.TOON_FAIL_OPEN !== "false"; - config.toon.logStats = process.env.TOON_LOG_STATS !== "false"; + config.modelProvider.topicDetectionModel = (process.env.TOPIC_DETECTION_MODEL ?? "default").trim(); // Log level config.logger.level = process.env.LOG_LEVEL ?? "info"; diff --git a/src/context/compression.js b/src/context/compression.js index 518aaba..47b0413 100644 --- a/src/context/compression.js +++ b/src/context/compression.js @@ -2,24 +2,63 @@ * History Compression for Token Optimization * * Compresses conversation history to reduce token usage while - * maintaining context quality. Uses sliding window approach: - * - Keep recent turns verbatim - * - Summarize older turns - * - Compress tool results + * maintaining context quality. Uses sliding window approach with + * percentage-based tiered compression that scales with recency + * and the model's context window size. 
* + * Tiers: + * - veryRecent (last 4 messages): keep 90% of content + * - recent (messages 5-10): keep 50% of content + * - old (11+): keep 20% of content */ const logger = require('../logger'); const config = require('../config'); +// Compression tiers: ratio = percentage of content to keep, minFloor = minimum chars +const COMPRESSION_TIERS = { + veryRecent: { ratio: 0.9, minFloor: 500 }, + recent: { ratio: 0.5, minFloor: 300 }, + old: { ratio: 0.2, minFloor: 200 }, +}; + +// How many of the recent messages count as "very recent" +const VERY_RECENT_COUNT = 4; + /** - * Compress conversation history to fit within token budget + * Compute the maximum character cap for a tier based on context window size. + * + * @param {number} contextWindowTokens - Model's context window in tokens (-1 = unknown) + * @param {string} tierName - "veryRecent", "recent", or "old" + * @returns {number} Maximum characters for tool result content in this tier + */ +function computeMaxCap(contextWindowTokens, tierName) { + // Convert tokens to chars (~4 chars/token), default to 8K tokens if unknown + const contextChars = (contextWindowTokens === -1 ? 8000 : contextWindowTokens) * 4; + const budgetRatios = { + veryRecent: 0.25, + recent: 0.10, + old: 0.03, + }; + return Math.floor(contextChars * (budgetRatios[tierName] ?? 0.03)); +} + +/** + * Compute the character limit for a piece of content based on tier and context window. * - * Strategy: - * 1. Keep last N turns verbatim (fresh context) - * 2. Summarize older turns (compressed history) - * 3. Compress tool results to key information only - * 4. 
Remove redundant exchanges + * @param {string} text - The text content + * @param {string} tierName - Tier name + * @param {number} contextWindowTokens - Context window in tokens + * @returns {number} Character limit + */ +function computeLimit(text, tierName, contextWindowTokens) { + const tier = COMPRESSION_TIERS[tierName] || COMPRESSION_TIERS.old; + const maxCap = computeMaxCap(contextWindowTokens, tierName); + return Math.min(maxCap, Math.max(tier.minFloor, Math.floor(text.length * tier.ratio))); +} + +/** + * Compress conversation history to fit within token budget * * @param {Array} messages - Conversation history * @param {Object} options - Compression options @@ -28,6 +67,8 @@ const config = require('../config'); function compressHistory(messages, options = {}) { if (!messages || messages.length === 0) return messages; + const contextWindowTokens = options.contextWindowTokens ?? -1; + const opts = { keepRecentTurns: options.keepRecentTurns ?? config.historyCompression?.keepRecentTurns ?? 10, summarizeOlder: options.summarizeOlder ?? config.historyCompression?.summarizeOlder ?? true, @@ -58,12 +99,16 @@ function compressHistory(messages, options = {}) { compressed.push(summary); } } else { - // Just compress tool results in old messages - compressed = oldMessages.map(msg => compressMessage(msg)); + // Compress tool results in old messages using "old" tier + compressed = oldMessages.map(msg => compressMessage(msg, "old", contextWindowTokens)); } - // Add recent messages (may compress tool results but keep content) - const recentCompressed = recentMessages.map(msg => compressToolResults(msg)); + // Add recent messages with tiered compression + const recentCompressed = recentMessages.map((msg, i) => { + const isVeryRecent = i >= recentMessages.length - VERY_RECENT_COUNT; + const tierName = isVeryRecent ? 
"veryRecent" : "recent"; + return compressToolResults(msg, tierName, contextWindowTokens); + }); const finalMessages = [...compressed, ...recentCompressed]; @@ -82,7 +127,8 @@ function compressHistory(messages, options = {}) { percentage: ((saved / originalLength) * 100).toFixed(1), splitIndex, oldMessages: oldMessages.length, - recentMessages: recentMessages.length + recentMessages: recentMessages.length, + contextWindowTokens, }, 'History compression applied'); } @@ -149,26 +195,28 @@ function summarizeOldHistory(messages) { } /** - * Compress a single message - * - * Reduces message size while preserving essential information. + * Compress a single message (used for old messages outside the recent window) * * @param {Object} message - Message to compress + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Compressed message */ -function compressMessage(message) { +function compressMessage(message, tierName = "old", contextWindowTokens = -1) { if (!message) return message; + const limit = computeLimit("x".repeat(300), tierName, contextWindowTokens); + const compressed = { role: message.role }; // Compress content based on type if (typeof message.content === 'string') { - compressed.content = compressText(message.content, 300); + compressed.content = compressText(message.content, limit); } else if (Array.isArray(message.content)) { compressed.content = message.content - .map(block => compressContentBlock(block)) + .map(block => compressContentBlock(block, tierName, contextWindowTokens)) .filter(Boolean); } else { compressed.content = message.content; @@ -180,13 +228,12 @@ function compressMessage(message) { /** * Compress tool results in a message while keeping other content * - * Tool results can be very large. This compresses them while - * keeping user and assistant text intact. 
- * * @param {Object} message - Message to process + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Message with compressed tool results */ -function compressToolResults(message) { +function compressToolResults(message, tierName = "recent", contextWindowTokens = -1) { if (!message) return message; const compressed = { @@ -199,7 +246,7 @@ function compressToolResults(message) { compressed.content = message.content.map(block => { // Compress tool_result blocks if (block.type === 'tool_result') { - return compressToolResultBlock(block); + return compressToolResultBlock(block, tierName, contextWindowTokens); } // Keep other blocks as-is return block; @@ -215,16 +262,20 @@ function compressToolResults(message) { * Compress a content block * * @param {Object} block - Content block + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object|null} Compressed block or null if removed */ -function compressContentBlock(block) { +function compressContentBlock(block, tierName = "old", contextWindowTokens = -1) { if (!block) return null; + const limit = computeLimit("x".repeat(300), tierName, contextWindowTokens); + switch (block.type) { case 'text': return { type: 'text', - text: compressText(block.text, 300) + text: compressText(block.text, limit) }; case 'tool_use': @@ -237,7 +288,7 @@ function compressContentBlock(block) { }; case 'tool_result': - return compressToolResultBlock(block); + return compressToolResultBlock(block, tierName, contextWindowTokens); default: return block; @@ -247,13 +298,15 @@ function compressContentBlock(block) { /** * Compress tool result block * - * Tool results can be very large (file contents, bash output). - * Compress while preserving essential information. + * Uses dynamic limits based on compression tier and context window size + * instead of a hardcoded character limit. 
* * @param {Object} block - tool_result block + * @param {string} tierName - Compression tier + * @param {number} contextWindowTokens - Context window in tokens * @returns {Object} Compressed tool_result */ -function compressToolResultBlock(block) { +function compressToolResultBlock(block, tierName = "old", contextWindowTokens = -1) { if (!block || block.type !== 'tool_result') return block; const compressed = { @@ -261,17 +314,20 @@ function compressToolResultBlock(block) { tool_use_id: block.tool_use_id, }; - // Compress content + // Compress content using dynamic limits if (typeof block.content === 'string') { - compressed.content = compressText(block.content, 500); + const limit = computeLimit(block.content, tierName, contextWindowTokens); + compressed.content = compressText(block.content, limit); } else if (Array.isArray(block.content)) { compressed.content = block.content.map(item => { if (typeof item === 'string') { - return compressText(item, 500); + const limit = computeLimit(item, tierName, contextWindowTokens); + return compressText(item, limit); } else if (item.type === 'text') { + const limit = computeLimit(item.text || "", tierName, contextWindowTokens); return { type: 'text', - text: compressText(item.text, 500) + text: compressText(item.text, limit) }; } return item; @@ -456,4 +512,6 @@ module.exports = { calculateCompressionStats, needsCompression, summarizeOldHistory, + COMPRESSION_TIERS, + computeMaxCap, }; diff --git a/src/logger/audit-logger.js b/src/logger/audit-logger.js index 56b2830..27efc92 100644 --- a/src/logger/audit-logger.js +++ b/src/logger/audit-logger.js @@ -40,7 +40,18 @@ function createAuditLogger(config) { level: "info", // Always log at info level for compliance name: "llm-audit", base: null, // Don't include pid/hostname to keep logs clean - timestamp: pino.stdTimeFunctions.isoTime, + // Use local timezone for timestamps instead of UTC + timestamp: () => { + const now = new Date(); + const isoString = now.toISOString(); + 
const offset = -now.getTimezoneOffset(); + const offsetHours = String(Math.floor(Math.abs(offset) / 60)).padStart(2, '0'); + const offsetMins = String(Math.abs(offset) % 60).padStart(2, '0'); + const offsetSign = offset >= 0 ? '+' : '-'; + // Replace Z with local offset (e.g., +05:30 or -08:00) + const localIso = isoString.replace('Z', `${offsetSign}${offsetHours}:${offsetMins}`); + return `,"time":"${localIso}"`; + }, formatters: { level: (label) => { return { level: label }; diff --git a/src/logger/index.js b/src/logger/index.js index 7f49d9f..2f0b4f6 100644 --- a/src/logger/index.js +++ b/src/logger/index.js @@ -1,4 +1,6 @@ const pino = require("pino"); +const fs = require("fs"); +const path = require("path"); const config = require("../config"); const { createOversizedErrorStream } = require("./oversized-error-stream"); @@ -64,6 +66,19 @@ streams.push({ : process.stdout, }); +// File output stream (LOG_FILE env var, e.g. ./logs/lynkr.log) +const logFile = process.env.LOG_FILE; +if (logFile) { + const logDir = path.dirname(logFile); + if (!fs.existsSync(logDir)) { + fs.mkdirSync(logDir, { recursive: true }); + } + streams.push({ + level: config.logger.level, + stream: pino.destination({ dest: logFile, sync: false }), + }); +} + // Oversized error stream (if enabled) if (config.oversizedErrorLogging?.enabled) { streams.push({ @@ -80,6 +95,29 @@ const logger = pino( base: { env: config.env, }, + // Use local timezone for timestamps instead of UTC + timestamp: () => { + const now = new Date(); + + // Get all components in local timezone + const year = now.getFullYear(); + const month = String(now.getMonth() + 1).padStart(2, '0'); + const day = String(now.getDate()).padStart(2, '0'); + const hours = String(now.getHours()).padStart(2, '0'); + const minutes = String(now.getMinutes()).padStart(2, '0'); + const seconds = String(now.getSeconds()).padStart(2, '0'); + const ms = String(now.getMilliseconds()).padStart(3, '0'); + + // Get timezone offset + const tzOffset = 
-now.getTimezoneOffset(); + const offsetHours = String(Math.floor(Math.abs(tzOffset) / 60)).padStart(2, '0'); + const offsetMins = String(Math.abs(tzOffset) % 60).padStart(2, '0'); + const offsetSign = tzOffset >= 0 ? '+' : '-'; + + const timestamp = `${year}-${month}-${day}T${hours}:${minutes}:${seconds}.${ms}${offsetSign}${offsetHours}:${offsetMins}`; + + return `,"time":"${timestamp}"`; + }, redact: { paths: ["req.headers.authorization", "req.headers.cookie"], censor: "***redacted***", diff --git a/src/orchestrator/index.js b/src/orchestrator/index.js index 9825c92..f900948 100644 --- a/src/orchestrator/index.js +++ b/src/orchestrator/index.js @@ -11,6 +11,7 @@ const systemPrompt = require("../prompts/system"); const historyCompression = require("../context/compression"); const tokenBudget = require("../context/budget"); const { applyToonCompression } = require("../context/toon"); +const { getContextWindow } = require("../providers/context-window"); const { classifyRequestType, selectToolsSmartly } = require("../tools/smart-selection"); const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = require("../headroom"); const { createAuditLogger } = require("../logger/audit-logger"); @@ -20,6 +21,17 @@ const crypto = require("crypto"); const { asyncClone, asyncTransform, getPoolStats } = require("../workers/helpers"); const { getSemanticCache, isSemanticCacheEnabled } = require("../cache/semantic"); const lazyLoader = require("../tools/lazy-loader"); +const { spawnAgent } = require("../agents"); +const { mapToolsToAgentType, buildSubagentPrompt } = require("../agents/tool-agent-mapper"); +const { getProgressEmitter } = require("../progress/emitter"); + +/** + * Generate a unique agent ID + * Format: agent__ + */ +function generateAgentId() { + return `agent_${Date.now()}_${crypto.randomBytes(8).toString('hex')}`; +} /** * Get destination URL for audit logging based on provider type @@ -33,7 +45,7 @@ function getDestinationUrl(providerType) { case 
'azure-anthropic': return config.azureAnthropic?.endpoint ?? 'unknown'; case 'ollama': - return config.ollama?.endpoint ?? 'unknown'; + return config.ollama?.endpoint ?? config.ollama?.cloudEndpoint ?? 'unknown'; case 'azure-openai': return config.azureOpenAI?.endpoint ?? 'unknown'; case 'openrouter': @@ -196,6 +208,99 @@ function normaliseMessages(payload, options = {}) { return normalised; } +/** + * Clean user input that was concatenated due to request interruption. + * When Claude Code interrupts a request and the user types a new command, + * the client may concatenate old + new messages (e.g. "ls[Request interrupted by user]ls"). + * This function strips the old prefix if a pending flag exists on the session. + * + * @param {object} session - Session object (has _pendingUserInput flag) + * @param {Array} messages - Raw messages array from payload + * @returns {Array} messages - Cleaned messages (modified in place) + */ +function cleanInterruptedInput(session, messages) { + if (!session || !messages || !Array.isArray(messages)) return messages; + + const pendingInput = session._pendingUserInput; + if (!pendingInput) return messages; // No interrupted request, nothing to clean + + // Find the last user message + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg?.role !== 'user') continue; + + // Extract text content + let text = ''; + if (typeof msg.content === 'string') { + text = msg.content; + } else if (Array.isArray(msg.content)) { + const textBlocks = msg.content.filter(b => b?.type === 'text'); + text = textBlocks.map(b => b.text || '').join('\n'); + } + if (!text) break; + + // Check if message starts with the pending input (concatenation pattern) + if (text.length > pendingInput.length && text.startsWith(pendingInput)) { + let cleanedText = text.slice(pendingInput.length); + + // Strip common separators between old and new input + cleanedText = cleanedText + .replace(/^\[Request interrupted by user\]/i, '') + 
.replace(/^\n+/, '') + .replace(/^\s+/, ''); + + if (cleanedText.length > 0) { + logger.info({ + original: text.substring(0, 100), + cleaned: cleanedText.substring(0, 100), + pendingInput: pendingInput.substring(0, 50) + }, "[INPUT_CLEANUP] Stripped interrupted request prefix from user input"); + + // Update the message content + if (typeof msg.content === 'string') { + msg.content = cleanedText; + } else if (Array.isArray(msg.content)) { + // Find and update the text block(s) + for (const block of msg.content) { + if (block?.type === 'text' && block.text) { + if (block.text.startsWith(pendingInput)) { + let cleanBlock = block.text.slice(pendingInput.length); + cleanBlock = cleanBlock + .replace(/^\[Request interrupted by user\]/i, '') + .replace(/^\n+/, '') + .replace(/^\s+/, ''); + block.text = cleanBlock; + } + break; // Only clean the first text block + } + } + } + } + } + break; // Only process the last user message + } + + return messages; +} + +/** + * Extract the last user message text from payload messages. + * Used for setting the pending input flag. 
+ */ +function extractLastUserText(messages) { + if (!messages || !Array.isArray(messages)) return ''; + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if (msg?.role !== 'user') continue; + if (typeof msg.content === 'string') return msg.content.trim(); + if (Array.isArray(msg.content)) { + const textBlocks = msg.content.filter(b => b?.type === 'text'); + return textBlocks.map(b => b.text || '').join('\n').trim(); + } + } + return ''; +} + function normaliseTools(tools) { if (!Array.isArray(tools) || tools.length === 0) return undefined; return tools.map((tool) => ({ @@ -670,53 +775,11 @@ function normaliseToolChoice(choice) { } /** - * Strip thinking-style reasoning from Ollama model outputs - * Patterns to remove: - * - Lines starting with bullet points (●, •, -, *) - * - Explanatory reasoning before the actual response - * - Multiple newlines used to separate thinking from response + * Strip <think>...</think> tags that some models (DeepSeek, Qwen) emit for chain-of-thought reasoning.
 */ -function stripThinkingBlocks(text) { +function stripThinkTags(text) { if (typeof text !== "string") return text; - - // Split into lines - const lines = text.split("\n"); - const cleanedLines = []; - let inThinkingBlock = false; - let consecutiveEmptyLines = 0; - - for (const line of lines) { - const trimmed = line.trim(); - - // Detect thinking block markers (bullet points followed by reasoning) - if (/^[●•\-\*]\s/.test(trimmed)) { - inThinkingBlock = true; - continue; - } - - // Empty lines might separate thinking from response - if (trimmed === "") { - consecutiveEmptyLines++; - // If we've seen 2+ empty lines, likely end of thinking block - if (consecutiveEmptyLines >= 2) { - inThinkingBlock = false; - } - continue; - } - - // Reset empty line counter - consecutiveEmptyLines = 0; - - // Skip lines that are part of thinking block - if (inThinkingBlock) { - continue; - } - - // Keep this line - cleanedLines.push(line); - } - - return cleanedLines.join("\n").trim(); + return text.replace(/<think>[\s\S]*?<\/think>/g, "").trim(); } function ollamaToAnthropicResponse(ollamaResponse, requestedModel) { @@ -733,18 +796,27 @@ function ollamaToAnthropicResponse(ollamaResponse, requestedModel) { // Add text content if present, after stripping thinking blocks if (typeof rawContent === "string" && rawContent.trim()) { - const cleanedContent = stripThinkingBlocks(rawContent); + const cleanedContent = stripThinkTags(rawContent); if (cleanedContent) { contentItems.push({ type: "text", text: cleanedContent }); } } // Add tool calls if present + // Always go through buildAnthropicResponseFromOllama for Ollama responses + // It handles both native tool_calls AND text extraction fallback + const { buildAnthropicResponseFromOllama } = require("../clients/ollama-utils"); if (Array.isArray(toolCalls) && toolCalls.length > 0) { - const { buildAnthropicResponseFromOllama } = require("../clients/ollama-utils"); - // Use the utility function for tool call conversion return
buildAnthropicResponseFromOllama(ollamaResponse, requestedModel); } + // FALLBACK: Check for tool calls in text content even without native tool_calls + if (typeof rawContent === 'string' && rawContent.trim()) { + const fallbackResponse = buildAnthropicResponseFromOllama(ollamaResponse, requestedModel); + // Only use fallback response if it actually found tool calls + if (fallbackResponse.stop_reason === "tool_use") { + return fallbackResponse; + } + } if (contentItems.length === 0) { contentItems.push({ type: "text", text: "" }); @@ -843,7 +915,7 @@ function toAnthropicResponse(openai, requestedModel, wantsThinking) { }; } -function sanitizePayload(payload) { +async function sanitizePayload(payload) { const clean = JSON.parse(JSON.stringify(payload ?? {})); const requestedModel = (typeof payload?.model === "string" && payload.model.trim().length > 0 @@ -920,12 +992,36 @@ function sanitizePayload(payload) { : "claude-opus-4-5"; clean.model = azureDefaultModel; } else if (providerType === "ollama") { + // Override client model with Ollama config model + const ollamaConfiguredModel = config.ollama?.model; + clean.model = ollamaConfiguredModel; + // Ollama format conversion - // Check if model supports tools - const { modelNameSupportsTools } = require("../clients/ollama-utils"); - const modelSupportsTools = modelNameSupportsTools(config.ollama?.model); + // Check if tools should be enabled (native support OR tool execution provider configured) + const toolConfig = shouldEnableToolsForRequest(providerType, config); - if (!modelSupportsTools) { + logger.warn({ + location: "sanitizePayload - ollama start", + toolsBeforeProcessing: Array.isArray(clean.tools) ? 
clean.tools.length : 'not array or null', + toolConfigShouldEnable: toolConfig.shouldEnableTools, + toolConfigReason: toolConfig.reason, + toolConfigLogOverride: toolConfig.logOverride, + toolExecutionProvider: config.toolExecutionProvider, + providerType + }, "[TOOL_FLOW_1] Ollama processing start"); + + // Log override if tools are enabled via TOOL_EXECUTION_PROVIDER + if (toolConfig.logOverride) { + logger.info({ + conversationModel: config.ollama?.model, + conversationProvider: providerType, + toolExecutionProvider: config.toolExecutionProvider, + toolExecutionModel: config.toolExecutionModel || 'default', + reason: 'TOOL_EXECUTION_PROVIDER configured' + }, "Enabling tools despite conversation model not supporting tools - will route to tool execution provider"); + } + + if (!toolConfig.shouldEnableTools) { // Filter out tool_result content blocks for models without tool support clean.messages = clean.messages .map((msg) => { @@ -989,9 +1085,8 @@ function sanitizePayload(payload) { })); delete clean.tool_choice; } else if (providerType === "ollama") { - // Check if model supports tools - const { modelNameSupportsTools } = require("../clients/ollama-utils"); - const modelSupportsTools = modelNameSupportsTools(config.ollama?.model); + // Check if tools should be enabled (native support OR tool execution provider configured) + const toolConfig = shouldEnableToolsForRequest(providerType, config); // Check if this is a simple conversational message (no tools needed) const isConversational = (() => { @@ -1025,8 +1120,15 @@ function sanitizePayload(payload) { } // Very short messages (< 20 chars) without code/technical keywords - if (trimmed.length < 20 && !/code|file|function|error|bug|fix|write|read|create/.test(trimmed)) { - logger.debug({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - matched"); + // BUT: Common shell commands should NOT be treated as conversational + const shellCommands = 
/^(pwd|ls|cd|cat|echo|grep|find|ps|top|df|du|whoami|which|env)[\s\.\!\?]*$/; + if (shellCommands.test(trimmed)) { + logger.info({ matched: "shell_command", trimmed }, "Ollama conversational check - SHELL COMMAND detected, keeping tools"); + return false; // NOT conversational - needs tools! + } + + if (trimmed.length < 20 && !/code|file|function|error|bug|fix|write|read|create|python|rust|javascript|typescript|java|csharp|go|cpp|c\+\+|kotlin|swift|php|ruby|lua|perl|scala|haskell|clojure|r|matlab|sql|bash|shell|powershell/.test(trimmed)) { + logger.warn({ matched: "short", trimmed, length: trimmed.length }, "Ollama conversational check - SHORT MESSAGE matched, DELETING TOOLS"); return true; } @@ -1034,15 +1136,50 @@ function sanitizePayload(payload) { return false; })(); + logger.warn({ + location: "sanitizePayload - before conversational check", + isConversational, + toolsPresent: Array.isArray(clean.tools) ? clean.tools.length : 'not array', + toolConfigShouldEnable: toolConfig.shouldEnableTools + }, "[TOOL_FLOW_2] Before conversational branch"); + if (isConversational) { // Strip all tools for simple conversational messages - delete clean.tools; - delete clean.tool_choice; - logger.debug({ - model: config.ollama?.model, - message: "Removed tools for conversational message" - }, "Ollama conversational mode"); - } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { + // UNLESS tool execution provider is configured (tools will be routed there) + const toolExecutionProviderConfigured = hasDedicatedToolModel(providerType); + + logger.warn({ + location: "conversational branch", + toolExecutionProviderConfigured, + toolExecutionProvider: config.toolExecutionProvider, + providerType, + toolsBefore: Array.isArray(clean.tools) ? clean.tools.length : 'not array' + }, "[TOOL_FLOW_3] In conversational branch"); + + if (!toolExecutionProviderConfigured) { + const originalToolCount = Array.isArray(clean.tools) ? 
clean.tools.length : 0; + delete clean.tools; + delete clean.tool_choice; + clean._noToolInjection = true; + logger.warn({ + model: config.ollama?.model, + message: "Removed tools for conversational message", + originalToolCount, + userMessage: clean.messages?.[clean.messages.length - 1]?.content?.substring(0, 50), + }, "Ollama conversational mode - ALL TOOLS DELETED!"); + } else { + logger.warn({ + model: config.ollama?.model, + toolExecutionProvider: config.toolExecutionProvider, + message: "Keeping tools despite conversational message - tool execution provider configured", + toolsAfter: Array.isArray(clean.tools) ? clean.tools.length : 'not array' + }, "[TOOL_FLOW_4] Ollama conversational mode - KEEPING tools for tool execution provider"); + } + } else if (toolConfig.shouldEnableTools && Array.isArray(clean.tools) && clean.tools.length > 0) { + logger.warn({ + location: "else if - tool limiting branch", + toolCount: clean.tools.length + }, "[TOOL_FLOW_5] In tool limiting branch"); // Ollama performance degrades with too many tools // Limit to essential tools only const OLLAMA_ESSENTIAL_TOOLS = new Set([ @@ -1053,7 +1190,8 @@ function sanitizePayload(payload) { "Glob", "Grep", "WebSearch", - "WebFetch" + "WebFetch", + "shell", // Tool is registered as "shell" internally ]); const limitedTools = clean.tools.filter(tool => @@ -1072,10 +1210,49 @@ function sanitizePayload(payload) { delete clean.tools; } } else { - // Remove tools for models without tool support - delete clean.tools; - delete clean.tool_choice; + logger.warn({ + location: "else block - fallback", + toolsBefore: Array.isArray(clean.tools) ? 
clean.tools.length : 'not array', + toolConfigShouldEnable: toolConfig.shouldEnableTools + }, "[TOOL_FLOW_6] In else block - fallback case"); + + // Check if tool execution provider is configured + const toolExecutionProviderConfigured = hasDedicatedToolModel(providerType); + + logger.warn({ + location: "else block - provider check", + toolExecutionProviderConfigured, + toolExecutionProvider: config.toolExecutionProvider, + providerType + }, "[TOOL_FLOW_7] Else block - checking tool execution provider"); + + if (!toolExecutionProviderConfigured) { + // Remove tools only if no tool execution provider configured + logger.warn({ + location: "else block - deleting tools", + toolsDeleted: true + }, "[TOOL_FLOW_8] DELETING TOOLS - no tool execution provider"); + delete clean.tools; + delete clean.tool_choice; + } else { + // Keep tools field (even if empty) for tool execution provider + // The Ollama client will inject STANDARD_TOOLS later + logger.warn({ + model: config.ollama?.model, + toolExecutionProvider: config.toolExecutionProvider, + message: "Keeping empty tools - will be injected by Ollama client or handled by tool execution provider", + toolsAfter: Array.isArray(clean.tools) ? clean.tools.length : 'not array' + }, "[TOOL_FLOW_9] Ollama tools preserved for tool execution provider"); + } } + + logger.warn({ + location: "sanitizePayload - ollama end", + toolsAfterOllamaProcessing: Array.isArray(clean.tools) ? 
clean.tools.length : 'deleted or not array', + hasToolsProperty: 'tools' in clean, + toolsValue: clean.tools + }, "[TOOL_FLOW_10] Ollama processing complete - final tools state"); + } else if (providerType === "openrouter") { // OpenRouter supports tools - keep them as-is // Tools are already in Anthropic format and will be converted by openrouter-utils @@ -1109,10 +1286,10 @@ function sanitizePayload(payload) { else delete clean.tool_choice; } else if (providerType === "ollama") { // Tool choice handling - const { modelNameSupportsTools } = require("../clients/ollama-utils"); - const modelSupportsTools = modelNameSupportsTools(config.ollama?.model); + // Check if tools are enabled (to maintain consistency with tool handling above) + const toolConfig = shouldEnableToolsForRequest(providerType, config); - if (!modelSupportsTools) { + if (!toolConfig.shouldEnableTools) { delete clean.tool_choice; } // For tool-capable models, Ollama doesn't support tool_choice, so remove it @@ -1121,26 +1298,74 @@ function sanitizePayload(payload) { delete clean.tool_choice; } - // Smart tool selection (universal, applies to all providers) - if (config.smartToolSelection?.enabled && Array.isArray(clean.tools) && clean.tools.length > 0) { - const classification = classifyRequestType(clean); - const selectedTools = selectToolsSmartly(clean.tools, classification, { - provider: providerType, - tokenBudget: config.smartToolSelection.tokenBudget, - config: config.smartToolSelection - }); - - // Only log if tools were actually filtered (avoid logging overhead) - if (selectedTools.length !== clean.tools.length) { + // (a) Server mode: override client-provided tools with server's STANDARD_TOOLS + if (config.toolExecutionMode === 'server' && Array.isArray(clean.tools) && clean.tools.length > 0) { + const { STANDARD_TOOLS } = require('../clients/standard-tools'); + const clientNames = clean.tools.map(t => t.name).sort(); + const serverNames = STANDARD_TOOLS.map(t => t.name).sort(); + const 
isSubset = clientNames.length < serverNames.length + || clientNames.some(n => !serverNames.includes(n)); + if (isSubset) { logger.info({ - requestType: classification.type, - originalCount: clean.tools.length, - selectedCount: selectedTools.length, - provider: providerType - }, "Smart tool selection applied"); + clientToolCount: clean.tools.length, + clientToolNames: clean.tools.map(t => t.name), + serverToolCount: STANDARD_TOOLS.length, + }, "Client tools overridden — TOOL_EXECUTION_MODE=server enforces STANDARD_TOOLS"); + clean.tools = STANDARD_TOOLS; } + } + + // Smart tool selection (universal, applies to all providers) + // Single-pass: classifies request type and filters tools accordingly + // Skip smart-selection if this is a retry after "Invoking tool(s):" text (keeps core tools) + if (config.smartToolSelection?.enabled && Array.isArray(clean.tools) && clean.tools.length > 0 && !clean._invokeTextRetry) { + // (b) Skip smart selection for cloud models in tool-capable whitelist + // Cloud endpoints have large context windows — no need to trim tools + const { isCloudModel, modelNameSupportsTools } = require('../clients/ollama-utils'); + const resolvedModel = clean.model || config.ollama?.model; + const skipSmartSelection = isCloudModel(resolvedModel) && modelNameSupportsTools(resolvedModel); + + if (skipSmartSelection) { + logger.info({ + model: resolvedModel, + toolCount: clean.tools.length, + }, "[TOOL_FLOW_SMART] Skipped — cloud model in tool-capable whitelist"); + } else { + const classification = classifyRequestType(clean); + const selectedTools = selectToolsSmartly(clean.tools, classification, { + provider: providerType, + tokenBudget: config.smartToolSelection.tokenBudget, + config: config.smartToolSelection + }); + + const toolExecutionProviderConfigured = hasDedicatedToolModel(providerType); + + if (selectedTools.length !== clean.tools.length) { + logger.warn({ + requestType: classification.type, + confidence: classification.confidence, + 
originalCount: clean.tools.length, + selectedCount: selectedTools.length, + provider: providerType, + toolExecutionProviderConfigured + }, "[TOOL_FLOW_SMART] Smart tool selection applied"); + } - clean.tools = selectedTools.length > 0 ? selectedTools : undefined; + // If tool execution provider configured and selection filtered to 0, keep tools anyway + if (toolExecutionProviderConfigured && selectedTools.length === 0) { + logger.warn({ + requestType: classification.type, + originalCount: clean.tools.length, + toolExecutionProvider: config.toolExecutionProvider, + reason: "TOOL_EXECUTION_PROVIDER configured - overriding smart selection" + }, "[TOOL_FLOW_OVERRIDE] Keeping tools despite smart selection filtering to 0"); + } else { + clean.tools = selectedTools.length > 0 ? selectedTools : undefined; + if (!selectedTools.length) { + clean._noToolInjection = true; + } + } + } } clean.stream = payload.stream ?? false; @@ -1248,13 +1473,54 @@ function sanitizePayload(payload) { clean._suggestionModeModel = smConfig; } + // === Topic detection: tag request and override model if configured === + if (clean._requestMode === "main") { + const { isTopicDetection: isTopic } = detectTopicDetection(clean); + if (isTopic) { + clean._requestMode = "topic"; + const tdConfig = config.modelProvider?.topicDetectionModel ?? "default"; + if (tdConfig.toLowerCase() !== "default") { + clean.model = tdConfig; + clean._topicDetectionModel = tdConfig; + } + } + } + + logger.warn({ + location: "sanitizePayload - FINAL RETURN", + providerType, + toolsFinal: Array.isArray(clean.tools) ? clean.tools.length : 'deleted or not array', + hasToolsProperty: 'tools' in clean, + toolsValue: clean.tools === undefined ? 'undefined' : (clean.tools === null ? 
'null' : `array[${clean.tools.length}]`) + }, "[TOOL_FLOW_FINAL] sanitizePayload returning - FINAL TOOL STATE"); + + // Proactive tool-call nudge: always tell the model to call tools directly rather than describing intent + if (Array.isArray(clean.tools) && clean.tools.length > 0) { + const nudge = "Go ahead and use the tool calls if you want to. Do not describe what you are about to do — just call the tools directly."; + if (typeof clean.system === "string" && clean.system.length > 0) { + // azure-anthropic + ollama: system is a top-level string field + clean.system += "\n\n" + nudge; + } else if (typeof clean.system === "string") { + clean.system = nudge; + } else { + // OpenAI-style providers: system lives as a role="system" message in messages array + // (clean.system was deleted for these providers) + const sysMsg = clean.messages?.find(m => m.role === "system"); + if (sysMsg && typeof sysMsg.content === "string") { + sysMsg.content += "\n\n" + nudge; + } else if (!sysMsg) { + clean.messages?.unshift({ role: "system", content: nudge }); + } + } + } + return clean; } const DEFAULT_LOOP_OPTIONS = { maxSteps: config.policy.maxStepsPerTurn ?? 6, - maxDurationMs: 120000, - maxToolCallsPerRequest: config.policy.maxToolCallsPerRequest ?? 20, // Prevent runaway tool calling + maxDurationMs: config.policy.maxDurationMs ?? 120000, + maxToolCallsPerRequest: config.policy.maxToolCallsPerRequest ?? 12, }; function resolveLoopOptions(options = {}) { @@ -1303,6 +1569,246 @@ function getToolCallSignature(toolCall) { return crypto.createHash('sha256').update(signature).digest('hex').substring(0, 16); } +/** + * Check if a dedicated tool model is configured that differs from the conversation model. + * Returns true when tool calls should be routed to a different model, even if both + * use the same provider (e.g. Ollama chat llama3.1:8b + Ollama tools qwen3:32b). 
+ * + * @param {string} providerType - Current conversation provider (ollama, openrouter, etc) + * @returns {boolean} + */ +function hasDedicatedToolModel(providerType) { + if (!config.toolExecutionProvider) return false; + if (config.toolExecutionProvider !== providerType) return true; + // Same provider — only route if a DIFFERENT model is specified + if (!config.toolExecutionModel) return false; + const conversationModel = providerType === 'ollama' ? config.ollama?.model + : providerType === 'openrouter' ? config.openrouter?.model + : null; + return config.toolExecutionModel !== conversationModel; +} + +/** + * Determine if tools should be enabled for this request + * Tools are enabled if EITHER: + * 1. The conversation model natively supports tools, OR + * 2. A separate tool execution provider is configured (tools will be routed there) + * + * @param {string} providerType - Current provider (ollama, openrouter, etc) + * @param {object} config - Configuration object + * @returns {{ shouldEnableTools: boolean, reason: string, logOverride: boolean }} + */ +function shouldEnableToolsForRequest(providerType, config) { + // Check if model natively supports tools + let modelSupportsTools = true; // Default for most providers + + if (providerType === 'ollama') { + const { modelNameSupportsTools } = require('../clients/ollama-utils'); + modelSupportsTools = modelNameSupportsTools(config.ollama?.model); + // Also check if the dedicated tool model supports tools + if (!modelSupportsTools && config.toolExecutionModel) { + modelSupportsTools = modelNameSupportsTools(config.toolExecutionModel); + } + } + + // Check if user configured separate tool execution provider + const toolExecutionProviderConfigured = hasDedicatedToolModel(providerType); + + return { + shouldEnableTools: modelSupportsTools || toolExecutionProviderConfigured, + reason: modelSupportsTools + ? 'model_native_support' + : toolExecutionProviderConfigured + ? 
'tool_execution_provider_override' + : 'not_supported', + logOverride: toolExecutionProviderConfigured && !modelSupportsTools + }; +} + +/** + * Extract tool calls from provider response + * Handles different provider formats (Anthropic, OpenAI, Ollama) + * + * @param {Object} response - The LLM response JSON + * @param {string} providerType - Provider type for format detection + * @returns {Array} Array of tool call objects + */ +function extractToolCallsFromResponse(response, providerType) { + if (!response) return []; + + let toolCalls = []; + + try { + // Anthropic format: { content: [{ type: "tool_use", ... }], stop_reason } + if (Array.isArray(response.content) && response.stop_reason !== undefined) { + toolCalls = response.content + .filter(block => block?.type === "tool_use") + .map(block => ({ + id: block.id, + function: { + name: block.name, + arguments: JSON.stringify(block.input ?? {}), + }, + _anthropic_block: block, + _source_provider: providerType, + })); + } + // Ollama format: { message: { tool_calls: [...] } } + else if (response.message?.tool_calls) { + toolCalls = Array.isArray(response.message.tool_calls) + ? response.message.tool_calls.map(tc => ({ + ...tc, + _source_provider: providerType, + })) + : []; + } + // OpenAI format: { choices: [{ message: { tool_calls: [...] } }] } + else if (response.choices?.[0]?.message?.tool_calls) { + toolCalls = Array.isArray(response.choices[0].message.tool_calls) + ? 
response.choices[0].message.tool_calls.map(tc => ({ + ...tc, + _source_provider: providerType, + })) + : []; + } + } catch (err) { + logger.warn({ error: err.message, providerType }, "Failed to extract tool calls from response"); + } + + return toolCalls; +} + +/** + * Score a set of tool calls based on quality heuristics + * Higher score = better quality + */ +function scoreToolCalls(toolCalls) { + let score = 0; + + for (const tc of toolCalls) { + // Base score for each tool call + score += 10; + + // Bonus for having function name + if (tc.function?.name) { + score += 5; + } + + // Bonus for having arguments + if (tc.function?.arguments) { + try { + const args = JSON.parse(tc.function.arguments); + const argCount = Object.keys(args).length; + + // More arguments = more specific = better + score += argCount * 2; + + // Bonus for non-empty string values + for (const value of Object.values(args)) { + if (typeof value === "string" && value.length > 0) { + score += 1; + } + } + } catch (e) { + // Invalid JSON arguments = penalty + score -= 5; + } + } + } + + return score; +} + +/** + * Compare tool calls from two providers and select the best + * + * @param {Array} conversationToolCalls - Tool calls from conversation provider + * @param {Array} toolProviderToolCalls - Tool calls from tool execution provider + * @param {Object} context - Context for logging + * @returns {Object} { toolCalls: Array, selectedProvider: string, reason: string } + */ +function compareAndSelectToolCalls(conversationToolCalls, toolProviderToolCalls, context) { + const { sessionId } = context; + + // If only one provider returned tool calls, use that + if (toolProviderToolCalls.length === 0 && conversationToolCalls.length > 0) { + logger.info({ sessionId, count: conversationToolCalls.length }, + "Tool execution provider returned no tools, using conversation provider"); + return { + toolCalls: conversationToolCalls, + selectedProvider: 'conversation', + reason: 'tool_provider_empty' + }; + } + 
+ if (conversationToolCalls.length === 0 && toolProviderToolCalls.length > 0) { + logger.info({ sessionId, count: toolProviderToolCalls.length }, + "Conversation provider returned no tools, using tool execution provider"); + return { + toolCalls: toolProviderToolCalls, + selectedProvider: 'tool_execution', + reason: 'conversation_provider_empty' + }; + } + + // If both returned nothing, return empty + if (conversationToolCalls.length === 0 && toolProviderToolCalls.length === 0) { + return { + toolCalls: [], + selectedProvider: 'none', + reason: 'both_empty' + }; + } + + // Both returned tool calls - compare them + + logger.info({ + sessionId, + conversationTools: conversationToolCalls.map(tc => ({ + name: tc.function?.name, + argCount: Object.keys(JSON.parse(tc.function?.arguments || '{}')).length + })), + toolProviderTools: toolProviderToolCalls.map(tc => ({ + name: tc.function?.name, + argCount: Object.keys(JSON.parse(tc.function?.arguments || '{}')).length + })) + }, "Comparing tool calls from both providers"); + + // Score each set + const conversationScore = scoreToolCalls(conversationToolCalls); + const toolProviderScore = scoreToolCalls(toolProviderToolCalls); + + if (toolProviderScore >= conversationScore) { + logger.info({ + sessionId, + toolProviderScore, + conversationScore, + selected: 'tool_execution' + }, "Selected tool execution provider (higher or equal score)"); + + return { + toolCalls: toolProviderToolCalls, + selectedProvider: 'tool_execution', + reason: 'higher_score', + scores: { tool_execution: toolProviderScore, conversation: conversationScore } + }; + } else { + logger.info({ + sessionId, + toolProviderScore, + conversationScore, + selected: 'conversation' + }, "Selected conversation provider (higher score)"); + + return { + toolCalls: conversationToolCalls, + selectedProvider: 'conversation', + reason: 'higher_score', + scores: { tool_execution: toolProviderScore, conversation: conversationScore } + }; + } +} + function 
buildNonJsonResponse(databricksResponse) { return { status: databricksResponse.status, @@ -1333,6 +1839,91 @@ function buildErrorResponse(databricksResponse) { }; } +/** + * Attempt to generate synthetic tool calls based on "Let me [action]..." pattern + * Approach 2: Context-aware tool generation + * @returns {Array|null} Generated tool calls or null if not possible + */ +function attemptGenerateToolCallsFromAction(action, fullText, payload) { + const toolCalls = []; + + // Extract common patterns from the text + const filePathMatch = fullText.match(/(?:file|path|location):\s*([^\n,\.]+)/i); + const filePath = filePathMatch ? filePathMatch[1].trim() : null; + + switch (action) { + case 'read': + case 'check': + case 'view': + // Generate Read tool call + if (filePath) { + toolCalls.push({ + id: `call_letme_read_${Date.now()}`, + function: { + name: 'Read', + arguments: { file_path: filePath } + } + }); + } + break; + + case 'verify': + // Generate verification tool calls (Read or Grep) + if (filePath) { + toolCalls.push({ + id: `call_letme_verify_${Date.now()}`, + function: { + name: 'Read', + arguments: { file_path: filePath } + } + }); + } + break; + + case 'run': + case 'execute': + // Generate Bash tool call for running tests/commands + if (fullText.includes('test')) { + toolCalls.push({ + id: `call_letme_run_${Date.now()}`, + function: { + name: 'Bash', + arguments: { command: 'npm run test:unit 2>&1 | tail -20', description: 'Run unit tests' } + } + }); + } + break; + + case 'search': + case 'find': + case 'grep': + // Generate Grep/search tool call + const searchTermMatch = fullText.match(/(?:for|search|find)\s+["\']?([^"\'\.]+)["\']?/i); + if (searchTermMatch) { + toolCalls.push({ + id: `call_letme_search_${Date.now()}`, + function: { + name: 'Grep', + arguments: { pattern: searchTermMatch[1], path: 'src', output_mode: 'files_with_matches' } + } + }); + } + break; + + case 'edit': + case 'update': + case 'modify': + // For edits, we can't generate without 
more context + // Return null to fallback to retry + return null; + + default: + return null; + } + + return toolCalls.length > 0 ? toolCalls : null; +} + async function runAgentLoop({ cleanPayload, requestedModel, @@ -1344,9 +1935,12 @@ async function runAgentLoop({ providerType, headers, }) { - console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length); + console.log('[DEBUG] runAgentLoop ENTERED - providerType:', providerType, 'messages:', cleanPayload.messages?.length, 'mode:', cleanPayload._requestMode || 'main', 'model:', cleanPayload.model); logger.info({ providerType, messageCount: cleanPayload.messages?.length }, 'runAgentLoop ENTERED'); const settings = resolveLoopOptions(options); + // Detect context window size for intelligent compression + const contextWindowTokens = await getContextWindow(); + console.log('[DEBUG] Context window detected:', contextWindowTokens, 'tokens for provider:', providerType); // Initialize audit logger (no-op if disabled) const auditLogger = createAuditLogger(config.audit); const start = Date.now(); @@ -1354,8 +1948,43 @@ async function runAgentLoop({ let toolCallsExecuted = 0; let fallbackPerformed = false; const toolCallNames = new Map(); - const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> count + const toolCallHistory = new Map(); // Track tool calls to detect loops: signature -> counta let loopWarningInjected = false; // Track if we've already warned about loops + let emptyResponseRetried = false; // Track if we've retried after an empty LLM response + let invokeTextRetries = 0; // How many times we've retried after "Invoking tool(s):" text + const MAX_INVOKE_TEXT_RETRIES = 3; // GLM-4.7 may need multiple nudges before producing tool_calls + let autoSpawnAttempts = 0; // How many times we've auto-spawned a subagent for "Invoking tool(s):" text + const MAX_AUTO_SPAWN_ATTEMPTS = 2; // Cap auto-spawn attempts to prevent infinite loops 
+ let classifierRetries = 0; // How many times we've retried after classifier detects intent-narration + const MAX_CLASSIFIER_RETRIES = 2; // Max retries via LLM classifier for intent-narration detection + + // Log agent loop start + logger.info( + { + sessionId: session?.id ?? null, + model: requestedModel, + maxSteps: settings.maxSteps, + maxDurationMs: settings.maxDurationMs, + wantsThinking, + providerType, + }, + "Agent loop started", + ); + + // Emit agent loop started event for external progress listeners + const progress = getProgressEmitter(); + + // Generate unique agent ID for this agent loop execution + const agentId = generateAgentId(); + + progress.agentLoopStarted({ + sessionId: session?.id ?? null, + agentId, + model: requestedModel, + maxSteps: settings.maxSteps, + maxDurationMs: settings.maxDurationMs, + providerType, + }); while (steps < settings.maxSteps) { if (Date.now() - start > settings.maxDurationMs) { @@ -1392,7 +2021,6 @@ async function runAgentLoop({ } steps += 1; - console.log('[LOOP DEBUG] Entered while loop - step:', steps); logger.debug( { sessionId: session?.id ?? null, @@ -1402,6 +2030,14 @@ async function runAgentLoop({ "Agent loop step", ); + // Emit agent loop step started event + progress.agentLoopStepStarted({ + sessionId: session?.id ?? null, + agentId, + step: steps, + maxSteps: settings.maxSteps, + }); + // Debug: Log payload before sending to Azure if (providerType === "azure-anthropic") { logger.debug( @@ -1423,7 +2059,8 @@ async function runAgentLoop({ cleanPayload.messages = historyCompression.compressHistory(originalMessages, { keepRecentTurns: config.historyCompression?.keepRecentTurns ?? 10, summarizeOlder: config.historyCompression?.summarizeOlder ?? 
true, - enabled: true + enabled: true, + contextWindowTokens, }); if (cleanPayload.messages !== originalMessages) { @@ -1708,22 +2345,209 @@ IMPORTANT TOOL USAGE RULES: }); } + // Check if tools are in the request and determine provider routing + const hasTools = Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0; + + // Check if last message is a tool result + const lastMessage = cleanPayload.messages?.[cleanPayload.messages.length - 1]; + const hasToolResults = lastMessage?.role === 'tool'; + + let shouldUseToolProvider = false; + let providerForThisCall = providerType; + + // DEBUG: Log the condition check + logger.info({ + sessionId: session?.id ?? null, + hasTools, + toolCount: cleanPayload.tools?.length || 0, + hasToolResults, + lastMessageRole: lastMessage?.role, + configToolExecutionProvider: config.toolExecutionProvider, + providerType, + willTrigger: hasTools && !hasToolResults && hasDedicatedToolModel(providerType) + }, "Tool execution provider condition check"); + + // Only use tool execution provider if: + // 1. We have tools available + // 2. We DON'T have tool results (not processing results from a previous call) + // 3. A dedicated tool model is configured (different provider OR same provider with different model) + if (hasTools && !hasToolResults && hasDedicatedToolModel(providerType)) { + shouldUseToolProvider = true; + providerForThisCall = config.toolExecutionProvider; + + logger.info({ + sessionId: session?.id ?? null, + conversationProvider: providerType, + toolProvider: config.toolExecutionProvider, + toolModel: config.toolExecutionModel || 'default', + toolCount: cleanPayload.tools.length, + compareMode: config.toolExecutionCompareMode + }, "Using tool execution provider for tool calling decision"); + } else if (hasToolResults) { + // When a dedicated tool model handles tool calling, strip tools from the + // result-processing payload. 
The conversation model only needs to summarize + // the result — if it wants to make more tool calls, the next iteration will + // route back to the tool model anyway. + if (hasDedicatedToolModel(providerType) && hasTools) { + const strippedCount = cleanPayload.tools?.length || 0; + delete cleanPayload.tools; + delete cleanPayload.tool_choice; + logger.info({ + sessionId: session?.id ?? null, + provider: providerType, + conversationModel: providerType === 'ollama' ? config.ollama?.model : config.openrouter?.model, + strippedToolCount: strippedCount + }, "Stripped tools from tool-results call - dedicated tool model handles tool decisions"); + } + logger.info({ + sessionId: session?.id ?? null, + provider: providerType + }, "Processing tool results - using conversation provider"); + } + let databricksResponse; + let conversationResponse = null; + + // Emit model invocation started event + const modelInvocationStartTime = Date.now(); + + // Determine the actual model being invoked (not the CLI-side model) + const effectiveModel = providerType === 'ollama' + ? (config.ollama?.model || requestedModel) + : requestedModel; + + progress.modelInvocationStarted({ + sessionId: session?.id ?? null, + agentId, + step: steps, + model: effectiveModel, + providerType, + estimatedTokens: cleanPayload._estimatedTokens, + }); + try { - databricksResponse = await invokeModel(cleanPayload); + if (shouldUseToolProvider) { + // Build request for tool execution provider + const toolExecutionPayload = { + ...cleanPayload, + model: config.toolExecutionModel || cleanPayload.model, + _requestMode: 'tool_execution', + }; + + try { + // Call tool execution provider + databricksResponse = await invokeModel(toolExecutionPayload, { + forceProvider: config.toolExecutionProvider, + callPurpose: 'tool_execution' + }); + + // If compare mode enabled, also call conversation provider + if (config.toolExecutionCompareMode) { + logger.info({ sessionId: session?.id ?? 
null }, + "Compare mode enabled - calling conversation provider too"); + + try { + conversationResponse = await invokeModel(cleanPayload, { + forceProvider: providerType, + callPurpose: 'conversation' + }); + } catch (convErr) { + logger.warn({ error: convErr.message }, + "Conversation provider call failed in compare mode"); + } + } + } catch (toolProviderError) { + logger.error({ + error: toolProviderError.message, + toolProvider: config.toolExecutionProvider + }, "Tool execution provider failed, falling back to conversation provider"); + + // Fallback to conversation provider + databricksResponse = await invokeModel(cleanPayload, { + forceProvider: providerType, + callPurpose: 'conversation' + }); + } + } else { + // Normal flow - use conversation provider + databricksResponse = await invokeModel(cleanPayload); + } } catch (modelError) { + // Check for Ollama-specific model errors first + if (providerType === 'ollama' && modelError.message) { + const errorMsg = modelError.message.toLowerCase(); + + // Model not loaded or not found + if (errorMsg.includes('model') && (errorMsg.includes('not found') || errorMsg.includes('not loaded') || errorMsg.includes('unavailable'))) { + logger.error({ + provider: providerType, + model: config.ollama?.model, + error: modelError.message + }, "Ollama model unavailable"); + + return { + response: { + status: 503, + body: { + error: { + type: "model_unavailable", + message: modelError.message, + }, + }, + terminationReason: "model_unavailable", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "model_unavailable", + }; + } + + // Check if Ollama service is unreachable (specific check for Ollama) + if (errorMsg.includes('unreachable') || errorMsg.includes('is it running')) { + logger.error({ + provider: providerType, + endpoint: config.ollama?.endpoint ?? 
config.ollama?.cloudEndpoint, + error: modelError.message + }, "Ollama service unreachable"); + + return { + response: { + status: 503, + body: { + error: { + type: "provider_unreachable", + message: modelError.message, + }, + }, + terminationReason: "provider_unreachable", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "provider_unreachable", + }; + } + } + + // Generic connection error check (for all providers) const isConnectionError = modelError.cause?.code === 'ECONNREFUSED' || modelError.message?.includes('fetch failed') || modelError.code === 'ECONNREFUSED'; + if (isConnectionError) { - logger.error(`Provider ${providerType} is unreachable (connection refused). Is it running?`); + const endpoint = config[providerType]?.endpoint || config[providerType]?.url || 'unknown'; + logger.error({ + provider: providerType, + endpoint, + error: modelError.message + }, `Provider ${providerType} connection refused`); + return { response: { status: 503, body: { error: { type: "provider_unreachable", - message: `Provider ${providerType} is unreachable. Is the service running?`, + message: `Provider ${providerType} is unreachable at ${endpoint}. Is the service running?`, }, }, terminationReason: "provider_unreachable", @@ -1733,6 +2557,7 @@ IMPORTANT TOOL USAGE RULES: terminationReason: "provider_unreachable", }; } + throw modelError; } @@ -1750,6 +2575,19 @@ IMPORTANT TOOL USAGE RULES: } } + // Emit model invocation completed event + const modelInvocationDurationMs = Date.now() - modelInvocationStartTime; + progress.modelInvocationCompleted({ + sessionId: session?.id ?? null, + agentId, + step: steps, + model: requestedModel, + providerType, + inputTokens: actualUsage?.input_tokens ?? actualUsage?.prompt_tokens ?? null, + outputTokens: actualUsage?.output_tokens ?? actualUsage?.completion_tokens ?? 
null, + durationMs: modelInvocationDurationMs, + }); + // Log LLM response after invocation if (auditLogger.enabled) { const latencyMs = Date.now() - start; @@ -1801,6 +2639,15 @@ IMPORTANT TOOL USAGE RULES: }); } } + logger.info({ + messageContent: databricksResponse.json?.message?.content + ? (typeof databricksResponse.json.message.content === 'string' + ? databricksResponse.json.message.content.substring(0, 500) + : JSON.stringify(databricksResponse.json.message.content).substring(0, 500)) + : 'NO_CONTENT', + hasToolCalls: !!databricksResponse.json?.message?.tool_calls, + toolCallCount: databricksResponse.json?.message?.tool_calls?.length || 0 + }, "=== RAW LLM RESPONSE CONTENT ==="); // Handle streaming responses (pass through without buffering) if (databricksResponse.stream) { @@ -1900,26 +2747,138 @@ IMPORTANT TOOL USAGE RULES: _anthropic_block: block, })); - logger.debug( - { - sessionId: session?.id ?? null, - contentBlocks: contentArray.length, - toolCallsFound: toolCalls.length, - stopReason: databricksResponse.json?.stop_reason, - }, - "Azure Anthropic response parsed", - ); + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + contentBlocks: contentArray.length, + toolCallsFound: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + stopReason: databricksResponse.json?.stop_reason, + }, + "Azure Anthropic response parsed", + ); + } else if (providerType === "ollama") { + // Ollama format: { message: { role, content, tool_calls }, done } + message = databricksResponse.json?.message ?? {}; + toolCalls = Array.isArray(message.tool_calls) ? 
message.tool_calls : []; + + // FALLBACK: If no native tool_calls but text contains tool patterns, + // extract them using per-model parser (model responded with text instead of tool call format) + if (toolCalls.length === 0 && message.content && typeof message.content === 'string') { + const { getParserForModel } = require("../parsers"); + const modelName = config.ollama?.model; + const parser = getParserForModel(modelName); + const extracted = parser.extractToolCallsFromText(message.content); + if (extracted && extracted.length > 0) { + logger.info({ + extractedCount: extracted.length, + toolNames: extracted.map(tc => tc.function?.name), + model: modelName, + parser: parser.constructor.name, + originalText: message.content.substring(0, 200), + }, "[TOOL_EXTRACTION_FALLBACK] Extracted tool calls from Ollama text response (via parser)"); + toolCalls = extracted; + // Clear text content to prevent double display (command text + tool result) + message.content = ""; + } + } + + logger.info({ + hasMessage: !!databricksResponse.json?.message, + hasToolCalls: toolCalls.length > 0, + toolCallCount: toolCalls.length, + toolNames: toolCalls.map(tc => tc.function?.name), + done: databricksResponse.json?.done, + fullToolCalls: JSON.stringify(toolCalls), + fullResponseMessage: JSON.stringify(databricksResponse.json?.message) + }, "=== OLLAMA TOOL CALLS EXTRACTION ==="); + + // Deduplicate tool calls for Ollama format + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? 
null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } } else { // OpenAI/Databricks format: { choices: [{ message: { tool_calls: [...] } }] } const choice = databricksResponse.json?.choices?.[0]; message = choice?.message ?? {}; toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; + + // Deduplicate tool calls for OpenAI format too + if (toolCalls.length > 0) { + const uniqueToolCalls = []; + const seenSignatures = new Set(); + let duplicatesRemoved = 0; + + for (const call of toolCalls) { + const signature = getToolCallSignature(call); + if (!seenSignatures.has(signature)) { + seenSignatures.add(signature); + uniqueToolCalls.push(call); + } else { + duplicatesRemoved++; + logger.warn({ + sessionId: session?.id ?? null, + toolName: call.function?.name || call.name, + toolId: call.id, + signature: signature.substring(0, 32), + }, "Duplicate tool call removed (same tool with identical parameters in single response)"); + } + } + + toolCalls = uniqueToolCalls; + + logger.info( + { + sessionId: session?.id ?? null, + step: steps, + toolCallsFound: toolCalls.length, + duplicatesRemoved, + toolNames: toolCalls.map(tc => tc.function?.name || tc.name), + }, + "LLM Response: Tool calls requested (after deduplication)", + ); + } } // Guard: drop hallucinated tool calls when no tools were sent to the model. // Some models (e.g. Llama 3.1) hallucinate tool_call blocks from conversation // history even when the request contained zero tool definitions. 
- const toolsWereSent = Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0; + // For Ollama, the client injects STANDARD_TOOLS independently of cleanPayload.tools, + // so only treat tool calls as hallucinated if _noToolInjection was explicitly set. + const ollamaToolsInjected = providerType === 'ollama' && !cleanPayload._noToolInjection; + const toolsWereSent = (Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0) || ollamaToolsInjected; if (toolCalls.length > 0 && !toolsWereSent) { logger.warn({ sessionId: session?.id ?? null, @@ -1931,6 +2890,368 @@ IMPORTANT TOOL USAGE RULES: // If there's also no text content, treat as empty response (handled below) } + // If compare mode is enabled and we have both responses, compare tool calls + let toolCallComparison = null; + if (config.toolExecutionCompareMode && conversationResponse?.json && shouldUseToolProvider) { + const conversationToolCalls = extractToolCallsFromResponse( + conversationResponse.json, + providerType + ); + + if (conversationToolCalls.length > 0 || toolCalls.length > 0) { + const comparison = compareAndSelectToolCalls( + conversationToolCalls, + toolCalls, + { sessionId: session?.id ?? null } + ); + + // Use selected tool calls + toolCalls = comparison.toolCalls; + toolCallComparison = comparison; + + // Log comparison result + logger.info({ + sessionId: session?.id ?? 
null, + selectedProvider: comparison.selectedProvider, + reason: comparison.reason, + scores: comparison.scores + }, "Tool call comparison complete"); + } + } + + // === EMPTY RESPONSE DETECTION (primary) === + // Check raw extracted message for empty content before tool handling or conversion + const rawTextContent = (() => { + if (typeof message.content === 'string') return message.content.trim(); + if (Array.isArray(message.content)) { + return message.content + .filter(b => b.type === 'text') + .map(b => b.text || '') + .join('') + .trim(); + } + return ''; + })(); + + if (toolCalls.length === 0 && !rawTextContent) { + console.log('[EMPTY RESPONSE] No text content and no tool calls - step:', steps, 'retried:', emptyResponseRetried); + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + messageKeys: Object.keys(message), + contentType: typeof message.content, + rawContentPreview: String(message.content || '').substring(0, 100), + }, "Empty LLM response detected (no text, no tool calls)"); + + // Retry once with a nudge + if (steps < settings.maxSteps && !emptyResponseRetried) { + emptyResponseRetried = true; + cleanPayload.messages.push({ + role: "assistant", + content: "", + }); + cleanPayload.messages.push({ + role: "user", + content: "Please provide a response to the user's message.", + }); + logger.info({ sessionId: session?.id ?? null }, "Retrying after empty response with nudge"); + continue; + } + + // Fallback after retry also returned empty + logger.warn({ sessionId: session?.id ?? null, steps }, "Empty response persisted after retry"); + return { + response: { + status: 200, + body: { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + model: requestedModel, + content: [{ type: "text", text: "I wasn't able to generate a response. Could you try rephrasing your message?" 
}], + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 }, + }, + terminationReason: "empty_response_fallback", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "empty_response_fallback", + }; + } + + // === "Invoking tool(s):" TEXT DETECTION === + // Some models (GLM-4.7, etc.) respond with "Invoking tool(s): Read, Read, Read" as TEXT + // instead of actual tool_calls. Always detect and log this pattern — even when tool_calls + // ARE present — so developers can diagnose tool dispatch issues across execution modes. + // GLM-4.7 also leaks XML/think tags into the content (e.g. "Grep", "Glob"). + const invokingToolMatch = rawTextContent && + /Invoking tool\(s\):\s*(.+)/im.exec(rawTextContent.trim()); + // Extract mentioned tools from "Invoking tool(s):" text (hoisted for use by auto-spawn below) + let mentionedToolsRaw = []; + if (invokingToolMatch) { + // Clean garbled XML/think tags from tool names (GLM-4.7 leaks , , etc.) + mentionedToolsRaw = invokingToolMatch[1] + .replace(/<\/?\w+[^>]*>/g, '') // strip any XML/HTML tags + .split(',') + .map(t => t.trim()) + .filter(Boolean); + const executionModeCurrent = config.toolExecutionMode || "server"; + const toolsStrippedBySmartSelection = !!cleanPayload._noToolInjection; + const toolsInPayload = Array.isArray(cleanPayload.tools) ? cleanPayload.tools.length : 0; + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + mentionedTools: mentionedToolsRaw, // Full list, no dedup (e.g. 
["Read", "Read", "Read"]) + mentionedToolCount: mentionedToolsRaw.length, + actualToolCallCount: toolCalls.length, + hasActualToolCalls: toolCalls.length > 0, + executionMode: executionModeCurrent, // "server", "client", or "passthrough" + toolsStrippedBySmartSelection, // true = smart-selection removed tools from request + toolsInPayload, // how many tool defs are currently in the payload + invokeTextRetries, // how many retries we've done so far + model: requestedModel, + rawText: rawTextContent.substring(0, 300), + }, `Model output 'Invoking tool(s):' as text — actualToolCalls=${toolCalls.length}, mode=${executionModeCurrent}, toolsStripped=${toolsStrippedBySmartSelection}, retry=${invokeTextRetries}/${MAX_INVOKE_TEXT_RETRIES}`); + } + + // Handle "Invoking tool(s):" text with NO actual tool_calls: + // 1. Try auto-spawning a subagent to fulfil the model's intent + // 2. Fall back to nudge-retry if subagent is disabled or fails + if (invokingToolMatch && toolCalls.length === 0 && steps < settings.maxSteps) { + + // --- Auto-spawn subagent --- + if (config.agents?.enabled && config.agents?.autoSpawn !== false && autoSpawnAttempts < MAX_AUTO_SPAWN_ATTEMPTS) { + autoSpawnAttempts++; + const uniqueMentionedTools = [...new Set(mentionedToolsRaw)]; + const agentType = mapToolsToAgentType(uniqueMentionedTools); + const userText = extractLastUserText(cleanPayload.messages); + const prompt = buildSubagentPrompt(userText, rawTextContent, uniqueMentionedTools); + + logger.info({ + sessionId: session?.id ?? null, + step: steps, + agentType, + mentionedTools: mentionedToolsRaw, + autoSpawnAttempt: autoSpawnAttempts, + }, `Auto-spawning ${agentType} subagent for 'Invoking tool(s):' text (attempt ${autoSpawnAttempts}/${MAX_AUTO_SPAWN_ATTEMPTS})`); + + try { + const result = await spawnAgent(agentType, prompt, { sessionId: session?.id ?? 
null, mainContext: cleanPayload.messages }); + if (result.success) { + // Inject model's text as assistant msg + subagent result as user msg + cleanPayload.messages.push({ role: "assistant", content: rawTextContent }); + cleanPayload.messages.push({ + role: "user", + content: `[Subagent ${agentType} completed]\n${result.result}`, + }); + logger.info({ + sessionId: session?.id ?? null, + step: steps, + agentType, + resultLength: result.result?.length ?? 0, + }, "Subagent completed successfully — injecting result into conversation"); + continue; // Re-enter loop so the model can synthesize the subagent output + } + logger.warn({ sessionId: session?.id ?? null, step: steps, error: result.error }, "Subagent returned failure — falling through to nudge"); + } catch (err) { + logger.warn({ sessionId: session?.id ?? null, step: steps, error: err.message }, "Subagent spawn failed — falling through to nudge"); + } + } + + // --- Nudge-retry fallback --- + if (invokeTextRetries < MAX_INVOKE_TEXT_RETRIES) { + invokeTextRetries++; + + // === LONG-TERM ROBUSTNESS: Always keep core tools === + // Set flag to prevent smart-selection from stripping core tools on retry + // Core tools (Read, Write, Edit, Bash, Grep, Glob) are essential for the agent to function + // and should never be filtered out regardless of request classification. + cleanPayload._invokeTextRetry = true; + + // Smart-selection may have stripped tools from this request (classified as "conversation"). + // The model clearly WANTS to use tools, so restore them for the retry. + if (cleanPayload._noToolInjection || !Array.isArray(cleanPayload.tools) || cleanPayload.tools.length === 0) { + const { STANDARD_TOOLS } = require('../clients/standard-tools'); + cleanPayload.tools = STANDARD_TOOLS; + delete cleanPayload._noToolInjection; + logger.info({ + sessionId: session?.id ?? 
null, + step: steps, + restoredToolCount: STANDARD_TOOLS.length, + }, "Restored STANDARD_TOOLS for 'Invoking tool(s):' retry — smart-selection had stripped them"); + } + + // Feed the model's text back and tell it to use actual tool calls + cleanPayload.messages.push({ + role: "assistant", + content: rawTextContent, + }); + cleanPayload.messages.push({ + role: "user", + content: `You responded with tool invocation text instead of using actual tool calls (attempt ${invokeTextRetries}/${MAX_INVOKE_TEXT_RETRIES}). ` + + "Please use the tool_call format, not text. Call the tools now with the correct parameters.", + }); + continue; + } + } + + // LLM-classifier route: ask the same model if this text indicates suppressed tool-call intent + if ( + toolCalls.length === 0 && + rawTextContent && + classifierRetries < MAX_CLASSIFIER_RETRIES + ) { + try { + const classifierPrompt = + `You are a classifier. Answer only YES or NO.\n\n` + + `Does the following model response indicate the model INTENDS to call a tool ` + + `(e.g. "Let me read...", "I'll create...", "Now let me run...", "I need to check...") ` + + `but did NOT actually emit a tool call?\n\n` + + `Model response:\n"""\n${rawTextContent.slice(0, 500)}\n"""\n\n` + + `Answer YES if narrating tool intent. Answer NO if it is a complete, informational, or conversational response.`; + + const classifierResponse = await invokeModel( + { + model: cleanPayload.model, + messages: [{ role: 'user', content: classifierPrompt }], + max_tokens: 10, + temperature: 0, + }, + { forceProvider: providerType, callPurpose: 'classifier' } + ); + + const classifierText = ( + classifierResponse.json?.message?.content ?? + classifierResponse.json?.choices?.[0]?.message?.content ?? + '' + ).trim().toUpperCase(); + + logger.info({ + sessionId: session?.id ?? 
null, + step: steps, + classifierModel: config.classifierModel, + classifierAnswer: classifierText, + rawTextPreview: rawTextContent.slice(0, 100), + classifierRetries, + }, `[CLASSIFIER] Intent-narration check: ${classifierText}`); + + if (classifierText.startsWith('YES')) { + classifierRetries++; + cleanPayload._invokeTextRetry = true; + + // Restore tools if smart-selection stripped them + if (cleanPayload._noToolInjection || !Array.isArray(cleanPayload.tools) || cleanPayload.tools.length === 0) { + const { STANDARD_TOOLS } = require('../clients/standard-tools'); + cleanPayload.tools = STANDARD_TOOLS; + delete cleanPayload._noToolInjection; + logger.info( + { restoredToolCount: STANDARD_TOOLS.length }, + '[CLASSIFIER] Restored STANDARD_TOOLS for classifier retry' + ); + } + + cleanPayload.messages.push({ role: 'assistant', content: rawTextContent }); + cleanPayload.messages.push({ + role: 'user', + content: `Please stop narrating what you are about to do and just call the tools directly ` + + `(classifier retry ${classifierRetries}/${MAX_CLASSIFIER_RETRIES}).`, + }); + + logger.info({ + sessionId: session?.id ?? null, + step: steps, + variant: 'CLASSIFIER_RETRY', + retryCount: classifierRetries, + }, '[LET-ME] Executing: Classifier Retry (YES detected)'); + + continue; + } + + // ===== APPROACH 1 & 2: Smart narration pattern detection with tool generation ===== + // Match: "Let me...", "Now let me...", "First let me...", "I'll...", "I'm going to..." + const narrationPatterns = [ + /^(?:Now\s+|First\s+)?Let me\s+(\w+)/i, + /^I'll\s+(\w+)/i, + /^I'm going to\s+(\w+)/i, + /^Let me\s+(\w+)/i, + ]; + + let letMeMatch = null; + for (const pattern of narrationPatterns) { + letMeMatch = rawTextContent.match(pattern); + if (letMeMatch) break; + } + + if (letMeMatch && (!classifierText || classifierText.trim().length === 0)) { + const action = letMeMatch[1].toLowerCase(); + + logger.info({ + sessionId: session?.id ?? 
null, + step: steps, + detectedAction: action, + classifierAnswer: classifierText || '(empty)', + rawPreview: rawTextContent.slice(0, 100), + }, `[LET-ME] Detected "Let me ${action}..." pattern`); + + // Attempt Approach 2: Generate synthetic tool calls for common actions + const generatedToolCalls = attemptGenerateToolCallsFromAction(action, rawTextContent, cleanPayload); + + if (generatedToolCalls && generatedToolCalls.length > 0) { + // Approach 2 succeeded - inject synthetic tool calls + logger.info({ + sessionId: session?.id ?? null, + step: steps, + variant: 'AUTO_TOOL_GENERATION', + action: action, + generatedCount: generatedToolCalls.length, + toolNames: generatedToolCalls.map(tc => tc.name || tc.function?.name), + }, '[LET-ME] Executing: Auto Tool Generation (Approach 2)'); + + // Inject the synthetic tool calls + cleanPayload.messages.push({ role: 'assistant', content: rawTextContent }); + toolCalls = generatedToolCalls; + // Skip the normal tool call processing and go straight to execution + if (toolCalls.length > 0) { + // Mark that we're using synthetic calls from "Let me..." pattern + cleanPayload._letMeSynthetic = true; + } + } else { + // Approach 2 failed or not applicable - fallback to Approach 1: Smart retry + logger.info({ + sessionId: session?.id ?? 
null, + step: steps, + variant: 'SMART_RETRY', + action: action, + }, '[LET-ME] Executing: Smart Retry (Approach 1) - tool generation not possible'); + + classifierRetries++; + cleanPayload._invokeTextRetry = true; + + // Restore tools if smart-selection stripped them + if (cleanPayload._noToolInjection || !Array.isArray(cleanPayload.tools) || cleanPayload.tools.length === 0) { + const { STANDARD_TOOLS } = require('../clients/standard-tools'); + cleanPayload.tools = STANDARD_TOOLS; + delete cleanPayload._noToolInjection; + } + + cleanPayload.messages.push({ role: 'assistant', content: rawTextContent }); + cleanPayload.messages.push({ + role: 'user', + content: `Don't narrate what you're about to do - actually execute the ${action} operation now by calling the appropriate tools directly.`, + }); + + continue; + } + } + } catch (err) { + logger.warn( + { sessionId: session?.id ?? null, step: steps, err: err.message }, + '[CLASSIFIER] Classifier call failed — falling through to normal response' + ); + } + } + if (toolCalls.length > 0) { // Convert OpenAI/OpenRouter format to Anthropic format for session storage let sessionContent; @@ -2021,11 +3342,24 @@ IMPORTANT TOOL USAGE RULES: const toolNames = toolCalls .map((call) => call.function?.name ?? 
"tool") .join(", "); - assistantToolMessage.content = `Invoking tool(s): ${toolNames}`; + assistantToolMessage.content = `[tool-calls: ${toolNames}]`; } cleanPayload.messages.push(assistantToolMessage); + // === UNIVERSAL TOOL CALL CLEANING (via per-model parser) === + // Clean all tool calls by extracting commands from markdown formatting + // This runs for ALL providers, not just as an Ollama fallback + if (toolCalls && toolCalls.length > 0) { + const { cleanToolCalls } = require('../tools/tool-call-cleaner'); + toolCalls = cleanToolCalls(toolCalls, requestedModel); + + // Update assistantToolMessage if it was modified + if (providerType !== "azure-anthropic" && assistantToolMessage.tool_calls) { + assistantToolMessage.tool_calls = toolCalls; + } + } + // Check if tool execution should happen on client side const executionMode = config.toolExecutionMode || "server"; @@ -2184,6 +3518,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, })) ); @@ -2230,6 +3565,15 @@ IMPORTANT TOOL USAGE RULES: cleanPayload.messages.push(toolMessage); + logger.info( + { + toolName: execution.name, + content: typeof toolMessage.content === 'string' + ? toolMessage.content.substring(0, 500) + : JSON.stringify(toolMessage.content).substring(0, 500) + }, "Tool result content sent to LLM", + ); + // Convert to Anthropic format for session storage let sessionToolResultContent; if (providerType === "azure-anthropic") { @@ -2274,7 +3618,7 @@ IMPORTANT TOOL USAGE RULES: maxToolCallsPerRequest: settings.maxToolCallsPerRequest, steps, }, - "Maximum tool calls per request exceeded after parallel Task execution - terminating", + "Maximum tool calls per request (POLICY_MAX_TOOL_CALLS_PER_REQUEST) exceeded after parallel Task execution - terminating", ); return { @@ -2283,7 +3627,9 @@ IMPORTANT TOOL USAGE RULES: body: { error: { type: "max_tool_calls_exceeded", - message: `Maximum tool calls per request exceeded. 
The model attempted to execute ${toolCallsExecuted} tool calls, but the limit is ${settings.maxToolCallsPerRequest}. This may indicate a complex task that requires breaking down into smaller steps.`, + message: `Maximum tool calls per request exceeded. The model attempted to execute ${toolCallsExecuted} tool calls, but the limit is ${settings.maxToolCallsPerRequest}. This may indicate a complex task that requires breaking down into smaller steps. + +To increase the limit: Set POLICY_MAX_TOOL_CALLS_PER_REQUEST`, }, }, terminationReason: "max_tool_calls_exceeded", @@ -2393,7 +3739,7 @@ IMPORTANT TOOL USAGE RULES: maxToolCallsPerRequest: settings.maxToolCallsPerRequest, steps, }, - "Maximum tool calls per request exceeded - terminating", + "Maximum tool calls per request (POLICY_MAX_TOOL_CALLS_PER_REQUEST) exceeded - terminating", ); return { @@ -2402,7 +3748,9 @@ IMPORTANT TOOL USAGE RULES: body: { error: { type: "max_tool_calls_exceeded", - message: `Maximum tool calls per request exceeded. The model attempted to execute ${toolCallsExecuted} tool calls, but the limit is ${settings.maxToolCallsPerRequest}. This may indicate a complex task that requires breaking down into smaller steps.`, + message: `Maximum tool calls per request exceeded. The model attempted to execute ${toolCallsExecuted} tool calls, but the limit is ${settings.maxToolCallsPerRequest}. This may indicate a complex task that requires breaking down into smaller steps. + +To increase the limit: Set POLICY_MAX_TOOL_CALLS_PER_REQUEST`, }, }, terminationReason: "max_tool_calls_exceeded", @@ -2413,12 +3761,55 @@ IMPORTANT TOOL USAGE RULES: }; } + const toolName = call.function?.name ?? call.name ?? "unknown"; + + // Helper to get first 200 chars of any value + const getPreview = (val, maxChars = 200) => { + if (!val) return null; + const str = typeof val === 'string' ? 
val : JSON.stringify(val); + if (str.length > maxChars) return str.substring(0, maxChars) + '...'; + return str; + }; + + const requestPreview = getPreview(call.arguments ?? call.function?.arguments); + progress.toolExecutionStarted({ + sessionId: session?.id ?? null, + agentId, + step: steps, + toolName, + toolId: call.id, + requestPreview, + }); + + const _toolExecStart = Date.now(); const execution = await executeToolCall(call, { session, cwd, requestMessages: cleanPayload.messages, + providerType, + }); + + const responsePreview = getPreview(execution.content); + progress.toolExecutionCompleted({ + sessionId: session?.id ?? null, + agentId, + step: steps, + toolName, + toolId: call.id, + ok: execution.ok !== false, + durationMs: Date.now() - _toolExecStart, + responsePreview, }); + logger.debug( + { + id: execution.id ?? null, + name: execution.name ?? null, + arguments: execution.arguments ?? null, + content: execution.content ?? null, + is_error: execution.ok === false, + }, "executeToolCall response" ); + let toolMessage; if (providerType === "azure-anthropic") { const parsedContent = parseExecutionContent(execution.content); @@ -2617,7 +4008,16 @@ IMPORTANT TOOL USAGE RULES: } } - continue; + logger.info({ + sessionId: session?.id ?? null, + step: steps, + toolCallsExecuted: toolCallsExecuted, + totalToolCallsInThisStep: toolCalls.length, + messageCount: cleanPayload.messages.length, + lastMessageRole: cleanPayload.messages[cleanPayload.messages.length - 1]?.role, + }, "Tool execution complete"); + + continue; // Loop back to invoke model with tool results in context } let anthropicPayload; @@ -2879,6 +4279,68 @@ IMPORTANT TOOL USAGE RULES: anthropicPayload.content = policy.sanitiseContent(anthropicPayload.content); } + // === EMPTY RESPONSE DETECTION (safety net — post-conversion) === + // Primary detection is earlier (before tool handling). This catches edge cases + // where conversion produces empty content from non-empty raw data. 
+ const hasTextContent = (() => { + if (Array.isArray(anthropicPayload.content)) { + return anthropicPayload.content.some(b => b.type === "text" && b.text?.trim()); + } + if (typeof anthropicPayload.content === "string") { + return anthropicPayload.content.trim().length > 0; + } + return false; + })(); + + const hasToolUseBlocks = Array.isArray(anthropicPayload.content) && + anthropicPayload.content.some(b => b.type === "tool_use"); + + if (!hasToolUseBlocks && !hasTextContent) { + logger.warn({ + sessionId: session?.id ?? null, + step: steps, + messageKeys: Object.keys(anthropicPayload), + contentType: typeof anthropicPayload.content, + contentLength: Array.isArray(anthropicPayload.content) ? anthropicPayload.content.length : String(anthropicPayload.content || "").length, + }, "Empty LLM response detected (no text, no tool calls)"); + + // Retry once with a nudge + if (steps < settings.maxSteps && !emptyResponseRetried) { + emptyResponseRetried = true; + cleanPayload.messages.push({ + role: "assistant", + content: "", + }); + cleanPayload.messages.push({ + role: "user", + content: "Please provide a response to the user's message.", + }); + logger.info({ sessionId: session?.id ?? null }, "Retrying after empty response with nudge"); + continue; // Go back to top of while loop + } + + // If retry also returned empty, return a fallback message + logger.warn({ sessionId: session?.id ?? null, steps }, "Empty response persisted after retry"); + return { + response: { + status: 200, + body: { + id: `msg_${Date.now()}`, + type: "message", + role: "assistant", + model: requestedModel, + content: [{ type: "text", text: "I wasn't able to generate a response. Could you try rephrasing your message?" 
}], + stop_reason: "end_turn", + usage: { input_tokens: 0, output_tokens: 0 }, + }, + terminationReason: "empty_response_fallback", + }, + steps, + durationMs: Date.now() - start, + terminationReason: "empty_response_fallback", + }; + } + // Ensure content is an array before calling .find() const content = Array.isArray(anthropicPayload.content) ? anthropicPayload.content : []; const fallbackCandidate = content.find( @@ -3064,6 +4526,7 @@ IMPORTANT TOOL USAGE RULES: session, cwd, requestMessages: cleanPayload.messages, + providerType, }); const toolResultMessage = createFallbackToolResultMessage(providerType, { @@ -3116,7 +4579,7 @@ IMPORTANT TOOL USAGE RULES: maxToolCallsPerRequest: settings.maxToolCallsPerRequest, steps, }, - "Maximum tool calls per request exceeded during fallback - terminating", + "Maximum tool calls per request (POLICY_MAX_TOOL_CALLS_PER_REQUEST) exceeded during fallback - terminating", ); return { @@ -3125,7 +4588,9 @@ IMPORTANT TOOL USAGE RULES: body: { error: { type: "max_tool_calls_exceeded", - message: `Maximum tool calls per request exceeded. The model attempted to execute ${toolCallsExecuted} tool calls, but the limit is ${settings.maxToolCallsPerRequest}. This may indicate a complex task that requires breaking down into smaller steps.`, + message: `Maximum tool calls per request exceeded. The model attempted to execute ${toolCallsExecuted} tool calls, but the limit is ${settings.maxToolCallsPerRequest}. This may indicate a complex task that requires breaking down into smaller steps. + +To increase the limit: Set POLICY_MAX_TOOL_CALLS_PER_REQUEST`, }, }, terminationReason: "max_tool_calls_exceeded", @@ -3194,6 +4659,52 @@ IMPORTANT TOOL USAGE RULES: } const finalDurationMs = Date.now() - start; + + // === LIMIT PROXIMITY WARNING === + // If the response completed but we're at/near a limit, append a warning + // so the user knows the response may be truncated. 
+ const limitWarnings = []; + if (steps >= settings.maxSteps - 1) { + limitWarnings.push( + `Step limit reached (${steps}/${settings.maxSteps}). ` + + `Increase with POLICY_MAX_STEPS (current: ${settings.maxSteps}).` + ); + } + if (toolCallsExecuted >= settings.maxToolCallsPerRequest - 1) { + limitWarnings.push( + `Tool call limit reached (${toolCallsExecuted}/${settings.maxToolCallsPerRequest}). ` + + `Increase with POLICY_MAX_TOOL_CALLS_PER_REQUEST (current: ${settings.maxToolCallsPerRequest}).` + ); + } + const durationPct = finalDurationMs / settings.maxDurationMs; + if (durationPct >= 0.9) { + limitWarnings.push( + `Duration limit nearly reached (${Math.round(finalDurationMs / 1000)}s/${Math.round(settings.maxDurationMs / 1000)}s). ` + + `Increase with POLICY_MAX_DURATION_MS (current: ${settings.maxDurationMs}).` + ); + } + + if (limitWarnings.length > 0) { + const warningText = `\n\n---\n**Agent loop limit warning:** ${limitWarnings.join(' ')} The response above may be incomplete.`; + logger.warn({ + sessionId: session?.id ?? null, + steps, + toolCallsExecuted, + durationMs: finalDurationMs, + limits: { + maxSteps: settings.maxSteps, + maxToolCallsPerRequest: settings.maxToolCallsPerRequest, + maxDurationMs: settings.maxDurationMs, + }, + warnings: limitWarnings, + }, "Agent loop completed near limits — appending warning to response"); + + // Append warning text block to the response content + if (Array.isArray(anthropicPayload?.content)) { + anthropicPayload.content.push({ type: "text", text: warningText }); + } + } + logger.info( { sessionId: session?.id ?? null, @@ -3203,36 +4714,77 @@ IMPORTANT TOOL USAGE RULES: toolCallLoopWarnings: loopWarningInjected ? 1 : 0, durationMs: finalDurationMs, avgDurationPerStep: steps > 0 ? Math.round(finalDurationMs / steps) : 0, + limitWarnings: limitWarnings.length > 0 ? 
limitWarnings : undefined, }, "Agent loop completed successfully", ); + + // DIAGNOSTIC: Log response being returned + logger.info({ + sessionId: session?.id ?? null, + status: 200, + hasBody: !!anthropicPayload, + bodyKeys: anthropicPayload ? Object.keys(anthropicPayload) : [], + contentType: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? 'array' : typeof anthropicPayload.content) : 'none', + contentLength: anthropicPayload?.content ? (Array.isArray(anthropicPayload.content) ? anthropicPayload.content.length : String(anthropicPayload.content).length) : 0, + stopReason: anthropicPayload?.stop_reason + }, "=== RETURNING RESPONSE TO CLIENT ==="); + + progress.agentLoopCompleted({ + sessionId: session?.id ?? null, + agentId, + steps, + toolCallsExecuted, + durationMs: finalDurationMs, + terminationReason: "completion", + }); + return { response: { status: 200, body: anthropicPayload, terminationReason: "completion", + toolCallComparison, }, steps, durationMs: finalDurationMs, terminationReason: "completion", + toolCallComparison, }; } + const finalDurationMs = Date.now() - start; + + // Determine which specific limit was hit + const hitLimits = []; + if (steps >= settings.maxSteps) { + hitLimits.push(`Step limit reached (${steps}/${settings.maxSteps}). Increase with POLICY_MAX_STEPS.`); + } + if (finalDurationMs >= settings.maxDurationMs) { + hitLimits.push(`Duration limit reached (${Math.round(finalDurationMs / 1000)}s/${Math.round(settings.maxDurationMs / 1000)}s). Increase with POLICY_MAX_DURATION_MS.`); + } + if (toolCallsExecuted >= settings.maxToolCallsPerRequest) { + hitLimits.push(`Tool call limit reached (${toolCallsExecuted}/${settings.maxToolCallsPerRequest}). Increase with POLICY_MAX_TOOL_CALLS_PER_REQUEST.`); + } + const limitMessage = hitLimits.length > 0 + ? 
`Agent loop limit exceeded: ${hitLimits.join(' ')}` + : "Reached agent loop limits without producing a response."; + appendTurnToSession(session, { role: "assistant", type: "error", status: 504, content: { error: "max_steps_exceeded", - message: "Reached agent loop limits without producing a response.", + message: limitMessage, limits: { maxSteps: settings.maxSteps, maxDurationMs: settings.maxDurationMs, + maxToolCallsPerRequest: settings.maxToolCallsPerRequest, }, }, metadata: { termination: "max_steps" }, }); - const finalDurationMs = Date.now() - start; logger.warn( { sessionId: session?.id ?? null, @@ -3243,16 +4795,26 @@ IMPORTANT TOOL USAGE RULES: maxSteps: settings.maxSteps, maxDurationMs: settings.maxDurationMs, maxToolCallsPerRequest: settings.maxToolCallsPerRequest, + hitLimits, }, "Agent loop exceeded limits", ); + progress.agentLoopCompleted({ + sessionId: session?.id ?? null, + agentId, + steps, + toolCallsExecuted, + durationMs: finalDurationMs, + terminationReason: "max_steps", + }); + return { response: { status: 504, body: { error: "max_steps_exceeded", - message: "Reached agent loop limits without producing a response.", + message: limitMessage, limits: { maxSteps: settings.maxSteps, maxDurationMs: settings.maxDurationMs, @@ -3263,6 +4825,7 @@ IMPORTANT TOOL USAGE RULES: toolCallsExecuted, durationMs: finalDurationMs, }, + hint: hitLimits, }, terminationReason: "max_steps", }, @@ -3300,6 +4863,86 @@ function detectSuggestionMode(messages) { return { isSuggestionMode: false }; } +/** + * Detect if the current request is a topic detection/classification call. + * These requests typically have a system prompt asking to classify conversation + * topics, with no tools and very short messages. They waste GPU time on large + * models (30-90s just to classify a topic). + * + * Detection heuristics: + * 1. System prompt contains topic classification instructions + * 2. No tools in the payload (topic detection never needs tools) + * 3. 
/**
 * Detect whether the current request is a topic detection/classification call.
 *
 * These requests typically have a system prompt asking to classify conversation
 * topics, with no tools and very short messages. They waste GPU time on large
 * models (30-90s just to classify a topic).
 *
 * Detection heuristics:
 *   1. System prompt (or first message) contains topic-classification phrases.
 *   2. No tools in the payload (topic detection never needs tools).
 *   3. Short message count (typically 1-3 messages) mentioning "topic"/"classify".
 *
 * @param {Object} payload - The request payload
 * @returns {{ isTopicDetection: boolean }}
 */
function detectTopicDetection(payload) {
  if (!payload) return { isTopicDetection: false };

  // Topic detection requests never carry tools. Treat a missing tools field
  // and an empty tools array identically — the previous fallback heuristic
  // used `!payload.tools`, which wrongly excluded `tools: []` even though the
  // guard below already treats an empty array as "no tools".
  const hasTools = Array.isArray(payload.tools) && payload.tools.length > 0;
  if (hasTools) return { isTopicDetection: false };

  // Collect system prompt text (string form or content-block array form).
  const systemText = typeof payload.system === 'string'
    ? payload.system
    : Array.isArray(payload.system)
      ? payload.system.map(b => b.text || '').join(' ')
      : '';

  // Some clients embed the classification instructions in the first message
  // instead of the system prompt.
  let firstMsgText = '';
  if (Array.isArray(payload.messages) && payload.messages.length > 0) {
    const first = payload.messages[0];
    if (first?.role === 'user' || first?.role === 'system') {
      firstMsgText = typeof first.content === 'string'
        ? first.content
        : Array.isArray(first.content)
          ? first.content.map(b => b.text || '').join(' ')
          : '';
    }
  }

  const lc = (systemText + ' ' + firstMsgText).toLowerCase();

  // Match patterns that Claude Code uses for topic detection.
  const topicPatterns = [
    'new conversation topic',
    'topic change',
    'classify the topic',
    'classify this message',
    'conversation topic',
    'topic classification',
    'determines the topic',
    'determine the topic',
    'categorize the topic',
    'what topic',
    'identify the topic',
  ];

  if (topicPatterns.some(p => lc.includes(p))) {
    return { isTopicDetection: true };
  }

  // Additional heuristic: very short, tool-less payload whose prompt mentions
  // "topic" or "classify" somewhere.
  if (
    Array.isArray(payload.messages) &&
    payload.messages.length <= 3 &&
    (lc.includes('topic') || lc.includes('classify'))
  ) {
    return { isTopicDetection: true };
  }

  return { isTopicDetection: false };
}
+} + async function processMessage({ payload, headers, session, cwd, options = {} }) { const requestedModel = payload?.model ?? @@ -3314,9 +4957,10 @@ async function processMessage({ payload, headers, session, cwd, options = {} }) const suggestionModelConfig = config.modelProvider?.suggestionModeModel ?? "default"; if (isSuggestionMode && suggestionModelConfig.toLowerCase() === "none") { logger.info('Suggestion mode: skipping LLM call (SUGGESTION_MODE_MODEL=none)'); + if (session) session._pendingUserInput = null; return { response: { - json: { + body: { id: `msg_suggestion_skip_${Date.now()}`, type: "message", role: "assistant", @@ -3426,6 +5070,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} }) }, }; + if (session) session._pendingUserInput = null; return { status: 200, body: forcedResponse, @@ -3433,7 +5078,18 @@ async function processMessage({ payload, headers, session, cwd, options = {} }) }; } - const cleanPayload = sanitizePayload(payload); + // === INPUT CLEANUP: Strip interrupted request prefix === + if (session && payload?.messages) { + cleanInterruptedInput(session, payload.messages); + } + + // Set pending input flag (will be cleared on completion) + const userText = extractLastUserText(payload?.messages); + if (session && userText) { + session._pendingUserInput = userText; + } + + const cleanPayload = await sanitizePayload(payload); // Proactively load tools based on prompt content (lazy loading) try { @@ -3501,6 +5157,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} }) "Agent response served from prompt cache", ); + if (session) session._pendingUserInput = null; return { status: 200, body: anthropicPayload, @@ -3534,6 +5191,7 @@ async function processMessage({ payload, headers, session, cwd, options = {} }) }, }); + if (session) session._pendingUserInput = null; return { status: 200, body: cachedBody, @@ -3560,6 +5218,11 @@ async function processMessage({ payload, headers, session, 
/**
 * Abstract base class for per-model tool parsers.
 *
 * Inspired by vLLM's ToolParser hierarchy — each model family gets its own
 * subclass that owns all regex patterns, tag detection, and argument parsing.
 *
 * Subclasses MUST override at least `extractToolCallsFromText`.
 */
class BaseToolParser {
  /**
   * @param {string} modelName - Full model name (e.g. "glm-4.7:cloud")
   */
  constructor(modelName) {
    this.modelName = modelName;
  }

  /**
   * Parse tool calls from raw text when the model outputs text instead of
   * native tool_calls.
   *
   * @param {string} text - Raw assistant text
   * @returns {object[]|null} Array of Ollama-format tool call objects, or null
   *   Each element: { function: { name, arguments } }
   */
  extractToolCallsFromText(text) {
    throw new Error(`${this.constructor.name} must implement extractToolCallsFromText`);
  }

  /**
   * Normalize / fix native tool_calls from the Ollama response.
   * Default implementation is identity (pass-through).
   *
   * @param {object[]} toolCalls - Ollama-format tool_calls array
   * @returns {object[]} Cleaned tool calls
   */
  normalizeToolCalls(toolCalls) {
    return toolCalls;
  }

  /**
   * Clean a single tool call's arguments (strip markdown, fix formatting).
   * Default: pass-through.
   *
   * @param {object} toolCall - Single tool call (Anthropic or OpenAI format)
   * @returns {object} Cleaned tool call
   */
  cleanArguments(toolCall) {
    return toolCall;
  }

  /**
   * Strip model-specific reasoning tags (e.g. for DeepSeek/Qwen).
   * Default strips the universal <think>...</think> pattern.
   *
   * BUG FIX: the previous pattern (`/[\s\S]*?<\/think>/g`) was missing the
   * opening <think> anchor, so any stray `</think>` closer deleted ALL text
   * preceding it. The base class now removes only complete pairs; orphaned
   * closing tags are a model-specific quirk handled by subclasses (see
   * Glm47ToolParser._stripOrphanedClosingTags).
   *
   * @param {string} text
   * @returns {string}
   */
  stripReasoningTags(text) {
    if (typeof text !== 'string') return text;
    return text.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
  }
}

module.exports = BaseToolParser;
/**
 * Generic (fallback) tool parser.
 *
 * Handles JSON tool call extraction and markdown argument cleaning.
 * Used for any model that doesn't have a dedicated parser.
 */
const BaseToolParser = require('./base-tool-parser');
const logger = require('../logger');

// Shared regex for shell command validation
const SHELL_COMMAND_RE = /^(git|ls|cd|cat|head|tail|grep|find|mkdir|rm|cp|mv|pwd|echo|curl|wget|npm|node|python|pip|docker|kubectl|make|go|cargo|rustc)\b/;

// Markdown cleaning regexes (used in cleanArguments)
const FENCE_REGEX = /```(?:bash|sh|shell|zsh|console|terminal)\s*\n([\s\S]*?)```/i;
const BULLET_POINT_REGEX = /^\s*[●•\-\*❯>]\s+/gm;
const PROMPT_CHAR_REGEX = /^\s*[$#]\s+/gm;

class GenericToolParser extends BaseToolParser {
  /**
   * Extract tool calls from text using JSON detection.
   * Looks for {"name": "...", "parameters": {...}} patterns.
   *
   * @param {string} text - Raw assistant text
   * @returns {object[]|null} Ollama-format tool calls, or null
   */
  extractToolCallsFromText(text) {
    if (!text || typeof text !== 'string') return null;
    return this._jsonToolCall(text);
  }

  /**
   * Clean a tool call's arguments by stripping markdown formatting.
   * Currently handles Bash tool command cleanup only; every other tool
   * passes through untouched.
   *
   * @param {object} toolCall - Single tool call (Anthropic or OpenAI format)
   * @returns {object} Cleaned tool call (new object only when changed)
   */
  cleanArguments(toolCall) {
    if (!toolCall) return toolCall;

    const toolName = toolCall.name ?? toolCall.function?.name;
    if (toolName !== 'Bash') return toolCall;

    // Handle Anthropic format: { input: { command } }
    if (toolCall.input?.command && typeof toolCall.input.command === 'string') {
      const cleaned = this._stripMarkdownFromCommand(toolCall.input.command);
      if (cleaned !== toolCall.input.command) {
        return { ...toolCall, input: { ...toolCall.input, command: cleaned } };
      }
      return toolCall;
    }

    // Handle OpenAI format: { function: { arguments: JSON-string | object } }
    if (toolCall.function?.arguments) {
      let args;
      if (typeof toolCall.function.arguments === 'string') {
        // BUG FIX: arguments may be malformed JSON (models emit truncated or
        // markdown-wrapped strings). Previously JSON.parse threw here and
        // aborted the whole cleaning pass; now we pass the call through
        // unchanged and let downstream validation report it.
        try {
          args = JSON.parse(toolCall.function.arguments);
        } catch (e) {
          logger.debug({ error: e.message }, 'GenericToolParser: unparseable tool arguments, passing through');
          return toolCall;
        }
      } else {
        args = toolCall.function.arguments;
      }
      if (args?.command && typeof args.command === 'string') {
        const cleaned = this._stripMarkdownFromCommand(args.command);
        if (cleaned !== args.command) {
          return {
            ...toolCall,
            function: {
              ...toolCall.function,
              arguments: JSON.stringify({ ...args, command: cleaned })
            }
          };
        }
      }
    }

    return toolCall;
  }

  // -- Private helpers -------------------------------------------------------

  /**
   * JSON tool call extraction: {"name": "...", "parameters": {...}}
   * Finds the first balanced JSON object starting at a `"name":` key.
   */
  _jsonToolCall(text) {
    const startMatch = text.match(/\{\s*"name"\s*:/);
    if (!startMatch) return null;

    // Walk braces to find the matching closer for the candidate object.
    // NOTE(review): brace counting ignores braces inside string values —
    // acceptable for typical commands, but a quoted "{" could mis-balance.
    const startIdx = startMatch.index;
    let braceCount = 0;
    let endIdx = -1;
    for (let i = startIdx; i < text.length; i++) {
      if (text[i] === '{') braceCount++;
      else if (text[i] === '}') {
        braceCount--;
        if (braceCount === 0) { endIdx = i + 1; break; }
      }
    }
    if (endIdx === -1) return null;

    try {
      const parsed = JSON.parse(text.substring(startIdx, endIdx));
      if (parsed.name && parsed.parameters) {
        logger.info({ toolName: parsed.name }, 'GenericToolParser: JSON tool call extracted');
        return [{ function: { name: parsed.name, arguments: parsed.parameters } }];
      }
    } catch (e) {
      logger.debug({ error: e.message }, 'GenericToolParser: JSON parse failed');
    }
    return null;
  }

  /**
   * Strip markdown code fences, bullet points, and prompt characters.
   */
  _stripMarkdownFromCommand(command) {
    if (!command || typeof command !== 'string') return command;

    let cleaned = command;

    // Code fence extraction: prefer the fenced body when present.
    const fenceMatch = command.match(FENCE_REGEX);
    if (fenceMatch && fenceMatch[1]) {
      cleaned = fenceMatch[1];
    }

    // Bullet points
    cleaned = cleaned.replace(BULLET_POINT_REGEX, '');

    // Prompt characters ($ / #)
    cleaned = cleaned.replace(PROMPT_CHAR_REGEX, '');

    return cleaned.trim();
  }
}

// Export the class and shared constants for testing
module.exports = GenericToolParser;
module.exports.SHELL_COMMAND_RE = SHELL_COMMAND_RE;
module.exports.FENCE_REGEX = FENCE_REGEX;
module.exports.BULLET_POINT_REGEX = BULLET_POINT_REGEX;
module.exports.PROMPT_CHAR_REGEX = PROMPT_CHAR_REGEX;
/**
 * GLM-4.7 tool parser.
 *
 * Handles GLM-4.7's native XML tool call format:
 *   <tool_call>funcname
 *   <arg_key>key</arg_key>
 *   <arg_value>value</arg_value>
 *   </tool_call>
 *
 * Also handles GLM's common fallback patterns:
 *   - Bullet-point shell commands
 *   - Fenced code block shell commands
 *
 * Based on vLLM's glm4_tool_parser.py
 */
const BaseToolParser = require('./base-tool-parser');
const logger = require('../logger');

// Shared constants from generic parser
const { SHELL_COMMAND_RE, FENCE_REGEX, BULLET_POINT_REGEX, PROMPT_CHAR_REGEX } = require('./generic-tool-parser');

// GLM-4.7 XML format regexes.
// BUG FIX: the opening tags (<tool_call>, <arg_key>, <arg_value>) were
// missing from these patterns (and from the literal checks below), so the
// regexes could not anchor on the real block start — `[\s\S]*?</tool_call>`
// swallowed everything preceding the closer. The ORPHAN_CLOSING_TAG_RE tag
// list below confirms these are the intended tag names.
const TOOL_CALL_BLOCK_RE = /<tool_call>([\s\S]*?)<\/tool_call>/g;
const ARG_PAIR_RE = /<arg_key>([\s\S]*?)<\/arg_key>\s*<arg_value>([\s\S]*?)<\/arg_value>/g;

// Orphaned closing tags that GLM-4.7 leaks into content when it fails to produce
// proper tool_calls (e.g. "Invoking tool(s): Grep</arg_value>").
// Also catches orphaned </think> from reasoning tag leaks.
const ORPHAN_CLOSING_TAG_RE = /<\/(?:arg_value|arg_key|tool_call|think)>/g;

// Fenced code block regex (for fallback extraction)
const FENCED_BLOCK_RE = /```(?:bash|sh|shell|zsh|console|terminal)\s*\n([\s\S]*?)```/gi;

class Glm47ToolParser extends BaseToolParser {
  /**
   * Extract tool calls from GLM-4.7 text output.
   *
   * Strategy order:
   *   1. GLM XML format (<tool_call>...</tool_call>) — the model's native
   *      non-API tool format
   *   2. Bullet-point shell commands
   *   3. Fenced code block shell commands
   *
   * @param {string} text - Raw assistant text
   * @returns {object[]|null} Ollama-format tool calls, or null
   */
  extractToolCallsFromText(text) {
    if (!text || typeof text !== 'string') return null;

    // Strip reasoning tags (<think>...</think>) and orphaned closing tags
    // that GLM-4.7 leaks into content. Must happen before extraction —
    // these fragments break regex matching and pollute downstream text
    // detection. stripReasoningTags handles both complete pairs AND
    // orphaned closers (overridden below).
    const cleaned = this.stripReasoningTags(text);

    // 1. Try GLM XML tool call format
    const xmlResults = this._extractXmlToolCalls(cleaned);
    if (xmlResults) return xmlResults;

    // 2. Try bullet-point commands
    const bulletResults = this._extractBulletPointCommands(cleaned);
    if (bulletResults) return bulletResults;

    // 3. Try fenced code block commands
    const fencedResults = this._extractFencedCodeBlockCommands(cleaned);
    if (fencedResults) return fencedResults;

    return null;
  }

  /**
   * Clean Bash tool arguments by stripping markdown formatting.
   * Non-Bash tools pass through untouched.
   *
   * @param {object} toolCall - Single tool call (Anthropic or OpenAI format)
   * @returns {object} Cleaned tool call (new object only when changed)
   */
  cleanArguments(toolCall) {
    if (!toolCall) return toolCall;

    const toolName = toolCall.name ?? toolCall.function?.name;
    if (toolName !== 'Bash') return toolCall;

    // Anthropic format: { input: { command } }
    if (toolCall.input?.command && typeof toolCall.input.command === 'string') {
      const cleaned = this._stripMarkdownFromCommand(toolCall.input.command);
      if (cleaned !== toolCall.input.command) {
        return { ...toolCall, input: { ...toolCall.input, command: cleaned } };
      }
      return toolCall;
    }

    // OpenAI format: { function: { arguments: JSON-string | object } }
    if (toolCall.function?.arguments) {
      let args;
      if (typeof toolCall.function.arguments === 'string') {
        // BUG FIX: guard JSON.parse — malformed model output previously threw
        // here and aborted cleaning for the whole batch.
        try {
          args = JSON.parse(toolCall.function.arguments);
        } catch (e) {
          logger.debug({ error: e.message }, 'Glm47ToolParser: unparseable tool arguments, passing through');
          return toolCall;
        }
      } else {
        args = toolCall.function.arguments;
      }
      if (args?.command && typeof args.command === 'string') {
        const cleaned = this._stripMarkdownFromCommand(args.command);
        if (cleaned !== args.command) {
          return {
            ...toolCall,
            function: {
              ...toolCall.function,
              arguments: JSON.stringify({ ...args, command: cleaned })
            }
          };
        }
      }
    }

    return toolCall;
  }

  /**
   * Override: strip <think> pairs AND orphaned GLM closing tags from text.
   */
  stripReasoningTags(text) {
    if (typeof text !== 'string') return text;
    // First strip complete <think>...</think> blocks (base behavior).
    // BUG FIX: the pattern previously lacked the opening <think> anchor and
    // deleted all text before any stray </think>.
    let cleaned = text.replace(/<think>[\s\S]*?<\/think>/g, '');
    // Then strip orphaned closing tags specific to GLM
    cleaned = this._stripOrphanedClosingTags(cleaned);
    return cleaned.trim();
  }

  // -- Private: orphaned tag stripping ----------------------------------------

  /**
   * Strip orphaned closing tags that GLM-4.7 leaks into content.
   * Only removes a closing tag if its matching opener is NOT present in the
   * text. E.g. "Grep</arg_value>" → "Grep", but
   * "<arg_value>val</arg_value>" stays intact.
   */
  _stripOrphanedClosingTags(text) {
    return text.replace(ORPHAN_CLOSING_TAG_RE, (match) => {
      // Extract tag name from "</name>"
      const tagName = match.slice(2, -1);
      const opener = `<${tagName}>`;
      // If the opener exists, this closing tag is NOT orphaned — keep it
      if (text.includes(opener)) return match;
      // Orphaned — strip it
      return '';
    }).trim();
  }

  // -- Private: GLM XML extraction -------------------------------------------

  _extractXmlToolCalls(text) {
    // BUG FIX: cheap pre-check previously tested includes('') — always true.
    if (!text.includes('<tool_call>')) return null;

    TOOL_CALL_BLOCK_RE.lastIndex = 0; // global regex: reset between calls
    const results = [];
    let match;

    while ((match = TOOL_CALL_BLOCK_RE.exec(text)) !== null) {
      const block = match[1].trim();
      const parsed = this._parseXmlToolCallBlock(block);
      if (parsed) results.push(parsed);
    }

    if (results.length > 0) {
      logger.info({
        count: results.length,
        toolNames: results.map(r => r.function.name),
      }, 'Glm47ToolParser: XML tool calls extracted');
      return results;
    }
    return null;
  }

  /**
   * Parse a single <tool_call> block body.
   * Format: funcname\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n...
   */
  _parseXmlToolCallBlock(block) {
    // Function name is the first line (before any <arg_key>)
    const firstTagIdx = block.indexOf('<arg_key>');
    let funcName;
    let argsText;

    if (firstTagIdx === -1) {
      // No arguments — entire block is the function name
      funcName = block.trim();
      argsText = '';
    } else {
      funcName = block.substring(0, firstTagIdx).trim();
      argsText = block.substring(firstTagIdx);
    }

    if (!funcName) return null;

    // Extract key-value pairs
    const args = {};
    ARG_PAIR_RE.lastIndex = 0; // global regex: reset between calls
    let argMatch;
    while ((argMatch = ARG_PAIR_RE.exec(argsText)) !== null) {
      const key = argMatch[1].trim();
      let value = argMatch[2];
      // Trim a single leading/trailing newline (vLLM convention)
      if (value.startsWith('\n')) value = value.substring(1);
      if (value.endsWith('\n')) value = value.slice(0, -1);
      args[key] = value;
    }

    return {
      function: {
        name: funcName,
        arguments: args,
      }
    };
  }

  // -- Private: bullet-point extraction --------------------------------------

  _extractBulletPointCommands(text) {
    const results = [];
    const lines = text.split('\n');
    for (const line of lines) {
      const match = line.match(/^\s*[●•\-*❯>]\s+(.+)$/);
      if (match) {
        const command = match[1].trim();
        // Only accept lines that look like real shell commands.
        if (SHELL_COMMAND_RE.test(command)) {
          results.push({ function: { name: 'Bash', arguments: { command } } });
        }
      }
    }
    if (results.length > 0) {
      logger.info({ count: results.length }, 'Glm47ToolParser: bullet-point commands extracted');
      return results;
    }
    return null;
  }

  // -- Private: fenced code block extraction ---------------------------------

  _extractFencedCodeBlockCommands(text) {
    FENCED_BLOCK_RE.lastIndex = 0; // global regex: reset between calls
    const results = [];
    let fenceMatch;
    while ((fenceMatch = FENCED_BLOCK_RE.exec(text)) !== null) {
      const blockContent = fenceMatch[1];
      for (const line of blockContent.split('\n')) {
        const cleaned = line.replace(/^\s*[$#]\s*/, '').trim();
        if (!cleaned) continue;
        if (SHELL_COMMAND_RE.test(cleaned)) {
          results.push({ function: { name: 'Bash', arguments: { command: cleaned } } });
        }
      }
    }
    if (results.length > 0) {
      logger.info({ count: results.length }, 'Glm47ToolParser: fenced code block commands extracted');
      return results;
    }
    return null;
  }

  // -- Private: markdown stripping -------------------------------------------

  _stripMarkdownFromCommand(command) {
    if (!command || typeof command !== 'string') return command;

    let cleaned = command;
    const fenceMatch = command.match(FENCE_REGEX);
    if (fenceMatch && fenceMatch[1]) cleaned = fenceMatch[1];

    cleaned = cleaned.replace(BULLET_POINT_REGEX, '');
    cleaned = cleaned.replace(PROMPT_CHAR_REGEX, '');

    return cleaned.trim();
  }
}

module.exports = Glm47ToolParser;
+ { prefix: 'qwen3-coder', ParserClass: Glm47ToolParser }, + { prefix: 'qwen3', ParserClass: Glm47ToolParser }, + // Future: + // { prefix: 'deepseek', ParserClass: DeepSeekToolParser }, + // { prefix: 'llama', ParserClass: LlamaToolParser }, +]; + +// Instance cache (model name → parser instance) +const _cache = new Map(); + +/** + * Get the appropriate parser for a model name. + * + * @param {string} modelName - Full model name (e.g. "glm-4.7:cloud", "qwen3-coder-next") + * @returns {BaseToolParser} Parser instance (cached) + */ +function getParserForModel(modelName) { + if (!modelName || typeof modelName !== 'string') { + return _getOrCreate('__generic__', GenericToolParser, 'generic'); + } + + // Check cache + if (_cache.has(modelName)) { + return _cache.get(modelName); + } + + const normalized = modelName.toLowerCase(); + + for (const { prefix, ParserClass } of PARSER_REGISTRY) { + if (normalized.startsWith(prefix)) { + logger.debug({ modelName, prefix, parser: ParserClass.name }, 'Parser registry: matched'); + return _getOrCreate(modelName, ParserClass, modelName); + } + } + + // Fallback to generic + logger.debug({ modelName }, 'Parser registry: no match, using GenericToolParser'); + return _getOrCreate(modelName, GenericToolParser, modelName); +} + +function _getOrCreate(cacheKey, ParserClass, modelName) { + if (_cache.has(cacheKey)) return _cache.get(cacheKey); + const instance = new ParserClass(modelName); + _cache.set(cacheKey, instance); + return instance; +} + +/** + * Clear the parser cache (for testing). 
+ */ +function clearParserCache() { + _cache.clear(); +} + +module.exports = { + getParserForModel, + clearParserCache, + PARSER_REGISTRY, +}; diff --git a/src/progress/client.js b/src/progress/client.js new file mode 100644 index 0000000..34dacff --- /dev/null +++ b/src/progress/client.js @@ -0,0 +1,283 @@ +/** + * Progress Client - Sends progress updates to an external listener + * + * This module provides a simple way to send progress updates from Lynkr + * to an external server (e.g., Python progress listener) during agent execution. + * + * Usage: + * const progress = require('../progress/client'); + * progress.startStep({ step: 1, message: "Thinking..." }); + * progress.update({ percent: 50, message: "Executing tools..." }); + * progress.complete(); + */ + +const http = require('http'); +const https = require('https'); +const logger = require('../logger'); + +// Configuration from environment +const PROGRESS_ENABLED = process.env.PROGRESS_ENABLED === 'true'; +const PROGRESS_URL = process.env.PROGRESS_URL || 'http://localhost:7337'; +const PROGRESS_TIMEOUT_MS = parseInt(process.env.PROGRESS_TIMEOUT_MS || '5000', 10); + +// Session tracking +let currentSessionId = null; +let currentStep = 0; +let startTime = null; + +/** + * Parse the progress URL to extract protocol, host, port, and path + */ +function parseProgressUrl(url) { + try { + const parsed = new URL(url); + return { + protocol: parsed.protocol, + host: parsed.hostname, + port: parsed.port || (parsed.protocol === 'https:' ? 
443 : 80), + path: parsed.pathname || '/progress' + }; + } catch (err) { + logger.warn({ url, error: err.message }, 'Failed to parse PROGRESS_URL'); + return null; + } +} + +/** + * Send a progress update to the external server + */ +function sendProgressUpdate(type, data = {}) { + if (!PROGRESS_ENABLED) { + return; // Progress reporting disabled + } + + const urlInfo = parseProgressUrl(PROGRESS_URL); + if (!urlInfo) { + return; + } + + const payload = { + type, + sessionId: currentSessionId, + timestamp: Date.now(), + step: currentStep, + elapsedMs: startTime ? Date.now() - startTime : null, + ...data + }; + + const postData = JSON.stringify(payload); + + const options = { + hostname: urlInfo.host, + port: urlInfo.port, + path: urlInfo.path, + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(postData), + }, + timeout: PROGRESS_TIMEOUT_MS, + }; + + const client = urlInfo.protocol === 'https:' ? https : http; + + const req = client.request(options, (res) => { + // Silently consume response + res.on('data', () => {}); + res.on('end', () => {}); + }); + + req.on('error', (err) => { + // Silently ignore errors - progress updates are fire-and-forget + // Only log at debug level to avoid spamming logs + logger.debug({ + error: err.message, + progressUrl: PROGRESS_URL + }, 'Progress update failed (non-critical)'); + }); + + req.on('timeout', () => { + req.destroy(); + logger.debug({ + progressUrl: PROGRESS_URL + }, 'Progress update timed out (non-critical)'); + }); + + req.write(postData); + req.end(); +} + +/** + * Start a new session + */ +function startSession(sessionId) { + currentSessionId = sessionId || `session_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; + currentStep = 0; + startTime = Date.now(); + + logger.debug({ sessionId: currentSessionId }, 'Progress session started'); + sendProgressUpdate('session_start', { sessionId: currentSessionId }); +} + +/** + * Start a new step in the agent loop 
+ */ +function startStep(stepInfo) { + currentStep = stepInfo.step || (currentStep + 1); + const info = { + step: currentStep, + message: stepInfo.message || `Step ${currentStep}`, + maxSteps: stepInfo.maxSteps, + toolCallsCount: stepInfo.toolCallsCount, + }; + + logger.debug(info, 'Progress step started'); + sendProgressUpdate('step_start', info); +} + +/** + * Update progress within the current step + */ +function update(updateInfo) { + const info = { + step: currentStep, + message: updateInfo.message || '', + percent: updateInfo.percent, + detail: updateInfo.detail, + }; + + logger.debug(info, 'Progress updated'); + sendProgressUpdate('progress', info); +} + +/** + * Report that the LLM is being called + */ +function callingModel(modelInfo) { + const info = { + step: currentStep, + provider: modelInfo.provider, + model: modelInfo.model, + message: `Calling ${modelInfo.provider} model: ${modelInfo.model}`, + }; + + logger.debug(info, 'Progress: calling model'); + sendProgressUpdate('model_call', info); +} + +/** + * Report that tools are being executed + */ +function executingTools(toolInfo) { + const info = { + step: currentStep, + toolCount: toolInfo.toolCount, + toolNames: toolInfo.toolNames, + message: toolInfo.message || `Executing ${toolInfo.toolCount} tool(s)`, + }; + + logger.debug(info, 'Progress: executing tools'); + sendProgressUpdate('tools_execute', info); +} + +/** + * Report a tool result + */ +function toolResult(toolInfo) { + const info = { + step: currentStep, + toolName: toolInfo.toolName, + ok: toolInfo.ok, + message: toolInfo.message || `Tool ${toolInfo.toolName} ${toolInfo.ok ? 
'completed' : 'failed'}`, + }; + + logger.debug(info, 'Progress: tool result'); + sendProgressUpdate('tool_result', info); +} + +/** + * Report an error + */ +function error(errorInfo) { + const info = { + step: currentStep, + error: errorInfo.error, + message: errorInfo.message || 'An error occurred', + }; + + logger.debug(info, 'Progress: error'); + sendProgressUpdate('error', info); +} + +/** + * Complete the session successfully + */ +function complete(completionInfo = {}) { + const info = { + step: currentStep, + totalSteps: currentStep, + durationMs: startTime ? Date.now() - startTime : null, + message: completionInfo.message || 'Completed successfully', + terminationReason: completionInfo.terminationReason, + }; + + logger.debug(info, 'Progress: session completed'); + sendProgressUpdate('session_complete', info); + + // Reset session state + currentSessionId = null; + currentStep = 0; + startTime = null; +} + +/** + * End the session with an error + */ +function abort(errorInfo = {}) { + const info = { + step: currentStep, + totalSteps: currentStep, + durationMs: startTime ? Date.now() - startTime : null, + error: errorInfo.error, + message: errorInfo.message || 'Session aborted', + }; + + logger.debug(info, 'Progress: session aborted'); + sendProgressUpdate('session_abort', info); + + // Reset session state + currentSessionId = null; + currentStep = 0; + startTime = null; +} + +/** + * Get current progress state + */ +function getState() { + return { + enabled: PROGRESS_ENABLED, + progressUrl: PROGRESS_URL, + sessionId: currentSessionId, + step: currentStep, + startTime, + elapsedMs: startTime ? 
Date.now() - startTime : null, + }; +} + +module.exports = { + startSession, + startStep, + update, + callingModel, + executingTools, + toolResult, + error, + complete, + abort, + getState, + // Configuration constants + PROGRESS_ENABLED, + PROGRESS_URL, + PROGRESS_TIMEOUT_MS, +}; \ No newline at end of file diff --git a/src/progress/emitter.js b/src/progress/emitter.js new file mode 100644 index 0000000..1e0383e --- /dev/null +++ b/src/progress/emitter.js @@ -0,0 +1,202 @@ +/** + * Progress Event Emitter + * + * Emits progress events during agent execution for real-time monitoring. + * Events can be subscribed to by WebSocket clients or other listeners. + */ + +const EventEmitter = require('events'); +const logger = require('../logger'); + +class ProgressEmitter extends EventEmitter { + constructor() { + super(); + this.setMaxListeners(50); // Allow many concurrent listeners + } + + /** + * Emit an agent loop started event + */ + agentLoopStarted({ sessionId, agentId, parentAgentId, parentCallId, model, maxSteps, maxDurationMs, providerType }) { + const event = { + type: 'agent_loop_started', + sessionId, + agentId, + parentAgentId, + parentCallId, + model, + maxSteps, + maxDurationMs, + providerType, + timestamp: Date.now(), + }; + this.emit('progress', event); + logger.debug(event, '[Progress] Agent loop started'); + } + + /** + * Emit an agent loop step started event + */ + agentLoopStepStarted({ sessionId, agentId, step, maxSteps }) { + const event = { + type: 'agent_loop_step_started', + sessionId, + agentId, + step, + maxSteps, + progress: Math.round((step / maxSteps) * 100), + timestamp: Date.now(), + }; + this.emit('progress', event); + logger.debug(event, '[Progress] Agent loop step started'); + } + + /** + * Emit a model invocation started event + */ + modelInvocationStarted({ sessionId, agentId, step, model, providerType, estimatedTokens }) { + const event = { + type: 'model_invocation_started', + sessionId, + agentId, + step, + model, + providerType, + 
estimatedTokens, + timestamp: Date.now(), + }; + this.emit('progress', event); + logger.debug(event, '[Progress] Model invocation started'); + } + + /** + * Emit a model invocation completed event + */ + modelInvocationCompleted({ sessionId, agentId, step, model, providerType, inputTokens, outputTokens, durationMs }) { + const event = { + type: 'model_invocation_completed', + sessionId, + agentId, + step, + model, + providerType, + inputTokens, + outputTokens, + durationMs, + timestamp: Date.now(), + }; + this.emit('progress', event); + logger.debug(event, '[Progress] Model invocation completed'); + } + + /** + * Emit a tool execution started event + * requestPreview: First 200 characters of tool arguments + */ + toolExecutionStarted({ sessionId, agentId, step, toolName, toolId, requestPreview }) { + const event = { + type: 'tool_execution_started', + sessionId, + agentId, + step, + toolName, + toolId, + requestPreview, + timestamp: Date.now(), + }; + this.emit('progress', event); + logger.debug(event, '[Progress] Tool execution started'); + } + + /** + * Emit a tool execution completed event + * responsePreview: First 200 characters of tool result + */ + toolExecutionCompleted({ sessionId, agentId, step, toolName, toolId, ok, durationMs, responsePreview }) { + const event = { + type: 'tool_execution_completed', + sessionId, + agentId, + step, + toolName, + toolId, + ok, + durationMs, + responsePreview, + timestamp: Date.now(), + }; + this.emit('progress', event); + logger.debug(event, '[Progress] Tool execution completed'); + } + + /** + * Emit an agent loop completed event + */ + agentLoopCompleted({ sessionId, agentId, steps, toolCallsExecuted, durationMs, terminationReason }) { + const event = { + type: 'agent_loop_completed', + sessionId, + agentId, + steps, + toolCallsExecuted, + durationMs, + terminationReason, + timestamp: Date.now(), + }; + this.emit('progress', event); + logger.debug(event, '[Progress] Agent loop completed'); + } + + /** + * Emit an error 
event + */ + error({ sessionId, errorType, errorMessage }) { + const event = { + type: 'error', + sessionId, + errorType, + errorMessage, + timestamp: Date.now(), + }; + this.emit('progress', event); + logger.debug(event, '[Progress] Error'); + } + + /** + * Emit a custom progress event + */ + custom(event) { + event.timestamp = Date.now(); + this.emit('progress', event); + logger.debug(event, '[Progress] Custom event'); + } +} + +// Singleton instance +let instance = null; + +/** + * Get the ProgressEmitter singleton instance + */ +function getProgressEmitter() { + if (!instance) { + instance = new ProgressEmitter(); + } + return instance; +} + +/** + * Reset the singleton (mainly for testing) + */ +function resetProgressEmitter() { + if (instance) { + instance.removeAllListeners(); + instance = null; + } +} + +module.exports = { + ProgressEmitter, + getProgressEmitter, + resetProgressEmitter, +}; \ No newline at end of file diff --git a/src/progress/server.js b/src/progress/server.js new file mode 100644 index 0000000..a0fee20 --- /dev/null +++ b/src/progress/server.js @@ -0,0 +1,295 @@ +/** + * Progress WebSocket Server + * + * Provides real-time progress updates to connected clients via WebSocket. + * This allows external tools (like the Python progress-listener) to receive + * live updates during agent execution. 
+ */ + +const EventEmitter = require('events'); +const WebSocket = require('ws'); +const logger = require('../logger'); +const config = require('../config'); + +class ProgressWebSocketServer extends EventEmitter { + constructor(port = 8765) { + super(); + this.port = port; + this.wss = null; + this.clients = new Set(); + this.clientIdCounter = 0; + } + + /** + * Start the WebSocket server + */ + start() { + if (this.wss) { + logger.warn('Progress WebSocket server already running'); + return; + } + + this.wss = new WebSocket.Server({ + port: this.port, + perMessageDeflate: false // Disable compression for faster message delivery + }); + + this.wss.on('listening', () => { + logger.info( + { port: this.port }, + 'Progress WebSocket server started - clients can connect to receive real-time updates' + ); + this.emit('server:started', { port: this.port }); + }); + + this.wss.on('connection', (ws, req) => { + const clientId = ++this.clientIdCounter; + const clientIp = req.socket.remoteAddress; + + this.clients.add(ws); + + logger.info( + { clientId, clientIp, totalClients: this.clients.size }, + 'Progress client connected' + ); + + // Send welcome message + this.sendToClient(ws, { + type: 'connected', + clientId, + serverInfo: { + version: '1.0.0', + features: ['agent-loop', 'model-invocation', 'tool-execution', 'progress'] + } + }); + + // Send initial status + this.sendToClient(ws, { + type: 'ready', + message: 'Progress reporting ready - waiting for agent execution...' 
+ }); + + ws.on('message', (data) => { + try { + const message = JSON.parse(data.toString()); + this.handleClientMessage(ws, clientId, message); + } catch (err) { + logger.debug({ clientId, error: err.message }, 'Invalid message from client'); + } + }); + + ws.on('close', () => { + this.clients.delete(ws); + logger.info( + { clientId, remainingClients: this.clients.size }, + 'Progress client disconnected' + ); + this.emit('client:disconnected', { clientId }); + }); + + ws.on('error', (err) => { + logger.error({ clientId, error: err.message }, 'Progress client error'); + this.clients.delete(ws); + }); + + this.emit('client:connected', { clientId, clientIp }); + }); + + this.wss.on('error', (err) => { + if (err.code === 'EADDRINUSE') { + logger.warn( + { port: this.port }, + 'Progress WebSocket server port already in use - progress reporting unavailable' + ); + } else { + logger.error({ error: err.message }, 'Progress WebSocket server error'); + } + }); + } + + /** + * Stop the WebSocket server + */ + stop() { + if (!this.wss) { + return; + } + + // Notify all clients of shutdown + this.broadcast({ + type: 'server:shutdown', + message: 'Progress server shutting down' + }); + + // Close all client connections + this.clients.forEach(ws => { + try { + ws.close(1000, 'Server shutting down'); + } catch (err) { + // Ignore close errors + } + }); + + this.clients.clear(); + + // Close the server + this.wss.close(err => { + if (err) { + logger.error({ error: err.message }, 'Error closing Progress WebSocket server'); + } else { + logger.info('Progress WebSocket server stopped'); + } + }); + + this.wss = null; + } + + /** + * Handle messages from clients + */ + handleClientMessage(ws, clientId, message) { + switch (message.type) { + case 'ping': + this.sendToClient(ws, { type: 'pong' }); + break; + + case 'subscribe': + // Client wants specific event types + ws.subscriptions = message.events || []; + this.sendToClient(ws, { + type: 'subscribed', + events: ws.subscriptions + 
}); + break; + + case 'get_status': + this.sendToClient(ws, { + type: 'status', + connectedClients: this.clients.size, + uptime: process.uptime() + }); + break; + + default: + logger.debug({ clientId, messageType: message.type }, 'Unknown message type from client'); + } + } + + /** + * Send a message to a specific client + */ + sendToClient(ws, data) { + if (ws.readyState === WebSocket.OPEN) { + try { + ws.send(JSON.stringify(data)); + } catch (err) { + logger.debug({ error: err.message }, 'Failed to send message to client'); + } + } + } + + /** + * Broadcast a message to all connected clients + */ + broadcast(data) { + if (this.clients.size === 0) { + return; + } + + const message = JSON.stringify(data); + const deadClients = new Set(); + + this.clients.forEach(ws => { + if (ws.readyState !== WebSocket.OPEN) { + deadClients.add(ws); + return; + } + + // Check if client has subscriptions + if (ws.subscriptions && !ws.subscriptions.includes(data.type)) { + return; + } + + try { + ws.send(message); + } catch (err) { + deadClients.add(ws); + } + }); + + // Remove dead clients + deadClients.forEach(ws => { + this.clients.delete(ws); + }); + + if (deadClients.size > 0) { + logger.debug( + { removedCount: deadClients.size, remaining: this.clients.size }, + 'Cleaned up disconnected clients' + ); + } + } + + /** + * Check if server is running + */ + isRunning() { + return this.wss !== null; + } + + /** + * Get connection count + */ + getClientCount() { + return this.clients.size; + } +} + +// Singleton instance +let serverInstance = null; + +/** + * Get or create the progress WebSocket server singleton + */ +function getProgressServer(port = 8765) { + if (!serverInstance) { + serverInstance = new ProgressWebSocketServer(port); + } + return serverInstance; +} + +/** + * Initialize and start the progress server if enabled + */ +function initializeProgressServer() { + const progressConfig = config.progress || {}; + const enabled = progressConfig.enabled !== false; + const 
port = progressConfig.port || 8765; + + if (!enabled) { + logger.info('Progress WebSocket server disabled via config'); + return null; + } + + const server = getProgressServer(port); + server.start(); + + return server; +} + +/** + * Shutdown the progress server + */ +function shutdownProgressServer() { + if (serverInstance) { + serverInstance.stop(); + serverInstance = null; + } +} + +module.exports = { + ProgressWebSocketServer, + getProgressServer, + initializeProgressServer, + shutdownProgressServer +}; \ No newline at end of file diff --git a/src/providers/context-window.js b/src/providers/context-window.js new file mode 100644 index 0000000..663e48a --- /dev/null +++ b/src/providers/context-window.js @@ -0,0 +1,147 @@ +/** + * Context Window Detection + * + * Queries the active provider for its context window size (in tokens). + * Returns -1 if unknown. Caches the result for the lifetime of the process. + */ + +const config = require("../config"); +const logger = require("../logger"); + +// Known context sizes for proprietary models (tokens) +const KNOWN_CONTEXT_SIZES = { + // Anthropic + "claude-3-opus": 200000, + "claude-3-sonnet": 200000, + "claude-3-haiku": 200000, + "claude-3.5-sonnet": 200000, + "claude-4": 200000, + // OpenAI + "gpt-4o": 128000, + "gpt-4o-mini": 128000, + "gpt-4-turbo": 128000, + "gpt-4": 8192, + "gpt-3.5-turbo": 16385, +}; + +// null = not yet detected, -1 = detected but unknown, >0 = known +let cachedContextWindow = null; + +async function detectContextWindow() { + const provider = config.modelProvider.type; + + try { + if (provider === "ollama") { + return await detectOllamaContextWindow(); + } + if (provider === "openrouter") { + return await detectOpenRouterContextWindow(); + } + if (provider === "openai") { + return detectFromKnownSizes(config.openai.model); + } + // azure-anthropic, bedrock — use known Anthropic sizes + if (["azure-anthropic", "bedrock"].includes(provider)) { + return 200000; + } + if (provider === 
"azure-openai") { + return detectFromKnownSizes(config.azureOpenAI.deployment); + } + if (provider === "llamacpp" || provider === "lmstudio") { + return -1; // No standard API to query + } + if (provider === "zai") { + return 128000; // GLM-4 family + } + if (provider === "vertex") { + return 1000000; // Gemini models + } + } catch (err) { + logger.warn({ err, provider }, "Failed to detect context window"); + } + + return -1; +} + +async function detectOllamaContextWindow() { + const { getOllamaHeaders, getOllamaEndpointForModel } = require("../clients/ollama-utils"); + const model = config.ollama.model; + const baseEndpoint = getOllamaEndpointForModel(model); + const endpoint = `${baseEndpoint}/api/show`; + const response = await fetch(endpoint, { + method: "POST", + headers: getOllamaHeaders(model), + body: JSON.stringify({ name: model }), + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) return -1; + const data = await response.json(); + + // Ollama prefixes context_length with the architecture name + // (e.g. "llama.context_length", "qwen2.context_length", "gemma.context_length") + // Search for any key ending in ".context_length" or exactly "context_length" + if (data.model_info && typeof data.model_info === "object") { + for (const [key, value] of Object.entries(data.model_info)) { + if (key === "context_length" || key.endsWith(".context_length")) { + if (typeof value === "number" && value > 0) return value; + } + } + } + + // Fallback: parse from parameters string (e.g. 
"num_ctx 32768") + const match = data.parameters?.match(/num_ctx\s+(\d+)/); + if (match) return parseInt(match[1], 10); + return -1; +} + +async function detectOpenRouterContextWindow() { + const baseEndpoint = config.openrouter.endpoint || "https://openrouter.ai/api/v1/chat/completions"; + // Derive the models endpoint from the chat endpoint + const modelsEndpoint = baseEndpoint.replace(/\/v1\/chat\/completions$/, "/v1/models"); + const response = await fetch(modelsEndpoint, { + headers: { Authorization: `Bearer ${config.openrouter.apiKey}` }, + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) return -1; + const data = await response.json(); + const model = data.data?.find((m) => m.id === config.openrouter.model); + return model?.context_length ?? -1; +} + +function detectFromKnownSizes(modelName) { + if (!modelName) return -1; + const lower = modelName.toLowerCase(); + for (const [key, size] of Object.entries(KNOWN_CONTEXT_SIZES)) { + if (lower.includes(key)) return size; + } + return -1; +} + +async function getContextWindow() { + if (cachedContextWindow !== null) return cachedContextWindow; + cachedContextWindow = await detectContextWindow(); + if (cachedContextWindow === -1) { + logger.warn( + { provider: config.modelProvider.type }, + "Could not detect context window size — falling back to 8K tokens. 
" + + "Compression may be more aggressive than necessary.", + ); + } else { + logger.info( + { contextWindow: cachedContextWindow, provider: config.modelProvider.type }, + "Context window detected", + ); + } + return cachedContextWindow; +} + +function resetCache() { + cachedContextWindow = null; +} + +module.exports = { + getContextWindow, + detectContextWindow, + resetCache, + KNOWN_CONTEXT_SIZES, +}; diff --git a/src/routing/complexity-analyzer.js b/src/routing/complexity-analyzer.js index 0929de4..3bcb666 100644 --- a/src/routing/complexity-analyzer.js +++ b/src/routing/complexity-analyzer.js @@ -469,6 +469,7 @@ async function getContentEmbedding(content) { } try { + const { getOllamaHeaders } = require("../clients/ollama-utils"); const endpoint = config.ollama?.embeddingsEndpoint || config.llamacpp?.embeddingsEndpoint; @@ -476,7 +477,7 @@ async function getContentEmbedding(content) { const response = await fetch(endpoint, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: getOllamaHeaders(), body: JSON.stringify({ model: config.ollama?.embeddingsModel || 'nomic-embed-text', prompt: content.slice(0, 512), // Limit for performance diff --git a/src/routing/index.js b/src/routing/index.js index f47853f..a36d621 100644 --- a/src/routing/index.js +++ b/src/routing/index.js @@ -84,7 +84,7 @@ function getBestCloudProvider(options = {}) { * Get the best available local provider */ function getBestLocalProvider() { - if (config.ollama?.endpoint) return 'ollama'; + if (config.ollama?.endpoint || config.ollama?.cloudEndpoint) return 'ollama'; if (config.llamacpp?.endpoint) return 'llamacpp'; if (config.lmstudio?.endpoint) return 'lmstudio'; diff --git a/src/server.js b/src/server.js index 612af5d..41043b4 100644 --- a/src/server.js +++ b/src/server.js @@ -1,6 +1,51 @@ const express = require("express"); const compression = require("compression"); const config = require("./config"); + +// Clear logs directory BEFORE initializing loggers +// 
This prevents the issue where loggers hold file handles to deleted files +const fs = require("fs"); +const path = require("path"); +const logFile = process.env.LOG_FILE; +if (logFile) { + const logsDir = path.dirname(logFile); + try { + if (fs.existsSync(logsDir)) { + const files = fs.readdirSync(logsDir); + let deletedCount = 0; + for (const file of files) { + const filePath = path.join(logsDir, file); + const stat = fs.statSync(filePath); + if (stat.isFile()) { + fs.unlinkSync(filePath); + deletedCount++; + } + } + if (deletedCount > 0) { + console.log(`[STARTUP] Cleared ${deletedCount} log file(s) from ${logsDir}`); + } + + // Also delete oversized-errors directory and its contents + const oversizedErrorsDir = path.join(logsDir, "oversized-errors"); + if (fs.existsSync(oversizedErrorsDir)) { + const errorFiles = fs.readdirSync(oversizedErrorsDir); + let errorFilesDeleted = 0; + for (const file of errorFiles) { + const filePath = path.join(oversizedErrorsDir, file); + fs.unlinkSync(filePath); + errorFilesDeleted++; + } + fs.rmdirSync(oversizedErrorsDir); + if (errorFilesDeleted > 0) { + console.log(`[STARTUP] Cleared ${errorFilesDeleted} oversized error file(s) and removed directory`); + } + } + } + } catch (err) { + console.error(`[STARTUP] Failed to clear logs: ${err.message}`); + } +} + const loggingMiddleware = require("./api/middleware/logging"); const router = require("./api/router"); const { sessionMiddleware } = require("./api/middleware/session"); @@ -33,6 +78,7 @@ const { getWorkerPool, isWorkerPoolReady } = require("./workers/pool"); const lazyLoader = require("./tools/lazy-loader"); const { setLazyLoader } = require("./tools"); const { waitForOllama } = require("./clients/ollama-startup"); +const { initializeProgressServer, shutdownProgressServer } = require("./progress/server"); // Initialize MCP initialiseMcp(); @@ -211,6 +257,13 @@ async function start() { console.log(`Claude→Databricks proxy listening on http://localhost:${config.port}`); }); + // 
Initialize progress server for external tool listening + const progressServer = initializeProgressServer(); + if (progressServer) { + const { getProgressEmitter } = require('./progress/emitter'); + getProgressEmitter().on('progress', (event) => progressServer.broadcast(event)); + } + // Start session cleanup manager const { getSessionCleanupManager } = require("./sessions/cleanup"); const sessionCleanup = getSessionCleanupManager(); @@ -238,6 +291,12 @@ async function start() { }); } + // Register Progress Server shutdown callback + shutdownManager.onShutdown(() => { + logger.info("Stopping progress WebSocket server on shutdown"); + shutdownProgressServer(); + }); + // Initialize hot reload config watcher if (config.hotReload?.enabled !== false) { const watcher = initConfigWatcher({ diff --git a/src/tools/index.js b/src/tools/index.js index 11227f0..24d4948 100644 --- a/src/tools/index.js +++ b/src/tools/index.js @@ -35,7 +35,6 @@ const TOOL_ALIASES = { read: "fs_read", fileread: "fs_read", patch: "edit_patch", - edit: "edit_patch", list: "workspace_list", ls: "workspace_list", dir: "workspace_list", @@ -88,8 +87,43 @@ const TOOL_ALIASES = { runtests: "workspace_test_run", testsummary: "workspace_test_summary", testhistory: "workspace_test_history", + // Glob has dedicated tool in src/tools/indexer.js (registerGlobTool) + // - returns plain text format instead of JSON + // glob: "workspace_list", + // Glob: "workspace_list", }; +/** + * Recursively parse string values that look like JSON arrays/objects. + * Some providers double-serialize nested parameters (e.g. questions: "[{...}]" + * instead of questions: [{...}]), which causes schema validation failures. 
+ */ +function deepParseStringifiedJson(obj) { + if (typeof obj !== "object" || obj === null) return obj; + if (Array.isArray(obj)) return obj.map(deepParseStringifiedJson); + + const result = {}; + for (const [key, value] of Object.entries(obj)) { + if (typeof value === "string") { + const trimmed = value.trim(); + if ( + (trimmed.startsWith("[") && trimmed.endsWith("]")) || + (trimmed.startsWith("{") && trimmed.endsWith("}")) + ) { + try { + result[key] = deepParseStringifiedJson(JSON.parse(trimmed)); + continue; + } catch { + // Not valid JSON, keep as string + } + } + } + result[key] = + typeof value === "object" ? deepParseStringifiedJson(value) : value; + } + return result; +} + function coerceString(value) { if (value === undefined || value === null) return ""; if (typeof value === "string") return value; @@ -124,24 +158,65 @@ function normalizeHandlerResult(result) { return { ok, status, content, metadata }; } -function parseArguments(call) { +function parseArguments(call, providerType = null) { const raw = call?.function?.arguments; - if (typeof raw !== "string" || raw.trim().length === 0) return {}; + + // DEBUG: Log full call structure for diagnosis + logger.info({ + providerType, + fullCall: JSON.stringify(call), + hasFunction: !!call?.function, + functionKeys: call?.function ? Object.keys(call.function) : [], + argumentsType: typeof raw, + argumentsValue: raw, + argumentsIsNull: raw === null, + argumentsIsUndefined: raw === undefined, + }, "=== PARSING TOOL ARGUMENTS ==="); + + // Ollama sends arguments as an object, OpenAI as a JSON string + if (typeof raw === "object" && raw !== null) { + if (providerType !== "ollama") { + logger.warn({ + providerType, + expectedProvider: "ollama", + argumentsType: typeof raw, + arguments: raw + }, `Received object arguments but provider is ${providerType || "unknown"}, expected ollama format. 
Continuing with object.`); + } else { + logger.info({ + type: "object", + arguments: raw + }, "Tool arguments already parsed (Ollama format)"); + } + return deepParseStringifiedJson(raw); + } + + if (typeof raw !== "string" || raw.trim().length === 0) { + logger.warn({ + argumentsType: typeof raw, + argumentsEmpty: !raw || raw.trim().length === 0, + providerType + }, "Arguments not a string or empty - returning {}"); + return {}; + } + try { - return JSON.parse(raw); + const parsed = JSON.parse(raw); + logger.info({ parsed }, "Parsed JSON string arguments"); + return deepParseStringifiedJson(parsed); } catch (err) { - logger.warn({ err }, "Failed to parse tool arguments"); + logger.warn({ err, raw }, "Failed to parse tool arguments"); return {}; } } -function normaliseToolCall(call) { +function normaliseToolCall(call, providerType = null) { const name = call?.function?.name ?? call?.name; const id = call?.id ?? `${name ?? "tool"}_${Date.now()}`; return { id, name, - arguments: parseArguments(call), + arguments: parseArguments(call, providerType), raw: call, }; } @@ -182,7 +257,8 @@ function listTools() { } async function executeToolCall(call, context = {}) { - const normalisedCall = normaliseToolCall(call); + const providerType = context?.providerType || context?.provider || null; + const normalisedCall = normaliseToolCall(call, providerType); let registered = registry.get(normalisedCall.name); if (!registered) { const aliasTarget = TOOL_ALIASES[normalisedCall.name.toLowerCase()]; @@ -225,6 +301,10 @@ async function executeToolCall(call, context = {}) { } if (!registered) { + logger.warn({ + tool: normalisedCall.name, + id: normalisedCall.id + }, "Tool not registered"); const content = coerceString({ error: "tool_not_registered", tool: normalisedCall.name, @@ -241,6 +321,17 @@ async function executeToolCall(call, context = {}) { }; } + // Log tool invocation with full details for debugging + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + 
args: normalisedCall.arguments, + argsKeys: Object.keys(normalisedCall.arguments || {}), + rawCall: JSON.stringify(normalisedCall.raw) + }, "=== EXECUTING TOOL ==="); + + startTime = Date.now() + try { const result = await registered.handler( { @@ -251,11 +342,47 @@ async function executeToolCall(call, context = {}) { }, context, ); - const formatted = normalizeHandlerResult(result); + let formatted = normalizeHandlerResult(result); + + // Auto-approve external file reads: the user already asked to read the file, + // so re-execute transparently with user_approved=true instead of relying + // on the LLM to manage a multi-step approval conversation. + if ( + formatted.content && + typeof formatted.content === "string" && + formatted.content.startsWith("[APPROVAL REQUIRED]") + ) { + logger.info( + { tool: normalisedCall.name, id: normalisedCall.id }, + "Auto-approving external file read (user initiated the request)", + ); + const approvedResult = await registered.handler( + { + id: normalisedCall.id, + name: normalisedCall.name, + args: { ...normalisedCall.arguments, user_approved: true }, + raw: normalisedCall.raw, + }, + context, + ); + formatted = normalizeHandlerResult(approvedResult); + } // Apply tool output truncation for token efficiency const truncatedContent = truncateToolOutput(normalisedCall.name, formatted.content); + const durationMs = Date.now() - startTime; + + // Log successful execution + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + status: formatted.status, + durationMs, + outputLength: truncatedContent?.length || 0, + truncated: truncatedContent !== formatted.content + }, "Tool execution completed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -267,11 +394,55 @@ async function executeToolCall(call, context = {}) { registered: true, truncated: truncatedContent !== formatted.content, originalLength: formatted.content?.length, - truncatedLength: truncatedContent?.length + truncatedLength: 
truncatedContent?.length, + durationMs }, }; } catch (err) { - logger.error({ err, tool: normalisedCall.name }, "Tool execution failed"); + const durationMs = Date.now() - startTime; + + // Intercept workspace access error and ask for permission + if (err.message === "Access outside workspace is not permitted.") { + const requestedPath = + normalisedCall.arguments?.file_path ?? + normalisedCall.arguments?.path ?? + normalisedCall.arguments?.target_path ?? + normalisedCall.arguments?.name ?? + 'the requested path'; + + logger.info({ + tool: normalisedCall.name, + id: normalisedCall.id, + requestedPath, + durationMs + }, "Access outside workspace requested - asking user for permission"); + + return { + id: normalisedCall.id, + name: normalisedCall.name, + ok: true, + status: 200, + content: [ + `[PERMISSION REQUIRED] "${requestedPath}" is outside the workspace root and cannot be accessed without user approval.`, + ``, + `You MUST now call AskUserQuestion with:`, + ` question: "Allow writing to '${requestedPath}' outside the workspace?"`, + ` options: [{label: "Allow", description: "Proceed — retry this tool with user_approved=true"}, {label: "Deny", description: "Abandon this operation"}]`, + ``, + `If the user selects Allow, retry the same tool call with user_approved=true added to the arguments.`, + `If the user selects Deny, inform the user the operation was cancelled.`, + ].join('\n'), + metadata: { permissionRequired: true, requestedPath, durationMs }, + }; + } + + logger.error({ + err, + tool: normalisedCall.name, + id: normalisedCall.id, + durationMs + }, "Tool execution failed"); + return { id: normalisedCall.id, name: normalisedCall.name, @@ -286,6 +457,7 @@ async function executeToolCall(call, context = {}) { metadata: { registered: true, error: true, + durationMs }, error: err, }; diff --git a/src/tools/indexer.js b/src/tools/indexer.js index eb0a981..bf13ca8 100644 --- a/src/tools/indexer.js +++ b/src/tools/indexer.js @@ -16,11 +16,13 @@ function 
registerWorkspaceListTool() { registerTool( "workspace_list", async ({ args = {} }) => { + // Support both 'pattern' (Glob tool) and 'patterns' (workspace_list) + const rawPatterns = args.pattern ?? args.patterns; const patterns = - typeof args.patterns === "string" - ? [args.patterns] - : Array.isArray(args.patterns) - ? args.patterns + typeof rawPatterns === "string" + ? [rawPatterns] + : Array.isArray(rawPatterns) + ? rawPatterns : undefined; const ignore = typeof args.ignore === "string" @@ -53,10 +55,62 @@ function registerWorkspaceListTool() { ); } +/** + * Search recent conversation context for content matching a query. + * + * Scans the last 10 messages for tool_result content that matches + * the query words. Returns matches sorted by relevance. + * + * @param {string} query - Search query + * @param {Array} messages - Recent conversation messages + * @returns {Array} Matching context snippets + */ +function searchRecentContext(query, messages) { + if (!query || !messages || !Array.isArray(messages)) return []; + + const queryLower = query.toLowerCase(); + const queryWords = queryLower.split(/\s+/).filter((w) => w.length > 2); + if (queryWords.length === 0) return []; + + const matches = []; + + // Scan last 10 messages for tool_result content + const recent = messages.slice(-10); + for (const msg of recent) { + if (msg.role !== "tool" && msg.role !== "user") continue; + + const content = + typeof msg.content === "string" + ? msg.content + : Array.isArray(msg.content) + ? msg.content + .filter((b) => b.type === "tool_result" || b.type === "text") + .map((b) => b.content ?? b.text ?? 
"") + .join("\n") + : ""; + + if (!content || content.length < 20) continue; + + // Check if any query words appear in the content + const contentLower = content.toLowerCase(); + const matchCount = queryWords.filter((w) => contentLower.includes(w)).length; + + if (matchCount > 0 && matchCount / queryWords.length >= 0.3) { + matches.push({ + source: "conversation_context", + relevance: matchCount / queryWords.length, + preview: content.substring(0, 500), + }); + } + } + + return matches.sort((a, b) => b.relevance - a.relevance).slice(0, 3); +} + function registerWorkspaceSearchTool() { registerTool( "workspace_search", - async ({ args = {} }) => { + async ({ args = {} }, context = {}) => { const query = args.query ?? args.term ?? args.pattern; const regex = args.regex === true || args.is_regex === true; const limit = Number.isInteger(args.limit) ? args.limit : undefined; @@ -67,6 +121,9 @@ function registerWorkspaceSearchTool() { ? args.ignore : undefined; + // Check recent conversation context for matching content + const contextMatches = searchRecentContext(query, context.requestMessages); + const result = await searchWorkspace({ query, regex, @@ -74,12 +131,21 @@ function registerWorkspaceSearchTool() { ignore, }); + // Prepend context matches if found + if (contextMatches.length > 0) { + result.context_matches = contextMatches; + result.note = + "Results from recently read files are listed in context_matches. " + + "Prefer these over workspace matches when answering about previously read content."; + } + return { ok: true, status: 200, content: JSON.stringify(result, null, 2), metadata: { total: result.matches.length, + contextTotal: contextMatches.length, engine: result.engine, }, }; @@ -260,6 +326,45 @@ function registerSymbolReferencesTool() { ); } + +/** + * Dedicated Glob tool for Claude Code compatibility (maybe others?). 
+ * + * Why this exists (instead of using workspace_list alias): + * - Claude Code's Glob tool returns plain text (one path per line) + * - workspace_list returns JSON with entries array + * - Models expect plain text format from Glob tool + * + * See also: TOOL_ALIASES in src/tools/index.js (commented glob entries) + */ +function registerGlobTool() { + registerTool( + "Glob", + async ({ args = {} }) => { + const pattern = args.pattern; + const basePath = args.path; + + let patterns; + if (basePath) { + const cleanPath = basePath.replace(/\/+$/, ""); + patterns = pattern ? [`${cleanPath}/${pattern}`] : [`${cleanPath}/**/*`]; + } else { + patterns = pattern ? [pattern] : undefined; + } + + const entries = await listWorkspaceFiles({ patterns, limit: 1000 }); + + // Plain text output: one path per line (Claude Code format) + return { + ok: true, + status: 200, + content: entries.map((e) => e.path).join("\n"), + }; + }, + { category: "indexing" }, + ); +} + function registerGotoDefinitionTool() { registerTool( "workspace_goto_definition", @@ -353,6 +458,7 @@ function registerIndexerTools() { registerSymbolSearchTool(); registerSymbolReferencesTool(); registerGotoDefinitionTool(); + registerGlobTool(); } module.exports = { diff --git a/src/tools/smart-selection.js b/src/tools/smart-selection.js index 67628ce..b238d1e 100644 --- a/src/tools/smart-selection.js +++ b/src/tools/smart-selection.js @@ -14,16 +14,19 @@ const SYSTEM_REMINDER_PATTERN = /[\s\S]*?<\/system-reminder>/g; // Pre-compiled regex patterns for performance (avoid recompiling on every request) const GREETING_PATTERN = /^(hi|hello|hey|good morning|good afternoon|good evening|howdy|greetings|sup|yo)[\s\.\!\?]*$/i; -const QUESTION_PATTERN = /^(what is|what's|how does|when|where|why|explain|define|tell me about|can you explain)/i; +const QUESTION_PATTERN = /^(what is|what's|how does|when|where|why|define|tell me about|can you explain)/i; const TECHNICAL_KEYWORDS = 
/code|function|class|file|module|import|export|async|await|promise|callback|api|database|server|client|component|method|variable|array|object|string|number/i; const EXPLANATION_PATTERN = /explain|describe|summarize|what does|how does|tell me about|give me an overview|clarify|elaborate/i; const WEB_PATTERN = /search|lookup|find info|google|documentation|docs|website|url|link|online|internet|browse/i; -const READ_PATTERN = /read|show|display|view|cat|check|inspect|look at|see|examine|review|print|output/i; -const WRITE_PATTERN = /write|create|add|update|modify|change|fix|delete|remove|insert|append|replace|save|put|make|generate|produce/i; +const READ_PATTERN = /\bread\b|show|display|view|\bcat\b|\bcheck\b|inspect|look at|see|examine|review|print|output|\blist\b/i; +const WRITE_PATTERN = /write|create|add|update|modify|change|fix|delete|remove|insert|append|replace|save|put|make|generate|produce|copy/i; const EDIT_PATTERN = /edit|refactor|rename|move|reorganize|restructure|rewrite/i; -const EXECUTION_PATTERN = /run|execute|test|compile|build|deploy|start|install|launch|boot|fire up|npm|git|python|node|docker|bash|sh|cmd/i; +const EXECUTION_PATTERN = /\brun\b|execute|\btest\b|compile|build|deploy|\bstart\b|install|launch|boot|fire up|npm|npx|yarn|pnpm|pip|git|python|node|docker|bash|\bsh\b|\bcmd\b|\bls\b|\bpwd\b|\bcd\b|grep|curl|wget|\bmake\b|\bbd\b/i; const COMPLEX_PATTERN = /implement|build|create|develop|design|architect|plan|strategy|approach|help with|work on|improve|optimize|enhance|refactor|migrate/i; +// Catches short shell/tool commands that should not be classified as conversational +const COMMAND_PATTERN = /^\s*(ls|cd|pwd|cat|git|npm|npx|yarn|pnpm|pip|bd|run|edit|read|write|grep|find|curl|wget|make|docker|bash|sh|node|python|test|check|install|create|delete|remove|rename|move|copy|open|save|build|compile|deploy|start|stop|kill)\b/i; + /** * Tool selection map: request type → relevant tools */ @@ -41,25 +44,27 @@ const TOOL_SELECTION_MAP = { ], 
file_modification: [ - 'Read', 'Write', 'Edit', // Full I/O - 'Grep', 'Glob', 'Bash' // Support tools + 'Read', 'Write', 'Edit', 'edit_patch', // Full I/O + 'Grep', 'Glob', 'Bash', // Support tools + 'NotebookEdit' ], code_execution: [ - 'Read', 'Write', 'Edit', // File operations - 'Bash', 'Grep', 'Glob' // Execution + search + 'Read', 'Write', 'Edit', 'edit_patch', // File operations + 'Bash', 'Grep', 'Glob' // Execution + search ], coding: [ - 'Read', 'Write', 'Edit', // Core file ops - 'Bash', 'Grep', 'Glob' // Support tools + 'Read', 'Write', 'Edit', 'edit_patch', // Core file ops + 'Bash', 'Grep', 'Glob' // Support tools ], complex_task: [ - 'Read', 'Write', 'Edit', // Tier 1 - 'Bash', 'Grep', 'Glob', // Tier 1 - 'WebSearch', 'WebFetch', // Tier 2 - 'Task', 'TodoWrite', 'AskUserQuestion' // Tier 3+4 + 'Read', 'Write', 'Edit', 'edit_patch', // Tier 1 + 'Bash', 'Grep', 'Glob', // Tier 1 + 'WebSearch', 'WebFetch', // Tier 2 + 'NotebookEdit', // Tier 2 + 'Task', 'TodoWrite', 'AskUserQuestion' // Tier 3+4 ] }; @@ -114,14 +119,16 @@ function isGreeting(content) { */ function isShortNonTechnical(content) { const trimmed = content.trim(); - return trimmed.length < 20 && !TECHNICAL_KEYWORDS.test(trimmed); + return trimmed.length < 20 + && !TECHNICAL_KEYWORDS.test(trimmed) + && !COMMAND_PATTERN.test(trimmed); } /** * Check if content is a simple question */ function isSimpleQuestion(content) { - return QUESTION_PATTERN.test(content.trim()); + return QUESTION_PATTERN.test(content.trim()) && !COMMAND_PATTERN.test(content.trim()); } /** @@ -200,21 +207,12 @@ function classifyRequestType(payload) { const contentLower = content.toLowerCase(); const messageCount = payload.messages?.length ?? 0; - // 1. Conversational (no tools) + // 1. 
Conversational — exact greeting match (no tools) if (isGreeting(contentLower)) { return { type: 'conversational', confidence: 1.0, keywords: ['greeting'] }; } - if (isShortNonTechnical(contentLower)) { - return { type: 'conversational', confidence: 0.8, keywords: ['short', 'non-technical'] }; - } - - // 2. Simple Q&A (no tools) - if (isSimpleQuestion(contentLower) && !hasTechnicalKeywords(contentLower)) { - return { type: 'simple_qa', confidence: 0.9, keywords: ['question', 'non-technical'] }; - } - - // 3. Research/Explanation (minimal tools) + // 2. Research/Explanation (minimal tools) — before simple_qa so "explain X" → research if (hasExplanationKeywords(contentLower)) { return { type: 'research', confidence: 0.85, keywords: ['explanation'] }; } @@ -223,26 +221,37 @@ function classifyRequestType(payload) { return { type: 'research', confidence: 0.9, keywords: ['web', 'search'] }; } - // 4. File reading (read-only tools) + // 3. Simple Q&A (no tools) — after explanation so "explain X" isn't caught here + if (isSimpleQuestion(contentLower) && !hasTechnicalKeywords(contentLower)) { + return { type: 'simple_qa', confidence: 0.9, keywords: ['question', 'non-technical'] }; + } + + // 4. Execution/Testing (execution tools) — before read/write + // because commands like "check git status" contain both read and exec keywords + if (hasExecutionKeywords(contentLower)) { + return { type: 'code_execution', confidence: 0.8, keywords: ['execution'] }; + } + + // 5. File reading (read-only tools) if (hasReadKeywords(contentLower) && !hasWriteKeywords(contentLower)) { return { type: 'file_reading', confidence: 0.8, keywords: ['read'] }; } - // 5. File modification (full I/O tools) + // 6. File modification (full I/O tools) if (hasWriteKeywords(contentLower) || hasEditKeywords(contentLower)) { return { type: 'file_modification', confidence: 0.85, keywords: ['write', 'edit'] }; } - // 6. 
Execution/Testing (execution tools) - if (hasExecutionKeywords(contentLower)) { - return { type: 'code_execution', confidence: 0.8, keywords: ['execution'] }; - } - // 7. Complex task (all tools) if (hasComplexKeywords(contentLower)) { return { type: 'complex_task', confidence: 0.75, keywords: ['complex'] }; } + // 8. Short non-technical fallback — only after all keyword checks have had a chance + if (isShortNonTechnical(contentLower)) { + return { type: 'conversational', confidence: 0.8, keywords: ['short', 'non-technical'] }; + } + // Long conversations likely need more tools if (messageCount > 10) { return { type: 'complex_task', confidence: 0.7, keywords: ['long_conversation'] }; diff --git a/src/tools/stubs.js b/src/tools/stubs.js index c026e8e..d2f1bd3 100644 --- a/src/tools/stubs.js +++ b/src/tools/stubs.js @@ -41,12 +41,41 @@ function createStubHandler(name, description) { }); } +function askUserQuestionHandler({ args }) { + let questions = args?.questions ?? []; + + if (typeof questions === "string") { + try { questions = JSON.parse(questions); } catch { questions = []; } + } + + if (!Array.isArray(questions)) questions = [questions]; + const lines = questions.map((q, i) => { + const header = q.header ? `[${q.header}] ` : ""; + const opts = (q.options ?? []) + .map((o, j) => ` ${j + 1}. 
${o.label} — ${o.description}`) + .join("\n"); + return `${header}${q.question}\n${opts}`; + }); + + return { + ok: true, + status: 200, + content: lines.join("\n\n"), + }; +} + function registerStubTools() { STUB_TOOLS.forEach((tool) => { if (!hasTool(tool.name)) { registerTool(tool.name, createStubHandler(tool.name, tool.description), tool); } }); + + if (!hasTool("AskUserQuestion")) { + registerTool("AskUserQuestion", askUserQuestionHandler, { + description: "Returns the model's question to the user as assistant output.", + }); + } } module.exports = { diff --git a/src/tools/tool-call-cleaner.js b/src/tools/tool-call-cleaner.js new file mode 100644 index 0000000..1974667 --- /dev/null +++ b/src/tools/tool-call-cleaner.js @@ -0,0 +1,99 @@ +/** + * Universal tool call argument cleaning + * + * Delegates to the per-model parser registry for argument cleaning. + * This module provides the backward-compatible API that the orchestrator calls. + */ + +const logger = require('../logger'); +const { getParserForModel } = require('../parsers'); + +// Re-export regex constants from GenericToolParser for test compatibility +const { FENCE_REGEX, BULLET_POINT_REGEX, PROMPT_CHAR_REGEX } = require('../parsers/generic-tool-parser'); + +/** + * Strip markdown code fences and prompt characters from a command string. + * Delegates to GenericToolParser's implementation. + * + * @param {string} command - Command that may contain markdown or prompt chars + * @returns {string} - Cleaned command + */ +function stripMarkdownFromCommand(command) { + if (!command || typeof command !== 'string') { + return command; + } + + let cleaned = command; + + // 1. Check for code fence + const fenceMatch = command.match(FENCE_REGEX); + if (fenceMatch && fenceMatch[1]) { + cleaned = fenceMatch[1]; + } + + // 2. Strip bullet points at line start + cleaned = cleaned.replace(BULLET_POINT_REGEX, ''); + + // 3. 
Strip prompt characters from each line + cleaned = cleaned.replace(PROMPT_CHAR_REGEX, ''); + + return cleaned.trim(); +} + +/** + * Clean tool call arguments by extracting from markdown/formatting. + * Delegates to the appropriate parser's cleanArguments method. + * + * @param {object} toolCall - Tool call in Anthropic/OpenAI format + * @param {string} [modelName] - Optional model name for model-specific cleaning + * @returns {object} - Cleaned tool call (may be same object if no cleaning needed) + */ +function cleanToolCallArguments(toolCall, modelName) { + if (!toolCall) return toolCall; + + const parser = getParserForModel(modelName); + return parser.cleanArguments(toolCall); +} + +/** + * Clean an array of tool calls. + * + * @param {object[]} toolCalls - Array of tool calls + * @param {string} [modelName] - Optional model name for model-specific cleaning + * @returns {object[]} - Array of cleaned tool calls + */ +function cleanToolCalls(toolCalls, modelName) { + if (!Array.isArray(toolCalls) || toolCalls.length === 0) { + return toolCalls; + } + + const parser = getParserForModel(modelName); + let cleanedCount = 0; + + const cleaned = toolCalls.map(call => { + const cleanedCall = parser.cleanArguments(call); + if (cleanedCall !== call) cleanedCount++; + return cleanedCall; + }); + + if (cleanedCount > 0) { + logger.info({ + totalCalls: toolCalls.length, + cleanedCalls: cleanedCount, + tools: cleaned.map(tc => tc.name ?? 
tc.function?.name), + parser: parser.constructor.name, + }, 'Universal tool call cleaning applied (via parser)'); + } + + return cleaned; +} + +module.exports = { + cleanToolCallArguments, + cleanToolCalls, + stripMarkdownFromCommand, + // Export regex for testing + FENCE_REGEX, + BULLET_POINT_REGEX, + PROMPT_CHAR_REGEX +}; diff --git a/src/tools/workspace.js b/src/tools/workspace.js index 3eaeeb0..355d016 100644 --- a/src/tools/workspace.js +++ b/src/tools/workspace.js @@ -1,4 +1,5 @@ const path = require("path"); +const fsp = require("fs/promises"); const { readFile, writeFile, @@ -43,13 +44,9 @@ function registerWorkspaceTools() { const expanded = expandTilde(targetPath); const resolved = path.resolve(expanded); return { - ok: false, - status: 403, - content: JSON.stringify({ - error: "external_path_requires_approval", - message: `The file "${targetPath}" resolves to "${resolved}" which is outside the workspace. You MUST ask the user for permission before reading this file. If the user approves, call this tool again with the same path and set user_approved to true.`, - resolved_path: resolved, - }), + ok: true, + status: 200, + content: `[APPROVAL REQUIRED] The file "${resolved}" is outside the workspace and cannot be read without user permission.\n\nYou must now ask the user: "The file ${resolved} is outside the workspace. 
May I read it?"\n\nIf the user says yes, call the Read tool again with file_path="${targetPath}" and user_approved=true.`, }; } // User approved — read external file @@ -99,10 +96,35 @@ function registerWorkspaceTools() { : ""; const createParents = args.create_parents !== false; - const writeResult = await writeFile(relativePath, content, { - encoding, - createParents, - }); + // Handle user_approved bypass for workspace access + let writeResult; + if (args.user_approved === true) { + const expandedPath = expandTilde(relativePath); + const resolvedPath = path.resolve(expandedPath); + const dir = path.dirname(resolvedPath); + if (createParents) { + await fsp.mkdir(dir, { recursive: true }); + } + let previousContent = null; + try { + previousContent = await fsp.readFile(resolvedPath, { encoding }); + } catch (err) { + if (err.code !== "ENOENT") { + throw err; + } + } + await fsp.writeFile(resolvedPath, content, { encoding }); + writeResult = { + resolvedPath, + previousContent, + nextContent: content, + }; + } else { + writeResult = await writeFile(relativePath, content, { + encoding, + createParents, + }); + } try { recordEdit({ @@ -142,19 +164,174 @@ function registerWorkspaceTools() { { category: "workspace" }, ); + registerTool( + "Edit", + async ({ args = {} }, context = {}) => { + const relativePath = validateString(args.file_path, "file_path"); + const oldString = validateString(args.old_string, "old_string"); + const newString = args.new_string; // Can be empty string + const replaceAll = args.replace_all === true; + const encoding = normalizeEncoding(args.encoding); + + if (typeof newString !== "string") { + throw new Error("new_string must be a string"); + } + + if (oldString === newString) { + throw new Error("old_string and new_string must be different"); + } + + // Handle user_approved bypass for workspace access - check if file exists + let fileExistsResult; + if (args.user_approved === true) { + const expandedPath = expandTilde(relativePath); + const 
resolvedPath = path.resolve(expandedPath); + try { + await fsp.access(resolvedPath); + fileExistsResult = true; + } catch { + fileExistsResult = false; + } + } else { + fileExistsResult = await fileExists(relativePath); + } + + if (!fileExistsResult) { + throw new Error("Cannot edit non-existent file. Use Write tool to create new files."); + } + + // Read current content + let beforeContent; + if (args.user_approved === true) { + const expandedPath = expandTilde(relativePath); + const resolvedPath = path.resolve(expandedPath); + beforeContent = await fsp.readFile(resolvedPath, { encoding }); + } else { + beforeContent = await readFile(relativePath, encoding); + } + + // Check if old_string exists in file + if (!beforeContent.includes(oldString)) { + throw new Error(`old_string not found in file: ${relativePath}`); + } + + // Perform replacement + let afterContent; + if (replaceAll) { + // Replace all occurrences + afterContent = beforeContent.split(oldString).join(newString); + } else { + // Replace only first occurrence and check for uniqueness + const firstIndex = beforeContent.indexOf(oldString); + const secondIndex = beforeContent.indexOf(oldString, firstIndex + oldString.length); + + if (secondIndex !== -1) { + throw new Error( + "old_string appears multiple times in the file. " + + "Either provide a larger string with more context to make it unique, " + + "or use replace_all=true to replace all occurrences." + ); + } + + afterContent = beforeContent.replace(oldString, newString); + } + + // Write updated content + if (args.user_approved === true) { + const expandedPath = expandTilde(relativePath); + const resolvedPath = path.resolve(expandedPath); + await fsp.writeFile(resolvedPath, afterContent, { encoding }); + } else { + await writeFile(relativePath, afterContent, { encoding }); + } + + // Record edit + try { + recordEdit({ + sessionId: context.session?.id ?? context.sessionId ?? 
null, + filePath: relativePath, + source: "Edit", + beforeContent, + afterContent, + metadata: { + encoding, + oldStringLength: oldString.length, + newStringLength: newString.length, + replaceAll, + }, + }); + } catch (err) { + logger.warn({ err }, "Failed to record Edit edit"); + } + + return { + ok: true, + status: 200, + content: JSON.stringify( + { + path: relativePath, + resolved_path: resolveWorkspacePath(relativePath), + replacements: replaceAll ? "all" : 1, + }, + null, + 2, + ), + metadata: { + path: relativePath, + replacements: replaceAll ? "all" : 1, + }, + }; + }, + { category: "workspace" }, + ); + registerTool( "edit_patch", async ({ args = {} }, context = {}) => { - const relativePath = validateString(args.path ?? args.file, "path"); + const relativePath = validateString(args.path ?? args.file ?? args.file_path, "path"); const patch = validateString(args.patch, "patch"); const encoding = normalizeEncoding(args.encoding); - const exists = await fileExists(relativePath); - if (!exists) { + // Handle user_approved bypass for workspace access - check if file exists + let fileExistsResult; + if (args.user_approved === true) { + const expandedPath = expandTilde(relativePath); + const resolvedPath = path.resolve(expandedPath); + try { + await fsp.access(resolvedPath); + fileExistsResult = true; + } catch { + fileExistsResult = false; + } + } else { + fileExistsResult = await fileExists(relativePath); + } + + if (!fileExistsResult) { throw new Error("Cannot apply patch to non-existent file."); } - const patchResult = await applyFilePatch(relativePath, patch, { encoding }); + // Apply patch + let patchResult; + if (args.user_approved === true) { + // Manual patch application for approved external path + const { applyPatch } = require("diff"); + const expandedPath = expandTilde(relativePath); + const resolvedPath = path.resolve(expandedPath); + const original = await fsp.readFile(resolvedPath, { encoding }); + const patched = applyPatch(original, patch); + if 
(patched === false) { + throw new Error("Failed to apply patch."); + } + await fsp.writeFile(resolvedPath, patched, { encoding }); + patchResult = { + resolvedPath, + previousContent: original, + nextContent: patched, + }; + } else { + patchResult = await applyFilePatch(relativePath, patch, { encoding }); + } try { recordEdit({ diff --git a/test/auto-spawn-integration.test.js b/test/auto-spawn-integration.test.js new file mode 100644 index 0000000..a145c1f --- /dev/null +++ b/test/auto-spawn-integration.test.js @@ -0,0 +1,329 @@ +/** + * Integration tests for the auto-spawn subagent pipeline. + * + * The orchestrator's "Invoking tool(s):" block is too coupled to mock end-to-end, + * so these tests exercise the same logic flow the orchestrator uses: + * detection regex → mapToolsToAgentType → buildSubagentPrompt → spawnAgent → inject result + * + * This validates the integration between the components without requiring the full + * orchestrator machinery (which would need ~15 mocks for invokeModel, sessions, etc.) + */ + +const assert = require("assert"); +const { describe, it } = require("node:test"); +const { mapToolsToAgentType, buildSubagentPrompt } = require("../src/agents/tool-agent-mapper"); + +// ── Helpers ────────────────────────────────────────────────────────────────── + +/** The exact regex used in the orchestrator (src/orchestrator/index.js) */ +const INVOKING_TOOL_PATTERN = /^Invoking tool\(s\):\s*(.+)/im; + +/** + * Parse the "Invoking tool(s):" line exactly as the orchestrator does: + * strip XML/GLM-leaked tags, split by comma, trim, filter empties. + */ +function parseInvokingToolText(rawText) { + const match = rawText?.trim().match(INVOKING_TOOL_PATTERN); + if (!match) return null; + return match[1] + .replace(/<\/?\w+[^>]*>/g, "") + .split(",") + .map((t) => t.trim()) + .filter(Boolean); +} + +/** + * Simulate the orchestrator's auto-spawn block. + * Returns an object describing what happened (spawned, nudged, or skipped). 
+ */ +async function runAutoSpawnBlock({ + rawTextContent, + messages, + agentsEnabled = true, + autoSpawn = true, + autoSpawnAttempts = 0, + maxAutoSpawnAttempts = 2, + invokeTextRetries = 0, + maxInvokeTextRetries = 3, + spawnAgentFn, +}) { + const invokingToolMatch = rawTextContent?.trim().match(INVOKING_TOOL_PATTERN); + if (!invokingToolMatch) return { action: "no_match" }; + + const mentionedToolsRaw = invokingToolMatch[1] + .replace(/<\/?\w+[^>]*>/g, "") + .split(",") + .map((t) => t.trim()) + .filter(Boolean); + + // ── Auto-spawn branch ── + if (agentsEnabled && autoSpawn !== false && autoSpawnAttempts < maxAutoSpawnAttempts) { + autoSpawnAttempts++; + const agentType = mapToolsToAgentType(mentionedToolsRaw); + const userText = (() => { + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i]; + if (m?.role !== "user") continue; + if (typeof m.content === "string") return m.content.trim(); + if (Array.isArray(m.content)) return m.content.map((b) => b.text || "").join("\n").trim(); + } + return ""; + })(); + const prompt = buildSubagentPrompt(userText, rawTextContent, mentionedToolsRaw); + + try { + const result = await spawnAgentFn(agentType, prompt, {}); + if (result.success) { + messages.push({ role: "assistant", content: rawTextContent }); + messages.push({ role: "user", content: `[Subagent ${agentType} completed]\n${result.result}` }); + return { action: "spawned", agentType, prompt, autoSpawnAttempts, messages }; + } + // spawn returned failure — fall through to nudge + } catch (_err) { + // spawn threw — fall through to nudge + } + } + + // ── Nudge-retry fallback ── + if (invokeTextRetries < maxInvokeTextRetries) { + invokeTextRetries++; + messages.push({ role: "assistant", content: rawTextContent }); + messages.push({ + role: "user", + content: + `You responded with tool invocation text instead of using actual tool calls (attempt ${invokeTextRetries}/${maxInvokeTextRetries}). ` + + "Please use the tool_call format, not text. 
Call the tools now with the correct parameters.", + }); + return { action: "nudged", invokeTextRetries, messages }; + } + + return { action: "exhausted" }; +} + +// ── Tests ───────────────────────────────────────────────────────────────────── + +describe("Auto-Spawn Integration — detection regex", () => { + it("should match plain 'Invoking tool(s):' text", () => { + const tools = parseInvokingToolText("Invoking tool(s): Read, Grep"); + assert.deepStrictEqual(tools, ["Read", "Grep"]); + }); + + it("should match mid-string (model adds preamble)", () => { + const tools = parseInvokingToolText("I need to look at the file.\nInvoking tool(s): Read"); + assert.deepStrictEqual(tools, ["Read"]); + }); + + it("should strip GLM-leaked XML tags from tool names", () => { + const tools = parseInvokingToolText("Invoking tool(s): Grep, Glob"); + assert.deepStrictEqual(tools, ["Grep", "Glob"]); + }); + + it("should return null for unrelated text", () => { + assert.strictEqual(parseInvokingToolText("I will help you."), null); + assert.strictEqual(parseInvokingToolText(""), null); + assert.strictEqual(parseInvokingToolText(null), null); + }); + + it("should handle repeated tool names (GLM-4.7 pattern)", () => { + const tools = parseInvokingToolText("Invoking tool(s): Read, Read, Read"); + assert.deepStrictEqual(tools, ["Read", "Read", "Read"]); + }); +}); + +describe("Auto-Spawn Integration — full pipeline", () => { + it("should call spawnAgent with Explore for read-only tools", async () => { + let spawnedType, spawnedPrompt; + const spawnAgentFn = async (type, prompt) => { + spawnedType = type; + spawnedPrompt = prompt; + return { success: true, result: "EXPLORATION COMPLETE: found config.js" }; + }; + + const messages = [{ role: "user", content: "Where is the config file?" 
}]; + const result = await runAutoSpawnBlock({ + rawTextContent: "Invoking tool(s): Read, Grep", + messages, + spawnAgentFn, + }); + + assert.strictEqual(result.action, "spawned"); + assert.strictEqual(spawnedType, "Explore"); + assert.ok(spawnedPrompt.includes("Where is the config file?"), "Prompt should include user text"); + assert.ok(spawnedPrompt.includes("Read"), "Prompt should mention Read"); + assert.ok(spawnedPrompt.includes("Grep"), "Prompt should mention Grep"); + }); + + it("should call spawnAgent with general-purpose for write tools", async () => { + let spawnedType; + const spawnAgentFn = async (type) => { + spawnedType = type; + return { success: true, result: "TASK COMPLETE: edited the file" }; + }; + + const messages = [{ role: "user", content: "Update the config" }]; + const result = await runAutoSpawnBlock({ + rawTextContent: "Invoking tool(s): Read, Edit", + messages, + spawnAgentFn, + }); + + assert.strictEqual(result.action, "spawned"); + assert.strictEqual(spawnedType, "general-purpose"); + }); + + it("should inject assistant + user messages after successful spawn", async () => { + const spawnAgentFn = async () => ({ success: true, result: "Found: src/config/index.js" }); + const messages = [{ role: "user", content: "Find config" }]; + const rawText = "Invoking tool(s): Read"; + + const result = await runAutoSpawnBlock({ rawTextContent: rawText, messages, spawnAgentFn }); + + assert.strictEqual(result.action, "spawned"); + // messages: original user + injected assistant + injected user + assert.strictEqual(messages.length, 3); + assert.strictEqual(messages[1].role, "assistant"); + assert.strictEqual(messages[1].content, rawText); + assert.strictEqual(messages[2].role, "user"); + assert.ok(messages[2].content.includes("[Subagent Explore completed]")); + assert.ok(messages[2].content.includes("Found: src/config/index.js")); + }); +}); + +describe("Auto-Spawn Integration — fallback to nudge", () => { + it("should nudge when spawnAgent returns 
failure", async () => { + const spawnAgentFn = async () => ({ success: false, error: "agent timed out" }); + const messages = [{ role: "user", content: "Search for files" }]; + + const result = await runAutoSpawnBlock({ + rawTextContent: "Invoking tool(s): Glob", + messages, + spawnAgentFn, + }); + + assert.strictEqual(result.action, "nudged"); + assert.strictEqual(result.invokeTextRetries, 1); + // messages: original user + injected assistant + nudge user + assert.strictEqual(messages.length, 3); + assert.ok(messages[2].content.includes("tool_call format"), "Nudge should mention tool_call format"); + }); + + it("should nudge when spawnAgent throws", async () => { + const spawnAgentFn = async () => { throw new Error("network error"); }; + const messages = [{ role: "user", content: "Search for files" }]; + + const result = await runAutoSpawnBlock({ + rawTextContent: "Invoking tool(s): Read", + messages, + spawnAgentFn, + }); + + assert.strictEqual(result.action, "nudged"); + }); + + it("should nudge when agents are disabled (agentsEnabled=false)", async () => { + let spawnCalled = false; + const spawnAgentFn = async () => { spawnCalled = true; return { success: true, result: "x" }; }; + const messages = [{ role: "user", content: "test" }]; + + const result = await runAutoSpawnBlock({ + rawTextContent: "Invoking tool(s): Read", + messages, + agentsEnabled: false, + spawnAgentFn, + }); + + assert.strictEqual(result.action, "nudged"); + assert.strictEqual(spawnCalled, false, "spawnAgent should NOT be called when disabled"); + }); + + it("should nudge when autoSpawn config is false", async () => { + let spawnCalled = false; + const spawnAgentFn = async () => { spawnCalled = true; return { success: true, result: "x" }; }; + const messages = [{ role: "user", content: "test" }]; + + const result = await runAutoSpawnBlock({ + rawTextContent: "Invoking tool(s): Read", + messages, + agentsEnabled: true, + autoSpawn: false, + spawnAgentFn, + }); + + 
assert.strictEqual(result.action, "nudged"); + assert.strictEqual(spawnCalled, false, "spawnAgent should NOT be called when autoSpawn=false"); + }); +}); + +describe("Auto-Spawn Integration — attempt limits", () => { + it("should not spawn when autoSpawnAttempts >= MAX_AUTO_SPAWN_ATTEMPTS", async () => { + let spawnCalled = false; + const spawnAgentFn = async () => { spawnCalled = true; return { success: true, result: "x" }; }; + const messages = [{ role: "user", content: "test" }]; + + const result = await runAutoSpawnBlock({ + rawTextContent: "Invoking tool(s): Read", + messages, + autoSpawnAttempts: 2, // already at cap + maxAutoSpawnAttempts: 2, + spawnAgentFn, + }); + + assert.ok(result.action !== "spawned", "Should not spawn when at attempt cap"); + assert.strictEqual(spawnCalled, false, "spawnAgent should not be called"); + }); + + it("should nudge when spawn attempts exhausted but nudge retries remain", async () => { + const spawnAgentFn = async () => ({ success: false, error: "fail" }); + const messages = [{ role: "user", content: "test" }]; + + const result = await runAutoSpawnBlock({ + rawTextContent: "Invoking tool(s): Read", + messages, + autoSpawnAttempts: 2, // spawn cap reached + maxAutoSpawnAttempts: 2, + invokeTextRetries: 0, + maxInvokeTextRetries: 3, + spawnAgentFn, + }); + + assert.strictEqual(result.action, "nudged"); + }); + + it("should return exhausted when both spawn and nudge retries are maxed", async () => { + const spawnAgentFn = async () => ({ success: false, error: "fail" }); + const messages = [{ role: "user", content: "test" }]; + + const result = await runAutoSpawnBlock({ + rawTextContent: "Invoking tool(s): Read", + messages, + autoSpawnAttempts: 2, + maxAutoSpawnAttempts: 2, + invokeTextRetries: 3, + maxInvokeTextRetries: 3, + spawnAgentFn, + }); + + assert.strictEqual(result.action, "exhausted", "Should be exhausted when all retries used up"); + }); + + it("autoSpawnAttempts increments correctly across sequential calls", async 
() => { + let callCount = 0; + const spawnAgentFn = async () => { callCount++; return { success: false, error: "fail" }; }; + + // Simulate 2 sequential loop iterations (each time spawn fails → nudge) + for (let attempt = 0; attempt < 2; attempt++) { + const messages = [{ role: "user", content: "test" }]; + await runAutoSpawnBlock({ + rawTextContent: "Invoking tool(s): Read", + messages, + autoSpawnAttempts: attempt, + maxAutoSpawnAttempts: 2, + spawnAgentFn, + }); + } + + // Attempt 0 → spawn called (becomes 1, fails → nudge) + // Attempt 1 → spawn called (becomes 2, fails → nudge) + assert.strictEqual(callCount, 2, "spawnAgent should be called once per attempt below cap"); + }); +}); diff --git a/test/auto-spawn-subagent.test.js b/test/auto-spawn-subagent.test.js new file mode 100644 index 0000000..2f8c3a2 --- /dev/null +++ b/test/auto-spawn-subagent.test.js @@ -0,0 +1,99 @@ +const assert = require("assert"); +const { describe, it } = require("node:test"); +const { mapToolsToAgentType, buildSubagentPrompt, TOOL_TO_AGENT } = require("../src/agents/tool-agent-mapper"); + +describe("Auto-Spawn Subagent — tool-agent-mapper", () => { + + describe("mapToolsToAgentType", () => { + it("should return 'Explore' for read-only tools (Read, Grep, Glob)", () => { + assert.strictEqual(mapToolsToAgentType(["Read"]), "Explore"); + assert.strictEqual(mapToolsToAgentType(["Grep"]), "Explore"); + assert.strictEqual(mapToolsToAgentType(["Glob"]), "Explore"); + assert.strictEqual(mapToolsToAgentType(["Read", "Grep", "Glob"]), "Explore"); + }); + + it("should return 'general-purpose' when Edit is mentioned", () => { + assert.strictEqual(mapToolsToAgentType(["Edit"]), "general-purpose"); + }); + + it("should return 'general-purpose' when Write is mentioned", () => { + assert.strictEqual(mapToolsToAgentType(["Write"]), "general-purpose"); + }); + + it("should return 'general-purpose' when Bash is mentioned", () => { + assert.strictEqual(mapToolsToAgentType(["Bash"]), "general-purpose"); 
+ }); + + it("should return 'general-purpose' for mixed read + write tools", () => { + assert.strictEqual(mapToolsToAgentType(["Read", "Edit"]), "general-purpose"); + assert.strictEqual(mapToolsToAgentType(["Grep", "Bash", "Read"]), "general-purpose"); + }); + + it("should handle duplicate tool names (GLM-4.7 repeats)", () => { + assert.strictEqual(mapToolsToAgentType(["Read", "Read", "Read"]), "Explore"); + assert.strictEqual(mapToolsToAgentType(["Read", "Read", "Edit"]), "general-purpose"); + }); + + it("should return 'Explore' for unknown tools (safe default)", () => { + assert.strictEqual(mapToolsToAgentType(["UnknownTool"]), "Explore"); + assert.strictEqual(mapToolsToAgentType(["FooBar", "Read"]), "Explore"); + }); + + it("should return 'Explore' for empty or invalid input", () => { + assert.strictEqual(mapToolsToAgentType([]), "Explore"); + assert.strictEqual(mapToolsToAgentType(null), "Explore"); + assert.strictEqual(mapToolsToAgentType(undefined), "Explore"); + }); + + it("should return 'Explore' for workspace_search and workspace_symbol_search", () => { + assert.strictEqual(mapToolsToAgentType(["workspace_search"]), "Explore"); + assert.strictEqual(mapToolsToAgentType(["workspace_symbol_search"]), "Explore"); + }); + }); + + describe("TOOL_TO_AGENT mapping", () => { + it("should map all read-only tools to Explore", () => { + assert.strictEqual(TOOL_TO_AGENT.Read, "Explore"); + assert.strictEqual(TOOL_TO_AGENT.Grep, "Explore"); + assert.strictEqual(TOOL_TO_AGENT.Glob, "Explore"); + assert.strictEqual(TOOL_TO_AGENT.workspace_search, "Explore"); + assert.strictEqual(TOOL_TO_AGENT.workspace_symbol_search, "Explore"); + }); + + it("should map write/execute tools to general-purpose", () => { + assert.strictEqual(TOOL_TO_AGENT.Edit, "general-purpose"); + assert.strictEqual(TOOL_TO_AGENT.Write, "general-purpose"); + assert.strictEqual(TOOL_TO_AGENT.Bash, "general-purpose"); + }); + }); + + describe("buildSubagentPrompt", () => { + it("should include user text in 
the prompt", () => { + const prompt = buildSubagentPrompt("Show me the config file", "Invoking tool(s): Read", ["Read"]); + assert.ok(prompt.includes("Show me the config file"), "Prompt should contain user text"); + }); + + it("should include deduplicated tool list", () => { + const prompt = buildSubagentPrompt("search code", "Invoking tool(s): Read, Read, Grep", ["Read", "Read", "Grep"]); + // Should deduplicate: "Read, Grep" not "Read, Read, Grep" + assert.ok(prompt.includes("Read, Grep"), "Prompt should contain deduplicated tool list"); + }); + + it("should include tool names in prompt", () => { + const prompt = buildSubagentPrompt("find files", "Invoking tool(s): Glob, Grep", ["Glob", "Grep"]); + assert.ok(prompt.includes("Glob"), "Prompt should mention Glob"); + assert.ok(prompt.includes("Grep"), "Prompt should mention Grep"); + }); + + it("should include instruction to complete the task", () => { + const prompt = buildSubagentPrompt("test", "Invoking tool(s): Read", ["Read"]); + assert.ok(prompt.includes("Complete this task"), "Prompt should include task completion instruction"); + }); + + it("should handle empty user text gracefully", () => { + const prompt = buildSubagentPrompt("", "Invoking tool(s): Read", ["Read"]); + assert.ok(typeof prompt === "string", "Should return a string"); + assert.ok(prompt.length > 0, "Should not be empty"); + }); + }); +}); diff --git a/test/dual-ollama-endpoint.test.js b/test/dual-ollama-endpoint.test.js new file mode 100644 index 0000000..d900043 --- /dev/null +++ b/test/dual-ollama-endpoint.test.js @@ -0,0 +1,265 @@ +const assert = require("assert"); +const { describe, it, beforeEach } = require("node:test"); + +describe("Dual Ollama Endpoint Routing", () => { + let ollamaUtils; + + beforeEach(() => { + // Set minimum config to avoid validation errors + process.env.MODEL_PROVIDER = "ollama"; + process.env.OLLAMA_ENDPOINT = "http://192.168.100.201:11434"; + process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; + + // Clear 
relevant module caches + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + }); + + describe("isCloudModel()", () => { + beforeEach(() => { + delete process.env.OLLAMA_CLOUD_ENDPOINT; + delete process.env.OLLAMA_API_KEY; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + }); + + it("should detect cloud models with -cloud in tag", () => { + assert.strictEqual(ollamaUtils.isCloudModel("deepseek-v3.1:671b-cloud"), true); + assert.strictEqual(ollamaUtils.isCloudModel("nemotron-3-nano:30b-cloud"), true); + }); + + it("should detect cloud models with :cloud tag", () => { + assert.strictEqual(ollamaUtils.isCloudModel("glm-4.7:cloud"), true); + assert.strictEqual(ollamaUtils.isCloudModel("some-model:cloud"), true); + }); + + it("should detect cloud models case-insensitively", () => { + assert.strictEqual(ollamaUtils.isCloudModel("deepseek-v3.1:671b-CLOUD"), true); + assert.strictEqual(ollamaUtils.isCloudModel("model:tag-Cloud"), true); + assert.strictEqual(ollamaUtils.isCloudModel("glm-4.7:CLOUD"), true); + }); + + it("should return false for local models", () => { + assert.strictEqual(ollamaUtils.isCloudModel("qwen2.5-coder:latest"), false); + assert.strictEqual(ollamaUtils.isCloudModel("llama3.1:8b"), false); + assert.strictEqual(ollamaUtils.isCloudModel("mistral-nemo"), false); + }); + + it("should handle null/undefined/empty", () => { + assert.strictEqual(ollamaUtils.isCloudModel(null), false); + assert.strictEqual(ollamaUtils.isCloudModel(undefined), false); + assert.strictEqual(ollamaUtils.isCloudModel(""), false); + assert.strictEqual(ollamaUtils.isCloudModel(123), false); + }); + }); + + describe("getOllamaEndpointForModel()", () => { + it("should route cloud models to cloud endpoint when configured", () => { + process.env.OLLAMA_CLOUD_ENDPOINT = 
"https://ollama.com"; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + + assert.strictEqual( + ollamaUtils.getOllamaEndpointForModel("deepseek-v3.1:671b-cloud"), + "https://ollama.com" + ); + }); + + it("should route local models to local endpoint even when cloud is configured", () => { + process.env.OLLAMA_CLOUD_ENDPOINT = "https://ollama.com"; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + + assert.strictEqual( + ollamaUtils.getOllamaEndpointForModel("qwen2.5-coder:latest"), + "http://192.168.100.201:11434" + ); + }); + + it("should route cloud models to local endpoint when no cloud endpoint configured", () => { + delete process.env.OLLAMA_CLOUD_ENDPOINT; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + + // Without cloud endpoint, even cloud-named models use local endpoint + assert.strictEqual( + ollamaUtils.getOllamaEndpointForModel("deepseek-v3.1:671b-cloud"), + "http://192.168.100.201:11434" + ); + }); + + it("should fall back to localhost when no endpoint configured at all", () => { + delete process.env.OLLAMA_ENDPOINT; + delete process.env.OLLAMA_CLOUD_ENDPOINT; + process.env.MODEL_PROVIDER = "databricks"; + process.env.DATABRICKS_API_KEY = "test-key"; + process.env.DATABRICKS_API_BASE = "http://test.com"; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + + assert.strictEqual( + ollamaUtils.getOllamaEndpointForModel("some-model"), + "http://localhost:11434" + ); + }); + }); + + describe("Cloud-only 
configuration (no OLLAMA_ENDPOINT)", () => { + beforeEach(() => { + delete process.env.OLLAMA_ENDPOINT; + process.env.OLLAMA_CLOUD_ENDPOINT = "https://ollama.com"; + process.env.OLLAMA_MODEL = "glm-4.7:cloud"; + process.env.MODEL_PROVIDER = "ollama"; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + }); + + it("should route cloud model to cloud endpoint", () => { + assert.strictEqual( + ollamaUtils.getOllamaEndpointForModel("glm-4.7:cloud"), + "https://ollama.com" + ); + }); + + it("should route non-cloud model to cloud endpoint as fallback", () => { + // In cloud-only mode, even non-cloud-named models go to cloud endpoint + assert.strictEqual( + ollamaUtils.getOllamaEndpointForModel("some-local-model"), + "https://ollama.com" + ); + }); + + it("should pass config validation with only cloud endpoint", () => { + // If we got here without throwing, validation passed + const config = require("../src/config"); + assert.strictEqual(config.ollama.endpoint, null); + assert.strictEqual(config.ollama.cloudEndpoint, "https://ollama.com"); + assert.strictEqual(config.ollama.model, "glm-4.7:cloud"); + }); + + it("should have null embeddings endpoint when no local endpoint", () => { + const config = require("../src/config"); + assert.strictEqual(config.ollama.embeddingsEndpoint, null); + }); + }); + + describe("Config validation", () => { + it("should throw when MODEL_PROVIDER=ollama but no model set", () => { + delete process.env.OLLAMA_MODEL; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; + process.env.MODEL_PROVIDER = "ollama"; + delete require.cache[require.resolve("../src/config")]; + + assert.throws( + () => require("../src/config"), + { message: /OLLAMA_MODEL is required/ } + ); + }); + + it("should throw when MODEL_PROVIDER=ollama but no endpoints set", () => { + delete process.env.OLLAMA_ENDPOINT; + delete 
process.env.OLLAMA_CLOUD_ENDPOINT; + process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; + process.env.MODEL_PROVIDER = "ollama"; + delete require.cache[require.resolve("../src/config")]; + + assert.throws( + () => require("../src/config"), + { message: /OLLAMA_ENDPOINT.*OLLAMA_CLOUD_ENDPOINT/ } + ); + }); + + it("should accept local-only config", () => { + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; + process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; + delete process.env.OLLAMA_CLOUD_ENDPOINT; + process.env.MODEL_PROVIDER = "ollama"; + delete require.cache[require.resolve("../src/config")]; + + const config = require("../src/config"); + assert.strictEqual(config.ollama.endpoint, "http://localhost:11434"); + assert.strictEqual(config.ollama.cloudEndpoint, null); + }); + }); + + describe("getOllamaHeaders() with model-aware auth", () => { + it("should include auth for cloud models when API key and cloud endpoint configured", () => { + process.env.OLLAMA_API_KEY = "test-key-123"; + process.env.OLLAMA_CLOUD_ENDPOINT = "https://ollama.com"; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + + const headers = ollamaUtils.getOllamaHeaders("deepseek-v3.1:671b-cloud"); + assert.strictEqual(headers["Authorization"], "Bearer test-key-123"); + assert.strictEqual(headers["Content-Type"], "application/json"); + }); + + it("should NOT include auth for local models when cloud endpoint is configured", () => { + process.env.OLLAMA_API_KEY = "test-key-123"; + process.env.OLLAMA_CLOUD_ENDPOINT = "https://ollama.com"; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + + const headers = ollamaUtils.getOllamaHeaders("qwen2.5-coder:latest"); + assert.strictEqual(headers["Authorization"], undefined); + 
assert.strictEqual(headers["Content-Type"], "application/json"); + }); + + it("should include auth for ALL models when no cloud endpoint (legacy compat)", () => { + process.env.OLLAMA_API_KEY = "test-key-123"; + delete process.env.OLLAMA_CLOUD_ENDPOINT; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + + const localHeaders = ollamaUtils.getOllamaHeaders("qwen2.5-coder:latest"); + assert.strictEqual(localHeaders["Authorization"], "Bearer test-key-123"); + + const cloudHeaders = ollamaUtils.getOllamaHeaders("deepseek-v3.1:671b-cloud"); + assert.strictEqual(cloudHeaders["Authorization"], "Bearer test-key-123"); + }); + + it("should NOT include auth for any model when no API key", () => { + delete process.env.OLLAMA_API_KEY; + process.env.OLLAMA_CLOUD_ENDPOINT = "https://ollama.com"; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + + const headers = ollamaUtils.getOllamaHeaders("deepseek-v3.1:671b-cloud"); + assert.strictEqual(headers["Authorization"], undefined); + }); + + it("should NOT include auth when called without model arg and cloud endpoint is set", () => { + process.env.OLLAMA_API_KEY = "test-key-123"; + process.env.OLLAMA_CLOUD_ENDPOINT = "https://ollama.com"; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + + // No model arg = not a cloud model, and cloud endpoint is set, so no auth + const headers = ollamaUtils.getOllamaHeaders(); + assert.strictEqual(headers["Authorization"], undefined); + }); + + it("should include auth when called without model arg and no cloud endpoint (legacy)", () => { + process.env.OLLAMA_API_KEY = "test-key-123"; + delete 
process.env.OLLAMA_CLOUD_ENDPOINT; + delete require.cache[require.resolve("../src/clients/ollama-utils")]; + delete require.cache[require.resolve("../src/config")]; + ollamaUtils = require("../src/clients/ollama-utils"); + + // No cloud endpoint = legacy mode, auth sent to all + const headers = ollamaUtils.getOllamaHeaders(); + assert.strictEqual(headers["Authorization"], "Bearer test-key-123"); + }); + }); +}); diff --git a/test/edit-tools.test.js b/test/edit-tools.test.js new file mode 100644 index 0000000..414b921 --- /dev/null +++ b/test/edit-tools.test.js @@ -0,0 +1,321 @@ +const { describe, it, before, after } = require("node:test"); +const assert = require("node:assert"); +const fs = require("fs"); +const path = require("path"); +const os = require("os"); + +// Mock configuration +process.env.NODE_ENV = "test"; +process.env.MODEL_PROVIDER = "databricks"; +process.env.DATABRICKS_API_KEY = "test-key"; +process.env.DATABRICKS_API_BASE = "http://test.com"; + +// Create a temporary workspace for testing +const testWorkspaceRoot = path.join(os.tmpdir(), `lynkr-test-edit-${Date.now()}`); +fs.mkdirSync(testWorkspaceRoot, { recursive: true }); +process.env.WORKSPACE_ROOT = testWorkspaceRoot; + +const { executeToolCall } = require("../src/tools"); +require("../src/tools/workspace").registerWorkspaceTools(); + +describe("Edit Tools Tests", () => { + let testFilePath; + + before(() => { + // Create test file + testFilePath = "test-file.txt"; + const fullPath = path.join(testWorkspaceRoot, testFilePath); + fs.writeFileSync( + fullPath, + "Hello World\nThis is a test\nHello again\nEnd of file" + ); + }); + + after(() => { + // Clean up + try { + fs.rmSync(testWorkspaceRoot, { recursive: true, force: true }); + } catch (err) { + console.error("Failed to clean up test workspace:", err); + } + }); + + describe("Edit tool (string replacement)", () => { + it("should replace a unique string", async () => { + const result = await executeToolCall({ + function: { + name: "Edit", + 
arguments: JSON.stringify({ + file_path: testFilePath, + old_string: "This is a test", + new_string: "This is modified", + }), + }, + }); + + assert.strictEqual(result.ok, true); + assert.strictEqual(result.status, 200); + + // Verify file content + const content = fs.readFileSync( + path.join(testWorkspaceRoot, testFilePath), + "utf8" + ); + assert.strictEqual( + content, + "Hello World\nThis is modified\nHello again\nEnd of file" + ); + + // Restore for next test + fs.writeFileSync( + path.join(testWorkspaceRoot, testFilePath), + "Hello World\nThis is a test\nHello again\nEnd of file" + ); + }); + + it("should fail when old_string is not unique (without replace_all)", async () => { + const result = await executeToolCall({ + function: { + name: "Edit", + arguments: JSON.stringify({ + file_path: testFilePath, + old_string: "Hello", + new_string: "Hi", + }), + }, + }); + + assert.strictEqual(result.ok, false); + assert.match( + result.content, + /appears multiple times|not unique/i + ); + }); + + it("should replace all occurrences with replace_all=true", async () => { + const result = await executeToolCall({ + function: { + name: "Edit", + arguments: JSON.stringify({ + file_path: testFilePath, + old_string: "Hello", + new_string: "Hi", + replace_all: true, + }), + }, + }); + + assert.strictEqual(result.ok, true); + assert.strictEqual(result.status, 200); + + // Verify file content + const content = fs.readFileSync( + path.join(testWorkspaceRoot, testFilePath), + "utf8" + ); + assert.strictEqual( + content, + "Hi World\nThis is a test\nHi again\nEnd of file" + ); + + // Restore for next test + fs.writeFileSync( + path.join(testWorkspaceRoot, testFilePath), + "Hello World\nThis is a test\nHello again\nEnd of file" + ); + }); + + it("should fail when old_string is not found", async () => { + const result = await executeToolCall({ + function: { + name: "Edit", + arguments: JSON.stringify({ + file_path: testFilePath, + old_string: "NonexistentString", + new_string: 
"Something", + }), + }, + }); + + assert.strictEqual(result.ok, false); + assert.match(result.content, /not found/i); + }); + + it("should fail when editing non-existent file", async () => { + const result = await executeToolCall({ + function: { + name: "Edit", + arguments: JSON.stringify({ + file_path: "nonexistent.txt", + old_string: "test", + new_string: "modified", + }), + }, + }); + + assert.strictEqual(result.ok, false); + assert.match(result.content, /non-existent file/i); + }); + + it("should fail when old_string equals new_string", async () => { + const result = await executeToolCall({ + function: { + name: "Edit", + arguments: JSON.stringify({ + file_path: testFilePath, + old_string: "Hello", + new_string: "Hello", + }), + }, + }); + + assert.strictEqual(result.ok, false); + assert.match(result.content, /must be different/i); + }); + }); + + describe("edit_patch tool (unified diff)", () => { + before(() => { + // Reset file for patch tests + fs.writeFileSync( + path.join(testWorkspaceRoot, testFilePath), + "Hello World\nThis is a test\nHello again\nEnd of file" + ); + }); + + it("should apply a valid unified diff patch", async () => { + const patch = `--- test-file.txt ++++ test-file.txt +@@ -1,4 +1,4 @@ + Hello World +-This is a test ++This is PATCHED + Hello again + End of file`; + + const result = await executeToolCall({ + function: { + name: "edit_patch", + arguments: JSON.stringify({ + file_path: testFilePath, + patch: patch, + }), + }, + }); + + assert.strictEqual(result.ok, true); + assert.strictEqual(result.status, 200); + + // Verify file content + const content = fs.readFileSync( + path.join(testWorkspaceRoot, testFilePath), + "utf8" + ); + assert.match(content, /PATCHED/); + + // Restore + fs.writeFileSync( + path.join(testWorkspaceRoot, testFilePath), + "Hello World\nThis is a test\nHello again\nEnd of file" + ); + }); + + it("should fail when patch parameter is missing", async () => { + const result = await executeToolCall({ + function: { + 
name: "edit_patch", + arguments: JSON.stringify({ + file_path: testFilePath, + // Missing patch parameter + }), + }, + }); + + assert.strictEqual(result.ok, false); + assert.match(result.content, /patch must be a non-empty string/i); + }); + + it("should fail when patching non-existent file", async () => { + const patch = `--- nonexistent.txt ++++ nonexistent.txt +@@ -1 +1 @@ +-old ++new`; + + const result = await executeToolCall({ + function: { + name: "edit_patch", + arguments: JSON.stringify({ + file_path: "nonexistent.txt", + patch: patch, + }), + }, + }); + + assert.strictEqual(result.ok, false); + assert.match(result.content, /non-existent file/i); + }); + }); + + describe("Tool separation verification", () => { + it("should have both Edit and edit_patch as separate tools", async () => { + const { hasTool } = require("../src/tools"); + + assert.strictEqual(hasTool("Edit"), true, "Edit tool should exist"); + assert.strictEqual( + hasTool("edit_patch"), + true, + "edit_patch tool should exist" + ); + }); + + it("Edit should accept old_string/new_string parameters", async () => { + const result = await executeToolCall({ + function: { + name: "Edit", + arguments: JSON.stringify({ + file_path: testFilePath, + old_string: "test", + new_string: "TEST", + }), + }, + }); + + // Should succeed with these parameters + assert.strictEqual(result.ok, true); + + // Restore + fs.writeFileSync( + path.join(testWorkspaceRoot, testFilePath), + "Hello World\nThis is a test\nHello again\nEnd of file" + ); + }); + + it("edit_patch should accept patch parameter", async () => { + const patch = `--- test-file.txt ++++ test-file.txt +@@ -1,1 +1,1 @@ +-Hello World ++Hi World`; + + const result = await executeToolCall({ + function: { + name: "edit_patch", + arguments: JSON.stringify({ + file_path: testFilePath, + patch: patch, + }), + }, + }); + + // Should succeed with patch parameter + assert.strictEqual(result.ok, true); + + // Restore + fs.writeFileSync( + 
path.join(testWorkspaceRoot, testFilePath), + "Hello World\nThis is a test\nHello again\nEnd of file" + ); + }); + }); +}); diff --git a/test/enforce-server-model.test.js b/test/enforce-server-model.test.js new file mode 100644 index 0000000..6c56eb5 --- /dev/null +++ b/test/enforce-server-model.test.js @@ -0,0 +1,72 @@ +const assert = require("assert"); +const { describe, it, beforeEach, afterEach } = require("node:test"); + +describe("ENFORCE_SERVER_MODEL Tests", () => { + let originalEnv; + + beforeEach(() => { + delete require.cache[require.resolve("../src/config")]; + originalEnv = { ...process.env }; + }); + + afterEach(() => { + process.env = originalEnv; + }); + + describe("Configuration", () => { + it("should default to false when not set", () => { + delete process.env.ENFORCE_SERVER_MODEL; + const config = require("../src/config"); + + assert.strictEqual(config.modelProvider.enforceServerModel, false); + }); + + it("should be true when set to 'true'", () => { + process.env.ENFORCE_SERVER_MODEL = "true"; + delete require.cache[require.resolve("../src/config")]; + const config = require("../src/config"); + + assert.strictEqual(config.modelProvider.enforceServerModel, true); + }); + + it("should be false when set to 'false'", () => { + process.env.ENFORCE_SERVER_MODEL = "false"; + delete require.cache[require.resolve("../src/config")]; + const config = require("../src/config"); + + assert.strictEqual(config.modelProvider.enforceServerModel, false); + }); + + it("should be false for any other value", () => { + process.env.ENFORCE_SERVER_MODEL = "yes"; + delete require.cache[require.resolve("../src/config")]; + const config = require("../src/config"); + + assert.strictEqual(config.modelProvider.enforceServerModel, false); + }); + }); + + describe("Model Selection Behavior", () => { + it("should use MODEL_DEFAULT when ENFORCE_SERVER_MODEL is true", () => { + process.env.MODEL_PROVIDER = "openrouter"; + process.env.MODEL_DEFAULT = "qwen/qwen3-coder-next"; + 
process.env.ENFORCE_SERVER_MODEL = "true"; + delete require.cache[require.resolve("../src/config")]; + const config = require("../src/config"); + + assert.strictEqual(config.modelProvider.defaultModel, "qwen/qwen3-coder-next"); + assert.strictEqual(config.modelProvider.enforceServerModel, true); + }); + + it("should respect client model when ENFORCE_SERVER_MODEL is false", () => { + process.env.MODEL_PROVIDER = "openrouter"; + process.env.MODEL_DEFAULT = "qwen/qwen3-coder-next"; + process.env.ENFORCE_SERVER_MODEL = "false"; + delete require.cache[require.resolve("../src/config")]; + const config = require("../src/config"); + + assert.strictEqual(config.modelProvider.defaultModel, "qwen/qwen3-coder-next"); + assert.strictEqual(config.modelProvider.enforceServerModel, false); + }); + }); +}); diff --git a/test/hybrid-routing-integration.test.js b/test/hybrid-routing-integration.test.js index e39e29a..8d007bc 100644 --- a/test/hybrid-routing-integration.test.js +++ b/test/hybrid-routing-integration.test.js @@ -29,17 +29,18 @@ describe("Hybrid Routing Integration Tests", () => { }); describe("Configuration Validation", () => { - it("should use default OLLAMA_ENDPOINT when not specified", () => { + it("should require OLLAMA_ENDPOINT or OLLAMA_CLOUD_ENDPOINT when PREFER_OLLAMA is set", () => { process.env.PREFER_OLLAMA = "true"; delete process.env.OLLAMA_ENDPOINT; + delete process.env.OLLAMA_CLOUD_ENDPOINT; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; process.env.DATABRICKS_API_KEY = "test-key"; process.env.DATABRICKS_API_BASE = "http://test.com"; - const config = require("../src/config"); - - // Should use default localhost:11434 - assert.strictEqual(config.ollama.endpoint, "http://localhost:11434"); + // Should throw because no endpoint is configured + assert.throws(() => { + require("../src/config"); + }, /OLLAMA_ENDPOINT.*OLLAMA_CLOUD_ENDPOINT/); }); it("should reject invalid FALLBACK_PROVIDER", () => { diff --git a/test/llamacpp-integration.test.js 
b/test/llamacpp-integration.test.js index 1b6006a..88b72e9 100644 --- a/test/llamacpp-integration.test.js +++ b/test/llamacpp-integration.test.js @@ -127,6 +127,7 @@ describe("llama.cpp Integration", () => { it("should throw error when llamacpp is set as FALLBACK_PROVIDER", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; process.env.FALLBACK_PROVIDER = "llamacpp"; process.env.LLAMACPP_ENDPOINT = "http://localhost:8080"; diff --git a/test/ollama-message-conversion.test.js b/test/ollama-message-conversion.test.js new file mode 100644 index 0000000..05f3f91 --- /dev/null +++ b/test/ollama-message-conversion.test.js @@ -0,0 +1,262 @@ +const assert = require("assert"); +const { describe, it, beforeEach } = require("node:test"); + +describe("Ollama message conversion — convertAnthropicMessagesToOpenRouter", () => { + let convertAnthropicMessagesToOpenRouter; + + beforeEach(() => { + process.env.MODEL_PROVIDER = "databricks"; + process.env.DATABRICKS_API_KEY = "test-key"; + process.env.DATABRICKS_API_BASE = "http://test.com"; + delete require.cache[require.resolve("../src/clients/openrouter-utils")]; + delete require.cache[require.resolve("../src/config")]; + delete require.cache[require.resolve("../src/logger")]; + ({ convertAnthropicMessagesToOpenRouter } = require("../src/clients/openrouter-utils")); + }); + + it("preserves tool_use blocks as tool_calls array on assistant messages", () => { + const messages = [{ + role: "assistant", + content: [ + { type: "text", text: "Let me read that file." 
}, + { + type: "tool_use", + id: "toolu_abc123", + name: "Read", + input: { file_path: "/tmp/test.js" } + } + ] + }]; + + const converted = convertAnthropicMessagesToOpenRouter(messages); + + // Should produce one assistant message with tool_calls + const assistantMsg = converted.find(m => m.role === "assistant"); + assert.ok(assistantMsg, "assistant message should exist"); + assert.ok(Array.isArray(assistantMsg.tool_calls), "tool_calls should be an array"); + assert.strictEqual(assistantMsg.tool_calls.length, 1); + assert.strictEqual(assistantMsg.tool_calls[0].id, "toolu_abc123"); + assert.strictEqual(assistantMsg.tool_calls[0].function.name, "Read"); + assert.deepStrictEqual( + JSON.parse(assistantMsg.tool_calls[0].function.arguments), + { file_path: "/tmp/test.js" } + ); + }); + + it("preserves tool_result blocks as role:'tool' messages", () => { + const messages = [{ + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_abc123", + content: "File contents here" + } + ] + }]; + + const converted = convertAnthropicMessagesToOpenRouter(messages); + + const toolMsg = converted.find(m => m.role === "tool"); + assert.ok(toolMsg, "tool message should exist"); + assert.strictEqual(toolMsg.tool_call_id, "toolu_abc123"); + assert.strictEqual(toolMsg.content, "File contents here"); + }); + + it("handles mixed text + tool_use in assistant message", () => { + const messages = [{ + role: "assistant", + content: [ + { type: "text", text: "I'll search for that." 
}, + { + type: "tool_use", + id: "toolu_grep1", + name: "Grep", + input: { pattern: "foo", path: "/src" } + }, + { + type: "tool_use", + id: "toolu_grep2", + name: "Read", + input: { file_path: "/src/bar.js" } + } + ] + }]; + + const converted = convertAnthropicMessagesToOpenRouter(messages); + const assistantMsg = converted.find(m => m.role === "assistant"); + + assert.ok(assistantMsg); + assert.strictEqual(assistantMsg.content, "I'll search for that."); + assert.strictEqual(assistantMsg.tool_calls.length, 2); + assert.strictEqual(assistantMsg.tool_calls[0].function.name, "Grep"); + assert.strictEqual(assistantMsg.tool_calls[1].function.name, "Read"); + }); + + it("preserves consecutive tool results — none dropped", () => { + const messages = [ + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "toolu_1", + name: "Read", + input: { file_path: "/a.js" } + }, + { + type: "tool_use", + id: "toolu_2", + name: "Read", + input: { file_path: "/b.js" } + } + ] + }, + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_1", + content: "contents of a.js" + }, + { + type: "tool_result", + tool_use_id: "toolu_2", + content: "contents of b.js" + } + ] + } + ]; + + const converted = convertAnthropicMessagesToOpenRouter(messages); + + // Should have: 1 assistant + 2 tool messages + const toolMsgs = converted.filter(m => m.role === "tool"); + assert.strictEqual(toolMsgs.length, 2, "both tool results should be preserved"); + assert.strictEqual(toolMsgs[0].tool_call_id, "toolu_1"); + assert.strictEqual(toolMsgs[1].tool_call_id, "toolu_2"); + assert.strictEqual(toolMsgs[0].content, "contents of a.js"); + assert.strictEqual(toolMsgs[1].content, "contents of b.js"); + }); + + it("passes through already-in-OpenAI-format messages unchanged", () => { + const messages = [ + { role: "system", content: "You are helpful." }, + { role: "user", content: "Hello" }, + { role: "assistant", content: "Hi there!" 
} + ]; + + const converted = convertAnthropicMessagesToOpenRouter(messages); + + assert.strictEqual(converted.length, 3); + assert.strictEqual(converted[0].role, "system"); + assert.strictEqual(converted[0].content, "You are helpful."); + assert.strictEqual(converted[1].role, "user"); + assert.strictEqual(converted[1].content, "Hello"); + assert.strictEqual(converted[2].role, "assistant"); + assert.strictEqual(converted[2].content, "Hi there!"); + }); +}); + +describe("Ollama merge-dedup logic", () => { + + /** + * Simulates the merge-dedup logic from invokeOllama. + * Extracted here for unit testing without needing to call the full invokeOllama. + */ + function mergeConsecutiveSameRole(messages) { + const merged = []; + for (const msg of messages) { + const prev = merged[merged.length - 1]; + if (prev && prev.role === msg.role + && typeof prev.content === 'string' && typeof msg.content === 'string') { + prev.content = prev.content ? `${prev.content}\n${msg.content}` : msg.content; + } else { + merged.push({ ...msg }); + } + } + return merged; + } + + it("merges consecutive user messages instead of dropping", () => { + const messages = [ + { role: "user", content: "First message" }, + { role: "user", content: "Second message" } + ]; + + const merged = mergeConsecutiveSameRole(messages); + + assert.strictEqual(merged.length, 1); + assert.strictEqual(merged[0].content, "First message\nSecond message"); + }); + + it("does NOT merge messages with non-string content (tool_calls, tool_call_id)", () => { + const messages = [ + { + role: "assistant", + content: "", + tool_calls: [{ id: "toolu_1", type: "function", function: { name: "Read", arguments: "{}" } }] + }, + { + role: "tool", + tool_call_id: "toolu_1", + content: "result" + }, + { + role: "tool", + tool_call_id: "toolu_2", + content: "result2" + } + ]; + + const merged = mergeConsecutiveSameRole(messages); + + // Two consecutive tool messages with string content WILL be merged + // This is fine — Ollama doesn't 
support role:"tool" with tool_call_id natively + // in the same way, but the content is preserved + assert.strictEqual(merged.length, 2); + assert.strictEqual(merged[0].role, "assistant"); + assert.strictEqual(merged[1].role, "tool"); + assert.strictEqual(merged[1].content, "result\nresult2"); + }); + + it("preserves alternating role sequence", () => { + const messages = [ + { role: "user", content: "Q1" }, + { role: "assistant", content: "A1" }, + { role: "user", content: "Q2" }, + { role: "assistant", content: "A2" } + ]; + + const merged = mergeConsecutiveSameRole(messages); + + assert.strictEqual(merged.length, 4); + assert.strictEqual(merged[0].content, "Q1"); + assert.strictEqual(merged[1].content, "A1"); + assert.strictEqual(merged[2].content, "Q2"); + assert.strictEqual(merged[3].content, "A2"); + }); + + it("does not merge when previous has tool_calls (non-string content check)", () => { + const messages = [ + { + role: "assistant", + content: "thinking...", + tool_calls: [{ id: "t1", type: "function", function: { name: "Grep", arguments: "{}" } }] + }, + { + role: "assistant", + content: "Here's what I found." + } + ]; + + // First message has tool_calls — spread operator copies it, but both have string content + // so they WILL be merged (content is string in both). This is intentional: the second + // assistant message would otherwise cause an API error. 
+ const merged = mergeConsecutiveSameRole(messages); + assert.strictEqual(merged.length, 1); + assert.ok(merged[0].content.includes("thinking...")); + assert.ok(merged[0].content.includes("Here's what I found.")); + }); +}); diff --git a/test/openai-integration.test.js b/test/openai-integration.test.js index f85b976..915ceb6 100644 --- a/test/openai-integration.test.js +++ b/test/openai-integration.test.js @@ -112,6 +112,7 @@ describe("OpenAI Integration", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; process.env.OLLAMA_MAX_TOOLS_FOR_ROUTING = "2"; process.env.OPENROUTER_MAX_TOOLS_FOR_ROUTING = "5"; @@ -136,6 +137,7 @@ describe("OpenAI Integration", () => { it("should use openai as fallback provider when configured", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; process.env.FALLBACK_PROVIDER = "openai"; process.env.OPENAI_API_KEY = "sk-test-key"; diff --git a/test/parsers.test.js b/test/parsers.test.js new file mode 100644 index 0000000..4ab42d5 --- /dev/null +++ b/test/parsers.test.js @@ -0,0 +1,352 @@ +/** + * Tests for the per-model tool parser architecture (src/parsers/) + */ +const { describe, it, beforeEach } = require('node:test'); +const assert = require('node:assert'); + +process.env.NODE_ENV = 'test'; + +const BaseToolParser = require('../src/parsers/base-tool-parser'); +const GenericToolParser = require('../src/parsers/generic-tool-parser'); +const Glm47ToolParser = require('../src/parsers/glm47-tool-parser'); +const { getParserForModel, clearParserCache, PARSER_REGISTRY } = require('../src/parsers'); + +describe('BaseToolParser', () => { + it('should throw on unimplemented extractToolCallsFromText', () => { + const parser = new BaseToolParser('test-model'); + 
assert.throws( + () => parser.extractToolCallsFromText('text'), + /must implement extractToolCallsFromText/ + ); + }); + + it('should pass-through normalizeToolCalls', () => { + const parser = new BaseToolParser('test-model'); + const calls = [{ function: { name: 'Bash', arguments: {} } }]; + assert.deepStrictEqual(parser.normalizeToolCalls(calls), calls); + }); + + it('should pass-through cleanArguments', () => { + const parser = new BaseToolParser('test-model'); + const call = { name: 'Bash', input: { command: 'ls' } }; + assert.strictEqual(parser.cleanArguments(call), call); + }); + + it('should strip tags', () => { + const parser = new BaseToolParser('test-model'); + const text = 'reasoningThe answer is 42'; + assert.strictEqual(parser.stripReasoningTags(text), 'The answer is 42'); + }); + + it('should handle non-string in stripReasoningTags', () => { + const parser = new BaseToolParser('test-model'); + assert.strictEqual(parser.stripReasoningTags(null), null); + assert.strictEqual(parser.stripReasoningTags(undefined), undefined); + }); +}); + +describe('GenericToolParser', () => { + let parser; + beforeEach(() => { parser = new GenericToolParser('unknown-model'); }); + + describe('extractToolCallsFromText', () => { + it('should extract JSON tool calls', () => { + const text = 'I will call {"name": "Bash", "parameters": {"command": "ls"}} now'; + const result = parser.extractToolCallsFromText(text); + assert.ok(result); + assert.strictEqual(result.length, 1); + assert.strictEqual(result[0].function.name, 'Bash'); + assert.deepStrictEqual(result[0].function.arguments, { command: 'ls' }); + }); + + it('should return null for plain text', () => { + assert.strictEqual(parser.extractToolCallsFromText('Hello world'), null); + }); + + it('should return null for null/empty input', () => { + assert.strictEqual(parser.extractToolCallsFromText(null), null); + assert.strictEqual(parser.extractToolCallsFromText(''), null); + }); + + it('should not extract fenced code blocks 
(that is model-specific)', () => { + const text = '```bash\nls -la\n```'; + assert.strictEqual(parser.extractToolCallsFromText(text), null); + }); + }); + + describe('cleanArguments', () => { + it('should clean Bash tool with code fence (Anthropic format)', () => { + const dirty = { name: 'Bash', input: { command: '```bash\nls -la\n```' } }; + const clean = parser.cleanArguments(dirty); + assert.strictEqual(clean.input.command, 'ls -la'); + }); + + it('should clean Bash tool with prompt char (Anthropic format)', () => { + const dirty = { name: 'Bash', input: { command: '$ pwd' } }; + const clean = parser.cleanArguments(dirty); + assert.strictEqual(clean.input.command, 'pwd'); + }); + + it('should clean Bash tool with bullet point', () => { + const dirty = { name: 'Bash', input: { command: '● ls' } }; + const clean = parser.cleanArguments(dirty); + assert.strictEqual(clean.input.command, 'ls'); + }); + + it('should not modify non-Bash tools', () => { + const tool = { name: 'Read', input: { file_path: '/tmp/test.txt' } }; + assert.strictEqual(parser.cleanArguments(tool), tool); + }); + + it('should not modify clean Bash commands', () => { + const tool = { name: 'Bash', input: { command: 'ls -la' } }; + assert.strictEqual(parser.cleanArguments(tool), tool); + }); + + it('should handle null', () => { + assert.strictEqual(parser.cleanArguments(null), null); + }); + + it('should clean OpenAI format', () => { + const dirty = { + function: { name: 'Bash', arguments: JSON.stringify({ command: '```bash\nls\n```' }) } + }; + const clean = parser.cleanArguments(dirty); + const args = JSON.parse(clean.function.arguments); + assert.strictEqual(args.command, 'ls'); + }); + }); +}); + +describe('Glm47ToolParser', () => { + let parser; + beforeEach(() => { parser = new Glm47ToolParser('glm-4.7:cloud'); }); + + describe('extractToolCallsFromText — XML format', () => { + it('should extract GLM XML tool call', () => { + const text = `Bash +command +ls -la +`; + const result = 
parser.extractToolCallsFromText(text); + assert.ok(result); + assert.strictEqual(result.length, 1); + assert.strictEqual(result[0].function.name, 'Bash'); + assert.strictEqual(result[0].function.arguments.command, 'ls -la'); + }); + + it('should extract multiple XML tool calls', () => { + const text = `Read +file_path +/tmp/test.txt + +Bash +command +pwd +`; + const result = parser.extractToolCallsFromText(text); + assert.ok(result); + assert.strictEqual(result.length, 2); + assert.strictEqual(result[0].function.name, 'Read'); + assert.strictEqual(result[0].function.arguments.file_path, '/tmp/test.txt'); + assert.strictEqual(result[1].function.name, 'Bash'); + assert.strictEqual(result[1].function.arguments.command, 'pwd'); + }); + + it('should handle multi-arg XML tool call', () => { + const text = `Write +file_path +/tmp/out.txt +content +Hello world +`; + const result = parser.extractToolCallsFromText(text); + assert.ok(result); + assert.strictEqual(result.length, 1); + assert.strictEqual(result[0].function.name, 'Write'); + assert.strictEqual(result[0].function.arguments.file_path, '/tmp/out.txt'); + assert.strictEqual(result[0].function.arguments.content, 'Hello world'); + }); + + it('should handle tool call with no arguments', () => { + const text = `SomeToolWithNoArgs`; + const result = parser.extractToolCallsFromText(text); + assert.ok(result); + assert.strictEqual(result.length, 1); + assert.strictEqual(result[0].function.name, 'SomeToolWithNoArgs'); + assert.deepStrictEqual(result[0].function.arguments, {}); + }); + }); + + describe('extractToolCallsFromText — bullet points', () => { + it('should extract bullet-point shell commands', () => { + const text = 'I will run these commands:\n● git status\n● ls -la'; + const result = parser.extractToolCallsFromText(text); + assert.ok(result); + assert.strictEqual(result.length, 2); + assert.strictEqual(result[0].function.name, 'Bash'); + assert.strictEqual(result[0].function.arguments.command, 'git status'); + 
assert.strictEqual(result[1].function.arguments.command, 'ls -la'); + }); + + it('should not extract non-shell-command bullets', () => { + const text = '● This is just a note\n● Another note'; + assert.strictEqual(parser.extractToolCallsFromText(text), null); + }); + }); + + describe('extractToolCallsFromText — fenced code blocks', () => { + it('should extract commands from bash code block', () => { + const text = '```bash\nls -la\npwd\n```'; + const result = parser.extractToolCallsFromText(text); + assert.ok(result); + assert.strictEqual(result.length, 2); + assert.strictEqual(result[0].function.arguments.command, 'ls -la'); + assert.strictEqual(result[1].function.arguments.command, 'pwd'); + }); + + it('should strip $ and # prompt chars', () => { + const text = '```bash\n$ ls -la\n# pwd\n```'; + const result = parser.extractToolCallsFromText(text); + assert.ok(result); + assert.strictEqual(result[0].function.arguments.command, 'ls -la'); + assert.strictEqual(result[1].function.arguments.command, 'pwd'); + }); + }); + + describe('extractToolCallsFromText — priority', () => { + it('should prefer XML over bullet points', () => { + const text = `Read +file_path +/tmp/test.txt + +● ls -la`; + const result = parser.extractToolCallsFromText(text); + assert.ok(result); + // XML match should win + assert.strictEqual(result[0].function.name, 'Read'); + }); + }); + + describe('orphaned closing tag stripping', () => { + it('should strip orphaned from "Invoking tool(s):" text', () => { + const text = 'Invoking tool(s): Grep'; + const result = parser.extractToolCallsFromText(text); + // After stripping, text becomes "Invoking tool(s): Grep" — no tool_call structure + assert.strictEqual(result, null); + }); + + it('should strip orphaned from text', () => { + const text = 'Invoking tool(s): Grep, Grep, Glob'; + const result = parser.extractToolCallsFromText(text); + assert.strictEqual(result, null); + }); + + it('should NOT strip when matching opener exists', () => { + const 
text = `Bash +command +ls -la +`; + const result = parser.extractToolCallsFromText(text); + assert.ok(result); + assert.strictEqual(result.length, 1); + assert.strictEqual(result[0].function.name, 'Bash'); + assert.strictEqual(result[0].function.arguments.command, 'ls -la'); + }); + + it('should NOT strip when matching opener exists', () => { + // Complete ... pairs are kept by orphan stripping + // (they get stripped by stripReasoningTags instead) + const text = 'reasoning here\n● git status'; + const result = parser.extractToolCallsFromText(text); + // The complete pair remains but is on its own line; + // bullet extraction finds "● git status" on the next line + assert.ok(result); + assert.strictEqual(result[0].function.arguments.command, 'git status'); + }); + }); + + describe('stripReasoningTags', () => { + it('should strip complete blocks', () => { + assert.strictEqual(parser.stripReasoningTags('reasoningAnswer'), 'Answer'); + }); + + it('should strip orphaned closing tag', () => { + assert.strictEqual(parser.stripReasoningTags('Answer'), 'Answer'); + }); + + it('should strip orphaned closing tag', () => { + assert.strictEqual(parser.stripReasoningTags('Grep'), 'Grep'); + }); + + it('should not strip tags with matching openers', () => { + const text = 'val'; + assert.strictEqual(parser.stripReasoningTags(text), text); + }); + }); + + describe('cleanArguments', () => { + it('should clean markdown from Bash commands', () => { + const dirty = { name: 'Bash', input: { command: '```bash\nls -la\n```' } }; + const clean = parser.cleanArguments(dirty); + assert.strictEqual(clean.input.command, 'ls -la'); + }); + + it('should pass through non-Bash tools', () => { + const tool = { name: 'Read', input: { file_path: '/tmp/test.txt' } }; + assert.strictEqual(parser.cleanArguments(tool), tool); + }); + }); +}); + +describe('Parser Registry', () => { + beforeEach(() => { clearParserCache(); }); + + it('should return Glm47ToolParser for glm-4.7 models', () => { + const 
parser = getParserForModel('glm-4.7:cloud'); + assert.strictEqual(parser.constructor.name, 'Glm47ToolParser'); + }); + + it('should return Glm47ToolParser for glm4 models', () => { + const parser = getParserForModel('glm4-9b'); + assert.strictEqual(parser.constructor.name, 'Glm47ToolParser'); + }); + + it('should return Glm47ToolParser for glm-4 models', () => { + const parser = getParserForModel('glm-4-base'); + assert.strictEqual(parser.constructor.name, 'Glm47ToolParser'); + }); + + it('should return parser for qwen3-coder models', () => { + const parser = getParserForModel('qwen3-coder-next'); + assert.notStrictEqual(parser.constructor.name, 'GenericToolParser'); + }); + + it('should return parser for qwen3 models', () => { + const parser = getParserForModel('qwen3-base'); + assert.notStrictEqual(parser.constructor.name, 'GenericToolParser'); + }); + + it('should return GenericToolParser for unknown models', () => { + const parser = getParserForModel('some-random-model'); + assert.strictEqual(parser.constructor.name, 'GenericToolParser'); + }); + + it('should return GenericToolParser for null model name', () => { + const parser = getParserForModel(null); + assert.strictEqual(parser.constructor.name, 'GenericToolParser'); + }); + + it('should cache parser instances', () => { + const p1 = getParserForModel('glm-4.7:cloud'); + const p2 = getParserForModel('glm-4.7:cloud'); + assert.strictEqual(p1, p2); + }); + + it('should clear cache', () => { + const p1 = getParserForModel('glm-4.7:cloud'); + clearParserCache(); + const p2 = getParserForModel('glm-4.7:cloud'); + assert.notStrictEqual(p1, p2); + }); +}); diff --git a/test/performance-tests.js b/test/performance-tests.js index f25e089..366a90b 100755 --- a/test/performance-tests.js +++ b/test/performance-tests.js @@ -66,7 +66,7 @@ async function testDatabaseIndexes() { if (!fs.existsSync(dbPath)) { log('⚠️ Database not found. 
Creating test database...', 'yellow'); // Initialize database - require('../src/db/index.js'); + require('./src/db/index.js'); } const db = new Database(dbPath); diff --git a/test/qwen3-markdown-extraction.test.js b/test/qwen3-markdown-extraction.test.js new file mode 100644 index 0000000..6cd31b4 --- /dev/null +++ b/test/qwen3-markdown-extraction.test.js @@ -0,0 +1,119 @@ +const { describe, it } = require("node:test"); +const assert = require("node:assert"); + +// Set up test environment +process.env.NODE_ENV = "test"; + +const { + extractToolCallsFromText, +} = require("../src/clients/ollama-utils"); + +const { getParserForModel, PARSER_REGISTRY } = require("../src/parsers"); + +describe("Qwen3 Markdown Tool Extraction", () => { + it("should have qwen3 registered in the parser registry", () => { + const hasQwen3 = PARSER_REGISTRY.some(entry => entry.prefix === "qwen3"); + assert.ok(hasQwen3, "qwen3 should be in parser registry"); + }); + + it("should return a parser for qwen3-coder-next", () => { + const parser = getParserForModel("qwen3-coder-next"); + assert.ok(parser, "Should return a parser instance"); + assert.notStrictEqual(parser.constructor.name, "GenericToolParser", + "qwen3 should not fall back to GenericToolParser"); + }); + + it("should extract bash command from markdown code block for qwen3-coder-next", () => { + const content = `Let me check the log files to understand what's happening: + +\`\`\`bash +ls -la ./logs 2>/dev/null || echo "logs directory not found" +\`\`\``; + + const extracted = extractToolCallsFromText(content, "qwen3-coder-next"); + + assert.ok(extracted, "Should extract tool calls"); + assert.strictEqual(extracted.length, 1, "Should extract 1 tool call"); + assert.strictEqual(extracted[0].function.name, "Bash", "Should be Bash tool"); + assert.ok( + extracted[0].function.arguments.command.includes("ls -la ./logs"), + "Should extract the ls command" + ); + }); + + it("should extract multiple commands from multiple code blocks", () => 
{ + const content = `First check the directory: + +\`\`\`bash +pwd +\`\`\` + +Then list the files: + +\`\`\`bash +ls -la +\`\`\``; + + const extracted = extractToolCallsFromText(content, "qwen3-coder-next"); + + assert.ok(extracted, "Should extract tool calls"); + assert.strictEqual(extracted.length, 2, "Should extract 2 tool calls"); + assert.strictEqual(extracted[0].function.arguments.command, "pwd"); + assert.strictEqual(extracted[1].function.arguments.command, "ls -la"); + }); + + it("should work with sh, shell, and console code blocks", () => { + const testCases = [ + { fence: "sh", command: "echo test" }, + { fence: "shell", command: "cat file.txt" }, + { fence: "console", command: "npm install" }, + ]; + + for (const { fence, command } of testCases) { + const content = `\`\`\`${fence}\n${command}\n\`\`\``; + const extracted = extractToolCallsFromText(content, "qwen3-coder-next"); + + assert.ok(extracted, `Should extract from ${fence} block`); + assert.strictEqual(extracted.length, 1); + assert.strictEqual(extracted[0].function.arguments.command, command); + } + }); + + it("should strip prompt characters ($, #)", () => { + const content = `\`\`\`bash +$ ls -la +# cat file.txt +\`\`\``; + + const extracted = extractToolCallsFromText(content, "qwen3-coder-next"); + + assert.ok(extracted, "Should extract tool calls"); + assert.strictEqual(extracted.length, 2, "Should extract 2 commands"); + assert.strictEqual(extracted[0].function.arguments.command, "ls -la"); + assert.strictEqual(extracted[1].function.arguments.command, "cat file.txt"); + }); + + it("should not extract from non-registered models without AGGRESSIVE_TOOL_PATCHING", () => { + const content = `\`\`\`bash +ls -la +\`\`\``; + + // Random model that's not in registry — falls to GenericToolParser (JSON only) + const extracted = extractToolCallsFromText(content, "some-other-model"); + + // Should be null because GenericToolParser only extracts JSON tool calls + assert.strictEqual(extracted, null, 
"Should not extract for unlisted models"); + }); + + it("should only extract valid shell commands", () => { + const content = `\`\`\`bash +This is just text +not a command +\`\`\``; + + const extracted = extractToolCallsFromText(content, "qwen3-coder-next"); + + // Should be null because no valid shell commands match the regex + assert.strictEqual(extracted, null, "Should not extract non-commands"); + }); +}); diff --git a/test/routing.test.js b/test/routing.test.js index 5e9ba03..d0eef50 100644 --- a/test/routing.test.js +++ b/test/routing.test.js @@ -42,6 +42,7 @@ describe("Routing Logic", () => { it("should route to ollama when no tools and PREFER_OLLAMA is true", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; config = require("../src/config"); @@ -59,6 +60,7 @@ describe("Routing Logic", () => { it("should route to ollama when tool count < threshold", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; process.env.OLLAMA_MAX_TOOLS_FOR_ROUTING = "3"; @@ -80,6 +82,7 @@ describe("Routing Logic", () => { it("should route to cloud when tool count >= threshold", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; process.env.OLLAMA_MAX_TOOLS_FOR_ROUTING = "3"; process.env.OPENROUTER_MAX_TOOLS_FOR_ROUTING = "3"; // Set same as ollama to skip openrouter tier @@ -114,6 +117,7 @@ describe("Routing Logic", () => { it("should route to cloud when model doesn't support tools", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "llama3:latest"; 
// Non-tool-capable model process.env.OLLAMA_FALLBACK_PROVIDER = "databricks"; process.env.FALLBACK_ENABLED = "true"; // Ensure fallback is enabled @@ -135,6 +139,7 @@ describe("Routing Logic", () => { it("should use custom max tools threshold", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; process.env.OLLAMA_MAX_TOOLS_FOR_ROUTING = "5"; process.env.OLLAMA_FALLBACK_PROVIDER = "databricks"; @@ -164,6 +169,7 @@ describe("Routing Logic", () => { it("should return true by default", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; // Override .env file which sets FALLBACK_ENABLED=false // Test default behavior when not set to "false" @@ -178,6 +184,7 @@ describe("Routing Logic", () => { it("should return false when explicitly disabled", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; process.env.FALLBACK_ENABLED = "false"; @@ -192,6 +199,7 @@ describe("Routing Logic", () => { it("should return databricks by default", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; process.env.DATABRICKS_API_KEY = "test-key"; process.env.DATABRICKS_API_BASE = "http://test.com"; @@ -205,6 +213,7 @@ describe("Routing Logic", () => { it("should return configured fallback provider", () => { process.env.MODEL_PROVIDER = "ollama"; process.env.PREFER_OLLAMA = "true"; + process.env.OLLAMA_ENDPOINT = "http://localhost:11434"; process.env.OLLAMA_MODEL = "qwen2.5-coder:latest"; process.env.FALLBACK_PROVIDER = "azure-anthropic"; 
process.env.AZURE_ANTHROPIC_ENDPOINT = "http://test.com"; diff --git a/test/smart-selection-classification.test.js b/test/smart-selection-classification.test.js new file mode 100644 index 0000000..dfc1945 --- /dev/null +++ b/test/smart-selection-classification.test.js @@ -0,0 +1,72 @@ +/** + * Smart Selection Classification Tests + * + * Verifies that classifyRequestType() correctly categorizes user messages. + * Add new entries to TESTS[] when classification regressions are found. + * + * Usage: + * NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com \ + * node --test test/smart-selection-classification.test.js + */ + +const { describe, it } = require('node:test'); +const assert = require('node:assert'); +const { classifyRequestType } = require('../src/tools/smart-selection'); + +function classify(msg) { + return classifyRequestType({ messages: [{ role: 'user', content: msg }] }); +} + +const TESTS = [ + // Shell commands — must NOT be conversational + { msg: 'run npm test', expect: 'code_execution' }, + { msg: 'check git status', expect: 'code_execution' }, + { msg: 'ls -la', expect: 'code_execution' }, + { msg: 'git log --oneline', expect: 'code_execution' }, + { msg: 'bd ready', expect: 'code_execution' }, + { msg: 'pwd', expect: 'code_execution' }, + { msg: 'npm install lodash', expect: 'code_execution' }, + { msg: 'test the auth module', expect: 'code_execution' }, + + // File reading + { msg: 'cat package.json', expect: 'file_reading' }, + { msg: 'list all files', expect: 'file_reading' }, + { msg: 'read the config', expect: 'file_reading' }, + { msg: 'show me the README', expect: 'file_reading' }, + + // File modification — word boundaries must not match "readme"/"ready" + { msg: 'edit the README', expect: 'file_modification' }, + { msg: 'create a new file', expect: 'file_modification' }, + + // Research / explanation + { msg: 'explain closures', expect: 'research' }, + { msg: 'describe the architecture', expect: 'research' }, + { 
msg: 'summarize this', expect: 'research' }, + { msg: 'search for auth bugs', expect: 'research' }, + + // Simple Q&A + { msg: 'what is a closure?', expect: 'simple_qa' }, + + // Conversational — no tools needed + { msg: 'hello', expect: 'conversational' }, + { msg: 'hi', expect: 'conversational' }, + { msg: 'good morning', expect: 'conversational' }, + { msg: 'thanks', expect: 'conversational' }, + { msg: 'should I use TS?', expect: 'conversational' }, + + // Complex task + { msg: 'implement dark mode', expect: 'complex_task' }, + + // Word boundary regressions — "sh" must not match inside "should"/"show" + { msg: 'show the logs', expect: 'file_reading' }, +]; + +describe('Smart Selection - classifyRequestType', () => { + for (const t of TESTS) { + it(`"${t.msg}" → ${t.expect}`, () => { + const result = classify(t.msg); + assert.strictEqual(result.type, t.expect, + `"${t.msg}" classified as ${result.type}, expected ${t.expect}`); + }); + } +}); diff --git a/test/tool-call-cleaning.test.js b/test/tool-call-cleaning.test.js new file mode 100644 index 0000000..39becf6 --- /dev/null +++ b/test/tool-call-cleaning.test.js @@ -0,0 +1,620 @@ +/** + * Tests for universal tool call cleaning + */ + +const assert = require('assert'); +const { describe, it } = require('node:test'); +const { + cleanToolCallArguments, + cleanToolCalls, + stripMarkdownFromCommand, + FENCE_REGEX, + BULLET_POINT_REGEX, + PROMPT_CHAR_REGEX +} = require('../src/tools/tool-call-cleaner'); + +describe('Universal Tool Call Cleaning', () => { + describe('stripMarkdownFromCommand', () => { + it('should strip bash code fences', () => { + const dirty = '```bash\nls -la\n```'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'ls -la'); + }); + + it('should strip sh code fences', () => { + const dirty = '```sh\ncd /tmp\n```'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'cd /tmp'); + }); + + it('should strip shell code fences', () => { + const dirty 
= '```shell\npwd\n```'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'pwd'); + }); + + it('should strip zsh code fences', () => { + const dirty = '```zsh\necho "test"\n```'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'echo "test"'); + }); + + it('should strip console code fences', () => { + const dirty = '```console\ngit status\n```'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'git status'); + }); + + it('should strip terminal code fences', () => { + const dirty = '```terminal\nnpm test\n```'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'npm test'); + }); + + it('should strip $ prompt characters', () => { + const dirty = '$ ls -la'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'ls -la'); + }); + + it('should strip # prompt characters', () => { + const dirty = '# apt-get update'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'apt-get update'); + }); + + it('should strip ● bullet points', () => { + const dirty = '● ls -la'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'ls -la'); + }); + + it('should strip • bullet points', () => { + const dirty = '• pwd'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'pwd'); + }); + + it('should strip - bullet points', () => { + const dirty = '- git status'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'git status'); + }); + + it('should strip * bullet points', () => { + const dirty = '* npm test'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'npm test'); + }); + + it('should strip ❯ arrow prompts', () => { + const dirty = '❯ cd /tmp'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'cd /tmp'); + }); + + it('should strip > angle bracket prompts', () => { + const dirty = '> echo "test"'; + 
const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'echo "test"'); + }); + + it('should fix user reported issue: "● ls"', () => { + const dirty = '● ls'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'ls'); + }); + + it('should handle combined: bullet + prompt', () => { + const dirty = '● $ ls -la'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'ls -la'); + }); + + it('should handle combined: bullet + prompt + fence', () => { + const dirty = '```bash\n● $ ls -la\n```'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'ls -la'); + }); + + it('should clean multiline with mixed bullets and prompts', () => { + const dirty = '● $ cd /tmp\n• # ls -la'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'cd /tmp\nls -la'); + }); + + it('should handle bullet points with extra spacing', () => { + const dirty = '● ls -la'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'ls -la'); + }); + + it('should handle multiline commands with prompt chars', () => { + const dirty = '```bash\n$ cd /tmp\n$ ls -la\n# pwd\n```'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'cd /tmp\nls -la\npwd'); + }); + + it('should handle code fence with extra whitespace', () => { + const dirty = '```bash \n ls -la \n```'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'ls -la'); + }); + + it('should handle prompt chars with extra spacing', () => { + const dirty = '$ ls -la'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'ls -la'); + }); + + it('should not modify clean commands', () => { + const clean = 'ls -la /home/user'; + const result = stripMarkdownFromCommand(clean); + assert.strictEqual(result, clean); + }); + + it('should handle empty string', () => { + const result = stripMarkdownFromCommand(''); + assert.strictEqual(result, ''); + }); + 
+ it('should handle null', () => { + const result = stripMarkdownFromCommand(null); + assert.strictEqual(result, null); + }); + + it('should handle undefined', () => { + const result = stripMarkdownFromCommand(undefined); + assert.strictEqual(result, undefined); + }); + + it('should preserve commands with # that are not prompts', () => { + const command = 'echo "# This is a comment"'; + const result = stripMarkdownFromCommand(command); + assert.strictEqual(result, command); + }); + + it('should handle complex multiline with mixed content', () => { + const dirty = '```bash\n$ echo "Starting process"\n# ls -la\ngrep "pattern" file.txt\n```'; + const clean = stripMarkdownFromCommand(dirty); + assert.strictEqual(clean, 'echo "Starting process"\nls -la\ngrep "pattern" file.txt'); + }); + }); + + describe('cleanToolCallArguments', () => { + describe('Anthropic format', () => { + it('should clean Bash tool with code fence', () => { + const dirty = { + name: 'Bash', + id: 'toolu_123', + input: { command: '```bash\nls -la\n```' } + }; + const clean = cleanToolCallArguments(dirty); + assert.strictEqual(clean.input.command, 'ls -la'); + assert.strictEqual(clean.name, 'Bash'); + assert.strictEqual(clean.id, 'toolu_123'); + }); + + it('should clean Bash tool with prompt characters', () => { + const dirty = { + name: 'Bash', + input: { command: '$ ls -la' } + }; + const clean = cleanToolCallArguments(dirty); + assert.strictEqual(clean.input.command, 'ls -la'); + }); + + it('should handle multiline commands', () => { + const dirty = { + name: 'Bash', + input: { command: '```bash\n$ cd /tmp\n# ls -la\n```' } + }; + const clean = cleanToolCallArguments(dirty); + assert.strictEqual(clean.input.command, 'cd /tmp\nls -la'); + }); + + it('should preserve other input fields', () => { + const dirty = { + name: 'Bash', + input: { + command: '```bash\nls\n```', + timeout: 5000, + description: 'List files' + } + }; + const clean = cleanToolCallArguments(dirty); + 
assert.strictEqual(clean.input.command, 'ls'); + assert.strictEqual(clean.input.timeout, 5000); + assert.strictEqual(clean.input.description, 'List files'); + }); + + it('should not modify non-Bash tools', () => { + const tool = { + name: 'Read', + input: { file_path: '/tmp/test.txt' } + }; + const clean = cleanToolCallArguments(tool); + assert.deepStrictEqual(clean, tool); + }); + + it('should not modify Bash tools with clean commands', () => { + const tool = { + name: 'Bash', + input: { command: 'ls -la /home/user' } + }; + const clean = cleanToolCallArguments(tool); + assert.strictEqual(clean, tool); // Should be same object reference + }); + }); + + describe('OpenAI format', () => { + it('should clean Bash tool with code fence (string arguments)', () => { + const dirty = { + id: 'call_123', + function: { + name: 'Bash', + arguments: JSON.stringify({ command: '```bash\nls -la\n```' }) + } + }; + const clean = cleanToolCallArguments(dirty); + const args = JSON.parse(clean.function.arguments); + assert.strictEqual(args.command, 'ls -la'); + }); + + it('should clean Bash tool with code fence (object arguments)', () => { + const dirty = { + id: 'call_123', + function: { + name: 'Bash', + arguments: { command: '```bash\nls -la\n```' } + } + }; + const clean = cleanToolCallArguments(dirty); + const args = JSON.parse(clean.function.arguments); + assert.strictEqual(args.command, 'ls -la'); + }); + + it('should clean Bash tool with prompt characters', () => { + const dirty = { + id: 'call_123', + function: { + name: 'Bash', + arguments: JSON.stringify({ command: '$ pwd' }) + } + }; + const clean = cleanToolCallArguments(dirty); + const args = JSON.parse(clean.function.arguments); + assert.strictEqual(args.command, 'pwd'); + }); + + it('should preserve other arguments', () => { + const dirty = { + id: 'call_123', + function: { + name: 'Bash', + arguments: JSON.stringify({ + command: '```bash\nls\n```', + timeout: 5000 + }) + } + }; + const clean = 
cleanToolCallArguments(dirty); + const args = JSON.parse(clean.function.arguments); + assert.strictEqual(args.command, 'ls'); + assert.strictEqual(args.timeout, 5000); + }); + + it('should not modify non-Bash tools', () => { + const tool = { + id: 'call_123', + function: { + name: 'Read', + arguments: JSON.stringify({ file_path: '/tmp/test.txt' }) + } + }; + const clean = cleanToolCallArguments(tool); + assert.strictEqual(clean, tool); + }); + }); + + describe('Edge cases', () => { + it('should handle null input', () => { + const result = cleanToolCallArguments(null); + assert.strictEqual(result, null); + }); + + it('should handle undefined input', () => { + const result = cleanToolCallArguments(undefined); + assert.strictEqual(result, undefined); + }); + + it('should handle tool call without command', () => { + const tool = { + name: 'Bash', + input: {} + }; + const clean = cleanToolCallArguments(tool); + assert.strictEqual(clean, tool); + }); + + it('should handle tool call with non-string command', () => { + const tool = { + name: 'Bash', + input: { command: 123 } + }; + const clean = cleanToolCallArguments(tool); + assert.strictEqual(clean, tool); + }); + }); + }); + + describe('cleanToolCalls', () => { + it('should clean multiple tool calls', () => { + const dirty = [ + { + name: 'Bash', + input: { command: '```bash\nls -la\n```' } + }, + { + name: 'Bash', + input: { command: '$ pwd' } + }, + { + name: 'Read', + input: { file_path: '/tmp/test.txt' } + } + ]; + + const clean = cleanToolCalls(dirty); + assert.strictEqual(clean.length, 3); + assert.strictEqual(clean[0].input.command, 'ls -la'); + assert.strictEqual(clean[1].input.command, 'pwd'); + assert.strictEqual(clean[2].input.file_path, '/tmp/test.txt'); + }); + + it('should handle empty array', () => { + const result = cleanToolCalls([]); + assert.deepStrictEqual(result, []); + }); + + it('should handle null', () => { + const result = cleanToolCalls(null); + assert.strictEqual(result, null); + }); + + 
it('should handle undefined', () => { + const result = cleanToolCalls(undefined); + assert.strictEqual(result, undefined); + }); + + it('should handle array with no cleanable calls', () => { + const tools = [ + { + name: 'Read', + input: { file_path: '/tmp/test.txt' } + }, + { + name: 'Write', + input: { file_path: '/tmp/out.txt', content: 'test' } + } + ]; + + const clean = cleanToolCalls(tools); + assert.strictEqual(clean.length, 2); + assert.strictEqual(clean[0], tools[0]); + assert.strictEqual(clean[1], tools[1]); + }); + + it('should preserve tool call order', () => { + const dirty = [ + { + name: 'Read', + input: { file_path: '/tmp/test.txt' } + }, + { + name: 'Bash', + input: { command: '```bash\nls\n```' } + }, + { + name: 'Write', + input: { file_path: '/tmp/out.txt', content: 'test' } + } + ]; + + const clean = cleanToolCalls(dirty); + assert.strictEqual(clean.length, 3); + assert.strictEqual(clean[0].name, 'Read'); + assert.strictEqual(clean[1].name, 'Bash'); + assert.strictEqual(clean[2].name, 'Write'); + }); + }); + + describe('Regular expressions', () => { + it('FENCE_REGEX should match all supported code fence types', () => { + const fences = [ + '```bash\ncommand\n```', + '```sh\ncommand\n```', + '```shell\ncommand\n```', + '```zsh\ncommand\n```', + '```console\ncommand\n```', + '```terminal\ncommand\n```' + ]; + + fences.forEach(fence => { + FENCE_REGEX.lastIndex = 0; // Reset regex state + assert.strictEqual(FENCE_REGEX.test(fence), true); + }); + }); + + it('FENCE_REGEX should not match non-shell code fences', () => { + const fences = [ + '```javascript\ncode\n```', + '```python\ncode\n```', + '```json\n{}\n```' + ]; + + fences.forEach(fence => { + FENCE_REGEX.lastIndex = 0; // Reset regex state + assert.strictEqual(FENCE_REGEX.test(fence), false); + }); + }); + + it('PROMPT_CHAR_REGEX should match $ and # at line start', () => { + const prompts = [ + '$ command', + '# command', + ' $ command', + ' # command' + ]; + + prompts.forEach(prompt => { + PROMPT_CHAR_REGEX.lastIndex = 0; // Reset regex 
state + assert.strictEqual(PROMPT_CHAR_REGEX.test(prompt), true); + }); + }); + + it('PROMPT_CHAR_REGEX should not match $ and # in middle of line', () => { + const strings = [ + 'echo $VAR', + 'price is $100', + 'echo "# comment"' + ]; + + strings.forEach(str => { + // Reset regex state + PROMPT_CHAR_REGEX.lastIndex = 0; + const match = PROMPT_CHAR_REGEX.test(str); + assert.strictEqual(match, false); + }); + }); + + it('BULLET_POINT_REGEX should match all bullet point types at line start', () => { + const bullets = [ + '● command', + '• command', + '- command', + '* command', + '❯ command', + '> command', + ' ● command', + ' • command' + ]; + + bullets.forEach(bullet => { + BULLET_POINT_REGEX.lastIndex = 0; // Reset regex state + assert.strictEqual(BULLET_POINT_REGEX.test(bullet), true, `Failed for: ${bullet}`); + }); + }); + + it('BULLET_POINT_REGEX should not match bullets in middle of line', () => { + const strings = [ + 'echo ● test', + 'list • item', + 'math: 5 - 3', + 'multiply: 5 * 2', + 'echo > output.txt' + ]; + + strings.forEach(str => { + BULLET_POINT_REGEX.lastIndex = 0; // Reset regex state + const match = BULLET_POINT_REGEX.test(str); + assert.strictEqual(match, false, `Should not match: ${str}`); + }); + }); + }); + + describe('Integration scenarios', () => { + it('should fix user reported issue: "● ls" tool call', () => { + const toolCall = { + name: 'Bash', + id: 'toolu_123', + input: { command: '● ls' } + }; + const clean = cleanToolCallArguments(toolCall); + assert.strictEqual(clean.input.command, 'ls'); + }); + + it('should clean real-world LLM response with markdown', () => { + const toolCall = { + name: 'Bash', + id: 'toolu_abc123', + input: { + command: '```bash\n$ ls -la /home/user/projects\n```', + description: 'List project files' + } + }; + + const clean = cleanToolCallArguments(toolCall); + assert.strictEqual(clean.input.command, 'ls -la /home/user/projects'); + assert.strictEqual(clean.input.description, 'List project files'); + }); + 
+ it('should clean complex multiline command', () => { + const toolCall = { + name: 'Bash', + input: { + command: '```bash\n$ cd /tmp\n$ mkdir test\n# ls -la\n$ cd test\n```' + } + }; + + const clean = cleanToolCallArguments(toolCall); + assert.strictEqual(clean.input.command, 'cd /tmp\nmkdir test\nls -la\ncd test'); + }); + + it('should clean command with bullets, fences, and prompts combined', () => { + const toolCall = { + name: 'Bash', + input: { + command: '```bash\n● $ cd /tmp\n• # mkdir test\n- ls -la\n```' + } + }; + + const clean = cleanToolCallArguments(toolCall); + assert.strictEqual(clean.input.command, 'cd /tmp\nmkdir test\nls -la'); + }); + + it('should handle mixed tool calls array', () => { + const toolCalls = [ + { + name: 'Bash', + input: { command: '```bash\nls\n```' } + }, + { + name: 'Read', + input: { file_path: '/tmp/test.txt' } + }, + { + name: 'Bash', + input: { command: '$ pwd' } + }, + { + name: 'Write', + input: { file_path: '/tmp/out.txt', content: 'test' } + } + ]; + + const clean = cleanToolCalls(toolCalls); + assert.strictEqual(clean[0].input.command, 'ls'); + assert.strictEqual(clean[1].input.file_path, '/tmp/test.txt'); + assert.strictEqual(clean[2].input.command, 'pwd'); + assert.strictEqual(clean[3].input.file_path, '/tmp/out.txt'); + }); + + it('should handle OpenAI format from comparison mode', () => { + const toolCalls = [ + { + id: 'call_123', + type: 'function', + function: { + name: 'Bash', + arguments: '{"command":"```bash\\nls -la\\n```","timeout":5000}' + } + } + ]; + + const clean = cleanToolCalls(toolCalls); + const args = JSON.parse(clean[0].function.arguments); + assert.strictEqual(args.command, 'ls -la'); + assert.strictEqual(args.timeout, 5000); + }); + }); +}); diff --git a/test/tool-classification-accuracy.test.js b/test/tool-classification-accuracy.test.js new file mode 100644 index 0000000..61549fb --- /dev/null +++ b/test/tool-classification-accuracy.test.js @@ -0,0 +1,188 @@ +/** + * Tool Classification 
Accuracy Test + * + * Tests whether the LLM-based tool classification (via TOOL_NEEDS_CLASSIFICATION_MODEL) + * correctly distinguishes tool-needing vs conversational messages. + * + * Usage: + * OLLAMA_ENDPOINT=http://192.168.100.201:11434 \ + * TOOL_NEEDS_CLASSIFICATION_MODEL=qwen3:1.7b \ + * node test/tool-classification-accuracy.test.js + */ + +const { classifyToolNeeds } = require('../src/tools/tool-classification.js'); + +// --- Test cases: 25 tool-needing, 25 conversational --- + +const TEST_CASES = [ + // ========== TOOL-NEEDING (expected: needsTools = true) ========== + { message: "list all files in the current directory", expected: true }, + { message: "show me the contents of package.json", expected: true }, + { message: "create a new file called utils.js with a helper function", expected: true }, + { message: "run npm test", expected: true }, + { message: "search for all TODO comments in the codebase", expected: true }, + { message: "delete the temp folder", expected: true }, + { message: "what's in the src directory?", expected: true }, + { message: "rename server.js to app.js", expected: true }, + { message: "find all files that import lodash", expected: true }, + { message: "check git status", expected: true }, + { message: "add a login route to the express server", expected: true }, + { message: "fix the syntax error on line 42 of index.js", expected: true }, + { message: "install the axios package", expected: true }, + { message: "write a unit test for the auth middleware", expected: true }, + { message: "show me the last 5 git commits", expected: true }, + { message: "refactor the database module to use async/await", expected: true }, + { message: "what port is the server listening on? 
check the config", expected: true }, + { message: "grep for 'password' across all source files", expected: true }, + { message: "make a backup copy of the .env file", expected: true }, + { message: "count how many test files we have", expected: true }, + { message: "edit the README to add installation instructions", expected: true }, + { message: "check if Docker is running", expected: true }, + { message: "compile the typescript files", expected: true }, + { message: "move all log files to an archive folder", expected: true }, + { message: "check disk usage of the project directory", expected: true }, + + // ========== CONVERSATIONAL (expected: needsTools = false) ========== + { message: "hello, how are you?", expected: false }, + { message: "what is a closure in JavaScript?", expected: false }, + { message: "explain the difference between let and const", expected: false }, + { message: "how does async/await work under the hood?", expected: false }, + { message: "what are the SOLID principles?", expected: false }, + { message: "can you summarize what we discussed earlier?", expected: false }, + { message: "thanks for the help!", expected: false }, + { message: "what's the best practice for error handling in Node.js?", expected: false }, + { message: "why is my code slow? 
any general tips?", expected: false }, + { message: "explain REST vs GraphQL", expected: false }, + { message: "what does the spread operator do?", expected: false }, + { message: "how should I structure a monorepo?", expected: false }, + { message: "what is event-driven architecture?", expected: false }, + { message: "tell me about design patterns in JavaScript", expected: false }, + { message: "what's the difference between SQL and NoSQL?", expected: false }, + { message: "good morning!", expected: false }, + { message: "how do promises work?", expected: false }, + { message: "what is dependency injection?", expected: false }, + { message: "explain the observer pattern", expected: false }, + { message: "what are websockets used for?", expected: false }, + { message: "should I use TypeScript for my next project?", expected: false }, + { message: "what's new in ES2024?", expected: false }, + { message: "bye, talk to you later", expected: false }, + { message: "what is the CAP theorem?", expected: false }, + { message: "how do I become a better programmer?", expected: false }, +]; + +// --- Mock invokeModel that calls Ollama directly --- + +async function invokeModel({ model, messages, temperature, max_tokens }) { + const endpoint = process.env.OLLAMA_ENDPOINT || 'http://192.168.100.201:11434'; + + const response = await fetch(`${endpoint}/api/chat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + model: model, + messages: messages, + stream: false, + options: { + temperature: temperature ?? 0, + num_predict: max_tokens ?? 
150, + }, + }), + signal: AbortSignal.timeout(30000), + }); + + if (!response.ok) { + throw new Error(`Ollama error: ${response.status} ${await response.text()}`); + } + + const data = await response.json(); + + // Return in the format classifyToolNeeds expects + return { + ok: true, + json: { + choices: [{ + message: { + content: data.message?.content || '', + }, + }], + }, + }; +} + +// --- Run tests --- + +async function runTests() { + const model = process.env.TOOL_NEEDS_CLASSIFICATION_MODEL || 'qwen3:1.7b'; + console.log(`\n🧪 Tool Classification Accuracy Test`); + console.log(` Model: ${model}`); + console.log(` Test cases: ${TEST_CASES.length} (${TEST_CASES.filter(t => t.expected).length} tool, ${TEST_CASES.filter(t => !t.expected).length} conversational)\n`); + + const config = { + whitelist: './config/tool-whitelist.json', + model: model, + cacheEnabled: false, // Disable cache so every message hits the LLM + llmEnabled: true, + }; + + let correct = 0; + let wrong = 0; + const failures = []; + const results = { tool: { correct: 0, total: 0 }, conv: { correct: 0, total: 0 } }; + + for (let i = 0; i < TEST_CASES.length; i++) { + const tc = TEST_CASES[i]; + const payload = { + messages: [{ role: 'user', content: tc.message }], + }; + + try { + const result = await classifyToolNeeds(payload, config, invokeModel); + const got = result.needsTools; + const pass = got === tc.expected; + + if (tc.expected) results.tool.total++; + else results.conv.total++; + + if (pass) { + correct++; + if (tc.expected) results.tool.correct++; + else results.conv.correct++; + console.log(` ✓ [${i + 1}/${TEST_CASES.length}] "${tc.message.substring(0, 50)}..." → ${got ? 'TOOL' : 'CONV'} (${result.source})`); + } else { + wrong++; + failures.push({ ...tc, got, source: result.source, reason: result.reason }); + console.log(` ✗ [${i + 1}/${TEST_CASES.length}] "${tc.message.substring(0, 50)}..." → ${got ? 'TOOL' : 'CONV'} expected ${tc.expected ? 
'TOOL' : 'CONV'} (${result.source}: ${result.reason})`); + } + } catch (err) { + wrong++; + failures.push({ ...tc, got: 'ERROR', source: 'error', reason: err.message }); + console.log(` ✗ [${i + 1}/${TEST_CASES.length}] "${tc.message.substring(0, 50)}..." → ERROR: ${err.message}`); + } + } + + // --- Summary --- + const total = correct + wrong; + const pct = ((correct / total) * 100).toFixed(1); + const toolPct = results.tool.total ? ((results.tool.correct / results.tool.total) * 100).toFixed(1) : 'N/A'; + const convPct = results.conv.total ? ((results.conv.correct / results.conv.total) * 100).toFixed(1) : 'N/A'; + + console.log(`\n${'='.repeat(60)}`); + console.log(` RESULTS: ${correct}/${total} correct (${pct}%)`); + console.log(` Tool detection: ${results.tool.correct}/${results.tool.total} (${toolPct}%)`); + console.log(` Conv detection: ${results.conv.correct}/${results.conv.total} (${convPct}%)`); + console.log(`${'='.repeat(60)}`); + + if (failures.length > 0) { + console.log(`\n FAILURES:`); + for (const f of failures) { + console.log(` - "${f.message}" → got ${f.got}, expected ${f.expected ? 
/**
 * Unit tests for Tool Needs Classification
 *
 * Covers:
 *  - Whitelist matching (exact, wildcard, case folding, whitespace
 *    normalization, caching, custom shell commands)
 *  - LLM classification helpers (last-user-message extraction, prompt
 *    construction, and result parsing incl. markdown / heuristic fallbacks)
 */

const { describe, it, beforeEach } = require('node:test');
const assert = require('node:assert');
const path = require('path');
const ToolClassificationWhitelist = require('../src/tools/tool-classification-whitelist.js');
const {
  getLastUserMessage,
  buildClassificationPrompt,
  parseClassificationResult
} = require('../src/tools/tool-classification-llm.js');

describe('Tool Classification - Whitelist', () => {
  let whitelist;

  beforeEach(() => {
    whitelist = new ToolClassificationWhitelist(
      path.join(__dirname, '../config/tool-whitelist.json'),
      { customCommands: [] }
    );
    whitelist.load();
  });

  it('should match exact pattern for needsTools', () => {
    const result = whitelist.check('list all files');
    assert.strictEqual(result.matched, true);
    assert.strictEqual(result.needsTools, true);
    assert.strictEqual(result.pattern, 'list all files');
  });

  it('should match exact pattern for noTools', () => {
    const result = whitelist.check('hello');
    assert.strictEqual(result.matched, true);
    assert.strictEqual(result.needsTools, false);
    assert.strictEqual(result.pattern, 'hello');
  });

  it('should match wildcard pattern', () => {
    const result = whitelist.check('bd show 123');
    assert.strictEqual(result.matched, true);
    assert.strictEqual(result.needsTools, true);
    assert.ok(result.pattern.includes('*'));
  });

  it('should be case insensitive', () => {
    const result = whitelist.check('HELLO');
    assert.strictEqual(result.matched, true);
    assert.strictEqual(result.needsTools, false);
  });

  it('should normalize whitespace', () => {
    // FIX: feed genuinely irregular whitespace so this test actually
    // exercises normalization; previously the input was already normalized,
    // making it a duplicate of the exact-match test above.
    const result = whitelist.check('  list   all  files  ');
    assert.strictEqual(result.matched, true);
    assert.strictEqual(result.needsTools, true);
  });

  it('should return not matched for unknown patterns', () => {
    const result = whitelist.check('something completely unknown');
    assert.strictEqual(result.matched, false);
  });

  it('should cache results', () => {
    const result1 = whitelist.check('hello');
    const result2 = whitelist.check('hello');

    assert.deepStrictEqual(result1, result2);
    assert.strictEqual(whitelist.cache.size > 0, true);
  });
});

describe('Tool Classification - LLM Helpers', () => {
  it('should extract last user message from simple payload', () => {
    const payload = {
      messages: [
        { role: 'user', content: 'first message' },
        { role: 'assistant', content: 'response' },
        { role: 'user', content: 'last message' }
      ]
    };

    const message = getLastUserMessage(payload);
    assert.strictEqual(message, 'last message');
  });

  it('should extract text from content blocks', () => {
    const payload = {
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: 'hello' },
            { type: 'image', source: 'data:...' },
            { type: 'text', text: 'world' }
          ]
        }
      ]
    };

    const message = getLastUserMessage(payload);
    assert.strictEqual(message, 'hello\nworld');
  });

  it('should return empty string for no user message', () => {
    const payload = {
      messages: [
        { role: 'assistant', content: 'only assistant message' }
      ]
    };

    const message = getLastUserMessage(payload);
    assert.strictEqual(message, '');
  });

  it('should build classification prompt', () => {
    const prompt = buildClassificationPrompt('list all files');

    assert.ok(prompt.includes('list all files'));
    assert.ok(prompt.includes('needsTools'));
    assert.ok(prompt.includes('JSON'));
  });

  it('should parse valid JSON response', () => {
    const response = '{"needsTools": true, "reason": "requires file access"}';
    const result = parseClassificationResult(response);

    assert.strictEqual(result.needsTools, true);
    assert.strictEqual(result.reason, 'requires file access');
  });

  it('should parse JSON in markdown code block', () => {
    const response = '```json\n{"needsTools": false, "reason": "greeting"}\n```';
    const result = parseClassificationResult(response);

    assert.strictEqual(result.needsTools, false);
    assert.strictEqual(result.reason, 'greeting');
  });

  it('should fallback parse when JSON is invalid', () => {
    const response = 'This requires tools to complete';
    const result = parseClassificationResult(response);

    // Should default to true when uncertain
    assert.strictEqual(typeof result.needsTools, 'boolean');
    assert.strictEqual(typeof result.reason, 'string');
  });

  it('should handle malformed JSON gracefully', () => {
    const response = '{"needsTools": "not a boolean"}';
    const result = parseClassificationResult(response);

    // Should fall back to heuristic
    assert.strictEqual(typeof result.needsTools, 'boolean');
  });
});

describe('Tool Classification - Integration', () => {
  it('should prioritize needsTools patterns over noTools', () => {
    const whitelist = new ToolClassificationWhitelist(
      path.join(__dirname, '../config/tool-whitelist.json'),
      { customCommands: [] }
    );
    whitelist.load();

    // "git status" should match needsTools, not fall through to noTools
    const result = whitelist.check('git status');
    assert.strictEqual(result.matched, true);
    assert.strictEqual(result.needsTools, true);
  });

  it('should handle edge cases in whitelist', () => {
    const whitelist = new ToolClassificationWhitelist(
      path.join(__dirname, '../config/tool-whitelist.json'),
      { customCommands: [] }
    );
    whitelist.load();

    // Empty message
    const result1 = whitelist.check('');
    assert.strictEqual(result1.matched, false);

    // Just whitespace
    const result2 = whitelist.check('   ');
    assert.strictEqual(result2.matched, false);

    // Special characters: may or may not match the whitelist, but check()
    // must return a well-formed result rather than throwing.
    // FIX: previously result3 was computed and never asserted on.
    const result3 = whitelist.check('hello!!!');
    assert.strictEqual(typeof result3.matched, 'boolean');
  });

  it('should support custom shell commands', () => {
    const whitelist = new ToolClassificationWhitelist(
      path.join(__dirname, '../config/tool-whitelist.json'),
      { customCommands: ['bd', 'mycommand'] }
    );
    whitelist.load();

    // Custom command without args
    const result1 = whitelist.check('bd');
    assert.strictEqual(result1.matched, true);
    assert.strictEqual(result1.needsTools, true);

    // Custom command with args
    const result2 = whitelist.check('bd show 123');
    assert.strictEqual(result2.matched, true);
    assert.strictEqual(result2.needsTools, true);

    // Another custom command
    const result3 = whitelist.check('mycommand --flag');
    assert.strictEqual(result3.matched, true);
    assert.strictEqual(result3.needsTools, true);
  });
});
/**
 * Tests for routing tool execution to a dedicated provider/model
 * (TOOL_EXECUTION_PROVIDER / TOOL_EXECUTION_MODEL) separate from the
 * conversation provider, plus tool-capability detection for Ollama models.
 */

const assert = require("assert");
const { describe, it, beforeEach, afterEach } = require("node:test");

describe("Tool Execution Provider Tests", () => {
  let originalEnv;

  beforeEach(() => {
    // Clear require cache so ../src/config re-reads process.env per test.
    delete require.cache[require.resolve("../src/config")];
    delete require.cache[require.resolve("../src/orchestrator")];
    delete require.cache[require.resolve("../src/clients/ollama-utils")];
    originalEnv = { ...process.env };

    // Set required environment variables for tests
    process.env.DATABRICKS_API_BASE = "http://test.com";
    process.env.DATABRICKS_API_KEY = "test-key";
  });

  afterEach(() => {
    // FIX: restore the snapshot key-by-key. The previous
    // `process.env = originalEnv` replaced Node's special env object with a
    // plain object copy, which breaks env propagation to spawned child
    // processes and strands any code holding a reference to the old object.
    for (const key of Object.keys(process.env)) {
      if (!(key in originalEnv)) {
        delete process.env[key];
      }
    }
    Object.assign(process.env, originalEnv);
  });

  describe("shouldEnableToolsForRequest helper", () => {
    it("should enable tools for non-ollama providers by default", () => {
      process.env.MODEL_PROVIDER = "openrouter";
      const config = require("../src/config");

      // Import the orchestrator module to access the helper
      // Note: Since shouldEnableToolsForRequest is not exported, we test via integration
      // For now, we'll test the expected behavior through config

      assert.strictEqual(config.modelProvider.type, "openrouter");
    });

    it("should enable tools when TOOL_EXECUTION_PROVIDER is configured for non-tool-capable model", () => {
      process.env.MODEL_PROVIDER = "ollama";
      process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
      process.env.OLLAMA_MODEL = "qwen3-coder-next";
      process.env.TOOL_EXECUTION_PROVIDER = "openrouter";
      process.env.TOOL_EXECUTION_MODEL = "deepseek/deepseek-chat";

      const config = require("../src/config");

      assert.strictEqual(config.modelProvider.type, "ollama");
      assert.strictEqual(config.ollama.model, "qwen3-coder-next");
      assert.strictEqual(config.toolExecutionProvider, "openrouter");
      assert.strictEqual(config.toolExecutionModel, "deepseek/deepseek-chat");
    });

    it("should enable compare mode when TOOL_EXECUTION_COMPARE_MODE is true", () => {
      process.env.MODEL_PROVIDER = "ollama";
      process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
      process.env.OLLAMA_MODEL = "qwen3-coder-next";
      process.env.TOOL_EXECUTION_PROVIDER = "openrouter";
      process.env.TOOL_EXECUTION_MODEL = "deepseek/deepseek-chat";
      process.env.TOOL_EXECUTION_COMPARE_MODE = "true";

      const config = require("../src/config");

      assert.strictEqual(config.toolExecutionCompareMode, true);
    });
  });

  describe("Tool capability detection", () => {
    it("should recognize qwen3 as tool-capable", () => {
      const { modelNameSupportsTools } = require("../src/clients/ollama-utils");

      assert.strictEqual(modelNameSupportsTools("qwen3-coder-next"), true);
      assert.strictEqual(modelNameSupportsTools("qwen3"), true);
    });

    it("should recognize llama3.1 as tool-capable", () => {
      const { modelNameSupportsTools } = require("../src/clients/ollama-utils");

      assert.strictEqual(modelNameSupportsTools("llama3.1"), true);
      assert.strictEqual(modelNameSupportsTools("llama3.1:8b"), true);
    });

    it("should recognize non-tool-capable models", () => {
      const { modelNameSupportsTools } = require("../src/clients/ollama-utils");

      // Example of a model that doesn't support tools
      assert.strictEqual(modelNameSupportsTools("llama2"), false);
      assert.strictEqual(modelNameSupportsTools("codellama"), false);
    });
  });

  describe("Tool Execution Provider Configuration", () => {
    it("should route tool calls to tool execution provider when configured", () => {
      process.env.MODEL_PROVIDER = "ollama";
      process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
      process.env.OLLAMA_MODEL = "qwen3-coder-next";
      process.env.TOOL_EXECUTION_PROVIDER = "openrouter";
      process.env.TOOL_EXECUTION_MODEL = "deepseek/deepseek-chat";

      const config = require("../src/config");

      // Verify configuration is set up correctly
      assert.strictEqual(config.toolExecutionProvider, "openrouter");
      assert.strictEqual(config.toolExecutionModel, "deepseek/deepseek-chat");

      // Tool execution provider should be different from conversation provider
      assert.notStrictEqual(config.toolExecutionProvider, config.modelProvider.type);
    });

    it("should not route when TOOL_EXECUTION_PROVIDER equals conversation provider", () => {
      process.env.MODEL_PROVIDER = "openrouter";
      process.env.TOOL_EXECUTION_PROVIDER = "openrouter";

      const config = require("../src/config");

      // When providers are the same, no routing should occur
      assert.strictEqual(config.toolExecutionProvider, config.modelProvider.type);
    });
  });

  describe("Integration: Tool enabling logic", () => {
    it("should keep tools when TOOL_EXECUTION_PROVIDER configured", () => {
      // This tests the fix: tools should NOT be removed when tool execution provider is configured
      // even if the conversation model doesn't natively support tools

      process.env.MODEL_PROVIDER = "ollama";
      process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
      process.env.OLLAMA_MODEL = "some-non-tool-model"; // Hypothetical non-tool model
      process.env.TOOL_EXECUTION_PROVIDER = "openrouter";
      process.env.TOOL_EXECUTION_MODEL = "deepseek/deepseek-chat";

      const config = require("../src/config");

      // The fix ensures that:
      // 1. toolExecutionProvider is configured
      assert.ok(config.toolExecutionProvider);
      // 2. It's different from the conversation provider
      assert.notStrictEqual(config.toolExecutionProvider, config.modelProvider.type);

      // Expected behavior: Tools should be enabled despite non-tool-capable conversation model
      // This is validated by the shouldEnableToolsForRequest function in orchestrator
    });
  });
});
/**
 * Phase 1 coordination tests: whitelist-based Tool Needs Classification
 * must take precedence over (and suppress) Smart Tool Selection.
 */

const assert = require("assert");
const { describe, it, beforeEach, afterEach } = require("node:test");
const path = require("path");

// Set environment variables BEFORE any modules are loaded —
// ../src/config snapshots process.env at require time.
process.env.DATABRICKS_API_BASE = "http://test.com";
process.env.DATABRICKS_API_KEY = "test-key";
process.env.MODEL_PROVIDER = "ollama";
process.env.OLLAMA_ENDPOINT = "http://localhost:11434";
process.env.OLLAMA_MODEL = "llama3.1";
process.env.SMART_TOOL_SELECTION_ENABLED = "true";
// Set absolute path to whitelist file
process.env.TOOL_NEEDS_CLASSIFICATION_WHITELIST = path.join(__dirname, "../config/tool-whitelist.json");

describe("Tool Whitelist Coordination Tests (Phase 1)", () => {
  let originalEnv;

  beforeEach(() => {
    // Don't clear require cache - env vars are set at module load time
    // Clearing cache would reload modules with wrong env vars
    originalEnv = { ...process.env };
  });

  afterEach(() => {
    // FIX: restore key-by-key. `process.env = originalEnv` replaced Node's
    // special env object with a plain copy, breaking env propagation to
    // child processes for the rest of the test run.
    for (const key of Object.keys(process.env)) {
      if (!(key in originalEnv)) {
        delete process.env[key];
      }
    }
    Object.assign(process.env, originalEnv);
  });

  describe("Phase 1: Whitelist-Smart Selection Coordination", () => {
    it("should debug config whitelist path", () => {
      const config = require("../src/config");
      const fs = require("fs");
      console.log("ENV VAR:", process.env.TOOL_NEEDS_CLASSIFICATION_WHITELIST);
      console.log("Config whitelist path:", config.toolNeedsClassification.whitelist);
      console.log("File exists at config path:", fs.existsSync(config.toolNeedsClassification.whitelist));
      console.log("CWD:", process.cwd());
      // FIX: this test previously only logged and could never fail.
      // Keep the diagnostics, but assert the env-configured path is real.
      assert.ok(
        fs.existsSync(process.env.TOOL_NEEDS_CLASSIFICATION_WHITELIST),
        "whitelist file pointed to by env var should exist"
      );
    });

    it("should store classification result in clean._toolNeedsClassification", async () => {
      const { classifyToolNeeds } = require("../src/tools/tool-classification");
      const config = require("../src/config");

      const mockRequest = {
        model: "test-model",
        messages: [{ role: "user", content: "ls" }]
      };

      const classification = await classifyToolNeeds(
        mockRequest,
        config.toolNeedsClassification, // Pass config.toolNeedsClassification, not mockContext
        null // invokeModel not needed for whitelist match
      );

      // Verify classification result structure
      assert.ok(classification, "Classification result should exist");
      assert.ok(typeof classification.needsTools === "boolean", "Should have needsTools boolean");
      assert.ok(classification.source, "Should have source field");

      if (classification.source === "whitelist") {
        assert.strictEqual(classification.needsTools, true, "'ls' should match whitelist needsTools");
        assert.ok(classification.reason, "Should have reason field");
      }
    });

    it("should recognize 'ls' as whitelisted needsTools pattern", async () => {
      const { classifyToolNeeds } = require("../src/tools/tool-classification");
      const config = require("../src/config");

      const mockRequest = {
        model: "test-model",
        messages: [{ role: "user", content: "ls" }]
      };

      const classification = await classifyToolNeeds(mockRequest, config.toolNeedsClassification, null);

      // Accept both 'whitelist' (first time) and 'cache' (if cached from previous test)
      assert.ok(["whitelist", "cache"].includes(classification.source), "Should match whitelist or cache");
      assert.strictEqual(classification.needsTools, true, "'ls' should need tools");
      assert.ok(classification.reason.includes("ls"), "Reason should mention 'ls'");
    });

    it("should recognize 'hello' as whitelisted noTools pattern", async () => {
      const { classifyToolNeeds } = require("../src/tools/tool-classification");
      const config = require("../src/config");

      const mockRequest = {
        model: "test-model",
        messages: [{ role: "user", content: "hello" }]
      };

      const classification = await classifyToolNeeds(mockRequest, config.toolNeedsClassification, null);

      assert.strictEqual(classification.source, "whitelist", "Should match whitelist");
      assert.strictEqual(classification.needsTools, false, "'hello' should not need tools");
      assert.ok(classification.reason.includes("hello"), "Reason should mention 'hello'");
    });

    it("should recognize 'git status' as whitelisted needsTools pattern", async () => {
      const { classifyToolNeeds } = require("../src/tools/tool-classification");
      const config = require("../src/config");

      const mockRequest = {
        model: "test-model",
        messages: [{ role: "user", content: "git status" }]
      };

      const classification = await classifyToolNeeds(mockRequest, config.toolNeedsClassification, null);

      assert.strictEqual(classification.source, "whitelist", "Should match whitelist");
      assert.strictEqual(classification.needsTools, true, "'git status' should need tools");
    });

    it("should verify Smart Selection would classify 'ls' as conversational (the bug)", () => {
      const { classifyRequestType } = require("../src/tools/smart-selection");

      const result = classifyRequestType({ messages: [{ role: "user", content: "ls" }] });

      // This demonstrates the bug: Smart Selection incorrectly classifies 'ls' as conversational
      // because it's < 20 chars and has no TECHNICAL_KEYWORDS
      assert.strictEqual(result.type, "conversational", "Smart Selection incorrectly sees 'ls' as conversational");
      assert.ok(result.keywords.includes("short") || result.keywords.includes("non-technical"),
        "Should detect as short non-technical");
    });
  });

  describe("Integration: Expected behavior after Phase 1 fix", () => {
    it("should document expected orchestrator behavior for 'ls' command", () => {
      // This test documents the expected flow after Phase 1 implementation:
      //
      // 1. Tool Needs Classification runs first (line ~1214)
      //    - Checks whitelist
      //    - "ls" matches pattern → needsTools=true, source=whitelist
      //    - Stores in clean._toolNeedsClassification
      //
      // 2. Smart Tool Selection check (line ~1243)
      //    - Sees clean._toolNeedsClassification.source === 'whitelist'
      //    - Sees clean._toolNeedsClassification.needsTools === true
      //    - SKIPS Smart Selection (coordination)
      //    - Logs: [WHITELIST_OVERRIDE] Whitelist match - skipping smart tool selection
      //
      // 3. Tools are kept (not filtered to 0)
      //
      // 4. Tool execution provider check passes
      //
      // Result: "ls" command now works correctly!

      assert.ok(true, "Documentation test always passes");
    });

    it("should document expected orchestrator behavior for 'hello' command", () => {
      // Expected flow after Phase 1:
      //
      // 1. Tool Needs Classification
      //    - "hello" matches noTools pattern
      //    - needsTools=false
      //    - Tools REMOVED (line ~1217)
      //
      // 2. Smart Tool Selection
      //    - Skipped (no tools to select from)
      //
      // Result: "hello" correctly has no tools

      assert.ok(true, "Documentation test always passes");
    });

    it("should document expected orchestrator behavior for non-whitelisted request", () => {
      // Expected flow after Phase 1:
      //
      // 1. Tool Needs Classification
      //    - No whitelist match
      //    - Falls back to LLM or default
      //    - clean._toolNeedsClassification.source !== 'whitelist'
      //
      // 2. Smart Tool Selection
      //    - Runs normally (enters else block at line ~1255)
      //    - Classifies request type
      //    - Selects appropriate tools
      //
      // Result: Smart Selection still works for edge cases

      assert.ok(true, "Documentation test always passes");
    });
  });

  describe("Verification: Check whitelist patterns", () => {
    it("should load tool-whitelist.json correctly", () => {
      const fs = require("fs");
      const path = require("path");

      const whitelistPath = path.join(__dirname, "../config/tool-whitelist.json");
      assert.ok(fs.existsSync(whitelistPath), "tool-whitelist.json should exist");

      const whitelist = JSON.parse(fs.readFileSync(whitelistPath, "utf8"));

      assert.ok(Array.isArray(whitelist.needsTools), "Should have needsTools array");
      assert.ok(Array.isArray(whitelist.noTools), "Should have noTools array");

      assert.ok(whitelist.needsTools.length > 0, "needsTools should not be empty");
      assert.ok(whitelist.noTools.length > 0, "noTools should not be empty");

      // Verify key patterns exist
      assert.ok(whitelist.needsTools.includes("ls"), "Should include 'ls'");
      assert.ok(whitelist.needsTools.includes("pwd"), "Should include 'pwd'");
      assert.ok(whitelist.needsTools.some(p => p.includes("git")), "Should include git patterns");

      assert.ok(whitelist.noTools.includes("hello"), "Should include 'hello'");
      assert.ok(whitelist.noTools.some(p => p.includes("explain")), "Should include explain patterns");
    });
  });
});
#!/usr/bin/env python3
"""
Progress Listener for Lynkr

Connects to Lynkr's WebSocket server and displays real-time progress updates
during agent execution.

Usage:
    python tools/progress-listener.py [--host HOST] [--port PORT]
"""

import argparse
import asyncio
import json
import os  # FIX: was imported inside the __main__ guard; main() uses os.getenv
import sys
import time
from datetime import datetime

try:
    import websockets
except ImportError:
    print("Error: websockets library is required.", file=sys.stderr)
    print("Install with: pip install websockets", file=sys.stderr)
    sys.exit(1)


# ANSI color codes for better formatting
class Colors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'


# Track agent hierarchy and timing
class AgentTracker:
    """Tracks parent/child agent relationships and start times so events
    can be prefixed with a stable agent number and indented by depth."""

    def __init__(self):
        self.agents = {}  # agentId -> {parentId, startTime, depth}
        self.parent_children = {}  # parentId -> [childIds]

    def start_agent(self, agent_id, parent_id=None):
        """Register a new agent; depth is one more than its parent's (0 for roots)."""
        depth = 0
        if parent_id and parent_id in self.agents:
            depth = self.agents[parent_id]['depth'] + 1

        self.agents[agent_id] = {
            'parentId': parent_id,
            'startTime': time.time(),
            'depth': depth
        }

        if parent_id:
            if parent_id not in self.parent_children:
                self.parent_children[parent_id] = []
            self.parent_children[parent_id].append(agent_id)

    def get_agent_prefix(self, agent_id):
        """Get a formatted prefix showing agent hierarchy"""
        if agent_id not in self.agents:
            return "[Agent]"

        # Build hierarchy string like [Agent #1] or [Agent #2 → #1]
        parent_id = self.agents[agent_id]['parentId']
        if parent_id:
            parent_num = self._get_agent_num(parent_id)
            child_num = self._get_agent_num(agent_id)
            return f"[Agent #{child_num} → #{parent_num}]"
        else:
            agent_num = self._get_agent_num(agent_id)
            return f"[Agent #{agent_num}]"

    def _get_agent_num(self, agent_id):
        """Get a simple number for an agent (based on creation order)"""
        sorted_agents = sorted(self.agents.keys(), key=lambda a: self.agents[a]['startTime'])
        try:
            return sorted_agents.index(agent_id) + 1
        except ValueError:
            return 0

    def get_indent(self, agent_id):
        """Get indentation for nested agents"""
        if agent_id not in self.agents:
            return ""
        depth = self.agents[agent_id]['depth']
        return " " * depth


# Global agent tracker
agent_tracker = AgentTracker()


def format_timestamp(timestamp_ms):
    """Format millisecond timestamp to HH:MM:SS"""
    return datetime.fromtimestamp(timestamp_ms / 1000).strftime('%H:%M:%S')


def format_duration(ms):
    """Format milliseconds to human-readable duration"""
    if ms < 1000:
        return f"{ms}ms"
    elif ms < 60000:
        return f"{ms / 1000:.1f}s"
    else:
        return f"{ms / 60000:.1f}m"


def format_event(event):
    """Format a progress event for display.

    Dispatches on event['type']; unknown types are dumped key/value.
    Returns a newline-joined string (no trailing newline).
    """
    event_type = event.get('type', 'unknown')
    timestamp = format_timestamp(event.get('timestamp', time.time() * 1000))
    agent_id = event.get('agentId')
    parent_agent_id = event.get('parentAgentId')

    output = []
    indent = ""
    agent_prefix = ""

    # Track and format agent information
    if event_type == 'agent_loop_started' and agent_id:
        agent_tracker.start_agent(agent_id, parent_agent_id)
        agent_prefix = agent_tracker.get_agent_prefix(agent_id)
        indent = agent_tracker.get_indent(agent_id)
    elif agent_id:
        agent_prefix = agent_tracker.get_agent_prefix(agent_id)
        indent = agent_tracker.get_indent(agent_id)

    if event_type == 'connected':
        output.append(f"{Colors.OKGREEN}[{timestamp}] Connected to Lynkr progress server{Colors.ENDC}")
        output.append(f" Client ID: {event.get('clientId')}")
        server_info = event.get('serverInfo', {})
        if server_info:
            output.append(f" Features: {', '.join(server_info.get('features', []))}")

    elif event_type == 'ready':
        output.append(f"{Colors.OKCYAN}[{timestamp}] {event.get('message', 'Ready')}{Colors.ENDC}")

    elif event_type == 'agent_loop_started':
        output.append(f"{indent}{Colors.HEADER}[{timestamp}] {agent_prefix} {Colors.BOLD}Started{Colors.ENDC}")
        output.append(f"{indent} Model: {Colors.OKCYAN}{event.get('model')}{Colors.ENDC}")
        output.append(f"{indent} Provider: {event.get('providerType')}")
        output.append(f"{indent} Max steps: {event.get('maxSteps')}")
        output.append(f"{indent} Max duration: {format_duration(event.get('maxDurationMs', 0))}")

    elif event_type == 'agent_loop_step_started':
        step = event.get('step', 0)
        max_steps = event.get('maxSteps', 0)
        progress_pct = event.get('progress', 0)
        output.append(f"{indent}{Colors.OKBLUE}[{timestamp}] {agent_prefix} Step {Colors.BOLD}{step}/{max_steps}{Colors.ENDC} ({progress_pct}%)")

    elif event_type == 'model_invocation_started':
        output.append(f"{indent}{Colors.OKCYAN}[{timestamp}] {agent_prefix} Calling model...{Colors.ENDC}")
        output.append(f"{indent} Model: {event.get('model')}")
        output.append(f"{indent} Provider: {event.get('providerType')}")
        estimated = event.get('estimatedTokens')
        if estimated:
            output.append(f"{indent} Estimated tokens: ~{estimated}")

    elif event_type == 'model_invocation_completed':
        duration = event.get('durationMs', 0)
        input_tokens = event.get('inputTokens', 0)
        output_tokens = event.get('outputTokens', 0)
        output.append(f"{indent}{Colors.OKGREEN}[{timestamp}] {agent_prefix} Model response received{Colors.ENDC}")
        output.append(f"{indent} Duration: {format_duration(duration)}")
        output.append(f"{indent} Tokens: {input_tokens} in → {output_tokens} out")

    elif event_type == 'tool_execution_started':
        tool_name = event.get('toolName', 'unknown')
        tool_id = event.get('toolId', '')
        request_preview = event.get('requestPreview')
        output.append(f"{indent}{Colors.WARNING}[{timestamp}] {agent_prefix} Executing tool: {Colors.BOLD}{tool_name}{Colors.ENDC}")
        if request_preview:
            output.append(f"{indent} Request: {request_preview}")
        if tool_id:
            output.append(f"{indent} ID: {tool_id}")

    elif event_type == 'tool_execution_completed':
        tool_name = event.get('toolName', 'unknown')
        ok = event.get('ok', True)
        duration = event.get('durationMs', 0)
        response_preview = event.get('responsePreview')
        status = f"{Colors.OKGREEN}OK{Colors.ENDC}" if ok else f"{Colors.FAIL}FAILED{Colors.ENDC}"
        output.append(f"{indent}{Colors.OKCYAN}[{timestamp}] {agent_prefix} Tool {tool_name}: {status}{Colors.ENDC}")
        output.append(f"{indent} Duration: {format_duration(duration)}")
        if response_preview:
            output.append(f"{indent} Response: {response_preview}")

    elif event_type == 'agent_loop_completed':
        duration = event.get('durationMs', 0)
        steps = event.get('steps', 0)
        tool_calls = event.get('toolCallsExecuted', 0)
        reason = event.get('terminationReason', 'completion')
        output.append(f"{indent}{Colors.OKGREEN}{Colors.BOLD}[{timestamp}] {agent_prefix} Completed{Colors.ENDC}")
        output.append(f"{indent} Duration: {format_duration(duration)}")
        output.append(f"{indent} Steps: {steps}")
        output.append(f"{indent} Tool calls: {tool_calls}")
        output.append(f"{indent} Reason: {reason}")

    elif event_type == 'error':
        error_type = event.get('errorType', 'unknown')
        message = event.get('errorMessage', 'No message')
        output.append(f"{Colors.FAIL}{Colors.BOLD}[{timestamp}] ERROR: {error_type}{Colors.ENDC}")
        output.append(f" {message}")

    elif event_type == 'server:shutdown':
        output.append(f"{Colors.WARNING}[{timestamp}] Server shutting down{Colors.ENDC}")

    else:
        # Unknown event type - just display the raw data
        output.append(f"{Colors.OKCYAN}[{timestamp}] {event_type}{Colors.ENDC}")
        for key, value in event.items():
            if key not in ['type', 'timestamp']:
                output.append(f" {key}: {value}")

    return '\n'.join(output)


async def listen_progress(host, port):
    """Connect to Lynkr progress WebSocket server and listen for events"""
    uri = f"ws://{host}:{port}"
    print(f"{Colors.BOLD}Connecting to Lynkr progress server at {uri}...{Colors.ENDC}")

    try:
        async with websockets.connect(uri) as websocket:
            print(f"{Colors.OKGREEN}Connected! Waiting for progress updates...{Colors.ENDC}\n")

            while True:
                message = await websocket.recv()
                try:
                    event = json.loads(message)
                    print(format_event(event))
                    print()  # Empty line between events
                    sys.stdout.flush()
                except json.JSONDecodeError as e:
                    print(f"{Colors.FAIL}[ERROR] Failed to parse message: {e}{Colors.ENDC}")
                    print(f" Raw message: {message[:200]}")
                    sys.stdout.flush()

    except websockets.exceptions.ConnectionClosed as e:
        print(f"\n{Colors.WARNING}Connection closed: {e}{Colors.ENDC}")
    except websockets.exceptions.WebSocketException as e:
        print(f"\n{Colors.FAIL}WebSocket error: {e}{Colors.ENDC}", file=sys.stderr)
        sys.exit(1)
    except KeyboardInterrupt:
        print(f"\n{Colors.OKCYAN}Stopped by user{Colors.ENDC}")
    except ConnectionRefusedError:
        print(f"\n{Colors.FAIL}Connection refused. Is Lynkr running with PROGRESS_ENABLED=true?{Colors.ENDC}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"\n{Colors.FAIL}Unexpected error: {e}{Colors.ENDC}", file=sys.stderr)
        sys.exit(1)


def main():
    """Main entry point"""
    parser = argparse.ArgumentParser(
        description='Lynkr Progress Listener - Display real-time agent execution progress',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python tools/progress-listener.py
  python tools/progress-listener.py --host localhost --port 8765
  python tools/progress-listener.py --host 192.168.1.100

Environment variables:
  LYNKR_PROGRESS_HOST WebSocket server host (default: localhost)
  LYNKR_PROGRESS_PORT WebSocket server port (default: 8765)
  """
    )
    parser.add_argument(
        '--host',
        default=None,
        help='WebSocket server host (default: from LYNKR_PROGRESS_HOST or localhost)'
    )
    parser.add_argument(
        '--port',
        type=int,
        default=None,
        help='WebSocket server port (default: from LYNKR_PROGRESS_PORT or 8765)'
    )
    parser.add_argument(
        '--no-color',
        action='store_true',
        help='Disable colored output'
    )

    args = parser.parse_args()

    # Read from environment if not specified
    host = args.host or os.getenv('LYNKR_PROGRESS_HOST', 'localhost')
    port = args.port or int(os.getenv('LYNKR_PROGRESS_PORT', '8765'))

    # Disable colors if requested
    if args.no_color or not sys.stdout.isatty():
        for attr in dir(Colors):
            if not attr.startswith('_'):
                setattr(Colors, attr, '')

    # FIX: Ctrl-C while asyncio.run() is unwinding escapes the in-coroutine
    # KeyboardInterrupt handler; catch it here too for a quiet exit.
    try:
        asyncio.run(listen_progress(host, port))
    except KeyboardInterrupt:
        print(f"\n{Colors.OKCYAN}Stopped by user{Colors.ENDC}")


if __name__ == '__main__':
    main()