diff --git a/README.md b/README.md
index 9a51f4e..05e70bb 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,8 @@ Now when your agent needs deep codebase analysis, large-scale refactoring, or co
 
 - **4-Agent Swarm** — Grok 4.20 coordinates multiple agents for deeper analysis
 - **Massive Context** — ~2M token window, handles entire codebases
+- **Native MCP Server** — Grok appears as a first-class tool in Claude Code
+- **Multi-Turn Sessions** — Stateful conversations with Grok across multiple calls
 - **5 Modes** — Analyze, Refactor, Code, Reason, Orchestrate
 - **Tool Passthrough** — Pass OpenAI-format tool schemas for function calling
 - **File Writing** — Write annotated code blocks directly to disk
@@ -258,26 +260,36 @@ claude mcp add morphllm
 
 ## Architecture
 
+### Claude Code (MCP — Preferred)
+
+```
+Claude Code
+    │ native MCP tool calls
+    ▼
+grok_server.py (MCP stdio server)
+    │ manages sessions, dispatches tools
+    ▼
+grok_bridge.py (Python/OpenAI SDK)
+    │
+    ▼
+OpenRouter API → xAI Grok 4.20 Multi-Agent
+```
+
+MCP tools: `grok_query`, `grok_session_start`, `grok_session_continue`, `grok_agent`
+
+### OpenClaw (Plugin)
+
 ```
-OpenClaw Agent / Claude Code
-         │
-         ▼
-grok_swarm tool / skill
-         │
-         ▼
-index.js (Node wrapper)
-         │
-         ▼
+OpenClaw Agent
+    │ tool call
+    ▼
+index.ts (OpenClaw plugin)
+    │ spawns subprocess
+    ▼
 grok_bridge.py (Python/OpenAI SDK)
-         │
-         ▼
-OpenRouter API
-         │
-         ▼
-xAI Grok 4.20 Multi-Agent Beta
-         │
-         ▼
-Response
+    │
+    ▼
+OpenRouter API → xAI Grok 4.20 Multi-Agent
 ```
 
 ---
diff --git a/bin/grok-swarm-mcp.js b/bin/grok-swarm-mcp.js
new file mode 100755
index 0000000..8eae66b
--- /dev/null
+++ b/bin/grok-swarm-mcp.js
@@ -0,0 +1,45 @@
+#!/usr/bin/env node
+/**
+ * Cross-platform wrapper for the Grok Swarm MCP server.
+ * Spawns python3 (or python on Windows) to run grok_server.py,
+ * forwarding stdin/stdout for the MCP stdio transport.
+ */
+const { spawn } = require('child_process');
+const path = require('path');
+
+const serverScript = path.join(__dirname, '..', 'src', 'mcp', 'grok_server.py');
+
+// Try python3 first, fall back to python (common on Windows)
+const pythonCandidates = process.platform === 'win32'
+  ? ['python3', 'python']
+  : ['python3'];
+
+function trySpawn(candidates) {
+  const cmd = candidates[0];
+  if (!cmd) {
+    process.stderr.write(
+      'ERROR: Python 3 not found. Install Python 3.8+ and ensure python3 (or python) is on PATH.\n'
+    );
+    process.exit(1);
+  }
+
+  const child = spawn(cmd, [serverScript], {
+    stdio: ['inherit', 'inherit', 'inherit'],
+  });
+
+  child.on('error', (err) => {
+    if (err.code === 'ENOENT' && candidates.length > 1) {
+      // python3 not found, try next candidate
+      trySpawn(candidates.slice(1));
+    } else {
+      process.stderr.write(`ERROR: Failed to start MCP server: ${err.message}\n`);
+      process.exit(1);
+    }
+  });
+
+  child.on('exit', (code) => {
+    process.exit(code || 0);
+  });
+}
+
+trySpawn(pythonCandidates);
diff --git a/package.json b/package.json
index 3f96ef3..f0486ee 100644
--- a/package.json
+++ b/package.json
@@ -28,9 +28,11 @@
   ],
   "main": "dist/index.js",
   "bin": {
-    "grok-swarm": "dist/index.js"
+    "grok-swarm": "dist/index.js",
+    "grok-swarm-mcp": "bin/grok-swarm-mcp.js"
   },
   "files": [
+    "bin/",
     "dist/",
     "src/",
     "README.md",
diff --git a/platforms/claude/.claude-plugin/setup.sh b/platforms/claude/.claude-plugin/setup.sh
index 999e320..f22e23c 100755
--- a/platforms/claude/.claude-plugin/setup.sh
+++ b/platforms/claude/.claude-plugin/setup.sh
@@ -51,8 +51,6 @@ if [ -f "$OAUTH_SCRIPT" ]; then
     echo
     if python3 "$OAUTH_SCRIPT"; then
         log "Setup complete!"
-        echo
-        echo "Run '/grok-swarm:analyze Find bugs in this codebase' to get started."
     else
         error "OAuth setup failed. Set OPENROUTER_API_KEY env var as a fallback."
         exit 1
@@ -65,3 +63,20 @@ else
     echo "  Get a key at: https://openrouter.ai/keys"
     exit 1
 fi
+
+# Register MCP server for native tool integration
+MCP_SERVER="${PLUGIN_ROOT}/src/mcp/grok_server.py"
+if [ -f "$MCP_SERVER" ] && command -v claude >/dev/null 2>&1; then
+    echo
+    log "Registering Grok Swarm MCP server..."
+    if claude mcp add grok-swarm -- python3 "$MCP_SERVER" 2>/dev/null; then
+        log "MCP server registered — grok_query, grok_session_start/continue, grok_agent tools available"
+    else
+        warn "MCP registration failed (non-fatal). Register manually:"
+        warn "  claude mcp add grok-swarm -- python3 $MCP_SERVER"
+    fi
+fi
+
+echo
+echo "Run '/grok-swarm:analyze Find bugs in this codebase' to get started."
+echo "Or use the native MCP tools: grok_query, grok_session_start, grok_session_continue"
diff --git a/platforms/claude/commands/set-key.md b/platforms/claude/commands/set-key.md
new file mode 100644
index 0000000..419655a
--- /dev/null
+++ b/platforms/claude/commands/set-key.md
@@ -0,0 +1,86 @@
+---
+name: set-key
+description: Manually save an OpenRouter API key to config. Last-resort fallback when OAuth fails. WARNING - key will be visible in conversation context.
+argument-hint: YOUR_API_KEY
+allowed-tools:
+  - Bash
+---
+
+# Set Grok Swarm API Key (Manual Fallback)
+
+**Use this only when the OAuth flow (`/grok-swarm:setup`) has failed repeatedly.**
+
+## Step 1 — Display security warning
+
+Before doing anything else, output this warning verbatim to the user:
+
+---
+
+> **WARNING: Security Risk**
+>
+> You are about to paste your OpenRouter API key directly into this conversation.
+> This means your API key **will be visible in the LLM context window** and may be
+> stored in conversation logs.
+>
+> **Only proceed if you understand and accept these risks.** The recommended method
+> is `/grok-swarm:setup` which uses OAuth so your key never enters the conversation.
+>
+> Type **yes** to confirm you accept this risk, or anything else to cancel.
+
+---
+
+Wait for the user's response. If they do not reply with exactly `yes` (case-insensitive), stop here and remind them to use `/grok-swarm:setup` instead.
+
+## Step 2 — Validate the key argument
+
+The API key is the argument passed to this command (e.g. `/grok-swarm:set-key sk-or-v1-...`).
+
+If no argument was provided, ask the user to re-run with their key:
+```
+/grok-swarm:set-key YOUR_API_KEY_HERE
+```
+Do not ask them to type the key in a follow-up message.
+
+## Step 3 — Save the key
+
+Run (pass the key via environment variable so it is never interpolated into shell or Python source):
+```bash
+mkdir -p ~/.config/grok-swarm
+GROK_SET_KEY="<the key argument>" python3 -c "
+import json, os
+from pathlib import Path
+
+api_key = os.environ['GROK_SET_KEY']
+config_file = Path.home() / '.config' / 'grok-swarm' / 'config.json'
+existing = {}
+if config_file.exists():
+    try:
+        existing = json.loads(config_file.read_text())
+    except Exception:
+        pass
+existing['api_key'] = api_key
+json_bytes = (json.dumps(existing, indent=2) + '\n').encode()
+tmp = str(config_file.parent / f'.config.json.tmp.{os.getpid()}')
+fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
+try:
+    os.write(fd, json_bytes)
+    os.fsync(fd)
+finally:
+    os.close(fd)
+os.replace(tmp, str(config_file))
+print('saved')
+"
+```
+
+## Step 4 — Confirm success
+
+Tell the user:
+- Key saved to `~/.config/grok-swarm/config.json` (permissions: 600)
+- Show only the first 8 characters: `sk-or-v1-...` → `sk-or-v1-` + `[redacted]`
+- Suggest running `/grok-swarm:analyze Hello world` to verify the key works
+
+## Notes
+
+- This command exists as a last resort. Always prefer `/grok-swarm:setup` (OAuth flow).
+- The key is saved with file permissions 600 (owner read/write only).
+- If you need to rotate the key later, run this command again or re-run `/grok-swarm:setup`.
diff --git a/platforms/claude/commands/setup.md b/platforms/claude/commands/setup.md
index b5ff7a8..9887c77 100644
--- a/platforms/claude/commands/setup.md
+++ b/platforms/claude/commands/setup.md
@@ -55,13 +55,20 @@ else
   OAUTH_PATH="$(find /usr /usr/local ~/.local -name 'oauth_setup.py' 2>/dev/null | head -1)"
   [ -n "$OAUTH_PATH" ] && PLUGIN_ROOT="$(cd "$(dirname "$OAUTH_PATH")/../.." 2>/dev/null && pwd)"
 fi
-timeout 240s python3 "$PLUGIN_ROOT/src/bridge/oauth_setup.py"
+# timeout is not available on macOS without Homebrew coreutils (gtimeout).
+# The script has an internal 180s timeout, so the outer wrapper is best-effort only.
+TIMEOUT_CMD=$(command -v gtimeout 2>/dev/null || command -v timeout 2>/dev/null || true)
+if [ -n "$TIMEOUT_CMD" ]; then
+  "$TIMEOUT_CMD" 240s python3 "$PLUGIN_ROOT/src/bridge/oauth_setup.py"
+else
+  python3 "$PLUGIN_ROOT/src/bridge/oauth_setup.py"
+fi
 ```
 
-**Note**: The `timeout 240s` wrapper ensures the command terminates if the OAuth
-flow exceeds 240 seconds. The script itself has an internal OAUTH_TIMEOUT_SECS
-(180s) for the callback phase plus roughly 30s for token exchange, so the 240s
-outer limit provides a safe margin.
+**Note**: The wrapper tries `gtimeout` (macOS/Homebrew), then `timeout` (Linux)
+to enforce a 240s outer limit. If neither is available (e.g. stock macOS), the
+script runs without an outer wrapper — its internal `OAUTH_TIMEOUT_SECS` (180s)
+for the callback phase plus ~30s for token exchange still prevents hangs.
 
 The script will:
 1. Print an authorization URL
diff --git a/platforms/claude/skills/grok-swarm/SKILL.md b/platforms/claude/skills/grok-swarm/SKILL.md
index 071ca6d..630b7b5 100644
--- a/platforms/claude/skills/grok-swarm/SKILL.md
+++ b/platforms/claude/skills/grok-swarm/SKILL.md
@@ -50,6 +50,27 @@ This stores your API key in `~/.claude/grok-swarm.local.md` (plugin settings pat
 /grok-swarm:reason Compare microservices vs monolith for this project
 ```
 
+## MCP Tools (Native Integration)
+
+When installed via the plugin system or `claude mcp add`, Grok Swarm registers as a native MCP server. These tools are available directly — no slash commands needed:
+
+| Tool | Description |
+|------|-------------|
+| `grok_query` | Stateless single call — analyze, refactor, code, or reason |
+| `grok_session_start` | Begin a multi-turn conversation with Grok |
+| `grok_session_continue` | Continue an existing session (Grok remembers history) |
+| `grok_agent` | Run the autonomous agent loop (discover → modify → verify) |
+
+### Multi-Turn Sessions
+
+```
+grok_session_start(mode="analyze", files=["src/auth.py"])
+→ { session_id: "abc123" }
+
+grok_session_continue(session_id="abc123", message="What about password hashing?")
+→ Grok remembers the previous analysis
+```
+
 ## First-Time Setup
 
 Grok Swarm uses a PKCE OAuth flow to obtain your OpenRouter API key. **Your key never passes through the LLM context window.**
diff --git a/platforms/claude/skills/grok-swarm/SKILL_AGENT.md b/platforms/claude/skills/grok-swarm/SKILL_AGENT.md
index 7d749ab..00309cf 100644
--- a/platforms/claude/skills/grok-swarm/SKILL_AGENT.md
+++ b/platforms/claude/skills/grok-swarm/SKILL_AGENT.md
@@ -42,18 +42,19 @@ Grok has a **2M token context window**. Claude Code typically has **200K-1M toke
 ┌─────────────────────────────────────────────────────────────┐
 │ Claude Code (200K-1M context)                                │
 │ - Orchestrates tasks                                          │
-│ - Delegates to Grok                                           │
+│ - Calls Grok via native MCP tools                            │
 │ - Synthesizes results                                         │
 └─────────────────────┬───────────────────────────────────────┘
-                      │ CLI invocation (small prompt)
+                      │ MCP tool call (native)
                       ▼
 ┌─────────────────────────────────────────────────────────────┐
-│ Grok Bridge (local process)                                   │
-│ - Reads files directly from disk                              │
-│ - Sends to Grok API (2M context)                             │
-│ - Writes results to disk                                      │
+│ Grok MCP Server (grok_server.py)                              │
+│ - grok_query: stateless single call                          │
+│ - grok_session_start/continue: multi-turn sessions           │
+│ - grok_agent: autonomous agent loop                          │
+│ - Reads files from disk, manages sessions in-memory          │
 └─────────────────────┬───────────────────────────────────────┘
-                      │ API call
+                      │ OpenAI SDK → OpenRouter API
                       ▼
 ┌─────────────────────────────────────────────────────────────┐
 │ Grok 4.20 (2M token context)                                 │
@@ -62,11 +63,22 @@ Grok has a **2M token context window**. Claude Code typically has **200K-1M toke
 └─────────────────────────────────────────────────────────────┘
 ```
 
-**Key insight:** File content goes `disk → bridge → API → Grok`. It does NOT pass through Claude's context. Claude only receives the final response text.
+**Key insight:** File content goes `disk → MCP server → API → Grok`. It does NOT pass through Claude's context. Claude only receives the final response text.
 
 ## Invocation Pattern
 
-Grok Swarm is **not a native Claude sub-agent** — it's an external API. Use it via skill invocation:
+### Preferred: MCP Tools (Native)
+
+When the MCP server is registered, Grok tools appear as native tools — call them directly:
+
+```
+grok_query(prompt="Find bugs in auth module", mode="analyze", files=["src/auth.py"])
+grok_session_start(mode="analyze", files=["src/auth.py"])
+grok_session_continue(session_id="abc123", message="What about the password hashing?")
+grok_agent(task="Refactor auth module", target="./src/auth", apply=true)
+```
+
+### Alternative: Skill Commands
 
 ```
 /grok-swarm:reason <task>
@@ -75,7 +87,7 @@ Grok Swarm is **not a native Claude sub-agent** — it's an external API. Use it
 /grok-swarm:refactor <task>
 ```
 
-Or directly via CLI:
+### Alternative: Direct CLI
 ```bash
 python3 src/bridge/grok_bridge.py --mode <mode> --prompt "<task>" [options]
 ```
@@ -141,35 +153,34 @@ When using `--write-files --output-dir /path`, Grok writes directly to disk. The
 2. `// FILE: path/to/file.py` inside block
 3. `# filename.py` comment header
 
-## Critical Limitations
+## Multi-Turn Sessions
 
-### 1. ⚠️ STATELESS — No Multi-turn Sessions
-Each invocation is **independent**. Grok has no memory of previous calls.
+The MCP server supports **stateful multi-turn sessions** — Grok remembers previous messages:
 
-**Context must be explicitly provided each time:**
 ```
-# BAD — Grok won't remember
-"Continue the security audit from before"
+# Start a session
+grok_session_start(mode="analyze", files=["src/auth.py"])
+→ returns session_id: "abc123"
 
-/# GOOD — Include all context explicitly
-"Continue security audit. Previous findings: [paste findings]. Now check data validation."
+# Continue the conversation — Grok remembers
+grok_session_continue(session_id="abc123", message="What about the password hashing?")
+grok_session_continue(session_id="abc123", message="Suggest fixes for the issues found")
 ```
 
-**See Issue #30** for planned stateful session support.
+Sessions have:
+- **30-minute TTL** — auto-expire after inactivity
+- **Token budget tracking** — warns when approaching cost limits
+- **Max 10 concurrent sessions** — oldest evicted when full
 
-### 2. ⚠️ NO Streaming — Single Response Only
-The bridge waits for full response before returning. No real-time updates.
+## Limitations
 
-### 3. ⚠️ NO Native Agent Pool
-Grok cannot be registered as a Claude sub-agent. Orchestration is manual:
-- Run Grok in background: `python3 grok_bridge.py [args] &`
-- Continue Claude work independently
-- Collect Grok results when needed
+### 1. ⚠️ NO Streaming — Single Response Only
+The bridge waits for full response before returning. No real-time updates.
 
-### 4. ⚠️ NO Tool Passthrough (Currently)
-Built-in Grok tools (web search, code execution) are **not enabled** by default. The bridge uses Grok for reasoning only, not tool calling.
+### 2. ⚠️ NO Tool Passthrough (Currently)
+Built-in Grok tools (web search, code execution) are **not enabled** by default. Sub-agent tools (read_file, write_file, search_code) are planned for Phase 3.
 
-### 5. ⚠️ Rate Limiting
+### 3. ⚠️ Rate Limiting
 OpenRouter may rate limit. If seeing errors, add delays between calls.
 
 ## Safe Multi-Agent Coordination Patterns
diff --git a/scripts/postinstall.js b/scripts/postinstall.js
index f287512..227bc99 100644
--- a/scripts/postinstall.js
+++ b/scripts/postinstall.js
@@ -24,7 +24,12 @@ if (hasEnvKey) {
 } else {
   console.log('⚠ No API key configured yet');
   console.log('');
-  console.log('Run ./scripts/setup.sh to configure your OpenRouter API key');
+  if (process.platform === 'win32') {
+    console.log('Run:  python3 scripts/oauth_setup.py');
+    console.log('  or: set OPENROUTER_API_KEY=sk-or-v1-...');
+  } else {
+    console.log('Run ./scripts/setup.sh to configure your OpenRouter API key');
+  }
   console.log('Get a key at: https://openrouter.ai/keys');
 }
 
diff --git a/src/bridge/grok_bridge.py b/src/bridge/grok_bridge.py
index 23671a3..c0eaaea 100644
--- a/src/bridge/grok_bridge.py
+++ b/src/bridge/grok_bridge.py
@@ -318,6 +318,77 @@ def detect_high_thinking(prompt):
     return any(phrase in lower for phrase in HIGH_THINKING_PHRASES)
 
 
+def _get_client(api_key=None, timeout=120):
+    """Create an OpenAI client configured for OpenRouter."""
+    key = api_key or get_api_key()
+    if not key:
+        raise RuntimeError(
+            "No OpenRouter API key found. "
+            "Tried: OPENROUTER_API_KEY env, ~/.config/grok-swarm/config.json, "
+            "~/.claude/grok-swarm.local.md, OpenClaw auth profiles. "
+            "Run: python3 src/bridge/oauth_setup.py  or  /grok-swarm:setup"
+        )
+    return OpenAI(base_url=OPENROUTER_BASE, api_key=key, timeout=timeout)
+
+
+def call_grok_with_messages(messages, tools=None, timeout=120, thinking="low", mode="reason"):
+    """
+    Call Grok with a pre-built message list.
+
+    This is the low-level entry point used by sessions (multi-turn) and by
+    call_grok() for single-turn requests.
+
+    Args:
+        messages: List of OpenAI-format message dicts (system, user, assistant, tool).
+        tools: Optional list of OpenAI-format tool definitions.
+        timeout: API timeout in seconds.
+        thinking: "low" (4 agents) or "high" (16 agents).
+        mode: Task mode (for usage logging).
+
+    Returns:
+        The raw ChatCompletion response object from the OpenAI SDK.
+
+    Raises:
+        RuntimeError: If no API key is found.
+        Exception: Propagated from the OpenAI SDK on API errors.
+    """
+    client = _get_client(timeout=timeout)
+
+    kwargs = {
+        "model": MODEL_ID,
+        "messages": messages,
+        "max_tokens": 16384,
+        "temperature": 0.3,
+        "extra_body": {"agent_count": AGENT_COUNTS.get(thinking, AGENT_COUNTS["low"])},
+    }
+
+    if tools:
+        kwargs["tools"] = tools
+
+    start = time.time()
+    response = client.chat.completions.create(**kwargs)
+    elapsed = time.time() - start
+
+    # Log usage
+    if hasattr(response, "usage") and response.usage:
+        u = response.usage
+        agent_count = AGENT_COUNTS.get(thinking, AGENT_COUNTS["low"])
+        print(
+            f"USAGE: mode={mode} thinking={thinking} agents={agent_count} "
+            f"prompt={u.prompt_tokens} completion={u.completion_tokens} "
+            f"total={u.total_tokens} time={elapsed:.1f}s",
+            file=sys.stderr,
+        )
+        # Record to persistent usage tracker (best-effort)
+        try:
+            from usage_tracker import record_usage
+            record_usage(mode, thinking, u.prompt_tokens, u.completion_tokens, u.total_tokens, elapsed)
+        except Exception as exc:
+            print(f"DEBUG: Usage tracking failed: {exc}", file=sys.stderr)
+
+    return response
+
+
 def call_grok(prompt, mode="reason", context="", system_override=None, tools=None, timeout=120, thinking="low"):
     """Make the API call to Grok 4.20 Multi-Agent Beta."""
     api_key = get_api_key()
@@ -344,56 +415,31 @@ def call_grok(prompt, mode="reason", context="", system_override=None, tools=Non
     if context:
         system_content += f"\n\n## Codebase Context\n{context}"
 
-    client = OpenAI(
-        base_url=OPENROUTER_BASE,
-        api_key=api_key,
-        timeout=timeout,
-    )
-
     messages = [
         {"role": "system", "content": system_content},
         {"role": "user", "content": prompt},
     ]
 
-    kwargs = {
-        "model": MODEL_ID,
-        "messages": messages,
-        "max_tokens": 16384,
-        "temperature": 0.3,
-        "extra_body": {"agent_count": AGENT_COUNTS.get(thinking, AGENT_COUNTS["low"])},
-    }
-
-    if tools:
-        kwargs["tools"] = tools
-
-    start = time.time()
-
     try:
-        response = client.chat.completions.create(**kwargs)
+        response = call_grok_with_messages(
+            messages=messages, tools=tools, timeout=timeout, thinking=thinking, mode=mode,
+        )
+    except RuntimeError as e:
+        print(f"ERROR: {e}", file=sys.stderr)
+        sys.exit(1)
     except Exception as e:
-        elapsed = time.time() - start
-        print(f"ERROR after {elapsed:.1f}s: {e}", file=sys.stderr)
+        print(f"ERROR: {e}", file=sys.stderr)
         sys.exit(1)
 
-    elapsed = time.time() - start
-
     if not response.choices:
-        print(f"ERROR: Empty response after {elapsed:.1f}s", file=sys.stderr)
+        print("ERROR: Empty response", file=sys.stderr)
         sys.exit(1)
 
     choice = response.choices[0]
 
-    # Log usage
-    if hasattr(response, 'usage') and response.usage:
-        u = response.usage
-        agent_count = AGENT_COUNTS.get(thinking, AGENT_COUNTS["low"])
-        print(f"USAGE: mode={mode} thinking={thinking} agents={agent_count} "
-              f"prompt={u.prompt_tokens} completion={u.completion_tokens} "
-              f"total={u.total_tokens} time={elapsed:.1f}s", file=sys.stderr)
-
     # Handle content filtering
     if choice.finish_reason == "content_filter":
-        print(f"WARNING: Response blocked by content filter after {elapsed:.1f}s", file=sys.stderr)
+        print("WARNING: Response blocked by content filter", file=sys.stderr)
         if not choice.message.content:
             print("No content returned. Try rephrasing the prompt.", file=sys.stderr)
             sys.exit(2)
diff --git a/src/bridge/oauth_setup.py b/src/bridge/oauth_setup.py
index 1913c34..aa54d43 100644
--- a/src/bridge/oauth_setup.py
+++ b/src/bridge/oauth_setup.py
@@ -24,6 +24,7 @@
 import subprocess
 import sys
 import tempfile
+import urllib.error
 import urllib.parse
 import urllib.request
 import webbrowser
@@ -141,6 +142,14 @@ def log_message(self, *args) -> None:  # type: ignore[override]
         pass  # suppress request logs
 
 
+class _InvalidCodeError(RuntimeError):
+    """Raised when the authorization code is invalid or expired (HTTP 400).
+
+    This is the only token-exchange error that is safe to retry with a fresh
+    PKCE pair — all other failures (network, 5xx, bad JSON) are fatal.
+    """
+
+
 def _exchange_code(code: str, code_verifier: str) -> str:
     """Exchange auth code for API key via OpenRouter token endpoint."""
     payload = json.dumps({"code": code, "code_verifier": code_verifier}).encode()
@@ -153,6 +162,13 @@ def _exchange_code(code: str, code_verifier: str) -> str:
     try:
         with urllib.request.urlopen(req, timeout=30) as resp:
             data = json.loads(resp.read().decode())
+    except urllib.error.HTTPError as exc:
+        if exc.code == 400:
+            raise _InvalidCodeError(
+                f"Authorization code rejected (HTTP 400). "
+                f"PKCE codes are single-use and may have expired."
+            ) from exc
+        raise RuntimeError(f"Token exchange failed (HTTP {exc.code}): {exc}") from exc
     except Exception as exc:
         raise RuntimeError(f"Token exchange failed: {exc}") from exc
 
@@ -197,12 +213,31 @@ def _check_port_available(port: int) -> bool:
 # Public entry points
 # ---------------------------------------------------------------------------
 
+def _start_callback_server() -> "HTTPServer | None":
+    """Bind the callback server to CALLBACK_PORT. Returns server or None on failure."""
+    max_retries = 3
+    for attempt in range(max_retries):
+        try:
+            server = HTTPServer(("localhost", CALLBACK_PORT), _CallbackHandler)
+            server.timeout = OAUTH_TIMEOUT_SECS
+            return server
+        except OSError:
+            if attempt < max_retries - 1:
+                __import__("time").sleep(0.5)
+    return None
+
+
 def run_oauth_flow() -> int:
     """
     Run the PKCE OAuth flow.
 
+    Retries once with a fresh PKCE pair if the token exchange fails (e.g. the
+    authorization code was already consumed — PKCE codes are single-use).
+
     Returns 0 on success, 1 on failure.
     """
+    import time
+
     # Clear any stale codes from previous runs
     _received_code.clear()
 
@@ -210,6 +245,8 @@ def run_oauth_flow() -> int:
         print(
             f"ERROR: Port {CALLBACK_PORT} is already in use.\n"
             f"\n"
+            f"Note: port {CALLBACK_PORT} is hardcoded by OpenRouter and cannot be changed.\n"
+            f"\n"
             f"To fix this, run:\n"
             f"  lsof -i :{CALLBACK_PORT}  # find the process\n"
             f"  kill <PID>                # kill the process using port {CALLBACK_PORT}\n"
@@ -222,98 +259,112 @@ def run_oauth_flow() -> int:
         )
         return 1
 
-    code_verifier, code_challenge = _generate_pkce_pair()
+    max_attempts = 2
+    for attempt in range(1, max_attempts + 1):
+        _received_code.clear()
+
+        code_verifier, code_challenge = _generate_pkce_pair()
+        auth_params = urllib.parse.urlencode(
+            {
+                "callback_url": CALLBACK_URL,
+                "code_challenge": code_challenge,
+                "code_challenge_method": "S256",
+                "app_name": APP_NAME,
+            }
+        )
+        auth_url = f"{OPENROUTER_AUTH_URL}?{auth_params}"
+
+        if attempt == 1:
+            print("=" * 60)
+            print("Grok Swarm — OpenRouter Authorization")
+            print("=" * 60)
+            print()
+            print("Click the link below to authorize Grok Swarm with your")
+            print("OpenRouter account (or paste it into your browser):")
+        else:
+            print()
+            print(f"Attempt {attempt} of {max_attempts} — new authorization link:")
 
-    auth_params = urllib.parse.urlencode(
-        {
-            "callback_url": CALLBACK_URL,
-            "code_challenge": code_challenge,
-            "code_challenge_method": "S256",
-            "app_name": APP_NAME,
-        }
-    )
-    auth_url = f"{OPENROUTER_AUTH_URL}?{auth_params}"
-
-    print("=" * 60)
-    print("Grok Swarm — OpenRouter Authorization")
-    print("=" * 60)
-    print()
-    print("Click the link below to authorize Grok Swarm with your")
-    print("OpenRouter account (or paste it into your browser):")
-    print()
-    print(f"  {auth_url}")
-    print()
-    print("Attempting to open your browser...")
-    _open_browser(auth_url)
-    print(f"Waiting up to {OAUTH_TIMEOUT_SECS}s for authorization...")
-
-    # Handle bind-time races: another process may have bound between the
-    # _check_port_available call and this HTTPServer creation.
-    max_retries = 3
-    server = None
-    for attempt in range(max_retries):
-        try:
-            server = HTTPServer(("localhost", CALLBACK_PORT), _CallbackHandler)
-            server.timeout = OAUTH_TIMEOUT_SECS
-            break
-        except OSError as exc:
-            if attempt < max_retries - 1:
-                __import__("time").sleep(0.5)
-                continue
-            # Final attempt failed
+        print()
+        print(f"  {auth_url}")
+        print()
+
+        # Bind the callback server BEFORE opening the browser so we're
+        # already listening when OpenRouter redirects back.
+        server = _start_callback_server()
+        if server is None:
             print(
-                f"\nERROR: Failed to bind to port {CALLBACK_PORT} after {max_retries} attempts: {exc}",
+                f"\nERROR: Failed to bind to port {CALLBACK_PORT}.",
                 file=sys.stderr,
             )
             print("Please try again or set your key manually:", file=sys.stderr)
             print("  export OPENROUTER_API_KEY=sk-or-v1-...", file=sys.stderr)
             return 1
 
-    deadline = __import__("time").time() + OAUTH_TIMEOUT_SECS
-    while not _received_code and __import__("time").time() < deadline:
-        server.handle_request()
+        print("Attempting to open your browser...")
+        _open_browser(auth_url)
+        print(f"Waiting up to {OAUTH_TIMEOUT_SECS}s for authorization...")
 
-    server.server_close()
+        deadline = time.time() + OAUTH_TIMEOUT_SECS
+        while not _received_code and time.time() < deadline:
+            server.handle_request()
 
-    if not _received_code:
-        print("\nERROR: Timed out waiting for authorization.", file=sys.stderr)
-        print("Please try again or set your key manually:", file=sys.stderr)
-        print("  export OPENROUTER_API_KEY=sk-or-v1-...", file=sys.stderr)
-        return 1
+        server.server_close()
 
-    code = _received_code[0]
-    print("\nCallback received. Exchanging code for API key...", end=" ", flush=True)
+        if not _received_code:
+            print("\nERROR: Timed out waiting for authorization.", file=sys.stderr)
+            print("Please try again or set your key manually:", file=sys.stderr)
+            print("  export OPENROUTER_API_KEY=sk-or-v1-...", file=sys.stderr)
+            return 1
 
-    try:
-        api_key = _exchange_code(code, code_verifier)
-    except RuntimeError as exc:
-        print(f"FAILED\nERROR: {exc}", file=sys.stderr)
-        return 1
+        code = _received_code[0]
+        print("\nCallback received. Exchanging code for API key...", end=" ", flush=True)
 
-    try:
-        _save_key(api_key)
-    except OSError as exc:
-        print(f"FAILED\nERROR: Could not save API key to {CONFIG_FILE}: {exc}", file=sys.stderr)
-        return 1
+        try:
+            api_key = _exchange_code(code, code_verifier)
+        except _InvalidCodeError as exc:
+            # PKCE codes are single-use — retryable with a fresh pair
+            if attempt < max_attempts:
+                print(
+                    f"FAILED\n"
+                    f"{exc}\n"
+                    f"Starting fresh attempt {attempt + 1} of {max_attempts}..."
+                )
+                continue
+            print(f"FAILED\nERROR: {exc}", file=sys.stderr)
+            return 1
+        except RuntimeError as exc:
+            # Non-retryable errors (network, 5xx, bad JSON) — fail immediately
+            print(f"FAILED\nERROR: {exc}", file=sys.stderr)
+            return 1
 
-    # Validate the key works before declaring success
-    print("OK\nValidating key...", end=" ", flush=True)
-    try:
-        req = urllib.request.Request(
-            "https://openrouter.ai/api/v1/auth/key",
-            headers={"Authorization": f"Bearer {api_key}"},
-            method="GET",
-        )
-        with urllib.request.urlopen(req, timeout=15) as resp:
-            key_info = json.loads(resp.read().decode())
-        label = key_info.get("label", "unknown")
-        print(f"OK ({label})")
-    except Exception:
-        # Key saved but validation failed — still usable, just not confirmed
-        print("SKIPPED (network error)")
+        try:
+            _save_key(api_key)
+        except OSError as exc:
+            print(f"FAILED\nERROR: Could not save API key to {CONFIG_FILE}: {exc}", file=sys.stderr)
+            return 1
 
-    print(f"\nSuccess! API key saved to {CONFIG_FILE}")
-    return 0
+        # Validate the key works before declaring success
+        print("OK\nValidating key...", end=" ", flush=True)
+        try:
+            req = urllib.request.Request(
+                "https://openrouter.ai/api/v1/auth/key",
+                headers={"Authorization": f"Bearer {api_key}"},
+                method="GET",
+            )
+            with urllib.request.urlopen(req, timeout=15) as resp:
+                key_info = json.loads(resp.read().decode())
+            label = key_info.get("label", "unknown")
+            print(f"OK ({label})")
+        except Exception:
+            # Key saved but validation failed — still usable, just not confirmed
+            print("SKIPPED (network error)")
+
+        print(f"\nSuccess! API key saved to {CONFIG_FILE}")
+        return 0
+
+    # Should not be reached, but be safe
+    return 1
 
 
 def run_check() -> int:
diff --git a/src/mcp/__init__.py b/src/mcp/__init__.py
new file mode 100644
index 0000000..b7ace3c
--- /dev/null
+++ b/src/mcp/__init__.py
@@ -0,0 +1 @@
+# src/mcp — MCP (Model Context Protocol) server for Grok Swarm.
diff --git a/src/mcp/__main__.py b/src/mcp/__main__.py
new file mode 100644
index 0000000..4c8ad86
--- /dev/null
+++ b/src/mcp/__main__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+"""Allow running the MCP server via: python -m mcp"""
+from .grok_server import main
+
+main()
diff --git a/src/mcp/grok_server.py b/src/mcp/grok_server.py
new file mode 100755
index 0000000..811a615
--- /dev/null
+++ b/src/mcp/grok_server.py
@@ -0,0 +1,520 @@
+#!/usr/bin/env python3
+"""
+grok_server.py — MCP (Model Context Protocol) server for Grok Swarm.
+
+Exposes Grok 4.20 Multi-Agent as native tools over the MCP stdio transport
+(JSON-RPC 2.0, newline-delimited, on stdin/stdout).
+
+Installation:
+    claude mcp add grok-swarm -- python3 /path/to/src/mcp/grok_server.py
+
+Tools:
+    grok_query            — Stateless single call to Grok
+    grok_session_start    — Begin a multi-turn session
+    grok_session_continue — Continue an existing session
+    grok_agent            — Run the autonomous agent loop
+
+Requires: Python 3.8+, openai package.
+"""
+
+import json
+import logging
+import sys
+from pathlib import Path
+
+# Ensure sibling packages are importable
+_repo_root = Path(__file__).resolve().parent.parent.parent
+sys.path.insert(0, str(_repo_root / "src"))
+
+from bridge.grok_bridge import (
+    call_grok_with_messages,
+    get_api_key,
+    read_files,
+    strip_pgp_blocks,
+    MODE_PROMPTS,
+    AGENT_COUNTS,
+)
+
+# Lazy import — only needed if grok_agent tool is called
+_agent_module = None
+
+# Session store — imported lazily to avoid circular deps at module level
+_session_module = None
+
+# ---------------------------------------------------------------------------
+# Logging (stderr only — MCP spec reserves stdout for protocol messages)
+# ---------------------------------------------------------------------------
+
+logging.basicConfig(
+    stream=sys.stderr,
+    level=logging.INFO,
+    format="[grok-mcp] %(levelname)s %(message)s",
+)
+log = logging.getLogger("grok-mcp")
+
+# ---------------------------------------------------------------------------
+# MCP Protocol constants
+# ---------------------------------------------------------------------------
+
+SERVER_NAME = "grok-swarm"
+SERVER_VERSION = "1.3.0"
+PROTOCOL_VERSION = "2024-11-05"
+
+# ---------------------------------------------------------------------------
+# Tool definitions (JSON Schema format for MCP, NOT OpenAI format)
+# ---------------------------------------------------------------------------
+
+TOOLS = [
+    {
+        "name": "grok_query",
+        "description": (
+            "Send a single stateless query to the Grok 4.20 multi-agent swarm. "
+            "Use for code analysis, refactoring, generation, or reasoning tasks. "
+            "Optionally pass file paths for codebase context."
+        ),
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "prompt": {
+                    "type": "string",
+                    "description": "Task instruction or question for Grok.",
+                },
+                "mode": {
+                    "type": "string",
+                    "enum": ["refactor", "analyze", "code", "reason", "orchestrate"],
+                    "default": "reason",
+                    "description": "Task mode. 'orchestrate' requires the 'system' parameter.",
+                },
+                "files": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "File paths to include as codebase context.",
+                },
+                "system": {
+                    "type": "string",
+                    "description": "Override system prompt (required for orchestrate mode).",
+                },
+                "thinking": {
+                    "type": "string",
+                    "enum": ["low", "high"],
+                    "default": "low",
+                    "description": "Thinking level: low = 4 agents, high = 16 agents.",
+                },
+                "timeout": {
+                    "type": "integer",
+                    "default": 120,
+                    "description": "API timeout in seconds.",
+                },
+            },
+            "required": ["prompt"],
+        },
+    },
+    {
+        "name": "grok_session_start",
+        "description": (
+            "Start a new multi-turn conversation session with Grok. "
+            "Returns a session_id that can be used with grok_session_continue "
+            "to maintain conversation history across multiple calls."
+        ),
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "mode": {
+                    "type": "string",
+                    "enum": ["refactor", "analyze", "code", "reason", "orchestrate"],
+                    "default": "reason",
+                    "description": "Task mode for this session.",
+                },
+                "system": {
+                    "type": "string",
+                    "description": "Override system prompt.",
+                },
+                "thinking": {
+                    "type": "string",
+                    "enum": ["low", "high"],
+                    "default": "low",
+                    "description": "Thinking level: low = 4 agents, high = 16 agents.",
+                },
+                "files": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "File paths to include as initial context.",
+                },
+            },
+            "required": [],
+        },
+    },
+    {
+        "name": "grok_session_continue",
+        "description": (
+            "Continue an existing multi-turn session with Grok. "
+            "Grok remembers all previous messages in the session."
+        ),
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "session_id": {
+                    "type": "string",
+                    "description": "Session ID returned by grok_session_start.",
+                },
+                "message": {
+                    "type": "string",
+                    "description": "Follow-up message or instruction.",
+                },
+                "files": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "Additional file paths to add as context.",
+                },
+            },
+            "required": ["session_id", "message"],
+        },
+    },
+    {
+        "name": "grok_agent",
+        "description": (
+            "Run the autonomous Grok agent loop. The agent discovers files, "
+            "calls Grok, applies changes, and optionally verifies with a command. "
+            "Iterates up to max_iterations times."
+        ),
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "task": {
+                    "type": "string",
+                    "description": "Task instruction for the agent.",
+                },
+                "target": {
+                    "type": "string",
+                    "default": ".",
+                    "description": "Target directory or file to operate on.",
+                },
+                "apply": {
+                    "type": "boolean",
+                    "default": False,
+                    "description": "Whether to actually write changes (default: dry-run preview).",
+                },
+                "max_iterations": {
+                    "type": "integer",
+                    "default": 5,
+                    "description": "Maximum number of agent iterations.",
+                },
+                "verify_cmd": {
+                    "type": "string",
+                    "description": "Command to run after each iteration for verification (e.g. 'pytest').",
+                },
+            },
+            "required": ["task"],
+        },
+    },
+]
+
+
+# ---------------------------------------------------------------------------
+# Tool implementations
+# ---------------------------------------------------------------------------
+
+def _handle_grok_query(params):
+    """Stateless single call to Grok."""
+    prompt = params["prompt"]
+    mode = params.get("mode", "reason")
+    files = params.get("files", [])
+    system = params.get("system")
+    thinking = params.get("thinking", "low")
+    timeout = params.get("timeout", 120)
+
+    if mode == "orchestrate" and not system:
+        return _error_content("orchestrate mode requires the 'system' parameter")
+
+    # Resolve system prompt
+    if system:
+        system_content = system
+    else:
+        system_content = MODE_PROMPTS.get(mode)
+        if system_content is None:
+            return _error_content(f"Mode '{mode}' requires the 'system' parameter")
+
+    # Read file context
+    if files:
+        context = read_files(files)
+        log.info("Read %d files (%d chars context)", len(files), len(context))
+        system_content += f"\n\n## Codebase Context\n{context}"
+
+    messages = [
+        {"role": "system", "content": system_content},
+        {"role": "user", "content": prompt},
+    ]
+
+    log.info("grok_query: mode=%s thinking=%s timeout=%d", mode, thinking, timeout)
+
+    response = call_grok_with_messages(
+        messages=messages, timeout=timeout, thinking=thinking, mode=mode,
+    )
+
+    if not response.choices:
+        return _error_content("Empty response from Grok API")
+
+    content = response.choices[0].message.content or ""
+    return _text_content(strip_pgp_blocks(content))
+
+
+def _handle_grok_session_start(params):
+    """Start a new multi-turn session."""
+    session_mod = _get_session_module()
+
+    mode = params.get("mode", "reason")
+    system = params.get("system")
+    thinking = params.get("thinking", "low")
+    files = params.get("files", [])
+
+    # Read initial file context
+    file_context = ""
+    if files:
+        file_context = read_files(files)
+
+    session = session_mod.create_session(
+        mode=mode,
+        system_override=system,
+        thinking=thinking,
+        initial_file_context=file_context,
+    )
+
+    log.info("Session started: %s (mode=%s, thinking=%s)", session.session_id, mode, thinking)
+
+    return _text_content(json.dumps({
+        "session_id": session.session_id,
+        "mode": session.mode,
+        "thinking": session.thinking,
+        "message": f"Session {session.session_id} started. Use grok_session_continue to send messages.",
+    }))
+
+
+def _handle_grok_session_continue(params):
+    """Continue an existing session."""
+    session_mod = _get_session_module()
+
+    session_id = params["session_id"]
+    message = params["message"]
+    files = params.get("files", [])
+
+    session = session_mod.get_session(session_id)
+    if session is None:
+        return _error_content(f"Session not found: {session_id}")
+
+    # Read additional file context
+    file_context = ""
+    if files:
+        file_context = read_files(files)
+
+    try:
+        response = session.send_message(message, file_context=file_context)
+    except Exception as exc:
+        log.error("Session %s error: %s", session_id, exc)
+        return _error_content(f"Grok API error: {exc}")
+
+    return _text_content(strip_pgp_blocks(response))
+
+
+def _handle_grok_agent(params):
+    """Run the autonomous agent loop."""
+    global _agent_module
+    if _agent_module is None:
+        sys.path.insert(0, str(_repo_root / "src" / "agent"))
+        from grok_agent import run_agent_loop, AgentState, AgentStatus, Platform
+        _agent_module = sys.modules["grok_agent"]
+
+    task = params["task"]
+    target = params.get("target", ".")
+    apply_changes = params.get("apply", False)
+    max_iterations = params.get("max_iterations", 5)
+    verify_cmd = params.get("verify_cmd")
+
+    state = _agent_module.AgentState(
+        task=task,
+        target=target,
+        platform=_agent_module.Platform.CLAUDE,
+        apply=apply_changes,
+        max_iterations=max_iterations,
+        verify_cmd=verify_cmd,
+    )
+
+    log.info("Agent started: task=%r target=%s apply=%s max_iter=%d",
+             task[:80], target, apply_changes, max_iterations)
+
+    try:
+        _agent_module.run_agent_loop(state)
+    except Exception as exc:
+        log.error("Agent error: %s", exc)
+        return _error_content(f"Agent failed: {exc}")
+
+    result = {
+        "status": state.status.value,
+        "iterations": state.iteration,
+        "changes": state.changes,
+        "errors": state.errors,
+    }
+    if state.last_response:
+        result["last_response_preview"] = state.last_response[:500]
+
+    return _text_content(json.dumps(result, indent=2))
+
+
+# Tool dispatch table
+TOOL_HANDLERS = {
+    "grok_query": _handle_grok_query,
+    "grok_session_start": _handle_grok_session_start,
+    "grok_session_continue": _handle_grok_session_continue,
+    "grok_agent": _handle_grok_agent,
+}
+
+
+# ---------------------------------------------------------------------------
+# Helper: lazy module loading
+# ---------------------------------------------------------------------------
+
+def _get_session_module():
+    global _session_module
+    if _session_module is None:
+        import mcp.session as mod
+        _session_module = mod
+    return _session_module
+
+
+# ---------------------------------------------------------------------------
+# MCP response helpers
+# ---------------------------------------------------------------------------
+
+def _text_content(text):
+    """Wrap text in MCP tool result content format. Returns (content, is_error)."""
+    return ([{"type": "text", "text": text}], False)
+
+
+def _error_content(message):
+    """Wrap error in MCP tool result content format. Returns (content, is_error)."""
+    return ([{"type": "text", "text": message}], True)
+
+
+# ---------------------------------------------------------------------------
+# JSON-RPC 2.0 protocol layer
+# ---------------------------------------------------------------------------
+
+def _jsonrpc_response(req_id, result):
+    """Build a JSON-RPC 2.0 success response."""
+    return {"jsonrpc": "2.0", "id": req_id, "result": result}
+
+
+def _jsonrpc_error(req_id, code, message):
+    """Build a JSON-RPC 2.0 error response."""
+    return {"jsonrpc": "2.0", "id": req_id, "error": {"code": code, "message": message}}
+
+
+def _handle_initialize(params):
+    """Respond to MCP initialize handshake."""
+    return {
+        "protocolVersion": PROTOCOL_VERSION,
+        "capabilities": {"tools": {}},
+        "serverInfo": {
+            "name": SERVER_NAME,
+            "version": SERVER_VERSION,
+        },
+    }
+
+
+def _handle_tools_list(_params):
+    """Return all registered tool definitions."""
+    return {"tools": TOOLS}
+
+
+def _handle_tools_call(params):
+    """Dispatch a tool call to the appropriate handler."""
+    tool_name = params.get("name")
+    tool_args = params.get("arguments", {})
+
+    handler = TOOL_HANDLERS.get(tool_name)
+    if handler is None:
+        content, _ = _error_content(f"Unknown tool: {tool_name}")
+        return {"content": content, "isError": True}
+
+    try:
+        content, is_error = handler(tool_args)
+        result = {"content": content}
+        if is_error:
+            result["isError"] = True
+        return result
+    except Exception as exc:
+        log.exception("Tool %s failed", tool_name)
+        content, _ = _error_content(f"Internal error in {tool_name}: {exc}")
+        return {"content": content, "isError": True}
+
+
+# Method dispatch table
+METHOD_HANDLERS = {
+    "initialize": _handle_initialize,
+    "tools/list": _handle_tools_list,
+    "tools/call": _handle_tools_call,
+}
+
+# Notification methods (no response expected)
+NOTIFICATION_METHODS = {
+    "notifications/initialized",
+    "notifications/cancelled",
+}
+
+
+def _process_message(raw_line):
+    """Parse one JSON-RPC message and return the response dict, or None for notifications."""
+    try:
+        msg = json.loads(raw_line)
+    except json.JSONDecodeError as exc:
+        return _jsonrpc_error(None, -32700, f"Parse error: {exc}")
+
+    method = msg.get("method")
+    req_id = msg.get("id")
+    params = msg.get("params", {})
+
+    # Notifications — no response
+    if method in NOTIFICATION_METHODS:
+        log.debug("Notification: %s", method)
+        return None
+
+    # Unknown method
+    handler = METHOD_HANDLERS.get(method)
+    if handler is None:
+        log.warning("Unknown method: %s", method)
+        return _jsonrpc_error(req_id, -32601, f"Method not found: {method}")
+
+    try:
+        result = handler(params)
+        return _jsonrpc_response(req_id, result)
+    except Exception as exc:
+        log.exception("Method %s failed", method)
+        return _jsonrpc_error(req_id, -32603, f"Internal error: {exc}")
+
+
+# ---------------------------------------------------------------------------
+# Main loop
+# ---------------------------------------------------------------------------
+
+def main():
+    """Run the MCP server on stdin/stdout."""
+    log.info("Grok Swarm MCP server starting (protocol %s)", PROTOCOL_VERSION)
+
+    # Validate API key early
+    if not get_api_key():
+        log.warning("No API key configured — tool calls will fail until key is set")
+
+    for raw_line in sys.stdin:
+        raw_line = raw_line.strip()
+        if not raw_line:
+            continue
+
+        response = _process_message(raw_line)
+        if response is not None:
+            sys.stdout.write(json.dumps(response) + "\n")
+            sys.stdout.flush()
+
+    log.info("stdin closed, shutting down")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/mcp/session.py b/src/mcp/session.py
new file mode 100644
index 0000000..db26cc1
--- /dev/null
+++ b/src/mcp/session.py
@@ -0,0 +1,257 @@
+"""
+session.py — Multi-turn session management for Grok Swarm MCP server.
+
+Each GrokSession maintains a conversation history in OpenAI message format
+and calls grok_bridge.call_grok_with_messages() for API requests.
+
+Sessions are stored in-memory (scoped to the MCP server process lifetime).
+"""
+
+import logging
+import secrets
+import sys
+import time
+from pathlib import Path
+from typing import Dict, List, Optional
+
+# Ensure bridge is importable
+_repo_root = Path(__file__).resolve().parent.parent.parent
+sys.path.insert(0, str(_repo_root / "src"))
+
+from bridge.grok_bridge import call_grok_with_messages, MODE_PROMPTS
+
+log = logging.getLogger("grok-mcp.session")
+
+# ---------------------------------------------------------------------------
+# Session configuration
+# ---------------------------------------------------------------------------
+
+SESSION_TTL_SECS = 1800  # 30 minutes
+MAX_SESSIONS = 10
+MAX_TOKEN_BUDGET = 500_000
+
+# ---------------------------------------------------------------------------
+# Session store
+# ---------------------------------------------------------------------------
+
+_sessions: Dict[str, "GrokSession"] = {}
+
+
+class GrokSession:
+    """A stateful multi-turn conversation with Grok."""
+
+    __slots__ = (
+        "session_id",
+        "mode",
+        "thinking",
+        "system_prompt",
+        "messages",
+        "created_at",
+        "last_active",
+        "total_tokens",
+        "max_tokens",
+    )
+
+    def __init__(
+        self,
+        session_id: str,
+        mode: str = "reason",
+        thinking: str = "low",
+        system_prompt: str = "",
+    ):
+        self.session_id = session_id
+        self.mode = mode
+        self.thinking = thinking
+        self.system_prompt = system_prompt
+        # messages does NOT include the system message — that's prepended at call time
+        self.messages: List[dict] = []
+        self.created_at = time.time()
+        self.last_active = time.time()
+        self.total_tokens = 0
+        self.max_tokens = MAX_TOKEN_BUDGET
+
+    def send_message(self, user_content: str, file_context: str = "") -> str:
+        """
+        Send a user message and get Grok's response.
+
+        Args:
+            user_content: The user's message text.
+            file_context: Optional file content to prepend to the message.
+
+        Returns:
+            Grok's response text.
+
+        Raises:
+            RuntimeError: If the session's token budget is exhausted.
+            Exception: Propagated from the API on errors.
+        """
+        self.last_active = time.time()
+
+        if self.total_tokens >= self.max_tokens:
+            raise RuntimeError(
+                f"Session {self.session_id} token budget exhausted "
+                f"({self.total_tokens:,} / {self.max_tokens:,}). "
+                "Start a new session."
+            )
+
+        # Build user message content
+        content = user_content
+        if file_context:
+            content = f"## Additional File Context\n{file_context}\n\n{user_content}"
+
+        self.messages.append({"role": "user", "content": content})
+
+        # Build full message list for API
+        api_messages = [{"role": "system", "content": self.system_prompt}]
+
+        # Inject budget warning early (right after system prompt) for visibility
+        if self.total_tokens > self.max_tokens * 0.8:
+            api_messages.append({
+                "role": "system",
+                "content": (
+                    "NOTE: Token budget for this session is nearly exhausted. "
+                    "Please wrap up your response concisely."
+                ),
+            })
+
+        api_messages.extend(self.messages)
+
+        response = call_grok_with_messages(
+            messages=api_messages,
+            timeout=120,
+            thinking=self.thinking,
+            mode=self.mode,
+        )
+
+        # Track cumulative API token usage (for cost tracking).
+        # Each turn's total_tokens includes re-sent history, so this reflects
+        # actual API billing, not unique conversation size.
+        if hasattr(response, "usage") and response.usage:
+            self.total_tokens += response.usage.total_tokens
+
+        # Extract assistant content
+        if not response.choices:
+            raise RuntimeError("Empty response from Grok API")
+
+        assistant_content = response.choices[0].message.content or ""
+        self.messages.append({"role": "assistant", "content": assistant_content})
+
+        log.info(
+            "Session %s: turn %d, tokens=%d/%d",
+            self.session_id,
+            len(self.messages) // 2,
+            self.total_tokens,
+            self.max_tokens,
+        )
+
+        return assistant_content
+
+    def to_dict(self) -> dict:
+        """Serialize session for optional persistence."""
+        return {
+            "session_id": self.session_id,
+            "mode": self.mode,
+            "thinking": self.thinking,
+            "system_prompt": self.system_prompt,
+            "messages": self.messages,
+            "created_at": self.created_at,
+            "last_active": self.last_active,
+            "total_tokens": self.total_tokens,
+            "max_tokens": self.max_tokens,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict) -> "GrokSession":
+        """Deserialize session from dict."""
+        session = cls(
+            session_id=data["session_id"],
+            mode=data.get("mode", "reason"),
+            thinking=data.get("thinking", "low"),
+            system_prompt=data.get("system_prompt", ""),
+        )
+        session.messages = data.get("messages", [])
+        session.created_at = data.get("created_at", time.time())
+        session.last_active = data.get("last_active", time.time())
+        session.total_tokens = data.get("total_tokens", 0)
+        session.max_tokens = data.get("max_tokens", MAX_TOKEN_BUDGET)
+        return session
+
+
+# ---------------------------------------------------------------------------
+# Session store management
+# ---------------------------------------------------------------------------
+
+def _evict_expired():
+    """Remove sessions that have exceeded their TTL."""
+    now = time.time()
+    expired = [
+        sid for sid, s in _sessions.items()
+        if now - s.last_active > SESSION_TTL_SECS
+    ]
+    for sid in expired:
+        log.info("Evicting expired session: %s", sid)
+        del _sessions[sid]
+
+
+def create_session(
+    mode: str = "reason",
+    system_override: Optional[str] = None,
+    thinking: str = "low",
+    initial_file_context: str = "",
+) -> GrokSession:
+    """Create and register a new session."""
+    _evict_expired()
+
+    # Enforce max sessions
+    if len(_sessions) >= MAX_SESSIONS:
+        # Evict the oldest session
+        oldest_id = min(_sessions, key=lambda sid: _sessions[sid].last_active)
+        log.info("Evicting oldest session (max reached): %s", oldest_id)
+        del _sessions[oldest_id]
+
+    session_id = secrets.token_hex(8)
+
+    # Build system prompt
+    if system_override:
+        system_prompt = system_override
+    else:
+        system_prompt = MODE_PROMPTS.get(mode, MODE_PROMPTS["reason"])
+        if system_prompt is None:
+            system_prompt = ""
+
+    if initial_file_context:
+        system_prompt += f"\n\n## Codebase Context\n{initial_file_context}"
+
+    session = GrokSession(
+        session_id=session_id,
+        mode=mode,
+        thinking=thinking,
+        system_prompt=system_prompt,
+    )
+    _sessions[session_id] = session
+    return session
+
+
+def get_session(session_id: str) -> Optional[GrokSession]:
+    """Look up a session by ID, evicting expired ones first."""
+    _evict_expired()
+    session = _sessions.get(session_id)
+    if session is not None:
+        session.last_active = time.time()
+    return session
+
+
+def list_sessions() -> List[dict]:
+    """Return summary info for all active sessions."""
+    _evict_expired()
+    return [
+        {
+            "session_id": s.session_id,
+            "mode": s.mode,
+            "thinking": s.thinking,
+            "turns": len(s.messages) // 2,
+            "total_tokens": s.total_tokens,
+            "last_active": s.last_active,
+        }
+        for s in _sessions.values()
+    ]