diff --git a/README.md b/README.md index 9a51f4e..05e70bb 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,8 @@ Now when your agent needs deep codebase analysis, large-scale refactoring, or co - **4-Agent Swarm** — Grok 4.20 coordinates multiple agents for deeper analysis - **Massive Context** — ~2M token window, handles entire codebases +- **Native MCP Server** — Grok appears as a first-class tool in Claude Code +- **Multi-Turn Sessions** — Stateful conversations with Grok across multiple calls - **5 Modes** — Analyze, Refactor, Code, Reason, Orchestrate - **Tool Passthrough** — Pass OpenAI-format tool schemas for function calling - **File Writing** — Write annotated code blocks directly to disk @@ -258,26 +260,36 @@ claude mcp add morphllm ## Architecture +### Claude Code (MCP — Preferred) + +``` +Claude Code + │ native MCP tool calls + ▼ +grok_server.py (MCP stdio server) + │ manages sessions, dispatches tools + ▼ +grok_bridge.py (Python/OpenAI SDK) + │ + ▼ +OpenRouter API → xAI Grok 4.20 Multi-Agent +``` + +MCP tools: `grok_query`, `grok_session_start`, `grok_session_continue`, `grok_agent` + +### OpenClaw (Plugin) + ``` -OpenClaw Agent / Claude Code - │ - ▼ -grok_swarm tool / skill - │ - ▼ -index.js (Node wrapper) - │ - ▼ +OpenClaw Agent + │ tool call + ▼ +index.ts (OpenClaw plugin) + │ spawns subprocess + ▼ grok_bridge.py (Python/OpenAI SDK) - │ - ▼ -OpenRouter API - │ - ▼ -xAI Grok 4.20 Multi-Agent Beta - │ - ▼ -Response + │ + ▼ +OpenRouter API → xAI Grok 4.20 Multi-Agent ``` --- diff --git a/bin/grok-swarm-mcp.js b/bin/grok-swarm-mcp.js new file mode 100755 index 0000000..8eae66b --- /dev/null +++ b/bin/grok-swarm-mcp.js @@ -0,0 +1,45 @@ +#!/usr/bin/env node +/** + * Cross-platform wrapper for the Grok Swarm MCP server. + * Spawns python3 (or python on Windows) to run grok_server.py, + * forwarding stdin/stdout for the MCP stdio transport. + */ +const { spawn } = require('child_process'); +const path = require('path'); + +const serverScript = path.join(__dirname, '..', 'src', 'mcp', 'grok_server.py'); + +// Try python3 first, fall back to python (common on Windows) +const pythonCandidates = process.platform === 'win32' + ? ['python3', 'python'] + : ['python3']; + +function trySpawn(candidates) { + const cmd = candidates[0]; + if (!cmd) { + process.stderr.write( + 'ERROR: Python 3 not found. Install Python 3.8+ and ensure python3 (or python) is on PATH.\n' + ); + process.exit(1); + } + + const child = spawn(cmd, [serverScript], { + stdio: ['inherit', 'inherit', 'inherit'], + }); + + child.on('error', (err) => { + if (err.code === 'ENOENT' && candidates.length > 1) { + // python3 not found, try next candidate + trySpawn(candidates.slice(1)); + } else { + process.stderr.write(`ERROR: Failed to start MCP server: ${err.message}\n`); + process.exit(1); + } + }); + + child.on('exit', (code) => { + process.exit(code || 0); + }); +} + +trySpawn(pythonCandidates); diff --git a/package.json b/package.json index 3f96ef3..f0486ee 100644 --- a/package.json +++ b/package.json @@ -28,9 +28,11 @@ ], "main": "dist/index.js", "bin": { - "grok-swarm": "dist/index.js" + "grok-swarm": "dist/index.js", + "grok-swarm-mcp": "bin/grok-swarm-mcp.js" }, "files": [ + "bin/", "dist/", "src/", "README.md", diff --git a/platforms/claude/.claude-plugin/setup.sh b/platforms/claude/.claude-plugin/setup.sh index 999e320..f22e23c 100755 --- a/platforms/claude/.claude-plugin/setup.sh +++ b/platforms/claude/.claude-plugin/setup.sh @@ -51,8 +51,6 @@ if [ -f "$OAUTH_SCRIPT" ]; then echo if python3 "$OAUTH_SCRIPT"; then log "Setup complete!" - echo - echo "Run '/grok-swarm:analyze Find bugs in this codebase' to get started." else error "OAuth setup failed. Set OPENROUTER_API_KEY env var as a fallback." exit 1 @@ -65,3 +63,20 @@ else echo " Get a key at: https://openrouter.ai/keys" exit 1 fi + +# Register MCP server for native tool integration +MCP_SERVER="${PLUGIN_ROOT}/src/mcp/grok_server.py" +if [ -f "$MCP_SERVER" ] && command -v claude >/dev/null 2>&1; then + echo + log "Registering Grok Swarm MCP server..." + if claude mcp add grok-swarm -- python3 "$MCP_SERVER" 2>/dev/null; then + log "MCP server registered — grok_query, grok_session_start/continue, grok_agent tools available" + else + warn "MCP registration failed (non-fatal). Register manually:" + warn " claude mcp add grok-swarm -- python3 $MCP_SERVER" + fi +fi + +echo +echo "Run '/grok-swarm:analyze Find bugs in this codebase' to get started." +echo "Or use the native MCP tools: grok_query, grok_session_start, grok_session_continue" diff --git a/platforms/claude/commands/set-key.md b/platforms/claude/commands/set-key.md new file mode 100644 index 0000000..419655a --- /dev/null +++ b/platforms/claude/commands/set-key.md @@ -0,0 +1,86 @@ +--- +name: set-key +description: Manually save an OpenRouter API key to config. Last-resort fallback when OAuth fails. WARNING - key will be visible in conversation context. +argument-hint: YOUR_API_KEY +allowed-tools: + - Bash +--- + +# Set Grok Swarm API Key (Manual Fallback) + +**Use this only when the OAuth flow (`/grok-swarm:setup`) has failed repeatedly.** + +## Step 1 — Display security warning + +Before doing anything else, output this warning verbatim to the user: + +--- + +> **WARNING: Security Risk** +> +> You are about to paste your OpenRouter API key directly into this conversation. +> This means your API key **will be visible in the LLM context window** and may be +> stored in conversation logs. +> +> **Only proceed if you understand and accept these risks.** The recommended method +> is `/grok-swarm:setup` which uses OAuth so your key never enters the conversation. +> +> Type **yes** to confirm you accept this risk, or anything else to cancel. + +--- + +Wait for the user's response. If they do not reply with exactly `yes` (case-insensitive), stop here and remind them to use `/grok-swarm:setup` instead. + +## Step 2 — Validate the key argument + +The API key is the argument passed to this command (e.g. `/grok-swarm:set-key sk-or-v1-...`). + +If no argument was provided, ask the user to re-run with their key: +``` +/grok-swarm:set-key YOUR_API_KEY_HERE +``` +Do not ask them to type the key in a follow-up message. + +## Step 3 — Save the key + +Run (pass the key via environment variable so it is never interpolated into shell or Python source): +```bash +mkdir -p ~/.config/grok-swarm +GROK_SET_KEY="" python3 -c " +import json, os +from pathlib import Path + +api_key = os.environ['GROK_SET_KEY'] +config_file = Path.home() / '.config' / 'grok-swarm' / 'config.json' +existing = {} +if config_file.exists(): + try: + existing = json.loads(config_file.read_text()) + except Exception: + pass +existing['api_key'] = api_key +json_bytes = (json.dumps(existing, indent=2) + '\n').encode() +tmp = str(config_file.parent / f'.config.json.tmp.{os.getpid()}') +fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600) +try: + os.write(fd, json_bytes) + os.fsync(fd) +finally: + os.close(fd) +os.replace(tmp, str(config_file)) +print('saved') +" +``` + +## Step 4 — Confirm success + +Tell the user: +- Key saved to `~/.config/grok-swarm/config.json` (permissions: 600) +- Show only the first 8 characters: `sk-or-v1-...` → `sk-or-v1-` + `[redacted]` +- Suggest running `/grok-swarm:analyze Hello world` to verify the key works + +## Notes + +- This command exists as a last resort. Always prefer `/grok-swarm:setup` (OAuth flow). +- The key is saved with file permissions 600 (owner read/write only). +- If you need to rotate the key later, run this command again or re-run `/grok-swarm:setup`. diff --git a/platforms/claude/commands/setup.md b/platforms/claude/commands/setup.md index b5ff7a8..9887c77 100644 --- a/platforms/claude/commands/setup.md +++ b/platforms/claude/commands/setup.md @@ -55,13 +55,20 @@ else OAUTH_PATH="$(find /usr /usr/local ~/.local -name 'oauth_setup.py' 2>/dev/null | head -1)" [ -n "$OAUTH_PATH" ] && PLUGIN_ROOT="$(cd "$(dirname "$OAUTH_PATH")/../.." 2>/dev/null && pwd)" fi -timeout 240s python3 "$PLUGIN_ROOT/src/bridge/oauth_setup.py" +# timeout is not available on macOS without Homebrew coreutils (gtimeout). +# The script has an internal 180s timeout, so the outer wrapper is best-effort only. +TIMEOUT_CMD=$(command -v gtimeout 2>/dev/null || command -v timeout 2>/dev/null || true) +if [ -n "$TIMEOUT_CMD" ]; then + "$TIMEOUT_CMD" 240s python3 "$PLUGIN_ROOT/src/bridge/oauth_setup.py" +else + python3 "$PLUGIN_ROOT/src/bridge/oauth_setup.py" +fi ``` -**Note**: The `timeout 240s` wrapper ensures the command terminates if the OAuth -flow exceeds 240 seconds. The script itself has an internal OAUTH_TIMEOUT_SECS -(180s) for the callback phase plus roughly 30s for token exchange, so the 240s -outer limit provides a safe margin. +**Note**: The wrapper tries `gtimeout` (macOS/Homebrew), then `timeout` (Linux) +to enforce a 240s outer limit. If neither is available (e.g. stock macOS), the +script runs without an outer wrapper — its internal `OAUTH_TIMEOUT_SECS` (180s) +for the callback phase plus ~30s for token exchange still prevents hangs. The script will: 1. Print an authorization URL diff --git a/platforms/claude/skills/grok-swarm/SKILL.md b/platforms/claude/skills/grok-swarm/SKILL.md index 071ca6d..630b7b5 100644 --- a/platforms/claude/skills/grok-swarm/SKILL.md +++ b/platforms/claude/skills/grok-swarm/SKILL.md @@ -50,6 +50,27 @@ This stores your API key in `~/.claude/grok-swarm.local.md` (plugin settings pat /grok-swarm:reason Compare microservices vs monolith for this project ``` +## MCP Tools (Native Integration) + +When installed via the plugin system or `claude mcp add`, Grok Swarm registers as a native MCP server. These tools are available directly — no slash commands needed: + +| Tool | Description | +|------|-------------| +| `grok_query` | Stateless single call — analyze, refactor, code, or reason | +| `grok_session_start` | Begin a multi-turn conversation with Grok | +| `grok_session_continue` | Continue an existing session (Grok remembers history) | +| `grok_agent` | Run the autonomous agent loop (discover → modify → verify) | + +### Multi-Turn Sessions + +``` +grok_session_start(mode="analyze", files=["src/auth.py"]) +→ { session_id: "abc123" } + +grok_session_continue(session_id="abc123", message="What about password hashing?") +→ Grok remembers the previous analysis +``` + ## First-Time Setup Grok Swarm uses a PKCE OAuth flow to obtain your OpenRouter API key. **Your key never passes through the LLM context window.** diff --git a/platforms/claude/skills/grok-swarm/SKILL_AGENT.md b/platforms/claude/skills/grok-swarm/SKILL_AGENT.md index 7d749ab..00309cf 100644 --- a/platforms/claude/skills/grok-swarm/SKILL_AGENT.md +++ b/platforms/claude/skills/grok-swarm/SKILL_AGENT.md @@ -42,18 +42,19 @@ Grok has a **2M token context window**. Claude Code typically has **200K-1M toke ┌─────────────────────────────────────────────────────────────┐ │ Claude Code (200K-1M context) │ │ - Orchestrates tasks │ -│ - Delegates to Grok │ +│ - Calls Grok via native MCP tools │ │ - Synthesizes results │ └─────────────────────┬───────────────────────────────────────┘ - │ CLI invocation (small prompt) + │ MCP tool call (native) ▼ ┌─────────────────────────────────────────────────────────────┐ -│ Grok Bridge (local process) │ -│ - Reads files directly from disk │ -│ - Sends to Grok API (2M context) │ -│ - Writes results to disk │ +│ Grok MCP Server (grok_server.py) │ +│ - grok_query: stateless single call │ +│ - grok_session_start/continue: multi-turn sessions │ +│ - grok_agent: autonomous agent loop │ +│ - Reads files from disk, manages sessions in-memory │ └─────────────────────┬───────────────────────────────────────┘ - │ API call + │ OpenAI SDK → OpenRouter API ▼ ┌─────────────────────────────────────────────────────────────┐ │ Grok 4.20 (2M token context) │ @@ -62,11 +63,22 @@ Grok has a **2M token context window**. Claude Code typically has **200K-1M toke └─────────────────────────────────────────────────────────────┘ ``` -**Key insight:** File content goes `disk → bridge → API → Grok`. It does NOT pass through Claude's context. Claude only receives the final response text. +**Key insight:** File content goes `disk → MCP server → API → Grok`. It does NOT pass through Claude's context. Claude only receives the final response text. ## Invocation Pattern -Grok Swarm is **not a native Claude sub-agent** — it's an external API. Use it via skill invocation: +### Preferred: MCP Tools (Native) + +When the MCP server is registered, Grok tools appear as native tools — call them directly: + +``` +grok_query(prompt="Find bugs in auth module", mode="analyze", files=["src/auth.py"]) +grok_session_start(mode="analyze", files=["src/auth.py"]) +grok_session_continue(session_id="abc123", message="What about the password hashing?") +grok_agent(task="Refactor auth module", target="./src/auth", apply=true) +``` + +### Alternative: Skill Commands ``` /grok-swarm:reason @@ -75,7 +87,7 @@ Grok Swarm is **not a native Claude sub-agent** — it's an external API. Use it /grok-swarm:refactor ``` -Or directly via CLI: +### Alternative: Direct CLI ```bash python3 src/bridge/grok_bridge.py --mode --prompt "" [options] ``` @@ -141,35 +153,34 @@ When using `--write-files --output-dir /path`, Grok writes directly to disk. The 2. `// FILE: path/to/file.py` inside block 3. `# filename.py` comment header -## Critical Limitations +## Multi-Turn Sessions -### 1. ⚠️ STATELESS — No Multi-turn Sessions -Each invocation is **independent**. Grok has no memory of previous calls. +The MCP server supports **stateful multi-turn sessions** — Grok remembers previous messages: -**Context must be explicitly provided each time:** ``` -# BAD — Grok won't remember -"Continue the security audit from before" +# Start a session +grok_session_start(mode="analyze", files=["src/auth.py"]) +→ returns session_id: "abc123" -/# GOOD — Include all context explicitly -"Continue security audit. Previous findings: [paste findings]. Now check data validation." +# Continue the conversation — Grok remembers +grok_session_continue(session_id="abc123", message="What about the password hashing?") +grok_session_continue(session_id="abc123", message="Suggest fixes for the issues found") ``` -**See Issue #30** for planned stateful session support. +Sessions have: +- **30-minute TTL** — auto-expire after inactivity +- **Token budget tracking** — warns when approaching cost limits +- **Max 10 concurrent sessions** — oldest evicted when full -### 2. ⚠️ NO Streaming — Single Response Only -The bridge waits for full response before returning. No real-time updates. +## Limitations -### 3. ⚠️ NO Native Agent Pool -Grok cannot be registered as a Claude sub-agent. Orchestration is manual: -- Run Grok in background: `python3 grok_bridge.py [args] &` -- Continue Claude work independently -- Collect Grok results when needed +### 1. ⚠️ NO Streaming — Single Response Only +The bridge waits for full response before returning. No real-time updates. -### 4. ⚠️ NO Tool Passthrough (Currently) -Built-in Grok tools (web search, code execution) are **not enabled** by default. The bridge uses Grok for reasoning only, not tool calling. +### 2. ⚠️ NO Tool Passthrough (Currently) +Built-in Grok tools (web search, code execution) are **not enabled** by default. Sub-agent tools (read_file, write_file, search_code) are planned for Phase 3. -### 5. ⚠️ Rate Limiting +### 3. ⚠️ Rate Limiting OpenRouter may rate limit. If seeing errors, add delays between calls. ## Safe Multi-Agent Coordination Patterns diff --git a/scripts/postinstall.js b/scripts/postinstall.js index f287512..227bc99 100644 --- a/scripts/postinstall.js +++ b/scripts/postinstall.js @@ -24,7 +24,12 @@ if (hasEnvKey) { } else { console.log('⚠ No API key configured yet'); console.log(''); - console.log('Run ./scripts/setup.sh to configure your OpenRouter API key'); + if (process.platform === 'win32') { + console.log('Run: python3 scripts/oauth_setup.py'); + console.log(' or: set OPENROUTER_API_KEY=sk-or-v1-...'); + } else { + console.log('Run ./scripts/setup.sh to configure your OpenRouter API key'); + } console.log('Get a key at: https://openrouter.ai/keys'); } diff --git a/src/bridge/grok_bridge.py b/src/bridge/grok_bridge.py index 23671a3..c0eaaea 100644 --- a/src/bridge/grok_bridge.py +++ b/src/bridge/grok_bridge.py @@ -318,6 +318,77 @@ def detect_high_thinking(prompt): return any(phrase in lower for phrase in HIGH_THINKING_PHRASES) +def _get_client(api_key=None, timeout=120): + """Create an OpenAI client configured for OpenRouter.""" + key = api_key or get_api_key() + if not key: + raise RuntimeError( + "No OpenRouter API key found. " + "Tried: OPENROUTER_API_KEY env, ~/.config/grok-swarm/config.json, " + "~/.claude/grok-swarm.local.md, OpenClaw auth profiles. " + "Run: python3 src/bridge/oauth_setup.py or /grok-swarm:setup" + ) + return OpenAI(base_url=OPENROUTER_BASE, api_key=key, timeout=timeout) + + +def call_grok_with_messages(messages, tools=None, timeout=120, thinking="low", mode="reason"): + """ + Call Grok with a pre-built message list. + + This is the low-level entry point used by sessions (multi-turn) and by + call_grok() for single-turn requests. + + Args: + messages: List of OpenAI-format message dicts (system, user, assistant, tool). + tools: Optional list of OpenAI-format tool definitions. + timeout: API timeout in seconds. + thinking: "low" (4 agents) or "high" (16 agents). + mode: Task mode (for usage logging). + + Returns: + The raw ChatCompletion response object from the OpenAI SDK. + + Raises: + RuntimeError: If no API key is found. + Exception: Propagated from the OpenAI SDK on API errors. + """ + client = _get_client(timeout=timeout) + + kwargs = { + "model": MODEL_ID, + "messages": messages, + "max_tokens": 16384, + "temperature": 0.3, + "extra_body": {"agent_count": AGENT_COUNTS.get(thinking, AGENT_COUNTS["low"])}, + } + + if tools: + kwargs["tools"] = tools + + start = time.time() + response = client.chat.completions.create(**kwargs) + elapsed = time.time() - start + + # Log usage + if hasattr(response, "usage") and response.usage: + u = response.usage + agent_count = AGENT_COUNTS.get(thinking, AGENT_COUNTS["low"]) + print( + f"USAGE: mode={mode} thinking={thinking} agents={agent_count} " + f"prompt={u.prompt_tokens} completion={u.completion_tokens} " + f"total={u.total_tokens} time={elapsed:.1f}s", + file=sys.stderr, + ) + # Record to persistent usage tracker (best-effort) + try: + from usage_tracker import record_usage + record_usage(mode, thinking, u.prompt_tokens, u.completion_tokens, u.total_tokens, elapsed) + except Exception as exc: + print(f"DEBUG: Usage tracking failed: {exc}", file=sys.stderr) + + return response + + def call_grok(prompt, mode="reason", context="", system_override=None, tools=None, timeout=120, thinking="low"): """Make the API call to Grok 4.20 Multi-Agent Beta.""" api_key = get_api_key() @@ -344,56 +415,31 @@ def call_grok(prompt, mode="reason", context="", system_override=None, tools=Non if context: system_content += f"\n\n## Codebase Context\n{context}" - client = OpenAI( - base_url=OPENROUTER_BASE, - api_key=api_key, - timeout=timeout, - ) - messages = [ {"role": "system", "content": system_content}, {"role": "user", "content": prompt}, ] - kwargs = { - "model": MODEL_ID, - "messages": messages, - "max_tokens": 16384, - "temperature": 0.3, - "extra_body": {"agent_count": AGENT_COUNTS.get(thinking, AGENT_COUNTS["low"])}, - } - - if tools: - kwargs["tools"] = tools - - start = time.time() - try: - response = client.chat.completions.create(**kwargs) + response = call_grok_with_messages( + messages=messages, tools=tools, timeout=timeout, thinking=thinking, mode=mode, + ) + except RuntimeError as e: + print(f"ERROR: {e}", file=sys.stderr) + sys.exit(1) except Exception as e: - elapsed = time.time() - start - print(f"ERROR after {elapsed:.1f}s: {e}", file=sys.stderr) + print(f"ERROR: {e}", file=sys.stderr) sys.exit(1) - elapsed = time.time() - start - if not response.choices: - print(f"ERROR: Empty response after {elapsed:.1f}s", file=sys.stderr) + print("ERROR: Empty response", file=sys.stderr) sys.exit(1) choice = response.choices[0] - # Log usage - if hasattr(response, 'usage') and response.usage: - u = response.usage - agent_count = AGENT_COUNTS.get(thinking, AGENT_COUNTS["low"]) - print(f"USAGE: mode={mode} thinking={thinking} agents={agent_count} " - f"prompt={u.prompt_tokens} completion={u.completion_tokens} " - f"total={u.total_tokens} time={elapsed:.1f}s", file=sys.stderr) - # Handle content filtering if choice.finish_reason == "content_filter": - print(f"WARNING: Response blocked by content filter after {elapsed:.1f}s", file=sys.stderr) + print("WARNING: Response blocked by content filter", file=sys.stderr) if not choice.message.content: print("No content returned. Try rephrasing the prompt.", file=sys.stderr) sys.exit(2) diff --git a/src/bridge/oauth_setup.py b/src/bridge/oauth_setup.py index 1913c34..aa54d43 100644 --- a/src/bridge/oauth_setup.py +++ b/src/bridge/oauth_setup.py @@ -24,6 +24,7 @@ import subprocess import sys import tempfile +import urllib.error import urllib.parse import urllib.request import webbrowser @@ -141,6 +142,14 @@ def log_message(self, *args) -> None: # type: ignore[override] pass # suppress request logs +class _InvalidCodeError(RuntimeError): + """Raised when the authorization code is invalid or expired (HTTP 400). + + This is the only token-exchange error that is safe to retry with a fresh + PKCE pair — all other failures (network, 5xx, bad JSON) are fatal. + """ + + def _exchange_code(code: str, code_verifier: str) -> str: """Exchange auth code for API key via OpenRouter token endpoint.""" payload = json.dumps({"code": code, "code_verifier": code_verifier}).encode() @@ -153,6 +162,13 @@ def _exchange_code(code: str, code_verifier: str) -> str: try: with urllib.request.urlopen(req, timeout=30) as resp: data = json.loads(resp.read().decode()) + except urllib.error.HTTPError as exc: + if exc.code == 400: + raise _InvalidCodeError( + f"Authorization code rejected (HTTP 400). " + f"PKCE codes are single-use and may have expired." + ) from exc + raise RuntimeError(f"Token exchange failed (HTTP {exc.code}): {exc}") from exc except Exception as exc: raise RuntimeError(f"Token exchange failed: {exc}") from exc @@ -197,12 +213,31 @@ def _check_port_available(port: int) -> bool: # Public entry points # --------------------------------------------------------------------------- +def _start_callback_server() -> "HTTPServer | None": + """Bind the callback server to CALLBACK_PORT. Returns server or None on failure.""" + max_retries = 3 + for attempt in range(max_retries): + try: + server = HTTPServer(("localhost", CALLBACK_PORT), _CallbackHandler) + server.timeout = OAUTH_TIMEOUT_SECS + return server + except OSError: + if attempt < max_retries - 1: + __import__("time").sleep(0.5) + return None + + def run_oauth_flow() -> int: """ Run the PKCE OAuth flow. + Retries once with a fresh PKCE pair if the token exchange fails (e.g. the + authorization code was already consumed — PKCE codes are single-use). + Returns 0 on success, 1 on failure. """ + import time + # Clear any stale codes from previous runs _received_code.clear() @@ -210,6 +245,8 @@ def run_oauth_flow() -> int: print( f"ERROR: Port {CALLBACK_PORT} is already in use.\n" f"\n" + f"Note: port {CALLBACK_PORT} is hardcoded by OpenRouter and cannot be changed.\n" + f"\n" f"To fix this, run:\n" f" lsof -i :{CALLBACK_PORT} # find the process\n" f" kill # kill the process using port {CALLBACK_PORT}\n" @@ -222,98 +259,112 @@ def run_oauth_flow() -> int: ) return 1 - code_verifier, code_challenge = _generate_pkce_pair() + max_attempts = 2 + for attempt in range(1, max_attempts + 1): + _received_code.clear() + + code_verifier, code_challenge = _generate_pkce_pair() + auth_params = urllib.parse.urlencode( + { + "callback_url": CALLBACK_URL, + "code_challenge": code_challenge, + "code_challenge_method": "S256", + "app_name": APP_NAME, + } + ) + auth_url = f"{OPENROUTER_AUTH_URL}?{auth_params}" + + if attempt == 1: + print("=" * 60) + print("Grok Swarm — OpenRouter Authorization") + print("=" * 60) + print() + print("Click the link below to authorize Grok Swarm with your") + print("OpenRouter account (or paste it into your browser):") + else: + print() + print(f"Attempt {attempt} of {max_attempts} — new authorization link:") - auth_params = urllib.parse.urlencode( - { - "callback_url": CALLBACK_URL, - "code_challenge": code_challenge, - "code_challenge_method": "S256", - "app_name": APP_NAME, - } - ) - auth_url = f"{OPENROUTER_AUTH_URL}?{auth_params}" - - print("=" * 60) - print("Grok Swarm — OpenRouter Authorization") - print("=" * 60) - print() - print("Click the link below to authorize Grok Swarm with your") - print("OpenRouter account (or paste it into your browser):") - print() - print(f" {auth_url}") - print() - print("Attempting to open your browser...") - _open_browser(auth_url) - print(f"Waiting up to {OAUTH_TIMEOUT_SECS}s for authorization...") - - # Handle bind-time races: another process may have bound between the - # _check_port_available call and this HTTPServer creation. - max_retries = 3 - server = None - for attempt in range(max_retries): - try: - server = HTTPServer(("localhost", CALLBACK_PORT), _CallbackHandler) - server.timeout = OAUTH_TIMEOUT_SECS - break - except OSError as exc: - if attempt < max_retries - 1: - __import__("time").sleep(0.5) - continue - # Final attempt failed + print() + print(f" {auth_url}") + print() + + # Bind the callback server BEFORE opening the browser so we're + # already listening when OpenRouter redirects back. + server = _start_callback_server() + if server is None: print( - f"\nERROR: Failed to bind to port {CALLBACK_PORT} after {max_retries} attempts: {exc}", + f"\nERROR: Failed to bind to port {CALLBACK_PORT}.", file=sys.stderr, ) print("Please try again or set your key manually:", file=sys.stderr) print(" export OPENROUTER_API_KEY=sk-or-v1-...", file=sys.stderr) return 1 - deadline = __import__("time").time() + OAUTH_TIMEOUT_SECS - while not _received_code and __import__("time").time() < deadline: - server.handle_request() + print("Attempting to open your browser...") + _open_browser(auth_url) + print(f"Waiting up to {OAUTH_TIMEOUT_SECS}s for authorization...") - server.server_close() + deadline = time.time() + OAUTH_TIMEOUT_SECS + while not _received_code and time.time() < deadline: + server.handle_request() - if not _received_code: - print("\nERROR: Timed out waiting for authorization.", file=sys.stderr) - print("Please try again or set your key manually:", file=sys.stderr) - print(" export OPENROUTER_API_KEY=sk-or-v1-...", file=sys.stderr) - return 1 + server.server_close() - code = _received_code[0] - print("\nCallback received. Exchanging code for API key...", end=" ", flush=True) + if not _received_code: + print("\nERROR: Timed out waiting for authorization.", file=sys.stderr) + print("Please try again or set your key manually:", file=sys.stderr) + print(" export OPENROUTER_API_KEY=sk-or-v1-...", file=sys.stderr) + return 1 - try: - api_key = _exchange_code(code, code_verifier) - except RuntimeError as exc: - print(f"FAILED\nERROR: {exc}", file=sys.stderr) - return 1 + code = _received_code[0] + print("\nCallback received. Exchanging code for API key...", end=" ", flush=True) - try: - _save_key(api_key) - except OSError as exc: - print(f"FAILED\nERROR: Could not save API key to {CONFIG_FILE}: {exc}", file=sys.stderr) - return 1 + try: + api_key = _exchange_code(code, code_verifier) + except _InvalidCodeError as exc: + # PKCE codes are single-use — retryable with a fresh pair + if attempt < max_attempts: + print( + f"FAILED\n" + f"{exc}\n" + f"Starting fresh attempt {attempt + 1} of {max_attempts}..." + ) + continue + print(f"FAILED\nERROR: {exc}", file=sys.stderr) + return 1 + except RuntimeError as exc: + # Non-retryable errors (network, 5xx, bad JSON) — fail immediately + print(f"FAILED\nERROR: {exc}", file=sys.stderr) + return 1 - # Validate the key works before declaring success - print("OK\nValidating key...", end=" ", flush=True) - try: - req = urllib.request.Request( - "https://openrouter.ai/api/v1/auth/key", - headers={"Authorization": f"Bearer {api_key}"}, - method="GET", - ) - with urllib.request.urlopen(req, timeout=15) as resp: - key_info = json.loads(resp.read().decode()) - label = key_info.get("label", "unknown") - print(f"OK ({label})") - except Exception: - # Key saved but validation failed — still usable, just not confirmed - print("SKIPPED (network error)") + try: + _save_key(api_key) + except OSError as exc: + print(f"FAILED\nERROR: Could not save API key to {CONFIG_FILE}: {exc}", file=sys.stderr) + return 1 - print(f"\nSuccess! API key saved to {CONFIG_FILE}") - return 0 + # Validate the key works before declaring success + print("OK\nValidating key...", end=" ", flush=True) + try: + req = urllib.request.Request( + "https://openrouter.ai/api/v1/auth/key", + headers={"Authorization": f"Bearer {api_key}"}, + method="GET", + ) + with urllib.request.urlopen(req, timeout=15) as resp: + key_info = json.loads(resp.read().decode()) + label = key_info.get("label", "unknown") + print(f"OK ({label})") + except Exception: + # Key saved but validation failed — still usable, just not confirmed + print("SKIPPED (network error)") + + print(f"\nSuccess! API key saved to {CONFIG_FILE}") + return 0 + + # Should not be reached, but be safe + return 1 def run_check() -> int: diff --git a/src/mcp/__init__.py b/src/mcp/__init__.py new file mode 100644 index 0000000..b7ace3c --- /dev/null +++ b/src/mcp/__init__.py @@ -0,0 +1 @@ +# src/mcp — MCP (Model Context Protocol) server for Grok Swarm. diff --git a/src/mcp/__main__.py b/src/mcp/__main__.py new file mode 100644 index 0000000..4c8ad86 --- /dev/null +++ b/src/mcp/__main__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 +"""Allow running the MCP server via: python -m mcp""" +from .grok_server import main + +main() diff --git a/src/mcp/grok_server.py b/src/mcp/grok_server.py new file mode 100755 index 0000000..811a615 --- /dev/null +++ b/src/mcp/grok_server.py @@ -0,0 +1,520 @@ +#!/usr/bin/env python3 +""" +grok_server.py — MCP (Model Context Protocol) server for Grok Swarm. + +Exposes Grok 4.20 Multi-Agent as native tools over the MCP stdio transport +(JSON-RPC 2.0, newline-delimited, on stdin/stdout). + +Installation: + claude mcp add grok-swarm -- python3 /path/to/src/mcp/grok_server.py + +Tools: + grok_query — Stateless single call to Grok + grok_session_start — Begin a multi-turn session + grok_session_continue — Continue an existing session + grok_agent — Run the autonomous agent loop + +Requires: Python 3.8+, openai package. +""" + +import json +import logging +import sys +from pathlib import Path + +# Ensure sibling packages are importable +_repo_root = Path(__file__).resolve().parent.parent.parent +sys.path.insert(0, str(_repo_root / "src")) + +from bridge.grok_bridge import ( + call_grok_with_messages, + get_api_key, + read_files, + strip_pgp_blocks, + MODE_PROMPTS, + AGENT_COUNTS, +) + +# Lazy import — only needed if grok_agent tool is called +_agent_module = None + +# Session store — imported lazily to avoid circular deps at module level +_session_module = None + +# --------------------------------------------------------------------------- +# Logging (stderr only — MCP spec reserves stdout for protocol messages) +# --------------------------------------------------------------------------- + +logging.basicConfig( + stream=sys.stderr, + level=logging.INFO, + format="[grok-mcp] %(levelname)s %(message)s", +) +log = logging.getLogger("grok-mcp") + +# --------------------------------------------------------------------------- +# MCP Protocol constants +# --------------------------------------------------------------------------- + +SERVER_NAME = "grok-swarm" +SERVER_VERSION = "1.3.0" +PROTOCOL_VERSION = "2024-11-05" + +# --------------------------------------------------------------------------- +# Tool definitions (JSON Schema format for MCP, NOT OpenAI format) +# --------------------------------------------------------------------------- + +TOOLS = [ + { + "name": "grok_query", + "description": ( + "Send a single stateless query to the Grok 4.20 multi-agent swarm. " + "Use for code analysis, refactoring, generation, or reasoning tasks. " + "Optionally pass file paths for codebase context." + ), + "inputSchema": { + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "Task instruction or question for Grok.", + }, + "mode": { + "type": "string", + "enum": ["refactor", "analyze", "code", "reason", "orchestrate"], + "default": "reason", + "description": "Task mode. 'orchestrate' requires the 'system' parameter.", + }, + "files": { + "type": "array", + "items": {"type": "string"}, + "description": "File paths to include as codebase context.", + }, + "system": { + "type": "string", + "description": "Override system prompt (required for orchestrate mode).", + }, + "thinking": { + "type": "string", + "enum": ["low", "high"], + "default": "low", + "description": "Thinking level: low = 4 agents, high = 16 agents.", + }, + "timeout": { + "type": "integer", + "default": 120, + "description": "API timeout in seconds.", + }, + }, + "required": ["prompt"], + }, + }, + { + "name": "grok_session_start", + "description": ( + "Start a new multi-turn conversation session with Grok. " + "Returns a session_id that can be used with grok_session_continue " + "to maintain conversation history across multiple calls." + ), + "inputSchema": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["refactor", "analyze", "code", "reason", "orchestrate"], + "default": "reason", + "description": "Task mode for this session.", + }, + "system": { + "type": "string", + "description": "Override system prompt.", + }, + "thinking": { + "type": "string", + "enum": ["low", "high"], + "default": "low", + "description": "Thinking level: low = 4 agents, high = 16 agents.", + }, + "files": { + "type": "array", + "items": {"type": "string"}, + "description": "File paths to include as initial context.", + }, + }, + "required": [], + }, + }, + { + "name": "grok_session_continue", + "description": ( + "Continue an existing multi-turn session with Grok. " + "Grok remembers all previous messages in the session." + ), + "inputSchema": { + "type": "object", + "properties": { + "session_id": { + "type": "string", + "description": "Session ID returned by grok_session_start.", + }, + "message": { + "type": "string", + "description": "Follow-up message or instruction.", + }, + "files": { + "type": "array", + "items": {"type": "string"}, + "description": "Additional file paths to add as context.", + }, + }, + "required": ["session_id", "message"], + }, + }, + { + "name": "grok_agent", + "description": ( + "Run the autonomous Grok agent loop. The agent discovers files, " + "calls Grok, applies changes, and optionally verifies with a command. " + "Iterates up to max_iterations times." + ), + "inputSchema": { + "type": "object", + "properties": { + "task": { + "type": "string", + "description": "Task instruction for the agent.", + }, + "target": { + "type": "string", + "default": ".", + "description": "Target directory or file to operate on.", + }, + "apply": { + "type": "boolean", + "default": False, + "description": "Whether to actually write changes (default: dry-run preview).", + }, + "max_iterations": { + "type": "integer", + "default": 5, + "description": "Maximum number of agent iterations.", + }, + "verify_cmd": { + "type": "string", + "description": "Command to run after each iteration for verification (e.g. 'pytest').", + }, + }, + "required": ["task"], + }, + }, +] + + +# --------------------------------------------------------------------------- +# Tool implementations +# --------------------------------------------------------------------------- + +def _handle_grok_query(params): + """Stateless single call to Grok.""" + prompt = params["prompt"] + mode = params.get("mode", "reason") + files = params.get("files", []) + system = params.get("system") + thinking = params.get("thinking", "low") + timeout = params.get("timeout", 120) + + if mode == "orchestrate" and not system: + return _error_content("orchestrate mode requires the 'system' parameter") + + # Resolve system prompt + if system: + system_content = system + else: + system_content = MODE_PROMPTS.get(mode) + if system_content is None: + return _error_content(f"Mode '{mode}' requires the 'system' parameter") + + # Read file context + if files: + context = read_files(files) + log.info("Read %d files (%d chars context)", len(files), len(context)) + system_content += f"\n\n## Codebase Context\n{context}" + + messages = [ + {"role": "system", "content": system_content}, + {"role": "user", "content": prompt}, + ] + + log.info("grok_query: mode=%s thinking=%s timeout=%d", mode, thinking, timeout) + + response = call_grok_with_messages( + messages=messages, timeout=timeout, thinking=thinking, mode=mode, + ) + + if not response.choices: + return _error_content("Empty response from Grok API") + + content = response.choices[0].message.content or "" + return _text_content(strip_pgp_blocks(content)) + + +def _handle_grok_session_start(params): + """Start a new multi-turn session.""" + session_mod = _get_session_module() + + mode = params.get("mode", "reason") + system = params.get("system") + thinking = params.get("thinking", "low") + files = params.get("files", []) + + # Read initial file context + file_context = "" + if files: + file_context = read_files(files) + + session = session_mod.create_session( + mode=mode, + system_override=system, + thinking=thinking, + initial_file_context=file_context, + ) + + log.info("Session started: %s (mode=%s, thinking=%s)", session.session_id, mode, thinking) + + return _text_content(json.dumps({ + "session_id": session.session_id, + "mode": session.mode, + "thinking": session.thinking, + "message": f"Session {session.session_id} started. Use grok_session_continue to send messages.", + })) + + +def _handle_grok_session_continue(params): + """Continue an existing session.""" + session_mod = _get_session_module() + + session_id = params["session_id"] + message = params["message"] + files = params.get("files", []) + + session = session_mod.get_session(session_id) + if session is None: + return _error_content(f"Session not found: {session_id}") + + # Read additional file context + file_context = "" + if files: + file_context = read_files(files) + + try: + response = session.send_message(message, file_context=file_context) + except Exception as exc: + log.error("Session %s error: %s", session_id, exc) + return _error_content(f"Grok API error: {exc}") + + return _text_content(strip_pgp_blocks(response)) + + +def _handle_grok_agent(params): + """Run the autonomous agent loop.""" + global _agent_module + if _agent_module is None: + sys.path.insert(0, str(_repo_root / "src" / "agent")) + from grok_agent import run_agent_loop, AgentState, AgentStatus, Platform + _agent_module = sys.modules["grok_agent"] + + task = params["task"] + target = params.get("target", ".") + apply_changes = params.get("apply", False) + max_iterations = params.get("max_iterations", 5) + verify_cmd = params.get("verify_cmd") + + state = _agent_module.AgentState( + task=task, + target=target, + platform=_agent_module.Platform.CLAUDE, + apply=apply_changes, + max_iterations=max_iterations, + verify_cmd=verify_cmd, + ) + + log.info("Agent started: task=%r target=%s apply=%s max_iter=%d", + task[:80], target, apply_changes, max_iterations) + + try: + _agent_module.run_agent_loop(state) + except Exception as exc: + log.error("Agent error: %s", exc) + return _error_content(f"Agent failed: {exc}") + + result = { + "status": state.status.value, + "iterations": state.iteration, + "changes": state.changes, + "errors": state.errors, + } + if state.last_response: + result["last_response_preview"] = state.last_response[:500] + + return _text_content(json.dumps(result, indent=2)) + + +# Tool dispatch table +TOOL_HANDLERS = { + "grok_query": _handle_grok_query, + "grok_session_start": _handle_grok_session_start, + "grok_session_continue": _handle_grok_session_continue, + "grok_agent": _handle_grok_agent, +} + + +# --------------------------------------------------------------------------- +# Helper: lazy module loading +# --------------------------------------------------------------------------- + +def _get_session_module(): + global _session_module + if _session_module is None: + import mcp.session as mod + _session_module = mod + return _session_module + + +# --------------------------------------------------------------------------- +# MCP response helpers +# --------------------------------------------------------------------------- + +def _text_content(text): + """Wrap text in MCP tool result content format. Returns (content, is_error).""" + return ([{"type": "text", "text": text}], False) + + +def _error_content(message): + """Wrap error in MCP tool result content format. Returns (content, is_error).""" + return ([{"type": "text", "text": message}], True) + + +# --------------------------------------------------------------------------- +# JSON-RPC 2.0 protocol layer +# --------------------------------------------------------------------------- + +def _jsonrpc_response(req_id, result): + """Build a JSON-RPC 2.0 success response.""" + return {"jsonrpc": "2.0", "id": req_id, "result": result} + + +def _jsonrpc_error(req_id, code, message): + """Build a JSON-RPC 2.0 error response.""" + return {"jsonrpc": "2.0", "id": req_id, "error": {"code": code, "message": message}} + + +def _handle_initialize(params): + """Respond to MCP initialize handshake.""" + return { + "protocolVersion": PROTOCOL_VERSION, + "capabilities": {"tools": {}}, + "serverInfo": { + "name": SERVER_NAME, + "version": SERVER_VERSION, + }, + } + + +def _handle_tools_list(_params): + """Return all registered tool definitions.""" + return {"tools": TOOLS} + + +def _handle_tools_call(params): + """Dispatch a tool call to the appropriate handler.""" + tool_name = params.get("name") + tool_args = params.get("arguments", {}) + + handler = TOOL_HANDLERS.get(tool_name) + if handler is None: + content, _ = _error_content(f"Unknown tool: {tool_name}") + return {"content": content, "isError": True} + + try: + content, is_error = handler(tool_args) + result = {"content": content} + if is_error: + result["isError"] = True + return result + except Exception as exc: + log.exception("Tool %s failed", tool_name) + content, _ = _error_content(f"Internal error in {tool_name}: {exc}") + return {"content": content, "isError": True} + + +# Method dispatch table +METHOD_HANDLERS = { + "initialize": _handle_initialize, + "tools/list": _handle_tools_list, + "tools/call": _handle_tools_call, +} + +# Notification methods (no response expected) +NOTIFICATION_METHODS = { + "notifications/initialized", + "notifications/cancelled", +} + + +def _process_message(raw_line): + """Parse one JSON-RPC message and return the response dict, or None for notifications.""" + try: + msg = json.loads(raw_line) + except json.JSONDecodeError as exc: + return _jsonrpc_error(None, -32700, f"Parse error: {exc}") + + method = msg.get("method") + req_id = msg.get("id") + params = msg.get("params", {}) + + # Notifications — no response + if method in NOTIFICATION_METHODS: + log.debug("Notification: %s", method) + return None + + # Unknown method + handler = METHOD_HANDLERS.get(method) + if handler is None: + log.warning("Unknown method: %s", method) + return _jsonrpc_error(req_id, -32601, f"Method not found: {method}") + + try: + result = handler(params) + return _jsonrpc_response(req_id, result) + except Exception as exc: + log.exception("Method %s failed", method) + return _jsonrpc_error(req_id, -32603, f"Internal error: {exc}") + + +# --------------------------------------------------------------------------- +# Main loop +# --------------------------------------------------------------------------- + +def main(): + """Run the MCP server on stdin/stdout.""" + log.info("Grok Swarm MCP server starting (protocol %s)", PROTOCOL_VERSION) + + # Validate API key early + if not get_api_key(): + log.warning("No API key configured — tool calls will fail until key is set") + + for raw_line in sys.stdin: + raw_line = raw_line.strip() + if not raw_line: + continue + + response = _process_message(raw_line) + if response is not None: + sys.stdout.write(json.dumps(response) + "\n") + sys.stdout.flush() + + log.info("stdin closed, shutting down") + + +if __name__ == "__main__": + main() diff --git a/src/mcp/session.py b/src/mcp/session.py new file mode 100644 index 0000000..db26cc1 --- /dev/null +++ b/src/mcp/session.py @@ -0,0 +1,257 @@ +""" +session.py — Multi-turn session management for Grok Swarm MCP server. + +Each GrokSession maintains a conversation history in OpenAI message format +and calls grok_bridge.call_grok_with_messages() for API requests. + +Sessions are stored in-memory (scoped to the MCP server process lifetime). +""" + +import logging +import secrets +import sys +import time +from pathlib import Path +from typing import Dict, List, Optional + +# Ensure bridge is importable +_repo_root = Path(__file__).resolve().parent.parent.parent +sys.path.insert(0, str(_repo_root / "src")) + +from bridge.grok_bridge import call_grok_with_messages, MODE_PROMPTS + +log = logging.getLogger("grok-mcp.session") + +# --------------------------------------------------------------------------- +# Session configuration +# --------------------------------------------------------------------------- + +SESSION_TTL_SECS = 1800 # 30 minutes +MAX_SESSIONS = 10 +MAX_TOKEN_BUDGET = 500_000 + +# --------------------------------------------------------------------------- +# Session store +# --------------------------------------------------------------------------- + +_sessions: Dict[str, "GrokSession"] = {} + + +class GrokSession: + """A stateful multi-turn conversation with Grok.""" + + __slots__ = ( + "session_id", + "mode", + "thinking", + "system_prompt", + "messages", + "created_at", + "last_active", + "total_tokens", + "max_tokens", + ) + + def __init__( + self, + session_id: str, + mode: str = "reason", + thinking: str = "low", + system_prompt: str = "", + ): + self.session_id = session_id + self.mode = mode + self.thinking = thinking + self.system_prompt = system_prompt + # messages does NOT include the system message — that's prepended at call time + self.messages: List[dict] = [] + self.created_at = time.time() + self.last_active = time.time() + self.total_tokens = 0 + self.max_tokens = MAX_TOKEN_BUDGET + + def send_message(self, user_content: str, file_context: str = "") -> str: + """ + Send a user message and get Grok's response. + + Args: + user_content: The user's message text. + file_context: Optional file content to prepend to the message. + + Returns: + Grok's response text. + + Raises: + RuntimeError: If the session's token budget is exhausted. + Exception: Propagated from the API on errors. + """ + self.last_active = time.time() + + if self.total_tokens >= self.max_tokens: + raise RuntimeError( + f"Session {self.session_id} token budget exhausted " + f"({self.total_tokens:,} / {self.max_tokens:,}). " + "Start a new session." + ) + + # Build user message content + content = user_content + if file_context: + content = f"## Additional File Context\n{file_context}\n\n{user_content}" + + self.messages.append({"role": "user", "content": content}) + + # Build full message list for API + api_messages = [{"role": "system", "content": self.system_prompt}] + + # Inject budget warning early (right after system prompt) for visibility + if self.total_tokens > self.max_tokens * 0.8: + api_messages.append({ + "role": "system", + "content": ( + "NOTE: Token budget for this session is nearly exhausted. " + "Please wrap up your response concisely." + ), + }) + + api_messages.extend(self.messages) + + response = call_grok_with_messages( + messages=api_messages, + timeout=120, + thinking=self.thinking, + mode=self.mode, + ) + + # Track cumulative API token usage (for cost tracking). + # Each turn's total_tokens includes re-sent history, so this reflects + # actual API billing, not unique conversation size. + if hasattr(response, "usage") and response.usage: + self.total_tokens += response.usage.total_tokens + + # Extract assistant content + if not response.choices: + raise RuntimeError("Empty response from Grok API") + + assistant_content = response.choices[0].message.content or "" + self.messages.append({"role": "assistant", "content": assistant_content}) + + log.info( + "Session %s: turn %d, tokens=%d/%d", + self.session_id, + len(self.messages) // 2, + self.total_tokens, + self.max_tokens, + ) + + return assistant_content + + def to_dict(self) -> dict: + """Serialize session for optional persistence.""" + return { + "session_id": self.session_id, + "mode": self.mode, + "thinking": self.thinking, + "system_prompt": self.system_prompt, + "messages": self.messages, + "created_at": self.created_at, + "last_active": self.last_active, + "total_tokens": self.total_tokens, + "max_tokens": self.max_tokens, + } + + @classmethod + def from_dict(cls, data: dict) -> "GrokSession": + """Deserialize session from dict.""" + session = cls( + session_id=data["session_id"], + mode=data.get("mode", "reason"), + thinking=data.get("thinking", "low"), + system_prompt=data.get("system_prompt", ""), + ) + session.messages = data.get("messages", []) + session.created_at = data.get("created_at", time.time()) + session.last_active = data.get("last_active", time.time()) + session.total_tokens = data.get("total_tokens", 0) + session.max_tokens = data.get("max_tokens", MAX_TOKEN_BUDGET) + return session + + +# --------------------------------------------------------------------------- +# Session store management +# --------------------------------------------------------------------------- + +def _evict_expired(): + """Remove sessions that have exceeded their TTL.""" + now = time.time() + expired = [ + sid for sid, s in _sessions.items() + if now - s.last_active > SESSION_TTL_SECS + ] + for sid in expired: + log.info("Evicting expired session: %s", sid) + del _sessions[sid] + + +def create_session( + mode: str = "reason", + system_override: Optional[str] = None, + thinking: str = "low", + initial_file_context: str = "", +) -> GrokSession: + """Create and register a new session.""" + _evict_expired() + + # Enforce max sessions + if len(_sessions) >= MAX_SESSIONS: + # Evict the oldest session + oldest_id = min(_sessions, key=lambda sid: _sessions[sid].last_active) + log.info("Evicting oldest session (max reached): %s", oldest_id) + del _sessions[oldest_id] + + session_id = secrets.token_hex(8) + + # Build system prompt + if system_override: + system_prompt = system_override + else: + system_prompt = MODE_PROMPTS.get(mode, MODE_PROMPTS["reason"]) + if system_prompt is None: + system_prompt = "" + + if initial_file_context: + system_prompt += f"\n\n## Codebase Context\n{initial_file_context}" + + session = GrokSession( + session_id=session_id, + mode=mode, + thinking=thinking, + system_prompt=system_prompt, + ) + _sessions[session_id] = session + return session + + +def get_session(session_id: str) -> Optional[GrokSession]: + """Look up a session by ID, evicting expired ones first.""" + _evict_expired() + session = _sessions.get(session_id) + if session is not None: + session.last_active = time.time() + return session + + +def list_sessions() -> List[dict]: + """Return summary info for all active sessions.""" + _evict_expired() + return [ + { + "session_id": s.session_id, + "mode": s.mode, + "thinking": s.thinking, + "turns": len(s.messages) // 2, + "total_tokens": s.total_tokens, + "last_active": s.last_active, + } + for s in _sessions.values() + ]