From 50de9299b563d74d9591265b2407614062b3c2ab Mon Sep 17 00:00:00 2001 From: terra tauri Date: Sat, 21 Feb 2026 06:50:57 -0800 Subject: [PATCH] fix: Allow standup MCP tools via allowedTools instead of bypassPermissions The standup report and sync phases used permissionMode "acceptEdits" which blocks MCP tool calls that haven't been explicitly granted. This caused submit_status_report and submit_sync_response to fail with permission errors, making navigators unable to submit their reports. Fix: Add allowedTools to whitelist the specific MCP protocol tools while keeping acceptEdits as the base permission policy. Also adds allowedTools support to the harness AgentConfig type. Includes improved diagnostics: tool call/error counters, always-on handler error logging, and safeParse fallback for resilient report capture. --- package-lock.json | 12 ++- package.json | 3 +- .../src/harness/claude-code-harness.ts | 1 + packages/autonav/src/harness/types.ts | 3 + packages/autonav/src/standup/loop.ts | 21 ++++- .../autonav/src/standup/standup-protocol.ts | 90 +++++++++++++++---- 6 files changed, 108 insertions(+), 22 deletions(-) diff --git a/package-lock.json b/package-lock.json index c9dee59..882631e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,7 @@ "packs/*" ], "devDependencies": { + "@types/marked": "^5.0.2", "@types/node": "^20.0.0", "tsx": "^4.7.0", "typescript": "^5.3.0", @@ -1242,6 +1243,13 @@ "@types/node": "*" } }, + "node_modules/@types/marked": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/@types/marked/-/marked-5.0.2.tgz", + "integrity": "sha512-OucS4KMHhFzhz27KxmWg7J+kIYqyqoW5kdIEI319hqARQQUTqhao3M/F+uFnDXD0Rg72iDDZxZNxq5gvctmLlg==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/micromatch": { "version": "4.0.10", "license": "MIT", @@ -5756,7 +5764,7 @@ }, "packages/autonav": { "name": "@autonav/core", - "version": "1.6.0", + "version": "1.7.0", "license": "Apache-2.0", "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.1.55", @@ -5803,7 +5811,7 @@ }, "packages/communication-layer": { "name": "@autonav/communication-layer", - "version": "0.1.5", + "version": "0.2.0", "license": "Apache-2.0", "dependencies": { "zod": "^3.25.76" diff --git a/package.json b/package.json index fdde2c4..265674e 100644 --- a/package.json +++ b/package.json @@ -24,9 +24,10 @@ "author": "Terra", "license": "TBD", "devDependencies": { + "@types/marked": "^5.0.2", "@types/node": "^20.0.0", - "typescript": "^5.3.0", "tsx": "^4.7.0", + "typescript": "^5.3.0", "vitest": "^1.2.0" }, "engines": { diff --git a/packages/autonav/src/harness/claude-code-harness.ts b/packages/autonav/src/harness/claude-code-harness.ts index 65df9e5..c1886b9 100644 --- a/packages/autonav/src/harness/claude-code-harness.ts +++ b/packages/autonav/src/harness/claude-code-harness.ts @@ -116,6 +116,7 @@ function configToSdkOptions(config: AgentConfig): Record { if (config.additionalDirectories) options.additionalDirectories = config.additionalDirectories; if (config.maxTurns !== undefined) options.maxTurns = config.maxTurns; if (config.maxBudgetUsd !== undefined) options.maxBudgetUsd = config.maxBudgetUsd; + if (config.allowedTools) options.allowedTools = config.allowedTools; if (config.disallowedTools) options.disallowedTools = config.disallowedTools; if (config.mcpServers) options.mcpServers = config.mcpServers; if (config.permissionMode) options.permissionMode = config.permissionMode; diff --git a/packages/autonav/src/harness/types.ts b/packages/autonav/src/harness/types.ts index 9d7b32e..acbb6f8 100644 --- a/packages/autonav/src/harness/types.ts +++ b/packages/autonav/src/harness/types.ts @@ -62,6 +62,9 @@ export interface AgentConfig { /** Maximum budget in USD */ maxBudgetUsd?: number; + /** Tools the agent is explicitly allowed to use without permission prompts */ + allowedTools?: string[]; + /** Tools the agent is disallowed from using */ disallowedTools?: string[]; diff --git a/packages/autonav/src/standup/loop.ts b/packages/autonav/src/standup/loop.ts index c45896e..86e7cb1 100644 --- a/packages/autonav/src/standup/loop.ts +++ b/packages/autonav/src/standup/loop.ts @@ -162,6 +162,7 @@ async function runReportPhase( cwd: nav.directory, additionalDirectories: [...nav.workingDirectories, standupDir], permissionMode: "acceptEdits" as const, + allowedTools: ["mcp__autonav-standup-report__submit_status_report"], mcpServers: { "autonav-standup-report": server, }, @@ -187,10 +188,16 @@ async function runReportPhase( const session = harness.run(agentConfig, prompt); let resultEvent: (AgentEvent & { type: "result" }) | undefined; + let submitToolCallCount = 0; + let submitToolErrorCount = 0; + let lastToolError = ""; for await (const event of session) { if (event.type === "tool_use") { const shortToolName = event.name.split("__").pop() || event.name; + if (shortToolName === "submit_status_report") { + submitToolCallCount++; + } if (verbose) { console.log(`[Report:${nav.name}] Tool: ${shortToolName}`); } @@ -198,7 +205,13 @@ async function runReportPhase( } if (event.type === "tool_result") { if (event.isError) { - debug(`[Report:${nav.name}] Tool ERROR result:`, event.content.substring(0, 500)); + submitToolErrorCount++; + lastToolError = event.content.substring(0, 500); + // Always log tool errors — these reveal why tools fail + if (verbose) { + console.log(`[Report:${nav.name}] Tool ERROR: ${lastToolError}`); + } + debug(`[Report:${nav.name}] Tool ERROR result:`, lastToolError); } else if (DEBUG) { debug(`[Report:${nav.name}] Tool result:`, event.content.substring(0, 300)); } @@ -232,8 +245,11 @@ async function runReportPhase( const report = protocol.getCapturedReport(); if (!report) { + const diagnostic = submitToolCallCount > 0 + ? `Tool was called ${submitToolCallCount} time(s) with ${submitToolErrorCount} error(s).${lastToolError ? ` Last error: ${lastToolError}` : ""}` + : "Tool was never called by the navigator."; throw new Error( - `${nav.name} did not submit a status report. Navigator must use the submit_status_report tool.` + `${nav.name} did not submit a status report. ${diagnostic}` ); } @@ -296,6 +312,7 @@ async function runSyncPhase( cwd: nav.directory, additionalDirectories: [...nav.workingDirectories, standupDir], permissionMode: "acceptEdits" as const, + allowedTools: ["mcp__autonav-standup-sync__submit_sync_response"], mcpServers: { "autonav-standup-sync": server, }, diff --git a/packages/autonav/src/standup/standup-protocol.ts b/packages/autonav/src/standup/standup-protocol.ts index 6f01765..4451aa4 100644 --- a/packages/autonav/src/standup/standup-protocol.ts +++ b/packages/autonav/src/standup/standup-protocol.ts @@ -14,6 +14,19 @@ import { type SyncResponse, } from "./types.js"; +/** + * Always-on diagnostic logging for tool handler failures. + * These go to stderr so they're visible even without DEBUG=1. + */ +function logHandlerError(toolName: string, error: unknown, args: unknown): void { + console.error(`[autonav:${toolName}] Handler error: ${error instanceof Error ? error.message : String(error)}`); + if (error instanceof Error && "errors" in error) { + // Zod validation errors have an .errors array + console.error(`[autonav:${toolName}] Validation details:`, JSON.stringify((error as any).errors, null, 2)); + } + console.error(`[autonav:${toolName}] Args keys: ${args && typeof args === "object" ? Object.keys(args as object).join(", ") : "N/A"}`); +} + /** * Tool names */ @@ -92,13 +105,41 @@ Other navigators will read your report in the sync phase to identify blockers th console.error(`[debug] Raw args keys:`, Object.keys(args)); } try { + // The MCP server already validated against the tool's inputSchema. + // Use safeParse for the StatusReport schema to get actionable errors + // instead of throwing (which the MCP server swallows as isError: true). const cleaned = stripNulls(args); - const report = StatusReportSchema.parse(cleaned); - capturedReport = report; + const result = StatusReportSchema.safeParse(cleaned); + + if (!result.success) { + // Always log validation failures — these are otherwise invisible + logHandlerError(SUBMIT_STATUS_REPORT_TOOL, result.error, args); + + // Capture what we can despite validation failure: the MCP server's + // inputSchema already validated the shape, so the data is likely usable. + // Fall back to the raw args with nulls stripped. + capturedReport = cleaned as StatusReport; + + return { + content: [ + { + type: "text" as const, + text: JSON.stringify({ + success: true, + message: `Status report submitted (with validation warnings).`, + report: cleaned, + }), + }, + ], + isError: false, + }; + } + + capturedReport = result.data; const blockerSummary = - report.blockers.length > 0 - ? `${report.blockers.length} blocker(s) reported.` + result.data.blockers.length > 0 + ? `${result.data.blockers.length} blocker(s) reported.` : "No blockers."; return { @@ -108,17 +149,15 @@ Other navigators will read your report in the sync phase to identify blockers th text: JSON.stringify({ success: true, message: `Status report submitted. ${blockerSummary}`, - report, + report: result.data, }), }, ], isError: false, }; } catch (error) { - if (DEBUG) { - console.error(`[debug] submit_status_report handler error:`, error); - console.error(`[debug] Raw args:`, JSON.stringify(args, null, 2)); - } + // Always log unexpected handler errors + logHandlerError(SUBMIT_STATUS_REPORT_TOOL, error, args); throw error; } } @@ -184,8 +223,28 @@ Prioritize resolving blockers where \`needsFrom\` matches your name, then those async (args) => { try { const cleaned = stripNulls(args); - const sync = SyncResponseSchema.parse(cleaned); - capturedSync = sync; + const result = SyncResponseSchema.safeParse(cleaned); + + if (!result.success) { + logHandlerError(SUBMIT_SYNC_RESPONSE_TOOL, result.error, args); + capturedSync = cleaned as SyncResponse; + + return { + content: [ + { + type: "text" as const, + text: JSON.stringify({ + success: true, + message: `Sync response submitted (with validation warnings).`, + sync: cleaned, + }), + }, + ], + isError: false, + }; + } + + capturedSync = result.data; return { content: [ @@ -193,18 +252,15 @@ Prioritize resolving blockers where \`needsFrom\` matches your name, then those type: "text" as const, text: JSON.stringify({ success: true, - message: `Sync response submitted with ${sync.blockerResolutions.length} blocker resolution(s).`, - sync, + message: `Sync response submitted with ${result.data.blockerResolutions.length} blocker resolution(s).`, + sync: result.data, }), }, ], isError: false, }; } catch (error) { - if (DEBUG) { - console.error(`[debug] submit_sync_response handler error:`, error); - console.error(`[debug] Raw args:`, JSON.stringify(args, null, 2)); - } + logHandlerError(SUBMIT_SYNC_RESPONSE_TOOL, error, args); throw error; } }