diff --git a/package-lock.json b/package-lock.json index c9dee59..882631e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -13,6 +13,7 @@ "packs/*" ], "devDependencies": { + "@types/marked": "^5.0.2", "@types/node": "^20.0.0", "tsx": "^4.7.0", "typescript": "^5.3.0", @@ -1242,6 +1243,13 @@ "@types/node": "*" } }, + "node_modules/@types/marked": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/@types/marked/-/marked-5.0.2.tgz", + "integrity": "sha512-OucS4KMHhFzhz27KxmWg7J+kIYqyqoW5kdIEI319hqARQQUTqhao3M/F+uFnDXD0Rg72iDDZxZNxq5gvctmLlg==", + "dev": true, + "license": "MIT" + }, "node_modules/@types/micromatch": { "version": "4.0.10", "license": "MIT", @@ -5756,7 +5764,7 @@ }, "packages/autonav": { "name": "@autonav/core", - "version": "1.6.0", + "version": "1.7.0", "license": "Apache-2.0", "dependencies": { "@anthropic-ai/claude-agent-sdk": "^0.1.55", @@ -5803,7 +5811,7 @@ }, "packages/communication-layer": { "name": "@autonav/communication-layer", - "version": "0.1.5", + "version": "0.2.0", "license": "Apache-2.0", "dependencies": { "zod": "^3.25.76" diff --git a/package.json b/package.json index fdde2c4..265674e 100644 --- a/package.json +++ b/package.json @@ -24,9 +24,10 @@ "author": "Terra", "license": "TBD", "devDependencies": { + "@types/marked": "^5.0.2", "@types/node": "^20.0.0", - "typescript": "^5.3.0", "tsx": "^4.7.0", + "typescript": "^5.3.0", "vitest": "^1.2.0" }, "engines": { diff --git a/packages/autonav/src/harness/claude-code-harness.ts b/packages/autonav/src/harness/claude-code-harness.ts index 65df9e5..c1886b9 100644 --- a/packages/autonav/src/harness/claude-code-harness.ts +++ b/packages/autonav/src/harness/claude-code-harness.ts @@ -116,6 +116,7 @@ function configToSdkOptions(config: AgentConfig): Record { if (config.additionalDirectories) options.additionalDirectories = config.additionalDirectories; if (config.maxTurns !== undefined) options.maxTurns = config.maxTurns; if (config.maxBudgetUsd !== undefined) 
options.maxBudgetUsd = config.maxBudgetUsd; + if (config.allowedTools) options.allowedTools = config.allowedTools; if (config.disallowedTools) options.disallowedTools = config.disallowedTools; if (config.mcpServers) options.mcpServers = config.mcpServers; if (config.permissionMode) options.permissionMode = config.permissionMode; diff --git a/packages/autonav/src/harness/types.ts b/packages/autonav/src/harness/types.ts index 9d7b32e..acbb6f8 100644 --- a/packages/autonav/src/harness/types.ts +++ b/packages/autonav/src/harness/types.ts @@ -62,6 +62,9 @@ export interface AgentConfig { /** Maximum budget in USD */ maxBudgetUsd?: number; + /** Tools the agent is explicitly allowed to use without permission prompts */ + allowedTools?: string[]; + /** Tools the agent is disallowed from using */ disallowedTools?: string[]; diff --git a/packages/autonav/src/standup/loop.ts b/packages/autonav/src/standup/loop.ts index c45896e..86e7cb1 100644 --- a/packages/autonav/src/standup/loop.ts +++ b/packages/autonav/src/standup/loop.ts @@ -162,6 +162,7 @@ async function runReportPhase( cwd: nav.directory, additionalDirectories: [...nav.workingDirectories, standupDir], permissionMode: "acceptEdits" as const, + allowedTools: ["mcp__autonav-standup-report__submit_status_report"], mcpServers: { "autonav-standup-report": server, }, @@ -187,10 +188,16 @@ async function runReportPhase( const session = harness.run(agentConfig, prompt); let resultEvent: (AgentEvent & { type: "result" }) | undefined; + let submitToolCallCount = 0; + let submitToolErrorCount = 0; + let lastToolError = ""; for await (const event of session) { if (event.type === "tool_use") { const shortToolName = event.name.split("__").pop() || event.name; + if (shortToolName === "submit_status_report") { + submitToolCallCount++; + } if (verbose) { console.log(`[Report:${nav.name}] Tool: ${shortToolName}`); } @@ -198,7 +205,13 @@ async function runReportPhase( } if (event.type === "tool_result") { if (event.isError) { - 
debug(`[Report:${nav.name}] Tool ERROR result:`, event.content.substring(0, 500)); + submitToolErrorCount++; + lastToolError = event.content.substring(0, 500); + // Echo tool errors to the console in verbose mode — these reveal why tools fail + if (verbose) { + console.log(`[Report:${nav.name}] Tool ERROR: ${lastToolError}`); + } + debug(`[Report:${nav.name}] Tool ERROR result:`, lastToolError); } else if (DEBUG) { debug(`[Report:${nav.name}] Tool result:`, event.content.substring(0, 300)); } @@ -232,8 +245,11 @@ async function runReportPhase( const report = protocol.getCapturedReport(); if (!report) { + const diagnostic = submitToolCallCount > 0 + ? `Tool was called ${submitToolCallCount} time(s) with ${submitToolErrorCount} error(s).${lastToolError ? ` Last error: ${lastToolError}` : ""}` + : "Tool was never called by the navigator."; throw new Error( - `${nav.name} did not submit a status report. Navigator must use the submit_status_report tool.` + `${nav.name} did not submit a status report. ${diagnostic}` ); } @@ -296,6 +312,7 @@ async function runSyncPhase( cwd: nav.directory, additionalDirectories: [...nav.workingDirectories, standupDir], permissionMode: "acceptEdits" as const, + allowedTools: ["mcp__autonav-standup-sync__submit_sync_response"], mcpServers: { "autonav-standup-sync": server, }, diff --git a/packages/autonav/src/standup/standup-protocol.ts b/packages/autonav/src/standup/standup-protocol.ts index 6f01765..4451aa4 100644 --- a/packages/autonav/src/standup/standup-protocol.ts +++ b/packages/autonav/src/standup/standup-protocol.ts @@ -14,6 +14,19 @@ import { type SyncResponse, } from "./types.js"; +/** + * Always-on diagnostic logging for tool handler failures. + * These go to stderr so they're visible even without DEBUG=1. + */ +function logHandlerError(toolName: string, error: unknown, args: unknown): void { + console.error(`[autonav:${toolName}] Handler error: ${error instanceof Error ? 
error.message : String(error)}`); + if (error instanceof Error && "errors" in error) { + // Zod validation errors have an .errors array + console.error(`[autonav:${toolName}] Validation details:`, JSON.stringify((error as any).errors, null, 2)); + } + console.error(`[autonav:${toolName}] Args keys: ${args && typeof args === "object" ? Object.keys(args as object).join(", ") : "N/A"}`); +} + /** * Tool names */ @@ -92,13 +105,41 @@ Other navigators will read your report in the sync phase to identify blockers th console.error(`[debug] Raw args keys:`, Object.keys(args)); } try { + // The MCP server already validated against the tool's inputSchema. + // Use safeParse for the StatusReport schema to get actionable errors + // instead of throwing (which the MCP server swallows as isError: true). const cleaned = stripNulls(args); - const report = StatusReportSchema.parse(cleaned); - capturedReport = report; + const result = StatusReportSchema.safeParse(cleaned); + + if (!result.success) { + // Always log validation failures — these are otherwise invisible + logHandlerError(SUBMIT_STATUS_REPORT_TOOL, result.error, args); + + // Capture what we can despite validation failure: the MCP server's + // inputSchema already validated the shape, so the data is likely usable. + // Fall back to the raw args with nulls stripped. + capturedReport = cleaned as StatusReport; + + return { + content: [ + { + type: "text" as const, + text: JSON.stringify({ + success: true, + message: `Status report submitted (with validation warnings).`, + report: cleaned, + }), + }, + ], + isError: false, + }; + } + + capturedReport = result.data; const blockerSummary = - report.blockers.length > 0 - ? `${report.blockers.length} blocker(s) reported.` + result.data.blockers.length > 0 + ? 
`${result.data.blockers.length} blocker(s) reported.` : "No blockers."; return { @@ -108,17 +149,15 @@ Other navigators will read your report in the sync phase to identify blockers th text: JSON.stringify({ success: true, message: `Status report submitted. ${blockerSummary}`, - report, + report: result.data, }), }, ], isError: false, }; } catch (error) { - if (DEBUG) { - console.error(`[debug] submit_status_report handler error:`, error); - console.error(`[debug] Raw args:`, JSON.stringify(args, null, 2)); - } + // Always log unexpected handler errors + logHandlerError(SUBMIT_STATUS_REPORT_TOOL, error, args); throw error; } } @@ -184,8 +223,28 @@ Prioritize resolving blockers where \`needsFrom\` matches your name, then those async (args) => { try { const cleaned = stripNulls(args); - const sync = SyncResponseSchema.parse(cleaned); - capturedSync = sync; + const result = SyncResponseSchema.safeParse(cleaned); + + if (!result.success) { + logHandlerError(SUBMIT_SYNC_RESPONSE_TOOL, result.error, args); + capturedSync = cleaned as SyncResponse; + + return { + content: [ + { + type: "text" as const, + text: JSON.stringify({ + success: true, + message: `Sync response submitted (with validation warnings).`, + sync: cleaned, + }), + }, + ], + isError: false, + }; + } + + capturedSync = result.data; return { content: [ @@ -193,18 +252,15 @@ Prioritize resolving blockers where \`needsFrom\` matches your name, then those type: "text" as const, text: JSON.stringify({ success: true, - message: `Sync response submitted with ${sync.blockerResolutions.length} blocker resolution(s).`, - sync, + message: `Sync response submitted with ${result.data.blockerResolutions.length} blocker resolution(s).`, + sync: result.data, }), }, ], isError: false, }; } catch (error) { - if (DEBUG) { - console.error(`[debug] submit_sync_response handler error:`, error); - console.error(`[debug] Raw args:`, JSON.stringify(args, null, 2)); - } + logHandlerError(SUBMIT_SYNC_RESPONSE_TOOL, error, args); 
throw error; } }