Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@
"author": "Terra",
"license": "TBD",
"devDependencies": {
"@types/marked": "^5.0.2",
"@types/node": "^20.0.0",
"typescript": "^5.3.0",
"tsx": "^4.7.0",
"typescript": "^5.3.0",
"vitest": "^1.2.0"
},
"engines": {
Expand Down
1 change: 1 addition & 0 deletions packages/autonav/src/harness/claude-code-harness.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ function configToSdkOptions(config: AgentConfig): Record<string, unknown> {
if (config.additionalDirectories) options.additionalDirectories = config.additionalDirectories;
if (config.maxTurns !== undefined) options.maxTurns = config.maxTurns;
if (config.maxBudgetUsd !== undefined) options.maxBudgetUsd = config.maxBudgetUsd;
if (config.allowedTools) options.allowedTools = config.allowedTools;
if (config.disallowedTools) options.disallowedTools = config.disallowedTools;
if (config.mcpServers) options.mcpServers = config.mcpServers;
if (config.permissionMode) options.permissionMode = config.permissionMode;
Expand Down
3 changes: 3 additions & 0 deletions packages/autonav/src/harness/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ export interface AgentConfig {
/** Maximum budget in USD */
maxBudgetUsd?: number;

/** Tools the agent is explicitly allowed to use without permission prompts */
allowedTools?: string[];

/** Tools the agent is disallowed from using */
disallowedTools?: string[];

Expand Down
21 changes: 19 additions & 2 deletions packages/autonav/src/standup/loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ async function runReportPhase(
cwd: nav.directory,
additionalDirectories: [...nav.workingDirectories, standupDir],
permissionMode: "acceptEdits" as const,
allowedTools: ["mcp__autonav-standup-report__submit_status_report"],
mcpServers: {
"autonav-standup-report": server,
},
Expand All @@ -187,18 +188,30 @@ async function runReportPhase(
const session = harness.run(agentConfig, prompt);

let resultEvent: (AgentEvent & { type: "result" }) | undefined;
let submitToolCallCount = 0;
let submitToolErrorCount = 0;
let lastToolError = "";

for await (const event of session) {
if (event.type === "tool_use") {
const shortToolName = event.name.split("__").pop() || event.name;
if (shortToolName === "submit_status_report") {
submitToolCallCount++;
}
if (verbose) {
console.log(`[Report:${nav.name}] Tool: ${shortToolName}`);
}
debug(`[Report:${nav.name}] Full tool name: ${event.name}`);
}
if (event.type === "tool_result") {
if (event.isError) {
debug(`[Report:${nav.name}] Tool ERROR result:`, event.content.substring(0, 500));
submitToolErrorCount++;
lastToolError = event.content.substring(0, 500);
// Always log tool errors — these reveal why tools fail
if (verbose) {
console.log(`[Report:${nav.name}] Tool ERROR: ${lastToolError}`);
}
debug(`[Report:${nav.name}] Tool ERROR result:`, lastToolError);
} else if (DEBUG) {
debug(`[Report:${nav.name}] Tool result:`, event.content.substring(0, 300));
}
Expand Down Expand Up @@ -232,8 +245,11 @@ async function runReportPhase(

const report = protocol.getCapturedReport();
if (!report) {
const diagnostic = submitToolCallCount > 0
? `Tool was called ${submitToolCallCount} time(s) with ${submitToolErrorCount} error(s).${lastToolError ? ` Last error: ${lastToolError}` : ""}`
: "Tool was never called by the navigator.";
throw new Error(
`${nav.name} did not submit a status report. Navigator must use the submit_status_report tool.`
`${nav.name} did not submit a status report. ${diagnostic}`
);
}

Expand Down Expand Up @@ -296,6 +312,7 @@ async function runSyncPhase(
cwd: nav.directory,
additionalDirectories: [...nav.workingDirectories, standupDir],
permissionMode: "acceptEdits" as const,
allowedTools: ["mcp__autonav-standup-sync__submit_sync_response"],
mcpServers: {
"autonav-standup-sync": server,
},
Expand Down
90 changes: 73 additions & 17 deletions packages/autonav/src/standup/standup-protocol.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,19 @@ import {
type SyncResponse,
} from "./types.js";

/**
 * Always-on diagnostic logging for tool handler failures.
 *
 * Output goes through `console.error` so it is visible even without DEBUG=1.
 * This helper is invoked from `catch` blocks right before the caught error is
 * re-thrown, so it must never throw itself: a failure while formatting the
 * diagnostics would otherwise replace the error being reported.
 *
 * @param toolName - Name of the tool whose handler failed; used in the log prefix.
 * @param error - The caught value or validation error. Non-`Error` values are
 *   stringified with `String()`.
 * @param args - The raw arguments passed to the handler. Only the top-level
 *   keys are logged, never the values.
 */
function logHandlerError(toolName: string, error: unknown, args: unknown): void {
  const prefix = `[autonav:${toolName}]`;

  const message = error instanceof Error ? error.message : String(error);
  console.error(`${prefix} Handler error: ${message}`);

  if (error instanceof Error && "errors" in error) {
    // Validation errors (e.g. from Zod) carry an `.errors` array with per-field details.
    const details: unknown = (error as Error & { errors?: unknown }).errors;

    let serialized: string;
    try {
      serialized = JSON.stringify(details, null, 2);
    } catch {
      // JSON.stringify throws on circular references and BigInt values.
      // Swallow that here so the original handler error is not masked.
      serialized = "[unserializable validation details]";
    }
    console.error(`${prefix} Validation details:`, serialized);
  }

  const argKeys =
    args !== null && typeof args === "object" ? Object.keys(args).join(", ") : "N/A";
  console.error(`${prefix} Args keys: ${argKeys}`);
}

/**
* Tool names
*/
Expand Down Expand Up @@ -92,13 +105,41 @@ Other navigators will read your report in the sync phase to identify blockers th
console.error(`[debug] Raw args keys:`, Object.keys(args));
}
try {
// The MCP server already validated against the tool's inputSchema.
// Use safeParse for the StatusReport schema to get actionable errors
// instead of throwing (which the MCP server swallows as isError: true).
const cleaned = stripNulls(args);
const report = StatusReportSchema.parse(cleaned);
capturedReport = report;
const result = StatusReportSchema.safeParse(cleaned);

if (!result.success) {
// Always log validation failures — these are otherwise invisible
logHandlerError(SUBMIT_STATUS_REPORT_TOOL, result.error, args);

// Capture what we can despite validation failure: the MCP server's
// inputSchema already validated the shape, so the data is likely usable.
// Fall back to the raw args with nulls stripped.
capturedReport = cleaned as StatusReport;

return {
content: [
{
type: "text" as const,
text: JSON.stringify({
success: true,
message: `Status report submitted (with validation warnings).`,
report: cleaned,
}),
},
],
isError: false,
};
}

capturedReport = result.data;

const blockerSummary =
report.blockers.length > 0
? `${report.blockers.length} blocker(s) reported.`
result.data.blockers.length > 0
? `${result.data.blockers.length} blocker(s) reported.`
: "No blockers.";

return {
Expand All @@ -108,17 +149,15 @@ Other navigators will read your report in the sync phase to identify blockers th
text: JSON.stringify({
success: true,
message: `Status report submitted. ${blockerSummary}`,
report,
report: result.data,
}),
},
],
isError: false,
};
} catch (error) {
if (DEBUG) {
console.error(`[debug] submit_status_report handler error:`, error);
console.error(`[debug] Raw args:`, JSON.stringify(args, null, 2));
}
// Always log unexpected handler errors
logHandlerError(SUBMIT_STATUS_REPORT_TOOL, error, args);
throw error;
}
}
Expand Down Expand Up @@ -184,27 +223,44 @@ Prioritize resolving blockers where \`needsFrom\` matches your name, then those
async (args) => {
try {
const cleaned = stripNulls(args);
const sync = SyncResponseSchema.parse(cleaned);
capturedSync = sync;
const result = SyncResponseSchema.safeParse(cleaned);

if (!result.success) {
logHandlerError(SUBMIT_SYNC_RESPONSE_TOOL, result.error, args);
capturedSync = cleaned as SyncResponse;

return {
content: [
{
type: "text" as const,
text: JSON.stringify({
success: true,
message: `Sync response submitted (with validation warnings).`,
sync: cleaned,
}),
},
],
isError: false,
};
}

capturedSync = result.data;

return {
content: [
{
type: "text" as const,
text: JSON.stringify({
success: true,
message: `Sync response submitted with ${sync.blockerResolutions.length} blocker resolution(s).`,
sync,
message: `Sync response submitted with ${result.data.blockerResolutions.length} blocker resolution(s).`,
sync: result.data,
}),
},
],
isError: false,
};
} catch (error) {
if (DEBUG) {
console.error(`[debug] submit_sync_response handler error:`, error);
console.error(`[debug] Raw args:`, JSON.stringify(args, null, 2));
}
logHandlerError(SUBMIT_SYNC_RESPONSE_TOOL, error, args);
throw error;
}
}
Expand Down
Loading