From 0778bd55f5c84fe49cd4c703680d1a9046aab943 Mon Sep 17 00:00:00 2001 From: Robert Date: Wed, 4 Mar 2026 15:15:39 -0500 Subject: [PATCH 1/4] feat: cross-agent diff review before auto-merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a coding agent completes a task successfully, spawn a different agent to review the git diff before merging to main. Claude codes → Codex reviews, Codex codes → Claude reviews. - Add reviewDiffWithAgent() to AgentRunner with structured JSON prompt - Add pickReviewAgent() for cross-agent selection logic - Add parseReviewResponse() with non-greedy regex for robust JSON extraction - Insert review gate in scheduler's executeAndRelease() between task success and pool.release(merge) - Review runs in /tmp to prevent reviewer from modifying the worktree - Falls back to heuristic reviewDiff() if agent fails or times out - Add ReviewResult.approve field to gate merge decisions - Add task.review field to persist review results - 14 new tests (299 total, 0 failures) Closes #37 Co-Authored-By: Claude Opus 4.6 --- src/__tests__/agent-runner.test.ts | 88 +++++++++++++++- src/__tests__/scheduler.test.ts | 163 +++++++++++++++++++++++++++++ src/agent-runner.ts | 124 +++++++++++++++++++++- src/scheduler.ts | 26 ++++- 4 files changed, 397 insertions(+), 4 deletions(-) diff --git a/src/__tests__/agent-runner.test.ts b/src/__tests__/agent-runner.test.ts index 6cbf505..7cb2387 100644 --- a/src/__tests__/agent-runner.test.ts +++ b/src/__tests__/agent-runner.test.ts @@ -1,6 +1,6 @@ import { describe, it, before, after } from "node:test"; import assert from "node:assert/strict"; -import { AgentRunner } from "../agent-runner.js"; +import { AgentRunner, type ReviewResult } from "../agent-runner.js"; import { createTask } from "../types.js"; import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "node:fs"; import { join } from "node:path"; @@ -345,4 +345,90 @@ describe("AgentRunner", () => { assert.strictEqual(task.events[0].type, "evt-5"); assert.strictEqual(task.events[199].type, "evt-204"); }); + + // ── pickReviewAgent ── + + it("pickReviewAgent selects codex to review claude's work", () => { + assert.strictEqual(AgentRunner.pickReviewAgent("claude"), "codex"); + }); + + it("pickReviewAgent selects claude to review codex's work", () => { + assert.strictEqual(AgentRunner.pickReviewAgent("codex"), "claude"); + }); + + it("pickReviewAgent selects codex to review claude-sdk's work", () => { + assert.strictEqual(AgentRunner.pickReviewAgent("claude-sdk"), "codex"); + }); + + it("pickReviewAgent selects claude for generic agents", () => { + assert.strictEqual(AgentRunner.pickReviewAgent("aider --yes"), "claude"); + assert.strictEqual(AgentRunner.pickReviewAgent("custom-agent"), "claude"); + }); + + // ── parseReviewResponse ── + + it("parseReviewResponse parses clean JSON", () => { + const runner = new AgentRunner(); + const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner); + const result = parse('{"approve": true, "score": 85, "issues": [], "suggestions": ["looks good"]}'); + assert.ok(result); + assert.strictEqual(result.approve, true); + assert.strictEqual(result.score, 85); + assert.deepStrictEqual(result.suggestions, ["looks good"]); + }); + + it("parseReviewResponse extracts JSON from surrounding text", () => { + const runner = new AgentRunner(); + const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner); + const result = parse('Here is my review:\n{"approve": false, "score": 30, "issues": ["bug found"], "suggestions": []}\nDone.'); + assert.ok(result); + assert.strictEqual(result.approve, false); + assert.strictEqual(result.score, 30); + assert.deepStrictEqual(result.issues, ["bug found"]); + }); + + it("parseReviewResponse returns null for unparseable output", () => { + const runner = new AgentRunner(); + const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner); + assert.strictEqual(parse("I can't produce JSON right now"), null); + }); + + it("parseReviewResponse clamps score to 0-100", () => { + const runner = new AgentRunner(); + const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner); + const result = parse('{"approve": true, "score": 150, "issues": [], "suggestions": []}'); + assert.ok(result); + assert.strictEqual(result.score, 100); + }); + + // ── reviewDiff approve field ── + + it("reviewDiff includes approve field based on score threshold", () => { + const runner = new AgentRunner(); + const clean = runner.reviewDiff("diff --git a/foo.ts\n+const x = 1;"); + assert.strictEqual(typeof clean.approve, "boolean"); + assert.strictEqual(clean.approve, true); // score=50 >= 40 + + const bad = runner.reviewDiff("diff --git a/foo.ts\n+console.log('x');\n+console.log('y');\n+console.log('z');"); + // console.log penalty takes score below 40 + // Actually score=50-10=40, still >=40 + assert.strictEqual(typeof bad.approve, "boolean"); + }); + + // ── reviewDiffWithAgent fallback ── + + it("reviewDiffWithAgent falls back to heuristic when agent fails", async () => { + const runner = new AgentRunner(); + // codex not available in test env → will fail → should fall back to heuristic + const result = await runner.reviewDiffWithAgent( + "diff --git a/foo.test.ts b/foo.test.ts\n+it('works', () => {});", + "claude", // task was by claude → review by codex → codex fails → fallback + "/tmp", + 2, // 2 second timeout to keep test fast + ); + assert.strictEqual(typeof result.approve, "boolean"); + assert.strictEqual(typeof result.score, "number"); + assert.ok(Array.isArray(result.issues)); + assert.ok(Array.isArray(result.suggestions)); + }); }); diff --git a/src/__tests__/scheduler.test.ts b/src/__tests__/scheduler.test.ts index 2815f2b..01bf1e0 100644 --- a/src/__tests__/scheduler.test.ts +++ b/src/__tests__/scheduler.test.ts @@ -21,6 +21,7 @@ function makeRunner(): AgentRunner { return { run: async (task: Task) => { task.status = "success"; task.durationMs = 100; return task; }, getRunningTasks: () => [], + reviewDiffWithAgent: async () => ({ approve: true, score: 80, issues: [], suggestions: [] }), } as unknown as AgentRunner; } @@ -845,4 +846,166 @@ describe("Scheduler", () => { assert.ok(result); }); }); + + // ─── 11. Cross-agent review gate ─── + + describe("cross-agent review gate", () => { + it("review gate blocks merge when review rejects", async () => { + let mergeCalledWith = false; + const pool = { + ...makePool(), + release: async (_name: string, merge: boolean) => { + mergeCalledWith = merge; + return { merged: merge }; + }, + } as unknown as WorktreePool; + + const runner = { + run: async (task: Task) => { + task.status = "success"; + task.durationMs = 100; + // Simulate agent creating a git diff event + task.events.push({ + type: "git_diff", + timestamp: new Date().toISOString(), + data: { diff: "diff --git a/foo.ts\n+console.log('debug');" }, + }); + return task; + }, + getRunningTasks: () => [], + reviewDiffWithAgent: async () => ({ + approve: false, + score: 25, + issues: ["console.log found"], + suggestions: [], + }), + } as unknown as AgentRunner; + + const events: Record[] = []; + const s = new Scheduler(pool, runner, makeStore(), (ev) => events.push(ev)); + + // Manually invoke executeAndRelease + const task = s.submit("test review gate"); + const exec = (s as any).executeAndRelease.bind(s); + await exec(task, "w0", "/tmp/w0"); + + // Task should still be "success" (review doesn't change status) + assert.strictEqual(task.status, "success"); + // But merge should have been blocked + assert.strictEqual(mergeCalledWith, false); + // Review should be attached to task + assert.ok(task.review); + assert.strictEqual(task.review!.approve, false); + assert.strictEqual(task.review!.score, 25); + // Should have emitted review_rejected event + assert.ok(events.some(e => e.type === "review_rejected")); + }); + + it("review gate allows merge when review approves", async () => { + let mergeCalledWith = false; + const pool = { + ...makePool(), + release: async (_name: string, merge: boolean) => { + mergeCalledWith = merge; + return { merged: true }; + }, + } as unknown as WorktreePool; + + const runner = { + run: async (task: Task) => { + task.status = "success"; + task.durationMs = 100; + task.events.push({ + type: "git_diff", + timestamp: new Date().toISOString(), + data: { diff: "diff --git a/foo.ts\n+const x = 1;" }, + }); + return task; + }, + getRunningTasks: () => [], + reviewDiffWithAgent: async () => ({ + approve: true, + score: 85, + issues: [], + suggestions: ["looks clean"], + }), + } as unknown as AgentRunner; + + const events: Record[] = []; + const s = new Scheduler(pool, runner, makeStore(), (ev) => events.push(ev)); + + const task = s.submit("test approve"); + const exec = (s as any).executeAndRelease.bind(s); + await exec(task, "w0", "/tmp/w0"); + + assert.strictEqual(task.status, "success"); + assert.strictEqual(mergeCalledWith, true); + assert.ok(task.review); + assert.strictEqual(task.review!.approve, true); + assert.ok(events.some(e => e.type === "review_approved")); + }); + + it("skips review when task has no diff", async () => { + let mergeCalledWith = false; + const pool = { + ...makePool(), + release: async (_name: string, merge: boolean) => { + mergeCalledWith = merge; + return { merged: true }; + }, + } as unknown as WorktreePool; + + const runner = { + run: async (task: Task) => { + task.status = "success"; + task.durationMs = 100; + // No git_diff event + return task; + }, + getRunningTasks: () => [], + reviewDiffWithAgent: async () => { throw new Error("should not be called"); }, + } as unknown as AgentRunner; + + const s = new Scheduler(pool, runner, makeStore()); + + const task = s.submit("no diff task"); + const exec = (s as any).executeAndRelease.bind(s); + await exec(task, "w0", "/tmp/w0"); + + // Should merge without review since there's no diff + assert.strictEqual(mergeCalledWith, true); + assert.strictEqual(task.review, undefined); + }); + + it("skips review when task failed", async () => { + let mergeCalledWith = false; + const pool = { + ...makePool(), + release: async (_name: string, merge: boolean) => { + mergeCalledWith = merge; + return { merged: false }; + }, + } as unknown as WorktreePool; + + const runner = { + run: async (task: Task) => { + task.status = "failed"; + task.error = "compilation error"; + task.durationMs = 50; + return task; + }, + getRunningTasks: () => [], + reviewDiffWithAgent: async () => { throw new Error("should not be called"); }, + } as unknown as AgentRunner; + + const s = new Scheduler(pool, runner, makeStore()); + + const task = s.submit("failed task"); + const exec = (s as any).executeAndRelease.bind(s); + await exec(task, "w0", "/tmp/w0"); + + assert.strictEqual(mergeCalledWith, false); + assert.strictEqual(task.review, undefined); + }); + }); }); diff --git a/src/agent-runner.ts b/src/agent-runner.ts index 2f0f117..da4c0cb 100644 --- a/src/agent-runner.ts +++ b/src/agent-runner.ts @@ -1,4 +1,4 @@ -import type { Task, TaskEvent } from "./types.js"; +import { type Task, type TaskEvent, createTask } from "./types.js"; import { log } from "./logger.js"; import { spawn, type ChildProcess } from "node:child_process"; import { exec as execCb } from "node:child_process"; @@ -51,6 +51,7 @@ export interface RunningTaskInfo { } export interface ReviewResult { + approve: boolean; score: number; issues: string[]; suggestions: string[]; @@ -91,7 +92,126 @@ export class AgentRunner { score -= 10; } - return { score, issues, suggestions }; + return { approve: score >= 40, score, issues, suggestions }; + } + + /** + * Select the cross-review agent: the reviewer should be a different agent + * than the one that wrote the code. + */ + static pickReviewAgent(taskAgent: string): string { + if (taskAgent === "codex") return "claude"; + if (taskAgent === "claude" || taskAgent === "claude-sdk") return "codex"; + return "claude"; // generic agents reviewed by claude + } + + /** + * Spawn a cross-agent to review a git diff. Returns a structured ReviewResult. + * Falls back to the heuristic reviewDiff() if the agent fails or times out. + */ + async reviewDiffWithAgent( + diff: string, + taskAgent: string, + cwd: string, + timeout: number = 60, + ): Promise { + const reviewAgent = AgentRunner.pickReviewAgent(taskAgent); + const truncatedDiff = diff.length > 50_000 ? diff.slice(0, 50_000) + "\n... (truncated)" : diff; + + const reviewPrompt = [ + "You are a code reviewer. Review the following git diff and respond with ONLY a JSON object (no markdown, no explanation).", + "", + "IMPORTANT: Do NOT modify any files. Do NOT run any commands. Only output your JSON review.", + "", + "Evaluate:", + "1. Correctness: Are there bugs, logic errors, or missing edge cases?", + "2. Quality: Is the code clean, readable, and maintainable?", + "3. Safety: Are there security issues, hardcoded secrets, or dangerous patterns?", + "", + "Respond with exactly this JSON format:", + '{"approve": true/false, "score": 0-100, "issues": ["issue1", ...], "suggestions": ["suggestion1", ...]}', + "", + "approve=true means the code is ready to merge. approve=false means it needs fixes.", + "Be pragmatic — minor style issues should not block merge. Focus on correctness and safety.", + "", + "```diff", + truncatedDiff, + "```", + ].join("\n"); + + const reviewTask = createTask(reviewPrompt, { + agent: reviewAgent, + timeout, + maxBudget: 1, + }); + + try { + log("info", "cross-agent review started", { taskAgent, reviewAgent }); + // Run review in /tmp to prevent the reviewer from modifying the worktree + await this.run(reviewTask, "/tmp"); + + if (reviewTask.status !== "success" || !reviewTask.output) { + log("warn", "cross-agent review did not succeed, falling back to heuristic", { + status: reviewTask.status, + error: reviewTask.error, + }); + return this.reviewDiff(diff); + } + + // Parse the JSON response from the review agent + const parsed = this.parseReviewResponse(reviewTask.output); + if (parsed) { + log("info", "cross-agent review complete", { + reviewAgent, + approve: parsed.approve, + score: parsed.score, + issueCount: parsed.issues.length, + }); + return parsed; + } + + log("warn", "cross-agent review response unparseable, falling back to heuristic"); + return this.reviewDiff(diff); + } catch (err: unknown) { + const msg = (err as Error).message ?? String(err); + log("warn", "cross-agent review failed, falling back to heuristic", { error: msg }); + return this.reviewDiff(diff); + } + } + + /** Try to extract a ReviewResult JSON from agent output (may contain surrounding text). */ + private parseReviewResponse(output: string): ReviewResult | null { + // Try direct JSON parse first + try { + const obj = JSON.parse(output.trim()); + if (typeof obj.approve === "boolean" && typeof obj.score === "number") { + return { + approve: obj.approve, + score: Math.max(0, Math.min(100, obj.score)), + issues: Array.isArray(obj.issues) ? obj.issues.map(String) : [], + suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [], + }; + } + } catch { /* not pure JSON */ } + + // Try to find JSON object in the output (agent may wrap it in text) + // Use [^{}]* to avoid greedily matching across multiple JSON objects + const candidates = [...output.matchAll(/\{[^{}]*"approve"\s*:\s*(true|false)[^{}]*\}/g)]; + for (const match of candidates) { + try { + const obj = JSON.parse(match[0]); + if (typeof obj.approve === "boolean" && typeof obj.score === "number") { + return { + approve: obj.approve, + score: Math.max(0, Math.min(100, obj.score)), + issues: Array.isArray(obj.issues) ? obj.issues.map(String) : [], + suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [], + }; + } + } catch { /* try next candidate */ } + } + + return null; } /** Returns info about all tasks currently being executed by this runner. */ diff --git a/src/scheduler.ts b/src/scheduler.ts index 62d854f..9c7a676 100644 --- a/src/scheduler.ts +++ b/src/scheduler.ts @@ -471,7 +471,31 @@ export class Scheduler { log("info", "task started", { taskId: task.id, worker: workerName }); await this.runner.run(task, workerPath, this.onEvent); - const shouldMerge = task.status === "success"; + // Cross-agent review gate: if the task succeeded, have a different agent review the diff + let shouldMerge = task.status === "success"; + if (shouldMerge) { + const diffEvent = task.events.find((e) => e.type === "git_diff"); + const diff = (diffEvent?.data as { diff?: string } | undefined)?.diff; + if (diff) { + const taskAgent = task.agent ?? "claude"; + const reviewAgent = AgentRunner.pickReviewAgent(taskAgent); + this.onEvent?.({ type: "review_started", taskId: task.id, reviewAgent }); + const review = await this.runner.reviewDiffWithAgent(diff, taskAgent, workerPath); + task.review = review; + if (!review.approve) { + shouldMerge = false; + log("info", "cross-agent review rejected merge", { + taskId: task.id, + score: review.score, + issues: review.issues, + }); + this.onEvent?.({ type: "review_rejected", taskId: task.id, score: review.score, issues: review.issues }); + } else { + log("info", "cross-agent review approved merge", { taskId: task.id, score: review.score }); + this.onEvent?.({ type: "review_approved", taskId: task.id, score: review.score }); + } + } + } const mergeResult = await this.pool.release(workerName, shouldMerge, task.id); if (shouldMerge && !mergeResult.merged) { From de35942ce6b27bf9406aa90b4502408184058fc1 Mon Sep 17 00:00:00 2001 From: Robert Date: Wed, 4 Mar 2026 16:06:01 -0500 Subject: [PATCH 2/4] fix(review): address round-2 review issues - Remove unused `cwd` param from reviewDiffWithAgent() - Fix JSON extraction: use indexOf/lastIndexOf instead of regex (handles braces in strings correctly) - Add task.error message when review rejects merge - Add review_started event assertions to scheduler tests Co-Authored-By: Claude Opus 4.6 --- src/__tests__/agent-runner.test.ts | 1 - src/__tests__/scheduler.test.ts | 10 +++++++--- src/agent-runner.ts | 13 ++++++------- src/scheduler.ts | 3 ++- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/__tests__/agent-runner.test.ts b/src/__tests__/agent-runner.test.ts index 7cb2387..3330449 100644 --- a/src/__tests__/agent-runner.test.ts +++ b/src/__tests__/agent-runner.test.ts @@ -423,7 +423,6 @@ describe("AgentRunner", () => { const result = await runner.reviewDiffWithAgent( "diff --git a/foo.test.ts b/foo.test.ts\n+it('works', () => {});", "claude", // task was by claude → review by codex → codex fails → fallback - "/tmp", 2, // 2 second timeout to keep test fast ); assert.strictEqual(typeof result.approve, "boolean"); diff --git a/src/__tests__/scheduler.test.ts b/src/__tests__/scheduler.test.ts index 01bf1e0..0e28e79 100644 --- a/src/__tests__/scheduler.test.ts +++ b/src/__tests__/scheduler.test.ts @@ -897,8 +897,11 @@ describe("Scheduler", () => { assert.ok(task.review); assert.strictEqual(task.review!.approve, false); assert.strictEqual(task.review!.score, 25); - // Should have emitted review_rejected event - assert.ok(events.some(e => e.type === "review_rejected")); + // Error should explain why merge was blocked + assert.ok(task.error.includes("review rejected"), "error should explain rejection"); + // Should have emitted review_started and review_rejected events + assert.ok(events.some(e => e.type === "review_started"), "should emit review_started"); + assert.ok(events.some(e => e.type === "review_rejected"), "should emit review_rejected"); }); it("review gate allows merge when review approves", async () => { @@ -942,7 +945,8 @@ describe("Scheduler", () => { assert.strictEqual(mergeCalledWith, true); assert.ok(task.review); assert.strictEqual(task.review!.approve, true); - assert.ok(events.some(e => e.type === "review_approved")); + assert.ok(events.some(e => e.type === "review_started"), "should emit review_started"); + assert.ok(events.some(e => e.type === "review_approved"), "should emit review_approved"); }); it("skips review when task has no diff", async () => { diff --git a/src/agent-runner.ts b/src/agent-runner.ts index da4c0cb..bdde65f 100644 --- a/src/agent-runner.ts +++ b/src/agent-runner.ts @@ -112,7 +112,6 @@ export class AgentRunner { async reviewDiffWithAgent( diff: string, taskAgent: string, - cwd: string, timeout: number = 60, ): Promise { const reviewAgent = AgentRunner.pickReviewAgent(taskAgent); @@ -194,12 +193,12 @@ export class AgentRunner { } } catch { /* not pure JSON */ } - // Try to find JSON object in the output (agent may wrap it in text) - // Use [^{}]* to avoid greedily matching across multiple JSON objects - const candidates = [...output.matchAll(/\{[^{}]*"approve"\s*:\s*(true|false)[^{}]*\}/g)]; - for (const match of candidates) { + // Try to extract JSON by finding the outermost { } that contains "approve" + const start = output.indexOf("{"); + const end = output.lastIndexOf("}"); + if (start !== -1 && end > start) { try { - const obj = JSON.parse(match[0]); + const obj = JSON.parse(output.slice(start, end + 1)); if (typeof obj.approve === "boolean" && typeof obj.score === "number") { return { approve: obj.approve, @@ -208,7 +207,7 @@ export class AgentRunner { suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [], }; } - } catch { /* try next candidate */ } + } catch { /* not valid JSON */ } } return null; diff --git a/src/scheduler.ts b/src/scheduler.ts index 9c7a676..9dc9c4e 100644 --- a/src/scheduler.ts +++ b/src/scheduler.ts @@ -480,10 +480,11 @@ export class Scheduler { const taskAgent = task.agent ?? "claude"; const reviewAgent = AgentRunner.pickReviewAgent(taskAgent); this.onEvent?.({ type: "review_started", taskId: task.id, reviewAgent }); - const review = await this.runner.reviewDiffWithAgent(diff, taskAgent, workerPath); + const review = await this.runner.reviewDiffWithAgent(diff, taskAgent); task.review = review; if (!review.approve) { shouldMerge = false; + task.error = `review rejected (score ${review.score}): ${review.issues.join("; ")}`; log("info", "cross-agent review rejected merge", { taskId: task.id, score: review.score, From 4a7df1eb66a481552178df1b7c39379a6c196cb4 Mon Sep 17 00:00:00 2001 From: Robert Date: Wed, 4 Mar 2026 16:09:06 -0500 Subject: [PATCH 3/4] refactor(review): extract normalizeReviewObj, add reviewAgent to result - Extract duplicated JSON normalisation into normalizeReviewObj() - Add reviewAgent field to ReviewResult so consumers know which agent reviewed - Update Task.review type to include reviewAgent Co-Authored-By: Claude Opus 4.6 --- src/agent-runner.ts | 36 +++++++++++++++++------------------- src/scheduler.ts | 3 +-- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/agent-runner.ts b/src/agent-runner.ts index bdde65f..493c9a2 100644 --- a/src/agent-runner.ts +++ b/src/agent-runner.ts @@ -55,6 +55,7 @@ export interface ReviewResult { score: number; issues: string[]; suggestions: string[]; + reviewAgent?: string; } export class AgentRunner { @@ -160,6 +161,7 @@ export class AgentRunner { // Parse the JSON response from the review agent const parsed = this.parseReviewResponse(reviewTask.output); if (parsed) { + parsed.reviewAgent = reviewAgent; log("info", "cross-agent review complete", { reviewAgent, approve: parsed.approve, @@ -182,37 +184,33 @@ export class AgentRunner { private parseReviewResponse(output: string): ReviewResult | null { // Try direct JSON parse first try { - const obj = JSON.parse(output.trim()); - if (typeof obj.approve === "boolean" && typeof obj.score === "number") { - return { - approve: obj.approve, - score: Math.max(0, Math.min(100, obj.score)), - issues: Array.isArray(obj.issues) ? obj.issues.map(String) : [], - suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [], - }; - } + const result = this.normalizeReviewObj(JSON.parse(output.trim())); + if (result) return result; } catch { /* not pure JSON */ } - // Try to extract JSON by finding the outermost { } that contains "approve" + // Try to extract JSON by finding the outermost { } const start = output.indexOf("{"); const end = output.lastIndexOf("}"); if (start !== -1 && end > start) { try { - const obj = JSON.parse(output.slice(start, end + 1)); - if (typeof obj.approve === "boolean" && typeof obj.score === "number") { - return { - approve: obj.approve, - score: Math.max(0, Math.min(100, obj.score)), - issues: Array.isArray(obj.issues) ? obj.issues.map(String) : [], - suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [], - }; - } + const result = this.normalizeReviewObj(JSON.parse(output.slice(start, end + 1))); + if (result) return result; } catch { /* not valid JSON */ } } return null; } + private normalizeReviewObj(obj: Record): ReviewResult | null { + if (typeof obj.approve !== "boolean" || typeof obj.score !== "number") return null; + return { + approve: obj.approve, + score: Math.max(0, Math.min(100, obj.score)), + issues: Array.isArray(obj.issues) ? obj.issues.map(String) : [], + suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [], + }; + } + /** Returns info about all tasks currently being executed by this runner. */ getRunningTasks(): RunningTaskInfo[] { const now = Date.now(); diff --git a/src/scheduler.ts b/src/scheduler.ts index 9dc9c4e..18e4742 100644 --- a/src/scheduler.ts +++ b/src/scheduler.ts @@ -478,8 +478,7 @@ export class Scheduler { const diff = (diffEvent?.data as { diff?: string } | undefined)?.diff; if (diff) { const taskAgent = task.agent ?? "claude"; - const reviewAgent = AgentRunner.pickReviewAgent(taskAgent); - this.onEvent?.({ type: "review_started", taskId: task.id, reviewAgent }); + this.onEvent?.({ type: "review_started", taskId: task.id, reviewAgent: AgentRunner.pickReviewAgent(taskAgent) }); const review = await this.runner.reviewDiffWithAgent(diff, taskAgent); task.review = review; if (!review.approve) { From bed551953af1f8c3392d9462ddc8f9325f605094 Mon Sep 17 00:00:00 2001 From: Robert Date: Wed, 4 Mar 2026 16:44:37 -0500 Subject: [PATCH 4/4] refactor(review): move ReviewResult to types.ts, add missing tests - Move ReviewResult interface to types.ts (single source of truth) - Task.review now references ReviewResult instead of inline duplicate - Re-export ReviewResult from agent-runner.ts for backward compat - Add test: normalizeReviewObj returns null for missing/wrong-type fields - Add test: scheduler catch block when reviewDiffWithAgent throws Co-Authored-By: Claude Opus 4.6 --- src/__tests__/agent-runner.test.ts | 8 +++++++ src/__tests__/scheduler.test.ts | 38 ++++++++++++++++++++++++++++++ src/agent-runner.ts | 10 ++------ 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/__tests__/agent-runner.test.ts b/src/__tests__/agent-runner.test.ts index 3330449..f0d7f48 100644 --- a/src/__tests__/agent-runner.test.ts +++ b/src/__tests__/agent-runner.test.ts @@ -401,6 +401,14 @@ describe("AgentRunner", () => { assert.strictEqual(result.score, 100); }); + it("parseReviewResponse returns null when approve or score is missing/wrong type", () => { + const runner = new AgentRunner(); + const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner); + assert.strictEqual(parse('{"score": 80, "issues": []}'), null); // missing approve + assert.strictEqual(parse('{"approve": "yes", "score": 80}'), null); // approve is string + assert.strictEqual(parse('{"approve": true, "issues": []}'), null); // missing score + }); + // ── reviewDiff approve field ── it("reviewDiff includes approve field based on score threshold", () => { diff --git a/src/__tests__/scheduler.test.ts b/src/__tests__/scheduler.test.ts index 0e28e79..0da3e3c 100644 --- a/src/__tests__/scheduler.test.ts +++ b/src/__tests__/scheduler.test.ts @@ -1011,5 +1011,43 @@ describe("Scheduler", () => { assert.strictEqual(mergeCalledWith, false); assert.strictEqual(task.review, undefined); }); + + it("falls back gracefully when reviewDiffWithAgent throws", async () => { + let mergeCalledWith = false; + const pool = { + ...makePool(), + release: async (_name: string, merge: boolean) => { + mergeCalledWith = merge; + return { merged: false }; + }, + } as unknown as WorktreePool; + + const runner = { + run: async (task: Task) => { + task.status = "success"; + task.durationMs = 100; + task.events.push({ + type: "git_diff", + timestamp: new Date().toISOString(), + data: { diff: "diff --git a/foo.ts\n+const x = 1;" }, + }); + return task; + }, + getRunningTasks: () => [], + reviewDiffWithAgent: async () => { throw new Error("network error"); }, + } as unknown as AgentRunner; + + const s = new Scheduler(pool, runner, makeStore()); + + const task = s.submit("review throws"); + task.maxRetries = 0; // prevent retry so we can assert final status + const exec = (s as any).executeAndRelease.bind(s); + await exec(task, "w0", "/tmp/w0"); + + // The outer catch should mark the task as failed + assert.strictEqual(task.status, "failed"); + assert.ok(task.error.includes("network error")); + assert.strictEqual(mergeCalledWith, false); + }); }); }); diff --git a/src/agent-runner.ts b/src/agent-runner.ts index 493c9a2..d2b7545 100644 --- a/src/agent-runner.ts +++ b/src/agent-runner.ts @@ -1,4 +1,4 @@ -import { type Task, type TaskEvent, createTask } from "./types.js"; +import { type Task, type TaskEvent, type ReviewResult, createTask } from "./types.js"; import { log } from "./logger.js"; import { spawn, type ChildProcess } from "node:child_process"; import { exec as execCb } from "node:child_process"; @@ -50,13 +50,7 @@ export interface RunningTaskInfo { costUsd: number; } -export interface ReviewResult { - approve: boolean; - score: number; - issues: string[]; - suggestions: string[]; - reviewAgent?: string; -} +export type { ReviewResult }; export class AgentRunner { private readonly _runningTasks = new Map();