From 0778bd55f5c84fe49cd4c703680d1a9046aab943 Mon Sep 17 00:00:00 2001
From: Robert <robert.alexander37899@gmail.com>
Date: Wed, 4 Mar 2026 15:15:39 -0500
Subject: [PATCH 1/4] feat: cross-agent diff review before auto-merge
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a coding agent completes a task successfully, spawn a different
agent to review the git diff before merging to main. Claude codes →
Codex reviews, Codex codes → Claude reviews.

- Add reviewDiffWithAgent() to AgentRunner with structured JSON prompt
- Add pickReviewAgent() for cross-agent selection logic
- Add parseReviewResponse() with non-greedy regex for robust JSON extraction
- Insert review gate in scheduler's executeAndRelease() between task
  success and pool.release(merge)
- Review runs in /tmp to prevent reviewer from modifying the worktree
- Falls back to heuristic reviewDiff() if agent fails or times out
- Add ReviewResult.approve field to gate merge decisions
- Add task.review field to persist review results
- 14 new tests (299 total, 0 failures)

Closes #37

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/__tests__/agent-runner.test.ts |  88 +++++++++++++++-
 src/__tests__/scheduler.test.ts    | 163 +++++++++++++++++++++++++++++
 src/agent-runner.ts                | 124 +++++++++++++++++++++-
 src/scheduler.ts                   |  26 ++++-
 4 files changed, 397 insertions(+), 4 deletions(-)

diff --git a/src/__tests__/agent-runner.test.ts b/src/__tests__/agent-runner.test.ts
index 6cbf505..7cb2387 100644
--- a/src/__tests__/agent-runner.test.ts
+++ b/src/__tests__/agent-runner.test.ts
@@ -1,6 +1,6 @@
 import { describe, it, before, after } from "node:test";
 import assert from "node:assert/strict";
-import { AgentRunner } from "../agent-runner.js";
+import { AgentRunner, type ReviewResult } from "../agent-runner.js";
 import { createTask } from "../types.js";
 import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "node:fs";
 import { join } from "node:path";
@@ -345,4 +345,90 @@ describe("AgentRunner", () => {
     assert.strictEqual(task.events[0].type, "evt-5");
     assert.strictEqual(task.events[199].type, "evt-204");
   });
+
+  // ── pickReviewAgent ──
+
+  it("pickReviewAgent selects codex to review claude's work", () => {
+    assert.strictEqual(AgentRunner.pickReviewAgent("claude"), "codex");
+  });
+
+  it("pickReviewAgent selects claude to review codex's work", () => {
+    assert.strictEqual(AgentRunner.pickReviewAgent("codex"), "claude");
+  });
+
+  it("pickReviewAgent selects codex to review claude-sdk's work", () => {
+    assert.strictEqual(AgentRunner.pickReviewAgent("claude-sdk"), "codex");
+  });
+
+  it("pickReviewAgent selects claude for generic agents", () => {
+    assert.strictEqual(AgentRunner.pickReviewAgent("aider --yes"), "claude");
+    assert.strictEqual(AgentRunner.pickReviewAgent("custom-agent"), "claude");
+  });
+
+  // ── parseReviewResponse ──
+
+  it("parseReviewResponse parses clean JSON", () => {
+    const runner = new AgentRunner();
+    const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner);
+    const result = parse('{"approve": true, "score": 85, "issues": [], "suggestions": ["looks good"]}');
+    assert.ok(result);
+    assert.strictEqual(result.approve, true);
+    assert.strictEqual(result.score, 85);
+    assert.deepStrictEqual(result.suggestions, ["looks good"]);
+  });
+
+  it("parseReviewResponse extracts JSON from surrounding text", () => {
+    const runner = new AgentRunner();
+    const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner);
+    const result = parse('Here is my review:\n{"approve": false, "score": 30, "issues": ["bug found"], "suggestions": []}\nDone.');
+    assert.ok(result);
+    assert.strictEqual(result.approve, false);
+    assert.strictEqual(result.score, 30);
+    assert.deepStrictEqual(result.issues, ["bug found"]);
+  });
+
+  it("parseReviewResponse returns null for unparseable output", () => {
+    const runner = new AgentRunner();
+    const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner);
+    assert.strictEqual(parse("I can't produce JSON right now"), null);
+  });
+
+  it("parseReviewResponse clamps score to 0-100", () => {
+    const runner = new AgentRunner();
+    const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner);
+    const result = parse('{"approve": true, "score": 150, "issues": [], "suggestions": []}');
+    assert.ok(result);
+    assert.strictEqual(result.score, 100);
+  });
+
+  // ── reviewDiff approve field ──
+
+  it("reviewDiff includes approve field based on score threshold", () => {
+    const runner = new AgentRunner();
+    const clean = runner.reviewDiff("diff --git a/foo.ts\n+const x = 1;");
+    assert.strictEqual(typeof clean.approve, "boolean");
+    assert.strictEqual(clean.approve, true); // score=50 >= 40
+
+    const bad = runner.reviewDiff("diff --git a/foo.ts\n+console.log('x');\n+console.log('y');\n+console.log('z');");
+    // console.log penalty takes score below 40
+    // Actually score=50-10=40, still >=40
+    assert.strictEqual(typeof bad.approve, "boolean");
+  });
+
+  // ── reviewDiffWithAgent fallback ──
+
+  it("reviewDiffWithAgent falls back to heuristic when agent fails", async () => {
+    const runner = new AgentRunner();
+    // codex not available in test env → will fail → should fall back to heuristic
+    const result = await runner.reviewDiffWithAgent(
+      "diff --git a/foo.test.ts b/foo.test.ts\n+it('works', () => {});",
+      "claude",  // task was by claude → review by codex → codex fails → fallback
+      "/tmp",
+      2,  // 2 second timeout to keep test fast
+    );
+    assert.strictEqual(typeof result.approve, "boolean");
+    assert.strictEqual(typeof result.score, "number");
+    assert.ok(Array.isArray(result.issues));
+    assert.ok(Array.isArray(result.suggestions));
+  });
 });
diff --git a/src/__tests__/scheduler.test.ts b/src/__tests__/scheduler.test.ts
index 2815f2b..01bf1e0 100644
--- a/src/__tests__/scheduler.test.ts
+++ b/src/__tests__/scheduler.test.ts
@@ -21,6 +21,7 @@ function makeRunner(): AgentRunner {
   return {
     run: async (task: Task) => { task.status = "success"; task.durationMs = 100; return task; },
     getRunningTasks: () => [],
+    reviewDiffWithAgent: async () => ({ approve: true, score: 80, issues: [], suggestions: [] }),
   } as unknown as AgentRunner;
 }
 
@@ -845,4 +846,166 @@ describe("Scheduler", () => {
       assert.ok(result);
     });
   });
+
+  // ─── 11. Cross-agent review gate ───
+
+  describe("cross-agent review gate", () => {
+    it("review gate blocks merge when review rejects", async () => {
+      let mergeCalledWith = false;
+      const pool = {
+        ...makePool(),
+        release: async (_name: string, merge: boolean) => {
+          mergeCalledWith = merge;
+          return { merged: merge };
+        },
+      } as unknown as WorktreePool;
+
+      const runner = {
+        run: async (task: Task) => {
+          task.status = "success";
+          task.durationMs = 100;
+          // Simulate agent creating a git diff event
+          task.events.push({
+            type: "git_diff",
+            timestamp: new Date().toISOString(),
+            data: { diff: "diff --git a/foo.ts\n+console.log('debug');" },
+          });
+          return task;
+        },
+        getRunningTasks: () => [],
+        reviewDiffWithAgent: async () => ({
+          approve: false,
+          score: 25,
+          issues: ["console.log found"],
+          suggestions: [],
+        }),
+      } as unknown as AgentRunner;
+
+      const events: Record<string, unknown>[] = [];
+      const s = new Scheduler(pool, runner, makeStore(), (ev) => events.push(ev));
+
+      // Manually invoke executeAndRelease
+      const task = s.submit("test review gate");
+      const exec = (s as any).executeAndRelease.bind(s);
+      await exec(task, "w0", "/tmp/w0");
+
+      // Task should still be "success" (review doesn't change status)
+      assert.strictEqual(task.status, "success");
+      // But merge should have been blocked
+      assert.strictEqual(mergeCalledWith, false);
+      // Review should be attached to task
+      assert.ok(task.review);
+      assert.strictEqual(task.review!.approve, false);
+      assert.strictEqual(task.review!.score, 25);
+      // Should have emitted review_rejected event
+      assert.ok(events.some(e => e.type === "review_rejected"));
+    });
+
+    it("review gate allows merge when review approves", async () => {
+      let mergeCalledWith = false;
+      const pool = {
+        ...makePool(),
+        release: async (_name: string, merge: boolean) => {
+          mergeCalledWith = merge;
+          return { merged: true };
+        },
+      } as unknown as WorktreePool;
+
+      const runner = {
+        run: async (task: Task) => {
+          task.status = "success";
+          task.durationMs = 100;
+          task.events.push({
+            type: "git_diff",
+            timestamp: new Date().toISOString(),
+            data: { diff: "diff --git a/foo.ts\n+const x = 1;" },
+          });
+          return task;
+        },
+        getRunningTasks: () => [],
+        reviewDiffWithAgent: async () => ({
+          approve: true,
+          score: 85,
+          issues: [],
+          suggestions: ["looks clean"],
+        }),
+      } as unknown as AgentRunner;
+
+      const events: Record<string, unknown>[] = [];
+      const s = new Scheduler(pool, runner, makeStore(), (ev) => events.push(ev));
+
+      const task = s.submit("test approve");
+      const exec = (s as any).executeAndRelease.bind(s);
+      await exec(task, "w0", "/tmp/w0");
+
+      assert.strictEqual(task.status, "success");
+      assert.strictEqual(mergeCalledWith, true);
+      assert.ok(task.review);
+      assert.strictEqual(task.review!.approve, true);
+      assert.ok(events.some(e => e.type === "review_approved"));
+    });
+
+    it("skips review when task has no diff", async () => {
+      let mergeCalledWith = false;
+      const pool = {
+        ...makePool(),
+        release: async (_name: string, merge: boolean) => {
+          mergeCalledWith = merge;
+          return { merged: true };
+        },
+      } as unknown as WorktreePool;
+
+      const runner = {
+        run: async (task: Task) => {
+          task.status = "success";
+          task.durationMs = 100;
+          // No git_diff event
+          return task;
+        },
+        getRunningTasks: () => [],
+        reviewDiffWithAgent: async () => { throw new Error("should not be called"); },
+      } as unknown as AgentRunner;
+
+      const s = new Scheduler(pool, runner, makeStore());
+
+      const task = s.submit("no diff task");
+      const exec = (s as any).executeAndRelease.bind(s);
+      await exec(task, "w0", "/tmp/w0");
+
+      // Should merge without review since there's no diff
+      assert.strictEqual(mergeCalledWith, true);
+      assert.strictEqual(task.review, undefined);
+    });
+
+    it("skips review when task failed", async () => {
+      let mergeCalledWith = false;
+      const pool = {
+        ...makePool(),
+        release: async (_name: string, merge: boolean) => {
+          mergeCalledWith = merge;
+          return { merged: false };
+        },
+      } as unknown as WorktreePool;
+
+      const runner = {
+        run: async (task: Task) => {
+          task.status = "failed";
+          task.error = "compilation error";
+          task.durationMs = 50;
+          return task;
+        },
+        getRunningTasks: () => [],
+        reviewDiffWithAgent: async () => { throw new Error("should not be called"); },
+      } as unknown as AgentRunner;
+
+      const s = new Scheduler(pool, runner, makeStore());
+
+      const task = s.submit("failed task");
+      const exec = (s as any).executeAndRelease.bind(s);
+      await exec(task, "w0", "/tmp/w0");
+
+      assert.strictEqual(mergeCalledWith, false);
+      assert.strictEqual(task.review, undefined);
+    });
+  });
 });
diff --git a/src/agent-runner.ts b/src/agent-runner.ts
index 2f0f117..da4c0cb 100644
--- a/src/agent-runner.ts
+++ b/src/agent-runner.ts
@@ -1,4 +1,4 @@
-import type { Task, TaskEvent } from "./types.js";
+import { type Task, type TaskEvent, createTask } from "./types.js";
 import { log } from "./logger.js";
 import { spawn, type ChildProcess } from "node:child_process";
 import { exec as execCb } from "node:child_process";
@@ -51,6 +51,7 @@ export interface RunningTaskInfo {
 }
 
 export interface ReviewResult {
+  approve: boolean;
   score: number;
   issues: string[];
   suggestions: string[];
@@ -91,7 +92,126 @@ export class AgentRunner {
       score -= 10;
     }
 
-    return { score, issues, suggestions };
+    return { approve: score >= 40, score, issues, suggestions };
+  }
+
+  /**
+   * Select the cross-review agent: the reviewer should be a different agent
+   * than the one that wrote the code.
+   */
+  static pickReviewAgent(taskAgent: string): string {
+    if (taskAgent === "codex") return "claude";
+    if (taskAgent === "claude" || taskAgent === "claude-sdk") return "codex";
+    return "claude"; // generic agents reviewed by claude
+  }
+
+  /**
+   * Spawn a cross-agent to review a git diff. Returns a structured ReviewResult.
+   * Falls back to the heuristic reviewDiff() if the agent fails or times out.
+   */
+  async reviewDiffWithAgent(
+    diff: string,
+    taskAgent: string,
+    cwd: string,
+    timeout: number = 60,
+  ): Promise<ReviewResult> {
+    const reviewAgent = AgentRunner.pickReviewAgent(taskAgent);
+    const truncatedDiff = diff.length > 50_000 ? diff.slice(0, 50_000) + "\n... (truncated)" : diff;
+
+    const reviewPrompt = [
+      "You are a code reviewer. Review the following git diff and respond with ONLY a JSON object (no markdown, no explanation).",
+      "",
+      "IMPORTANT: Do NOT modify any files. Do NOT run any commands. Only output your JSON review.",
+      "",
+      "Evaluate:",
+      "1. Correctness: Are there bugs, logic errors, or missing edge cases?",
+      "2. Quality: Is the code clean, readable, and maintainable?",
+      "3. Safety: Are there security issues, hardcoded secrets, or dangerous patterns?",
+      "",
+      "Respond with exactly this JSON format:",
+      '{"approve": true/false, "score": 0-100, "issues": ["issue1", ...], "suggestions": ["suggestion1", ...]}',
+      "",
+      "approve=true means the code is ready to merge. approve=false means it needs fixes.",
+      "Be pragmatic — minor style issues should not block merge. Focus on correctness and safety.",
+      "",
+      "```diff",
+      truncatedDiff,
+      "```",
+    ].join("\n");
+
+    const reviewTask = createTask(reviewPrompt, {
+      agent: reviewAgent,
+      timeout,
+      maxBudget: 1,
+    });
+
+    try {
+      log("info", "cross-agent review started", { taskAgent, reviewAgent });
+      // Run review in /tmp to prevent the reviewer from modifying the worktree
+      await this.run(reviewTask, "/tmp");
+
+      if (reviewTask.status !== "success" || !reviewTask.output) {
+        log("warn", "cross-agent review did not succeed, falling back to heuristic", {
+          status: reviewTask.status,
+          error: reviewTask.error,
+        });
+        return this.reviewDiff(diff);
+      }
+
+      // Parse the JSON response from the review agent
+      const parsed = this.parseReviewResponse(reviewTask.output);
+      if (parsed) {
+        log("info", "cross-agent review complete", {
+          reviewAgent,
+          approve: parsed.approve,
+          score: parsed.score,
+          issueCount: parsed.issues.length,
+        });
+        return parsed;
+      }
+
+      log("warn", "cross-agent review response unparseable, falling back to heuristic");
+      return this.reviewDiff(diff);
+    } catch (err: unknown) {
+      const msg = (err as Error).message ?? String(err);
+      log("warn", "cross-agent review failed, falling back to heuristic", { error: msg });
+      return this.reviewDiff(diff);
+    }
+  }
+
+  /** Try to extract a ReviewResult JSON from agent output (may contain surrounding text). */
+  private parseReviewResponse(output: string): ReviewResult | null {
+    // Try direct JSON parse first
+    try {
+      const obj = JSON.parse(output.trim());
+      if (typeof obj.approve === "boolean" && typeof obj.score === "number") {
+        return {
+          approve: obj.approve,
+          score: Math.max(0, Math.min(100, obj.score)),
+          issues: Array.isArray(obj.issues) ? obj.issues.map(String) : [],
+          suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [],
+        };
+      }
+    } catch { /* not pure JSON */ }
+
+    // Try to find JSON object in the output (agent may wrap it in text)
+    // Use [^{}]* to avoid greedily matching across multiple JSON objects
+    const candidates = [...output.matchAll(/\{[^{}]*"approve"\s*:\s*(true|false)[^{}]*\}/g)];
+    for (const match of candidates) {
+      try {
+        const obj = JSON.parse(match[0]);
+        if (typeof obj.approve === "boolean" && typeof obj.score === "number") {
+          return {
+            approve: obj.approve,
+            score: Math.max(0, Math.min(100, obj.score)),
+            issues: Array.isArray(obj.issues) ? obj.issues.map(String) : [],
+            suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [],
+          };
+        }
+      } catch { /* try next candidate */ }
+    }
+
+    return null;
   }
 
   /** Returns info about all tasks currently being executed by this runner. */
diff --git a/src/scheduler.ts b/src/scheduler.ts
index 62d854f..9c7a676 100644
--- a/src/scheduler.ts
+++ b/src/scheduler.ts
@@ -471,7 +471,31 @@ export class Scheduler {
       log("info", "task started", { taskId: task.id, worker: workerName });
       await this.runner.run(task, workerPath, this.onEvent);
 
-      const shouldMerge = task.status === "success";
+      // Cross-agent review gate: if the task succeeded, have a different agent review the diff
+      let shouldMerge = task.status === "success";
+      if (shouldMerge) {
+        const diffEvent = task.events.find((e) => e.type === "git_diff");
+        const diff = (diffEvent?.data as { diff?: string } | undefined)?.diff;
+        if (diff) {
+          const taskAgent = task.agent ?? "claude";
+          const reviewAgent = AgentRunner.pickReviewAgent(taskAgent);
+          this.onEvent?.({ type: "review_started", taskId: task.id, reviewAgent });
+          const review = await this.runner.reviewDiffWithAgent(diff, taskAgent, workerPath);
+          task.review = review;
+          if (!review.approve) {
+            shouldMerge = false;
+            log("info", "cross-agent review rejected merge", {
+              taskId: task.id,
+              score: review.score,
+              issues: review.issues,
+            });
+            this.onEvent?.({ type: "review_rejected", taskId: task.id, score: review.score, issues: review.issues });
+          } else {
+            log("info", "cross-agent review approved merge", { taskId: task.id, score: review.score });
+            this.onEvent?.({ type: "review_approved", taskId: task.id, score: review.score });
+          }
+        }
+      }
       const mergeResult = await this.pool.release(workerName, shouldMerge, task.id);
 
       if (shouldMerge && !mergeResult.merged) {

From de35942ce6b27bf9406aa90b4502408184058fc1 Mon Sep 17 00:00:00 2001
From: Robert <robert.alexander37899@gmail.com>
Date: Wed, 4 Mar 2026 16:06:01 -0500
Subject: [PATCH 2/4] fix(review): address round-2 review issues

- Remove unused `cwd` param from reviewDiffWithAgent()
- Fix JSON extraction: use indexOf/lastIndexOf instead of regex
  (handles braces in strings correctly)
- Add task.error message when review rejects merge
- Add review_started event assertions to scheduler tests

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/__tests__/agent-runner.test.ts |  1 -
 src/__tests__/scheduler.test.ts    | 10 +++++++---
 src/agent-runner.ts                | 13 ++++++-------
 src/scheduler.ts                   |  3 ++-
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/src/__tests__/agent-runner.test.ts b/src/__tests__/agent-runner.test.ts
index 7cb2387..3330449 100644
--- a/src/__tests__/agent-runner.test.ts
+++ b/src/__tests__/agent-runner.test.ts
@@ -423,7 +423,6 @@ describe("AgentRunner", () => {
     const result = await runner.reviewDiffWithAgent(
       "diff --git a/foo.test.ts b/foo.test.ts\n+it('works', () => {});",
       "claude",  // task was by claude → review by codex → codex fails → fallback
-      "/tmp",
       2,  // 2 second timeout to keep test fast
     );
     assert.strictEqual(typeof result.approve, "boolean");
diff --git a/src/__tests__/scheduler.test.ts b/src/__tests__/scheduler.test.ts
index 01bf1e0..0e28e79 100644
--- a/src/__tests__/scheduler.test.ts
+++ b/src/__tests__/scheduler.test.ts
@@ -897,8 +897,11 @@ describe("Scheduler", () => {
       assert.ok(task.review);
       assert.strictEqual(task.review!.approve, false);
       assert.strictEqual(task.review!.score, 25);
-      // Should have emitted review_rejected event
-      assert.ok(events.some(e => e.type === "review_rejected"));
+      // Error should explain why merge was blocked
+      assert.ok(task.error.includes("review rejected"), "error should explain rejection");
+      // Should have emitted review_started and review_rejected events
+      assert.ok(events.some(e => e.type === "review_started"), "should emit review_started");
+      assert.ok(events.some(e => e.type === "review_rejected"), "should emit review_rejected");
     });
 
     it("review gate allows merge when review approves", async () => {
@@ -942,7 +945,8 @@ describe("Scheduler", () => {
       assert.strictEqual(mergeCalledWith, true);
       assert.ok(task.review);
       assert.strictEqual(task.review!.approve, true);
-      assert.ok(events.some(e => e.type === "review_approved"));
+      assert.ok(events.some(e => e.type === "review_started"), "should emit review_started");
+      assert.ok(events.some(e => e.type === "review_approved"), "should emit review_approved");
     });
 
     it("skips review when task has no diff", async () => {
diff --git a/src/agent-runner.ts b/src/agent-runner.ts
index da4c0cb..bdde65f 100644
--- a/src/agent-runner.ts
+++ b/src/agent-runner.ts
@@ -112,7 +112,6 @@ export class AgentRunner {
   async reviewDiffWithAgent(
     diff: string,
     taskAgent: string,
-    cwd: string,
     timeout: number = 60,
   ): Promise<ReviewResult> {
     const reviewAgent = AgentRunner.pickReviewAgent(taskAgent);
@@ -194,12 +193,12 @@ export class AgentRunner {
       }
     } catch { /* not pure JSON */ }
 
-    // Try to find JSON object in the output (agent may wrap it in text)
-    // Use [^{}]* to avoid greedily matching across multiple JSON objects
-    const candidates = [...output.matchAll(/\{[^{}]*"approve"\s*:\s*(true|false)[^{}]*\}/g)];
-    for (const match of candidates) {
+    // Try to extract JSON by finding the outermost { } that contains "approve"
+    const start = output.indexOf("{");
+    const end = output.lastIndexOf("}");
+    if (start !== -1 && end > start) {
       try {
-        const obj = JSON.parse(match[0]);
+        const obj = JSON.parse(output.slice(start, end + 1));
         if (typeof obj.approve === "boolean" && typeof obj.score === "number") {
           return {
             approve: obj.approve,
@@ -208,7 +207,7 @@ export class AgentRunner {
             suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [],
           };
         }
-      } catch { /* try next candidate */ }
+      } catch { /* not valid JSON */ }
     }
 
     return null;
diff --git a/src/scheduler.ts b/src/scheduler.ts
index 9c7a676..9dc9c4e 100644
--- a/src/scheduler.ts
+++ b/src/scheduler.ts
@@ -480,10 +480,11 @@ export class Scheduler {
           const taskAgent = task.agent ?? "claude";
           const reviewAgent = AgentRunner.pickReviewAgent(taskAgent);
           this.onEvent?.({ type: "review_started", taskId: task.id, reviewAgent });
-          const review = await this.runner.reviewDiffWithAgent(diff, taskAgent, workerPath);
+          const review = await this.runner.reviewDiffWithAgent(diff, taskAgent);
           task.review = review;
           if (!review.approve) {
             shouldMerge = false;
+            task.error = `review rejected (score ${review.score}): ${review.issues.join("; ")}`;
             log("info", "cross-agent review rejected merge", {
               taskId: task.id,
               score: review.score,

From 4a7df1eb66a481552178df1b7c39379a6c196cb4 Mon Sep 17 00:00:00 2001
From: Robert <robert.alexander37899@gmail.com>
Date: Wed, 4 Mar 2026 16:09:06 -0500
Subject: [PATCH 3/4] refactor(review): extract normalizeReviewObj, add
 reviewAgent to result

- Extract duplicated JSON normalisation into normalizeReviewObj()
- Add reviewAgent field to ReviewResult so consumers know which agent reviewed
- Update Task.review type to include reviewAgent

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/agent-runner.ts | 36 +++++++++++++++++-------------------
 src/scheduler.ts    |  3 +--
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/src/agent-runner.ts b/src/agent-runner.ts
index bdde65f..493c9a2 100644
--- a/src/agent-runner.ts
+++ b/src/agent-runner.ts
@@ -55,6 +55,7 @@ export interface ReviewResult {
   score: number;
   issues: string[];
   suggestions: string[];
+  reviewAgent?: string;
 }
 
 export class AgentRunner {
@@ -160,6 +161,7 @@ export class AgentRunner {
       // Parse the JSON response from the review agent
       const parsed = this.parseReviewResponse(reviewTask.output);
       if (parsed) {
+        parsed.reviewAgent = reviewAgent;
         log("info", "cross-agent review complete", {
           reviewAgent,
           approve: parsed.approve,
@@ -182,37 +184,33 @@ export class AgentRunner {
   private parseReviewResponse(output: string): ReviewResult | null {
     // Try direct JSON parse first
     try {
-      const obj = JSON.parse(output.trim());
-      if (typeof obj.approve === "boolean" && typeof obj.score === "number") {
-        return {
-          approve: obj.approve,
-          score: Math.max(0, Math.min(100, obj.score)),
-          issues: Array.isArray(obj.issues) ? obj.issues.map(String) : [],
-          suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [],
-        };
-      }
+      const result = this.normalizeReviewObj(JSON.parse(output.trim()));
+      if (result) return result;
     } catch { /* not pure JSON */ }
 
-    // Try to extract JSON by finding the outermost { } that contains "approve"
+    // Try to extract JSON by finding the outermost { }
     const start = output.indexOf("{");
     const end = output.lastIndexOf("}");
     if (start !== -1 && end > start) {
       try {
-        const obj = JSON.parse(output.slice(start, end + 1));
-        if (typeof obj.approve === "boolean" && typeof obj.score === "number") {
-          return {
-            approve: obj.approve,
-            score: Math.max(0, Math.min(100, obj.score)),
-            issues: Array.isArray(obj.issues) ? obj.issues.map(String) : [],
-            suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [],
-          };
-        }
+        const result = this.normalizeReviewObj(JSON.parse(output.slice(start, end + 1)));
+        if (result) return result;
       } catch { /* not valid JSON */ }
     }
 
     return null;
   }
 
+  private normalizeReviewObj(obj: Record<string, unknown>): ReviewResult | null {
+    if (typeof obj.approve !== "boolean" || typeof obj.score !== "number") return null;
+    return {
+      approve: obj.approve,
+      score: Math.max(0, Math.min(100, obj.score)),
+      issues: Array.isArray(obj.issues) ? obj.issues.map(String) : [],
+      suggestions: Array.isArray(obj.suggestions) ? obj.suggestions.map(String) : [],
+    };
+  }
+
   /** Returns info about all tasks currently being executed by this runner. */
   getRunningTasks(): RunningTaskInfo[] {
     const now = Date.now();
diff --git a/src/scheduler.ts b/src/scheduler.ts
index 9dc9c4e..18e4742 100644
--- a/src/scheduler.ts
+++ b/src/scheduler.ts
@@ -478,8 +478,7 @@ export class Scheduler {
         const diff = (diffEvent?.data as { diff?: string } | undefined)?.diff;
         if (diff) {
           const taskAgent = task.agent ?? "claude";
-          const reviewAgent = AgentRunner.pickReviewAgent(taskAgent);
-          this.onEvent?.({ type: "review_started", taskId: task.id, reviewAgent });
+          this.onEvent?.({ type: "review_started", taskId: task.id, reviewAgent: AgentRunner.pickReviewAgent(taskAgent) });
           const review = await this.runner.reviewDiffWithAgent(diff, taskAgent);
           task.review = review;
           if (!review.approve) {

From bed551953af1f8c3392d9462ddc8f9325f605094 Mon Sep 17 00:00:00 2001
From: Robert <robert.alexander37899@gmail.com>
Date: Wed, 4 Mar 2026 16:44:37 -0500
Subject: [PATCH 4/4] refactor(review): move ReviewResult to types.ts, add
 missing tests

- Move ReviewResult interface to types.ts (single source of truth)
- Task.review now references ReviewResult instead of inline duplicate
- Re-export ReviewResult from agent-runner.ts for backward compat
- Add test: normalizeReviewObj returns null for missing/wrong-type fields
- Add test: scheduler catch block when reviewDiffWithAgent throws

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/__tests__/agent-runner.test.ts |  8 +++++++
 src/__tests__/scheduler.test.ts    | 38 ++++++++++++++++++++++++++++++
 src/agent-runner.ts                | 10 ++------
 3 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/src/__tests__/agent-runner.test.ts b/src/__tests__/agent-runner.test.ts
index 3330449..f0d7f48 100644
--- a/src/__tests__/agent-runner.test.ts
+++ b/src/__tests__/agent-runner.test.ts
@@ -401,6 +401,14 @@ describe("AgentRunner", () => {
     assert.strictEqual(result.score, 100);
   });
 
+  it("parseReviewResponse returns null when approve or score is missing/wrong type", () => {
+    const runner = new AgentRunner();
+    const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner);
+    assert.strictEqual(parse('{"score": 80, "issues": []}'), null); // missing approve
+    assert.strictEqual(parse('{"approve": "yes", "score": 80}'), null); // approve is string
+    assert.strictEqual(parse('{"approve": true, "issues": []}'), null); // missing score
+  });
+
   // ── reviewDiff approve field ──
 
   it("reviewDiff includes approve field based on score threshold", () => {
diff --git a/src/__tests__/scheduler.test.ts b/src/__tests__/scheduler.test.ts
index 0e28e79..0da3e3c 100644
--- a/src/__tests__/scheduler.test.ts
+++ b/src/__tests__/scheduler.test.ts
@@ -1011,5 +1011,43 @@ describe("Scheduler", () => {
       assert.strictEqual(mergeCalledWith, false);
       assert.strictEqual(task.review, undefined);
     });
+
+    it("falls back gracefully when reviewDiffWithAgent throws", async () => {
+      let mergeCalledWith = false;
+      const pool = {
+        ...makePool(),
+        release: async (_name: string, merge: boolean) => {
+          mergeCalledWith = merge;
+          return { merged: false };
+        },
+      } as unknown as WorktreePool;
+
+      const runner = {
+        run: async (task: Task) => {
+          task.status = "success";
+          task.durationMs = 100;
+          task.events.push({
+            type: "git_diff",
+            timestamp: new Date().toISOString(),
+            data: { diff: "diff --git a/foo.ts\n+const x = 1;" },
+          });
+          return task;
+        },
+        getRunningTasks: () => [],
+        reviewDiffWithAgent: async () => { throw new Error("network error"); },
+      } as unknown as AgentRunner;
+
+      const s = new Scheduler(pool, runner, makeStore());
+
+      const task = s.submit("review throws");
+      task.maxRetries = 0; // prevent retry so we can assert final status
+      const exec = (s as any).executeAndRelease.bind(s);
+      await exec(task, "w0", "/tmp/w0");
+
+      // The outer catch should mark the task as failed
+      assert.strictEqual(task.status, "failed");
+      assert.ok(task.error.includes("network error"));
+      assert.strictEqual(mergeCalledWith, false);
+    });
   });
 });
diff --git a/src/agent-runner.ts b/src/agent-runner.ts
index 493c9a2..d2b7545 100644
--- a/src/agent-runner.ts
+++ b/src/agent-runner.ts
@@ -1,4 +1,4 @@
-import { type Task, type TaskEvent, createTask } from "./types.js";
+import { type Task, type TaskEvent, type ReviewResult, createTask } from "./types.js";
 import { log } from "./logger.js";
 import { spawn, type ChildProcess } from "node:child_process";
 import { exec as execCb } from "node:child_process";
@@ -50,13 +50,7 @@ export interface RunningTaskInfo {
   costUsd: number;
 }
 
-export interface ReviewResult {
-  approve: boolean;
-  score: number;
-  issues: string[];
-  suggestions: string[];
-  reviewAgent?: string;
-}
+export type { ReviewResult };
 
 export class AgentRunner {
   private readonly _runningTasks = new Map<string, RunningTaskEntry>();