Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 94 additions & 1 deletion src/__tests__/agent-runner.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, it, before, after } from "node:test";
import assert from "node:assert/strict";
import { AgentRunner } from "../agent-runner.js";
import { AgentRunner, type ReviewResult } from "../agent-runner.js";
import { createTask } from "../types.js";
import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from "node:fs";
import { join } from "node:path";
Expand Down Expand Up @@ -345,4 +345,97 @@ describe("AgentRunner", () => {
assert.strictEqual(task.events[0].type, "evt-5");
assert.strictEqual(task.events[199].type, "evt-204");
});

// ── pickReviewAgent ──

it("pickReviewAgent selects codex to review claude's work", () => {
assert.strictEqual(AgentRunner.pickReviewAgent("claude"), "codex");
});

it("pickReviewAgent selects claude to review codex's work", () => {
assert.strictEqual(AgentRunner.pickReviewAgent("codex"), "claude");
});

it("pickReviewAgent selects codex to review claude-sdk's work", () => {
assert.strictEqual(AgentRunner.pickReviewAgent("claude-sdk"), "codex");
});

it("pickReviewAgent selects claude for generic agents", () => {
assert.strictEqual(AgentRunner.pickReviewAgent("aider --yes"), "claude");
assert.strictEqual(AgentRunner.pickReviewAgent("custom-agent"), "claude");
});

// ── parseReviewResponse ──

it("parseReviewResponse parses clean JSON", () => {
const runner = new AgentRunner();
const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner);
const result = parse('{"approve": true, "score": 85, "issues": [], "suggestions": ["looks good"]}');
assert.ok(result);
assert.strictEqual(result.approve, true);
assert.strictEqual(result.score, 85);
assert.deepStrictEqual(result.suggestions, ["looks good"]);
});

it("parseReviewResponse extracts JSON from surrounding text", () => {
const runner = new AgentRunner();
const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner);
const result = parse('Here is my review:\n{"approve": false, "score": 30, "issues": ["bug found"], "suggestions": []}\nDone.');
assert.ok(result);
assert.strictEqual(result.approve, false);
assert.strictEqual(result.score, 30);
assert.deepStrictEqual(result.issues, ["bug found"]);
});

it("parseReviewResponse returns null for unparseable output", () => {
const runner = new AgentRunner();
const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner);
assert.strictEqual(parse("I can't produce JSON right now"), null);
});

it("parseReviewResponse clamps score to 0-100", () => {
const runner = new AgentRunner();
const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner);
const result = parse('{"approve": true, "score": 150, "issues": [], "suggestions": []}');
assert.ok(result);
assert.strictEqual(result.score, 100);
});

it("parseReviewResponse returns null when approve or score is missing/wrong type", () => {
const runner = new AgentRunner();
const parse = (runner as unknown as { parseReviewResponse: (s: string) => ReviewResult | null }).parseReviewResponse.bind(runner);
assert.strictEqual(parse('{"score": 80, "issues": []}'), null); // missing approve
assert.strictEqual(parse('{"approve": "yes", "score": 80}'), null); // approve is string
assert.strictEqual(parse('{"approve": true, "issues": []}'), null); // missing score
});

// ── reviewDiff approve field ──

it("reviewDiff includes approve field based on score threshold", () => {
const runner = new AgentRunner();
const clean = runner.reviewDiff("diff --git a/foo.ts\n+const x = 1;");
assert.strictEqual(typeof clean.approve, "boolean");
assert.strictEqual(clean.approve, true); // score=50 >= 40

const bad = runner.reviewDiff("diff --git a/foo.ts\n+console.log('x');\n+console.log('y');\n+console.log('z');");
// console.log penalty takes score below 40
// Actually score=50-10=40, still >=40
assert.strictEqual(typeof bad.approve, "boolean");
});

// ── reviewDiffWithAgent fallback ──

it("reviewDiffWithAgent falls back to heuristic when agent fails", async () => {
const runner = new AgentRunner();
// codex not available in test env → will fail → should fall back to heuristic
const result = await runner.reviewDiffWithAgent(
"diff --git a/foo.test.ts b/foo.test.ts\n+it('works', () => {});",
"claude", // task was by claude → review by codex → codex fails → fallback
2, // 2 second timeout to keep test fast
);
assert.strictEqual(typeof result.approve, "boolean");
assert.strictEqual(typeof result.score, "number");
assert.ok(Array.isArray(result.issues));
assert.ok(Array.isArray(result.suggestions));
});
});
205 changes: 205 additions & 0 deletions src/__tests__/scheduler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ function makeRunner(): AgentRunner {
return {
run: async (task: Task) => { task.status = "success"; task.durationMs = 100; return task; },
getRunningTasks: () => [],
reviewDiffWithAgent: async () => ({ approve: true, score: 80, issues: [], suggestions: [] }),
} as unknown as AgentRunner;
}

Expand Down Expand Up @@ -845,4 +846,208 @@ describe("Scheduler", () => {
assert.ok(result);
});
});

// ─── 11. Cross-agent review gate ───

describe("cross-agent review gate", () => {
it("review gate blocks merge when review rejects", async () => {
let mergeCalledWith = false;
const pool = {
...makePool(),
release: async (_name: string, merge: boolean) => {
mergeCalledWith = merge;
return { merged: merge };
},
} as unknown as WorktreePool;

const runner = {
run: async (task: Task) => {
task.status = "success";
task.durationMs = 100;
// Simulate agent creating a git diff event
task.events.push({
type: "git_diff",
timestamp: new Date().toISOString(),
data: { diff: "diff --git a/foo.ts\n+console.log('debug');" },
});
return task;
},
getRunningTasks: () => [],
reviewDiffWithAgent: async () => ({
approve: false,
score: 25,
issues: ["console.log found"],
suggestions: [],
}),
} as unknown as AgentRunner;

const events: Record<string, unknown>[] = [];
const s = new Scheduler(pool, runner, makeStore(), (ev) => events.push(ev));

// Manually invoke executeAndRelease
const task = s.submit("test review gate");
const exec = (s as any).executeAndRelease.bind(s);
await exec(task, "w0", "/tmp/w0");

// Task should still be "success" (review doesn't change status)
assert.strictEqual(task.status, "success");
// But merge should have been blocked
assert.strictEqual(mergeCalledWith, false);
// Review should be attached to task
assert.ok(task.review);
assert.strictEqual(task.review!.approve, false);
assert.strictEqual(task.review!.score, 25);
// Error should explain why merge was blocked
assert.ok(task.error.includes("review rejected"), "error should explain rejection");
// Should have emitted review_started and review_rejected events
assert.ok(events.some(e => e.type === "review_started"), "should emit review_started");
assert.ok(events.some(e => e.type === "review_rejected"), "should emit review_rejected");
});

it("review gate allows merge when review approves", async () => {
let mergeCalledWith = false;
const pool = {
...makePool(),
release: async (_name: string, merge: boolean) => {
mergeCalledWith = merge;
return { merged: true };
},
} as unknown as WorktreePool;

const runner = {
run: async (task: Task) => {
task.status = "success";
task.durationMs = 100;
task.events.push({
type: "git_diff",
timestamp: new Date().toISOString(),
data: { diff: "diff --git a/foo.ts\n+const x = 1;" },
});
return task;
},
getRunningTasks: () => [],
reviewDiffWithAgent: async () => ({
approve: true,
score: 85,
issues: [],
suggestions: ["looks clean"],
}),
} as unknown as AgentRunner;

const events: Record<string, unknown>[] = [];
const s = new Scheduler(pool, runner, makeStore(), (ev) => events.push(ev));

const task = s.submit("test approve");
const exec = (s as any).executeAndRelease.bind(s);
await exec(task, "w0", "/tmp/w0");

assert.strictEqual(task.status, "success");
assert.strictEqual(mergeCalledWith, true);
assert.ok(task.review);
assert.strictEqual(task.review!.approve, true);
assert.ok(events.some(e => e.type === "review_started"), "should emit review_started");
assert.ok(events.some(e => e.type === "review_approved"), "should emit review_approved");
});

it("skips review when task has no diff", async () => {
let mergeCalledWith = false;
const pool = {
...makePool(),
release: async (_name: string, merge: boolean) => {
mergeCalledWith = merge;
return { merged: true };
},
} as unknown as WorktreePool;

const runner = {
run: async (task: Task) => {
task.status = "success";
task.durationMs = 100;
// No git_diff event
return task;
},
getRunningTasks: () => [],
reviewDiffWithAgent: async () => { throw new Error("should not be called"); },
} as unknown as AgentRunner;

const s = new Scheduler(pool, runner, makeStore());

const task = s.submit("no diff task");
const exec = (s as any).executeAndRelease.bind(s);
await exec(task, "w0", "/tmp/w0");

// Should merge without review since there's no diff
assert.strictEqual(mergeCalledWith, true);
assert.strictEqual(task.review, undefined);
});

it("skips review when task failed", async () => {
let mergeCalledWith = false;
const pool = {
...makePool(),
release: async (_name: string, merge: boolean) => {
mergeCalledWith = merge;
return { merged: false };
},
} as unknown as WorktreePool;

const runner = {
run: async (task: Task) => {
task.status = "failed";
task.error = "compilation error";
task.durationMs = 50;
return task;
},
getRunningTasks: () => [],
reviewDiffWithAgent: async () => { throw new Error("should not be called"); },
} as unknown as AgentRunner;

const s = new Scheduler(pool, runner, makeStore());

const task = s.submit("failed task");
const exec = (s as any).executeAndRelease.bind(s);
await exec(task, "w0", "/tmp/w0");

assert.strictEqual(mergeCalledWith, false);
assert.strictEqual(task.review, undefined);
});

it("falls back gracefully when reviewDiffWithAgent throws", async () => {
let mergeCalledWith = false;
const pool = {
...makePool(),
release: async (_name: string, merge: boolean) => {
mergeCalledWith = merge;
return { merged: false };
},
} as unknown as WorktreePool;

const runner = {
run: async (task: Task) => {
task.status = "success";
task.durationMs = 100;
task.events.push({
type: "git_diff",
timestamp: new Date().toISOString(),
data: { diff: "diff --git a/foo.ts\n+const x = 1;" },
});
return task;
},
getRunningTasks: () => [],
reviewDiffWithAgent: async () => { throw new Error("network error"); },
} as unknown as AgentRunner;

const s = new Scheduler(pool, runner, makeStore());

const task = s.submit("review throws");
task.maxRetries = 0; // prevent retry so we can assert final status
const exec = (s as any).executeAndRelease.bind(s);
await exec(task, "w0", "/tmp/w0");

// The outer catch should mark the task as failed
assert.strictEqual(task.status, "failed");
assert.ok(task.error.includes("network error"));
assert.strictEqual(mergeCalledWith, false);
});
});
});
Loading