diff --git a/README.md b/README.md index d4bdcbf..a3e6e54 100644 --- a/README.md +++ b/README.md @@ -135,12 +135,13 @@ When running from source (`bun src/loop.ts`), auto-update is disabled — use `g - `-a, --agent `: agent to run (default: `codex`) - `-p, --prompt `: prompt text or a `.md` prompt file path. Plain text auto-creates `PLAN.md` first. -- `--proof `: required proof criteria for task completion +- `--proof `: optional proof criteria for task completion - `--codex-model `: set the model passed to codex (`LOOP_CODEX_MODEL` can also set this by default) - `-m, --max-iterations `: max loop count (default: infinite) - `-d, --done `: done signal string (default: `DONE`) - `--format `: output format (default: `pretty`) - `--review [claude|codex|claudex]`: run a review when done (default: `claudex`; bare `--review` also uses `claudex`). With `claudex`, both reviews run in parallel, then both comments are passed back to the original agent so it can decide what to address. If both reviews found the same issue, that is a stronger signal to fix it. +- `--review-plan [other|claude|codex]`: reviewer for the automatic plan review pass that runs after plain-text prompts create `PLAN.md` (default: `other`, the non-primary model). - `--tmux`: run `loop` in a detached tmux session so it survives SSH disconnects (auto-attaches when interactive). Session name format: `repo-loop-X` - `--worktree`: create and run inside a fresh git worktree + branch automatically. Worktree/branch format: `repo-loop-X` - `-h, --help`: help @@ -154,9 +155,12 @@ loop --proof "Use {skill} to verify your changes" # two iteration, raw JSON/event output loop -m 2 --proof "Use {skill} to verify your changes" "Implement {feature}" --format raw -# plain text prompt: auto-creates PLAN.md, then runs from PLAN.md +# plain text prompt: auto-creates PLAN.md, then auto-reviews with the other model (default) loop --proof "Use {skill} to verify your changes" "Implement {feature}" +# plain text prompt: override the plan reviewer +loop --proof "Use {skill} to verify your changes" --review-plan claude "Implement {feature}" + # run with claude loop --proof "Use {skill} to verify your changes" --agent claude --prompt PLAN.md diff --git a/src/loop/args.ts b/src/loop/args.ts index 4363a6d..f36e5a7 100644 --- a/src/loop/args.ts +++ b/src/loop/args.ts @@ -6,9 +6,15 @@ import { LOOP_VERSION, VALUE_FLAGS, } from "./constants"; -import type { Agent, Format, Options, ReviewMode, ValueFlag } from "./types"; +import type { + Agent, + Format, + Options, + PlanReviewMode, + ReviewMode, + ValueFlag, +} from "./types"; -const REQUIRED_PROOF_ERROR = "Missing required --proof value."; const EMPTY_DONE_SIGNAL_ERROR = "Invalid --done value: cannot be empty"; const parseAgent = (value: string): Agent => { @@ -32,6 +38,13 @@ const parseReviewValue = (value: string): ReviewMode => { throw new Error(`Invalid --review value: ${value}`); }; +const parsePlanReviewValue = (value: string): PlanReviewMode => { + if (value === "other" || value === "claude" || value === "codex") { + return value; + } + throw new Error(`Invalid --review-plan value: ${value}`); +}; + const applyValueFlag = ( flag: ValueFlag, value: string, @@ -101,6 +114,27 @@ const parseReviewArg = ( return index; }; +const parsePlanReviewArg = ( + argv: string[], + index: number, + opts: Options, + arg: string +): number => { + if (arg.startsWith("--review-plan=")) { + opts.reviewPlan = parsePlanReviewValue(arg.slice("--review-plan=".length)); + return index; + } + + const next = argv[index + 1]; + if (next === "other" || next === "claude" || next === "codex") { + opts.reviewPlan = next; + return index + 1; + } + + opts.reviewPlan = "other"; + return index; +}; + const consumeArg = ( argv: string[], index: number, @@ -136,6 +170,13 @@ const consumeArg = ( }; } + if (arg === "--review-plan" || arg.startsWith("--review-plan=")) { + return { + nextIndex: parsePlanReviewArg(argv, index, opts, arg) + 1, + stop: false, + }; + } + if (arg === "--tmux") { opts.tmux = true; return { nextIndex: index + 1, stop: false }; @@ -195,9 +236,5 @@ export const parseArgs = (argv: string[]): Options => { opts.promptInput = positional.join(" "); } - if (!opts.proof) { - throw new Error(REQUIRED_PROOF_ERROR); - } - return opts; }; diff --git a/src/loop/constants.ts b/src/loop/constants.ts index ce28f19..820fb34 100644 --- a/src/loop/constants.ts +++ b/src/loop/constants.ts @@ -10,24 +10,25 @@ export const HELP = ` loop - v${LOOP_VERSION} - meta agent loop runner Usage: - loop Open live panel for running claude/codex instances + loop Open live panel for running claude/codex instances loop [options] [prompt] - loop update Check for updates and apply if available - loop upgrade Alias for update + loop update Check for updates and apply if available + loop upgrade Alias for update Options: - -a, --agent Agent CLI to run (default: codex) - -p, --prompt Prompt text or path to a .md prompt file - -m, --max-iterations . Max loops (default: infinite) - -d, --done Done signal (default: DONE) - --proof Proof requirements for task completion (required) - --codex-model Override codex model (default: ${DEFAULT_CODEX_MODEL}) - --format Log format (default: pretty) - --review [claude|codex|claudex] Review on done (default: claudex) - --tmux Run in a detached tmux session (name: repo-loop-X) - --worktree Create and run in a fresh git worktree (name: repo-loop-X) - -v, --version Show loop version - -h, --help Show this help + -a, --agent Agent CLI to run (default: codex) + -p, --prompt Prompt text or path to a .md prompt file + -m, --max-iterations Max loops (default: infinite) + -d, --done Done signal (default: DONE) + --proof Proof requirements for task completion + --codex-model Override codex model (default: ${DEFAULT_CODEX_MODEL}) + --format Log format (default: pretty) + --review [claude|codex|claudex] Review on done (default: claudex) + --review-plan [other|claude|codex] Review PLAN.md after plain-text planning (default: other) + --tmux Run in a detached tmux session (name: repo-loop-X) + --worktree Create and run in a fresh git worktree (name: repo-loop-X) + -v, --version Show loop version + -h, --help Show this help Auto-update: Updates are checked automatically on startup and applied on the next run. diff --git a/src/loop/prompts.ts b/src/loop/prompts.ts index e840292..d3a5d7b 100644 --- a/src/loop/prompts.ts +++ b/src/loop/prompts.ts @@ -4,6 +4,14 @@ const NEWLINE_RE = /\r?\n/; const SPAWN_TEAM_WITH_WORKTREE_ISOLATION = "Spawn a team of agents with worktree isolation."; +const appendProofRequirements = (parts: string[], proof: string): void => { + const trimmed = proof.trim(); + if (!trimmed) { + return; + } + parts.push(`Proof requirements:\n${trimmed}`); +}; + const hasProofInTask = (task: string, proof: string): boolean => { const proofLines = proof .split(NEWLINE_RE) @@ -30,6 +38,15 @@ export const buildPlanPrompt = (task: string): string => "Only write the plan in this step. Do not implement code yet.", ].join("\n\n"); +export const buildPlanReviewPrompt = (task: string): string => + [ + "Plan review mode:", + `Task:\n${task.trim()}`, + "Review PLAN.md for correctness, missing steps, and verification gaps.", + "Update PLAN.md directly if needed.", + "Only edit PLAN.md in this step. Do not implement code yet.", + ].join("\n\n"); + export const buildWorkPrompt = ( task: string, doneSignal: string, @@ -45,7 +62,7 @@ export const buildWorkPrompt = ( } if (!hasProofInTask(task, proof)) { - parts.push(`Proof requirements:\n${proof.trim()}`); + appendProofRequirements(parts, proof); } parts.push( @@ -64,7 +81,7 @@ export const buildReviewPrompt = ( "Run checks/tests/commands as needed and inspect changed files.", ]; - parts.push(`Proof requirements:\n${proof.trim()}`); + appendProofRequirements(parts, proof); parts.push( `If review is needed, end your response with exactly "${REVIEW_FAIL}" on the final non-empty line. Nothing may follow this line.` diff --git a/src/loop/runner.ts b/src/loop/runner.ts index c0ff382..6972b30 100644 --- a/src/loop/runner.ts +++ b/src/loop/runner.ts @@ -23,6 +23,16 @@ interface SpawnConfig { args: string[]; cmd: string; } + +interface AppServerEvent { + method?: unknown; + params?: unknown; +} +interface CodexRenderState { + activeItemHasDelta: boolean; + activeItemId: string; + lastCompleted: string; +} type LegacyAgentRunner = ( agent: Agent, prompt: string, @@ -175,6 +185,155 @@ const eventMessage = (line: string): string => { } }; +const asRecord = (value: unknown): Record => + typeof value === "object" && value !== null + ? (value as Record) + : {}; + +const asString = (value: unknown): string | undefined => + typeof value === "string" ? value : undefined; + +const parseJsonLine = (line: string): Record | undefined => { + if (!line.trim().startsWith("{")) { + return undefined; + } + try { + const parsed = JSON.parse(line); + return typeof parsed === "object" && parsed !== null + ? (parsed as Record) + : undefined; + } catch { + return undefined; + } +}; + +const collectDeltaText = (value: unknown, out: string[]): void => { + if (typeof value === "string") { + out.push(value); + return; + } + if (Array.isArray(value)) { + for (const item of value) { + collectDeltaText(item, out); + } + return; + } + if (!value || typeof value !== "object") { + return; + } + const record = asRecord(value); + const direct = asString(record.delta) ?? asString(record.text); + if (direct !== undefined) { + out.push(direct); + } + collectDeltaText(record.content, out); + collectDeltaText(record.item, out); + collectDeltaText(record.payload, out); +}; + +const parseDeltaText = (value: unknown): string => { + const parts: string[] = []; + collectDeltaText(value, parts); + return parts.join(""); +}; + +const collectMessageText = (value: unknown, out: string[]): void => { + if (typeof value === "string") { + out.push(value); + return; + } + if (Array.isArray(value)) { + for (const item of value) { + collectMessageText(item, out); + } + return; + } + if (!value || typeof value !== "object") { + return; + } + const record = asRecord(value); + const direct = asString(record.text) ?? asString(record.delta); + if (direct !== undefined) { + out.push(direct); + } + collectMessageText(record.content, out); + collectMessageText(record.item, out); + collectMessageText(record.payload, out); +}; + +const parseCompletedMessage = (value: unknown): string => { + const parts: string[] = []; + collectMessageText(value, parts); + return parts + .map((part) => part.trim()) + .filter(Boolean) + .join("\n"); +}; + +const parseItemId = (value: unknown): string => { + const record = asRecord(value); + return ( + asString(record.itemId) ?? + asString(record.item_id) ?? + asString(record.id) ?? + asString(asRecord(record.item).id) ?? + "" + ); +}; + +const handleDeltaLine = ( + params: Record, + renderState: CodexRenderState, + appendChunk: (text: string) => void, + markMessageBoundary: () => void +): void => { + const itemId = parseItemId(params); + const itemChanged = + Boolean(renderState.activeItemId) && + Boolean(itemId) && + itemId !== renderState.activeItemId; + if (itemChanged) { + markMessageBoundary(); + } + if (itemId) { + renderState.activeItemId = itemId; + } + const chunk = parseDeltaText(params.delta ?? params); + if (!chunk) { + return; + } + renderState.activeItemHasDelta = true; + appendChunk(chunk); +}; + +const handleCompletedLine = ( + params: Record, + renderState: CodexRenderState, + appendChunk: (text: string) => void, + markMessageBoundary: () => void +): void => { + const item = asRecord(params.item); + const itemType = asString(item.type); + if (itemType !== "agentMessage" && itemType !== "agent_message") { + return; + } + const completedId = parseItemId(params); + const sameActive = + Boolean(completedId) && + Boolean(renderState.activeItemId) && + completedId === renderState.activeItemId; + const candidate = parseCompletedMessage(item); + if ( + candidate && + candidate !== renderState.lastCompleted && + !(sameActive && renderState.activeItemHasDelta) + ) { + renderState.lastCompleted = candidate; + appendChunk(candidate); + } + markMessageBoundary(); +}; + const consume = async ( stream: ReadableStream, onText: (text: string) => void @@ -238,16 +397,65 @@ const runCodexAgent = async ( return runnerState.runLegacyAgent("codex", prompt, opts); } + const renderState: CodexRenderState = { + activeItemHasDelta: false, + activeItemId: "", + lastCompleted: "", + }; let parsed = ""; - let state = { parsed: "", prettyCount: 0, lastMessage: "" }; - const onParsed = (text: string): void => { - state = appendParsedLine(text, opts, state); - parsed = state.parsed; + let pendingMessageBreak = false; + let wrotePretty = false; + const appendChunk = (text: string): void => { + if (!text) { + return; + } + const needsMessageBreak = + pendingMessageBreak && + parsed && + !parsed.endsWith("\n") && + !text.startsWith("\n"); + const parsedChunk = needsMessageBreak ? `\n${text}` : text; + const prettyChunk = needsMessageBreak ? `\n\n${text}` : text; + pendingMessageBreak = false; + parsed += parsedChunk; + if (opts.format === "pretty") { + process.stdout.write(prettyChunk); + wrotePretty = true; + } + }; + const markMessageBoundary = (): void => { + pendingMessageBreak = true; + renderState.activeItemHasDelta = false; + renderState.activeItemId = ""; + }; + const onParsed = (_text: string): void => { + // app-server parsing is ignored; client renders from raw events. }; const onRaw = (text: string): void => { if (opts.format === "raw") { process.stdout.write(`${text}\n`); } + + const parsedLine = parseJsonLine(text) as AppServerEvent | undefined; + if (!parsedLine) { + return; + } + const method = asString(parsedLine.method); + const params = asRecord(parsedLine.params); + + if (method === "item/agentMessage/delta") { + handleDeltaLine(params, renderState, appendChunk, markMessageBoundary); + return; + } + + if (method === "item/completed") { + handleCompletedLine( + params, + renderState, + appendChunk, + markMessageBoundary + ); + } }; activeAppServerRuns += 1; @@ -268,7 +476,15 @@ const runCodexAgent = async ( onParsed, onRaw, }); - return { ...result, parsed: result.parsed || parsed }; + const finalParsed = result.parsed || parsed; + if ( + opts.format === "pretty" && + wrotePretty && + !finalParsed.endsWith("\n") + ) { + process.stdout.write("\n"); + } + return { ...result, parsed: finalParsed }; } catch (error) { if ( process.env[CODEX_TRANSPORT_ENV] !== CODEX_TRANSPORT_EXEC && diff --git a/src/loop/task.ts b/src/loop/task.ts index 11ea3f3..7a335b3 100644 --- a/src/loop/task.ts +++ b/src/loop/task.ts @@ -1,6 +1,6 @@ -import { buildPlanPrompt } from "./prompts"; +import { buildPlanPrompt, buildPlanReviewPrompt } from "./prompts"; import { runAgent } from "./runner"; -import type { Options } from "./types"; +import type { Agent, Options, PlanReviewMode } from "./types"; import { isFile, readPrompt } from "./utils"; const PLAN_FILE = "PLAN.md"; @@ -11,6 +11,17 @@ const MARKDOWN_PATH_RE = /^[^\s]+\.md$/i; const isMarkdownInput = (input: string): boolean => MARKDOWN_PATH_RE.test(input.trim()); +const resolvePlanReviewer = ( + reviewPlan: PlanReviewMode | undefined, + agent: Agent +): Agent => { + const mode = reviewPlan ?? "other"; + if (mode === "other") { + return agent === "codex" ? "claude" : "codex"; + } + return mode; +}; + const runPlanMode = async (opts: Options, task: string): Promise => { console.log("\n[loop] prompt text detected. creating PLAN.md first."); const planPrompt = buildPlanPrompt(task); @@ -25,6 +36,16 @@ const runPlanMode = async (opts: Options, task: string): Promise => { if (!isFile(PLAN_FILE)) { throw new Error("[loop] planning step did not create PLAN.md"); } + + const reviewer = resolvePlanReviewer(opts.reviewPlan, opts.agent); + console.log(`\n[loop] reviewing PLAN.md with ${reviewer}.`); + const reviewPrompt = buildPlanReviewPrompt(task); + const review = await runAgent(reviewer, reviewPrompt, opts); + if (review.exitCode !== 0) { + throw new Error( + `[loop] plan review ${reviewer} exited with code ${review.exitCode}` + ); + } }; export const resolveTask = async (opts: Options): Promise => { diff --git a/src/loop/types.ts b/src/loop/types.ts index 138950f..dbd2c69 100644 --- a/src/loop/types.ts +++ b/src/loop/types.ts @@ -1,6 +1,7 @@ export type Agent = "claude" | "codex"; export type Format = "pretty" | "raw"; export type ReviewMode = Agent | "claudex"; +export type PlanReviewMode = Agent | "other"; export type ValueFlag = | "agent" | "prompt" @@ -19,6 +20,7 @@ export interface Options { promptInput?: string; proof: string; review?: ReviewMode; + reviewPlan?: PlanReviewMode; tmux?: boolean; worktree?: boolean; } diff --git a/tests/loop/args.test.ts b/tests/loop/args.test.ts index 1c924ba..17033cf 100644 --- a/tests/loop/args.test.ts +++ b/tests/loop/args.test.ts @@ -62,22 +62,19 @@ test("parseArgs prints version and exits when -v is passed", () => { expect(code).toBe(0); }); -test("parseArgs throws when required proof is missing", () => { - expect(() => parseArgs([])).toThrow("Missing required --proof value."); -}); - -test("parseArgs returns expected defaults when proof is provided", () => { +test("parseArgs returns expected defaults when proof is omitted", () => { clearModelEnv(); - const opts = parseArgs(["--proof", "verify with tests"]); + const opts = parseArgs([]); expect(opts.agent).toBe("codex"); expect(opts.doneSignal).toBe(DEFAULT_DONE_SIGNAL); - expect(opts.proof).toBe("verify with tests"); + expect(opts.proof).toBe(""); expect(opts.format).toBe("pretty"); expect(opts.maxIterations).toBe(Number.POSITIVE_INFINITY); expect(opts.model).toBe(DEFAULT_CODEX_MODEL); expect(opts.promptInput).toBeUndefined(); expect(opts.review).toBe("claudex"); + expect(opts.reviewPlan).toBeUndefined(); expect(opts.tmux).toBe(false); expect(opts.worktree).toBe(false); }); @@ -149,6 +146,31 @@ test("parseArgs uses reviewer after --review when valid", () => { expect(opts.review).toBe("claude"); }); +test("parseArgs treats bare --review-plan as other when no reviewer follows", () => { + const opts = parseArgs(["--review-plan", "ship it", "--proof", "verify"]); + + expect(opts.reviewPlan).toBe("other"); + expect(opts.promptInput).toBe("ship it"); +}); + +test("parseArgs uses reviewer after --review-plan when valid", () => { + const opts = parseArgs(["--review-plan", "claude", "--proof", "verify"]); + + expect(opts.reviewPlan).toBe("claude"); +}); + +test("parseArgs supports equals form for --review-plan", () => { + const opts = parseArgs(["--review-plan=codex", "--proof", "verify"]); + + expect(opts.reviewPlan).toBe("codex"); +}); + +test("parseArgs rejects invalid --review-plan value", () => { + expect(() => + parseArgs(["--review-plan=claudex", "--proof", "verify"]) + ).toThrow("Invalid --review-plan value: claudex"); +}); + test("parseArgs enables tmux mode with --tmux", () => { const opts = parseArgs(["--tmux", "--proof", "verify"]); diff --git a/tests/loop/prompts.test.ts b/tests/loop/prompts.test.ts index 38a2f89..3070555 100644 --- a/tests/loop/prompts.test.ts +++ b/tests/loop/prompts.test.ts @@ -2,6 +2,7 @@ import { expect, test } from "bun:test"; import { REVIEW_FAIL, REVIEW_PASS } from "../../src/loop/constants"; import { buildPlanPrompt, + buildPlanReviewPrompt, buildReviewPrompt, buildWorkPrompt, } from "../../src/loop/prompts"; @@ -14,6 +15,15 @@ test("buildPlanPrompt asks for PLAN.md", () => { expect(prompt).toContain("Do not implement code yet."); }); +test("buildPlanReviewPrompt asks to review PLAN.md only", () => { + const prompt = buildPlanReviewPrompt(" ship feature "); + + expect(prompt).toContain("Task:\nship feature"); + expect(prompt).toContain("Review PLAN.md"); + expect(prompt).toContain("Update PLAN.md directly if needed."); + expect(prompt).toContain("Only edit PLAN.md in this step."); +}); + test("buildWorkPrompt keeps task, optional sections, and done instruction", () => { const prompt = buildWorkPrompt( " ship feature ", @@ -70,3 +80,9 @@ test("buildReviewPrompt includes strict review signal instructions", () => { "The final line must be one of the two review signals" ); }); + +test("buildReviewPrompt omits proof requirements when proof is empty", () => { + const prompt = buildReviewPrompt("do task", "", ""); + + expect(prompt).not.toContain("Proof requirements:"); +}); diff --git a/tests/loop/runner.test.ts b/tests/loop/runner.test.ts index 97e599e..ed65708 100644 --- a/tests/loop/runner.test.ts +++ b/tests/loop/runner.test.ts @@ -44,7 +44,14 @@ const interruptAppServer: MockFn<(signal: "SIGINT" | "SIGTERM") => void> = mock( () => undefined ); const runCodexTurn: MockFn< - (_prompt: string, _opts: Options) => Promise + ( + _prompt: string, + _opts: Options, + _callbacks: { + onParsed: (text: string) => void; + onRaw: (text: string) => void; + } + ) => Promise > = mock(async () => makeResult()); const runLegacyAgent: MockFn< (agent: string, prompt: string, opts: Options) => Promise @@ -172,6 +179,99 @@ test("runAgent propagates turn/completed failure exit code", async () => { expect(result.parsed).toBe("failed"); }); +test("runAgent inserts pretty-mode blank line after message completion", async () => { + runCodexTurn.mockImplementation((_prompt, _opts, callbacks) => { + callbacks.onRaw( + JSON.stringify({ + method: "item/agentMessage/delta", + params: { itemId: "item-1", delta: "I checked the repo." }, + }) + ); + callbacks.onRaw( + JSON.stringify({ + method: "item/completed", + params: { + item: { + id: "item-1", + type: "agentMessage", + content: [{ text: "I checked the repo." }], + }, + }, + }) + ); + callbacks.onRaw( + JSON.stringify({ + method: "item/agentMessage/delta", + params: { itemId: "item-2", delta: "I am updating PLAN.md." }, + }) + ); + return Promise.resolve(makeResult()); + }); + + const originalWrite = process.stdout.write; + const writes: string[] = []; + const writeSpy = mock((chunk: string | Uint8Array): boolean => { + writes.push( + typeof chunk === "string" ? chunk : Buffer.from(chunk).toString("utf8") + ); + return true; + }); + (process.stdout as { write: typeof writeSpy }).write = writeSpy; + + try { + const result = await runAgent( + "codex", + "say hi", + makeOptions({ format: "pretty" }) + ); + expect(result.parsed).toBe("I checked the repo.\nI am updating PLAN.md."); + expect(writes.join("")).toContain( + "I checked the repo.\n\nI am updating PLAN.md.\n" + ); + } finally { + process.stdout.write = originalWrite; + } +}); + +test("runAgent preserves nested delta newline content in pretty mode", async () => { + runCodexTurn.mockImplementation((_prompt, _opts, callbacks) => { + callbacks.onRaw( + JSON.stringify({ + method: "item/agentMessage/delta", + params: { + delta: { + text: "Heading", + content: [{ text: "\n- one" }, { text: "\n- two" }], + }, + }, + }) + ); + return Promise.resolve(makeResult()); + }); + + const originalWrite = process.stdout.write; + const writes: string[] = []; + const writeSpy = mock((chunk: string | Uint8Array): boolean => { + writes.push( + typeof chunk === "string" ? chunk : Buffer.from(chunk).toString("utf8") + ); + return true; + }); + (process.stdout as { write: typeof writeSpy }).write = writeSpy; + + try { + const result = await runAgent( + "codex", + "say hi", + makeOptions({ format: "pretty" }) + ); + expect(result.parsed).toBe("Heading\n- one\n- two"); + expect(writes.join("")).toContain("Heading\n- one\n- two\n"); + } finally { + process.stdout.write = originalWrite; + } +}); + test("runAgent only falls back to legacy once per process for app-server compatibility errors", async () => { runCodexTurn.mockImplementation(() => { throw new appServerFallback("app-server unsupported"); diff --git a/tests/loop/task.test.ts b/tests/loop/task.test.ts index fa50074..6e4d314 100644 --- a/tests/loop/task.test.ts +++ b/tests/loop/task.test.ts @@ -1,7 +1,10 @@ import { afterEach, expect, mock, test } from "bun:test"; import type { Options, RunResult } from "../../src/loop/types"; -const makeOptions = (promptInput?: string): Options => ({ +const makeOptions = ( + promptInput?: string, + overrides: Partial = {} +): Options => ({ agent: "codex", doneSignal: "", proof: "verify with tests", @@ -9,6 +12,7 @@ const makeOptions = (promptInput?: string): Options => ({ maxIterations: 5, model: "test-model", ...(promptInput ? { promptInput } : {}), + ...overrides, }); afterEach(() => { @@ -97,12 +101,19 @@ test("resolveTask treats spaced text ending with .md as prompt text", async () = const task = await resolveTask(makeOptions("fix bug in README.md")); expect(task).toBe("generated plan task"); - expect(runAgentMock).toHaveBeenCalledTimes(1); - expect(runAgentMock).toHaveBeenCalledWith( + expect(runAgentMock).toHaveBeenCalledTimes(2); + expect(runAgentMock).toHaveBeenNthCalledWith( + 1, "codex", expect.stringContaining("Task:\nfix bug in README.md"), expect.any(Object) ); + expect(runAgentMock).toHaveBeenNthCalledWith( + 2, + "claude", + expect.stringContaining("Plan review mode:"), + expect.any(Object) + ); }); test("resolveTask creates PLAN.md first for plain-text prompt input", async () => { @@ -120,15 +131,100 @@ test("resolveTask creates PLAN.md first for plain-text prompt input", async () = const task = await resolveTask(makeOptions("ship feature")); expect(task).toBe("generated plan task"); - expect(runAgentMock).toHaveBeenCalledTimes(1); - expect(runAgentMock).toHaveBeenCalledWith( + expect(runAgentMock).toHaveBeenCalledTimes(2); + expect(runAgentMock).toHaveBeenNthCalledWith( + 1, "codex", expect.stringContaining("Task:\nship feature"), expect.any(Object) ); + expect(runAgentMock).toHaveBeenNthCalledWith( + 2, + "claude", + expect.stringContaining("Plan review mode:"), + expect.any(Object) + ); expect(readPromptMock).toHaveBeenCalledWith("PLAN.md"); }); +test("resolveTask reviews PLAN.md with other model by default in plain-text flow", async () => { + const { resolveTask, runAgentMock } = await loadResolveTask({ + isFile: (path) => path === "PLAN.md", + readPrompt: async (input) => + input === "PLAN.md" ? "generated plan task" : input, + runAgent: async () => ({ + combined: "", + exitCode: 0, + parsed: "", + }), + }); + + const task = await resolveTask(makeOptions("ship feature")); + + expect(task).toBe("generated plan task"); + expect(runAgentMock).toHaveBeenCalledTimes(2); + expect(runAgentMock).toHaveBeenNthCalledWith( + 1, + "codex", + expect.stringContaining("Plan mode:"), + expect.any(Object) + ); + expect(runAgentMock).toHaveBeenNthCalledWith( + 2, + "claude", + expect.stringContaining("Plan review mode:"), + expect.any(Object) + ); +}); + +test("resolveTask uses codex as review-plan=other reviewer when primary is claude", async () => { + const { runAgentMock, resolveTask } = await loadResolveTask({ + isFile: (path) => path === "PLAN.md", + readPrompt: async () => "generated plan task", + runAgent: async () => ({ + combined: "", + exitCode: 0, + parsed: "", + }), + }); + + await resolveTask( + makeOptions("ship feature", { agent: "claude", reviewPlan: "other" }) + ); + + expect(runAgentMock).toHaveBeenNthCalledWith( + 1, + "claude", + expect.any(String), + expect.any(Object) + ); + expect(runAgentMock).toHaveBeenNthCalledWith( + 2, + "codex", + expect.any(String), + expect.any(Object) + ); +}); + +test("resolveTask throws when plan review exits non-zero", async () => { + let calls = 0; + const { resolveTask } = await loadResolveTask({ + isFile: (path) => path === "PLAN.md", + runAgent: () => { + calls += 1; + return { + combined: "", + exitCode: calls === 1 ? 0 : 2, + parsed: "", + }; + }, + }); + + await expect(resolveTask(makeOptions("ship feature"))).rejects.toThrow( + "[loop] plan review claude exited with code 2" + ); +}); + test("resolveTask throws when planning exits non-zero", async () => { const { resolveTask } = await loadResolveTask({ runAgent: async () => ({