diff --git a/README.md b/README.md index 95a5bc1..9874813 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,7 @@ When running from source (`bun src/loop.ts`), auto-update is disabled — use `g - `-a, --agent `: agent to run (default: `codex`) - `-p, --prompt `: prompt text or a `.md` prompt file path. Plain text auto-creates `PLAN.md` first. - `--proof `: required proof criteria for task completion +- `--codex-model `: set the model passed to codex (`LOOP_CODEX_MODEL` can also set this by default) - `-m, --max-iterations `: max loop count (default: infinite) - `-d, --done `: done signal string (default: `DONE`) - `--format `: output format (default: `pretty`) diff --git a/src/loop/args.ts b/src/loop/args.ts index 88ca332..4363a6d 100644 --- a/src/loop/args.ts +++ b/src/loop/args.ts @@ -69,6 +69,14 @@ const applyValueFlag = ( opts.proof = trimmed; return; } + if (flag === "codexModel") { + const trimmed = value.trim(); + if (!trimmed) { + throw new Error("Invalid --codex-model value: cannot be empty"); + } + opts.model = trimmed; + return; + } opts.format = parseFormat(value); }; @@ -116,6 +124,11 @@ const consumeArg = ( return { nextIndex: argv.length, stop: true }; } + if (arg.startsWith("--codex-model=")) { + applyValueFlag("codexModel", arg.slice("--codex-model=".length), opts); + return { nextIndex: index + 1, stop: false }; + } + if (arg === "--review" || arg.startsWith("--review=")) { return { nextIndex: parseReviewArg(argv, index, opts, arg) + 1, diff --git a/src/loop/constants.ts b/src/loop/constants.ts index 495d5eb..6544df0 100644 --- a/src/loop/constants.ts +++ b/src/loop/constants.ts @@ -20,8 +20,8 @@ Options: -p, --prompt Prompt text or path to a .md prompt file -m, --max-iterations . Max loops (default: infinite) -d, --done Done signal (default: DONE) - CODEX_TRANSPORT=app-server|exec Codex transport mode (default: app-server) --proof Proof requirements for task completion (required) + --codex-model Override codex model (default: ${DEFAULT_CODEX_MODEL}) --format Log format (default: pretty) --review [claude|codex|claudex] Review on done (default: claudex) --tmux Run in a detached tmux session (name: repo-loop-X) @@ -48,5 +48,6 @@ export const VALUE_FLAGS: Record = { "-d": "done", "--done": "done", "--proof": "proof", + "--codex-model": "codexModel", "--format": "format", }; diff --git a/src/loop/main.ts b/src/loop/main.ts index 2438ad3..8cc992f 100644 --- a/src/loop/main.ts +++ b/src/loop/main.ts @@ -6,6 +6,17 @@ import { runAgent } from "./runner"; import type { Options } from "./types"; import { hasSignal } from "./utils"; +const doneSignalText = (doneSignal: string) => `done signal "${doneSignal}"`; +const doneSignalMissingText = (signal: string) => + `\n[loop] ${doneSignalText(signal)} detected, stopping.`; +const doneSignalPassedText = (signal: string) => + `\n[loop] ${doneSignalText(signal)} detected and review passed, stopping.`; +const doneSignalExitText = (doneSignal: string, exitCode: number) => + `[loop] ${doneSignalText(doneSignal)} seen despite exit code ${exitCode}.`; +const bothReviewersNotes = (notes: string): string => + "Both reviewers requested changes. Decide for each comment whether to address it now. " + + `If you skip one, explain why briefly. If both reviews found the same issue, it might be worth addressing.\n\n${notes}`; + const runIterations = async ( task: string, opts: Options, @@ -13,7 +24,8 @@ const runIterations = async ( hasExistingPr = false ): Promise => { let reviewNotes = ""; - const stopMessage = `\n[loop] done signal "${opts.doneSignal}" detected`; + const shouldReview = reviewers.length > 0; + const doneSignal = opts.doneSignal; console.log(`\n[loop] PLAN.md:\n\n${task}`); for (let i = 1; i <= opts.maxIterations; i++) { const tag = Number.isFinite(opts.maxIterations) @@ -28,28 +40,31 @@ const runIterations = async ( ); reviewNotes = ""; const result = await runAgent(opts.agent, prompt, opts); - if (result.exitCode !== 0) { + const output = `${result.parsed}\n${result.combined}`; + const done = hasSignal(output, doneSignal); + if (!done && result.exitCode !== 0) { throw new Error( `[loop] ${opts.agent} exited with code ${result.exitCode}` ); } - if (!hasSignal(`${result.parsed}\n${result.combined}`, opts.doneSignal)) { + if (!done) { continue; } - if (reviewers.length === 0) { - console.log(`${stopMessage}, stopping.`); + if (result.exitCode !== 0) { + console.log(doneSignalExitText(doneSignal, result.exitCode)); + } + if (!shouldReview) { + console.log(doneSignalMissingText(doneSignal)); return true; } const review = await runReview(reviewers, task, opts); if (review.approved) { await runDraftPrStep(task, opts, hasExistingPr); - console.log(`${stopMessage} and review passed, stopping.`); + console.log(doneSignalPassedText(opts.doneSignal)); return true; } if (review.consensusFail) { - reviewNotes = - "Both reviewers requested changes. Decide for each comment whether to address it now. " + - `If you skip one, explain why briefly. If both reviews found the same issue, it might be worth addressing.\n\n${review.notes}`; + reviewNotes = bothReviewersNotes(review.notes); console.log( "\n[loop] both reviews collected. original agent deciding what to address." ); @@ -68,14 +83,9 @@ export const runLoop = async (task: string, opts: Options): Promise => { ? createInterface({ input: process.stdin, output: process.stdout }) : undefined; let hasExistingPr = false; - let currentTask = task; + let loopTask = task; while (true) { - const done = await runIterations( - currentTask, - opts, - reviewers, - hasExistingPr - ); + const done = await runIterations(loopTask, opts, reviewers, hasExistingPr); if (reviewers.length > 0 && done) { hasExistingPr = true; } @@ -98,6 +108,6 @@ export const runLoop = async (task: string, opts: Options): Promise => { rl.close(); return; } - currentTask = `${currentTask}\n\nFollow-up:\n${followUp}`; + loopTask = `${loopTask}\n\nFollow-up:\n${followUp}`; } }; diff --git a/src/loop/types.ts b/src/loop/types.ts index 4752984..1a68694 100644 --- a/src/loop/types.ts +++ b/src/loop/types.ts @@ -7,6 +7,7 @@ export type ValueFlag = | "max" | "done" | "proof" + | "codexModel" | "format"; export interface Options { diff --git a/tests/loop/args.test.ts b/tests/loop/args.test.ts index 93feefa..1c924ba 100644 --- a/tests/loop/args.test.ts +++ b/tests/loop/args.test.ts @@ -89,6 +89,24 @@ test("parseArgs uses LOOP_CODEX_MODEL when present", () => { expect(opts.model).toBe("test-model"); }); +test("parseArgs uses --codex-model when provided", () => { + const opts = parseArgs([ + "--codex-model", + "custom-model", + "--proof", + "verify", + ]); + + expect(opts.model).toBe("custom-model"); +}); + +test("parseArgs with --codex-model= overrides LOOP_CODEX_MODEL", () => { + process.env.LOOP_CODEX_MODEL = "env-model"; + const opts = parseArgs(["--codex-model=flag-model", "--proof", "verify"]); + + expect(opts.model).toBe("flag-model"); +}); + test("parseArgs handles all value flags and explicit reviewer", () => { const opts = parseArgs([ "--agent", @@ -104,6 +122,8 @@ test("parseArgs handles all value flags and explicit reviewer", () => { "--format", "pretty", "--review=claudex", + "--codex-model", + "custom-model", ]); expect(opts.agent).toBe("claude"); @@ -113,6 +133,7 @@ test("parseArgs handles all value flags and explicit reviewer", () => { expect(opts.proof).toBe("verify this"); expect(opts.format).toBe("pretty"); expect(opts.review).toBe("claudex"); + expect(opts.model).toBe("custom-model"); }); test("parseArgs treats bare --review as claudex when no reviewer follows", () => { diff --git a/tests/loop/main.test.ts b/tests/loop/main.test.ts index cc77d51..a1b4c26 100644 --- a/tests/loop/main.test.ts +++ b/tests/loop/main.test.ts @@ -92,6 +92,19 @@ test("runLoop stops immediately on done signal when review is disabled", async ( expect(runDraftPrStep).not.toHaveBeenCalled(); }); +test("runLoop stops on done signal even if agent exits non-zero when review is disabled", async () => { + const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({ + resolveReviewers: () => [], + runAgent: async () => makeRunResult("", "", 1), + }); + + await runLoop("Ship feature", makeOptions({ review: undefined })); + + expect(runAgent).toHaveBeenCalledTimes(1); + expect(runReview).not.toHaveBeenCalled(); + expect(runDraftPrStep).not.toHaveBeenCalled(); +}); + test("runLoop creates draft PR when done signal is reviewed and approved", async () => { const opts = makeOptions({ review: "claudex" }); const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({ @@ -116,6 +129,31 @@ test("runLoop creates draft PR when done signal is reviewed and approved", async ); }); +test("runLoop creates draft PR when done signal is reviewed and approved even if agent exits non-zero", async () => { + const opts = makeOptions({ review: "claudex" }); + const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({ + resolveReviewers: () => ["codex", "claude"], + runAgent: async () => makeRunResult("", "", 1), + runReview: async () => ({ + approved: true, + consensusFail: false, + notes: "", + }), + }); + + await runLoop("Ship feature", opts); + + expect(runAgent).toHaveBeenCalledTimes(1); + expect(runReview).toHaveBeenCalledTimes(1); + expect(runDraftPrStep).toHaveBeenCalledTimes(1); + expect(runDraftPrStep).toHaveBeenNthCalledWith( + 1, + "Ship feature", + opts, + false + ); +}); + test("runLoop uses follow-up commit prompt after a PR is already created", async () => { const answers = ["Update docs", ""]; const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({