From c50a0537a11d4f9fecdbbe69cff4864ca9113ea3 Mon Sep 17 00:00:00 2001 From: Axel Delafosse Date: Sun, 22 Feb 2026 00:30:34 -0800 Subject: [PATCH 1/2] feat: add --codex-model flag and allow done signal on non-zero exit --- README.md | 1 + src/loop/args.ts | 17 +++++++++++ src/loop/constants.ts | 2 ++ src/loop/main.ts | 64 +++++++++++++++++------------------------ src/loop/types.ts | 1 + tests/loop/args.test.ts | 16 +++++++++++ tests/loop/main.test.ts | 38 ++++++++++++++++++++++++ 7 files changed, 102 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 9051798..e023e54 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,7 @@ When running from source (`bun src/loop.ts`), auto-update is disabled — use `g - `-a, --agent `: agent to run (default: `codex`) - `-p, --prompt `: prompt text or a `.md` prompt file path. Plain text auto-creates `PLAN.md` first. - `--proof `: required proof criteria for task completion +- `--codex-model `: set the model passed to codex (`LOOP_CODEX_MODEL` can also set this by default) - `-m, --max-iterations `: max loop count (default: infinite) - `-d, --done `: done signal string (default: `DONE`) - `--format `: output format (default: `pretty`) diff --git a/src/loop/args.ts b/src/loop/args.ts index 88ca332..e636c3c 100644 --- a/src/loop/args.ts +++ b/src/loop/args.ts @@ -69,6 +69,14 @@ const applyValueFlag = ( opts.proof = trimmed; return; } + if (flag === "codexModel") { + const trimmed = value.trim(); + if (!trimmed) { + throw new Error("Invalid --codex-model value: cannot be empty"); + } + opts.model = trimmed; + return; + } opts.format = parseFormat(value); }; @@ -116,6 +124,15 @@ const consumeArg = ( return { nextIndex: argv.length, stop: true }; } + if (arg.startsWith("--codex-model=")) { + applyValueFlag( + "codexModel", + arg.slice("--codex-model=".length), + opts + ); + return { nextIndex: index + 1, stop: false }; + } + if (arg === "--review" || arg.startsWith("--review=")) { return { nextIndex: parseReviewArg(argv, index, opts, arg) + 1, diff --git a/src/loop/constants.ts b/src/loop/constants.ts index f64206e..6544df0 100644 --- a/src/loop/constants.ts +++ b/src/loop/constants.ts @@ -21,6 +21,7 @@ Options: -m, --max-iterations . Max loops (default: infinite) -d, --done Done signal (default: DONE) --proof Proof requirements for task completion (required) + --codex-model Override codex model (default: ${DEFAULT_CODEX_MODEL}) --format Log format (default: pretty) --review [claude|codex|claudex] Review on done (default: claudex) --tmux Run in a detached tmux session (name: repo-loop-X) @@ -47,5 +48,6 @@ export const VALUE_FLAGS: Record = { "-d": "done", "--done": "done", "--proof": "proof", + "--codex-model": "codexModel", "--format": "format", }; diff --git a/src/loop/main.ts b/src/loop/main.ts index e1d89b3..5a3a073 100644 --- a/src/loop/main.ts +++ b/src/loop/main.ts @@ -3,8 +3,15 @@ import { runDraftPrStep } from "./pr"; import { buildWorkPrompt } from "./prompts"; import { resolveReviewers, runReview } from "./review"; import { runAgent } from "./runner"; -import type { Options } from "./types"; import { hasSignal } from "./utils"; +import type { Options } from "./types"; + +const doneSignalText = (doneSignal: string): string => + `done signal "${doneSignal}"`; +const doneSignalMissingText = (signal: string): string => + `\n[loop] ${doneSignalText(signal)} detected, stopping.`; +const doneSignalPassedText = (signal: string): string => + `\n[loop] ${doneSignalText(signal)} detected and review passed, stopping.`; const runIterations = async ( task: string, @@ -13,50 +20,42 @@ const runIterations = async ( hasExistingPr = false ): Promise => { let reviewNotes = ""; + const shouldReview = reviewers.length > 0; console.log(`\n[loop] PLAN.md:\n\n${task}`); for (let i = 1; i <= opts.maxIterations; i++) { - const tag = Number.isFinite(opts.maxIterations) - ? `/${opts.maxIterations}` - : ""; + const tag = Number.isFinite(opts.maxIterations) ? `/${opts.maxIterations}` : ""; console.log(`\n[loop] iteration ${i}${tag}`); - const prompt = buildWorkPrompt( - task, - opts.doneSignal, - opts.proof, - reviewNotes - ); + const prompt = buildWorkPrompt(task, opts.doneSignal, opts.proof, reviewNotes); reviewNotes = ""; const result = await runAgent(opts.agent, prompt, opts); - if (result.exitCode !== 0) { - throw new Error( - `[loop] ${opts.agent} exited with code ${result.exitCode}` - ); - } const output = `${result.parsed}\n${result.combined}`; - if (!hasSignal(output, opts.doneSignal)) { + const done = hasSignal(output, opts.doneSignal); + if (!done && result.exitCode !== 0) { + throw new Error(`[loop] ${opts.agent} exited with code ${result.exitCode}`); + } + if (!done) { continue; } - if (reviewers.length === 0) { + if (result.exitCode !== 0) { console.log( - `\n[loop] done signal "${opts.doneSignal}" detected, stopping.` + `[loop] ${doneSignalText(opts.doneSignal)} seen despite exit code ${result.exitCode}.` ); + } + if (!shouldReview) { + console.log(doneSignalMissingText(opts.doneSignal)); return true; } const review = await runReview(reviewers, task, opts); if (review.approved) { await runDraftPrStep(task, opts, hasExistingPr); - console.log( - `\n[loop] done signal "${opts.doneSignal}" detected and review passed, stopping.` - ); + console.log(doneSignalPassedText(opts.doneSignal)); return true; } if (review.consensusFail) { reviewNotes = - "Both reviewers requested changes. Decide for each comment whether to address it now. " + + `Both reviewers requested changes. Decide for each comment whether to address it now. ` + `If you skip one, explain why briefly. If both reviews found the same issue, it might be worth addressing.\n\n${review.notes}`; - console.log( - "\n[loop] both reviews collected. original agent deciding what to address." - ); + console.log("\n[loop] both reviews collected. original agent deciding what to address."); continue; } reviewNotes = review.notes || "Reviewer found more work to do."; @@ -74,29 +73,20 @@ export const runLoop = async (task: string, opts: Options): Promise => { let hasExistingPr = false; let currentTask = task; while (true) { - const done = await runIterations( - currentTask, - opts, - reviewers, - hasExistingPr - ); + const done = await runIterations(currentTask, opts, reviewers, hasExistingPr); if (reviewers.length > 0 && done) { hasExistingPr = true; } if (!rl) { if (!done) { - console.log( - `\n[loop] reached max iterations (${opts.maxIterations}), stopping.` - ); + console.log(`\n[loop] reached max iterations (${opts.maxIterations}), stopping.`); } return; } if (!done) { console.log(`\n[loop] reached max iterations (${opts.maxIterations}).`); } - const answer = await rl.question( - "\n[loop] follow-up prompt (blank to exit): " - ); + const answer = await rl.question("\n[loop] follow-up prompt (blank to exit): "); const followUp = answer.trim() || null; if (!followUp) { rl.close(); diff --git a/src/loop/types.ts b/src/loop/types.ts index 4752984..1a68694 100644 --- a/src/loop/types.ts +++ b/src/loop/types.ts @@ -7,6 +7,7 @@ export type ValueFlag = | "max" | "done" | "proof" + | "codexModel" | "format"; export interface Options { diff --git a/tests/loop/args.test.ts b/tests/loop/args.test.ts index 93feefa..bfa3577 100644 --- a/tests/loop/args.test.ts +++ b/tests/loop/args.test.ts @@ -89,6 +89,19 @@ test("parseArgs uses LOOP_CODEX_MODEL when present", () => { expect(opts.model).toBe("test-model"); }); +test("parseArgs uses --codex-model when provided", () => { + const opts = parseArgs(["--codex-model", "custom-model", "--proof", "verify"]); + + expect(opts.model).toBe("custom-model"); +}); + +test("parseArgs with --codex-model= overrides LOOP_CODEX_MODEL", () => { + process.env.LOOP_CODEX_MODEL = "env-model"; + const opts = parseArgs(["--codex-model=flag-model", "--proof", "verify"]); + + expect(opts.model).toBe("flag-model"); +}); + test("parseArgs handles all value flags and explicit reviewer", () => { const opts = parseArgs([ "--agent", @@ -104,6 +117,8 @@ test("parseArgs handles all value flags and explicit reviewer", () => { "--format", "pretty", "--review=claudex", + "--codex-model", + "custom-model", ]); expect(opts.agent).toBe("claude"); @@ -113,6 +128,7 @@ test("parseArgs handles all value flags and explicit reviewer", () => { expect(opts.proof).toBe("verify this"); expect(opts.format).toBe("pretty"); expect(opts.review).toBe("claudex"); + expect(opts.model).toBe("custom-model"); }); test("parseArgs treats bare --review as claudex when no reviewer follows", () => { diff --git a/tests/loop/main.test.ts b/tests/loop/main.test.ts index c9e4d7d..c81657b 100644 --- a/tests/loop/main.test.ts +++ b/tests/loop/main.test.ts @@ -89,6 +89,19 @@ test("runLoop stops immediately on done signal when review is disabled", async ( expect(runDraftPrStep).not.toHaveBeenCalled(); }); +test("runLoop stops on done signal even if agent exits non-zero when review is disabled", async () => { + const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({ + resolveReviewers: () => [], + runAgent: async () => makeRunResult("", "", 1), + }); + + await runLoop("Ship feature", makeOptions({ review: undefined })); + + expect(runAgent).toHaveBeenCalledTimes(1); + expect(runReview).not.toHaveBeenCalled(); + expect(runDraftPrStep).not.toHaveBeenCalled(); +}); + test("runLoop creates draft PR when done signal is reviewed and approved", async () => { const opts = makeOptions({ review: "claudex" }); const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({ @@ -113,6 +126,31 @@ test("runLoop creates draft PR when done signal is reviewed and approved", async ); }); +test("runLoop creates draft PR when done signal is reviewed and approved even if agent exits non-zero", async () => { + const opts = makeOptions({ review: "claudex" }); + const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({ + resolveReviewers: () => ["codex", "claude"], + runAgent: async () => makeRunResult("", "", 1), + runReview: async () => ({ + approved: true, + consensusFail: false, + notes: "", + }), + }); + + await runLoop("Ship feature", opts); + + expect(runAgent).toHaveBeenCalledTimes(1); + expect(runReview).toHaveBeenCalledTimes(1); + expect(runDraftPrStep).toHaveBeenCalledTimes(1); + expect(runDraftPrStep).toHaveBeenNthCalledWith( + 1, + "Ship feature", + opts, + false + ); +}); + test("runLoop uses follow-up commit prompt after a PR is already created", async () => { const answers = ["Update docs", ""]; const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({ From 7db7cb5d2a82fe38aed6c9c87c833db06dd8e57d Mon Sep 17 00:00:00 2001 From: Axel Delafosse Date: Sun, 22 Feb 2026 00:36:31 -0800 Subject: [PATCH 2/2] chore: commit unstaged changes --- src/loop/args.ts | 6 +---- src/loop/constants.ts | 1 - src/loop/main.ts | 60 ++++++++++++++++++++++++++--------------- tests/loop/args.test.ts | 7 ++++- 4 files changed, 45 insertions(+), 29 deletions(-) diff --git a/src/loop/args.ts b/src/loop/args.ts index e636c3c..4363a6d 100644 --- a/src/loop/args.ts +++ b/src/loop/args.ts @@ -125,11 +125,7 @@ const consumeArg = ( } if (arg.startsWith("--codex-model=")) { - applyValueFlag( - "codexModel", - arg.slice("--codex-model=".length), - opts - ); + applyValueFlag("codexModel", arg.slice("--codex-model=".length), opts); return { nextIndex: index + 1, stop: false }; } diff --git a/src/loop/constants.ts b/src/loop/constants.ts index 933dc6d..6544df0 100644 --- a/src/loop/constants.ts +++ b/src/loop/constants.ts @@ -20,7 +20,6 @@ Options: -p, --prompt Prompt text or path to a .md prompt file -m, --max-iterations . Max loops (default: infinite) -d, --done Done signal (default: DONE) - CODEX_TRANSPORT=app-server|exec Codex transport mode (default: app-server) --proof Proof requirements for task completion (required) --codex-model Override codex model (default: ${DEFAULT_CODEX_MODEL}) --format Log format (default: pretty) diff --git a/src/loop/main.ts b/src/loop/main.ts index 5a3a073..8cc992f 100644 --- a/src/loop/main.ts +++ b/src/loop/main.ts @@ -3,15 +3,19 @@ import { runDraftPrStep } from "./pr"; import { buildWorkPrompt } from "./prompts"; import { resolveReviewers, runReview } from "./review"; import { runAgent } from "./runner"; -import { hasSignal } from "./utils"; import type { Options } from "./types"; +import { hasSignal } from "./utils"; -const doneSignalText = (doneSignal: string): string => - `done signal "${doneSignal}"`; -const doneSignalMissingText = (signal: string): string => +const doneSignalText = (doneSignal: string) => `done signal "${doneSignal}"`; +const doneSignalMissingText = (signal: string) => `\n[loop] ${doneSignalText(signal)} detected, stopping.`; -const doneSignalPassedText = (signal: string): string => +const doneSignalPassedText = (signal: string) => `\n[loop] ${doneSignalText(signal)} detected and review passed, stopping.`; +const doneSignalExitText = (doneSignal: string, exitCode: number) => + `[loop] ${doneSignalText(doneSignal)} seen despite exit code ${exitCode}.`; +const bothReviewersNotes = (notes: string): string => + "Both reviewers requested changes. Decide for each comment whether to address it now. " + + `If you skip one, explain why briefly. If both reviews found the same issue, it might be worth addressing.\n\n${notes}`; const runIterations = async ( task: string, @@ -21,28 +25,36 @@ const runIterations = async ( ): Promise => { let reviewNotes = ""; const shouldReview = reviewers.length > 0; + const doneSignal = opts.doneSignal; console.log(`\n[loop] PLAN.md:\n\n${task}`); for (let i = 1; i <= opts.maxIterations; i++) { - const tag = Number.isFinite(opts.maxIterations) ? `/${opts.maxIterations}` : ""; + const tag = Number.isFinite(opts.maxIterations) + ? `/${opts.maxIterations}` + : ""; console.log(`\n[loop] iteration ${i}${tag}`); - const prompt = buildWorkPrompt(task, opts.doneSignal, opts.proof, reviewNotes); + const prompt = buildWorkPrompt( + task, + opts.doneSignal, + opts.proof, + reviewNotes + ); reviewNotes = ""; const result = await runAgent(opts.agent, prompt, opts); const output = `${result.parsed}\n${result.combined}`; - const done = hasSignal(output, opts.doneSignal); + const done = hasSignal(output, doneSignal); if (!done && result.exitCode !== 0) { - throw new Error(`[loop] ${opts.agent} exited with code ${result.exitCode}`); + throw new Error( + `[loop] ${opts.agent} exited with code ${result.exitCode}` + ); } if (!done) { continue; } if (result.exitCode !== 0) { - console.log( - `[loop] ${doneSignalText(opts.doneSignal)} seen despite exit code ${result.exitCode}.` - ); + console.log(doneSignalExitText(doneSignal, result.exitCode)); } if (!shouldReview) { - console.log(doneSignalMissingText(opts.doneSignal)); + console.log(doneSignalMissingText(doneSignal)); return true; } const review = await runReview(reviewers, task, opts); @@ -52,10 +64,10 @@ const runIterations = async ( return true; } if (review.consensusFail) { - reviewNotes = - `Both reviewers requested changes. Decide for each comment whether to address it now. ` + - `If you skip one, explain why briefly. If both reviews found the same issue, it might be worth addressing.\n\n${review.notes}`; - console.log("\n[loop] both reviews collected. original agent deciding what to address."); + reviewNotes = bothReviewersNotes(review.notes); + console.log( + "\n[loop] both reviews collected. original agent deciding what to address." + ); continue; } reviewNotes = review.notes || "Reviewer found more work to do."; @@ -71,27 +83,31 @@ export const runLoop = async (task: string, opts: Options): Promise => { ? createInterface({ input: process.stdin, output: process.stdout }) : undefined; let hasExistingPr = false; - let currentTask = task; + let loopTask = task; while (true) { - const done = await runIterations(currentTask, opts, reviewers, hasExistingPr); + const done = await runIterations(loopTask, opts, reviewers, hasExistingPr); if (reviewers.length > 0 && done) { hasExistingPr = true; } if (!rl) { if (!done) { - console.log(`\n[loop] reached max iterations (${opts.maxIterations}), stopping.`); + console.log( + `\n[loop] reached max iterations (${opts.maxIterations}), stopping.` + ); } return; } if (!done) { console.log(`\n[loop] reached max iterations (${opts.maxIterations}).`); } - const answer = await rl.question("\n[loop] follow-up prompt (blank to exit): "); + const answer = await rl.question( + "\n[loop] follow-up prompt (blank to exit): " + ); const followUp = answer.trim() || null; if (!followUp) { rl.close(); return; } - currentTask = `${currentTask}\n\nFollow-up:\n${followUp}`; + loopTask = `${loopTask}\n\nFollow-up:\n${followUp}`; } }; diff --git a/tests/loop/args.test.ts b/tests/loop/args.test.ts index bfa3577..1c924ba 100644 --- a/tests/loop/args.test.ts +++ b/tests/loop/args.test.ts @@ -90,7 +90,12 @@ test("parseArgs uses LOOP_CODEX_MODEL when present", () => { }); test("parseArgs uses --codex-model when provided", () => { - const opts = parseArgs(["--codex-model", "custom-model", "--proof", "verify"]); + const opts = parseArgs([ + "--codex-model", + "custom-model", + "--proof", + "verify", + ]); expect(opts.model).toBe("custom-model"); });