Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ When running from source (`bun src/loop.ts`), auto-update is disabled — use `g
- `-a, --agent <claude|codex>`: agent to run (default: `codex`)
- `-p, --prompt <text|.md file>`: prompt text or a `.md` prompt file path. Plain text auto-creates `PLAN.md` first.
- `--proof <text>`: required proof criteria for task completion
- `--codex-model <model>`: set the model passed to codex (`LOOP_CODEX_MODEL` can also set this by default)
- `-m, --max-iterations <number>`: max loop count (default: infinite)
- `-d, --done <signal>`: done signal string (default: `<promise>DONE</promise>`)
- `--format <pretty|raw>`: output format (default: `pretty`)
Expand Down
13 changes: 13 additions & 0 deletions src/loop/args.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ const applyValueFlag = (
opts.proof = trimmed;
return;
}
if (flag === "codexModel") {
const trimmed = value.trim();
if (!trimmed) {
throw new Error("Invalid --codex-model value: cannot be empty");
}
opts.model = trimmed;
return;
}
opts.format = parseFormat(value);
};

Expand Down Expand Up @@ -116,6 +124,11 @@ const consumeArg = (
return { nextIndex: argv.length, stop: true };
}

if (arg.startsWith("--codex-model=")) {
applyValueFlag("codexModel", arg.slice("--codex-model=".length), opts);
return { nextIndex: index + 1, stop: false };
}

if (arg === "--review" || arg.startsWith("--review=")) {
return {
nextIndex: parseReviewArg(argv, index, opts, arg) + 1,
Expand Down
3 changes: 2 additions & 1 deletion src/loop/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ Options:
-p, --prompt <text|.md file> Prompt text or path to a .md prompt file
-m, --max-iterations <number>. Max loops (default: infinite)
-d, --done <signal> Done signal (default: <promise>DONE</promise>)
CODEX_TRANSPORT=app-server|exec Codex transport mode (default: app-server)
--proof <text> Proof requirements for task completion (required)
--codex-model <model> Override codex model (default: ${DEFAULT_CODEX_MODEL})
--format <pretty|raw> Log format (default: pretty)
--review [claude|codex|claudex] Review on done (default: claudex)
--tmux Run in a detached tmux session (name: repo-loop-X)
Expand All @@ -48,5 +48,6 @@ export const VALUE_FLAGS: Record<string, ValueFlag> = {
"-d": "done",
"--done": "done",
"--proof": "proof",
"--codex-model": "codexModel",
"--format": "format",
};
44 changes: 27 additions & 17 deletions src/loop/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,26 @@ import { runAgent } from "./runner";
import type { Options } from "./types";
import { hasSignal } from "./utils";

const doneSignalText = (doneSignal: string) => `done signal "${doneSignal}"`;
const doneSignalMissingText = (signal: string) =>
`\n[loop] ${doneSignalText(signal)} detected, stopping.`;
const doneSignalPassedText = (signal: string) =>
`\n[loop] ${doneSignalText(signal)} detected and review passed, stopping.`;
const doneSignalExitText = (doneSignal: string, exitCode: number) =>
`[loop] ${doneSignalText(doneSignal)} seen despite exit code ${exitCode}.`;
const bothReviewersNotes = (notes: string): string =>
"Both reviewers requested changes. Decide for each comment whether to address it now. " +
`If you skip one, explain why briefly. If both reviews found the same issue, it might be worth addressing.\n\n${notes}`;

const runIterations = async (
task: string,
opts: Options,
reviewers: string[],
hasExistingPr = false
): Promise<boolean> => {
let reviewNotes = "";
const stopMessage = `\n[loop] done signal "${opts.doneSignal}" detected`;
const shouldReview = reviewers.length > 0;
const doneSignal = opts.doneSignal;
console.log(`\n[loop] PLAN.md:\n\n${task}`);
for (let i = 1; i <= opts.maxIterations; i++) {
const tag = Number.isFinite(opts.maxIterations)
Expand All @@ -28,28 +40,31 @@ const runIterations = async (
);
reviewNotes = "";
const result = await runAgent(opts.agent, prompt, opts);
if (result.exitCode !== 0) {
const output = `${result.parsed}\n${result.combined}`;
const done = hasSignal(output, doneSignal);
if (!done && result.exitCode !== 0) {
throw new Error(
`[loop] ${opts.agent} exited with code ${result.exitCode}`
);
}
if (!hasSignal(`${result.parsed}\n${result.combined}`, opts.doneSignal)) {
if (!done) {
continue;
}
if (reviewers.length === 0) {
console.log(`${stopMessage}, stopping.`);
if (result.exitCode !== 0) {
console.log(doneSignalExitText(doneSignal, result.exitCode));
}
if (!shouldReview) {
console.log(doneSignalMissingText(doneSignal));
return true;
}
const review = await runReview(reviewers, task, opts);
if (review.approved) {
await runDraftPrStep(task, opts, hasExistingPr);
console.log(`${stopMessage} and review passed, stopping.`);
console.log(doneSignalPassedText(opts.doneSignal));
return true;
}
if (review.consensusFail) {
reviewNotes =
"Both reviewers requested changes. Decide for each comment whether to address it now. " +
`If you skip one, explain why briefly. If both reviews found the same issue, it might be worth addressing.\n\n${review.notes}`;
reviewNotes = bothReviewersNotes(review.notes);
console.log(
"\n[loop] both reviews collected. original agent deciding what to address."
);
Expand All @@ -68,14 +83,9 @@ export const runLoop = async (task: string, opts: Options): Promise<void> => {
? createInterface({ input: process.stdin, output: process.stdout })
: undefined;
let hasExistingPr = false;
let currentTask = task;
let loopTask = task;
while (true) {
const done = await runIterations(
currentTask,
opts,
reviewers,
hasExistingPr
);
const done = await runIterations(loopTask, opts, reviewers, hasExistingPr);
if (reviewers.length > 0 && done) {
hasExistingPr = true;
}
Expand All @@ -98,6 +108,6 @@ export const runLoop = async (task: string, opts: Options): Promise<void> => {
rl.close();
return;
}
currentTask = `${currentTask}\n\nFollow-up:\n${followUp}`;
loopTask = `${loopTask}\n\nFollow-up:\n${followUp}`;
}
};
1 change: 1 addition & 0 deletions src/loop/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export type ValueFlag =
| "max"
| "done"
| "proof"
| "codexModel"
| "format";

export interface Options {
Expand Down
21 changes: 21 additions & 0 deletions tests/loop/args.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,24 @@ test("parseArgs uses LOOP_CODEX_MODEL when present", () => {
expect(opts.model).toBe("test-model");
});

test("parseArgs uses --codex-model when provided", () => {
const opts = parseArgs([
"--codex-model",
"custom-model",
"--proof",
"verify",
]);

expect(opts.model).toBe("custom-model");
});

test("parseArgs with --codex-model= overrides LOOP_CODEX_MODEL", () => {
process.env.LOOP_CODEX_MODEL = "env-model";
const opts = parseArgs(["--codex-model=flag-model", "--proof", "verify"]);

expect(opts.model).toBe("flag-model");
});

test("parseArgs handles all value flags and explicit reviewer", () => {
const opts = parseArgs([
"--agent",
Expand All @@ -104,6 +122,8 @@ test("parseArgs handles all value flags and explicit reviewer", () => {
"--format",
"pretty",
"--review=claudex",
"--codex-model",
"custom-model",
]);

expect(opts.agent).toBe("claude");
Expand All @@ -113,6 +133,7 @@ test("parseArgs handles all value flags and explicit reviewer", () => {
expect(opts.proof).toBe("verify this");
expect(opts.format).toBe("pretty");
expect(opts.review).toBe("claudex");
expect(opts.model).toBe("custom-model");
});

test("parseArgs treats bare --review as claudex when no reviewer follows", () => {
Expand Down
38 changes: 38 additions & 0 deletions tests/loop/main.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,19 @@ test("runLoop stops immediately on done signal when review is disabled", async (
expect(runDraftPrStep).not.toHaveBeenCalled();
});

test("runLoop stops on done signal even if agent exits non-zero when review is disabled", async () => {
const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({
resolveReviewers: () => [],
runAgent: async () => makeRunResult("<done/>", "", 1),
});

await runLoop("Ship feature", makeOptions({ review: undefined }));

expect(runAgent).toHaveBeenCalledTimes(1);
expect(runReview).not.toHaveBeenCalled();
expect(runDraftPrStep).not.toHaveBeenCalled();
});

test("runLoop creates draft PR when done signal is reviewed and approved", async () => {
const opts = makeOptions({ review: "claudex" });
const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({
Expand All @@ -116,6 +129,31 @@ test("runLoop creates draft PR when done signal is reviewed and approved", async
);
});

test("runLoop creates draft PR when done signal is reviewed and approved even if agent exits non-zero", async () => {
const opts = makeOptions({ review: "claudex" });
const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({
resolveReviewers: () => ["codex", "claude"],
runAgent: async () => makeRunResult("<done/>", "", 1),
runReview: async () => ({
approved: true,
consensusFail: false,
notes: "",
}),
});

await runLoop("Ship feature", opts);

expect(runAgent).toHaveBeenCalledTimes(1);
expect(runReview).toHaveBeenCalledTimes(1);
expect(runDraftPrStep).toHaveBeenCalledTimes(1);
expect(runDraftPrStep).toHaveBeenNthCalledWith(
1,
"Ship feature",
opts,
false
);
});

test("runLoop uses follow-up commit prompt after a PR is already created", async () => {
const answers = ["Update docs", ""];
const { runLoop, runAgent, runReview, runDraftPrStep } = await loadRunLoop({
Expand Down