Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ Your configuration will be picked up based on:

Check out the Codex docs for more [configuration options](https://developers.openai.com/codex/config-reference).

By default, the plugin pins review runs to Codex's read-only sandbox and maps write-capable rescue tasks to `workspace-write`. If your local environment cannot initialize the Codex sandbox, set `CODEX_COMPANION_SANDBOX_MODE=inherit` before starting Claude Code to let Codex apply your configured `sandbox_mode` directly. You can also set it to `read-only`, `workspace-write`, or `danger-full-access` to force a specific sandbox mode for plugin-launched Codex threads.

### Moving The Work Over To Codex

Delegated tasks and any [stop gate](#what-does-the-review-gate-do) run can also be directly resumed inside Codex by running `codex resume` either with the specific session ID you received from running `/codex:result` or `/codex:status` or by selecting it from the list.
Expand Down
2 changes: 2 additions & 0 deletions plugins/codex/agents/codex-rescue.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ Forwarding rules:
- If the user asks for a concrete model name such as `gpt-5.4-mini`, pass it through with `--model`.
- Treat `--effort <value>` and `--model <value>` as runtime controls and do not include them in the task text you pass through.
- Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits.
- If the user says Codex sandboxing, bwrap, bubblewrap, or Linux sandbox setup is failing, keep the single Bash call but prefix it with `CODEX_COMPANION_SANDBOX_MODE=inherit`. This lets Codex apply the user's configured sandbox mode instead of forcing the plugin's default task sandbox.
- If `CODEX_COMPANION_SANDBOX_MODE` is already present in the environment, preserve it. Do not unset it or replace it unless the user explicitly asks for a different sandbox mode.
- Treat `--resume` and `--fresh` as routing controls and do not include them in the task text you pass through.
- `--resume` means add `--resume-last`.
- `--fresh` means do not add `--resume-last`.
Expand Down
21 changes: 19 additions & 2 deletions plugins/codex/scripts/codex-companion.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ const REVIEW_SCHEMA = path.join(ROOT_DIR, "schemas", "review-output.schema.json"
const DEFAULT_STATUS_WAIT_TIMEOUT_MS = 240000;
const DEFAULT_STATUS_POLL_INTERVAL_MS = 2000;
const VALID_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]);
// Explicit sandbox modes accepted from the override variable; "inherit" is handled separately by resolveSandboxMode.
const VALID_SANDBOX_MODES = new Set(["read-only", "workspace-write", "danger-full-access"]);
// Name of the environment variable resolveSandboxMode reads to override the default sandbox mode.
const SANDBOX_MODE_ENV = "CODEX_COMPANION_SANDBOX_MODE";
const MODEL_ALIASES = new Map([["spark", "gpt-5.3-codex-spark"]]);
const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Claude turn.";

Expand Down Expand Up @@ -153,6 +155,20 @@ function resolveCommandWorkspace(options = {}) {
return resolveWorkspaceRoot(resolveCommandCwd(options));
}

/**
 * Decide which sandbox mode to request, honoring the environment override.
 *
 * The override is read from the variable named by SANDBOX_MODE_ENV and is
 * whitespace-trimmed before being interpreted.
 *
 * @param {string} defaultMode - Mode to use when no override is set (or it is blank).
 * @returns {string | null} The mode to request, or null when the override is "inherit".
 * @throws {Error} When the override is neither "inherit" nor in VALID_SANDBOX_MODES.
 */
function resolveSandboxMode(defaultMode) {
  const rawOverride = process.env[SANDBOX_MODE_ENV];
  const override = typeof rawOverride === "string" ? rawOverride.trim() : "";

  // Unset and whitespace-only values both mean "no override".
  if (override === "") {
    return defaultMode;
  }

  switch (true) {
    case override === "inherit":
      // null tells the caller not to pick a sandbox mode itself.
      return null;
    case VALID_SANDBOX_MODES.has(override):
      return override;
    default:
      throw new Error(`Invalid ${SANDBOX_MODE_ENV}: ${override}`);
  }
}

/**
 * Return a promise that resolves (with no value) once the timer fires.
 *
 * @param {number} ms - Delay passed to setTimeout, in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
Expand Down Expand Up @@ -367,6 +383,7 @@ async function executeReviewRun(request) {
const result = await runAppServerReview(request.cwd, {
target: reviewTarget,
model: request.model,
sandbox: resolveSandboxMode("read-only"),
onProgress: request.onProgress
});
const payload = {
Expand Down Expand Up @@ -408,7 +425,7 @@ async function executeReviewRun(request) {
const result = await runAppServerTurn(context.repoRoot, {
prompt,
model: request.model,
sandbox: "read-only",
sandbox: resolveSandboxMode("read-only"),
outputSchema: readOutputSchema(REVIEW_SCHEMA),
onProgress: request.onProgress
Comment on lines 425 to 430
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Keep adversarial review threads in read-only sandbox

This call no longer pins a sandbox mode for adversarial reviews, so /codex:adversarial-review now inherits the user's global Codex sandbox. In environments configured as workspace-write or danger-full-access, the review turn can run with write-capable tooling and mutate the repo, which breaks the command’s documented read-only contract (README.md says adversarial review "does not fix code"). Please keep review flows explicitly read-only (or otherwise enforce read-only behavior independent of global task defaults).

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 56cb002. Native and adversarial review flows now keep the default read-only sandbox, preserving the documented review-only contract. The Codex-config inheritance path is now explicit via CODEX_COMPANION_SANDBOX_MODE=inherit, with README and regression coverage for the opt-in behavior.

});
Expand Down Expand Up @@ -485,7 +502,7 @@ async function executeTaskRun(request) {
defaultPrompt: resumeThreadId ? DEFAULT_CONTINUE_PROMPT : "",
model: request.model,
effort: request.effort,
Comment on lines 502 to 504
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Use --write flag to request writable task sandbox

executeTaskRun no longer maps request.write into the thread sandbox request, so task --write and read-only task runs now send the same sandbox params unless some external caller injects one. For users whose Codex config is read-only, --write silently loses its functional effect and rescue workflows that depend on write-capable runs will be unable to apply edits.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 56cb002. task --write now defaults back to workspace-write, while read-only task runs default to read-only. The only way to omit/override that sandbox request is the new explicit CODEX_COMPANION_SANDBOX_MODE escape hatch, so --write keeps its functional effect for users with read-only Codex defaults.

sandbox: request.write ? "workspace-write" : "read-only",
sandbox: resolveSandboxMode(request.write ? "workspace-write" : "read-only"),
onProgress: request.onProgress,
persistThread: true,
threadName: resumeThreadId ? null : buildPersistentTaskThreadName(request.prompt || DEFAULT_CONTINUE_PROMPT)
Expand Down
18 changes: 12 additions & 6 deletions plugins/codex/scripts/lib/codex.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -54,26 +54,32 @@ function cleanCodexStderr(stderr) {

/** @returns {ThreadStartParams} */
function buildThreadParams(cwd, options = {}) {
return {
const params = {
cwd,
model: options.model ?? null,
approvalPolicy: options.approvalPolicy ?? "never",
sandbox: options.sandbox ?? "read-only",
serviceName: SERVICE_NAME,
ephemeral: options.ephemeral ?? true,
experimentalRawEvents: false
};
if (typeof options.sandbox === "string") {
params.sandbox = options.sandbox;
}
return params;
}

/** @returns {ThreadResumeParams} */
function buildResumeParams(threadId, cwd, options = {}) {
return {
const params = {
threadId,
cwd,
model: options.model ?? null,
approvalPolicy: options.approvalPolicy ?? "never",
sandbox: options.sandbox ?? "read-only"
approvalPolicy: options.approvalPolicy ?? "never"
};
if (typeof options.sandbox === "string") {
params.sandbox = options.sandbox;
}
return params;
}

/** @returns {UserInput[]} */
Expand Down Expand Up @@ -915,7 +921,7 @@ export async function runAppServerReview(cwd, options = {}) {
emitProgress(options.onProgress, "Starting Codex review thread.", "starting");
const thread = await startThread(client, cwd, {
model: options.model,
sandbox: "read-only",
sandbox: options.sandbox === undefined ? "read-only" : options.sandbox,
ephemeral: true,
threadName: options.threadName
});
Expand Down
3 changes: 3 additions & 0 deletions plugins/codex/skills/codex-cli-runtime/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Execution rules:
- Leave model unset by default. Add `--model` only when the user explicitly asks for one.
- Map `spark` to `--model gpt-5.3-codex-spark`.
- Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits.
- If the user reports `bwrap`, `bubblewrap`, Codex sandbox, or Linux sandbox setup failures, prefix the single `task` command with `CODEX_COMPANION_SANDBOX_MODE=inherit`. Example: `CODEX_COMPANION_SANDBOX_MODE=inherit node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task --write "<raw arguments>"`.
- If `CODEX_COMPANION_SANDBOX_MODE` is already set in the environment, let it pass through unchanged unless the user explicitly requests another sandbox mode.

Command selection:
- Use exactly one `task` invocation per rescue handoff.
Expand All @@ -34,6 +36,7 @@ Command selection:
- `--fresh`: always use a fresh `task` run, even if the request sounds like a follow-up.
- `--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`.
- `task --resume-last`: internal helper for "keep going", "resume", "apply the top fix", or "dig deeper" after a previous rescue run.
- Sandbox override: `CODEX_COMPANION_SANDBOX_MODE=inherit` omits the app-server sandbox field so Codex uses its configured `sandbox_mode`. The variable also accepts `read-only`, `workspace-write`, and `danger-full-access`, but only set those explicit modes when the user asks for that exact sandbox behavior.

Safety rules:
- Default to write-capable Codex work in `codex:codex-rescue` unless the user explicitly asks for read-only behavior.
Expand Down
13 changes: 11 additions & 2 deletions tests/commands.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ test("rescue command absorbs continue semantics", () => {
assert.match(agent, /Leave model unset by default/i);
assert.match(agent, /If the user asks for `spark`, map that to `--model gpt-5\.3-codex-spark`/i);
assert.match(agent, /If the user asks for a concrete model name such as `gpt-5\.4-mini`, pass it through with `--model`/i);
assert.match(agent, /Default to a write-capable Codex run by adding `--write`/i);
assert.match(agent, /If the user says Codex sandboxing, bwrap, bubblewrap, or Linux sandbox setup is failing/i);
assert.match(agent, /CODEX_COMPANION_SANDBOX_MODE=inherit/i);
assert.match(agent, /If `CODEX_COMPANION_SANDBOX_MODE` is already present in the environment, preserve it/i);
assert.match(agent, /Return the stdout of the `codex-companion` command exactly as-is/i);
assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
assert.match(agent, /gpt-5-4-prompting/);
Expand All @@ -138,9 +142,14 @@ test("rescue command absorbs continue semantics", () => {
assert.match(runtimeSkill, /Leave `--effort` unset unless the user explicitly requests a specific effort/i);
assert.match(runtimeSkill, /Leave model unset by default/i);
assert.match(runtimeSkill, /Map `spark` to `--model gpt-5\.3-codex-spark`/i);
assert.match(runtimeSkill, /Default to a write-capable Codex run by adding `--write`/i);
assert.match(runtimeSkill, /If the user reports `bwrap`, `bubblewrap`, Codex sandbox, or Linux sandbox setup failures/i);
assert.match(runtimeSkill, /CODEX_COMPANION_SANDBOX_MODE=inherit node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/codex-companion\.mjs" task --write "<raw arguments>"/i);
assert.match(runtimeSkill, /If `CODEX_COMPANION_SANDBOX_MODE` is already set in the environment, let it pass through unchanged/i);
assert.match(runtimeSkill, /If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only/i);
assert.match(runtimeSkill, /Strip it before calling `task`/i);
assert.match(runtimeSkill, /`--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`/i);
assert.match(runtimeSkill, /Sandbox override: `CODEX_COMPANION_SANDBOX_MODE=inherit` omits the app-server sandbox field/i);
assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i);
assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return nothing/i);
assert.match(readme, /`codex:codex-rescue` subagent/i);
Expand All @@ -165,9 +174,9 @@ test("result and cancel commands are exposed as deterministic runtime entrypoint
const resultHandling = read("skills/codex-result-handling/SKILL.md");

assert.match(result, /disable-model-invocation:\s*true/);
assert.match(result, /codex-companion\.mjs" result \$ARGUMENTS/);
assert.match(result, /codex-companion\.mjs" result "\$ARGUMENTS"/);
assert.match(cancel, /disable-model-invocation:\s*true/);
assert.match(cancel, /codex-companion\.mjs" cancel \$ARGUMENTS/);
assert.match(cancel, /codex-companion\.mjs" cancel "\$ARGUMENTS"/);
assert.match(resultHandling, /do not turn a failed or incomplete Codex run into a Claude-side implementation attempt/i);
assert.match(resultHandling, /if Codex was never successfully invoked, do not generate a substitute answer at all/i);
});
Expand Down
5 changes: 4 additions & 1 deletion tests/fake-codex-fixture.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const readline = require("node:readline");

function loadState() {
if (!fs.existsSync(STATE_PATH)) {
return { nextThreadId: 1, nextTurnId: 1, appServerStarts: 0, threads: [], capabilities: null, lastInterrupt: null };
return { nextThreadId: 1, nextTurnId: 1, appServerStarts: 0, threads: [], capabilities: null, lastThreadStart: null, lastThreadResume: null, lastInterrupt: null };
}
return JSON.parse(fs.readFileSync(STATE_PATH, "utf8"));
}
Expand Down Expand Up @@ -297,6 +297,8 @@ rl.on("line", (line) => {
throw new Error("thread/start.persistFullHistory requires experimentalApi capability");
}
const thread = nextThread(state, message.params.cwd, message.params.ephemeral);
state.lastThreadStart = message.params;
saveState(state);
send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model || "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } });
send({ method: "thread/started", params: { thread: { id: thread.id } } });
break;
Expand Down Expand Up @@ -330,6 +332,7 @@ rl.on("line", (line) => {
}
const thread = ensureThread(state, message.params.threadId);
thread.updatedAt = now();
state.lastThreadResume = message.params;
saveState(state);
send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model || "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } });
break;
Expand Down
112 changes: 112 additions & 0 deletions tests/runtime.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,50 @@ test("review renders a no-findings result from app-server review/start", () => {
assert.match(result.stdout, /No material issues found/);
});

test("review keeps the default read-only sandbox", () => {
  // Arrange: commit a README, then modify it so the working tree differs from HEAD.
  const workTree = makeTempDir();
  const fakeBin = makeTempDir();
  installFakeCodex(fakeBin);
  initGitRepo(workTree);
  const readmePath = path.join(workTree, "README.md");
  fs.writeFileSync(readmePath, "hello\n");
  run("git", ["add", "README.md"], { cwd: workTree });
  run("git", ["commit", "-m", "init"], { cwd: workTree });
  fs.writeFileSync(readmePath, "hello again\n");

  // Act: run the review with no sandbox override added to the environment.
  const outcome = run("node", [SCRIPT, "review"], {
    cwd: workTree,
    env: buildEnv(fakeBin)
  });

  // Assert: the recorded thread/start params still pin the read-only sandbox.
  assert.equal(outcome.status, 0, outcome.stderr);
  const statePath = path.join(fakeBin, "fake-codex-state.json");
  const recorded = JSON.parse(fs.readFileSync(statePath, "utf8"));
  assert.equal(recorded.lastThreadStart.approvalPolicy, "never");
  assert.equal(recorded.lastThreadStart.sandbox, "read-only");
});

test("review can inherit the configured Codex sandbox when explicitly requested", () => {
  // Arrange: commit a README, then modify it so the working tree differs from HEAD.
  const workTree = makeTempDir();
  const fakeBin = makeTempDir();
  installFakeCodex(fakeBin);
  initGitRepo(workTree);
  const readmePath = path.join(workTree, "README.md");
  fs.writeFileSync(readmePath, "hello\n");
  run("git", ["add", "README.md"], { cwd: workTree });
  run("git", ["commit", "-m", "init"], { cwd: workTree });
  fs.writeFileSync(readmePath, "hello again\n");

  // Act: opt in to inheritance through the override variable.
  const childEnv = {
    ...buildEnv(fakeBin),
    CODEX_COMPANION_SANDBOX_MODE: "inherit"
  };
  const outcome = run("node", [SCRIPT, "review"], { cwd: workTree, env: childEnv });

  // Assert: the recorded thread/start params carry no sandbox field at all.
  assert.equal(outcome.status, 0, outcome.stderr);
  const statePath = path.join(fakeBin, "fake-codex-state.json");
  const recorded = JSON.parse(fs.readFileSync(statePath, "utf8"));
  assert.equal(recorded.lastThreadStart.sandbox, undefined);
});

test("task runs when the active provider does not require OpenAI login", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
Expand All @@ -175,6 +219,48 @@ test("task runs when the active provider does not require OpenAI login", () => {
assert.match(result.stdout, /Handled the requested task/);
});

test("task --write requests the default workspace-write sandbox", () => {
  // Arrange: a repository with a single committed README.
  const workTree = makeTempDir();
  const fakeBin = makeTempDir();
  installFakeCodex(fakeBin);
  initGitRepo(workTree);
  fs.writeFileSync(path.join(workTree, "README.md"), "hello\n");
  run("git", ["add", "README.md"], { cwd: workTree });
  run("git", ["commit", "-m", "init"], { cwd: workTree });

  // Act: a write-capable task with no sandbox override added to the environment.
  const taskArgs = [SCRIPT, "task", "--write", "fix the bug"];
  const outcome = run("node", taskArgs, {
    cwd: workTree,
    env: buildEnv(fakeBin)
  });

  // Assert: --write maps to workspace-write in the recorded thread/start params.
  assert.equal(outcome.status, 0, outcome.stderr);
  const statePath = path.join(fakeBin, "fake-codex-state.json");
  const recorded = JSON.parse(fs.readFileSync(statePath, "utf8"));
  assert.equal(recorded.lastThreadStart.approvalPolicy, "never");
  assert.equal(recorded.lastThreadStart.sandbox, "workspace-write");
});

test("task --write can inherit the configured Codex sandbox when explicitly requested", () => {
  // Arrange: a repository with a single committed README.
  const workTree = makeTempDir();
  const fakeBin = makeTempDir();
  installFakeCodex(fakeBin);
  initGitRepo(workTree);
  fs.writeFileSync(path.join(workTree, "README.md"), "hello\n");
  run("git", ["add", "README.md"], { cwd: workTree });
  run("git", ["commit", "-m", "init"], { cwd: workTree });

  // Act: a write-capable task with the override variable set to "inherit".
  const childEnv = {
    ...buildEnv(fakeBin),
    CODEX_COMPANION_SANDBOX_MODE: "inherit"
  };
  const taskArgs = [SCRIPT, "task", "--write", "fix the bug"];
  const outcome = run("node", taskArgs, { cwd: workTree, env: childEnv });

  // Assert: the recorded thread/start params carry no sandbox field at all.
  assert.equal(outcome.status, 0, outcome.stderr);
  const statePath = path.join(fakeBin, "fake-codex-state.json");
  const recorded = JSON.parse(fs.readFileSync(statePath, "utf8"));
  assert.equal(recorded.lastThreadStart.sandbox, undefined);
});

test("task runs without auth preflight so Codex can refresh an expired session", () => {
const repo = makeTempDir();
const binDir = makeTempDir();
Expand Down Expand Up @@ -250,6 +336,32 @@ test("adversarial review renders structured findings over app-server turn/start"

assert.equal(result.status, 0);
assert.match(result.stdout, /Missing empty-state guard/);
const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8"));
assert.equal(state.lastThreadStart.sandbox, "read-only");
});

test("adversarial review can inherit the configured Codex sandbox when explicitly requested", () => {
  // Arrange: commit src/app.js, then modify it so the working tree differs from HEAD.
  const workTree = makeTempDir();
  const fakeBin = makeTempDir();
  installFakeCodex(fakeBin);
  initGitRepo(workTree);
  fs.mkdirSync(path.join(workTree, "src"));
  const appPath = path.join(workTree, "src", "app.js");
  fs.writeFileSync(appPath, "export const value = items[0];\n");
  run("git", ["add", "src/app.js"], { cwd: workTree });
  run("git", ["commit", "-m", "init"], { cwd: workTree });
  fs.writeFileSync(appPath, "export const value = items[0].id;\n");

  // Act: run the adversarial review with the override variable set to "inherit".
  const childEnv = {
    ...buildEnv(fakeBin),
    CODEX_COMPANION_SANDBOX_MODE: "inherit"
  };
  const outcome = run("node", [SCRIPT, "adversarial-review"], { cwd: workTree, env: childEnv });

  // Assert: the recorded thread/start params carry no sandbox field at all.
  assert.equal(outcome.status, 0, outcome.stderr);
  const statePath = path.join(fakeBin, "fake-codex-state.json");
  const recorded = JSON.parse(fs.readFileSync(statePath, "utf8"));
  assert.equal(recorded.lastThreadStart.sandbox, undefined);
});

test("adversarial review accepts the same base-branch targeting as review", () => {
Expand Down