From f89b868c3b657f313dd58070884dba45d1e2ab62 Mon Sep 17 00:00:00 2001 From: msl2246 Date: Sat, 18 Apr 2026 15:57:32 +0900 Subject: [PATCH] Respect Codex sandbox config in plugin threads --- README.md | 2 + plugins/codex/agents/codex-rescue.md | 2 + plugins/codex/scripts/codex-companion.mjs | 21 +++- plugins/codex/scripts/lib/codex.mjs | 18 ++- .../codex/skills/codex-cli-runtime/SKILL.md | 3 + tests/commands.test.mjs | 13 +- tests/fake-codex-fixture.mjs | 5 +- tests/runtime.test.mjs | 112 ++++++++++++++++++ 8 files changed, 165 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 458c39fb..d813e867 100644 --- a/README.md +++ b/README.md @@ -270,6 +270,8 @@ Your configuration will be picked up based on: Check out the Codex docs for more [configuration options](https://developers.openai.com/codex/config-reference). +By default, the plugin pins review runs to Codex's read-only sandbox and maps write-capable rescue tasks to `workspace-write`. If your local environment cannot initialize the Codex sandbox, set `CODEX_COMPANION_SANDBOX_MODE=inherit` before starting Claude Code to let Codex apply your configured `sandbox_mode` directly. You can also set it to `read-only`, `workspace-write`, or `danger-full-access` to force a specific sandbox mode for plugin-launched Codex threads. + ### Moving The Work Over To Codex Delegated tasks and any [stop gate](#what-does-the-review-gate-do) run can also be directly resumed inside Codex by running `codex resume` either with the specific session ID you received from running `/codex:result` or `/codex:status` or by selecting it from the list. diff --git a/plugins/codex/agents/codex-rescue.md b/plugins/codex/agents/codex-rescue.md index 7009ec86..451bb479 100644 --- a/plugins/codex/agents/codex-rescue.md +++ b/plugins/codex/agents/codex-rescue.md @@ -32,6 +32,8 @@ Forwarding rules: - If the user asks for a concrete model name such as `gpt-5.4-mini`, pass it through with `--model`. - Treat `--effort ` and `--model ` as runtime controls and do not include them in the task text you pass through. - Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits. +- If the user says Codex sandboxing, bwrap, bubblewrap, or Linux sandbox setup is failing, keep the single Bash call but prefix it with `CODEX_COMPANION_SANDBOX_MODE=inherit`. This lets Codex apply the user's configured sandbox mode instead of forcing the plugin's default task sandbox. +- If `CODEX_COMPANION_SANDBOX_MODE` is already present in the environment, preserve it. Do not unset it or replace it unless the user explicitly asks for a different sandbox mode. - Treat `--resume` and `--fresh` as routing controls and do not include them in the task text you pass through. - `--resume` means add `--resume-last`. - `--fresh` means do not add `--resume-last`. diff --git a/plugins/codex/scripts/codex-companion.mjs b/plugins/codex/scripts/codex-companion.mjs index 35222fd5..ddc5b415 100644 --- a/plugins/codex/scripts/codex-companion.mjs +++ b/plugins/codex/scripts/codex-companion.mjs @@ -67,6 +67,8 @@ const REVIEW_SCHEMA = path.join(ROOT_DIR, "schemas", "review-output.schema.json" const DEFAULT_STATUS_WAIT_TIMEOUT_MS = 240000; const DEFAULT_STATUS_POLL_INTERVAL_MS = 2000; const VALID_REASONING_EFFORTS = new Set(["none", "minimal", "low", "medium", "high", "xhigh"]); +const VALID_SANDBOX_MODES = new Set(["read-only", "workspace-write", "danger-full-access"]); +const SANDBOX_MODE_ENV = "CODEX_COMPANION_SANDBOX_MODE"; const MODEL_ALIASES = new Map([["spark", "gpt-5.3-codex-spark"]]); const STOP_REVIEW_TASK_MARKER = "Run a stop-gate review of the previous Claude turn."; @@ -153,6 +155,20 @@ function resolveCommandWorkspace(options = {}) { return resolveWorkspaceRoot(resolveCommandCwd(options)); } +function resolveSandboxMode(defaultMode) { + const configured = process.env[SANDBOX_MODE_ENV]?.trim(); + if (!configured) { + return defaultMode; + } + if (configured === "inherit") { + return null; + } + if (VALID_SANDBOX_MODES.has(configured)) { + return configured; + } + throw new Error(`Invalid ${SANDBOX_MODE_ENV}: ${configured}`); +} + function sleep(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } @@ -367,6 +383,7 @@ async function executeReviewRun(request) { const result = await runAppServerReview(request.cwd, { target: reviewTarget, model: request.model, + sandbox: resolveSandboxMode("read-only"), onProgress: request.onProgress }); const payload = { @@ -408,7 +425,7 @@ async function executeReviewRun(request) { const result = await runAppServerTurn(context.repoRoot, { prompt, model: request.model, - sandbox: "read-only", + sandbox: resolveSandboxMode("read-only"), outputSchema: readOutputSchema(REVIEW_SCHEMA), onProgress: request.onProgress }); @@ -485,7 +502,7 @@ async function executeTaskRun(request) { defaultPrompt: resumeThreadId ? DEFAULT_CONTINUE_PROMPT : "", model: request.model, effort: request.effort, - sandbox: request.write ? "workspace-write" : "read-only", + sandbox: resolveSandboxMode(request.write ? "workspace-write" : "read-only"), onProgress: request.onProgress, persistThread: true, threadName: resumeThreadId ? null : buildPersistentTaskThreadName(request.prompt || DEFAULT_CONTINUE_PROMPT) diff --git a/plugins/codex/scripts/lib/codex.mjs b/plugins/codex/scripts/lib/codex.mjs index f2fe88bd..744d4b99 100644 --- a/plugins/codex/scripts/lib/codex.mjs +++ b/plugins/codex/scripts/lib/codex.mjs @@ -54,26 +54,32 @@ function cleanCodexStderr(stderr) { /** @returns {ThreadStartParams} */ function buildThreadParams(cwd, options = {}) { - return { + const params = { cwd, model: options.model ?? null, approvalPolicy: options.approvalPolicy ?? "never", - sandbox: options.sandbox ?? "read-only", serviceName: SERVICE_NAME, ephemeral: options.ephemeral ?? true, experimentalRawEvents: false }; + if (typeof options.sandbox === "string") { + params.sandbox = options.sandbox; + } + return params; } /** @returns {ThreadResumeParams} */ function buildResumeParams(threadId, cwd, options = {}) { - return { + const params = { threadId, cwd, model: options.model ?? null, - approvalPolicy: options.approvalPolicy ?? "never", - sandbox: options.sandbox ?? "read-only" + approvalPolicy: options.approvalPolicy ?? "never" }; + if (typeof options.sandbox === "string") { + params.sandbox = options.sandbox; + } + return params; } /** @returns {UserInput[]} */ @@ -915,7 +921,7 @@ export async function runAppServerReview(cwd, options = {}) { emitProgress(options.onProgress, "Starting Codex review thread.", "starting"); const thread = await startThread(client, cwd, { model: options.model, - sandbox: "read-only", + sandbox: options.sandbox === undefined ? "read-only" : options.sandbox, ephemeral: true, threadName: options.threadName }); diff --git a/plugins/codex/skills/codex-cli-runtime/SKILL.md b/plugins/codex/skills/codex-cli-runtime/SKILL.md index 0e91bfb5..06c6127e 100644 --- a/plugins/codex/skills/codex-cli-runtime/SKILL.md +++ b/plugins/codex/skills/codex-cli-runtime/SKILL.md @@ -22,6 +22,8 @@ Execution rules: - Leave model unset by default. Add `--model` only when the user explicitly asks for one. - Map `spark` to `--model gpt-5.3-codex-spark`. - Default to a write-capable Codex run by adding `--write` unless the user explicitly asks for read-only behavior or only wants review, diagnosis, or research without edits. +- If the user reports `bwrap`, `bubblewrap`, Codex sandbox, or Linux sandbox setup failures, prefix the single `task` command with `CODEX_COMPANION_SANDBOX_MODE=inherit`. Example: `CODEX_COMPANION_SANDBOX_MODE=inherit node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" task --write ""`. +- If `CODEX_COMPANION_SANDBOX_MODE` is already set in the environment, let it pass through unchanged unless the user explicitly requests another sandbox mode. Command selection: - Use exactly one `task` invocation per rescue handoff. @@ -34,6 +36,7 @@ Command selection: - `--fresh`: always use a fresh `task` run, even if the request sounds like a follow-up. - `--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`. - `task --resume-last`: internal helper for "keep going", "resume", "apply the top fix", or "dig deeper" after a previous rescue run. +- Sandbox override: `CODEX_COMPANION_SANDBOX_MODE=inherit` omits the app-server sandbox field so Codex uses its configured `sandbox_mode`. The variable also accepts `read-only`, `workspace-write`, and `danger-full-access`, but only set those explicit modes when the user asks for that exact sandbox behavior. Safety rules: - Default to write-capable Codex work in `codex:codex-rescue` unless the user explicitly asks for read-only behavior. diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs index ef5adb09..a4215bf5 100644 --- a/tests/commands.test.mjs +++ b/tests/commands.test.mjs @@ -126,6 +126,10 @@ test("rescue command absorbs continue semantics", () => { assert.match(agent, /Leave model unset by default/i); assert.match(agent, /If the user asks for `spark`, map that to `--model gpt-5\.3-codex-spark`/i); assert.match(agent, /If the user asks for a concrete model name such as `gpt-5\.4-mini`, pass it through with `--model`/i); + assert.match(agent, /Default to a write-capable Codex run by adding `--write`/i); + assert.match(agent, /If the user says Codex sandboxing, bwrap, bubblewrap, or Linux sandbox setup is failing/i); + assert.match(agent, /CODEX_COMPANION_SANDBOX_MODE=inherit/i); + assert.match(agent, /If `CODEX_COMPANION_SANDBOX_MODE` is already present in the environment, preserve it/i); assert.match(agent, /Return the stdout of the `codex-companion` command exactly as-is/i); assert.match(agent, /If the Bash call fails or Codex cannot be invoked, return nothing/i); assert.match(agent, /gpt-5-4-prompting/); @@ -138,9 +142,14 @@ test("rescue command absorbs continue semantics", () => { assert.match(runtimeSkill, /Leave `--effort` unset unless the user explicitly requests a specific effort/i); assert.match(runtimeSkill, /Leave model unset by default/i); assert.match(runtimeSkill, /Map `spark` to `--model gpt-5\.3-codex-spark`/i); + assert.match(runtimeSkill, /Default to a write-capable Codex run by adding `--write`/i); + assert.match(runtimeSkill, /If the user reports `bwrap`, `bubblewrap`, Codex sandbox, or Linux sandbox setup failures/i); + assert.match(runtimeSkill, /CODEX_COMPANION_SANDBOX_MODE=inherit node "\$\{CLAUDE_PLUGIN_ROOT\}\/scripts\/codex-companion\.mjs" task --write ""/i); + assert.match(runtimeSkill, /If `CODEX_COMPANION_SANDBOX_MODE` is already set in the environment, let it pass through unchanged/i); assert.match(runtimeSkill, /If the forwarded request includes `--background` or `--wait`, treat that as Claude-side execution control only/i); assert.match(runtimeSkill, /Strip it before calling `task`/i); assert.match(runtimeSkill, /`--effort`: accepted values are `none`, `minimal`, `low`, `medium`, `high`, `xhigh`/i); + assert.match(runtimeSkill, /Sandbox override: `CODEX_COMPANION_SANDBOX_MODE=inherit` omits the app-server sandbox field/i); assert.match(runtimeSkill, /Do not inspect the repository, read files, grep, monitor progress, poll status, fetch results, cancel jobs, summarize output, or do any follow-up work of your own/i); assert.match(runtimeSkill, /If the Bash call fails or Codex cannot be invoked, return nothing/i); assert.match(readme, /`codex:codex-rescue` subagent/i); @@ -165,9 +174,9 @@ test("result and cancel commands are exposed as deterministic runtime entrypoint const resultHandling = read("skills/codex-result-handling/SKILL.md"); assert.match(result, /disable-model-invocation:\s*true/); - assert.match(result, /codex-companion\.mjs" result \$ARGUMENTS/); + assert.match(result, /codex-companion\.mjs" result "\$ARGUMENTS"/); assert.match(cancel, /disable-model-invocation:\s*true/); - assert.match(cancel, /codex-companion\.mjs" cancel \$ARGUMENTS/); + assert.match(cancel, /codex-companion\.mjs" cancel "\$ARGUMENTS"/); assert.match(resultHandling, /do not turn a failed or incomplete Codex run into a Claude-side implementation attempt/i); assert.match(resultHandling, /if Codex was never successfully invoked, do not generate a substitute answer at all/i); }); diff --git a/tests/fake-codex-fixture.mjs b/tests/fake-codex-fixture.mjs index debcadce..209197bc 100644 --- a/tests/fake-codex-fixture.mjs +++ b/tests/fake-codex-fixture.mjs @@ -18,7 +18,7 @@ const readline = require("node:readline"); function loadState() { if (!fs.existsSync(STATE_PATH)) { - return { nextThreadId: 1, nextTurnId: 1, appServerStarts: 0, threads: [], capabilities: null, lastInterrupt: null }; + return { nextThreadId: 1, nextTurnId: 1, appServerStarts: 0, threads: [], capabilities: null, lastThreadStart: null, lastThreadResume: null, lastInterrupt: null }; } return JSON.parse(fs.readFileSync(STATE_PATH, "utf8")); } @@ -297,6 +297,8 @@ rl.on("line", (line) => { throw new Error("thread/start.persistFullHistory requires experimentalApi capability"); } const thread = nextThread(state, message.params.cwd, message.params.ephemeral); + state.lastThreadStart = message.params; + saveState(state); send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model || "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } }); send({ method: "thread/started", params: { thread: { id: thread.id } } }); break; @@ -330,6 +332,7 @@ rl.on("line", (line) => { } const thread = ensureThread(state, message.params.threadId); thread.updatedAt = now(); + state.lastThreadResume = message.params; saveState(state); send({ id: message.id, result: { thread: buildThread(thread), model: message.params.model || "gpt-5.4", modelProvider: "openai", serviceTier: null, cwd: thread.cwd, approvalPolicy: "never", sandbox: { type: "readOnly", access: { type: "fullAccess" }, networkAccess: false }, reasoningEffort: null } }); break; diff --git a/tests/runtime.test.mjs b/tests/runtime.test.mjs index 90408372..78113e7e 100644 --- a/tests/runtime.test.mjs +++ b/tests/runtime.test.mjs @@ -157,6 +157,50 @@ test("review renders a no-findings result from app-server review/start", () => { assert.match(result.stdout, /No material issues found/); }); +test("review keeps the default read-only sandbox", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + + const result = run("node", [SCRIPT, "review"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8")); + assert.equal(state.lastThreadStart.approvalPolicy, "never"); + assert.equal(state.lastThreadStart.sandbox, "read-only"); +}); + +test("review can inherit the configured Codex sandbox when explicitly requested", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "README.md"), "hello again\n"); + + const result = run("node", [SCRIPT, "review"], { + cwd: repo, + env: { + ...buildEnv(binDir), + CODEX_COMPANION_SANDBOX_MODE: "inherit" + } + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8")); + assert.equal(state.lastThreadStart.sandbox, undefined); +}); + test("task runs when the active provider does not require OpenAI login", () => { const repo = makeTempDir(); const binDir = makeTempDir(); @@ -175,6 +219,48 @@ test("task runs when the active provider does not require OpenAI login", () => { assert.match(result.stdout, /Handled the requested task/); }); +test("task --write requests the default workspace-write sandbox", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "--write", "fix the bug"], { + cwd: repo, + env: buildEnv(binDir) + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8")); + assert.equal(state.lastThreadStart.approvalPolicy, "never"); + assert.equal(state.lastThreadStart.sandbox, "workspace-write"); +}); + +test("task --write can inherit the configured Codex sandbox when explicitly requested", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.writeFileSync(path.join(repo, "README.md"), "hello\n"); + run("git", ["add", "README.md"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + + const result = run("node", [SCRIPT, "task", "--write", "fix the bug"], { + cwd: repo, + env: { + ...buildEnv(binDir), + CODEX_COMPANION_SANDBOX_MODE: "inherit" + } + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8")); + assert.equal(state.lastThreadStart.sandbox, undefined); +}); + test("task runs without auth preflight so Codex can refresh an expired session", () => { const repo = makeTempDir(); const binDir = makeTempDir(); @@ -250,6 +336,32 @@ test("adversarial review renders structured findings over app-server turn/start" assert.equal(result.status, 0); assert.match(result.stdout, /Missing empty-state guard/); + const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8")); + assert.equal(state.lastThreadStart.sandbox, "read-only"); +}); + +test("adversarial review can inherit the configured Codex sandbox when explicitly requested", () => { + const repo = makeTempDir(); + const binDir = makeTempDir(); + installFakeCodex(binDir); + initGitRepo(repo); + fs.mkdirSync(path.join(repo, "src")); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0];\n"); + run("git", ["add", "src/app.js"], { cwd: repo }); + run("git", ["commit", "-m", "init"], { cwd: repo }); + fs.writeFileSync(path.join(repo, "src", "app.js"), "export const value = items[0].id;\n"); + + const result = run("node", [SCRIPT, "adversarial-review"], { + cwd: repo, + env: { + ...buildEnv(binDir), + CODEX_COMPANION_SANDBOX_MODE: "inherit" + } + }); + + assert.equal(result.status, 0, result.stderr); + const state = JSON.parse(fs.readFileSync(path.join(binDir, "fake-codex-state.json"), "utf8")); + assert.equal(state.lastThreadStart.sandbox, undefined); }); test("adversarial review accepts the same base-branch targeting as review", () => {