From 0115b09eab57fc65c264d2b1a7a258d117f7b2f3 Mon Sep 17 00:00:00 2001 From: Bastian Zimmermann <10774221+BastianZim@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:17:44 -0400 Subject: [PATCH 1/2] fix: allow model invocation for codex commands except review (#211) Remove `disable-model-invocation: true` from adversarial-review, cancel, result, and status commands so Claude can route explicit user requests and other skills/workflows can invoke them. The flag is intentionally kept on `review` to prevent proactive Codex spend on the heavyweight native review. An explicit guardrail is added to adversarial-review telling Claude to only run it when explicitly asked. Also fixes a pre-existing test regression from #168 where the quoted `$ARGUMENTS` pattern was not reflected in the test regex. Closes #211 Supersedes #156 #157 Co-Authored-By: Claude Opus 4.6 (1M context) --- plugins/codex/commands/adversarial-review.md | 2 +- plugins/codex/commands/cancel.md | 1 - plugins/codex/commands/result.md | 1 - plugins/codex/commands/status.md | 1 - tests/commands.test.mjs | 26 +++++++++++++++++--- 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/plugins/codex/commands/adversarial-review.md b/plugins/codex/commands/adversarial-review.md index da440ab4..503feec0 100644 --- a/plugins/codex/commands/adversarial-review.md +++ b/plugins/codex/commands/adversarial-review.md @@ -1,7 +1,6 @@ --- description: Run a Codex review that challenges the implementation approach and design choices argument-hint: '[--wait|--background] [--base ] [--scope auto|working-tree|branch] [focus ...]' -disable-model-invocation: true allowed-tools: Read, Glob, Grep, Bash(node:*), Bash(git:*), AskUserQuestion --- @@ -17,6 +16,7 @@ Core constraint: - Do not fix issues, apply patches, or suggest that you are about to make changes. - Your only job is to run the review and return Codex's output verbatim to the user. - Keep the framing focused on whether the current approach is the right one, what assumptions it depends on, and where the design could fail under real-world conditions. +- Only run this command when the user has explicitly asked for an adversarial Codex review — by slash command, by naming it in a message, or through another skill or workflow that clearly invokes it. Do not run it on your own initiative as a speculative quality check. Execution mode rules: - If the raw arguments include `--wait`, do not ask. Run in the foreground. diff --git a/plugins/codex/commands/cancel.md b/plugins/codex/commands/cancel.md index a1472b83..c898e833 100644 --- a/plugins/codex/commands/cancel.md +++ b/plugins/codex/commands/cancel.md @@ -1,7 +1,6 @@ --- description: Cancel an active background Codex job in this repository argument-hint: '[job-id]' -disable-model-invocation: true allowed-tools: Bash(node:*) --- diff --git a/plugins/codex/commands/result.md b/plugins/codex/commands/result.md index 3abc2d93..7fb065be 100644 --- a/plugins/codex/commands/result.md +++ b/plugins/codex/commands/result.md @@ -1,7 +1,6 @@ --- description: Show the stored final output for a finished Codex job in this repository argument-hint: '[job-id]' -disable-model-invocation: true allowed-tools: Bash(node:*) --- diff --git a/plugins/codex/commands/status.md b/plugins/codex/commands/status.md index 8f70663d..61e096c0 100644 --- a/plugins/codex/commands/status.md +++ b/plugins/codex/commands/status.md @@ -1,7 +1,6 @@ --- description: Show active and recent Codex jobs for this repository, including review-gate status argument-hint: '[job-id] [--wait] [--timeout-ms ] [--all]' -disable-model-invocation: true allowed-tools: Bash(node:*) --- diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs index ef5adb09..5222095b 100644 --- a/tests/commands.test.mjs +++ b/tests/commands.test.mjs @@ -164,14 +164,32 @@ test("result and cancel commands are exposed as deterministic runtime entrypoint const cancel = read("commands/cancel.md"); const resultHandling = read("skills/codex-result-handling/SKILL.md"); - assert.match(result, /disable-model-invocation:\s*true/); - assert.match(result, /codex-companion\.mjs" result \$ARGUMENTS/); - assert.match(cancel, /disable-model-invocation:\s*true/); - assert.match(cancel, /codex-companion\.mjs" cancel \$ARGUMENTS/); + assert.match(result, /codex-companion\.mjs" result "\$ARGUMENTS"/); + assert.match(cancel, /codex-companion\.mjs" cancel "\$ARGUMENTS"/); assert.match(resultHandling, /do not turn a failed or incomplete Codex run into a Claude-side implementation attempt/i); assert.match(resultHandling, /if Codex was never successfully invoked, do not generate a substitute answer at all/i); }); +test("model invocation policy: review is user-only, other commands are model-invokable", () => { + const review = read("commands/review.md"); + const adversarialReview = read("commands/adversarial-review.md"); + const cancel = read("commands/cancel.md"); + const result = read("commands/result.md"); + const status = read("commands/status.md"); + + // review is intentionally kept user-only to avoid proactive Codex spend + assert.match(review, /disable-model-invocation:\s*true/); + + // other commands are model-invokable so Claude can route explicit user requests + assert.doesNotMatch(adversarialReview, /disable-model-invocation:\s*true/); + assert.doesNotMatch(cancel, /disable-model-invocation:\s*true/); + assert.doesNotMatch(result, /disable-model-invocation:\s*true/); + assert.doesNotMatch(status, /disable-model-invocation:\s*true/); + + // adversarial-review has explicit guardrail against proactive invocation + assert.match(adversarialReview, /Only run this command when the user has explicitly asked/i); +}); + test("internal docs use task terminology for rescue runs", () => { const runtimeSkill = read("skills/codex-cli-runtime/SKILL.md"); const promptingSkill = read("skills/gpt-5-4-prompting/SKILL.md"); From e75a22a0c0e44d4522db8e837b4e85776a7e5dad Mon Sep 17 00:00:00 2001 From: Bastian Zimmermann <10774221+BastianZim@users.noreply.github.com> Date: Tue, 14 Apr 2026 13:28:13 -0400 Subject: [PATCH 2/2] fix: add explicit-user guardrail to cancel command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cancel is destructive — it terminates a running Codex job and with no job-id it auto-resolves to the single active job. Add a prompt-level guardrail matching the one on adversarial-review to prevent proactive invocation. Co-Authored-By: Claude Opus 4.6 (1M context) --- plugins/codex/commands/cancel.md | 2 ++ tests/commands.test.mjs | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/plugins/codex/commands/cancel.md b/plugins/codex/commands/cancel.md index c898e833..03cbc054 100644 --- a/plugins/codex/commands/cancel.md +++ b/plugins/codex/commands/cancel.md @@ -4,4 +4,6 @@ argument-hint: '[job-id]' allowed-tools: Bash(node:*) --- +Only cancel a job when the user has explicitly asked to cancel or stop it. Do not cancel jobs on your own initiative. + !`node "${CLAUDE_PLUGIN_ROOT}/scripts/codex-companion.mjs" cancel "$ARGUMENTS"` diff --git a/tests/commands.test.mjs b/tests/commands.test.mjs index 5222095b..20514109 100644 --- a/tests/commands.test.mjs +++ b/tests/commands.test.mjs @@ -186,8 +186,9 @@ test("model invocation policy: review is user-only, other commands are model-inv assert.doesNotMatch(result, /disable-model-invocation:\s*true/); assert.doesNotMatch(status, /disable-model-invocation:\s*true/); - // adversarial-review has explicit guardrail against proactive invocation + // commands with side effects have explicit guardrails against proactive invocation assert.match(adversarialReview, /Only run this command when the user has explicitly asked/i); + assert.match(cancel, /Only cancel a job when the user has explicitly asked/i); }); test("internal docs use task terminology for rescue runs", () => {