From 23df42e879550ef28996e5c791d0b6814e848257 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Wed, 22 Apr 2026 14:58:16 +0200 Subject: [PATCH] Preserve full cleanup-evidence scaffolds for merged-proof takeovers Lightweight prefixes stay the default escape hatch for tiny work, but cleanup-evidence asks still need a real OpenSpec change workspace so takeover lanes can carry PR URL, MERGED state, and sandbox-prune proof honestly. This narrows the override to artifact-heavy cleanup wording, promotes those requests from T1 to T2, and adds a routing regression plus change docs for the exact failing shape. Constraint: Ordinary simple-prefixed tasks must remain caveman-first and notes-only by default Rejected: Promote all simple-prefixed workflow wording to T2 | would weaken the explicit lightweight escape hatch Confidence: high Scope-risk: narrow Reversibility: clean Directive: Keep lightweight-prefix overrides narrow; only artifact-heavy cleanup-proof requests should bypass T1 Tested: node --test test/sandbox.test.js test/metadata.test.js Tested: openspec validate agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32 --type change --strict Tested: openspec validate --specs Not-tested: End-to-end finish pipeline before this commit --- .../proposal.md | 15 ++++ .../spec.md | 15 ++++ .../tasks.md | 34 ++++++++ scripts/codex-agent.sh | 16 +++- templates/scripts/codex-agent.sh | 16 +++- test/sandbox.test.js | 78 +++++++++++++++++++ 6 files changed, 170 insertions(+), 4 deletions(-) create mode 100644 openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/proposal.md create mode 100644 openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/specs/simple-record-merged-cleanup-evidence-for-task-mode-decider/spec.md create mode 100644 openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/tasks.md diff --git 
a/openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/proposal.md b/openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/proposal.md new file mode 100644 index 0000000..1a10fc7 --- /dev/null +++ b/openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/proposal.md @@ -0,0 +1,15 @@ +## Why + +- `codex-agent` currently treats any `simple:` task as T1 notes-only work, even when the task text explicitly asks for merged cleanup evidence and a real completion checklist. +- That routing loses the full OpenSpec change workspace (`proposal.md`, `tasks.md`, `spec.md`) the operator needs to carry finish-pipeline proof through PR merge and sandbox cleanup. + +## What Changes + +- Teach the task-mode decider to promote lightweight-prefixed tasks to T2 when the task wording explicitly asks for merged cleanup evidence or equivalent completion artifacts. +- Keep the normal `simple:` / `quick:` lightweight escape hatch unchanged for ordinary tiny tasks. +- Add a focused regression that proves cleanup-evidence tasks still get a full change workspace without escalating to a plan workspace. + +## Impact + +- Affects `codex-agent` task routing plus the generated `scripts/codex-agent.sh` template used by setup/install flows. +- Risk is narrow: only lightweight-prefixed tasks with cleanup-evidence wording change tier, and they move from T1 to T2 rather than to a plan-heavy T3 path. 
diff --git a/openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/specs/simple-record-merged-cleanup-evidence-for-task-mode-decider/spec.md b/openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/specs/simple-record-merged-cleanup-evidence-for-task-mode-decider/spec.md new file mode 100644 index 0000000..d98f4e2 --- /dev/null +++ b/openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/specs/simple-record-merged-cleanup-evidence-for-task-mode-decider/spec.md @@ -0,0 +1,15 @@ +## ADDED Requirements + +### Requirement: cleanup-evidence tasks keep a full change workspace +The task-mode decider SHALL keep a full OpenSpec change workspace for lightweight-prefixed tasks that explicitly ask for merged cleanup evidence or equivalent completion artifacts. + +#### Scenario: lightweight prefix still needs completion artifacts +- **WHEN** `codex-agent` receives a task with a lightweight prefix such as `simple:` and the task wording asks to record merged cleanup evidence, PR URL / `MERGED` state, or equivalent cleanup-pipeline proof +- **THEN** the task is routed to T2 +- **AND** the sandbox includes `proposal.md`, `tasks.md`, and `spec.md` +- **AND** no plan workspace is created. + +#### Scenario: ordinary lightweight tasks stay notes-only +- **WHEN** `codex-agent` receives a lightweight-prefixed task without cleanup-evidence artifact wording +- **THEN** the task remains on T1 notes-only routing +- **AND** existing small-task caveman behavior is preserved. 
diff --git a/openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/tasks.md b/openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/tasks.md new file mode 100644 index 0000000..55c0407 --- /dev/null +++ b/openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/tasks.md @@ -0,0 +1,34 @@ +## Definition of Done + +This change is complete only when **all** of the following are true: + +- Every checkbox below is checked. +- The agent branch reaches `MERGED` state on `origin` and the PR URL + state are recorded in the completion handoff. +- If any step blocks (test failure, conflict, ambiguous result), append a `BLOCKED:` line under section 4 explaining the blocker and **STOP**. Do not tick remaining cleanup boxes; do not silently skip the cleanup pipeline. + +## Handoff + +- Handoff: change=`agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32`; branch=`agent/codex/simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32`; scope=`scripts/codex-agent.sh, templates/scripts/codex-agent.sh, test/sandbox.test.js, OpenSpec change docs`; action=`finish the routing fix, verify it, then run the guarded finish pipeline on base main`. +- Copy prompt: Continue `agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32` on branch `agent/codex/simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32`. Work inside the existing sandbox, review `openspec/changes/agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32/tasks.md`, continue from the current state instead of creating a new sandbox, and when the work is done run `gx branch finish --branch agent/codex/simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32 --base main --via-pr --wait-for-merge --cleanup`. + +## 1. Specification + +- [x] 1.1 Finalize proposal scope and acceptance criteria for `agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32`. 
+- [x] 1.2 Define normative requirements in `specs/simple-record-merged-cleanup-evidence-for-task-mode-decider/spec.md`. + +## 2. Implementation + +- [x] 2.1 Implement scoped behavior changes. +- [x] 2.2 Add/update focused regression coverage. + +## 3. Verification + +- [x] 3.1 Run targeted project verification commands. `node --test test/sandbox.test.js test/metadata.test.js` passed (`25/25`). +- [x] 3.2 Run `openspec validate agent-codex-simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32 --type change --strict`. Passed. +- [x] 3.3 Run `openspec validate --specs`. Passed with `No items found to validate.` in the current repo state. + +## 4. Cleanup (mandatory; run before claiming completion) + +- [ ] 4.1 Run the cleanup pipeline: `gx branch finish --branch agent/codex/simple-record-merged-cleanup-evidence-fo-2026-04-22-12-32 --base main --via-pr --wait-for-merge --cleanup`. This handles commit -> push -> PR create -> merge wait -> worktree prune in one invocation. +- [ ] 4.2 Record the PR URL and final merge state (`MERGED`) in the completion handoff. +- [ ] 4.3 Confirm the sandbox worktree is gone (`git worktree list` no longer shows the agent path; `git branch -a` shows no surviving local/remote refs for the branch). 
diff --git a/scripts/codex-agent.sh b/scripts/codex-agent.sh index e287707..3de9eca 100755 --- a/scripts/codex-agent.sh +++ b/scripts/codex-agent.sh @@ -90,6 +90,13 @@ string_has_lightweight_prefix() { return 1 } +task_requires_full_change_workspace() { + local text="$1" + string_contains_any "$text" \ + "cleanup evidence" "merged cleanup" "merged state" "pr url" \ + "cleanup pipeline" "finish pipeline" "sandbox cleanup" "tasks.md" +} + derive_task_mode_from_tier() { case "$1" in T0|T1) printf 'caveman' ;; @@ -128,8 +135,13 @@ decide_task_routing() { fi TASK_ROUTING_REASON="explicit tier override" elif string_has_lightweight_prefix "$task_lower"; then - OPENSPEC_TIER="T1" - TASK_ROUTING_REASON="explicit lightweight prefix" + if task_requires_full_change_workspace "$task_lower"; then + OPENSPEC_TIER="T2" + TASK_ROUTING_REASON="cleanup-evidence artifact wording overrides lightweight prefix" + else + OPENSPEC_TIER="T1" + TASK_ROUTING_REASON="explicit lightweight prefix" + fi elif string_contains_any "$task_lower" \ "ralph" "autopilot" "ultrawork" "ultraqa" "ralplan" "deep interview" "ouroboros" \ "migration" "refactor" "architecture" "re-architect" "cross-cutting" "multi-agent" \ diff --git a/templates/scripts/codex-agent.sh b/templates/scripts/codex-agent.sh index e287707..3de9eca 100755 --- a/templates/scripts/codex-agent.sh +++ b/templates/scripts/codex-agent.sh @@ -90,6 +90,13 @@ string_has_lightweight_prefix() { return 1 } +task_requires_full_change_workspace() { + local text="$1" + string_contains_any "$text" \ + "cleanup evidence" "merged cleanup" "merged state" "pr url" \ + "cleanup pipeline" "finish pipeline" "sandbox cleanup" "tasks.md" +} + derive_task_mode_from_tier() { case "$1" in T0|T1) printf 'caveman' ;; @@ -128,8 +135,13 @@ decide_task_routing() { fi TASK_ROUTING_REASON="explicit tier override" elif string_has_lightweight_prefix "$task_lower"; then - OPENSPEC_TIER="T1" - TASK_ROUTING_REASON="explicit lightweight prefix" + if 
task_requires_full_change_workspace "$task_lower"; then + OPENSPEC_TIER="T2" + TASK_ROUTING_REASON="cleanup-evidence artifact wording overrides lightweight prefix" + else + OPENSPEC_TIER="T1" + TASK_ROUTING_REASON="explicit lightweight prefix" + fi elif string_contains_any "$task_lower" \ "ralph" "autopilot" "ultrawork" "ultraqa" "ralplan" "deep interview" "ouroboros" \ "migration" "refactor" "architecture" "re-architect" "cross-cutting" "multi-agent" \ diff --git a/test/sandbox.test.js b/test/sandbox.test.js index b85bc13..fe61ceb 100644 --- a/test/sandbox.test.js +++ b/test/sandbox.test.js @@ -208,6 +208,84 @@ test('codex-agent routes lightweight tasks to caveman T1 with notes-only OpenSpe }); +test('codex-agent keeps cleanup-evidence tasks on T2 even with a lightweight prefix', () => { + const repoDir = initRepo(); + seedCommit(repoDir); + + const setupResult = runNode(['setup', '--target', repoDir, '--no-global-install'], repoDir); + assert.equal(setupResult.status, 0, setupResult.stderr || setupResult.stdout); + let result = runCmd('git', ['add', '.'], repoDir); + assert.equal(result.status, 0, result.stderr); + result = runCmd('git', ['commit', '-m', 'apply gx setup'], repoDir, { + ALLOW_COMMIT_ON_PROTECTED_BRANCH: '1', + }); + assert.equal(result.status, 0, result.stderr || result.stdout); + + const fakeBin = fs.mkdtempSync(path.join(os.tmpdir(), 'guardex-fake-codex-cleanup-evidence-')); + const fakeCodexPath = path.join(fakeBin, 'codex'); + fs.writeFileSync( + fakeCodexPath, + `#!/usr/bin/env bash\n` + + `pwd > "${'${GUARDEX_TEST_CODEX_CWD}'}"\n` + + `echo "$@" > "${'${GUARDEX_TEST_CODEX_ARGS}'}"\n` + + `printf '%s' "${'${GUARDEX_TASK_MODE}'}" > "${'${GUARDEX_TEST_TASK_MODE}'}"\n` + + `printf '%s' "${'${GUARDEX_OPENSPEC_TIER}'}" > "${'${GUARDEX_TEST_TASK_TIER}'}"\n` + + `printf '%s' "${'${GUARDEX_TASK_ROUTING_REASON}'}" > "${'${GUARDEX_TEST_TASK_REASON}'}"\n`, + 'utf8', + ); + fs.chmodSync(fakeCodexPath, 0o755); + + const cwdMarker = path.join(repoDir, 
'.codex-agent-cwd-cleanup-evidence'); + const argsMarker = path.join(repoDir, '.codex-agent-args-cleanup-evidence'); + const modeMarker = path.join(repoDir, '.codex-agent-mode-cleanup-evidence'); + const tierMarker = path.join(repoDir, '.codex-agent-tier-cleanup-evidence'); + const reasonMarker = path.join(repoDir, '.codex-agent-reason-cleanup-evidence'); + const launch = runCodexAgent( + ['simple: record merged cleanup evidence for task mode decider', 'planner', 'dev', '--model', 'gpt-5.4-mini'], + repoDir, + { + PATH: `${fakeBin}:${process.env.PATH}`, + GUARDEX_TEST_CODEX_CWD: cwdMarker, + GUARDEX_TEST_CODEX_ARGS: argsMarker, + GUARDEX_TEST_TASK_MODE: modeMarker, + GUARDEX_TEST_TASK_TIER: tierMarker, + GUARDEX_TEST_TASK_REASON: reasonMarker, + }, + ); + assert.equal(launch.status, 0, launch.stderr || launch.stdout); + assert.match( + launch.stdout, + /\[codex-agent\] Task routing: omx \/ T2 \(change workspace only\) \(cleanup-evidence artifact wording overrides lightweight prefix\)/, + ); + assert.doesNotMatch(launch.stdout, /\[codex-agent\] OpenSpec plan workspace:/); + + const launchedCwd = fs.readFileSync(cwdMarker, 'utf8').trim(); + const launchedBranch = extractCreatedBranch(launch.stdout); + const changeSlug = sanitizeSlug(launchedBranch, 'simple-record-merged-cleanup-evidence-for-task-mode-decider'); + const changeDir = path.join(launchedCwd, 'openspec', 'changes', changeSlug); + const launchedArgs = fs.readFileSync(argsMarker, 'utf8').trim(); + + assert.doesNotMatch(launchedCwd, /masterplan/); + assert.match(launchedArgs, /--model gpt-5\.4-mini/); + assert.equal(fs.readFileSync(modeMarker, 'utf8'), 'omx'); + assert.equal(fs.readFileSync(tierMarker, 'utf8'), 'T2'); + assert.match(fs.readFileSync(reasonMarker, 'utf8'), /cleanup-evidence artifact wording overrides lightweight prefix/); + assert.equal(fs.existsSync(path.join(changeDir, '.openspec.yaml')), true, '.openspec.yaml missing'); + assert.equal(fs.existsSync(path.join(changeDir, 'proposal.md')), true, 
'proposal.md missing'); + assert.equal(fs.existsSync(path.join(changeDir, 'tasks.md')), true, 'tasks.md missing'); + assert.equal( + fs.existsSync(path.join(changeDir, 'specs', 'simple-record-merged-cleanup-evidence-for-task-mode-decider', 'spec.md')), + true, + 'spec.md missing', + ); + assert.equal( + fs.existsSync(path.join(launchedCwd, 'openspec', 'plan', changeSlug)), + false, + 'cleanup-evidence T2 routing should not create a plan workspace', + ); +}); + + test('codex-agent ignores stale repo-local starter shims and keeps the visible checkout stable', () => { const repoDir = initRepo(); seedCommit(repoDir);