From f70f6ab305e678c6b54ca71686fba7c442fe51b3 Mon Sep 17 00:00:00 2001 From: NagyVikt Date: Sat, 11 Apr 2026 19:40:07 +0200 Subject: [PATCH] Auto-close Codex sandbox branches through the PR workflow Codex sandbox sessions now auto-run a finish pipeline after the agent exits: claim+commit changed files, push/create PR, attempt merge, prune worktree, and pull base updates when merge completes. The docs now codify this as the default completion protocol, and tests cover the new happy path while preserving dirty-worktree safety behavior. Constraint: Repos without an origin remote must not fail; auto-finish now skips merge/PR and keeps manual flow available Rejected: Always forcing merge/PR regardless of remote/gh availability | breaks local-only and offline workflows Confidence: high Scope-risk: moderate Reversibility: clean Directive: Keep agent-branch-finish as the single merge/cleanup authority; codex-agent should orchestrate, not duplicate finish logic Tested: npm test Tested: node --check bin/multiagent-safety.js Tested: npm pack --dry-run Not-tested: End-to-end merge conflict review path against live GitHub checks/permissions --- AGENTS.md | 4 +- README.md | 7 +- templates/AGENTS.multiagent-safety.md | 4 +- templates/scripts/codex-agent.sh | 207 +++++++++++++++++++++++++- test/install.test.js | 82 +++++++++- 5 files changed, 297 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index ffd728b..641def9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -92,7 +92,9 @@ OMX runtime state typically lives under `.omx/`: - For git isolation, each agent must start on a dedicated branch via `scripts/agent-branch-start.sh "" ""`. - Do not implement changes directly on `main` or other base branches; all edits must happen on dedicated agent branches/worktrees. - If the current local branch already contains accidental edits, move them to an agent branch/worktree first, then continue implementation. -- Agent completion must use `scripts/agent-branch-finish.sh` (merge into `dev`, push, delete agent branch). +- Agent completion defaults to `scripts/codex-agent.sh`, which auto-finishes the branch (auto-commit changed files, push/create PR, attempt merge, clean branch/worktree, and pull the local base branch after merge). +- If codex-agent auto-finish cannot complete, run `scripts/agent-branch-finish.sh --branch "" --via-pr` and keep the branch open until checks/review pass. +- If merge/rebase conflicts block auto-finish, run a conflict-resolution review pass in that sandbox branch, then rerun `agent-branch-finish.sh --via-pr` until merged. 1. Explicit ownership before edits diff --git a/README.md b/README.md index 7a1c7a2..42f2c6c 100644 --- a/README.md +++ b/README.md @@ -238,6 +238,9 @@ Use this exact checklist to setup multi-agent safety in this repository for Code - For every new user message/task, repeat the same cycle: start isolated agent branch/worktree -> claim file locks -> implement/verify -> finish via PR/merge cleanup with scripts/agent-branch-finish.sh. + - `scripts/codex-agent.sh` now auto-runs this finish flow after Codex exits: + auto-commit changed files -> push/create PR -> merge attempt -> branch/worktree cleanup -> + pull local base branch. 5) Optional: create OpenSpec planning workspace: bash scripts/openspec/init-plan-workspace.sh "" @@ -285,7 +288,9 @@ and asks `[y/N]` whether to update immediately (default is `N`). - Interactive prompt is strict (`[y/n]`) and waits for explicit answer. - Non-interactive setup: skips global installs by default; use `--yes-global-install` to force. - In already-initialized repos, `setup` / `install` / `fix` / `doctor` block writes on protected `main` by default; start an agent branch first. Use `--allow-protected-base-write` only for emergency in-place maintenance. -- `scripts/codex-agent.sh` now auto-runs worktree prune after a Codex session; clean sandbox branches are removed automatically, dirty ones are kept. +- `scripts/codex-agent.sh` now auto-runs finish automation after a Codex session when `origin` exists: + auto-commit changed files, run PR/merge cleanup, and prune merged worktrees. + If conflicts remain, it keeps the sandbox and prompts for a conflict-resolution review pass. ## Advanced commands diff --git a/templates/AGENTS.multiagent-safety.md b/templates/AGENTS.multiagent-safety.md index bbce4a3..a883a72 100644 --- a/templates/AGENTS.multiagent-safety.md +++ b/templates/AGENTS.multiagent-safety.md @@ -11,7 +11,9 @@ - If ownership is unclear or overlaps, stop that edit, post a blocker comment, and let the leader/integrator reassign scope. - For git isolation, each agent must start on a dedicated branch via `scripts/agent-branch-start.sh "" ""`. - Treat the base branch (`main` or the user's current local base branch) as read-only while the agent branch is active. -- Agent completion must use `scripts/agent-branch-finish.sh` (direct merge to base when allowed; auto PR fallback for protected bases, then cleanup after merge). +- Agent completion defaults to `scripts/codex-agent.sh`, which now auto-finishes the branch (auto-commit changed files, push/create PR, attempt merge, clean branch/worktree, and pull the local base branch after merge). +- If codex-agent auto-finish cannot complete, immediately run `scripts/agent-branch-finish.sh --branch "" --via-pr` and keep the branch open until checks/review pass. +- If merge/rebase conflicts block auto-finish, run a conflict-resolution review pass in that sandbox branch, then rerun `agent-branch-finish.sh --via-pr` until merged. - Per-message loop is mandatory: for every new user message/task, start a fresh agent branch/worktree, claim ownership locks, implement and verify, finish via PR/merge cleanup, then repeat for the next message/task. 1. Explicit ownership before edits diff --git a/templates/scripts/codex-agent.sh b/templates/scripts/codex-agent.sh index 370d2d5..09cacf5 100755 --- a/templates/scripts/codex-agent.sh +++ b/templates/scripts/codex-agent.sh @@ -6,6 +6,24 @@ AGENT_NAME="${MUSAFETY_AGENT_NAME:-agent}" BASE_BRANCH="${MUSAFETY_BASE_BRANCH:-}" BASE_BRANCH_EXPLICIT=0 CODEX_BIN="${MUSAFETY_CODEX_BIN:-codex}" +AUTO_FINISH_RAW="${MUSAFETY_CODEX_AUTO_FINISH:-true}" +AUTO_REVIEW_ON_CONFLICT_RAW="${MUSAFETY_CODEX_AUTO_REVIEW_ON_CONFLICT:-true}" + +normalize_bool() { + local raw="${1:-}" + local fallback="${2:-0}" + local lowered + lowered="$(printf '%s' "$raw" | tr '[:upper:]' '[:lower:]')" + case "$lowered" in + 1|true|yes|on) printf '1' ;; + 0|false|no|off) printf '0' ;; + '') printf '%s' "$fallback" ;; + *) printf '%s' "$fallback" ;; + esac +} + +AUTO_FINISH="$(normalize_bool "$AUTO_FINISH_RAW" "1")" +AUTO_REVIEW_ON_CONFLICT="$(normalize_bool "$AUTO_REVIEW_ON_CONFLICT_RAW" "1")" if [[ -n "$BASE_BRANCH" ]]; then BASE_BRANCH_EXPLICIT=1 @@ -30,6 +48,22 @@ while [[ $# -gt 0 ]]; do CODEX_BIN="${2:-$CODEX_BIN}" shift 2 ;; + --auto-finish) + AUTO_FINISH=1 + shift + ;; + --no-auto-finish) + AUTO_FINISH=0 + shift + ;; + --auto-review-on-conflict) + AUTO_REVIEW_ON_CONFLICT=1 + shift + ;; + --no-auto-review-on-conflict) + AUTO_REVIEW_ON_CONFLICT=0 + shift + ;; --) shift break @@ -95,6 +129,145 @@ if [[ ! -d "$worktree_path" ]]; then exit 1 fi +has_origin_remote() { + git -C "$repo_root" remote get-url origin >/dev/null 2>&1 +} + +worktree_has_changes() { + local wt="$1" + if ! git -C "$wt" diff --quiet -- . ":(exclude).omx/state/agent-file-locks.json"; then + return 0 + fi + if ! git -C "$wt" diff --cached --quiet -- . ":(exclude).omx/state/agent-file-locks.json"; then + return 0 + fi + if [[ -n "$(git -C "$wt" ls-files --others --exclude-standard)" ]]; then + return 0 + fi + return 1 +} + +claim_changed_files() { + local wt="$1" + local branch="$2" + local lock_script="${repo_root}/scripts/agent-file-locks.py" + + if [[ ! -x "$lock_script" ]]; then + return 0 + fi + + local changed_raw deleted_raw + changed_raw="$({ + git -C "$wt" diff --name-only -- . ":(exclude).omx/state/agent-file-locks.json"; + git -C "$wt" diff --cached --name-only -- . ":(exclude).omx/state/agent-file-locks.json"; + git -C "$wt" ls-files --others --exclude-standard; + } | sed '/^$/d' | sort -u)" + + if [[ -n "$changed_raw" ]]; then + mapfile -t changed_files < <(printf '%s\n' "$changed_raw") + python3 "$lock_script" claim --branch "$branch" "${changed_files[@]}" >/dev/null 2>&1 || true + fi + + deleted_raw="$({ + git -C "$wt" diff --name-only --diff-filter=D -- . ":(exclude).omx/state/agent-file-locks.json"; + git -C "$wt" diff --cached --name-only --diff-filter=D -- . ":(exclude).omx/state/agent-file-locks.json"; + } | sed '/^$/d' | sort -u)" + + if [[ -n "$deleted_raw" ]]; then + mapfile -t deleted_files < <(printf '%s\n' "$deleted_raw") + python3 "$lock_script" allow-delete --branch "$branch" "${deleted_files[@]}" >/dev/null 2>&1 || true + fi +} + +auto_commit_worktree_changes() { + local wt="$1" + local branch="$2" + + if ! worktree_has_changes "$wt"; then + return 0 + fi + + claim_changed_files "$wt" "$branch" + git -C "$wt" add -A + + if git -C "$wt" diff --cached --quiet -- . ":(exclude).omx/state/agent-file-locks.json"; then + return 0 + fi + + local default_message="Auto-finish: ${TASK_NAME}" + local commit_message="${MUSAFETY_CODEX_AUTO_COMMIT_MESSAGE:-$default_message}" + + if ! git -C "$wt" commit -m "$commit_message" >/dev/null 2>&1; then + echo "[codex-agent] Auto-commit failed in sandbox. Keeping branch for manual review: $branch" >&2 + return 1 + fi + + echo "[codex-agent] Auto-committed sandbox changes on '${branch}'." + return 0 +} + +looks_like_conflict_failure() { + local output="$1" + if grep -qiE 'preflight conflict detected|merge conflict detected|auto-sync failed while rebasing|rebase --continue|rebase --abort' <<< "$output"; then + return 0 + fi + return 1 +} + +run_finish_flow() { + local wt="$1" + local branch="$2" + local finish_output="" + local -a finish_args + + finish_args=(--branch "$branch") + if [[ "$BASE_BRANCH_EXPLICIT" -eq 1 ]]; then + finish_args+=(--base "$BASE_BRANCH") + fi + + if has_origin_remote; then + if command -v gh >/dev/null 2>&1 || command -v "${MUSAFETY_GH_BIN:-gh}" >/dev/null 2>&1; then + finish_args+=(--via-pr) + fi + else + echo "[codex-agent] No origin remote detected; skipping auto-finish merge/PR pipeline." >&2 + return 2 + fi + + if finish_output="$(bash "${repo_root}/scripts/agent-branch-finish.sh" "${finish_args[@]}" 2>&1)"; then + printf '%s\n' "$finish_output" + return 0 + fi + + printf '%s\n' "$finish_output" >&2 + + if [[ "$AUTO_REVIEW_ON_CONFLICT" -eq 1 ]] && looks_like_conflict_failure "$finish_output"; then + echo "[codex-agent] Auto-finish hit conflicts. Launching Codex conflict-review pass in sandbox..." >&2 + local review_prompt + review_prompt="Resolve git conflicts for branch ${branch} against ${BASE_BRANCH:-base branch}, then commit the resolution in this sandbox worktree and exit." + + ( + cd "$wt" + set +e + "$CODEX_BIN" "$review_prompt" + review_exit="$?" + set -e + if [[ "$review_exit" -ne 0 ]]; then + echo "[codex-agent] Conflict-review Codex pass exited with status ${review_exit}." >&2 + fi + ) + + if finish_output="$(bash "${repo_root}/scripts/agent-branch-finish.sh" "${finish_args[@]}" 2>&1)"; then + printf '%s\n' "$finish_output" + return 0 + fi + + printf '%s\n' "$finish_output" >&2 + fi + + return 1 +} + echo "[codex-agent] Launching ${CODEX_BIN} in sandbox: $worktree_path" cd "$worktree_path" set +e @@ -103,6 +276,34 @@ codex_exit="$?" set -e cd "$repo_root" +final_exit="$codex_exit" +auto_finish_completed=0 + +worktree_branch="$(git -C "$worktree_path" rev-parse --abbrev-ref HEAD 2>/dev/null || true)" + +if [[ "$AUTO_FINISH" -eq 1 && -n "$worktree_branch" && "$worktree_branch" != "HEAD" ]]; then + echo "[codex-agent] Auto-finish enabled: commit -> push/PR -> merge -> cleanup." + if auto_commit_worktree_changes "$worktree_path" "$worktree_branch"; then + if run_finish_flow "$worktree_path" "$worktree_branch"; then + auto_finish_completed=1 + echo "[codex-agent] Auto-finish completed for '${worktree_branch}'." + else + finish_status="$?" + if [[ "$finish_status" -eq 2 ]]; then + echo "[codex-agent] Auto-finish skipped for '${worktree_branch}' (no mergeable remote context)." >&2 + else + echo "[codex-agent] Auto-finish did not complete; keeping sandbox for manual review: $worktree_path" >&2 + if [[ "$final_exit" -eq 0 ]]; then + final_exit=1 + fi + fi + fi + else + if [[ "$final_exit" -eq 0 ]]; then + final_exit=1 + fi + fi +fi if [[ -x "${repo_root}/scripts/agent-worktree-prune.sh" ]]; then echo "[codex-agent] Session ended (exit=${codex_exit}). Running worktree cleanup..." @@ -120,9 +321,9 @@ if [[ ! -d "$worktree_path" ]]; then else worktree_branch="$(git -C "$worktree_path" rev-parse --abbrev-ref HEAD 2>/dev/null || true)" echo "[codex-agent] Sandbox worktree kept: $worktree_path" - if [[ -n "$worktree_branch" && "$worktree_branch" != "HEAD" ]]; then - echo "[codex-agent] If finished, merge + clean with: bash scripts/agent-branch-finish.sh --branch \"${worktree_branch}\"" + if [[ "$auto_finish_completed" -eq 0 && -n "$worktree_branch" && "$worktree_branch" != "HEAD" ]]; then + echo "[codex-agent] If finished, merge + clean with: bash scripts/agent-branch-finish.sh --branch \"${worktree_branch}\" --via-pr" fi fi -exit "$codex_exit" +exit "$final_exit" diff --git a/test/install.test.js b/test/install.test.js index 8197e39..40bddae 100644 --- a/test/install.test.js +++ b/test/install.test.js @@ -827,7 +827,10 @@ test('codex-agent keeps dirty sandbox worktrees after session exit', () => { }, ); assert.equal(launch.status, 0, launch.stderr || launch.stdout); - assert.match(launch.stdout, /\[agent-worktree-prune\] Skipping dirty worktree/); + assert.match( + launch.stdout, + /\[agent-worktree-prune\] Skipping dirty worktree|\[codex-agent\] Auto-committed sandbox changes on/, + ); assert.match(launch.stdout, /\[codex-agent\] Sandbox worktree kept:/); const launchedCwd = fs.readFileSync(cwdMarker, 'utf8').trim(); @@ -835,6 +838,83 @@ test('codex-agent keeps dirty sandbox worktrees after session exit', () => { assert.equal(fs.existsSync(path.join(launchedCwd, 'codex-dirty.txt')), true); }); +test('codex-agent auto-finishes dirty sandbox branches via PR flow when origin is configured', () => { + const repoDir = initRepo(); + seedCommit(repoDir); + attachOriginRemote(repoDir); + + let result = runNode(['setup', '--target', repoDir, '--no-global-install'], repoDir); + assert.equal(result.status, 0, result.stderr || result.stdout); + result = runCmd('git', ['add', '.'], repoDir); + assert.equal(result.status, 0, result.stderr); + result = runCmd('git', ['commit', '-m', 'apply gx setup'], repoDir, { + ALLOW_COMMIT_ON_PROTECTED_BRANCH: '1', + }); + assert.equal(result.status, 0, result.stderr || result.stdout); + result = runCmd('git', ['push', 'origin', 'dev'], repoDir); + assert.equal(result.status, 0, result.stderr || result.stdout); + + const fakeCodexBin = fs.mkdtempSync(path.join(os.tmpdir(), 'musafety-fake-codex-autofinish-')); + const fakeCodexPath = path.join(fakeCodexBin, 'codex'); + fs.writeFileSync( + fakeCodexPath, + `#!/usr/bin/env bash\n` + + `pwd > "${'${MUSAFETY_TEST_CODEX_CWD}'}"\n` + + `echo "$@" > "${'${MUSAFETY_TEST_CODEX_ARGS}'}"\n` + + `echo "auto-finish-change" > codex-autofinish.txt\n`, + 'utf8', + ); + fs.chmodSync(fakeCodexPath, 0o755); + + const { fakePath: fakeGhPath } = createFakeGhScript(` +if [[ "$1" == "pr" && "$2" == "create" ]]; then + exit 0 +fi +if [[ "$1" == "pr" && "$2" == "view" ]]; then + if [[ " $* " == *" --json url "* ]]; then + echo "https://example.test/pr/auto-finish" + exit 0 + fi + echo "unexpected gh pr view args: $*" >&2 + exit 1 +fi +if [[ "$1" == "pr" && "$2" == "merge" ]]; then + exit 0 +fi +echo "unexpected gh args: $*" >&2 +exit 1 +`); + + const cwdMarker = path.join(repoDir, '.codex-agent-cwd-autofinish'); + const argsMarker = path.join(repoDir, '.codex-agent-args-autofinish'); + const launch = runCmd( + 'bash', + ['scripts/codex-agent.sh', 'autofinish-task', 'planner', 'dev', '--model', 'gpt-5.4-mini'], + repoDir, + { + PATH: `${fakeCodexBin}:${process.env.PATH}`, + MUSAFETY_TEST_CODEX_CWD: cwdMarker, + MUSAFETY_TEST_CODEX_ARGS: argsMarker, + MUSAFETY_GH_BIN: fakeGhPath, + }, + ); + assert.equal(launch.status, 0, launch.stderr || launch.stdout); + assert.match(launch.stdout, /\[codex-agent\] Auto-finish enabled: commit -> push\/PR -> merge -> cleanup\./); + assert.match(launch.stdout, /\[codex-agent\] Auto-finish completed for/); + assert.match(launch.stdout, /\[codex-agent\] Auto-cleaned sandbox worktree:/); + + const launchedCwd = fs.readFileSync(cwdMarker, 'utf8').trim(); + assert.equal(fs.existsSync(launchedCwd), false, 'auto-finished sandbox should be removed'); + const launchedBranch = extractCreatedBranch(launch.stdout); + result = runCmd('git', ['show-ref', '--verify', '--quiet', `refs/heads/${launchedBranch}`], repoDir); + assert.notEqual(result.status, 0, 'auto-finished branch should be removed locally'); + result = runCmd('git', ['ls-remote', '--heads', 'origin', launchedBranch], repoDir); + assert.equal(result.stdout.trim(), '', 'auto-finished branch should be removed on origin'); + + const launchedArgs = fs.readFileSync(argsMarker, 'utf8').trim(); + assert.match(launchedArgs, /--model gpt-5\.4-mini/); +}); + test('sync command rebases current agent branch onto latest origin/dev', () => { const repoDir = initRepo(); seedCommit(repoDir);