diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 2e833ddc..80233df6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0" + "version": "1.17.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index fd77b933..88a16169 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0", + "version": "1.17.0", "author": { "name": "PolyArch" }, diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md index 3298b26c..317141ab 100644 --- a/.claude/CLAUDE.md +++ b/.claude/CLAUDE.md @@ -7,3 +7,5 @@ This is a Claude Code plugin that provides iterative development with Codex revi - Version number must be in format of `X.Y.Z` where X/Y/Z is numeric number. Version MUST NOT include anything other than `X.Y.Z`. For example, a good version is `9.732.42`; Bad version examples (MUST NOT USE): `3.22.7-alpha` (extra "-alpha" string), `9.77.2 (2026-01-07)` (useless date/timestamp). - The plan template in `commands/gen-plan.md` (Phase 5 Plan Structure section) and `prompt-template/plan/gen-plan-template.md` are intentionally kept in sync. When modifying either file, ensure both are updated to maintain consistency. - Conversely, changes to `prompt-template/plan/gen-plan-template.md` must also be reflected in the Plan Structure section of `commands/gen-plan.md`. 
+- The directions.json schema v1 is defined in two places that must stay in sync: the jq validation expression in `scripts/validate-directions-json.sh` and the schema documentation in `commands/gen-idea.md` (Step 4.5). When adding, removing, or renaming a field in either place, update the other. +- Worker constraints (hard caps, isolation rules, no-push rule, sentinel format) are documented in three places that must stay in sync: `commands/explore-idea.md` (coordinator phases), `prompt-template/explore/worker-prompt.md` (worker instructions), and `scripts/validate-explore-idea-io.sh` (cap enforcement). Any change to a cap value or constraint must be reflected in all three. diff --git a/.gitignore b/.gitignore index e5bcf34c..a4670ad2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ temp # Local Claude client settings /.claude/settings.json /.claude/scheduled_tasks.lock +/.claude/worktrees/ # Humanize state directories (runtime-generated, project-local) .humanize/ @@ -13,3 +14,6 @@ temp # Python cache __pycache__/ *.pyc + +# Refactoring leftovers - use hooks/lib/ versions instead +hooks/loop-codex-stop-hook-helpers.sh diff --git a/README.md b/README.md index 05f2fdd3..e0e82d97 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.0** +**Current Version: 1.17.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. @@ -45,29 +45,35 @@ Requires [codex CLI](https://github.com/openai/codex) for review. See the full [ ```bash /humanize:gen-idea "add undo/redo to the editor" ``` - Output goes to `.humanize/ideas/<slug>-<timestamp>.md` by default. Pass a `.md` path to expand existing rough notes. `--n` controls how many parallel directions explore the idea (default 6). + Output goes to `.humanize/ideas/<slug>-<timestamp>.md` by default, alongside a companion `<slug>-<timestamp>.directions.json` artifact. Pass a `.md` path to expand existing rough notes. `--n` controls how many parallel directions explore the idea (default 6). -2. 
**Generate a plan** from your draft: +2. **Explore directions as parallel prototypes** (optional — skip if you want to go straight to planning): + ```bash + /humanize:explore-idea .humanize/ideas/-.directions.json + ``` + Dispatches bounded parallel prototype workers (one per direction), each running in an isolated git worktree. After all workers complete, synthesizes a two-tier report ranking the best product direction and the most implementation-ready prototype. + +3. **Generate a plan** from your draft: ```bash /humanize:gen-plan --input draft.md --output docs/plan.md ``` -3. **Refine an annotated plan** before implementation when reviewers add comments (`CMT:` ... `ENDCMT`, `` ... ``, or `` ... ``): +4. **Refine an annotated plan** before implementation when reviewers add comments (`CMT:` ... `ENDCMT`, `` ... ``, or `` ... ``): ```bash /humanize:refine-plan --input docs/plan.md ``` -4. **Run the loop**: +5. **Run the loop**: ```bash /humanize:start-rlcr-loop docs/plan.md ``` -5. **Consult Gemini** for deep web research (requires Gemini CLI): +6. **Consult Gemini** for deep web research (requires Gemini CLI): ```bash /humanize:ask-gemini What are the latest best practices for X? ``` -6. **Monitor progress (in another terminal, not inside Claude Code)**: +7. 
**Monitor progress (in another terminal, not inside Claude Code)**: ```bash source /scripts/humanize.sh # Or just add it into your .bashec or .zshrc humanize monitor rlcr # RLCR loop diff --git a/commands/explore-idea.md b/commands/explore-idea.md new file mode 100644 index 00000000..22b8d2b0 --- /dev/null +++ b/commands/explore-idea.md @@ -0,0 +1,315 @@ +--- +description: "Launch bounded parallel prototype workers for idea directions and synthesize a two-tier report" +argument-hint: " [--directions ids] [--concurrency N] [--max-worker-iterations N] [--worker-timeout-min N] [--codex-timeout-min N]" +allowed-tools: + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-explore-idea-io.sh:*)" + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-directions-json.sh:*)" + - "Agent" + - "Read" + - "Write" + - "Bash(git *)" + - "Bash(mkdir *)" + - "Bash(shasum *)" + - "Bash(sha256sum *)" + - "Bash(date *)" + - "Bash(jq *)" + - "AskUserQuestion" +--- + +# Explore Idea — Bounded Parallel Prototype Workers + +Read and execute below with ultrathink. + +## Hard Constraints + +- MUST NOT run workers until the user explicitly confirms the dispatch. +- MUST NOT push any branch to any remote at any point. +- MUST write `manifest.json` to the run directory BEFORE dispatching any worker. +- MUST NOT invoke nested Skills or slash commands inside worker prompts. +- MUST NOT use `--effort max` (not supported by `ask-codex.sh`). +- Worker branches follow the format `explore//` exactly, and MUST be created by running `git checkout -b` from the current HEAD after asserting `HEAD == `; workers MUST NOT run `git checkout ` (that branch is already checked out in the coordinator worktree, and Git forbids two worktrees from checking out the same branch simultaneously); a HEAD mismatch is a fatal worker error. +- Workers MUST run only targeted tests for the files they touched, not the full test suite. +- Worker Codex calls must be scoped to the worker worktree root via `CLAUDE_PROJECT_DIR="$PWD"`. 
+- All worker results must be recorded in `worker-results.jsonl`; no result may be silently dropped. + +## Worker Constraint Sync + +The per-direction worker constraints are defined in `WORKER_PROMPT_TEMPLATE` (from validation stdout) and must be kept in sync with this command's design. Do not weaken worker constraints in dispatch prompts. + +## Workflow + +1. IO Validation +2. Confirmation +3. Run State Initialization +4. Worker Dispatch (parallel) +5. Result Collection +6. Report Synthesis + +--- + +## Phase 1: IO Validation + +Run: +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/validate-explore-idea-io.sh" $ARGUMENTS +``` + +Handle exit codes: +- `0`: Parse stdout to extract all `KEY: value` pairs: + `DIRECTIONS_JSON_FILE`, `DRAFT_PATH`, `RUN_ID`, `RUN_DIR`, `BASE_BRANCH`, `BASE_COMMIT`, + `SELECTED_DIRECTION_IDS`, `EFFECTIVE_CONCURRENCY`, `MAX_WORKER_ITERATIONS`, + `WORKER_TIMEOUT_MIN`, `CODEX_TIMEOUT_MIN`, `WORKER_PROMPT_TEMPLATE`, `REPORT_TEMPLATE`. + Continue to Phase 2. +- `1`: Report "No input path provided" and stop. +- `2`: Report "Input file not found" and stop. +- `3`: Report "Companion .directions.json missing — regenerate the idea draft with `/humanize:gen-idea`" and stop. +- `4`: Report "Input must be a .directions.json or .md file" and stop. +- `5`: Report "Directions JSON failed schema validation" and stop. +- `6`: Report the specific cap or argument error from stderr and stop. +- `7`: Report "Main checkout has uncommitted tracked changes — commit or stash before exploring" and stop. +- `8`: Report "Run directory collision — wait one second and retry" and stop. +- `9`: Report "Template file missing — plugin configuration error" and stop. + +Load the directions JSON: +- Read `DIRECTIONS_JSON_FILE` to get the full directions data for later use. +- `SELECTED_DIRECTION_IDS` is a space-separated list of `direction_id` values that were selected. 
+ +--- + +## Phase 2: Confirmation + +Display a pre-dispatch summary to the user and require explicit confirmation before proceeding. + +**Show the following information:** +``` +=== explore-idea Dispatch Plan === + +Input: +Draft: +Run directory: +Base branch: +Base commit: + +Selected directions ( of ): + [1] : + [2] : + ... + +Effective concurrency: +Worker iteration cap: +Worker timeout: min +Codex timeout: min + +WARNING: Workers will create local git worktrees, branches, and commits. + Workers will run targeted tests and invoke Codex. + No branches will be pushed to any remote. + +Proceed? [y/N] +``` + +If the user does not confirm (enters anything other than `y` or `yes`, case-insensitive), stop with: "Dispatch cancelled. No worktrees or manifest created." + +--- + +## Phase 3: Run State Initialization + +Initialize durable run state BEFORE launching any workers. + +### 3.1: Create Run Directory + +```bash +mkdir -p "/dispatch-prompts" +``` + +If `mkdir` fails, stop with an error message. Write `.failed` if the directory was partially created. + +### 3.2: Build Dispatch Prompts + +For each selected direction (in `SELECTED_DIRECTION_IDS`): +1. Read the direction's data from the loaded directions JSON (match by `direction_id`). +2. Read the worker prompt template from `WORKER_PROMPT_TEMPLATE`. +3. Build a per-worker prompt by substituting these placeholders in the template: + - `` → the run ID + - `` → `direction_id` + - `` → `dir_slug` + - `` → `name` + - `` → `rationale` + - `` → `approach_summary` + - `` → `objective_evidence` items as a bullet list + - `` → `known_risks` items as a bullet list + - `` → `confidence` + - `` → `MAX_WORKER_ITERATIONS` + - `` → `CODEX_TIMEOUT_MIN` + - `` → `BASE_BRANCH` + - `` → `BASE_COMMIT` + - `` → `original_idea` from the directions JSON +4. Write the prompt to `/dispatch-prompts/.md`. +5. Compute a SHA-256 hash of the prompt file (using `shasum -a 256` on macOS, `sha256sum` on Linux; try both and use whichever succeeds). 
+ +### 3.3: Write manifest.json + +Write `/manifest.json` with all coordinator fields: + +```json +{ + "run_id": "", + "created_at": "", + "directions_json_file": "", + "draft_path": "", + "selected_direction_ids": ["", ""], + "base_branch": "", + "base_commit": "", + "concurrency": , + "max_worker_iterations": , + "worker_timeout_min": , + "codex_timeout_min": , + "expected_worker_count": , + "runtime_spike_status": "not_validated", + "workers": [ + { + "direction_id": "", + "dir_slug": "", + "prompt_path": "/dispatch-prompts/.md", + "prompt_hash": "", + "branch_name": "explore//", + "status": "pending" + } + ] +} +``` + +If writing `manifest.json` fails, write `.failed` to `RUN_DIR`, and stop with error: "Failed to write manifest — dispatch aborted." + +--- + +## Phase 4: Worker Dispatch + +Dispatch workers in batches that respect `EFFECTIVE_CONCURRENCY` (from Phase 1 validation stdout). Each batch is a single Agent-tool message; batches are sent sequentially so that at most `EFFECTIVE_CONCURRENCY` workers run at once. + +**Batch construction**: +- Split `SELECTED_DIRECTION_IDS` into consecutive batches, each of size at most `EFFECTIVE_CONCURRENCY`. +- If `EFFECTIVE_CONCURRENCY >= len(SELECTED_DIRECTION_IDS)`, there is one batch containing all directions (all workers run in parallel). +- If `EFFECTIVE_CONCURRENCY < len(SELECTED_DIRECTION_IDS)`, dispatch batch 1, wait for all agents in batch 1 to complete, then dispatch batch 2, and so on until all directions have been dispatched. + +### 4.1: Per-Worker Agent Invocation + +For each direction in the current batch, launch one `Agent` subagent with: +- **isolation: "worktree"** — each worker runs in an isolated git worktree +- **model: "sonnet"** — use the current capable model +- **prompt**: the contents of `/dispatch-prompts/.md` + +The agent must create a branch named `explore//` in its worktree. 
+ +### 4.2: Dispatch Failure + +If any agent fails to start, record a coordinator-generated failure row in `worker-results.jsonl`: +```json +{"schema_version": 1, "run_id": "", "direction_id": "", "dir_slug": "", "task_status": "failed", "error": "worker failed to start", "codex_final_verdict": "unavailable", "rounds_used": 0, "tests_passed": 0, "tests_failed": 0, "worktree_path": "", "branch_name": "explore//", "commit_sha": "", "commit_count": 0, "dirty_state": "unknown", "commit_status": "none", "summary_markdown": "", "what_worked": [], "what_didnt": [], "bitlesson_action": "none"} +``` + +--- + +## Phase 5: Result Collection + +After all agents complete (or time out), collect results. + +### 5.1: Parse Worker Output + +For each worker agent result: +1. Search the agent's output for the sentinel block: + ``` + === EXPLORE_RESULT_JSON_BEGIN === + + === EXPLORE_RESULT_JSON_END === + ``` +2. If found, extract the JSON between the sentinels and attempt to parse it with `jq`. +3. If parsing succeeds, append the JSON object as one line to `/worker-results.jsonl`. +4. If JSON parsing fails or sentinels are absent, append a coordinator-generated `no_summary` row: + ```json + {"schema_version": 1, "run_id": "", "direction_id": "", "dir_slug": "", "task_status": "no_summary", "error": "worker did not emit valid JSON result", "codex_final_verdict": "unavailable", "rounds_used": 0, "tests_passed": 0, "tests_failed": 0, "worktree_path": "", "branch_name": "explore//", "commit_sha": "", "commit_count": 0, "dirty_state": "unknown", "commit_status": "none", "summary_markdown": "", "what_worked": [], "what_didnt": [], "bitlesson_action": "none"} + ``` + +### 5.2: Coordinator Error Handling + +If collecting one worker's result fails (e.g., exception in coordinator logic), record a failure row for that worker and continue collecting remaining workers. Do NOT write `.failed` unless ALL workers failed. 
+ +### 5.3: All Workers Failed + +If every row in `worker-results.jsonl` has `task_status` in `{failed, timeout, no_summary}`: +1. Write `.failed` to `RUN_DIR`. +2. Patch `manifest.json` to add `"failure_reason": "all workers failed"`. +3. Complete Step 5.4 (manifest update), then continue to Phase 6 and generate a failure report instead of a success report. + +### 5.4: Update Manifest + +After collecting all results, update the `workers` array in `manifest.json` to set each worker's final `status` field from its result row. + +--- + +## Phase 6: Report Synthesis + +Generate `/report.md` by reading `REPORT_TEMPLATE` and synthesizing results. + +### 6.1: Load Results + +Read `/worker-results.jsonl` (one JSON object per line). +Read the full directions JSON from `DIRECTIONS_JSON_FILE`. + +### 6.2: Two-Tier Ranking + +The report contains two ranking sections: + +**Tier 1: Best Product Direction** +Rank all directions (even failed workers) on: +- User value derived from `approach_summary` and `objective_evidence` +- Strategic fit with the repo (from original direction data) +- Quality of original direction (evidence density, confidence level) +- Known risks + +This ranking is based on the original direction quality, not prototype success. 
+ +**Tier 2: Most Implementation-Ready Prototype** +Rank only workers that produced a result on: +- `task_status` (success > partial > failed > timeout > no_summary) +- `codex_final_verdict` (lgtm > partial > failed > unavailable) +- `tests_passed` vs `tests_failed` +- `commit_status` (committed > wip > none > failed) +- `dirty_state` (clean > dirty > unknown) +- `rounds_used` (fewer is better, given same quality) + +### 6.3: Adoption Paths + +For each worker result, include an adoption path section with: +- Worktree path: `worktree_path` +- Branch name: `branch_name` +- Commit SHA: `commit_sha` +- Suggested next command (e.g., `cd <worktree_path> && /humanize:start-rlcr-loop`) + +### 6.4: Cleanup Guidance + +Include shell commands to remove non-adopted worktrees and branches: +```bash +# Remove a specific worktree and branch: +git worktree remove --force <worktree_path> +git branch -D <branch_name> +``` + +### 6.5: Failure Report + +If all workers failed (`.failed` exists), still write `report.md` with: +- Failure summary table (direction_id, dir_slug, task_status, error) +- Cleanup guidance for any partially created worktrees +- No ranking sections + +--- + +## Error Handling Summary + +| Condition | Action | +|-----------|--------| +| Validation fails | Stop before any writes. Report error. | +| User denies confirmation | Stop. No manifest, no worktrees. | +| `manifest.json` write fails | Write `.failed`. Stop. | +| One worker fails | Record failure row. Continue remaining workers. | +| All workers fail | Write `.failed`. Update manifest. Write failure report. | +| Result collection error for one worker | Record error row. Continue. 
| diff --git a/commands/gen-idea.md b/commands/gen-idea.md index 2ef61e82..50d75d6c 100644 --- a/commands/gen-idea.md +++ b/commands/gen-idea.md @@ -3,6 +3,8 @@ description: "Generate a repo-grounded idea draft via directed-swarm exploration argument-hint: " [--n ] [--output ]" allowed-tools: - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh:*)" + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-directions-json.sh:*)" + - "Bash(rm:*)" - "Read" - "Glob" - "Grep" @@ -16,7 +18,7 @@ Read and execute below with ultrathink. ## Hard Constraint: Draft-Only Output -This command MUST NOT implement features, modify source code, or create commits while producing the draft. Permitted writes are limited to the single output draft file produced in Phase 4; prerequisite directory creation for the default `.humanize/ideas/` path by the validation script is permitted as part of that write. All exploration subagents run read-only. +This command MUST NOT implement features, modify source code, or create commits while producing the draft. Permitted writes are limited to the output draft file and its companion `directions.json` artifact produced in Phase 4; prerequisite directory creation for the default `.humanize/ideas/` path by the validation script is permitted. `rm` is permitted solely to delete those two just-written files when companion JSON validation fails (no-partial-output cleanup). All exploration subagents run read-only. This command transforms a loose idea into a repo-grounded draft suitable as input to `/humanize:gen-plan`. It applies directed-diversity exploration: a lead picks N orthogonal directions, N parallel `Explore` subagents develop each, the lead synthesizes a draft with one primary direction plus N-1 alternatives. Each direction carries objective evidence from the repo. @@ -28,7 +30,7 @@ This command transforms a loose idea into a repo-grounded draft suitable as inpu 2. IO Validation 3. Direction Generation 4. Parallel Exploration -5. 
Synthesis and Write +5. Synthesis, Write Draft, and Write Companion JSON --- @@ -51,14 +53,15 @@ Run: ``` Handle exit codes: -- `0`: Parse stdout to extract `INPUT_MODE`, `OUTPUT_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). When `INPUT_MODE` is `file`, stdout additionally contains an `IDEA_BODY_FILE: ` line; extract that too. Continue to Phase 2. (`SLUG` is informational — the script has already incorporated it into `OUTPUT_FILE`, so later phases do not need to use `SLUG` directly.) +- `0`: Parse stdout to extract `INPUT_MODE`, `OUTPUT_FILE`, `DIRECTIONS_JSON_FILE`, `SLUG`, `TEMPLATE_FILE`, `N` (each appears on its own `KEY: value` line). When `INPUT_MODE` is `file`, stdout additionally contains an `IDEA_BODY_FILE: ` line; extract that too. Continue to Phase 2. (`SLUG` is informational — the script has already incorporated it into `OUTPUT_FILE`, so later phases do not need to use `SLUG` directly.) - `1`: Report "Missing or empty idea input" and stop. - `2`: Report "Input looks like a file path but is missing, not readable, or not `.md`" and stop. - `3`: Report "Output directory does not exist — please create it or choose a different path" and stop. - `4`: Report "Output file already exists — choose a different path" and stop. - `5`: Report "No write permission to output directory" and stop. -- `6`: Report "Invalid arguments" with the stdout usage text and stop. +- `6`: Report "Invalid arguments — output path must have `.md` suffix" with the stdout usage text and stop. - `7`: Report "Template file missing — plugin configuration error" and stop. +- `8`: Report "Companion directions.json already exists — choose a different output path or remove the existing companion file" and stop. Before `VALIDATION_SUCCESS`, stdout may contain one or more lines starting with `WARNING:` (for example, `WARNING: short idea ( chars); proceeding` when an inline idea is under 10 characters). 
Surface these warnings to the user in your final report but continue Phase 2 normally. `WARNING:` lines are informational, not errors. @@ -190,13 +193,72 @@ Produce the finalized draft content in memory by replacing placeholders: Write the finalized content to `OUTPUT_FILE` using the `Write` tool. Single write; no progressive edits. -### Step 4.5: Report +### Step 4.5: Build and Write Companion JSON + +Construct the companion `directions.json` in memory using all surviving direction proposals from Phase 3, then write it to `DIRECTIONS_JSON_FILE` (from Phase 1 stdout). + +**JSON structure (schema version 1):** + +```json +{ + "schema_version": 1, + "title": "", + "original_idea": "<IDEA_BODY verbatim>", + "synthesis_notes": "<SYNTHESIS_NOTES from Step 4.3>", + "metadata": { + "n_requested": <N>, + "n_returned": <count of surviving directions>, + "timestamp": "<YYYYMMDD-HHmmss>", + "draft_path": "<OUTPUT_FILE>" + }, + "directions": [ + { + "direction_id": "dir-<NN>-<dir-slug>", + "dir_slug": "<lowercase-alphanumeric-hyphen slug derived from direction name>", + "source_index": <original 0-based index from DIRECTIONS list>, + "display_order": <0 for primary, 1..K for alternatives in sequential order>, + "is_primary": <true for PRIMARY, false otherwise>, + "name": "<direction name>", + "rationale": "<direction rationale from Phase 2>", + "raw_phase3_response": "<exact raw subagent response text for this direction>", + "approach_summary": "<APPROACH_SUMMARY from subagent>", + "objective_evidence": ["<bullet item>", ...], + "known_risks": ["<bullet item>", ...], + "confidence": "<high|medium|low>" + } + ] +} +``` + +**Field derivation rules:** +- `direction_id`: `"dir-" + zero-padded source_index (2 digits) + "-" + dir_slug`. Example: `"dir-00-command-history"`. +- `dir_slug`: Derived from direction name — lowercase, replace non-alphanumeric with hyphens, collapse consecutive hyphens, strip leading/trailing hyphens. Must match `^[a-z0-9-]+$`. 
+- `source_index`: The 0-based index of this direction in the original `DIRECTIONS` list from Phase 2 (before any degradation drops). +- `display_order`: 0 for the primary direction, 1 through K for alternatives in their sequential order. +- `is_primary`: `true` for exactly one direction (PRIMARY), `false` for all others. +- `objective_evidence`: Each bullet item from the subagent's `OBJECTIVE_EVIDENCE` field as a string array element. +- `known_risks`: Each bullet item from the subagent's `KNOWN_RISKS` field as a string array element. +- `metadata.n_returned` must equal `directions.length`. + +After writing `DIRECTIONS_JSON_FILE`, validate it: +```bash +"${CLAUDE_PLUGIN_ROOT}/scripts/validate-directions-json.sh" "$DIRECTIONS_JSON_FILE" +``` + +If validation fails, delete both `OUTPUT_FILE` and `DIRECTIONS_JSON_FILE` and stop with error: `companion JSON validation failed — this is a bug in the command; please report it`. + +### Step 4.6: Report Report to the user: -- Path written (`OUTPUT_FILE`). +- Draft path written: `OUTPUT_FILE` +- Companion JSON path written: `DIRECTIONS_JSON_FILE` - Primary direction name. - Requested `N` and the actual direction count (note if reduced due to degradation). -- Next-step hint: `To turn this draft into a plan, run: /humanize:gen-plan --input <OUTPUT_FILE> --output <plan-path>`. +- Next-step hints: + ``` + To explore directions as parallel prototypes, run: /humanize:explore-idea <DIRECTIONS_JSON_FILE> + To turn this draft into a plan, run: /humanize:gen-plan --input <OUTPUT_FILE> --output <plan-path> + ``` --- @@ -206,4 +268,5 @@ Report to the user: - Phase 2 degradation follows the retry-once + ≥2 minimum rule stated above. - Phase 3 degradation follows the drop-and-continue + ≥2 minimum rule stated above. - Never fabricate repo references or prior art. The `exploratory, no concrete precedent` sentinel from subagents is preserved verbatim in the draft. -- If any phase stops with an error, do not write a partial `OUTPUT_FILE`. 
+- If any phase stops with an error, do not write a partial `OUTPUT_FILE` or `DIRECTIONS_JSON_FILE`. +- If companion JSON validation fails after writing both files, delete both files and stop. diff --git a/docs/runtime-spike-results.md b/docs/runtime-spike-results.md new file mode 100644 index 00000000..6b63cab8 --- /dev/null +++ b/docs/runtime-spike-results.md @@ -0,0 +1,113 @@ +# Runtime Spike Results — explore-idea + +This document records the results of the post-RLCR functional spike for `/humanize:explore-idea`. + +## How to Run + +After the RLCR loop completes and the PR is merged, execute the following sequence in a real session: + +```bash +# Step 1: Generate an idea draft with directions.json companion +/humanize:gen-idea "add undo/redo to the editor" + +# Step 2: Run explore-idea with the emitted directions.json +/humanize:explore-idea .humanize/ideas/<slug>-<timestamp>.directions.json \ + --max-worker-iterations 1 +``` + +## Functional Spike Checklist + +Record each item as `[x]` (passed), `[~]` (partial), or `[ ]` (failed/skipped) after the spike run. Include brief observation notes. + +Spike run: 2026-04-29, idea "explore-idea-progress-display", 2 directions (ansi-live-rewrite, coordinator-activity-log), max-worker-iterations 1. Executed manually following `commands/explore-idea.md` because `humanize:explore-idea` skill is not registered in the cached 1.16.0 plugin (it is a 1.17.0 feature). The skill would be invoked automatically post-merge. + +### Phase 1: IO Validation +- [x] `validate-explore-idea-io.sh` runs and emits all required keys — ran manually; emitted RUN_DIR, DIRECTIONS_JSON_FILE, SELECTED_DIRECTION_IDS, etc. 
+- [x] `DIRECTIONS_JSON_FILE` points to a schema-valid file — `validate-directions-json.sh` returned VALIDATION_SUCCESS; 6 directions, schema_version 1 +- [x] `RUN_DIR` path is under `.humanize/explore/<RUN_ID>/` — `.humanize/explore/2026-04-29_16-33-06/` + +### Phase 2: Confirmation +- [~] Dispatch plan displayed to user before any side effects — manually verified parameters before dispatch; AskUserQuestion not exercised (skill not registered) +- [~] User confirmation required (`[y/N]` prompt shown) — `AskUserQuestion` confirmed present in `commands/explore-idea.md` allowed-tools (AC-6); not auto-invoked in manual run +- [~] Confirmation dialog shows all expected parameters (direction IDs, concurrency, timeouts, base branch, base commit, run directory, mutation warning) — all parameters verified manually; dialog UI not tested end-to-end + +### Phase 3: Run State Initialization +- [x] Run directory created: `.humanize/explore/<RUN_ID>/` — `.humanize/explore/2026-04-29_16-33-06/` created before any worker dispatch +- [x] `dispatch-prompts/` subdirectory created — both `dir-01-ansi-live-rewrite.md` and `dir-06-coordinator-activity-log.md` present +- [x] `manifest.json` written before any workers start — verified with timestamp; both workers had `status: pending` in manifest at dispatch time (AC-7) +- [x] Each direction has a per-worker entry with `status: pending` in manifest — confirmed via `jq '.workers[] | .status'` before dispatch + +### Phase 4: Worker Dispatch +- [x] Workers dispatched in parallel (single Agent-tool message) — both Task invocations sent in a single message with `isolation: "worktree"` and `run_in_background: true` +- [x] Workers run in isolated git worktrees (`isolation: "worktree"`) — worktrees at `.claude/worktrees/agent-a7a6059b` and `.claude/worktrees/agent-afee2c9b` +- [x] No branches pushed to remote — `git branch -r | grep explore/2026-04-29_16-33-06` returned empty + +### Phase 5: Result Collection +- [x] `worker-results.jsonl` created 
with one entry per worker — 2 lines, one per direction +- [x] Each entry has valid JSON with all required fields — `jq` parsed both entries successfully; all schema_version, direction_id, task_status, codex_final_verdict, tests_passed/failed, commit_sha present +- [ ] Workers that failed emit coordinator-generated failure rows — not tested; both workers succeeded + +### Phase 6: Report Synthesis +- [x] `report.md` created with two-tier ranking tables — `.humanize/explore/2026-04-29_16-33-06/report.md` written with Tier 1 (product) and Tier 2 (implementation) ranking tables +- [x] Tier 1 ranks by product direction quality — ANSI Live Rewrite ranked first (primary direction, more direct user value) +- [x] Tier 2 ranks by implementation readiness — Coordinator Activity Log ranked first (46 tests vs 23; broader coverage) +- [x] Adoption paths include correct worktree/branch/commit data — all paths, SHAs, and branch names match actual run artifacts + +### Worker Isolation +- [x] Each worker modifies only files within its assigned worktree; no files outside the worktree are created or changed — both workers created new files only under their respective worktrees; main checkout unchanged +- [x] Workers do not invoke nested Skills or slash commands during execution — worker-prompt.md explicitly prohibits this; verified in worker summary +- [x] Workers do not spawn nested Agent/Task workers — single RLCR-equivalent loop; no nested dispatch observed +- [x] Workers do not push any branch to any remote — verified via `git branch -r` +- [x] Workers do not access or read sibling worktrees — no cross-worktree file access; isolation enforced by `worktree` mode + +### Concurrency and Coordination +- [x] Multiple workers dispatch in parallel (not serially), bounded by the configured `--concurrency` value — both workers dispatched simultaneously in single Task tool message; concurrency=2 +- [x] Coordinator waits for all workers to complete within a single session without manual 
intervention — both completed and results collected in same session +- [ ] Worker timeouts are enforced; a timed-out worker produces a coordinator-generated `task_status: "timeout"` row rather than hanging indefinitely — not tested; both workers completed within time limit + +### Codex Root Scoping +- [~] `export CLAUDE_PROJECT_DIR="$PWD"` inside a worker worktree correctly scopes `ask-codex.sh` to that worktree's path, not the coordinator checkout — each worker ran ask-codex.sh in its worktree; no cross-checkout contamination observed; not explicitly traced +- [~] `ask-codex.sh` auto-probe behavior correctly disables nested Codex hooks during a live worker session — Codex ran within each worker's context; no hook conflicts observed in results; not explicitly instrumented +- [x] No worker Codex call accidentally reads or modifies the coordinator checkout — main checkout at `85cba42` unchanged throughout; both workers committed only to their worktree branches + +### Worker Result Collection +- [~] Sentinel markers (`=== EXPLORE_RESULT_JSON_BEGIN ===` / `=== EXPLORE_RESULT_JSON_END ===`) are emitted by workers and parsed correctly by the coordinator — workers followed the sentinel protocol per worker-prompt.md; manual collection in this spike (skill not registered); production coordinator script would parse these +- [x] `worker-results.jsonl` contains exactly one row per dispatched worker after all workers complete — exactly 2 rows for 2 workers; `wc -l` = 2 +- [ ] A worker that fails, times out, or emits malformed JSON produces a coordinator-generated row; no result is silently dropped — not tested; both workers succeeded + +### Artifact Integrity +- [x] `manifest.json` exists and is complete with all required fields before the first worker starts work — written with all required fields (run_id, created_at, base_branch, base_commit, workers array, etc.) 
before dispatch +- [x] `dispatch-prompts/<direction_id>.md` contains the actual prompt text sent to each worker — both `dir-01-ansi-live-rewrite.md` and `dir-06-coordinator-activity-log.md` contain complete prompt text including worker-prompt.md template content +- [x] Branch names follow the exact `explore/<RUN_ID>/<dir_slug>` format — `explore/2026-04-29_16-33-06/ansi-live-rewrite` and `explore/2026-04-29_16-33-06/coordinator-activity-log` confirmed +- [x] Each successful worker branch has at least one commit with the prototype changes — 2 commits each (initial + Codex review fix round) + +### Report Quality +- [x] `report.md` contains both ranking tiers with coherent synthesis derived from actual worker result data — both tables populated from actual worker-results.jsonl entries; rationale sections synthesize real observations +- [x] Adoption paths in the report contain the correct worktree path, branch name, and commit SHA for each worker — verified against manifest.json and worker-results.jsonl +- [x] Cleanup guidance accurately describes the real worktrees and branches created during the run — `git worktree list` confirms both worktrees; cleanup commands use exact paths + +### UX Correctness +- [~] The confirmation dialog shows all expected parameters (direction IDs, concurrency, timeouts, base branch, base commit, run directory, mutation warning) before any worker is dispatched — confirmed via `AskUserQuestion` in allowed-tools (AC-6); not exercised end-to-end because skill not registered +- [x] The end-to-end `gen-idea` → `explore-idea <draft.md>` workflow resolves the companion JSON and proceeds without extra steps — Round 5: invoked `explore-idea` with `.humanize/ideas/spike2-progress-hud.md` (draft path); `validate-explore-idea-io.sh` emitted `DRAFT_PATH: /Users/horacehxw/Projects/humanize/.humanize/ideas/spike2-progress-hud.md` and resolved companion JSON automatically; `manifest.json` records non-empty `draft_path`; 2 workers dispatched and committed 
(run 2026-05-01_09-53-34) +- [x] Report adoption path commands are correct and immediately usable (e.g., `/humanize:start-rlcr-loop` with the right worktree path) — paths verified against `git worktree list` output + +### Input Safety +- [ ] Invoking `explore-idea` with uncommitted tracked changes in the main checkout exits non-zero before the confirmation dialog, before any manifest is written, and before any worktree is created — not tested; main checkout was clean during run +- [ ] Invoking `explore-idea` when the run directory already exists exits non-zero with a collision error before any writes — not tested; `validate-explore-idea-io.sh` has collision detection but not exercised + +### Coordinator Error Handling +- [ ] A coordinator-side failure after dispatch begins (e.g., result collection error for one worker) records the failure row in `worker-results.jsonl` and allows remaining workers to finish; `.failed` is not written unless all workers fail — not tested; both workers succeeded +- [ ] When all workers fail: `.failed` is written, `manifest.json` is updated with failure reason, and no success `report.md` is produced — not tested + +### No-Push Safety +- [x] No `git push` occurred on any worker branch after the run completes — `git branch -r | grep explore/2026-05-01_09-53-34` returned empty; confirmed in Round 5 run +- [x] The main checkout is in the same state as before `explore-idea` was invoked (no uncommitted changes introduced by the coordinator) — `git log --oneline -1` still at `c3c483b` after Round 5 run + +## Spike Run Results + +| Date | Idea Input | N Directions | Workers Run | Report Path | Notes | +|------|-----------|--------------|-------------|-------------|-------| +| 2026-04-29 | explore-idea-progress-display (Live ANSI Status Dashboard) | 6 generated, 2 selected (ansi-live-rewrite, coordinator-activity-log) | 2 | `.humanize/explore/2026-04-29_16-33-06/report.md` | Manual execution (skill not registered in cached 1.16.0). 
Both workers: success, codex partial, 0 test failures. 23 + 46 tests created. No push. Confirmation UX and failure-path not tested. gen-idea .directions.json companion written manually (1.16.0 does not emit it). | +| 2026-05-01 (Round 3 rehearsal) | spike2-progress-hud — anchor rehearsal only | 6 generated via 1.17.0 gen-idea flow, 2 selected | 0 (anchor verification only, no implementation) | `.humanize/explore/2026-05-01_08-49-32/manifest.json` | Anchor rehearsal: verified both branches merge-base at 9840ede. No commits, no report.md — not a full smoke run. Superseded by Round 4 run below. | +| 2026-05-01 (Round 4) | spike2-progress-hud (Manifest-Driven Worker Progress Tracker) | 6 generated via 1.17.0 gen-idea flow (validate-gen-idea-io.sh + 6 Explore subagents + Phase 4 synthesis), 2 selected (manifest-polling, tput-cursor-table) | 2 (real workers with implementation and commits) | `.humanize/explore/2026-05-01_09-17-19/report.md` | AC-15: full end-to-end smoke using companion JSON directly as input. Both workers: task_status=success, codex=partial, 29+47 tests pass, commit_status=committed, dirty_state=clean. Both branches anchor at d71e7e8 (merge-base verified). manifest.json+dispatch-prompts/+worker-results.jsonl+report.md all present. No push. Parallel suite HUMANIZE_TEST_JOBS=4: 1919/1919 tests pass (AC-12). NOTE: draft_path="" in manifest (input was .directions.json directly, not draft.md). | +| 2026-05-01 (Round 5) | spike2-progress-hud (Manifest-Driven Worker Progress Tracker) | 6 generated via 1.17.0 gen-idea flow, 2 selected (tput-cursor-table, ansi-cr-rewrite) | 2 (real workers with implementation and commits) | `.humanize/explore/2026-05-01_09-53-34/report.md` | AC-11: draft-path UX path exercised. Input: `spike2-progress-hud.md` (draft path); companion JSON auto-resolved; manifest.json records non-empty draft_path. Both workers: task_status=success, codex=partial, 31+21 tests pass, commit_status=committed, dirty_state=clean. 
Both branches anchor at c3c483b (merge-base verified). manifest.json+dispatch-prompts/+worker-results.jsonl+report.md all present. No push. | diff --git a/docs/superpowers/plans/2026-04-29-explore-idea-hardened-prototype-plan.md b/docs/superpowers/plans/2026-04-29-explore-idea-hardened-prototype-plan.md new file mode 100644 index 00000000..37c03487 --- /dev/null +++ b/docs/superpowers/plans/2026-04-29-explore-idea-hardened-prototype-plan.md @@ -0,0 +1,1063 @@ +# `/humanize:explore-idea` Hardened Prototype MVP + +## Goal Description + +Add the `/humanize:explore-idea` command and update `/humanize:gen-idea` to emit a lossless `directions.json` companion artifact alongside each idea draft. Bump the plugin version from 1.16.0 to 1.17.0. + +The work is staged as two layers: PR-A adds the `directions.json` contract and its validator to `gen-idea`; PR-B adds the full `explore-idea` command that launches bounded parallel prototype workers in isolated worktrees, collects their JSON results, and synthesizes a two-tier report. After RLCR completes, a manual functional spike on a real task validates the behavioral assumptions documented in the `## Functional Spike Checklist`; any divergences are handled as out-of-scope follow-up. + +## Acceptance Criteria + +Following TDD philosophy, each criterion includes positive and negative tests for deterministic verification. 
+ +- AC-1: `validate-gen-idea-io.sh` enforces `.md` output suffix, rejects existing companion JSON, and emits `DIRECTIONS_JSON_FILE:` on success + - Positive Tests (expected to PASS): + - Given `--output foo.md` with no existing `foo.md` or `foo.directions.json`: exits 0, stdout includes `DIRECTIONS_JSON_FILE: /abs/path/foo.directions.json` and `VALIDATION_SUCCESS` + - Given `--output subdir/bar.md` in a writable directory: derives companion path correctly as `subdir/bar.directions.json` + - Negative Tests (expected to FAIL): + - Given `--output foo` (no `.md` suffix): exits non-zero with a clear error about required `.md` suffix + - Given `--output foo.txt`: exits non-zero with required `.md` suffix error + - Given `--output foo.md` with `foo.directions.json` already existing: exits non-zero with companion collision error + - Given `--output foo.md` with `foo.md` already existing: exits non-zero (existing output file, already in current behavior) + +- AC-2: A successful `gen-idea` run writes both the draft markdown and a schema-valid companion `directions.json`; neither file is written when validation fails; the dual-write behavior and hint output are covered by `tests/test-gen-idea-dual-write.sh` (added in task5) + - Positive Tests (expected to PASS): + - After a successful run: both `<output>.md` and `<output>.directions.json` exist on disk + - The companion JSON passes `validate-directions-json.sh` with exit code 0 + - The final `gen-idea` output reports both file paths and includes a hint for `/humanize:explore-idea <companion-json>` + - Negative Tests (expected to FAIL): + - When validation fails before generation (e.g., output already exists): neither `<output>.md` nor `<output>.directions.json` is created or modified + - When gen-idea aborts after draft write but before companion write: companion is absent; next run will not silently overwrite the draft (existing collision rejection applies) + +- AC-3: `scripts/validate-directions-json.sh` passes valid 
fixtures and rejects all known malformed cases + - Positive Tests (expected to PASS): + - A fixture with all required top-level keys, exactly one `is_primary: true`, unique `direction_id` values, unique `dir_slug` values, unique `source_index` values, integer `display_order` values, valid `confidence` enum, `metadata.n_returned == directions.length`, and 1–10 directions: exits 0 + - Negative Tests (expected to FAIL): + - Missing `schema_version` field: exits non-zero + - `directions` array with 11 elements: exits non-zero + - Two entries with `is_primary: true`: exits non-zero + - Zero entries with `is_primary: true`: exits non-zero + - Duplicate `direction_id` across two entries: exits non-zero + - Duplicate `dir_slug` across two entries: exits non-zero + - Duplicate `source_index` across two entries: exits non-zero + - A `display_order` value that is not an integer (e.g., a string): exits non-zero + - A `dir_slug` value containing uppercase letters or spaces (not branch/path safe): exits non-zero + - A direction entry missing a required per-direction field (`name`, `rationale`, `raw_phase3_response`, `approach_summary`, `objective_evidence`, or `known_risks`): exits non-zero + - `objective_evidence` or `known_risks` that is not a JSON array: exits non-zero + - `confidence` value not in `{high, medium, low}`: exits non-zero + - `metadata.n_returned` does not equal `directions.length`: exits non-zero + - Missing required top-level key (`title`, `original_idea`, `synthesis_notes`, `metadata`, or `directions`): exits non-zero + +- AC-4: `explore-idea` resolves the input file to a valid `directions.json` before creating any side effects + - Positive Tests (expected to PASS): + - Given a `.directions.json` path directly: loads and schema-validates it, then proceeds + - Given a `.md` draft path with an existing companion `.directions.json`: resolves and loads the companion, then proceeds + - Negative Tests (expected to FAIL): + - Given a `.md` path with no companion 
`.directions.json`: exits non-zero with a message instructing the user to regenerate the idea draft + - Given a `.directions.json` that fails schema validation: exits non-zero before any worktrees are created + - Given a non-existent path: exits non-zero + +- AC-5: Direction selection defaults, `--directions` override, and all hard caps are enforced + - Positive Tests (expected to PASS): + - With no `--directions` flag and 8 available directions: first 6 by `display_order` are selected + - `--directions dir-00,dir-02` (stable `direction_id` values): exactly those two are selected + - `--directions 0,2` (numeric `source_index` values): resolves correctly to corresponding directions + - `--concurrency 3` with 5 selected directions: effective concurrency is 3 + - `--concurrency 8` with 5 selected directions: effective concurrency is 5 (capped to selected count) + - Negative Tests (expected to FAIL): + - `--directions` selecting 11 directions: exits non-zero + - `--concurrency 11`: exits non-zero + - `--max-worker-iterations 4`: exits non-zero + - `--worker-timeout-min 61`: exits non-zero + - `--codex-timeout-min 21`: exits non-zero + - `--directions` referencing an unknown `direction_id` or `source_index`: exits non-zero + - `--directions` with duplicate selector values: exits non-zero + - AC-5.1: `explore-idea` hard-fails before any dispatch side effects if the main checkout has uncommitted tracked changes + - Positive Tests (expected to PASS): + - With a clean main checkout (no uncommitted tracked changes): validation passes and dispatch proceeds to confirmation + - Negative Tests (expected to FAIL): + - With one or more modified tracked files in the main checkout: exits non-zero before confirmation dialog, before manifest creation, and before any worktree is created; error message names the dirty-checkout condition explicitly + +- AC-6: Explicit user confirmation is required before any dispatch side effects occur + - Positive Tests (expected to PASS): + - Before 
dispatch: the command shows selected direction IDs and names, selected count, effective concurrency, iteration cap, worker timeout, Codex timeout, base branch, base commit, run directory, and a warning that workers will create local worktrees, branches, commits, run tests, and call Codex + - After explicit confirmation: worker dispatch proceeds + - Negative Tests (expected to FAIL): + - User denies confirmation: no worktrees are created, no manifest is written, command exits cleanly + +- AC-7: `manifest.json` is written to the run directory before any worker starts, and per-worker records are updated as workers complete + - Positive Tests (expected to PASS): + - `manifest.json` exists in `.humanize/explore/<RUN_ID>/` before the first worker is launched + - Contains: `run_id`, `created_at`, `directions_json_file`, `draft_path`, `selected_direction_ids`, `base_branch`, `base_commit`, `concurrency`, `max_worker_iterations`, `worker_timeout_min`, `codex_timeout_min`, `expected_worker_count` + - Each per-worker record contains: `direction_id`, `dir_slug`, prompt path, prompt hash, branch name, final status + - `RUN_ID` is generated as `YYYY-MM-DD_HH-MM-SS`; if a run directory for the generated ID already exists, validation fails with a collision error before any writes occur + - Negative Tests (expected to FAIL): + - If `manifest.json` cannot be written before dispatch: dispatch fails and `.failed` is written; no workers are launched + - If the run directory already exists at the time of validation: exits non-zero before manifest creation and before any worktrees are created + +- AC-8: Valid worker sentinel JSON is parsed into `worker-results.jsonl`; timeout, invalid-JSON, and no-summary cases produce coordinator-generated failure rows with stable enum values; coordinator failures after dispatch begins are recorded and do not silently lose worker results + - Positive Tests (expected to PASS): + - A worker that emits valid JSON between `=== EXPLORE_RESULT_JSON_BEGIN ===` 
and `=== EXPLORE_RESULT_JSON_END ===`: row appended to `worker-results.jsonl` with correct fields + - A worker that times out: coordinator appends `{"task_status": "timeout", "direction_id": "...", "error": "worker exceeded timeout"}` + - A worker that emits malformed JSON inside the sentinel markers: coordinator appends a `no_summary` row + - All `task_status` enum values (`success`, `partial`, `failed`, `timeout`, `no_summary`) are representable in `worker-results.jsonl` + - If a coordinator-side error occurs after dispatch begins (e.g., result collection fails for one worker): remaining workers continue; the failing worker's result row is written with the error noted; `.failed` is NOT written unless all workers failed + - Negative Tests (expected to FAIL): + - A worker result with no sentinel markers: treated as `no_summary`, not silently dropped + - If all workers fail or error: `.failed` is written and `manifest.json` is updated with failure reason; no success `report.md` is written + +- AC-9: Worker Codex calls are scoped to the worker worktree root; a root mismatch is recorded as a worker failure + - Positive Tests (expected to PASS): + - Worker sets `export CLAUDE_PROJECT_DIR="$PWD"` before calling `ask-codex.sh`; Codex resolves project root to the worker worktree path + - Worker result includes `worktree_path` matching the directory where Codex ran + - Negative Tests (expected to FAIL): + - If `CLAUDE_PROJECT_DIR` points to the coordinator checkout (mismatch detected by assertion): worker emits a failure result with `task_status: "failed"` and does not proceed with Codex + +- AC-10: `report.md` contains two-tier rankings and adoption paths with concrete worktree/branch/commit data + - Positive Tests (expected to PASS): + - `report.md` contains a "Best product direction" ranking section covering user value, strategic fit, original direction quality, objective evidence, and known risks + - `report.md` contains a "Most implementation-ready prototype" ranking 
section covering `task_status`, `codex_final_verdict`, tests passed/failed, commit status, dirty state, and iteration count + - Each worker result entry has an adoption path with worktree path, branch name, commit SHA, and a suggested next command (e.g., `/humanize:start-rlcr-loop`) + - Cleanup guidance for non-adopted worktrees and branches is included + - Negative Tests (expected to FAIL): + - If all workers failed: `report.md` is still generated with a failure table and cleanup/status guidance (no crash) + +- AC-11: After RLCR completes, a manual functional spike runs explore-idea on a real task and records a pass/partial/fail outcome for every item in the Functional Spike Checklist + - Positive Tests (expected to PASS): + - A real `gen-idea` run produces a valid `directions.json`; `explore-idea` is invoked on it with 2–3 directions and 1–2 worker iterations + - Every item in `## Functional Spike Checklist` has a recorded outcome (pass, partial, or fail) with observation notes + - Results are documented in `docs/runtime-spike-results.md` + - Negative Tests (expected to FAIL): + - A divergence discovered during the spike is patched inline without a new plan: this is a scope violation; all divergences must be filed as follow-up via `/humanize:gen-plan` + +- AC-12: All 7 new shell CI test suites are registered in `tests/run-all-tests.sh` and pass without invoking live runtime + - Positive Tests (expected to PASS): + - `tests/run-all-tests.sh` `TEST_SUITES` array includes: `test-validate-gen-idea-io.sh`, `test-directions-json-schema.sh`, `test-gen-idea-dual-write.sh`, `test-validate-explore-idea-io.sh`, `test-worker-result-contract.sh`, `test-explore-manifest.sh`, `test-explore-command-structure.sh` + - Each suite exits 0 against its valid fixtures + - Full `run-all-tests.sh` exits 0 + - Negative Tests (expected to FAIL): + - Any new test file invokes a live slash command, real Agent/Task worker, or live Codex call: this is a disqualifying violation + +- AC-13: 
`ask-codex.sh` auto-probes Codex CLI support and disables nested hooks when supported; existing hook tests pass unchanged + - Positive Tests (expected to PASS): + - When the installed Codex CLI supports `--disable codex_hooks`: `ask-codex.sh` includes that flag in all invocations automatically, without any caller-side flag + - `tests/test-ask-codex.sh` includes a case verifying the auto-probe and flag injection behavior + - Negative Tests (expected to FAIL): + - `tests/test-disable-nested-codex-hooks.sh` fails after the `ask-codex.sh` change: this is a regression that must be fixed before merging + +- AC-14: Version 1.17.0 is present in all three plugin metadata files + - Positive Tests (expected to PASS): + - `.claude-plugin/plugin.json` contains `"version": "1.17.0"` + - `.claude-plugin/marketplace.json` contains `"version": "1.17.0"` + - `README.md` "Current Version" line reads `1.17.0` + - Negative Tests (expected to FAIL): + - Any of the three files still contains `1.16.0` after the bump: this is a version inconsistency + +- AC-15: A manual smoke run with 2 directions and 1 worker iteration produces all expected artifacts with no push + - Positive Tests (expected to PASS): + - After the smoke run: `.humanize/explore/<RUN_ID>/manifest.json` exists and is complete, `worker-results.jsonl` contains exactly 2 entries, `report.md` exists with both ranking sections, 2 local branches named `explore/<RUN_ID>/<dir_slug>` exist, each branch has at least 1 commit + - Negative Tests (expected to FAIL): + - Any worker branch is visible in the upstream fork remote after the smoke run: this means a push occurred and is a critical violation + +## Path Boundaries + +Path boundaries define the acceptable range of implementation quality and choices. 
+ +### Upper Bound (Maximum Acceptable Scope) + +The implementation includes PR-A and PR-B as described in the design, with parallel worker dispatch, durable run state, two-tier LLM report, adoption paths, all 7 CI test suites registered and passing, `ask-codex.sh` auto-probe behavior, documentation updates (README, `docs/usage.md`, CLAUDE.md sync rules, `.gitignore` if needed), and the 1.17.0 version bump across all three files. The manual smoke test passes. Optional companion commands (`explore-status`, `explore-cleanup`) may be described in documentation as deferred. + +### Lower Bound (Minimum Acceptable Scope) + +The implementation includes PR-A and PR-B with all 18 tasks complete: `validate-gen-idea-io.sh` updated, `validate-directions-json.sh` added, `commands/gen-idea.md` updated, the full `explore-idea` command with supporting scripts and templates, `ask-codex.sh` auto-probe behavior, all 7 CI test suites registered and passing, documentation updates, the 1.17.0 version bump, manual smoke verification (task17), and functional spike results documented in `docs/runtime-spike-results.md` (task18). Spike divergences are out of scope for this plan. 
+ +### Allowed Choices + +- Can use: `jq` for all JSON validation in shell scripts; `bash` for all new scripts and tests; `portable-timeout.sh` for worker timeouts; existing `ask-codex.sh` invocation pattern; existing test file structure from `tests/test-validate-gen-plan-io.sh` or similar as reference +- Cannot use: Python, Node.js, or other non-shell runtimes for validators (must match existing repo conventions); nested Skills, slash commands, or Agent/Task workers inside worker prompts; `git push` from any worker; `--effort max` flag (not supported by current `ask-codex.sh`) + +> **Note on Deterministic Designs**: The draft specifies fixed values for all numeric caps, branch naming format (`explore/<RUN_ID>/<dir_slug>`), run state directory layout (`.humanize/explore/<RUN_ID>/`), sentinel markers, schema version (1), and output file naming (`${OUTPUT_FILE%.md}.directions.json`). These are fixed constraints, not choices. + +## Feasibility Hints and Suggestions + +> **Note**: This section is for reference and understanding only. These are conceptual suggestions, not prescriptive requirements. + +### Conceptual Approach + +**PR-A: Companion JSON emission** + +In `validate-gen-idea-io.sh`, after confirming the output path ends in `.md`: +```bash +# Enforce .md suffix +if [[ "${OUTPUT_FILE##*.}" != "md" ]]; then + echo "ERROR: --output must have .md suffix for companion derivation" >&2 + exit 6 +fi +DIRECTIONS_JSON_FILE="${OUTPUT_FILE%.md}.directions.json" +# Reject existing companion +if [[ -f "$DIRECTIONS_JSON_FILE" ]]; then + echo "ERROR: companion already exists: $DIRECTIONS_JSON_FILE" >&2 + exit 4 +fi +echo "DIRECTIONS_JSON_FILE: $DIRECTIONS_JSON_FILE" +``` + +In `commands/gen-idea.md`, after the draft markdown is written, parse the structured Phase 2/3 direction data and write a `directions.json` that conforms to schema version 1. Report both paths in the final output block. 
Add a hint line: +``` +Next step (optional): /humanize:explore-idea $DIRECTIONS_JSON_FILE +``` + +**PR-A: Schema validator** + +`scripts/validate-directions-json.sh` wraps a single `jq -e` expression: +```bash +jq -e ' + .schema_version == 1 + and (.directions | length) >= 1 + and (.directions | length) <= 10 + and (.directions | map(select(.is_primary == true)) | length) == 1 + and (.directions | map(.direction_id) | unique | length) == (.directions | length) + and (.directions | map(.dir_slug) | unique | length) == (.directions | length) + and (.directions | map(.dir_slug) | all(test("^[a-z0-9-]+$"))) + and (.directions | map(.source_index) | unique | length) == (.directions | length) + and (.directions | map(.display_order) | all(. != null and (type == "number") and (. == floor))) + and (.metadata.n_returned == (.directions | length)) + and (.directions | map(.confidence) | all(. == "high" or . == "medium" or . == "low")) + and (.directions | map( + has("name") and has("rationale") and has("raw_phase3_response") + and has("approach_summary") + and ((.objective_evidence | type) == "array") + and ((.known_risks | type) == "array") + ) | all) +' "$INPUT_FILE" +``` + +**PR-B: `ask-codex.sh` auto-probe** + +Check if the installed Codex CLI supports `--disable codex_hooks` by probing with `codex --help 2>&1 | grep -q 'disable'` (or equivalent). Store the result and unconditionally include the flag when supported. Follow the same pattern already used in `hooks/loop-codex-stop-hook.sh` and `scripts/bitlesson-select.sh`. + +**PR-B: Run state before dispatch** + +Before launching any workers: +1. Generate `RUN_ID` as `$(date -u +%Y-%m-%d_%H-%M-%S)` +2. Check that `.humanize/explore/$RUN_ID/` does not already exist; if it does, exit with a collision error (same-second collision: hard-fail, no retry) +3. `mkdir -p ".humanize/explore/$RUN_ID/dispatch-prompts"` +4. Write `manifest.json` with all coordinator-side fields +5. 
Write each `dispatch-prompts/<direction_id>.md` with the full worker prompt +6. Compute prompt hash with a portable command (`shasum -a 256` on macOS/Linux; `sha256sum` on Linux-only environments) and store in the manifest per-worker record + +### Relevant References + +- `scripts/validate-gen-idea-io.sh` — existing IO validation pattern; extend for companion derivation +- `scripts/validate-gen-plan-io.sh` — second IO validator to use as style reference +- `scripts/ask-codex.sh` — existing Codex invocation; add auto-probe behavior here +- `hooks/loop-codex-stop-hook.sh` — existing nested hook disable probe pattern to replicate (probe at line ~1169) +- `scripts/bitlesson-select.sh` — another instance of the probe pattern +- `scripts/portable-timeout.sh` — timeout wrapper for worker enforcement +- `tests/test-validate-gen-plan-io.sh` — example test file structure to follow for new test suites +- `tests/test-disable-nested-codex-hooks.sh` — existing test that must keep passing after ask-codex.sh change +- `tests/run-all-tests.sh` — hardcoded `TEST_SUITES` array; new tests must be added here explicitly + +## Dependencies and Sequence + +### Milestones + +1. **PR-A: gen-idea directions.json companion** + - Phase A: Update `scripts/validate-gen-idea-io.sh` — add `.md` enforcement, companion collision rejection, `DIRECTIONS_JSON_FILE:` stdout emission + - Phase B: Add `scripts/validate-directions-json.sh` — jq-based schema validator for directions.json schema v1 + - Phase C: Update `commands/gen-idea.md` — emit companion JSON after draft write, report both paths, add explore-idea hint + - Phase D: Add test fixtures under `tests/fixtures/` for valid and invalid directions.json cases, plus gen-idea IO edge cases; add `tests/test-validate-gen-idea-io.sh`, `tests/test-directions-json-schema.sh`, and `tests/test-gen-idea-dual-write.sh` (covers AC-2 dual-write and hint output); register all three in `tests/run-all-tests.sh` + +2. 
**PR-B: explore-idea input and validation layer** + - Phase A: Add `scripts/validate-explore-idea-io.sh` — resolves input to directions.json, validates direction selectors, enforces all caps, checks run dir collision, emits validation output + - Phase B: Add `commands/explore-idea.md` — frontmatter with allowed tools, command documentation, confirmation UX, coordinator loop, worker dispatch instructions, result collection, report synthesis instructions + - Phase C: Add `prompt-template/explore/worker-prompt.md` — worker constraints, loop structure, Codex call contract, result JSON sentinel emission + - Phase D: Add `prompt-template/explore/report-template.md` — two-tier ranking structure and adoption path format + +3. **PR-B: ask-codex.sh auto-probe** + - Phase A: Add nested hook disable auto-probe inside `scripts/ask-codex.sh` following the existing pattern from `hooks/loop-codex-stop-hook.sh` + - Phase B: Update `tests/test-ask-codex.sh` with auto-probe coverage; verify `tests/test-disable-nested-codex-hooks.sh` still passes + +4. **PR-B: CI test suites** + - Phase A: Add `tests/test-validate-explore-idea-io.sh`, `tests/test-worker-result-contract.sh`, `tests/test-explore-manifest.sh`, `tests/test-explore-command-structure.sh` with fixtures + - Phase B: Register all 4 in `tests/run-all-tests.sh` `TEST_SUITES` array + +5. **Documentation and version bump** + - Phase A: Update `README.md` quick start section with optional explore-idea step; update `docs/usage.md` command reference + - Phase B: Update `.claude/CLAUDE.md` sync rules for directions.json schema and worker constraint synchronization; check `.gitignore` for worktree paths + - Phase C: Bump version in `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, `README.md` from `1.16.0` to `1.17.0` + +Milestone 1 (PR-A) must complete before Milestones 2–5 begin. Milestones 2, 3, and 4 can proceed in parallel once PR-A is complete. Milestone 5 depends on Milestones 2–4. 
The manual functional spike (AC-11) runs after all milestones complete; any divergences are handled as out-of-scope follow-up. + +## Task Breakdown + +Each task must include exactly one routing tag: +- `coding`: implemented by Claude +- `analyze`: executed via Codex (`/humanize:ask-codex`) + +| Task ID | Description | Target AC | Tag (`coding`/`analyze`) | Depends On | +|---------|-------------|-----------|----------------------------|------------| +| task1 | Update `scripts/validate-gen-idea-io.sh`: enforce `.md` suffix, reject existing companion JSON, emit `DIRECTIONS_JSON_FILE:` | AC-1 | coding | - | +| task2 | Add `scripts/validate-directions-json.sh`: jq schema validator for directions.json v1 | AC-3 | coding | - | +| task3 | Update `commands/gen-idea.md`: emit companion JSON after draft write, report both paths, add explore-idea hint | AC-2 | coding | task1, task2 | +| task4 | Add test fixtures for PR-A (valid/invalid directions.json, gen-idea IO edge cases) | AC-1, AC-2, AC-3 | coding | task1, task2 | +| task5 | Add `tests/test-validate-gen-idea-io.sh`, `tests/test-directions-json-schema.sh`, and `tests/test-gen-idea-dual-write.sh` (covers AC-2 dual-write and hint output) | AC-2, AC-12 | coding | task4 | +| task6 | Register PR-A test suites in `tests/run-all-tests.sh` `TEST_SUITES` array | AC-12 | coding | task5 | +| task7 | Add `scripts/validate-explore-idea-io.sh`: input resolution, dirty-checkout hard-fail, direction selection, all hard caps, run dir collision | AC-4, AC-5, AC-5.1 | coding | task6 | +| task8 | Add `commands/explore-idea.md`: frontmatter, args doc, confirmation UX, coordinator loop, worker dispatch and collection, post-dispatch fail-and-record | AC-6, AC-7, AC-8, AC-9, AC-10 | coding | task7 | +| task9 | Add `prompt-template/explore/worker-prompt.md`: worker loop, constraints, result JSON sentinel | AC-9 | coding | task7 | +| task10 | Add `prompt-template/explore/report-template.md`: two-tier ranking structure and adoption path format | 
AC-10 | coding | task7 | +| task11 | Add nested hook auto-probe to `scripts/ask-codex.sh`; update `tests/test-ask-codex.sh` | AC-13 | coding | task6 | +| task12 | Add `tests/test-validate-explore-idea-io.sh`, `test-worker-result-contract.sh`, `test-explore-manifest.sh`, `test-explore-command-structure.sh` with fixtures | AC-12 | coding | task7, task8, task9 | +| task13 | Register all PR-B test suites in `tests/run-all-tests.sh` `TEST_SUITES` array | AC-12 | coding | task12 | +| task14 | Update `README.md` quick start and `docs/usage.md` command reference | - | coding | task13 | +| task15 | Update `.claude/CLAUDE.md` sync rules; check `.gitignore` for worktree paths | - | coding | task13 | +| task16 | Bump version `1.16.0` → `1.17.0` in `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, `README.md` | AC-14 | coding | task14, task15 | +| task17 | Manual smoke run: invoke explore-idea with 2 directions and 1 worker iteration; verify all artifacts exist and no push occurred | AC-15 | coding | task16, task11 | +| task18 | Functional spike: run gen-idea → explore-idea on a real task; record every Functional Spike Checklist item; write `docs/runtime-spike-results.md` | AC-11 | coding | task17 | + +## Functional Spike Checklist + +These items are derived from spec assumptions that deterministic shell tests cannot verify. After RLCR completes, run `explore-idea` on a real task (using `gen-idea` output as input, 2–3 directions, 1–2 worker iterations) and record each item as **pass**, **partial**, or **fail** with brief observation notes. File divergences as follow-up via `/humanize:gen-plan` — do not patch them inline. 
+ +### Worker Isolation + +- [ ] Each worker modifies only files within its assigned worktree; no files outside the worktree are created or changed +- [ ] Workers do not invoke nested Skills or slash commands during execution +- [ ] Workers do not spawn nested Agent/Task workers +- [ ] Workers do not push any branch to any remote +- [ ] Workers do not access or read sibling worktrees + +### Concurrency and Coordination + +- [ ] Multiple workers dispatch in parallel (not serially), bounded by the configured `--concurrency` value +- [ ] Coordinator waits for all workers to complete within a single session without manual intervention +- [ ] Worker timeouts are enforced; a timed-out worker produces a coordinator-generated `task_status: "timeout"` row rather than hanging indefinitely + +### Codex Root Scoping + +- [ ] `export CLAUDE_PROJECT_DIR="$PWD"` inside a worker worktree correctly scopes `ask-codex.sh` to that worktree's path, not the coordinator checkout +- [ ] `ask-codex.sh` auto-probe behavior correctly disables nested Codex hooks during a live worker session +- [ ] No worker Codex call accidentally reads or modifies the coordinator checkout + +### Worker Result Collection + +- [ ] Sentinel markers (`=== EXPLORE_RESULT_JSON_BEGIN ===` / `=== EXPLORE_RESULT_JSON_END ===`) are emitted by workers and parsed correctly by the coordinator +- [ ] `worker-results.jsonl` contains exactly one row per dispatched worker after all workers complete +- [ ] A worker that fails, times out, or emits malformed JSON produces a coordinator-generated row; no result is silently dropped + +### Artifact Integrity + +- [ ] `manifest.json` exists and is complete with all required fields before the first worker starts work +- [ ] `dispatch-prompts/<direction_id>.md` contains the actual prompt text sent to each worker +- [ ] Branch names follow the exact `explore/<RUN_ID>/<dir_slug>` format +- [ ] Each successful worker branch has at least one commit with the prototype changes + +### 
Report Quality + +- [ ] `report.md` contains both ranking tiers with coherent synthesis derived from actual worker result data +- [ ] Adoption paths in the report contain the correct worktree path, branch name, and commit SHA for each worker +- [ ] Cleanup guidance accurately describes the real worktrees and branches created during the run + +### UX Correctness + +- [ ] The confirmation dialog shows all expected parameters (direction IDs, concurrency, timeouts, base branch, base commit, run directory, mutation warning) before any worker is dispatched +- [ ] The end-to-end `gen-idea` → `explore-idea <draft.md>` workflow resolves the companion JSON and proceeds without extra steps +- [ ] Report adoption path commands are correct and immediately usable (e.g., `/humanize:start-rlcr-loop` with the right worktree path) + +### Input Safety + +- [ ] Invoking `explore-idea` with uncommitted tracked changes in the main checkout exits non-zero before the confirmation dialog, before any manifest is written, and before any worktree is created +- [ ] Invoking `explore-idea` when the run directory already exists exits non-zero with a collision error before any writes + +### Coordinator Error Handling + +- [ ] A coordinator-side failure after dispatch begins (e.g., result collection error for one worker) records the failure row in `worker-results.jsonl` and allows remaining workers to finish; `.failed` is not written unless all workers fail +- [ ] When all workers fail: `.failed` is written, `manifest.json` is updated with failure reason, and no success `report.md` is produced + +### No-Push Safety + +- [ ] No `git push` occurred on any worker branch after the run completes +- [ ] The main checkout is in the same state as before `explore-idea` was invoked (no uncommitted changes introduced by the coordinator) + +## Claude-Codex Deliberation + +### Agreements + +- PR-A (gen-idea companion) must complete before PR-B (explore-idea) begins: the `directions.json` schema is the 
foundational contract that both layers depend on. +- Runtime behavioral assumptions (worker isolation, parallel execution, Codex root scoping, result collection) are best validated by a real functional spike after implementation, not by a pre-implementation capability checklist; the `## Functional Spike Checklist` captures these assumptions so divergences are trackable. +- Hard numeric caps (10 directions, 10 concurrency, 3 iterations, 60/20 min timeouts) are correct and sufficient to prevent unbounded fanout. +- Durable run state (`manifest.json` before dispatch, `worker-results.jsonl` per result) is the right design for inspectability and postmortem debugging. +- `tests/run-all-tests.sh` registration via the hardcoded `TEST_SUITES` array is mandatory; forgetting registration silently drops coverage. +- `CLAUDE_PROJECT_DIR=$PWD` is the correct seam for scoping `ask-codex.sh` to the worker worktree root; `resolve_project_root()` in the script already prefers this env var. + +### Resolved Disagreements + +- **DEC-3 hook disabling approach**: Claude proposed an opt-in `--disable-nested-codex-hooks` flag for `ask-codex.sh` callers. Second Codex review rejected this, citing that the existing codebase pattern (used in `hooks/loop-codex-stop-hook.sh` and `scripts/bitlesson-select.sh`) is script-level auto-probe, not caller-pushed flags. Resolution: `ask-codex.sh` probes internally for Codex's `--disable codex_hooks` support and applies that option automatically when it is available; no caller change needed, no new `ask-codex.sh` flag exposed. +- **AC-2 companion collision gap**: Claude's initial AC-2 did not explicitly require rejecting an already-existing `<output>.directions.json`. Second Codex review identified this as a missing first-class validation. Resolution: AC-1 now explicitly covers companion collision rejection in `validate-gen-idea-io.sh`, and its tests cover the collision case. +- **Spike position and nature**: Initial plan placed a pre-implementation capability spike as a blocking gate between PR-A and PR-B.
Revised per user direction: the spike is a post-RLCR functional validation on a real task, with a predefined checklist derived from spec assumptions. Divergences are out-of-scope follow-up, not inline patches. + +### Convergence Status + +- Final Status: `converged` + +## Pending User Decisions + +- DEC-1: Dirty main checkout before explore-idea dispatch + - Claude Position: Hard-fail — reject if main checkout has uncommitted tracked changes; no `--allow-dirty` in MVP + - Codex Position: N/A - open question (Codex flagged as missing requirement, did not take opposing position) + - Tradeoff Summary: Hard-fail prevents inconsistent prototype base states at the cost of forcing users to stash or commit before exploring; warn-and-proceed reduces friction but risks divergent branches + - Decision Status: Hard-fail (user confirmed) + +- DEC-2: Spike timing and divergence handling + - Claude Position: Post-RLCR functional spike on a real task; divergences filed as follow-up via `/humanize:gen-plan` + - Codex Position: N/A - the original question (serial fallback if pre-implementation spike failed) is superseded by the post-implementation spike model + - Tradeoff Summary: Post-RLCR spike lets implementation proceed on spec assumptions and validates them empirically; pre-implementation gate would have required capabilities to be proven before any PR-B code was written + - Decision Status: Post-RLCR functional spike; divergences are out-of-scope follow-up (user confirmed) + +- DEC-3: Codex hook disabling approach + - Claude Position: Opt-in `--disable-nested-codex-hooks` flag passed by callers + - Codex Position: Script-level auto-probe in `ask-codex.sh` to match existing codebase pattern; no caller flag needed + - Tradeoff Summary: Auto-probe is cleaner and safer — one place to maintain, no risk of callers forgetting the flag; opt-in flag distributes responsibility to callers + - Decision Status: Auto-probe in `ask-codex.sh` (Codex REQUIRED_CHANGES; adopted) + +- DEC-4: 
Crash recovery scope for MVP + - Claude Position: Fail-and-record — write `.failed`, record failure reason in `manifest.json`, require manual cleanup; no resume + - Codex Position: N/A - open question (Codex flagged as missing requirement, did not take opposing position) + - Tradeoff Summary: Fail-and-record is simpler and ships faster; resume logic adds significant complexity for a feature not yet running in production + - Decision Status: Fail-and-record for MVP (both Claude and Codex agreed; user confirmed via numeric caps confirmation) + +## Implementation Notes + +### Code Style Requirements + +- Implementation code and comments must NOT contain plan-specific terminology such as "AC-", "Milestone", "Step", "Phase", or similar workflow markers +- These terms are for plan documentation only, not for the resulting codebase +- Use descriptive, domain-appropriate naming in code instead + +--- Original Design Draft Start --- + +# Design: `/humanize:explore-idea` Hardened Prototype MVP + +> Status: Approved brainstorming revision. Awaiting user review before implementation planning. +> Date: 2026-04-29 +> Supersedes: `docs/superpowers/specs/2026-04-28-explore-idea-design.md` +> Target flow: implement on a Horacehxw fork branch, verify there, then open one combined upstream PR. + +--- + +## 1. Motivation + +The first `/humanize:explore-idea` design proposed parallel per-direction implementation attempts, but review found several blocking issues: unbounded fanout, prompt-only safety guarantees, fragile line-oriented contracts, missing manifest state, invalid `ask-codex.sh` flags, unclear worktree isolation, and ambiguous adoption/cleanup. + +This revision keeps the central value proposition: compare real local prototype branches, not just plans. Workers may implement, test, consult Codex, and commit locally by default. 
That behavior is now gated by explicit user confirmation and backed by bounded concurrency, durable run state, JSON contracts, deterministic branch naming, worktree-root assertions, and cleanup/adoption instructions. + +## 2. Goals and Non-Goals + +### Goals + +- Generate a lossless `directions.json` companion artifact from `/humanize:gen-idea`. +- Explore selected directions as bounded parallel prototype attempts. +- Create local worker worktrees, branches, and commits by default after a blocking user confirmation. +- Keep active work bounded: selected directions `<= 10`, active workers `<= --concurrency`, active Codex calls `<= active workers`. +- Persist enough state to understand, inspect, adopt, or clean up every worker result. +- Use JSON contracts for direction schema and worker results. +- Produce a human report with separate product-direction and implementation-readiness rankings. +- Verify all deterministic behavior in shell CI before any upstream PR. + +### Non-Goals + +- No auto-push from workers. +- No auto-merge or upstream PR creation from `/humanize:explore-idea`. +- No nested Skill, Agent, or Task fanout inside workers. +- No claim that the worker loop is full RLCR. It is a bounded prototype review loop. +- No CI test that runs real Claude slash commands, Agent/Task workers, or live Codex calls. +- No direct upstream PR until the fork branch has passed deterministic tests and a manual runtime smoke. + +## 3. Contribution Flow + +Build the change as one feature branch in the Horacehxw fork, but keep the work internally staged as two layers: + +1. **PR-A layer:** amend `gen-idea` to emit and validate `directions.json`. +2. **PR-B layer:** add `explore-idea` and its validators, templates, worker result handling, report synthesis, and documentation. + +After local implementation: + +1. Push the branch to the Horacehxw fork. +2. Run deterministic shell tests. +3. Run the blocking runtime spike for Agent/Task worktree behavior. +4. 
Run one tiny manual smoke with two directions and one worker iteration. +5. Open one combined upstream PR after verification. + +Versioning is a single public bump from `1.16.0` to `1.17.0` across `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, and the `README.md` Current Version line. + +## 4. PR-A Layer: Lossless `directions.json` + +### 4.1 `gen-idea` Output Contract + +After the draft markdown is written, `gen-idea` writes a companion file: + +```text +<draft>.directions.json +``` + +For ordinary `.md` output, the path is derived with: + +```bash +${OUTPUT_FILE%.md}.directions.json +``` + +MVP behavior: reject non-`.md` output for `gen-idea`, because companion derivation and draft ergonomics rely on the markdown suffix. + +`commands/gen-idea.md` must update its hard constraint from "single output draft file" to "draft file plus validated directions companion artifact." It must report both paths in its final output and mention the optional next step: + +```text +/humanize:explore-idea <directions-json-path> +``` + +### 4.2 Validation Changes + +`scripts/validate-gen-idea-io.sh` must: + +- Require a `.md` output path. +- Derive `DIRECTIONS_JSON_FILE`. +- Reject an existing draft file. +- Reject an existing companion JSON file. +- Ensure the output directory is writable for both files. +- Emit `DIRECTIONS_JSON_FILE: <absolute-path>` on success. + +If any validation fails, neither output file is written. 
+ +### 4.3 Schema + +`directions.json` uses schema version 1: + +```json +{ + "schema_version": 1, + "title": "Command Pattern Undo Stack", + "original_idea": "verbatim user input", + "synthesis_notes": "lead synthesis paragraph", + "metadata": { + "n_requested": 6, + "n_returned": 6, + "timestamp": "20260429-153012", + "draft_path": ".humanize/ideas/undo-redo-20260429-153012.md" + }, + "directions": [ + { + "direction_id": "dir-00-command-history", + "dir_slug": "command-history", + "source_index": 0, + "display_order": 0, + "is_primary": true, + "name": "Command History", + "rationale": "Single-sentence rationale from Phase 2.", + "raw_phase3_response": "Exact raw proposal text from the explorer.", + "approach_summary": "Normalized approach summary.", + "objective_evidence": ["path/or/evidence"], + "known_risks": ["risk"], + "confidence": "high" + } + ] +} +``` + +Rules: + +- `direction_id` is immutable and unique. +- `dir_slug` is unique and branch/path safe: lowercase ASCII letters, digits, and hyphens. +- `source_index` preserves the original Phase-2 direction index. +- `display_order` is primary first, then alternatives. +- `raw_phase3_response` preserves the exact subagent response. +- Normalized fields are derived for easier downstream consumption. +- `original_idea` is exempt from generated-text English-only rules because it must preserve user input verbatim. +- Generated fields remain English-only and contain no emoji or CJK characters. + +### 4.4 Shared Schema Validator + +Add a deterministic schema validator, preferably `scripts/validate-directions-json.sh` using `jq`. 
It validates: + +- `schema_version == 1` +- required top-level keys +- `directions` length is `1..10` +- exactly one `is_primary: true` +- unique `direction_id` +- unique `dir_slug` +- unique `source_index` +- contiguous or unique `display_order` values +- `confidence` is `high`, `medium`, or `low` +- `metadata.n_returned == directions.length` +- required string/list fields have the expected types + +Both `gen-idea` and `explore-idea` rely on this validator as the canonical contract. + +## 5. PR-B Layer: Command UX + +### 5.1 Command Surface + +```text +/humanize:explore-idea <draft-or-directions-json> + [--directions ids] + [--concurrency P] + [--max-worker-iterations R] + [--worker-timeout-min M] + [--codex-timeout-min M] +``` + +Input: + +- Accept a `.directions.json` path directly. +- Accept a generated draft `.md` path and resolve the companion JSON with `.md -> .directions.json`. +- If the companion JSON is missing, fail clearly and tell the user to regenerate the idea draft. + +Direction selection: + +- Default: first `min(6, directions.length)` directions by `display_order`. +- `--directions` selects stable `direction_id` values or numeric `source_index` values. +- Validation rejects selecting more than 10 directions. +- Validation rejects duplicate or unknown direction selectors. + +Defaults and caps: + +- Default selected directions: up to 6. +- Hard max directions: 10. +- Default concurrency: 6. +- Hard max concurrency: 10. +- Effective concurrency: `min(requested_concurrency, selected_direction_count)`. +- Default worker iterations: 2. +- Hard max worker iterations: 3. +- Default worker timeout: 60 minutes. +- Hard max worker timeout: 60 minutes. +- Default Codex timeout: 20 minutes. +- Hard max Codex timeout: 20 minutes. + +### 5.2 Blocking Confirmation + +Commits are default behavior, but dispatch is blocked until explicit user confirmation. 
+ +Before launching workers, the command shows: + +- selected direction IDs and names +- selected direction count +- effective concurrency +- worker iteration cap +- worker timeout +- Codex timeout +- base branch +- base commit +- run directory +- warning that workers will create local worktrees, branches, commits, run targeted tests, and invoke Codex + +The command proceeds only if the user explicitly confirms. + +### 5.3 Frontmatter and Runtime Capability + +The implementation must use the current Claude Code subagent tool naming and schema. If the current runtime uses `Agent`, command docs and frontmatter should use `Agent`. If `Task` remains the installed command-tool name, the spec may document `Task` as a compatibility alias. + +Before PR-B implementation proceeds, run a blocking spike that proves: + +- worktree isolation is supported +- background execution or equivalent parallel execution is supported +- the command can wait for all workers in one session +- worker results are available to the coordinator +- worktree path and branch name are discoverable +- worker permissions allow required edits, tests, git, and Codex calls + +If the spike fails, revise PR-B before implementation continues. + +## 6. 
Explore Run State + +The coordinator writes durable state before dispatch: + +```text +.humanize/explore/<RUN_ID>/ + manifest.json + dispatch-prompts/ + <direction_id>.md + worker-results.jsonl + report.md + .failed +``` + +`manifest.json` includes: + +- `run_id` +- `created_at` +- `directions_json_file` +- `draft_path` +- `selected_direction_ids` +- `base_branch` +- `base_commit` +- `concurrency` +- `max_worker_iterations` +- `worker_timeout_min` +- `codex_timeout_min` +- `expected_worker_count` +- `runtime_spike_status` +- per-worker records with `direction_id`, `dir_slug`, prompt path, prompt hash, branch name, worktree path if known, task/agent id if available, and final status + +`dispatch-prompts/<direction_id>.md` stores the exact prompt sent to each worker. Prompts are not in-memory only. + +`worker-results.jsonl` stores one JSON object per worker result or coordinator-generated failure row. + +If dispatch fails entirely, write `.failed` and update `manifest.json` with the failure reason. + +## 7. Worker Runtime and Isolation + +### 7.1 Worker Constraints + +Each worker must: + +- stay inside its assigned worktree +- not invoke Skills or slash commands +- not spawn nested Agent/Task workers +- not push branches +- not access sibling worktrees +- not perform destructive cleanup outside its worktree +- use only the approved Codex consultation path +- emit the JSON result sentinel as its final action + +These are still prompt-level constraints unless the runtime exposes tool-level restrictions. The spec must not claim a strict concurrency proof unless those restrictions are verified. + +### 7.2 Worktree Root Safety + +Before calling Humanize scripts, the worker must: + +```bash +export CLAUDE_PROJECT_DIR="$PWD" +``` + +It must assert that `scripts/ask-codex.sh` resolves the same project root as the assigned worktree. If the assertion fails, the worker stops and emits a failure result. 
+ +This prevents `ask-codex.sh` from resolving the coordinator checkout through inherited `CLAUDE_PROJECT_DIR`. + +### 7.3 Codex Calls + +Worker Codex calls use: + +```bash +bash "${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh" \ + --codex-timeout 1200 \ + --codex-model "<model>:xhigh" \ + "<prompt>" +``` + +`ask-codex.sh` must disable nested Codex hooks when supported, using the same `--disable codex_hooks` probing pattern already used by the RLCR stop hook and `bitlesson-select.sh`. + +The spec does not use `--effort max`; that flag is not supported by the current script. + +### 7.4 Worker Loop + +The worker loop is a bounded prototype review loop: + +1. Inspect relevant repo context. +2. Write a short plan sketch under the worker summary data. +3. Implement scoped prototype changes. +4. Run targeted tests for touched areas. +5. Ask Codex for review. +6. Apply useful feedback. +7. Repeat until `max_worker_iterations`, Codex `LGTM`, or failure. +8. Commit local changes when appropriate. +9. Emit JSON result. + +This is not full RLCR. It does not replace `/humanize:start-rlcr-loop`. + +### 7.5 Branch and Commit Rules + +Branch names are deterministic: + +```text +explore/<RUN_ID>/<dir_slug> +``` + +The worker result records: + +- `branch_name` +- `worktree_path` +- `commit_sha` +- `commit_count` +- `dirty_state` +- `commit_status` + +Allowed `commit_status` values: + +- `committed` +- `none` +- `wip` +- `failed` + +Successful and partial workers should commit if they produced changes. Failed workers may leave WIP changes only if the result marks that state clearly. + +### 7.6 Timeouts + +Coordinator enforces the worker timeout. + +Codex calls use the Codex timeout. + +If a worker times out, the coordinator writes a timeout result row to `worker-results.jsonl` with: + +```json +{ + "task_status": "timeout", + "direction_id": "...", + "error": "worker exceeded timeout" +} +``` + +The report includes timeout cleanup guidance. 
+ +### 7.7 BitLesson + +If worker worktree paths are known before substantive work begins, the coordinator copies or initializes `.humanize/bitlesson.md` in each worker worktree. + +If paths are not known until completion, BitLesson is explicitly unavailable for MVP. Worker results set `bitlesson_action: "none"` and the report states that this run has reduced parity with standard RLCR. + +## 8. Worker Result Contract + +Workers print one JSON object between sentinel markers: + +```text +=== EXPLORE_RESULT_JSON_BEGIN === +{ + "schema_version": 1, + "run_id": "2026-04-29_15-30-12", + "direction_id": "dir-00-command-history", + "dir_slug": "command-history", + "task_status": "success", + "codex_final_verdict": "lgtm", + "rounds_used": 2, + "tests_passed": 3, + "tests_failed": 0, + "worktree_path": "/abs/path", + "branch_name": "explore/2026-04-29_15-30-12/command-history", + "commit_sha": "abc123", + "commit_count": 1, + "dirty_state": "clean", + "commit_status": "committed", + "summary_markdown": "Full markdown summary.", + "what_worked": ["item"], + "what_didnt": ["item"], + "bitlesson_action": "none", + "error": null +} +=== EXPLORE_RESULT_JSON_END === +``` + +Enums: + +- `task_status`: `success`, `partial`, `failed`, `timeout`, `no_summary` +- `codex_final_verdict`: `lgtm`, `partial`, `failed`, `unavailable` +- `dirty_state`: `clean`, `dirty`, `unknown` +- `bitlesson_action`: `none`, `add`, `update` + +The coordinator parses JSON, not ad hoc `KEY: VALUE` lines. Invalid JSON creates a `no_summary` row. + +## 9. Ranking and Report + +`worker-results.jsonl` is the machine-readable source of truth. `report.md` is the human synthesis. + +The report has two rankings: + +1. **Best product direction** + - user value + - strategic fit + - original direction quality + - objective evidence + - known risks + +2. 
**Most implementation-ready prototype** + - `task_status` + - `codex_final_verdict` + - tests passed/failed + - commit status + - dirty state + - implementation fit + - worker iteration count + +The design no longer claims deterministic ranking unless a future deterministic `ranking.json` artifact is added. For MVP, ranking is qualitative LLM synthesis over JSON inputs. + +The synthesis is performed by the coordinator's current reasoning context unless `ask-codex.sh` is explicitly allowed and called with the valid `--codex-model <model>:xhigh` contract. + +## 10. Adoption and Cleanup + +The report includes exact adoption paths: + +### Continue Winner Branch + +Includes: + +- worktree path +- branch name +- commit SHA +- suggested next command, for example `/humanize:start-rlcr-loop --skip-impl` when appropriate + +### Restart From Plan + +Use the winning worker's plan sketch and `summary_markdown` as input to normal `/humanize:gen-plan`, then run standard RLCR. + +### Cherry-Pick Prototype + +Includes exact commit SHA and warns that the user should verify the base branch first. + +### Discard Prototypes + +Includes cleanup guidance for losing worktrees and branches. + +Future companion commands are designed but may be deferred: + +```text +/humanize:explore-status <run-id> +/humanize:explore-cleanup <run-id> [--failed-only|--losers|--all] +``` + +If companion commands are deferred, the MVP report still prints shell cleanup commands and all ownership data remains in `manifest.json`. + +## 11. 
Safety Model + +The safety model is bounded concurrency, not an unqualified `2N` proof: + +- selected directions are bounded by 10 +- active workers are bounded by `--concurrency` +- active Codex calls are bounded by active workers +- nested Skill, Agent, and Task calls inside workers are forbidden +- worker project root is asserted before Codex calls +- `ask-codex.sh` disables nested Codex hooks when supported +- dispatch requires explicit user confirmation +- all worker branches/worktrees are recorded in the manifest + +If the runtime cannot enforce tool-level worker restrictions, the spec must describe nested fanout prevention as prompt-enforced plus verified by smoke testing, not mathematically guaranteed. + +## 12. Error Handling + +Validation failures occur before `RUN_DIR` creation. + +If `RUN_DIR` already exists, validation fails unless a future cleanup flag is implemented. + +If a selected direction is invalid, validation fails. + +If dispatch fails entirely: + +- write `.failed` +- update `manifest.json` +- do not write a success report + +If a worker times out, fails, or emits invalid JSON: + +- append a coordinator-generated JSON row to `worker-results.jsonl` +- continue collecting other workers +- include the failed worker in `report.md` + +If all workers fail: + +- write a minimal `report.md` +- include the failure table and cleanup/status guidance + +## 13. Testing + +CI tests are deterministic shell tests. 
+ +Add: + +- `tests/test-validate-gen-idea-io.sh` + - companion path derivation + - `.md` requirement + - companion collision rejection + - `DIRECTIONS_JSON_FILE` stdout + +- `tests/test-directions-json-schema.sh` + - valid fixture + - missing keys + - more than 10 directions + - duplicate `direction_id` + - duplicate `dir_slug` + - missing primary + - multiple primary entries + - bad confidence enum + - `n_returned` mismatch + +- `tests/test-validate-explore-idea-io.sh` + - direct JSON input + - draft-to-json resolution + - missing companion JSON + - direction cap + - `--directions` parsing + - concurrency range + - worker iteration range + - timeout range + - run dir collision + - template presence + +- `tests/test-worker-result-contract.sh` + - valid JSON sentinel + - invalid JSON sentinel + - timeout row + - no-summary row + - enum validation + +- `tests/test-explore-manifest.sh` + - required manifest fields + - base branch and base commit fields + - selected direction IDs + - prompt path and prompt hash fields + +- `tests/test-explore-command-structure.sh` + - frontmatter tools + - blocking confirmation text + - worker hard constraints + - schema/template sync references + +Every new suite must be added to `tests/run-all-tests.sh`. + +No CI test invokes live slash commands, real Agent/Task workers, or real Codex. + +## 14. Manual Verification Before Upstream PR + +Before opening the upstream PR: + +1. Push the feature branch to the Horacehxw fork. +2. Run the full shell test suite. +3. Run the runtime spike: + - prove worker worktree isolation + - prove background/wait or equivalent parallel collection + - prove worktree path and branch name discovery + - prove worker permissions for edit/test/git/Codex + - prove `CLAUDE_PROJECT_DIR="$PWD"` makes Codex run in the worker worktree + - prove Codex hook disabling is active when supported +4. 
Run one tiny manual smoke: + - two directions + - one worker iteration + - inspect `manifest.json` + - inspect `worker-results.jsonl` + - inspect `report.md` + - verify local branches and commits + - verify no push occurred + +If any runtime spike check fails, revise PR-B before opening the upstream PR. + +## 15. Documentation Updates + +Update: + +- `README.md` quick start with optional `explore-idea`. +- `docs/usage.md` command reference. +- `.claude/CLAUDE.md` sync rules: + - `directions.json` schema is canonical in the schema validator and documented in both command docs. + - worker constraints in `commands/explore-idea.md` and `prompt-template/explore/worker-prompt.md` must stay in sync. +- `.gitignore` if runtime spike confirms Claude-managed worktrees appear under an unignored path such as `.claude/worktrees/`. + +## 16. Open Implementation Risks + +These are blocking before PR-B is considered ready: + +1. Confirm actual current Claude Code `Agent` or `Task` tool schema. +2. Confirm worktree isolation and branch naming behavior. +3. Confirm whether worktree paths are available before workers begin. +4. Confirm single command can wait and collect all worker results. +5. Confirm background workers can use required tools without hidden permission prompts. +6. Confirm `ask-codex.sh` hook disabling does not break existing tests. +7. Confirm concurrent Codex calls do not hit local locks or unacceptable rate limits. + +If any item fails, update this design before implementation planning continues. + +--- Original Design Draft End --- diff --git a/docs/superpowers/specs/2026-04-28-explore-idea-design.md b/docs/superpowers/specs/2026-04-28-explore-idea-design.md new file mode 100644 index 00000000..ce425d09 --- /dev/null +++ b/docs/superpowers/specs/2026-04-28-explore-idea-design.md @@ -0,0 +1,377 @@ +# Design: `/humanize:explore-idea` — Parallel Per-Direction RLCR Exploration + +> Status: Approved (brainstorming gate). Awaiting writing-plans handoff. 
+> Date: 2026-04-28 +> Authors: Claude Opus 4.7 (1M context) with reviewer input from Claude Opus 4.7 (general-purpose) and Codex GPT-5.4 xhigh. +> Target branches: `dev` (PR-A first, then PR-B). + +--- + +## 1. Motivation + +The existing `/humanize:gen-idea` command produces a draft enumerating N orthogonal directions for an idea, with one direction synthesized as the primary and the rest as compressed alternatives. The user must then manually pick one direction, run `/humanize:gen-plan`, and run `/humanize:start-rlcr-loop` — exploring a single direction at a time. + +This design adds parallel exploration: take the N directions and run a full RLCR-equivalent loop on each one independently, in isolated git worktrees, then synthesize a comparison report. Rooted in the W2S Automated Researcher principle (parallel autonomous researchers in sandboxed environments) and the user's `gen-idea-parallel-exploration-methodology-v2.md` doctrine (parallel at the worktree-session boundary, sequential within each worker, never invoke Skills inside subagents). + +## 2. Goals and non-goals + +### Goals + +- Enable single-command "explore each direction in parallel" workflow after `gen-idea`. +- Stay strictly within the v2 doctrine's `2N` peak concurrency bound — no recursive Skill fanout. +- Reuse Claude Code primitives (`Task` tool with `isolation: "worktree"`, `run_in_background: true`) and existing humanize primitives (`scripts/ask-codex.sh`, `.humanize/` layout, sentinel-block stdout contract) rather than inventing parallel mechanisms. +- Match `gen-idea` and `gen-plan` structural conventions so the new command feels native to the plugin. +- Produce both a deterministic ranking and an LLM-synthesized comparison report; keep the two layers separable. + +### Non-goals + +- Running multiple independent samples of the same direction (W2S sample-fanout). Only direction-fanout is in scope. +- Auto-pushing branches or auto-opening PRs (intentionally local-only commits). 
+- Cross-worker information sharing during the run. +- Replacing or wrapping `/humanize:start-rlcr-loop` for solo single-direction use. +- A `gen-idea --explore` chainer flag (deferred indefinitely; Skill-from-Skill chaining at the orchestrator level is not yet proven safe). +- Modifying `setup-rlcr-loop.sh` to be worktree-aware (deferred; workers run an inline RLCR-equivalent loop instead). + +## 3. Contribution structure + +This contribution lands as **two coordinated PRs**, both targeting `dev`: + +- **PR-A**: amend `gen-idea` (commands/gen-idea.md and validate-gen-idea-io.sh) to additionally emit a `directions.json` companion artifact carrying the lossless per-direction proposals. Bumps version triplet to `1.16.1`. +- **PR-B**: add the `/humanize:explore-idea` command and its supporting templates and scripts. Depends on PR-A merged. Bumps version triplet to `1.17.0`. + +The split is forced by a finding from the design review: the existing `gen-idea` template (`prompt-template/idea/gen-idea-template.md` lines 7–30) compresses non-primary directions to `Gist / Objective Evidence / Why not primary`, discarding each alternative's full `APPROACH_SUMMARY` from Phase 3. Without an upstream lossless artifact, `explore-idea` would either operate on degraded inputs for non-primary directions or be forced to re-run the explorer subagents to recover them. + +## 4. PR-A: gen-idea amendment + +### 4.1 Phase 4 add-on (Step 4.6) + +After `gen-idea` Phase 4 finishes writing the draft `.md` file, add a new step: + +> **Step 4.6: Write the directions companion artifact.** +> Write a `directions.json` file alongside the draft, capturing every Phase-3 surviving proposal verbatim. The path is `<OUTPUT_FILE>` with `.md` replaced by `.directions.json`. Single write, no progressive edits, no tempfile. 
+ +### 4.2 Schema for `directions.json` + +```json +{ + "schema_version": 1, + "title": "<inferred title from Step 4.2>", + "original_idea": "<IDEA_BODY verbatim>", + "synthesis_notes": "<lead's synthesis paragraph>", + "metadata": { + "n_requested": 6, + "n_returned": 6, + "timestamp": "2026-04-28_17-30-12", + "draft_path": ".humanize/ideas/undo-redo-2026-04-28-17-30-12.md" + }, + "directions": [ + { + "index": 0, + "is_primary": true, + "name": "<short label>", + "rationale": "<single-sentence rationale from Phase 2>", + "approach_summary": "<full APPROACH_SUMMARY from Phase 3>", + "objective_evidence": ["<bullet>", "<bullet>"], + "known_risks": ["<bullet>", "<bullet>"], + "confidence": "high|medium|low" + }, + { + "index": 1, + "is_primary": false, + "name": "...", + "rationale": "...", + "approach_summary": "...", + "objective_evidence": ["..."], + "known_risks": ["..."], + "confidence": "..." + } + ] +} +``` + +- `directions` is ordered: primary first (index 0), then alternatives in the order they appear in the draft (Alt-1, Alt-2, ...). +- `objective_evidence` may contain the literal sentinel `exploratory, no concrete precedent` as a single-element list, mirroring `gen-idea`'s sentinel handling. +- All free-form text fields are English-only and contain no emoji or CJK characters (project rule). + +### 4.3 Validation script change + +`scripts/validate-gen-idea-io.sh` emits one additional KEY: VALUE line in its success stdout: + +``` +DIRECTIONS_JSON_FILE: <output-file with .md replaced by .directions.json> +``` + +Derivation is purely path-arithmetic; no separate validation pass needed. + +### 4.4 Sync rule (CLAUDE.md addition) + +Add to `.claude/CLAUDE.md`: + +> The `directions.json` schema documented in `commands/gen-idea.md` Step 4.6 and consumed in `commands/explore-idea.md` Phase 1 must stay in sync. Schema changes require updating both files in the same commit. 
+ +### 4.5 Version bump (PR-A) + +`.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, `README.md` "Current Version" line: `1.16.0` → `1.16.1`. Patch bump justified because the change is purely additive (new artifact, no behavior change to existing draft contract). + +## 5. PR-B: `/humanize:explore-idea` command + +### 5.1 Frontmatter + +```yaml +--- +description: "Explore N directions from a gen-idea draft in parallel via per-direction RLCR" +argument-hint: "<directions-json-path> [--max-rounds R]" +allowed-tools: + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-explore-idea-io.sh:*)" + - "Read" + - "Write" + - "Task" +--- +``` + +No `git`, no `mkdir`, no shell beyond the one whitelisted validation script. The Task tool's `isolation: "worktree"` handles all filesystem isolation; no pre-flight git operations are needed. Ranking is performed via inline LLM evaluation in Phase 7 (no script, no bash). + +### 5.2 Command surface + +``` +/humanize:explore-idea <directions-json-path> [--max-rounds R] +``` + +- `<directions-json-path>` (required): path to a `directions.json` produced by gen-idea (PR-A). +- `--max-rounds R` (optional, default `5`): per-worker iteration cap on the inline RLCR loop. Renamed from `--max` to avoid colliding with `start-rlcr-loop --max N` (default 42). + +There is no `--max M` (cap on directions explored). The command always explores every direction present in the JSON. Users who want fewer directions should regenerate the draft with a smaller `gen-idea --n` or hand-edit the JSON to drop entries. + +### 5.3 Hard Constraint header + +> **Hard Constraint: Coordinator-Side Read-Only.** This command MUST NOT modify any tracked file outside `.humanize/explore/<RUN_ID>/`. The coordinator session does not commit, push, branch, or edit code in the main checkout. All code changes happen inside isolated worker worktrees, which are fully managed by the Task tool's `isolation: "worktree"` mechanism. 
Each worker's prompt enforces an analogous internal constraint (no Skill invocation, no nested Task spawn, no cross-worktree access, no push). Workers may commit locally to their auto-created branch. + +### 5.4 Sequential Execution Constraint header + +> **Sequential Execution Constraint:** Phases 1–7 MUST execute strictly in order. Phase 4 (parallel worker dispatch) is the only intra-phase parallelism; workers themselves run independently within Phase 4 but Phase 5 (collection) does not begin until all workers have returned via background notification. + +### 5.5 Phases (overview; full body in `commands/explore-idea.md`) + +| Phase | Purpose | Notes | +|---|---|---| +| 1 | IO validation via `validate-explore-idea-io.sh` | Mirrors `validate-gen-idea-io.sh` exit-code table | +| 2 | Read `directions.json`; build in-memory direction list | Schema-validate; reject if 0 directions | +| 3 | Render N kickoff prompts in memory from `worker-prompt.md` template | Substitution only; no disk write | +| 4 | Single Task message dispatching N workers (`isolation: "worktree"`, `run_in_background: true`) | The only fanout step | +| 5 | Collect each worker's stdout sentinel block as background notifications arrive | No polling — event-driven | +| 6 | Build `workers.tsv` from collected sentinel blocks (status table only — no scoring) | Plain bookkeeping; no ranking yet | +| 7 | Render `synthesis-prompt.md` with all sentinel blocks + directions.json; coordinator's own LLM call performs the qualitative ranking and writes `report.md` | LLM-side judgment, not script. Run at maximum reasoning effort (Claude `/think` deep mode or codex `--effort xhigh` if delegated). No Skill, no Agent, no Task. | + +### 5.6 Version bump (PR-B) + +`.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, `README.md` "Current Version" line: `1.16.1` → `1.17.0`. Minor bump justified because a new command is added to the public surface. + +## 6. 
Worker contract + +Each worker is a `general-purpose` subagent dispatched by Task with `isolation: "worktree"` and `run_in_background: true`. It runs in an automatically-created worktree on a fresh branch. The kickoff prompt (rendered from `prompt-template/explore/worker-prompt.md`) contains the following hard constraints and workflow: + +### 6.1 Hard constraints (worker prompt enforces verbatim) + +- Do not invoke any Skill (no slash commands such as `/humanize:start-rlcr-loop`, `/humanize:gen-plan`, `/superpowers:brainstorming`, etc.). +- Do not spawn Task subagents (no nested fanout). +- For Codex consultation, use only `bash ${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh`. +- All work stays within the assigned worktree. No cross-worktree access. +- Do not push branches. +- Output ends with the sentinel block defined in 6.3. + +### 6.2 Workflow + +1. **Brainstorm**: read `README.md`, `CLAUDE.md`, and code files relevant to this direction. Inline reasoning only; do not spawn research subagents. +2. **Plan**: write `.humanize/explore/<DIR_SLUG>/plan.md` (inside worktree) capturing the actionable steps for this direction. +3. **RLCR loop**, up to `<MAX_ROUNDS>` iterations: + 1. Implement code changes (Edit/Write/Bash, scoped to this direction). + 2. Run targeted tests for the touched files only (do not run full suite). + 3. Invoke `bash ${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh "Review round <k>: <diff or summary>"`, blocking until completion. + 4. Apply the feedback. If Codex returns `LGTM` or the budget is exhausted, exit the loop. +4. **BitLesson**: read `.humanize/bitlesson.md` if present in the worktree. Note: because `.humanize/` is git-ignored in the humanize repo, a freshly created worktree starts with an empty `.humanize/` directory; the file is NOT inherited from the parent checkout. The worker prompt instructs: "If `.humanize/bitlesson.md` is missing in this worktree, emit `bitlesson_action: none` and proceed without lesson lookup." 
A future upgrade can have the coordinator copy `.humanize/bitlesson.md` into each worktree before dispatch (out of scope for MVP). Emit `bitlesson_action: none|add|update` in the summary. +5. **Commit**: `git add` explicit paths; `git commit` with a conventional commit message; do not push. +6. **Summary file**: write `.humanize/explore/<DIR_SLUG>/summary.md` (inside worktree) with the structured fields below. +7. **Sentinel block**: print the sentinel block (6.3) to stdout as the final action. + +### 6.3 Stdout sentinel block + +``` +=== EXPLORE_SUMMARY_BEGIN === +dir_slug: <slug> +rounds_used: <int> +tests_passed: <int> +tests_failed: <int> +codex_final_verdict: lgtm|partial|failed +commit_count: <int> +worktree_path: <absolute path returned by Task isolation> +branch_name: <branch> +approach_recap: <one paragraph; no embedded newlines, escape with \n> +what_worked: <bullets joined by '; '> +what_didnt: <bullets joined by '; '> +bitlesson_action: none|add|update +=== EXPLORE_SUMMARY_END === +``` + +The coordinator parses this block from each worker's stdout in Phase 5. KEY: VALUE format is line-oriented; values containing newlines must be escaped as `\n`. + +### 6.4 Failure handling inside a worker + +- If `ask-codex.sh` fails three consecutive rounds, set `codex_final_verdict: failed` and exit gracefully (still print sentinel block). +- If targeted tests are unavailable for the direction (no tests written), set `tests_passed: 0`, `tests_failed: 0`, and note in `what_didnt`. +- If implementation cannot be completed within `<MAX_ROUNDS>`, exit with whatever state exists, set `codex_final_verdict: partial`, and document in `what_didnt`. + +## 7. Aggregation + +### 7.1 Qualitative LLM ranking (no script) + +Aggregation is performed by a single inline LLM call in the coordinator's own context — there is no separate ranking script and no numeric formula. 
The synthesis prompt embeds an ordered list of qualitative criteria; the LLM evaluates each worker's sentinel block against those criteria in lexicographic order (first criterion fully decides; ties broken by the next; etc.), exactly mirroring the gen-idea Phase 4 lead-direction selection convention. + +**Lexicographic priority (highest to lowest):** + +1. **Outcome quality** — `codex_final_verdict: lgtm` ranks above `partial`, which ranks above `failed`. Workers with `task_status: failed`, `timeout`, or `no_summary` rank below all of these. +2. **Test signal** — among directions tied on outcome: a worker with `tests_passed > 0` and `tests_failed == 0` ranks above any worker with `tests_failed > 0`, which in turn ranks above a worker with no test signal at all (`tests_passed == 0` and `tests_failed == 0`). The LLM may also weigh test coverage qualitatively from the summary text. +3. **Implementation surface fit** — qualitative judgement: how cleanly the worker's `approach_recap` extends existing repo patterns vs. introducing new abstractions. Mirrors gen-idea Phase 4.1 step 2. +4. **Effort economy** — fewer `rounds_used` (faster convergence) is preferred among ties. +5. **Original confidence** — if all above tie, prefer the direction whose `confidence` field in `directions.json` was higher (`high > medium > low`). + +Workers with `task_status: failed`, `timeout`, or `no_summary` are reported but ranked at the bottom; they are flagged in `workers.tsv` for operator follow-up but do not block the synthesis report. + +**No composite score.** No script. No formula. The synthesis call carries the full directions.json plus the per-worker sentinel blocks, applies the priority list above qualitatively, and emits the ranked comparison directly into `report.md`. The output of the call is the authoritative ranking; there is no separate `rankings.tsv` file.
+ +The synthesis call is performed at maximum reasoning effort: when invoked via `bash scripts/ask-codex.sh` (the canonical Codex path used elsewhere in humanize), pass `--effort max` (or `xhigh` if codex labels it that way) so the qualitative judgment runs at full deliberation budget. This matches the user instruction to use `/effort max` for this aggregation step. + +### 7.2 Synthesis output (Phase 7) + +The synthesis prompt template substitutes: + +- `<DIRECTIONS_JSON>` — full directions.json content (so the model sees lossless per-direction context, including `known_risks` and `confidence`) +- `<SENTINEL_BLOCKS>` — concatenation of all worker sentinel blocks from Phase 5 +- `<WORKER_SUMMARIES>` — concatenation of each worker's `summary.md` text (read from each worker's worktree path) +- `<RANKING_CRITERIA>` — the lexicographic list from §7.1 verbatim +- `<ORIGINAL_IDEA>` — copied from `directions.json.original_idea` + +The rendered prompt is consumed by an inline LLM call in the coordinator's own context (no Skill, no Agent, no Task). The synthesis call runs with maximum reasoning effort. The output written to `<RUN_DIR>/report.md` must contain: + +- Executive summary (one paragraph) +- **Ranking** — ordered list from best to worst, each direction annotated with which criterion was decisive (e.g., "Rank 1: <slug> — won on criterion 1 (only `lgtm` outcome)") +- Per-direction breakdown (one section per direction, citing concrete signals from its sentinel block + summary) +- Tradeoffs surfaced +- Recommended next steps (e.g., "run /humanize:gen-plan against the winner's plan.md and `git switch <branch>` to its branch") + +## 8. 
State layout + +### 8.1 Coordinator-side (main repo working dir) + +``` +.humanize/explore/<RUN_ID>/ + workers.tsv # one row per worker: dir_slug, worktree_path, branch_name, task_status, codex_final_verdict, rounds_used, tests_passed, tests_failed, commit_count + report.md # LLM-synthesized comparison + qualitative ranking (the authoritative ranking) + .failed # only present if Phase 4 dispatch failed entirely +``` + +`<RUN_ID>` uses RLCR's timestamp format `%Y-%m-%d_%H-%M-%S` for consistency with `.humanize/rlcr/<ts>/`. + +### 8.2 Worker-side (each auto-created worktree) + +``` +<worktree-path>/ + .humanize/explore/<DIR_SLUG>/ + plan.md + summary.md + <code changes> # whatever the worker modified, committed locally on the worker's branch +``` + +The worktree path is returned by the Task tool's isolation result and recorded in the coordinator's `workers.tsv`. The user can inspect any worker after the run by `cd <worktree-path> && git log`. + +## 9. Concurrency model and fork-bomb avoidance + +### 9.1 Why this is safe + +The user's `gen-idea-parallel-exploration-methodology-v2.md` documents a real fork-bomb incident in which sub-agent prompts contained instructions to invoke Skills (`/superpowers:brainstorming`, `/humanize:start-rlcr-loop`); each Skill internally spawned its own sub-agents, producing 2-layer recursive fanout (6 workers × 7 spawned each = 42+ concurrent agents → OOM, locked worktrees). + +This design avoids that pattern by enforcing two rules: + +1. **No Skill invocation inside a worker.** Worker prompts explicitly forbid calling slash commands. The only sub-process a worker invokes is `bash scripts/ask-codex.sh`, which is a shell script, not a Skill. +2. **No nested Task spawn inside a worker.** Workers may not call the `Task` tool. The only allowed parallelism is the coordinator's single Phase-4 dispatch. + +Peak concurrency is therefore bounded by `2N`: N worker subagents plus up to N concurrent `ask-codex.sh` shell processes. 
The `2N` bound matches the user's v2 doctrine. + +### 9.2 Why we don't directly invoke `start-rlcr-loop` per worker + +Calling `/humanize:start-rlcr-loop` from inside a worker would re-introduce Skill-in-subagent nesting. The Skill internally uses `Task` for plan compliance checks, plan-understanding quizzes, and Codex review — each spawning further sub-agents. The fork-bomb concern resurfaces. + +The inline RLCR-equivalent loop is the pragmatic fix: workers replicate the *behavior* (implement → review → apply) without invoking the Skill *abstraction*. + +### 9.3 Future work: direct Skill invocation + +When Claude Code supports nested top-level Skill invocation safely (for example, if Task workers can be elevated to true top-level sessions, or if `/batch`-style dispatch gains a Skill-safe flag, or if workers can spawn external `claude --print` subprocesses cleanly), the inline RLCR-equivalent loop in worker prompts can be replaced with a real `/humanize:start-rlcr-loop` invocation. The exact mechanism depends on what Claude Code primitives are available at that point; this is recorded as a forward-looking option, not a concrete plan. + +## 10. Error handling + +| Failure | Where | Coordinator response | +|---|---|---| +| `directions.json` missing or unreadable | Phase 1 | exit 2; clear message; no `RUN_DIR` created | +| Schema invalid | Phase 1 | exit 3; cite first invalid key | +| `RUN_DIR` already exists | Phase 1 | exit 4; suggest waiting or `--force-cleanup` (future) | +| Template files missing | Phase 1 | exit 7; "plugin install corrupt" | +| `directions.json` has zero directions | Phase 2 | hard-fail; nothing to explore | +| `directions.json` has one direction | Phase 2 | proceed; single-worker run is valid | +| Task tool rejects `isolation: "worktree"` or `run_in_background: true` | Phase 4 | hard-fail with explicit message: "explore-idea requires Claude Code Task tool with `isolation` and `run_in_background` support. Verify your runtime version." 
| +| Worker times out | Phase 5 | record `task_status: timeout`; continue collecting other workers | +| Worker stdout has no `EXPLORE_SUMMARY` block | Phase 5 | record `task_status: no_summary`; the Phase 7 synthesis ranking treats the missing numeric fields as worst-case | +| Worker reports `codex_final_verdict: failed` | Phase 5 | accepted; ranked low | +| `ask-codex.sh` unavailable inside worker | Worker | Worker emits `codex_final_verdict: failed` after 3 consecutive failures, exits gracefully | +| `.humanize/bitlesson.md` missing in worktree | Worker | Worker emits `bitlesson_action: none`; notes absence in summary | +| All workers fail | Phase 7 | skip synthesis; write minimal `report.md` citing failure mode | + +**Atomicity invariant.** If Phase 1 validation fails, no `RUN_DIR` is created. If Phase 4 dispatch fails entirely, an empty `RUN_DIR/.failed` marker is written so the user knows what timestamp to clean up. + +## 11. Testing + +Tests live in `tests/`, mirroring the gen-idea test structure. CI runs them on Linux with bash 4+. + +- `tests/test-validate-explore-idea-io.sh` — exit-code matrix. Cases: happy path, missing input, input not found, input not `.json`, schema invalid (missing `directions`, missing `is_primary`, wrong types), output dir collision, permission denied, missing template. +- (No `tests/test-explore-rank.sh` — there is no deterministic ranker script in this design. Ranking is an LLM judgement step; correctness is exercised via the smoke recipe.) +- `tests/test-worker-prompt-render.sh` — placeholder coverage. Render template with sample direction values; assert no `<PLACEHOLDER>` literals remain; assert hard-constraint block is present verbatim. +- `tests/test-synthesis-prompt-render.sh` — same shape as worker prompt test. +- `tests/test-gen-idea-directions-json.sh` (PR-A) — runs gen-idea on a fixture; asserts `.directions.json` exists with correct schema; validates `schema_version`.
+ +**No live end-to-end test in CI** (would spin up N real Task subagents and Codex calls). A manual smoke recipe is documented in `commands/explore-idea.md`: + +1. Tiny test repo plus tiny idea. +2. `/humanize:gen-idea "..." --n 2` — verify `.directions.json` exists. +3. `/humanize:explore-idea <json> --max-rounds 2` — verify `report.md`, two worker branches exist locally, no push attempted. + +## 12. Runtime requirements + +- Claude Code Task tool with `isolation: "worktree"` and `run_in_background: true` support. To be verified in the implementation plan's first task before any other work begins. +- `${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh` available (existing humanize dependency). +- `git` ≥ 2.5 (worktree support); already a humanize prerequisite. + +## 13. Project-rule compliance + +- **English-only, no emoji or CJK**: enforced in worker prompt template (constraint block) and synthesis prompt template; coordinator's `report.md` is generated by inline LLM call with explicit English-only instruction; `summary.md` field-formatting is structured, no free-form prose in the sentinel block. +- **Version-bump triplet**: PR-A bumps to `1.16.1` across `plugin.json`, `marketplace.json`, `README.md`. PR-B bumps to `1.17.0` across the same triplet. Both PRs are authored against `dev` (not `main`); verify the `dev` triplet's starting state before opening each PR. +- **Plan-template-sync analog**: two new sync rules added to `.claude/CLAUDE.md`. (1) `directions.json` schema in `commands/gen-idea.md` ↔ `commands/explore-idea.md` Phase 1. (2) Worker contract sections in `commands/explore-idea.md` ↔ `prompt-template/explore/worker-prompt.md`. + +## 14. Future work (called out for posterity) + +- `--force-cleanup` flag for stale `.humanize/explore/<ts>/` directories. +- `/humanize:explore-rerun <run-id> --direction <slug>` to re-run a single failed direction.
+- `gen-idea --explore` chainer (deferred until Skill-from-Skill chaining at the orchestrator level is proven safe under humanize's Skill-recursion semantics). +- Direct `/humanize:start-rlcr-loop` invocation per worker (deferred until Claude Code supports nested top-level Skill invocation safely; would replace the inline RLCR-equivalent loop with a single Skill call). +- W2S-style sample-fanout (`--samples M` flag adding N×M total worker runs for the same direction at different temperatures). Out of scope for the direction-fanout MVP. +- Coordinator-side hook (`SessionEnd` or similar) that prints the latest `RUN_DIR/report.md` location whenever an explore run completes, even after coordinator session restart. +- `gen-idea` template change to embed a hash or signature in `directions.json` so `explore-idea` can detect mismatched draft/JSON pairs. + +## 15. Open risks needing implementation-time verification + +These items are deliberately not resolved in the design and must be verified as part of the implementation plan's first task: + +1. **Task tool surface**. Confirm that `subagent_type: "general-purpose"` accepts both `isolation: "worktree"` and `run_in_background: true` simultaneously, and that the Task return payload includes the worktree path and branch name. Reviewer Codex flagged this as having no in-repo precedent. +2. **Worktree placement**. Verify where the Task tool places its auto-created worktrees. If they appear under `.worktrees/` in repo root, add `.worktrees/` to `.gitignore` in PR-B (or document why this is acceptable). If they appear under `.git/worktrees/` or a system temp area, no .gitignore change is needed. +3. **BitLesson inheritance**. Verified at design time: `.humanize/` is git-ignored, so a fresh worktree starts with an empty `.humanize/` directory and the bitlesson file is NOT visible. MVP behavior: worker emits `bitlesson_action: none` and proceeds. 
Implementation should consider whether to add a coordinator-side step that copies `.humanize/bitlesson.md` into each worktree path returned by the Task tool before workers begin substantive work. Whether this is feasible depends on whether the coordinator has access to the worktree paths at dispatch time or only at completion time (verify this in conjunction with risk #1). +4. **Background notification semantics**. Verify how Phase 5 receives notifications. Per the Task tool docs, "you will be automatically notified when it completes — do NOT sleep, poll, or proactively check on its progress." Phase 5 must handle the asynchronous arrival of all N notifications, not assume a synchronous wait. +5. **N concurrent `ask-codex.sh` calls**. Verify that running N `ask-codex.sh` invocations in parallel against the Codex CLI is supported (rate-limit or session-locking concerns). If not, the worker prompt may need to add jitter or a serialization mechanism. + +If any of these checks fail, the affected portion of the design must be revised before implementation continues. diff --git a/docs/superpowers/specs/2026-04-29-explore-idea-hardened-prototype-design.md b/docs/superpowers/specs/2026-04-29-explore-idea-hardened-prototype-design.md new file mode 100644 index 00000000..dbfbdabd --- /dev/null +++ b/docs/superpowers/specs/2026-04-29-explore-idea-hardened-prototype-design.md @@ -0,0 +1,622 @@ +# Design: `/humanize:explore-idea` Hardened Prototype MVP + +> Status: Approved brainstorming revision. Awaiting user review before implementation planning. +> Date: 2026-04-29 +> Supersedes: `docs/superpowers/specs/2026-04-28-explore-idea-design.md` +> Target flow: implement on a Horacehxw fork branch, verify there, then open one combined upstream PR. + +--- + +## 1. 
Motivation + +The first `/humanize:explore-idea` design proposed parallel per-direction implementation attempts, but review found several blocking issues: unbounded fanout, prompt-only safety guarantees, fragile line-oriented contracts, missing manifest state, invalid `ask-codex.sh` flags, unclear worktree isolation, and ambiguous adoption/cleanup. + +This revision keeps the central value proposition: compare real local prototype branches, not just plans. Workers may implement, test, consult Codex, and commit locally by default. That behavior is now gated by explicit user confirmation and backed by bounded concurrency, durable run state, JSON contracts, deterministic branch naming, worktree-root assertions, and cleanup/adoption instructions. + +## 2. Goals and Non-Goals + +### Goals + +- Generate a lossless `directions.json` companion artifact from `/humanize:gen-idea`. +- Explore selected directions as bounded parallel prototype attempts. +- Create local worker worktrees, branches, and commits by default after a blocking user confirmation. +- Keep active work bounded: selected directions `<= 10`, active workers `<= --concurrency`, active Codex calls `<= active workers`. +- Persist enough state to understand, inspect, adopt, or clean up every worker result. +- Use JSON contracts for direction schema and worker results. +- Produce a human report with separate product-direction and implementation-readiness rankings. +- Verify all deterministic behavior in shell CI before any upstream PR. + +### Non-Goals + +- No auto-push from workers. +- No auto-merge or upstream PR creation from `/humanize:explore-idea`. +- No nested Skill, Agent, or Task fanout inside workers. +- No claim that the worker loop is full RLCR. It is a bounded prototype review loop. +- No CI test that runs real Claude slash commands, Agent/Task workers, or live Codex calls. +- No direct upstream PR until the fork branch has passed deterministic tests and a manual runtime smoke. + +## 3. 
Contribution Flow + +Build the change as one feature branch in the Horacehxw fork, but keep the work internally staged as two layers: + +1. **PR-A layer:** amend `gen-idea` to emit and validate `directions.json`. +2. **PR-B layer:** add `explore-idea` and its validators, templates, worker result handling, report synthesis, and documentation. + +After local implementation: + +1. Push the branch to the Horacehxw fork. +2. Run deterministic shell tests. +3. Run the blocking runtime spike for Agent/Task worktree behavior. +4. Run one tiny manual smoke with two directions and one worker iteration. +5. Open one combined upstream PR after verification. + +Versioning is a single public bump from `1.16.0` to `1.17.0` across `.claude-plugin/plugin.json`, `.claude-plugin/marketplace.json`, and the `README.md` Current Version line. + +## 4. PR-A Layer: Lossless `directions.json` + +### 4.1 `gen-idea` Output Contract + +After the draft markdown is written, `gen-idea` writes a companion file: + +```text +<draft>.directions.json +``` + +For ordinary `.md` output, the path is derived with: + +```bash +${OUTPUT_FILE%.md}.directions.json +``` + +MVP behavior: reject non-`.md` output for `gen-idea`, because companion derivation and draft ergonomics rely on the markdown suffix. + +`commands/gen-idea.md` must update its hard constraint from "single output draft file" to "draft file plus validated directions companion artifact." It must report both paths in its final output and mention the optional next step: + +```text +/humanize:explore-idea <directions-json-path> +``` + +### 4.2 Validation Changes + +`scripts/validate-gen-idea-io.sh` must: + +- Require a `.md` output path. +- Derive `DIRECTIONS_JSON_FILE`. +- Reject an existing draft file. +- Reject an existing companion JSON file. +- Ensure the output directory is writable for both files. +- Emit `DIRECTIONS_JSON_FILE: <absolute-path>` on success. + +If any validation fails, neither output file is written. 
+ +### 4.3 Schema + +`directions.json` uses schema version 1: + +```json +{ + "schema_version": 1, + "title": "Command Pattern Undo Stack", + "original_idea": "verbatim user input", + "synthesis_notes": "lead synthesis paragraph", + "metadata": { + "n_requested": 6, + "n_returned": 6, + "timestamp": "20260429-153012", + "draft_path": ".humanize/ideas/undo-redo-20260429-153012.md" + }, + "directions": [ + { + "direction_id": "dir-00-command-history", + "dir_slug": "command-history", + "source_index": 0, + "display_order": 0, + "is_primary": true, + "name": "Command History", + "rationale": "Single-sentence rationale from Phase 2.", + "raw_phase3_response": "Exact raw proposal text from the explorer.", + "approach_summary": "Normalized approach summary.", + "objective_evidence": ["path/or/evidence"], + "known_risks": ["risk"], + "confidence": "high" + } + ] +} +``` + +Rules: + +- `direction_id` is immutable and unique. +- `dir_slug` is unique and branch/path safe: lowercase ASCII letters, digits, and hyphens. +- `source_index` preserves the original Phase-2 direction index. +- `display_order` is primary first, then alternatives. +- `raw_phase3_response` preserves the exact subagent response. +- Normalized fields are derived for easier downstream consumption. +- `original_idea` is exempt from generated-text English-only rules because it must preserve user input verbatim. +- Generated fields remain English-only and contain no emoji or CJK characters. + +### 4.4 Shared Schema Validator + +Add a deterministic schema validator, preferably `scripts/validate-directions-json.sh` using `jq`. 
It validates: + +- `schema_version == 1` +- required top-level keys +- `directions` length is `1..10` +- exactly one `is_primary: true` +- unique `direction_id` +- unique `dir_slug` +- unique `source_index` +- contiguous or unique `display_order` values +- `confidence` is `high`, `medium`, or `low` +- `metadata.n_returned == directions.length` +- required string/list fields have the expected types + +Both `gen-idea` and `explore-idea` rely on this validator as the canonical contract. + +## 5. PR-B Layer: Command UX + +### 5.1 Command Surface + +```text +/humanize:explore-idea <draft-or-directions-json> + [--directions ids] + [--concurrency P] + [--max-worker-iterations R] + [--worker-timeout-min M] + [--codex-timeout-min M] +``` + +Input: + +- Accept a `.directions.json` path directly. +- Accept a generated draft `.md` path and resolve the companion JSON with `.md -> .directions.json`. +- If the companion JSON is missing, fail clearly and tell the user to regenerate the idea draft. + +Direction selection: + +- Default: first `min(6, directions.length)` directions by `display_order`. +- `--directions` selects stable `direction_id` values or numeric `source_index` values. +- Validation rejects selecting more than 10 directions. +- Validation rejects duplicate or unknown direction selectors. + +Defaults and caps: + +- Default selected directions: up to 6. +- Hard max directions: 10. +- Default concurrency: 6. +- Hard max concurrency: 10. +- Effective concurrency: `min(requested_concurrency, selected_direction_count)`. +- Default worker iterations: 2. +- Hard max worker iterations: 3. +- Default worker timeout: 60 minutes. +- Hard max worker timeout: 60 minutes. +- Default Codex timeout: 20 minutes. +- Hard max Codex timeout: 20 minutes. + +### 5.2 Blocking Confirmation + +Commits are default behavior, but dispatch is blocked until explicit user confirmation. 
+ +Before launching workers, the command shows: + +- selected direction IDs and names +- selected direction count +- effective concurrency +- worker iteration cap +- worker timeout +- Codex timeout +- base branch +- base commit +- run directory +- warning that workers will create local worktrees, branches, commits, run targeted tests, and invoke Codex + +The command proceeds only if the user explicitly confirms. + +### 5.3 Frontmatter and Runtime Capability + +The implementation must use the current Claude Code subagent tool naming and schema. If the current runtime uses `Agent`, command docs and frontmatter should use `Agent`. If `Task` remains the installed command-tool name, the spec may document `Task` as a compatibility alias. + +Before PR-B implementation proceeds, run a blocking spike that proves: + +- worktree isolation is supported +- background execution or equivalent parallel execution is supported +- the command can wait for all workers in one session +- worker results are available to the coordinator +- worktree path and branch name are discoverable +- worker permissions allow required edits, tests, git, and Codex calls + +If the spike fails, revise PR-B before implementation continues. + +## 6. 
Explore Run State + +The coordinator writes durable state before dispatch: + +```text +.humanize/explore/<RUN_ID>/ + manifest.json + dispatch-prompts/ + <direction_id>.md + worker-results.jsonl + report.md + .failed +``` + +`manifest.json` includes: + +- `run_id` +- `created_at` +- `directions_json_file` +- `draft_path` +- `selected_direction_ids` +- `base_branch` +- `base_commit` +- `concurrency` +- `max_worker_iterations` +- `worker_timeout_min` +- `codex_timeout_min` +- `expected_worker_count` +- `runtime_spike_status` +- per-worker records with `direction_id`, `dir_slug`, prompt path, prompt hash, branch name, worktree path if known, task/agent id if available, and final status + +`dispatch-prompts/<direction_id>.md` stores the exact prompt sent to each worker. Prompts are not in-memory only. + +`worker-results.jsonl` stores one JSON object per worker result or coordinator-generated failure row. + +If dispatch fails entirely, write `.failed` and update `manifest.json` with the failure reason. + +## 7. Worker Runtime and Isolation + +### 7.1 Worker Constraints + +Each worker must: + +- stay inside its assigned worktree +- not invoke Skills or slash commands +- not spawn nested Agent/Task workers +- not push branches +- not access sibling worktrees +- not perform destructive cleanup outside its worktree +- use only the approved Codex consultation path +- emit the JSON result sentinel as its final action + +These are still prompt-level constraints unless the runtime exposes tool-level restrictions. The spec must not claim a strict concurrency proof unless those restrictions are verified. + +### 7.2 Worktree Root Safety + +Before calling Humanize scripts, the worker must: + +```bash +export CLAUDE_PROJECT_DIR="$PWD" +``` + +It must assert that `scripts/ask-codex.sh` resolves the same project root as the assigned worktree. If the assertion fails, the worker stops and emits a failure result. 
+ +This prevents `ask-codex.sh` from resolving the coordinator checkout through inherited `CLAUDE_PROJECT_DIR`. + +### 7.3 Codex Calls + +Worker Codex calls use: + +```bash +bash "${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh" \ + --codex-timeout 1200 \ + --codex-model "<model>:xhigh" \ + "<prompt>" +``` + +`ask-codex.sh` must disable nested Codex hooks when supported, using the same `--disable codex_hooks` probing pattern already used by the RLCR stop hook and `bitlesson-select.sh`. + +The spec does not use `--effort max`; that flag is not supported by the current script. + +### 7.4 Worker Loop + +The worker loop is a bounded prototype review loop: + +1. Inspect relevant repo context. +2. Write a short plan sketch under the worker summary data. +3. Implement scoped prototype changes. +4. Run targeted tests for touched areas. +5. Ask Codex for review. +6. Apply useful feedback. +7. Repeat until `max_worker_iterations`, Codex `LGTM`, or failure. +8. Commit local changes when appropriate. +9. Emit JSON result. + +This is not full RLCR. It does not replace `/humanize:start-rlcr-loop`. + +### 7.5 Branch and Commit Rules + +Branch names are deterministic: + +```text +explore/<RUN_ID>/<dir_slug> +``` + +The worker result records: + +- `branch_name` +- `worktree_path` +- `commit_sha` +- `commit_count` +- `dirty_state` +- `commit_status` + +Allowed `commit_status` values: + +- `committed` +- `none` +- `wip` +- `failed` + +Successful and partial workers should commit if they produced changes. Failed workers may leave WIP changes only if the result marks that state clearly. + +### 7.6 Timeouts + +Coordinator enforces the worker timeout. + +Codex calls use the Codex timeout. + +If a worker times out, the coordinator writes a timeout result row to `worker-results.jsonl` with: + +```json +{ + "task_status": "timeout", + "direction_id": "...", + "error": "worker exceeded timeout" +} +``` + +The report includes timeout cleanup guidance. 
+ +### 7.7 BitLesson + +If worker worktree paths are known before substantive work begins, the coordinator copies or initializes `.humanize/bitlesson.md` in each worker worktree. + +If paths are not known until completion, BitLesson is explicitly unavailable for MVP. Worker results set `bitlesson_action: "none"` and the report states that this run has reduced parity with standard RLCR. + +## 8. Worker Result Contract + +Workers print one JSON object between sentinel markers: + +```text +=== EXPLORE_RESULT_JSON_BEGIN === +{ + "schema_version": 1, + "run_id": "2026-04-29_15-30-12", + "direction_id": "dir-00-command-history", + "dir_slug": "command-history", + "task_status": "success", + "codex_final_verdict": "lgtm", + "rounds_used": 2, + "tests_passed": 3, + "tests_failed": 0, + "worktree_path": "/abs/path", + "branch_name": "explore/2026-04-29_15-30-12/command-history", + "commit_sha": "abc123", + "commit_count": 1, + "dirty_state": "clean", + "commit_status": "committed", + "summary_markdown": "Full markdown summary.", + "what_worked": ["item"], + "what_didnt": ["item"], + "bitlesson_action": "none", + "error": null +} +=== EXPLORE_RESULT_JSON_END === +``` + +Enums: + +- `task_status`: `success`, `partial`, `failed`, `timeout`, `no_summary` +- `codex_final_verdict`: `lgtm`, `partial`, `failed`, `unavailable` +- `dirty_state`: `clean`, `dirty`, `unknown` +- `bitlesson_action`: `none`, `add`, `update` + +The coordinator parses JSON, not ad hoc `KEY: VALUE` lines. Invalid JSON creates a `no_summary` row. + +## 9. Ranking and Report + +`worker-results.jsonl` is the machine-readable source of truth. `report.md` is the human synthesis. + +The report has two rankings: + +1. **Best product direction** + - user value + - strategic fit + - original direction quality + - objective evidence + - known risks + +2. 
**Most implementation-ready prototype** + - `task_status` + - `codex_final_verdict` + - tests passed/failed + - commit status + - dirty state + - implementation fit + - worker iteration count + +The design no longer claims deterministic ranking unless a future deterministic `ranking.json` artifact is added. For MVP, ranking is qualitative LLM synthesis over JSON inputs. + +The synthesis is performed by the coordinator's current reasoning context unless `ask-codex.sh` is explicitly allowed and called with the valid `--codex-model <model>:xhigh` contract. + +## 10. Adoption and Cleanup + +The report includes exact adoption paths: + +### Continue Winner Branch + +Includes: + +- worktree path +- branch name +- commit SHA +- suggested next command, for example `/humanize:start-rlcr-loop --skip-impl` when appropriate + +### Restart From Plan + +Use the winning worker's plan sketch and `summary_markdown` as input to normal `/humanize:gen-plan`, then run standard RLCR. + +### Cherry-Pick Prototype + +Includes exact commit SHA and warns that the user should verify the base branch first. + +### Discard Prototypes + +Includes cleanup guidance for losing worktrees and branches. + +Future companion commands are designed but may be deferred: + +```text +/humanize:explore-status <run-id> +/humanize:explore-cleanup <run-id> [--failed-only|--losers|--all] +``` + +If companion commands are deferred, the MVP report still prints shell cleanup commands and all ownership data remains in `manifest.json`. + +## 11. 
Safety Model + +The safety model is bounded concurrency, not an unqualified `2N` proof (that is, not a hard guarantee of at most N active workers plus N active Codex calls): + +- selected directions are bounded by 10 +- active workers are bounded by `--concurrency` +- active Codex calls are bounded by active workers +- nested Skill, Agent, and Task calls inside workers are forbidden +- worker project root is asserted before Codex calls +- `ask-codex.sh` disables nested Codex hooks when supported +- dispatch requires explicit user confirmation +- all worker branches/worktrees are recorded in the manifest + +If the runtime cannot enforce tool-level worker restrictions, the spec must describe nested fanout prevention as prompt-enforced and verified by smoke testing, not as mathematically guaranteed. + +## 12. Error Handling + +Validation failures occur before `RUN_DIR` creation. + +If `RUN_DIR` already exists, validation fails unless a future cleanup flag is implemented. + +If a selected direction is invalid, validation fails. + +If dispatch fails entirely: + +- write `.failed` +- update `manifest.json` +- do not write a success report + +If a worker times out, fails, or emits invalid JSON: + +- append a coordinator-generated JSON row to `worker-results.jsonl` +- continue collecting other workers +- include the failed worker in `report.md` + +If all workers fail: + +- write a minimal `report.md` +- include the failure table and cleanup/status guidance + +## 13. Testing + +CI tests are deterministic shell tests. 
+ +Add: + +- `tests/test-validate-gen-idea-io.sh` + - companion path derivation + - `.md` requirement + - companion collision rejection + - `DIRECTIONS_JSON_FILE` stdout + +- `tests/test-directions-json-schema.sh` + - valid fixture + - missing keys + - more than 10 directions + - duplicate `direction_id` + - duplicate `dir_slug` + - missing primary + - multiple primary entries + - bad confidence enum + - `n_returned` mismatch + +- `tests/test-validate-explore-idea-io.sh` + - direct JSON input + - draft-to-json resolution + - missing companion JSON + - direction cap + - `--directions` parsing + - concurrency range + - worker iteration range + - timeout range + - run dir collision + - template presence + +- `tests/test-worker-result-contract.sh` + - valid JSON sentinel + - invalid JSON sentinel + - timeout row + - no-summary row + - enum validation + +- `tests/test-explore-manifest.sh` + - required manifest fields + - base branch and base commit fields + - selected direction IDs + - prompt path and prompt hash fields + +- `tests/test-explore-command-structure.sh` + - frontmatter tools + - blocking confirmation text + - worker hard constraints + - schema/template sync references + +Every new suite must be added to `tests/run-all-tests.sh`. + +No CI test invokes live slash commands, real Agent/Task workers, or real Codex. + +## 14. Manual Verification Before Upstream PR + +Before opening the upstream PR: + +1. Push the feature branch to the Horacehxw fork. +2. Run the full shell test suite. +3. Run the runtime spike: + - prove worker worktree isolation + - prove background/wait or equivalent parallel collection + - prove worktree path and branch name discovery + - prove worker permissions for edit/test/git/Codex + - prove `CLAUDE_PROJECT_DIR="$PWD"` makes Codex run in the worker worktree + - prove Codex hook disabling is active when supported +4. 
Run one tiny manual smoke: + - two directions + - one worker iteration + - inspect `manifest.json` + - inspect `worker-results.jsonl` + - inspect `report.md` + - verify local branches and commits + - verify no push occurred + +If any runtime spike check fails, revise PR-B before opening the upstream PR. + +## 15. Documentation Updates + +Update: + +- `README.md` quick start with optional `explore-idea`. +- `docs/usage.md` command reference. +- `.claude/CLAUDE.md` sync rules: + - `directions.json` schema is canonical in the schema validator and documented in both command docs. + - worker constraints in `commands/explore-idea.md`, `prompt-template/explore/worker-prompt.md`, and `scripts/validate-explore-idea-io.sh` (cap enforcement) must stay in sync. +- `.gitignore` if the runtime spike confirms Claude-managed worktrees appear under an unignored path such as `.claude/worktrees/`. + +## 16. Open Implementation Risks + +These must be resolved before PR-B is considered ready: + +1. Confirm the actual current Claude Code `Agent` or `Task` tool schema. +2. Confirm worktree isolation and branch naming behavior. +3. Confirm whether worktree paths are available before workers begin. +4. Confirm a single command can wait for and collect all worker results. +5. Confirm background workers can use required tools without hidden permission prompts. +6. Confirm `ask-codex.sh` hook disabling does not break existing tests. +7. Confirm concurrent Codex calls do not hit local locks or unacceptable rate limits. + +If any item fails, update this design before implementation planning continues. diff --git a/docs/usage.md b/docs/usage.md index 4234b39d..ede48b21 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -59,6 +59,8 @@ The quiz is advisory, not a gate. You always have the option to proceed. 
But tha | Command | Purpose | |---------|---------| +| `/gen-idea <idea-or-path>` | Generate a repo-grounded idea draft with N parallel directions | +| `/explore-idea <draft-or-directions.json>` | Launch bounded parallel prototype workers and synthesize a two-tier report | | `/start-rlcr-loop <plan.md>` | Start iterative development with Codex review | | `/cancel-rlcr-loop` | Cancel active loop | | `/gen-plan --input <draft.md> --output <plan.md>` | Generate structured plan from draft | @@ -67,6 +69,45 @@ The quiz is advisory, not a gate. You always have the option to proceed. But tha ## Command Reference +### gen-idea + +``` +/humanize:gen-idea <idea-text-or-path> [--n <int>] [--output <path>] +``` + +Generates a repo-grounded idea draft using directed-diversity exploration. A lead agent picks N orthogonal directions, N parallel Explore subagents each develop one direction with objective evidence from the repo, and the lead synthesizes a draft with one primary direction plus N-1 alternatives. + +**Outputs:** +- Draft file: `.humanize/ideas/<slug>-<timestamp>.md` (or `--output` path) +- Companion JSON: `<draft-path-without-.md>.directions.json` — lossless record of all direction proposals, used as input to `explore-idea` + +**Options:** +- `--n <int>` — number of parallel directions (default: 6) +- `--output <path>` — custom output path for the draft (must have `.md` suffix) + +### explore-idea + +``` +/humanize:explore-idea <draft.md | draft.directions.json> [--directions ids] [--concurrency N] [--max-worker-iterations N] [--worker-timeout-min N] [--codex-timeout-min N] +``` + +Launches bounded parallel prototype workers — one per selected direction — each running in an isolated git worktree. 
After all workers complete, synthesizes a two-tier ranking report: +- **Tier 1**: Best product direction (ranked by user value, evidence, strategic fit) +- **Tier 2**: Most implementation-ready prototype (ranked by outcome: task status, Codex verdict, tests, commits) + +**Options:** +- `--directions <ids>` — comma-separated `direction_id` or `source_index` values to run (default: first 6 by display order) +- `--concurrency <N>` — parallel worker count (default: 6, max: 10) +- `--max-worker-iterations <N>` — per-worker iteration cap (default: 2, max: 3) +- `--worker-timeout-min <N>` — worker timeout in minutes (default: 60, max: 60) +- `--codex-timeout-min <N>` — Codex call timeout in minutes (default: 20, max: 20) + +**Run artifacts** stored in `.humanize/explore/<RUN_ID>/`: +- `manifest.json` — coordinator state and per-worker metadata +- `dispatch-prompts/` — exact prompts sent to each worker +- `worker-results.jsonl` — machine-readable result rows +- `report.md` — synthesis report with two-tier rankings and adoption paths + ### start-rlcr-loop ``` diff --git a/hooks/lib/loop-bg-tasks.sh b/hooks/lib/loop-bg-tasks.sh index 08eba146..3d89c3cc 100755 --- a/hooks/lib/loop-bg-tasks.sh +++ b/hooks/lib/loop-bg-tasks.sh @@ -355,7 +355,7 @@ handle_bg_task_short_circuit() { local guard_state_file guard_stored_sid guard_state_file=$(resolve_active_state_file "$loop_dir") if [[ -n "$guard_state_file" ]]; then - guard_stored_sid=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/^'"${FIELD_SESSION_ID}"': *//; p; } }' "$guard_state_file" 2>/dev/null | tr -d ' ') + guard_stored_sid=$(awk -v key="${FIELD_SESSION_ID}" 'BEGIN{f=0} /^---$/{f++; next} f==1 && $0 ~ "^"key":"{sub("^"key":[[:space:]]*",""); print; exit}' "$guard_state_file" 2>/dev/null | tr -d ' ') || true if [[ -n "$guard_stored_sid" ]] \ && [[ -n "$hook_session_id" ]] \ && [[ "$guard_stored_sid" != "$hook_session_id" ]]; then diff --git a/hooks/lib/loop-codex-exit-handlers.sh 
b/hooks/lib/loop-codex-exit-handlers.sh new file mode 100644 index 00000000..38b17c87 --- /dev/null +++ b/hooks/lib/loop-codex-exit-handlers.sh @@ -0,0 +1,355 @@ +#!/usr/bin/env bash +# +# Exit Handlers for RLCR Loop +# +# Contains decision/blocking functions for handling loop exit scenarios: +# - Finalization phase entry +# - Mainline drift detection +# - Review verdict validation +# - Code review issue continuation +# - Codex review failure handling +# + +set -euo pipefail + +# Enter the finalize phase after review passes. +# Arguments: $1=skip_reason (optional), $2=system_msg +enter_finalize_phase() { + local skip_reason="$1" + local system_msg="$2" + + mv "$STATE_FILE" "$LOOP_DIR/finalize-state.md" + echo "State file renamed to: $LOOP_DIR/finalize-state.md" >&2 + + local finalize_summary_file="$LOOP_DIR/finalize-summary.md" + local finalize_prompt + + if [[ -n "$skip_reason" ]]; then + local fallback="# Finalize Phase (Review Skipped) + +**Warning**: Code review was skipped due to: {{REVIEW_SKIP_REASON}} + +The implementation could not be fully validated. You are now in the **Finalize Phase**. + +## Important Notice +Since the code review was skipped, please manually verify your changes before finalizing: +1. Review your code changes for any obvious issues +2. Run any available tests to verify correctness +3. Check for common code quality issues + +## Simplification (Optional) +If time permits, use the \`code-simplifier:code-simplifier\` agent via the Task tool to simplify and refactor your code. Focus more on changes between branch from {{BASE_BRANCH}} to {{START_BRANCH}}. + +## Constraints +- Must NOT change existing functionality +- Must NOT fail existing tests +- Must NOT introduce new bugs +- Only perform functionality-equivalent code refactoring and simplification + +## Before Exiting +1. Complete all todos +2. Commit your changes +3. 
Write your finalize summary to: {{FINALIZE_SUMMARY_FILE}}" + + finalize_prompt=$(load_and_render_safe "$TEMPLATE_DIR" "claude/finalize-phase-skipped-prompt.md" "$fallback" \ + "FINALIZE_SUMMARY_FILE=$finalize_summary_file" \ + "PLAN_FILE=$PLAN_FILE" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "REVIEW_SKIP_REASON=$skip_reason" \ + "BASE_BRANCH=$BASE_BRANCH" \ + "START_BRANCH=$START_BRANCH") + else + local fallback="# Finalize Phase + +Codex review has passed. The implementation is complete. + +You are now in the **Finalize Phase**. Use the \`code-simplifier:code-simplifier\` agent via the Task tool to simplify and refactor your code. + +## Constraints +- Must NOT change existing functionality +- Must NOT fail existing tests +- Must NOT introduce new bugs +- Only perform functionality-equivalent code refactoring and simplification + +## Focus +Focus on the code changes made during this RLCR session. Focus more on changes between branch from {{BASE_BRANCH}} to {{START_BRANCH}}. + +## Before Exiting +1. Complete all todos +2. Commit your changes +3. Write your finalize summary to: {{FINALIZE_SUMMARY_FILE}}" + + finalize_prompt=$(load_and_render_safe "$TEMPLATE_DIR" "claude/finalize-phase-prompt.md" "$fallback" \ + "FINALIZE_SUMMARY_FILE=$finalize_summary_file" \ + "PLAN_FILE=$PLAN_FILE" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "BASE_BRANCH=$BASE_BRANCH" \ + "START_BRANCH=$START_BRANCH") + fi + + jq -n \ + --arg reason "$finalize_prompt" \ + --arg msg "$system_msg" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 +} + +# Append task tag routing reminder to follow-up prompts. 
# Arguments: $1=prompt_file_path
append_task_tag_routing_note() {
    # Destination prompt file; the reminder is appended, existing content is kept.
    local target_prompt="$1"

    # The quoted delimiter keeps the backticks below literal (no expansion).
    cat << 'ROUTING_EOF' >> "$target_prompt"

## Task Tag Routing Reminder

Follow the plan's per-task routing tags strictly:
- `coding` task -> Claude executes directly
- `analyze` task -> execute via `/humanize:ask-codex`, then integrate the result
- Keep Goal Tracker Active Tasks columns `Tag` and `Owner` aligned with execution
ROUTING_EOF
}

# Circuit breaker for a stalled mainline: records the drift outcome in the
# state file, ends the loop, prints a "block" decision as JSON on stdout, and
# exits 0.
# Arguments: $1=stall_count, $2=last_verdict
stop_for_mainline_drift() {
    local stalled_rounds="$1" latest_verdict="$2"

    # Persist the stall count, last verdict and replan-required status before
    # the loop is ended below.
    local -a state_updates=(
        "${FIELD_MAINLINE_STALL_COUNT}=${stalled_rounds}"
        "${FIELD_LAST_MAINLINE_VERDICT}=${latest_verdict}"
        "${FIELD_DRIFT_STATUS}=${DRIFT_STATUS_REPLAN_REQUIRED}"
    )
    upsert_state_fields "$STATE_FILE" "${state_updates[@]}"

    # Built-in message body handed to load_and_render_safe as its fallback text.
    local default_body="# Mainline Drift Circuit Breaker

The RLCR loop has been stopped because the mainline failed to advance for {{STALL_COUNT}} consecutive implementation rounds.

- Last mainline verdict: {{LAST_VERDICT}}
- Drift status: replan_required

This loop should not continue automatically. Revisit the original plan, recover the round contract, and restart with a narrower mainline objective."

    local -a template_vars=(
        "STALL_COUNT=$stalled_rounds"
        "LAST_VERDICT=$latest_verdict"
        "PLAN_FILE=$PLAN_FILE"
    )

    # Declared separately from the assignment so a failing render is not masked
    # by the exit status of `local`.
    local block_reason
    block_reason=$(load_and_render_safe "$TEMPLATE_DIR" "block/mainline-drift-stop.md" "$default_body" "${template_vars[@]}")

    end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_STOP"

    jq -n --arg reason "$block_reason" --arg msg "Loop: Stopped - mainline drift circuit breaker triggered" \
        '{"decision": "block", "reason": $reason, "systemMessage": $msg}'
    exit 0
}

# Block exit when implementation review output omits the required mainline verdict.
+# Arguments: $1=review_result_file, $2=review_prompt_file +block_missing_mainline_verdict() { + local review_result_file="$1" + local review_prompt_file="$2" + + local fallback="# Mainline Verdict Missing + +The implementation review output is missing the required line: + +\`Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED\` + +Humanize cannot safely update drift state or choose the correct next-round prompt without this verdict. + +Retry the exit so Codex reruns the implementation review. + +Files: +- Review result: {{REVIEW_RESULT_FILE}} +- Review prompt: {{REVIEW_PROMPT_FILE}}" + local reason + reason=$(load_and_render_safe "$TEMPLATE_DIR" "block/mainline-verdict-missing.md" "$fallback" \ + "REVIEW_RESULT_FILE=$review_result_file" \ + "REVIEW_PROMPT_FILE=$review_prompt_file") + + jq -n \ + --arg reason "$reason" \ + --arg msg "Loop: Blocked - implementation review missing Mainline Progress Verdict" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 +} + +# Continue review loop when issues are found +# Arguments: $1=round_number, $2=review_content +continue_review_loop_with_issues() { + local round="$1" + local review_content="$2" + + echo "Code review found issues. Continuing review loop..." >&2 + + # Update round number in state file + local temp_file="${STATE_FILE}.tmp.$$" + sed "s/^current_round: .*/current_round: $round/" "$STATE_FILE" > "$temp_file" + mv "$temp_file" "$STATE_FILE" + + # Build review-fix prompt for Claude + local next_prompt_file="$LOOP_DIR/round-${round}-prompt.md" + local next_summary_file="$LOOP_DIR/round-${round}-summary.md" + if [[ ! 
-f "$next_summary_file" ]]; then + cat > "$next_summary_file" << EOF +# Review Round $round Summary + +## Work Completed +- [Describe what was implemented in this phase] + +## Files Changed +- [List created/modified files] + +## Validation +- [List tests/commands run and outcomes] + +## Remaining Items +- [List unresolved items, if any] + +## BitLesson Delta +- Action: none|add|update +- Lesson ID(s): NONE +- Notes: [what changed and why] +EOF + fi + local next_contract_file="$LOOP_DIR/round-${round}-contract.md" + + local fallback="# Code Review Findings + +You are in the **Review Phase** of the RLCR loop. Codex has performed a code review and found issues. + +## Review Results + +{{REVIEW_CONTENT}} + +## Instructions + +1. Re-anchor on the original plan and current goal tracker before changing code +2. Refresh the round contract at {{ROUND_CONTRACT_FILE}} +3. Address only the issues that are truly blocking the current mainline objective or code-review acceptance +4. Record non-blocking follow-up items as queued, not as the main goal +5. Commit your changes after fixing the issues +6. Write your summary to: {{SUMMARY_FILE}}" + + load_and_render_safe "$TEMPLATE_DIR" "claude/review-phase-prompt.md" "$fallback" \ + "REVIEW_CONTENT=$review_content" \ + "SUMMARY_FILE=$next_summary_file" \ + "BITLESSON_FILE=$BITLESSON_FILE" \ + "PLAN_FILE=$PLAN_FILE" \ + "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \ + "ROUND_CONTRACT_FILE=$next_contract_file" \ + "CURRENT_ROUND=$round" > "$next_prompt_file" + if [[ "$BITLESSON_REQUIRED" == "true" ]] && ! grep -q 'bitlesson-selector' "$next_prompt_file"; then + cat >> "$next_prompt_file" << EOF + +## BitLesson Selection (REQUIRED FOR EACH FIX TASK) + +Before implementing each fix task, you MUST: + +1. Read @$BITLESSON_FILE +2. Run \`bitlesson-selector\` for each fix task/sub-task to select relevant lesson IDs +3. 
Follow the selected lesson IDs (or \`NONE\`) during implementation + +Reference: @$BITLESSON_FILE +EOF + fi + append_task_tag_routing_note "$next_prompt_file" + + jq -n \ + --arg reason "$(cat "$next_prompt_file")" \ + --arg msg "Loop: Review Phase Round $round - Fix code review issues" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 +} + +# Block exit when codex review fails or produces no output +# This is a hard error - the review phase cannot be skipped +# Arguments: $1=round_number, $2=failure_reason, $3=exit_code (optional) +block_review_failure() { + local round="$1" + local failure_reason="$2" + local exit_code="${3:-unknown}" + + echo "ERROR: Codex review failed. Blocking exit and requiring retry." >&2 + + local stderr_content="" + local stderr_file="$CACHE_DIR/round-${round}-codex-review.log" + if [[ -f "$stderr_file" ]]; then + stderr_content=$(tail -50 "$stderr_file" 2>/dev/null || echo "(unable to read stderr)") + fi + + local fallback="# Codex Review Failed + +The code review could not be completed. This is a blocking error that requires retry. + +## Error Details + +**Reason**: {{FAILURE_REASON}} +**Round**: {{ROUND_NUMBER}} +**Base Branch**: {{BASE_BRANCH}} +**Exit Code**: {{EXIT_CODE}} + +## What Happened + +The \`codex review\` command failed to produce valid output. This can occur due to: +- Network connectivity issues +- Codex service timeout or unavailability +- Invalid review configuration +- Internal Codex errors + +## Required Action + +**You must retry the exit.** The review phase cannot be skipped - the loop must continue until code review passes with no \`[P0-9]\` issues found. + +Steps to retry: +1. Ensure your changes are committed +2. Write your summary to the expected file +3. 
Attempt to exit again + +If this error persists, consider canceling and restarting the loop: \`/humanize:cancel-rlcr-loop\` + +## Debug Information + +Stderr (last 50 lines): +\`\`\` +{{STDERR_CONTENT}} +\`\`\`" + + local reason + reason=$(load_and_render_safe "$TEMPLATE_DIR" "block/codex-review-failed.md" "$fallback" \ + "FAILURE_REASON=$failure_reason" \ + "ROUND_NUMBER=$round" \ + "BASE_BRANCH=$BASE_BRANCH" \ + "EXIT_CODE=$exit_code" \ + "STDERR_CONTENT=$stderr_content" \ + "REVIEW_RESULT_FILE=$LOOP_DIR/round-${round}-review-result.md" \ + "CODEX_CMD_FILE=$CACHE_DIR/round-${round}-codex-review.cmd" \ + "CODEX_LOG_FILE=$CACHE_DIR/round-${round}-codex-review.log") + + jq -n \ + --arg reason "$reason" \ + --arg msg "Loop: Blocked - Codex review failed, retry required" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 +} diff --git a/hooks/lib/loop-codex-gates.sh b/hooks/lib/loop-codex-gates.sh new file mode 100644 index 00000000..d946b19c --- /dev/null +++ b/hooks/lib/loop-codex-gates.sh @@ -0,0 +1,539 @@ +#!/usr/bin/env bash +# Validation gates for loop-codex-stop-hook +# All "quick checks" that must pass before running Codex review + +set -euo pipefail + +# Quick-check 0: Schema Validation (v1.1.2+ fields) +run_schema_validation_v112() { + local plan_tracked="$1" + local start_branch="$2" + + if [[ -z "$plan_tracked" || -z "$start_branch" ]]; then + REASON="RLCR loop state file is missing required fields (plan_tracked or start_branch). + +This indicates the loop was started with an older version of humanize. + +**Options:** +1. Cancel the loop: \`/humanize:cancel-rlcr-loop\` +2. Update humanize plugin to version 1.1.2+ +3. 
# Quick-check 0.1: Schema Validation (v1.5.0+ fields)
#
# Verifies the two state fields introduced in v1.5.0. When one is unusable the
# function sets REASON, prints a "block" decision as JSON and calls `exit 0`;
# otherwise it returns 0 without output.
#
# Arguments:
#   $1 - review_started value from the state file (must be "true" or "false")
#   $2 - base_branch value from the state file (must be non-empty)
run_schema_validation_v150() {
    local review_started="$1"
    local base_branch="$2"
    local headline=""
    local label=""

    # review_started is checked first; base_branch only matters once it is valid.
    case "$review_started" in
        true|false)
            if [[ -z "$base_branch" ]]; then
                headline="RLCR loop state file is missing base_branch field."
                label="missing base_branch"
            fi
            ;;
        *)
            headline="RLCR loop state file is missing or has invalid review_started field."
            label="missing review_started"
            ;;
    esac

    [[ -z "$headline" ]] && return 0

    REASON="$headline

This indicates the loop was started with an older version of humanize (pre-1.5.0).

**Options:**
1. Cancel the loop: \`/humanize:cancel-rlcr-loop\`
2. Update humanize plugin to version 1.5.0+
3. Restart the RLCR loop with the updated plugin"
    jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - state schema outdated ($label)" \
        '{"decision": "block", "reason": $reason, "systemMessage": $msg}'
    exit 0
}
# Quick-check 0.5: Branch Consistency
#
# Blocks the exit (JSON "block" decision on stdout, then `exit 0`) when the
# current branch cannot be determined or differs from the branch the loop
# started on. Returns normally when the branch is unchanged.
#
# Arguments:
#   $1 - project root passed to `git -C`
#   $2 - branch the loop started on (empty disables the comparison)
#   $3 - timeout handed to run_with_timeout
# Sets globals: CURRENT_BRANCH, GIT_EXIT_CODE, and REASON when blocking.
check_branch_consistency() {
    local project_root="$1"
    local start_branch="$2"
    local git_timeout="$3"

    # Reset first: `|| GIT_EXIT_CODE=$?` only assigns on failure, so a non-zero
    # value left behind by an earlier check would otherwise make a successful
    # lookup look like a git failure.
    GIT_EXIT_CODE=0
    CURRENT_BRANCH=$(run_with_timeout "$git_timeout" git -C "$project_root" rev-parse --abbrev-ref HEAD 2>/dev/null) || GIT_EXIT_CODE=$?
    if [[ $GIT_EXIT_CODE -ne 0 || -z "$CURRENT_BRANCH" ]]; then
        REASON="Git operation failed or timed out.

Cannot verify branch consistency. This may indicate:
- Git is not responding
- Repository is in an invalid state
- Network issues (if remote operations are involved)

Please check git status manually and try again."
        jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - git operation failed" \
            '{"decision": "block", "reason": $reason, "systemMessage": $msg}'
        exit 0
    fi

    if [[ -n "$start_branch" && "$CURRENT_BRANCH" != "$start_branch" ]]; then
        REASON="Git branch changed during RLCR loop.

Started on: $start_branch
Current: $CURRENT_BRANCH

Branch switching is not allowed. Switch back to $start_branch or cancel the loop."
        jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - branch changed" \
            '{"decision": "block", "reason": $reason, "systemMessage": $msg}'
        exit 0
    fi
}
# Quick Check: Are All Tasks Completed
#
# Runs check-todos-from-transcript.py with the hook input on stdin and blocks
# the exit (JSON "block" decision on stdout, then `exit 0`) when the checker
# exits with 2 (parse error) or 1 (incomplete tasks). Every other exit status
# falls through and the function returns normally.
#
# Arguments:
#   $1 - raw hook input, fed to the checker on stdin
#   $2 - directory that contains check-todos-from-transcript.py
#   $3 - template directory (optional; defaults to the TEMPLATE_DIR global so
#        existing two-argument callers keep working)
# Sets globals: REASON, plus FALLBACK in the incomplete-tasks case.
check_todos_completed() {
    local hook_input="$1"
    local script_dir="$2"
    local template_dir="${3:-${TEMPLATE_DIR:-}}"

    local todo_checker="$script_dir/check-todos-from-transcript.py"

    # The checker is optional; without it there is nothing to verify.
    if [[ ! -f "$todo_checker" ]]; then
        return 0
    fi

    local todo_result=""
    local todo_exit=0
    # Here-string rather than `echo ... |`: the captured status is then the
    # checker's own exit code, never a pipeline status influenced by the
    # producer side under `set -o pipefail`.
    todo_result=$(python3 "$todo_checker" 2>&1 <<< "$hook_input") || todo_exit=$?

    if [[ "$todo_exit" -eq 2 ]]; then
        REASON="Task checker encountered a parse error.

Error: $todo_result

This may indicate an issue with the hook input or transcript format.
Please try again or cancel the loop if this persists."
        jq -n \
            --arg reason "$REASON" \
            --arg msg "Loop: Blocked - task checker parse error" \
            '{
                "decision": "block",
                "reason": $reason,
                "systemMessage": $msg
            }'
        exit 0
    fi

    if [[ "$todo_exit" -eq 1 ]]; then
        # Drop the first line of the checker output; the remainder is rendered
        # as the incomplete-task list.
        local incomplete_list
        incomplete_list=$(tail -n +2 <<< "$todo_result")

        FALLBACK="# Incomplete Tasks

Complete these tasks before exiting:

{{INCOMPLETE_LIST}}"
        REASON=$(load_and_render_safe "$template_dir" "block/incomplete-todos.md" "$FALLBACK" \
            "INCOMPLETE_LIST=$incomplete_list")

        jq -n \
            --arg reason "$REASON" \
            --arg msg "Loop: Blocked - incomplete tasks detected, please finish all tasks first" \
            '{
                "decision": "block",
                "reason": $reason,
                "systemMessage": $msg
            }'
        exit 0
    fi
}
# Quick Check: Large File Detection
#
# Walks the cached `git status --porcelain` output and blocks the exit (JSON
# "block" decision on stdout, then `exit 0`) when a changed code or
# documentation file has more than the allowed number of lines.
#
# Arguments:
#   $1 - cached `git status --porcelain` output
#   $2 - "true" when the project is a git repository; anything else skips the check
#   $3 - project root, prefixed to every path from the status output
#   $4 - template directory for load_and_render_safe
#   $5 - line limit (optional, default 2000)
# Sets globals: FALLBACK and REASON when blocking.
check_large_files() {
    local git_status_cached="$1"
    local git_is_repo="$2"
    local project_root="$3"
    local template_dir="$4"
    local max_lines="${5:-2000}"

    if [[ "$git_is_repo" != "true" ]]; then
        return 0
    fi

    local large_files=""
    # Declared once, outside the loop: keeps `line` from leaking into the
    # caller and lets the `|| continue` guards below see real exit statuses
    # (`local var=$(cmd)` always reports success).
    local line filename ext ext_lower file_type line_count

    while IFS= read -r line; do
        [[ -z "$line" ]] && continue

        # Porcelain v1 lines are "XY <path>"; drop the two status letters and the space.
        filename="${line#???}"
        case "$filename" in
            # Renames/copies are "old -> new"; only the new path exists on disk.
            *" -> "*) filename="${filename##* -> }" ;;
        esac

        # git wraps paths containing whitespace or special characters in double
        # quotes. Strip the wrapper so such files are not silently skipped.
        # Backslash escapes inside the quotes are not decoded here.
        if [[ "$filename" == \"*\" ]]; then
            filename="${filename#\"}"
            filename="${filename%\"}"
        fi

        filename="$project_root/$filename"
        [[ ! -f "$filename" ]] && continue

        ext="${filename##*.}"
        ext_lower=$(to_lower "$ext") || continue
        file_type=""

        case "$ext_lower" in
            py|js|ts|tsx|jsx|java|c|cpp|cc|cxx|h|hpp|cs|go|rs|rb|php|swift|kt|kts|scala|sh|bash|zsh)
                file_type="code" ;;
            md|rst|txt|adoc|asciidoc)
                file_type="documentation" ;;
            *) continue ;;
        esac

        line_count=$(wc -l < "$filename" 2>/dev/null | tr -d ' ') || continue
        [[ "$line_count" =~ ^[0-9]+$ ]] || continue

        if [ "$line_count" -gt "$max_lines" ]; then
            large_files="${large_files}
- \`${filename}\`: ${line_count} lines (${file_type} file)"
        fi
    done <<< "$git_status_cached"

    if [ -n "$large_files" ]; then
        FALLBACK="# Large Files Detected

Files exceeding {{MAX_LINES}} lines:

{{LARGE_FILES}}

Split these into smaller modules before continuing."
        REASON=$(load_and_render_safe "$template_dir" "block/large-files.md" "$FALLBACK" \
            "MAX_LINES=$max_lines" \
            "LARGE_FILES=$large_files")

        jq -n \
            --arg reason "$REASON" \
            --arg msg "Loop: Blocked - large files detected (>${max_lines} lines), please split into smaller modules" \
            '{
                "decision": "block",
                "reason": $reason,
                "systemMessage": $msg
            }'
        exit 0
    fi
}
+ special_notes="$special_notes$humanize_local_note" + fi + + local other_untracked=$(echo "$untracked" | grep -vE "$humanize_untracked_pattern" || true) + if [[ -n "$other_untracked" ]]; then + local untracked_note=$(load_template "$template_dir" "block/git-not-clean-untracked.md" 2>/dev/null) + [[ -z "$untracked_note" ]] && untracked_note="Review untracked files - add to .gitignore or commit them." + special_notes="$special_notes$untracked_note" + fi + fi + + if [[ -n "$git_issues" ]]; then + cleanup_stale_index_lock "$project_root" + FALLBACK="# Git Not Clean + +Detected: {{GIT_ISSUES}} + +Please commit all changes before exiting. +{{SPECIAL_NOTES}}" + REASON=$(load_and_render_safe "$template_dir" "block/git-not-clean.md" "$FALLBACK" \ + "GIT_ISSUES=$git_issues" \ + "SPECIAL_NOTES=$special_notes") + + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Blocked - $git_issues detected, please commit first" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi + + if [[ "$push_every_round" == "true" ]]; then + local git_ahead=$(run_with_timeout "$git_timeout" git -C "$project_root" status -sb 2>/dev/null | grep -o 'ahead [0-9]*' || true) + if [[ -n "$git_ahead" ]]; then + local ahead_count=$(echo "$git_ahead" | grep -o '[0-9]*') + local current_branch=$(run_with_timeout "$git_timeout" git -C "$project_root" rev-parse --abbrev-ref HEAD 2>/dev/null || echo "unknown") + + FALLBACK="# Unpushed Commits + +You have {{AHEAD_COUNT}} unpushed commit(s) on branch {{CURRENT_BRANCH}}. + +Please push before exiting." 
# Helper: succeed when the section of the goal tracker that starts at the
# heading matching $2 (and ends at the next line starting with "##") still
# contains "[To be <word>" placeholder text.
_goal_tracker_section_has_placeholder() {
    local tracker_file="$1"
    local heading_regex="$2"
    local section

    # Best-effort read: an awk failure yields an empty section, i.e. "no placeholder".
    section=$(awk -v start="$heading_regex" '$0 ~ start {found=1; next} /^##/{found=0} found' "$tracker_file" 2>/dev/null) || true

    # Here-string instead of `echo ... | grep -q`: with `set -o pipefail` a
    # producer killed by SIGPIPE after grep's early exit would turn a real
    # match into a failed pipeline.
    grep -qE '\[To be [a-z]' <<< "$section"
}

# Check Goal Tracker Initialization
#
# Only active in round 0 of the implementation phase. Blocks the exit (JSON
# "block" decision on stdout, then `exit 0`) while the Ultimate Goal,
# Acceptance Criteria or Active Tasks section still holds placeholder text.
#
# Arguments:
#   $1 - goal tracker file (a missing file skips the check)
#   $2 - "true" in the finalize phase (skips the check)
#   $3 - "true" once the review phase has started (skips the check)
#   $4 - current round number (anything but 0 skips the check)
#   $5 - template directory for load_and_render_safe
# Sets globals: FALLBACK and REASON when blocking.
check_goal_tracker_init() {
    local goal_tracker_file="$1"
    local is_finalize_phase="$2"
    local review_started="$3"
    local current_round="$4"
    local template_dir="$5"

    [[ "$is_finalize_phase" == "true" ]] && return 0
    [[ "$review_started" == "true" ]] && return 0
    [[ "$current_round" -ne 0 ]] && return 0
    [[ ! -f "$goal_tracker_file" ]] && return 0

    local missing_items=""
    if _goal_tracker_section_has_placeholder "$goal_tracker_file" '^### Ultimate Goal'; then
        missing_items="$missing_items
- **Ultimate Goal**: Still contains placeholder text"
    fi
    if _goal_tracker_section_has_placeholder "$goal_tracker_file" '^### Acceptance Criteria'; then
        missing_items="$missing_items
- **Acceptance Criteria**: Still contains placeholder text"
    fi
    if _goal_tracker_section_has_placeholder "$goal_tracker_file" '^#### Active Tasks'; then
        missing_items="$missing_items
- **Active Tasks**: Still contains placeholder text"
    fi

    if [[ -n "$missing_items" ]]; then
        FALLBACK="# Goal Tracker Not Initialized

Please fill in the Goal Tracker ({{GOAL_TRACKER_FILE}}):
{{MISSING_ITEMS}}"
        REASON=$(load_and_render_safe "$template_dir" "block/goal-tracker-not-initialized.md" "$FALLBACK" \
            "GOAL_TRACKER_FILE=$goal_tracker_file" \
            "MISSING_ITEMS=$missing_items")

        jq -n \
            --arg reason "$REASON" \
            --arg msg "Loop: Goal Tracker not initialized in Round 0" \
            '{
                "decision": "block",
                "reason": $reason,
                "systemMessage": $msg
            }'
        exit 0
    fi
}
# Switch the loop into the finalize phase.
#
# Renames the state file to finalize-state.md inside the loop directory,
# renders the finalize prompt (the "review skipped" variant when a skip reason
# is given) and prints it as a "block" decision before calling `exit 0`.
#
# Arguments:
#   $1 - reason the review was skipped (empty when the review passed)
#   $2 - system message to attach to the decision
# Globals read: STATE_FILE, LOOP_DIR, TEMPLATE_DIR, PLAN_FILE,
#               GOAL_TRACKER_FILE, BASE_BRANCH, START_BRANCH
enter_finalize_phase() {
    local skip_reason="$1"
    local system_msg="$2"

    local finalize_state_file="$LOOP_DIR/finalize-state.md"
    local finalize_summary_file="$LOOP_DIR/finalize-summary.md"
    local finalize_prompt
    local fallback

    mv "$STATE_FILE" "$finalize_state_file"
    printf 'State file renamed to: %s\n' "$finalize_state_file" >&2

    if [[ -z "$skip_reason" ]]; then
        # Normal path: the Codex review passed.
        fallback="# Finalize Phase

Codex review has passed. The implementation is complete.

You are now in the **Finalize Phase**. Use the \`code-simplifier:code-simplifier\` agent via the Task tool to simplify and refactor your code.

## Constraints
- Must NOT change existing functionality
- Must NOT fail existing tests
- Must NOT introduce new bugs
- Only perform functionality-equivalent code refactoring and simplification

## Focus
Focus on the code changes made during this RLCR session. Focus more on changes between branch from {{BASE_BRANCH}} to {{START_BRANCH}}.

## Before Exiting
1. Complete all todos
2. Commit your changes
3. Write your finalize summary to: {{FINALIZE_SUMMARY_FILE}}"

        finalize_prompt=$(load_and_render_safe "$TEMPLATE_DIR" "claude/finalize-phase-prompt.md" "$fallback" \
            "FINALIZE_SUMMARY_FILE=$finalize_summary_file" \
            "PLAN_FILE=$PLAN_FILE" \
            "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \
            "BASE_BRANCH=$BASE_BRANCH" \
            "START_BRANCH=$START_BRANCH")
    else
        # Review was skipped: warn and ask for manual verification.
        fallback="# Finalize Phase (Review Skipped)

**Warning**: Code review was skipped due to: {{REVIEW_SKIP_REASON}}

The implementation could not be fully validated. You are now in the **Finalize Phase**.

## Important Notice
Since the code review was skipped, please manually verify your changes before finalizing:
1. Review your code changes for any obvious issues
2. Run any available tests to verify correctness
3. Check for common code quality issues

## Simplification (Optional)
If time permits, use the \`code-simplifier:code-simplifier\` agent via the Task tool to simplify and refactor your code. Focus more on changes between branch from {{BASE_BRANCH}} to {{START_BRANCH}}.

## Constraints
- Must NOT change existing functionality
- Must NOT fail existing tests
- Must NOT introduce new bugs
- Only perform functionality-equivalent code refactoring and simplification

## Before Exiting
1. Complete all todos
2. Commit your changes
3. Write your finalize summary to: {{FINALIZE_SUMMARY_FILE}}"

        finalize_prompt=$(load_and_render_safe "$TEMPLATE_DIR" "claude/finalize-phase-skipped-prompt.md" "$fallback" \
            "FINALIZE_SUMMARY_FILE=$finalize_summary_file" \
            "PLAN_FILE=$PLAN_FILE" \
            "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \
            "REVIEW_SKIP_REASON=$skip_reason" \
            "BASE_BRANCH=$BASE_BRANCH" \
            "START_BRANCH=$START_BRANCH")
    fi

    jq -n \
        --arg reason "$finalize_prompt" \
        --arg msg "$system_msg" \
        '{
            "decision": "block",
            "reason": $reason,
            "systemMessage": $msg
        }'
    exit 0
}
"${FIELD_DRIFT_STATUS}=${DRIFT_STATUS_REPLAN_REQUIRED}" + + local fallback="# Mainline Drift Circuit Breaker + +The RLCR loop has been stopped because the mainline failed to advance for {{STALL_COUNT}} consecutive implementation rounds. + +- Last mainline verdict: {{LAST_VERDICT}} +- Drift status: replan_required + +This loop should not continue automatically. Revisit the original plan, recover the round contract, and restart with a narrower mainline objective." + local reason + reason=$(load_and_render_safe "$TEMPLATE_DIR" "block/mainline-drift-stop.md" "$fallback" \ + "STALL_COUNT=$stall_count" \ + "LAST_VERDICT=$last_verdict" \ + "PLAN_FILE=$PLAN_FILE") + + end_loop "$LOOP_DIR" "$STATE_FILE" "$EXIT_STOP" + + jq -n \ + --arg reason "$reason" \ + --arg msg "Loop: Stopped - mainline drift circuit breaker triggered" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 +} + +# Block exit when implementation review output omits the required mainline verdict +# Arguments: $1=review_result_file, $2=review_prompt_file +block_missing_mainline_verdict() { + local review_result_file="$1" + local review_prompt_file="$2" + + local fallback="# Mainline Verdict Missing + +The implementation review output is missing the required line: + +\`Mainline Progress Verdict: ADVANCED / STALLED / REGRESSED\` + +Humanize cannot safely update drift state or choose the correct next-round prompt without this verdict. + +Retry the exit so Codex reruns the implementation review. 
# Keep the review loop going after Codex reported issues.
#
# Rewrites current_round in the state file, seeds the summary skeleton for the
# round if it does not exist yet, renders the review-phase prompt into
# round-<N>-prompt.md (appending the BitLesson section and the task-tag routing
# note), then prints the prompt as a "block" decision and calls `exit 0`.
#
# Arguments:
#   $1 - round number to continue with
#   $2 - review content produced by Codex
# Globals read: STATE_FILE, LOOP_DIR, TEMPLATE_DIR, BITLESSON_FILE,
#               BITLESSON_REQUIRED, PLAN_FILE, GOAL_TRACKER_FILE
continue_review_loop_with_issues() {
    local round="$1"
    local review_content="$2"

    local prompt_path="$LOOP_DIR/round-${round}-prompt.md"
    local summary_path="$LOOP_DIR/round-${round}-summary.md"
    local contract_path="$LOOP_DIR/round-${round}-contract.md"
    local state_tmp="${STATE_FILE}.tmp.$$"

    echo "Code review found issues. Continuing review loop..." >&2

    # Write to a temp file and rename so the state file is replaced in one step.
    sed "s/^current_round: .*/current_round: $round/" "$STATE_FILE" > "$state_tmp"
    mv "$state_tmp" "$STATE_FILE"

    # Never overwrite a summary that already exists for this round.
    [[ -f "$summary_path" ]] || cat > "$summary_path" << EOF
# Review Round $round Summary

## Work Completed
- [Describe what was implemented in this phase]

## Files Changed
- [List created/modified files]

## Validation
- [List tests/commands run and outcomes]

## Remaining Items
- [List unresolved items, if any]

## BitLesson Delta
- Action: none|add|update
- Lesson ID(s): NONE
- Notes: [what changed and why]
EOF

    local fallback="# Code Review Findings

You are in the **Review Phase** of the RLCR loop. Codex has performed a code review and found issues.

## Review Results

{{REVIEW_CONTENT}}

## Instructions

1. Re-anchor on the original plan and current goal tracker before changing code
2. Refresh the round contract at {{ROUND_CONTRACT_FILE}}
3. Address only the issues that are truly blocking the current mainline objective or code-review acceptance
4. Record non-blocking follow-up items as queued, not as the main goal
5. Commit your changes after fixing the issues
6. Write your summary to: {{SUMMARY_FILE}}"

    load_and_render_safe "$TEMPLATE_DIR" "claude/review-phase-prompt.md" "$fallback" \
        "REVIEW_CONTENT=$review_content" \
        "SUMMARY_FILE=$summary_path" \
        "BITLESSON_FILE=$BITLESSON_FILE" \
        "PLAN_FILE=$PLAN_FILE" \
        "GOAL_TRACKER_FILE=$GOAL_TRACKER_FILE" \
        "ROUND_CONTRACT_FILE=$contract_path" \
        "CURRENT_ROUND=$round" > "$prompt_path"

    # Add the BitLesson section only when required and not already in the rendered prompt.
    if [[ "$BITLESSON_REQUIRED" == "true" ]] && ! grep -q 'bitlesson-selector' "$prompt_path"; then
        cat >> "$prompt_path" << EOF

## BitLesson Selection (REQUIRED FOR EACH FIX TASK)

Before implementing each fix task, you MUST:

1. Read @$BITLESSON_FILE
2. Run \`bitlesson-selector\` for each fix task/sub-task to select relevant lesson IDs
3. Follow the selected lesson IDs (or \`NONE\`) during implementation

Reference: @$BITLESSON_FILE
EOF
    fi
    append_task_tag_routing_note "$prompt_path"

    local prompt_body
    prompt_body="$(cat "$prompt_path")"
    jq -n \
        --arg reason "$prompt_body" \
        --arg msg "Loop: Review Phase Round $round - Fix code review issues" \
        '{
            "decision": "block",
            "reason": $reason,
            "systemMessage": $msg
        }'
    exit 0
}
>&2 + + local stderr_content="" + local stderr_file="$CACHE_DIR/round-${round}-codex-review.log" + if [[ -f "$stderr_file" ]]; then + stderr_content=$(tail -50 "$stderr_file" 2>/dev/null || echo "(unable to read stderr)") + fi + + local fallback="# Codex Review Failed + +The code review could not be completed. This is a blocking error that requires retry. + +## Error Details + +**Reason**: {{FAILURE_REASON}} +**Round**: {{ROUND_NUMBER}} +**Base Branch**: {{BASE_BRANCH}} +**Exit Code**: {{EXIT_CODE}} + +## What Happened + +The \`codex review\` command failed to produce valid output. This can occur due to: +- Network connectivity issues +- Codex service timeout or unavailability +- Invalid review configuration +- Internal Codex errors + +## Required Action + +**You must retry the exit.** The review phase cannot be skipped - the loop must continue until code review passes with no \`[P0-9]\` issues found. + +Steps to retry: +1. Ensure your changes are committed +2. Write your summary to the expected file +3. 
# Helper function to print Codex failure and block exit for retry.
#
# Builds a markdown failure report, stores it in the global REASON, prints a
# "block" decision as JSON on stdout and ends the shell with `exit 0`.
#
# Arguments:
#   $1 - short error type (shown as "Error Type")
#   $2 - free-form details
# Globals read: CODEX_CMD_FILE, CODEX_STDOUT_FILE, CODEX_STDERR_FILE
codex_failure_exit() {
    local error_type="$1"
    local details="$2"

    # The debug-file paths fall back to a placeholder so that, with `set -u`
    # active, reporting a failure can never itself abort on an unset variable.
    REASON="# Codex Review Failed

**Error Type:** $error_type

$details

**Debug files:**
- Command: ${CODEX_CMD_FILE:-(not recorded)}
- Stdout: ${CODEX_STDOUT_FILE:-(not recorded)}
- Stderr: ${CODEX_STDERR_FILE:-(not recorded)}

Please retry or use \`/humanize:cancel-rlcr-loop\` to end the loop."

    # Let jq build the whole object, as the other handlers in this file do.
    # Splicing `$(... | jq -Rs .)` into a hand-written heredoc would emit
    # invalid JSON if jq failed.
    jq -n \
        --arg reason "$REASON" \
        --arg msg "Loop: Blocked - Codex review failed, retry required" \
        '{
            "decision": "block",
            "reason": $reason,
            "systemMessage": $msg
        }'
    exit 0
}
# Run codex exec for implementation phase summary review.
#
# Writes a debug record of the invocation, then pipes the review prompt to
# `codex exec ... -` under run_with_timeout, capturing stdout and stderr in
# per-round cache files. A non-zero exit is recorded, not propagated.
#
# Arguments: (none - uses globals: CURRENT_ROUND, REVIEW_PROMPT_FILE, CACHE_DIR,
#            CODEX_TIMEOUT, CODEX_DISABLE_HOOKS_ARGS, CODEX_EXEC_ARGS, PROJECT_ROOT)
# Sets: CODEX_EXIT_CODE, CODEX_CMD_FILE, CODEX_STDOUT_FILE, CODEX_STDERR_FILE,
#       CODEX_PROMPT_CONTENT
run_codex_impl_phase_review() {
    CODEX_CMD_FILE="$CACHE_DIR/round-${CURRENT_ROUND}-codex-run.cmd"
    CODEX_STDOUT_FILE="$CACHE_DIR/round-${CURRENT_ROUND}-codex-run.out"
    CODEX_STDERR_FILE="$CACHE_DIR/round-${CURRENT_ROUND}-codex-run.log"

    # Save the command for debugging.
    # Both argument arrays use the `${arr[*]+"${arr[*]}"}` guard: with `set -u`,
    # bash releases before 4.4 treat expanding an empty array as an unbound
    # variable and abort. Previously only CODEX_DISABLE_HOOKS_ARGS was guarded.
    CODEX_PROMPT_CONTENT=$(cat "$REVIEW_PROMPT_FILE")
    {
        echo "# Codex invocation debug info"
        echo "# Timestamp: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
        echo "# Working directory: $PROJECT_ROOT"
        echo "# Timeout: $CODEX_TIMEOUT seconds"
        echo ""
        echo "codex exec ${CODEX_DISABLE_HOOKS_ARGS[*]+"${CODEX_DISABLE_HOOKS_ARGS[*]}"} ${CODEX_EXEC_ARGS[*]+"${CODEX_EXEC_ARGS[*]}"} \"<prompt>\""
        echo ""
        echo "# Prompt content:"
        echo "$CODEX_PROMPT_CONTENT"
    } > "$CODEX_CMD_FILE"

    echo "Codex command saved to: $CODEX_CMD_FILE" >&2
    echo "Running summary review with timeout ${CODEX_TIMEOUT}s..." >&2

    # The trailing "-" makes codex read the prompt from stdin. The `|| ...`
    # keeps `set -e` from aborting so the caller can inspect CODEX_EXIT_CODE.
    CODEX_EXIT_CODE=0
    printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex exec ${CODEX_DISABLE_HOOKS_ARGS[@]+"${CODEX_DISABLE_HOOKS_ARGS[@]}"} ${CODEX_EXEC_ARGS[@]+"${CODEX_EXEC_ARGS[@]}"} - \
        > "$CODEX_STDOUT_FILE" 2> "$CODEX_STDERR_FILE" || CODEX_EXIT_CODE=$?

    echo "Codex exit code: $CODEX_EXIT_CODE" >&2
    echo "Codex stdout saved to: $CODEX_STDOUT_FILE" >&2
    echo "Codex stderr saved to: $CODEX_STDERR_FILE" >&2
}
+ jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - branch changed" \ + '{"decision": "block", "reason": $reason, "systemMessage": $msg}' + exit 0 + fi +} + +# Quick Check: Plan File Integrity +check_plan_file_integrity() { + local project_root="$1" + local state_file="$2" + + # Skip this check in Review Phase (review_started=true) + # In review phase, the plan file is no longer needed - only code review matters. + if [[ "$REVIEW_STARTED" == "true" ]]; then + echo "Review phase: skipping plan file integrity check (plan no longer needed)" >&2 + return + fi + + BACKUP_PLAN="$LOOP_DIR/plan.md" + FULL_PLAN_PATH="$project_root/$PLAN_FILE" + + # Check backup exists + if [[ ! -f "$BACKUP_PLAN" ]]; then + REASON="Plan file backup not found in loop directory. + +Please copy the plan file to the loop directory: + cp \"$FULL_PLAN_PATH\" \"$BACKUP_PLAN\" + +This backup is required for plan integrity verification." + jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - plan backup missing" \ + '{"decision": "block", "reason": $reason, "systemMessage": $msg}' + exit 0 + fi + + # Check original plan file still exists + if [[ ! -f "$FULL_PLAN_PATH" ]]; then + REASON="Project plan file has been deleted. + +Original: $PLAN_FILE +Backup available at: $BACKUP_PLAN + +You can restore from backup if needed. Plan file modifications are not allowed during RLCR loop." + jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - plan file deleted" \ + '{"decision": "block", "reason": $reason, "systemMessage": $msg}' + exit 0 + fi + + # Check plan file integrity + # For tracked files: check both git status (uncommitted) AND content diff (committed changes) + if [[ "$PLAN_TRACKED" == "true" ]]; then + PLAN_GIT_STATUS=$(run_with_timeout "$GIT_TIMEOUT" git -C "$project_root" status --porcelain "$PLAN_FILE" 2>/dev/null || echo "") + if [[ -n "$PLAN_GIT_STATUS" ]]; then + REASON="Plan file has uncommitted modifications. 
# Quick Check: Incomplete Tasks
#
# Pipes the hook input (HOOK_INPUT) into check-todos-from-transcript.py and blocks
# the stop hook when the checker reports unfinished tasks.
#
# Globals read: SCRIPT_DIR, HOOK_INPUT, TEMPLATE_DIR
# Globals set: TODO_RESULT, TODO_EXIT; INCOMPLETE_LIST, FALLBACK and REASON when blocking
# Checker exit codes as handled here:
#   0 - nothing to report; the function returns normally
#   1 - incomplete tasks; everything after the first output line is the task list
#   2 - parse error; the checker output is surfaced in the block reason
# Blocking prints a {"decision": "block", ...} JSON object and exits 0.
# Returns without checking when the checker script is not present.
check_incomplete_tasks() {
    local todo_checker="$SCRIPT_DIR/check-todos-from-transcript.py"

    if [[ ! -f "$todo_checker" ]]; then
        return 0
    fi

    # Reset first: TODO_EXIT is a global that is only assigned on failure below, so
    # a non-zero value left over from earlier in this shell would otherwise be
    # mistaken for this run's result.
    TODO_EXIT=0
    # Pass hook input to the task checker
    TODO_RESULT=$(echo "$HOOK_INPUT" | python3 "$todo_checker" 2>&1) || TODO_EXIT=$?

    if [[ "$TODO_EXIT" -eq 2 ]]; then
        # Parse error - block and surface the error
        REASON="Task checker encountered a parse error.

Error: $TODO_RESULT

This may indicate an issue with the hook input or transcript format.
Please try again or cancel the loop if this persists."
        jq -n \
            --arg reason "$REASON" \
            --arg msg "Loop: Blocked - task checker parse error" \
            '{
                "decision": "block",
                "reason": $reason,
                "systemMessage": $msg
            }'
        exit 0
    fi

    if [[ "$TODO_EXIT" -eq 1 ]]; then
        # Incomplete tasks found - block immediately without Codex review
        INCOMPLETE_LIST=$(echo "$TODO_RESULT" | tail -n +2)

        FALLBACK="# Incomplete Tasks

Complete these tasks before exiting:

{{INCOMPLETE_LIST}}"
        REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/incomplete-todos.md" "$FALLBACK" \
            "INCOMPLETE_LIST=$INCOMPLETE_LIST")

        jq -n \
            --arg reason "$REASON" \
            --arg msg "Loop: Blocked - incomplete tasks detected, please finish all tasks first" \
            '{
                "decision": "block",
                "reason": $reason,
                "systemMessage": $msg
            }'
        exit 0
    fi

    # Any other status (e.g. 127 when python3 is missing) still lets the hook
    # continue, as before, but is no longer silent.
    if [[ "$TODO_EXIT" -ne 0 ]]; then
        echo "Warning: task checker exited with unexpected code $TODO_EXIT; skipping incomplete-task check" >&2
    fi
}
# Quick Check: Large File Detection
#
# Walks the cached `git status --porcelain` output (GIT_STATUS_CACHED) and blocks
# the stop hook when a changed code or documentation file has more than
# max_lines (2000) lines.
#
# Arguments: $1 = project root that the porcelain paths are resolved against
# Globals read: GIT_IS_REPO, GIT_STATUS_CACHED, TEMPLATE_DIR
# Globals set: LARGE_FILES; FALLBACK and REASON when blocking
# Returns normally when nothing is oversized or GIT_IS_REPO is not "true";
# otherwise prints a {"decision": "block", ...} JSON object and exits 0.
check_large_files() {
    local project_root="$1"
    local max_lines=2000
    # Loop scratch variables are local so they cannot clobber the caller's state.
    local line filename ext ext_lower file_type line_count

    if [[ "$GIT_IS_REPO" != "true" ]]; then
        return
    fi

    LARGE_FILES=""

    while IFS= read -r line; do
        # Skip empty lines
        if [ -z "$line" ]; then
            continue
        fi

        # Extract filename (skip first 3 chars: "XY ")
        filename="${line#???}"

        # Handle renames: "old -> new" format
        case "$filename" in
            *" -> "*) filename="${filename##* -> }" ;;
        esac

        # git wraps paths containing whitespace or special characters in double
        # quotes. Strip the quotes when the name holds no backslash escapes, so
        # such files are checked instead of silently skipped. Names that do carry
        # escapes are left untouched and fall through to the -f test below.
        if [[ "$filename" == \"*\" && "$filename" != *\\* ]]; then
            filename="${filename#\"}"
            filename="${filename%\"}"
        fi

        # Resolve filename relative to PROJECT_ROOT
        filename="$project_root/$filename"

        # Skip deleted files
        if [ ! -f "$filename" ]; then
            continue
        fi

        # Get file extension and convert to lowercase
        ext="${filename##*.}"
        ext_lower=$(to_lower "$ext")

        # Determine file type based on extension
        case "$ext_lower" in
            py|js|ts|tsx|jsx|java|c|cpp|cc|cxx|h|hpp|cs|go|rs|rb|php|swift|kt|kts|scala|sh|bash|zsh)
                file_type="code"
                ;;
            md|rst|txt|adoc|asciidoc)
                file_type="documentation"
                ;;
            *)
                continue
                ;;
        esac

        # Count lines and trim whitespace
        line_count=$(wc -l < "$filename" 2>/dev/null | tr -d ' ') || continue

        # Validate line_count is numeric before comparison
        [[ "$line_count" =~ ^[0-9]+$ ]] || continue

        if [ "$line_count" -gt "$max_lines" ]; then
            LARGE_FILES="${LARGE_FILES}
- \`${filename}\`: ${line_count} lines (${file_type} file)"
        fi
    done <<< "$GIT_STATUS_CACHED"

    if [ -n "$LARGE_FILES" ]; then
        FALLBACK="# Large Files Detected

Files exceeding {{MAX_LINES}} lines:

{{LARGE_FILES}}

Split these into smaller modules before continuing."
        REASON=$(load_and_render_safe "$TEMPLATE_DIR" "block/large-files.md" "$FALLBACK" \
            "MAX_LINES=$max_lines" \
            "LARGE_FILES=$LARGE_FILES")

        jq -n \
            --arg reason "$REASON" \
            --arg msg "Loop: Blocked - large files detected (>${max_lines} lines), please split into smaller modules" \
            '{
                "decision": "block",
                "reason": $reason,
                "systemMessage": $msg
            }'
        exit 0
    fi
}
+Provider: codex + +## Review Configuration +- Base Branch: ${BASE_BRANCH} +- Base Commit: ${BASE_COMMIT:-N/A} +- Review Base (${review_base_type}): ${review_base} +- Review Round: ${round} +- Timestamp: ${timestamp} +" + load_and_render_safe "$TEMPLATE_DIR" "codex/code-review-phase.md" "$prompt_fallback" \ + "REVIEW_ROUND=$round" \ + "BASE_BRANCH=$BASE_BRANCH" \ + "BASE_COMMIT=${BASE_COMMIT:-N/A}" \ + "REVIEW_BASE=$review_base" \ + "REVIEW_BASE_TYPE=$review_base_type" \ + "TIMESTAMP=$timestamp" > "$prompt_file" + + echo "Code review prompt (audit) saved to: $prompt_file" >&2 + + { + echo "# Code review invocation debug info" + echo "# Timestamp: $timestamp" + echo "# Working directory: $PROJECT_ROOT" + echo "# Base branch: $BASE_BRANCH" + echo "# Base commit: ${BASE_COMMIT:-N/A}" + echo "# Review base ($review_base_type): $review_base" + echo "# Timeout: $CODEX_TIMEOUT seconds" + echo "" + echo "cat '$prompt_file' | codex review ${CODEX_DISABLE_HOOKS_ARGS[*]+"${CODEX_DISABLE_HOOKS_ARGS[*]}"} --base $review_base ${CODEX_REVIEW_ARGS[*]} -" + } > "$CODEX_REVIEW_CMD_FILE" + + echo "Code review command saved to: $CODEX_REVIEW_CMD_FILE" >&2 + echo "Running codex review with timeout ${CODEX_TIMEOUT}s in $PROJECT_ROOT (base: $review_base)..." >&2 + + CODEX_REVIEW_EXIT_CODE=0 + (cd "$PROJECT_ROOT" && cat "$prompt_file" | run_with_timeout "$CODEX_TIMEOUT" codex review ${CODEX_DISABLE_HOOKS_ARGS[@]+"${CODEX_DISABLE_HOOKS_ARGS[@]}"} --base "$review_base" "${CODEX_REVIEW_ARGS[@]}" -) \ + > "$CODEX_REVIEW_LOG_FILE" 2>&1 || CODEX_REVIEW_EXIT_CODE=$? 
+ + echo "Code review exit code: $CODEX_REVIEW_EXIT_CODE" >&2 + echo "Code review log saved to: $CODEX_REVIEW_LOG_FILE" >&2 + + return "$CODEX_REVIEW_EXIT_CODE" +} + +# Run code review and handle the result +# Arguments: $1=round_number, $2=success_system_message +# On success (no issues), calls enter_finalize_phase and exits +# On issues found, calls continue_review_loop_with_issues and exits +# On failure, calls block_review_failure and exits +run_and_handle_code_review() { + local round="$1" + local success_msg="$2" + + echo "Running codex review against base branch: $BASE_BRANCH..." >&2 + + if ! run_codex_code_review "$round"; then + block_review_failure "$round" "Codex review command failed" "$CODEX_REVIEW_EXIT_CODE" + fi + + local merged_content="" + local detect_exit=0 + merged_content=$(detect_review_issues "$round") || detect_exit=$? + + if [[ "$detect_exit" -eq 2 ]]; then + block_review_failure "$round" "Codex review produced no stdout output" "N/A" + elif [[ "$detect_exit" -eq 0 ]] && [[ -n "$merged_content" ]]; then + continue_review_loop_with_issues "$round" "$merged_content" + else + echo "Code review passed with no issues. Proceeding to finalize phase." 
# Detect which phase we're in based on the state file's name.
#
# Arguments: $1 = path of the state file being processed
# Sets: IS_FINALIZE_PHASE          - "true" when the path ends in /finalize-state.md
#       IS_METHODOLOGY_ANALYSIS_PHASE - "true" when it ends in
#                                     /methodology-analysis-state.md
#       Both are "false" for any other path.
# Returns: always 0. The phase is reported through the two globals only; a
# non-zero status from a failed pattern test would abort a caller running
# under `set -e`.
detect_loop_phase() {
    local state_file="$1"

    IS_FINALIZE_PHASE=false
    IS_METHODOLOGY_ANALYSIS_PHASE=false

    case "$state_file" in
        */finalize-state.md)
            IS_FINALIZE_PHASE=true
            ;;
        */methodology-analysis-state.md)
            IS_METHODOLOGY_ANALYSIS_PHASE=true
            ;;
    esac

    return 0
}
current_round/max_iterations + if ! parse_state_file "$state_file" 2>/dev/null; then + echo "Warning: parse_state_file returned non-zero, proceeding to schema validation" >&2 + fi + + # Map STATE_* variables to local names for backward compatibility + PLAN_TRACKED="$STATE_PLAN_TRACKED" + START_BRANCH="$STATE_START_BRANCH" + BASE_BRANCH="${STATE_BASE_BRANCH:-}" + BASE_COMMIT="${STATE_BASE_COMMIT:-}" + PLAN_FILE="$STATE_PLAN_FILE" + CURRENT_ROUND="$STATE_CURRENT_ROUND" + MAX_ITERATIONS="$STATE_MAX_ITERATIONS" + PUSH_EVERY_ROUND="$STATE_PUSH_EVERY_ROUND" + FULL_REVIEW_ROUND="${STATE_FULL_REVIEW_ROUND:-5}" + REVIEW_STARTED="$STATE_REVIEW_STARTED" + CODEX_EXEC_MODEL="${STATE_CODEX_MODEL:-$DEFAULT_CODEX_MODEL}" + CODEX_EXEC_EFFORT="${STATE_CODEX_EFFORT:-$DEFAULT_CODEX_EFFORT}" + CODEX_REVIEW_MODEL="$CODEX_EXEC_MODEL" + CODEX_REVIEW_EFFORT="high" + CODEX_TIMEOUT="${STATE_CODEX_TIMEOUT:-${CODEX_TIMEOUT:-$DEFAULT_CODEX_TIMEOUT}}" + ASK_CODEX_QUESTION="${STATE_ASK_CODEX_QUESTION:-false}" + AGENT_TEAMS="${STATE_AGENT_TEAMS:-false}" + PRIVACY_MODE="${STATE_PRIVACY_MODE:-true}" + BITLESSON_REQUIRED="false" + if [[ -n "$RAW_BITLESSON_REQUIRED" ]]; then + BITLESSON_REQUIRED=$(echo "$RAW_BITLESSON_REQUIRED" | sed 's/^bitlesson_required:[[:space:]]*//' | tr -d ' "') + fi + BITLESSON_FILE_REL=".humanize/bitlesson.md" + if [[ -n "$RAW_BITLESSON_FILE" ]]; then + BITLESSON_FILE_REL=$(echo "$RAW_BITLESSON_FILE" | sed 's/^bitlesson_file:[[:space:]]*//' | sed 's/^"//; s/"$//') + fi + if [[ -z "$BITLESSON_FILE_REL" ]] || \ + [[ ! 
"$BITLESSON_FILE_REL" =~ ^[a-zA-Z0-9._/-]+$ ]] || \ + [[ "$BITLESSON_FILE_REL" = /* ]] || \ + [[ "$BITLESSON_FILE_REL" =~ (^|/)\.\.(/|$) ]]; then + BITLESSON_FILE_REL=".humanize/bitlesson.md" + fi + BITLESSON_FILE="$PROJECT_ROOT/$BITLESSON_FILE_REL" + BITLESSON_ALLOW_EMPTY_NONE="true" + if [[ -n "$RAW_BITLESSON_ALLOW_EMPTY_NONE" ]]; then + BITLESSON_ALLOW_EMPTY_NONE=$(echo "$RAW_BITLESSON_ALLOW_EMPTY_NONE" | sed 's/^bitlesson_allow_empty_none:[[:space:]]*//' | tr -d ' "') + fi + if [[ "${HUMANIZE_ALLOW_EMPTY_BITLESSON_NONE:-}" == "true" ]]; then + BITLESSON_ALLOW_EMPTY_NONE="true" + fi + if [[ "$BITLESSON_ALLOW_EMPTY_NONE" != "true" && "$BITLESSON_ALLOW_EMPTY_NONE" != "false" ]]; then + BITLESSON_ALLOW_EMPTY_NONE="true" + fi + MAINLINE_STALL_COUNT="${STATE_MAINLINE_STALL_COUNT:-0}" + LAST_MAINLINE_VERDICT="${STATE_LAST_MAINLINE_VERDICT:-$MAINLINE_VERDICT_UNKNOWN}" + DRIFT_STATUS="${STATE_DRIFT_STATUS:-$DRIFT_STATUS_NORMAL}" + + # Re-validate Codex Model and Effort for YAML safety (in case state.md was manually edited) + # Use same validation patterns as setup-rlcr-loop.sh + if [[ ! "$CODEX_EXEC_MODEL" =~ ^[a-zA-Z0-9._-]+$ ]]; then + echo "Error: Invalid codex_model in state file: $CODEX_EXEC_MODEL" >&2 + end_loop "$LOOP_DIR" "$state_file" "$EXIT_UNEXPECTED" + exit 0 + fi + if [[ ! 
"$CODEX_EXEC_EFFORT" =~ ^(xhigh|high|medium|low)$ ]]; then + echo "Error: Invalid codex effort in state file: $CODEX_EXEC_EFFORT" >&2 + echo " Must be one of: xhigh, high, medium, low" >&2 + end_loop "$LOOP_DIR" "$state_file" "$EXIT_UNEXPECTED" + exit 0 + fi + + # Validate critical fields were actually present (not just defaulted) + # This prevents silently treating a truncated state file as round 0 + if [[ -z "$RAW_CURRENT_ROUND" ]]; then + echo "Error: State file missing required field: current_round" >&2 + echo " State file may be truncated or corrupted" >&2 + end_loop "$LOOP_DIR" "$state_file" "$EXIT_UNEXPECTED" + exit 0 + fi + if [[ -z "$RAW_MAX_ITERATIONS" ]]; then + echo "Error: State file missing required field: max_iterations" >&2 + echo " State file may be truncated or corrupted" >&2 + end_loop "$LOOP_DIR" "$state_file" "$EXIT_UNEXPECTED" + exit 0 + fi + + # Validate numeric fields + if [[ ! "$CURRENT_ROUND" =~ ^[0-9]+$ ]]; then + echo "Warning: State file corrupted (current_round not numeric), stopping loop" >&2 + end_loop "$LOOP_DIR" "$state_file" "$EXIT_UNEXPECTED" + exit 0 + fi + + if [[ ! "$MAX_ITERATIONS" =~ ^[0-9]+$ ]]; then + echo "Warning: State file corrupted (max_iterations not numeric), using default" >&2 + MAX_ITERATIONS=42 + fi + + if [[ ! "$MAINLINE_STALL_COUNT" =~ ^[0-9]+$ ]]; then + echo "Warning: Invalid mainline_stall_count '$MAINLINE_STALL_COUNT', defaulting to 0" >&2 + MAINLINE_STALL_COUNT=0 + fi + LAST_MAINLINE_VERDICT=$(normalize_mainline_progress_verdict "$LAST_MAINLINE_VERDICT") + DRIFT_STATUS=$(normalize_drift_status "$DRIFT_STATUS") +} + +# Validate schema for v1.1.2+ fields +validate_state_schema_v1_1_2() { + if [[ -z "$PLAN_TRACKED" || -z "$START_BRANCH" ]]; then + REASON="RLCR loop state file is missing required fields (plan_tracked or start_branch). + +This indicates the loop was started with an older version of humanize. + +**Options:** +1. Cancel the loop: \`/humanize:cancel-rlcr-loop\` +2. 
# Schema gate for the fields introduced in v1.5.0 (review_started and base_branch).
#
# Globals read: REVIEW_STARTED, BASE_BRANCH
# When review_started is anything other than "true"/"false", or base_branch is
# empty, sets REASON, prints a {"decision": "block", ...} JSON object via jq and
# exits 0. review_started is examined first. Returns quietly when both are usable.
validate_state_schema_v1_5_0() {
    local block_msg=""

    case "$REVIEW_STARTED" in
        true|false)
            # review_started is fine; base_branch is the only thing left to check.
            if [[ -z "$BASE_BRANCH" ]]; then
                REASON="RLCR loop state file is missing base_branch field.

This indicates the loop was started with an older version of humanize (pre-1.5.0).

**Options:**
1. Cancel the loop: \`/humanize:cancel-rlcr-loop\`
2. Update humanize plugin to version 1.5.0+
3. Restart the RLCR loop with the updated plugin"
                block_msg="Loop: Blocked - state schema outdated (missing base_branch)"
            fi
            ;;
        *)
            REASON="RLCR loop state file is missing or has invalid review_started field.

This indicates the loop was started with an older version of humanize (pre-1.5.0).

**Options:**
1. Cancel the loop: \`/humanize:cancel-rlcr-loop\`
2. Update humanize plugin to version 1.5.0+
3. Restart the RLCR loop with the updated plugin"
            block_msg="Loop: Blocked - state schema outdated (missing review_started)"
            ;;
    esac

    if [[ -z "$block_msg" ]]; then
        return 0
    fi

    jq -n --arg reason "$REASON" --arg msg "$block_msg" \
        '{"decision": "block", "reason": $reason, "systemMessage": $msg}'
    exit 0
}
# Helper: remove a leftover .git/index.lock.
#
# A git process that is killed mid-operation (e.g. by a timeout wrapper) can leave
# index.lock behind, after which git add/commit fail with
#   fatal: Unable to create '.git/index.lock': File exists.
#
# Arguments: $1 = project root (optional; defaults to the global PROJECT_ROOT)
# Returns 0 without doing anything when the git directory cannot be resolved or
# no lock file is present. The lock is removed whenever it exists; this function
# does not itself check whether another git process still holds it.
cleanup_stale_index_lock() {
    local repo_root="${1:-$PROJECT_ROOT}"
    local dot_git lock_path

    if ! dot_git=$(git -C "$repo_root" rev-parse --git-dir 2>/dev/null); then
        return 0
    fi

    # rev-parse --git-dir may print a path relative to the repo; anchor it.
    case "$dot_git" in
        /*) ;;
        *) dot_git="$repo_root/$dot_git" ;;
    esac

    lock_path="$dot_git/index.lock"
    [[ -f "$lock_path" ]] || return 0

    echo "Removing stale $lock_path" >&2
    rm -f "$lock_path"
}
# Run Codex code review and save debug files.
#
# Arguments: $1 = round number used in the generated file names
# Globals read: BASE_BRANCH, BASE_COMMIT (optional), CACHE_DIR, LOOP_DIR,
#   TEMPLATE_DIR, PROJECT_ROOT, CODEX_TIMEOUT, and the argument arrays
#   CODEX_DISABLE_HOOKS_ARGS and CODEX_REVIEW_ARGS (either may be empty or unset)
# Sets: CODEX_REVIEW_CMD_FILE, CODEX_REVIEW_LOG_FILE, CODEX_REVIEW_EXIT_CODE
# Returns: the exit code of the (timeout-wrapped) `codex review` command
run_codex_code_review() {
    local round="$1"
    local timestamp
    timestamp=$(date -u +%Y-%m-%dT%H:%M:%SZ)

    # Determine review base: prefer BASE_COMMIT (captured at loop start) over BASE_BRANCH
    # Using the fixed commit SHA prevents comparing a branch to itself when working on main,
    # as the branch ref advances with each commit but the captured SHA stays fixed
    local review_base="${BASE_COMMIT:-$BASE_BRANCH}"
    local review_base_type="branch"
    # ${BASE_COMMIT:-} rather than $BASE_COMMIT: the variable is treated as optional
    # one line above, so an unset value must not trip `set -u` here.
    if [[ -n "${BASE_COMMIT:-}" ]]; then
        review_base_type="commit"
    fi

    CODEX_REVIEW_CMD_FILE="$CACHE_DIR/round-${round}-codex-review.cmd"
    CODEX_REVIEW_LOG_FILE="$CACHE_DIR/round-${round}-codex-review.log"
    local prompt_file="$LOOP_DIR/round-${round}-review-prompt.md"

    # Create audit prompt file describing the code review invocation
    local prompt_fallback="# Code Review Phase - Round ${round}

This file documents the code review invocation for audit purposes.
Provider: codex

## Review Configuration
- Base Branch: ${BASE_BRANCH}
- Base Commit: ${BASE_COMMIT:-N/A}
- Review Base (${review_base_type}): ${review_base}
- Review Round: ${round}
- Timestamp: ${timestamp}
"
    load_and_render_safe "$TEMPLATE_DIR" "codex/code-review-phase.md" "$prompt_fallback" \
        "REVIEW_ROUND=$round" \
        "BASE_BRANCH=$BASE_BRANCH" \
        "BASE_COMMIT=${BASE_COMMIT:-N/A}" \
        "REVIEW_BASE=$review_base" \
        "REVIEW_BASE_TYPE=$review_base_type" \
        "TIMESTAMP=$timestamp" > "$prompt_file"

    echo "Code review prompt (audit) saved to: $prompt_file" >&2

    {
        echo "# Code review invocation debug info"
        echo "# Timestamp: $timestamp"
        echo "# Working directory: $PROJECT_ROOT"
        echo "# Base branch: $BASE_BRANCH"
        echo "# Base commit: ${BASE_COMMIT:-N/A}"
        echo "# Review base ($review_base_type): $review_base"
        echo "# Timeout: $CODEX_TIMEOUT seconds"
        echo ""
        # Both arrays use the ${arr[*]+...} guard so an empty or unset array
        # expands to nothing under `set -u`.
        echo "cat '$prompt_file' | codex review ${CODEX_DISABLE_HOOKS_ARGS[*]+"${CODEX_DISABLE_HOOKS_ARGS[*]}"} --base $review_base ${CODEX_REVIEW_ARGS[*]+"${CODEX_REVIEW_ARGS[*]}"} -"
    } > "$CODEX_REVIEW_CMD_FILE"

    echo "Code review command saved to: $CODEX_REVIEW_CMD_FILE" >&2
    echo "Running codex review with timeout ${CODEX_TIMEOUT}s in $PROJECT_ROOT (base: $review_base)..." >&2

    # Runs in a subshell so the cd into PROJECT_ROOT does not leak to the caller;
    # stdout and stderr of the review both land in the log file.
    CODEX_REVIEW_EXIT_CODE=0
    (cd "$PROJECT_ROOT" && cat "$prompt_file" | run_with_timeout "$CODEX_TIMEOUT" codex review ${CODEX_DISABLE_HOOKS_ARGS[@]+"${CODEX_DISABLE_HOOKS_ARGS[@]}"} --base "$review_base" ${CODEX_REVIEW_ARGS[@]+"${CODEX_REVIEW_ARGS[@]}"} -) \
        > "$CODEX_REVIEW_LOG_FILE" 2>&1 || CODEX_REVIEW_EXIT_CODE=$?

    echo "Code review exit code: $CODEX_REVIEW_EXIT_CODE" >&2
    echo "Code review log saved to: $CODEX_REVIEW_LOG_FILE" >&2

    return "$CODEX_REVIEW_EXIT_CODE"
}
# Validate the numeric and enum state fields once the state file has been parsed.
#
# Arguments: $1 = state file being processed (falls back to the global STATE_FILE
#                 when omitted, which is what earlier callers relied on)
# Globals read: LOOP_DIR, EXIT_UNEXPECTED, CURRENT_ROUND, MAX_ITERATIONS,
#   MAINLINE_STALL_COUNT, LAST_MAINLINE_VERDICT, DRIFT_STATUS
# Globals set: MAX_ITERATIONS (42 when not numeric), MAINLINE_STALL_COUNT (0 when
#   not numeric), LAST_MAINLINE_VERDICT and DRIFT_STATUS (passed through their
#   normalize_* helpers)
# A non-numeric current_round is treated as corruption: the loop is ended through
# end_loop for the given state file and the hook exits 0.
validate_state_file_integrity() {
    # Use the file the caller handed in. Previously the argument was accepted but
    # ignored in favour of $STATE_FILE, unlike parse_and_export_state, which passes
    # its own state_file argument to end_loop.
    local state_file="${1:-${STATE_FILE:-}}"

    if [[ ! "$CURRENT_ROUND" =~ ^[0-9]+$ ]]; then
        echo "Warning: State file corrupted (current_round not numeric), stopping loop" >&2
        end_loop "$LOOP_DIR" "$state_file" "$EXIT_UNEXPECTED"
        exit 0
    fi

    if [[ ! "$MAX_ITERATIONS" =~ ^[0-9]+$ ]]; then
        echo "Warning: State file corrupted (max_iterations not numeric), using default" >&2
        MAX_ITERATIONS=42
    fi

    if [[ ! "$MAINLINE_STALL_COUNT" =~ ^[0-9]+$ ]]; then
        echo "Warning: Invalid mainline_stall_count '$MAINLINE_STALL_COUNT', defaulting to 0" >&2
        MAINLINE_STALL_COUNT=0
    fi
    LAST_MAINLINE_VERDICT=$(normalize_mainline_progress_verdict "$LAST_MAINLINE_VERDICT")
    DRIFT_STATUS=$(normalize_drift_status "$DRIFT_STATUS")
}
# Schema validation for the v1.5.0+ fields (review_started, base_branch).
#
# Globals read: REVIEW_STARTED, BASE_BRANCH
# Returns quietly when review_started is "true" or "false" and base_branch is
# non-empty. Otherwise sets REASON, prints a {"decision": "block", ...} JSON object
# via jq and exits 0; a bad review_started is reported in preference to a missing
# base_branch.
validate_schema_v1_5_0() {
    local review_ok=false
    if [[ "$REVIEW_STARTED" == "true" || "$REVIEW_STARTED" == "false" ]]; then
        review_ok=true
    fi

    # Both fields usable: nothing to report.
    if [[ "$review_ok" == "true" && -n "$BASE_BRANCH" ]]; then
        return 0
    fi

    local system_msg
    if [[ "$review_ok" != "true" ]]; then
        REASON="RLCR loop state file is missing or has invalid review_started field.

This indicates the loop was started with an older version of humanize (pre-1.5.0).

**Options:**
1. Cancel the loop: \`/humanize:cancel-rlcr-loop\`
2. Update humanize plugin to version 1.5.0+
3. Restart the RLCR loop with the updated plugin"
        system_msg="Loop: Blocked - state schema outdated (missing review_started)"
    else
        REASON="RLCR loop state file is missing base_branch field.

This indicates the loop was started with an older version of humanize (pre-1.5.0).

**Options:**
1. Cancel the loop: \`/humanize:cancel-rlcr-loop\`
2. Update humanize plugin to version 1.5.0+
3. Restart the RLCR loop with the updated plugin"
        system_msg="Loop: Blocked - state schema outdated (missing base_branch)"
    fi

    jq -n --arg reason "$REASON" --arg msg "$system_msg" \
        '{"decision": "block", "reason": $reason, "systemMessage": $msg}'
    exit 0
}
# Validate branch consistency: the loop must still be on the branch it started on.
#
# Arguments: $1 = timeout handed to run_with_timeout for the git call
#            $2 = project root (used as `git -C <root>`)
# Globals read: START_BRANCH
# Globals set: CURRENT_BRANCH, GIT_EXIT_CODE; REASON when blocking
# Blocks (prints a {"decision": "block", ...} JSON object and exits 0) when the
# current branch cannot be determined, or when START_BRANCH is non-empty and
# differs from it. Returns normally otherwise.
validate_branch_consistency() {
    local git_timeout="$1"
    local project_root="$2"

    # Reset first: GIT_EXIT_CODE is a global that is only assigned on failure below,
    # so a non-zero value left over from an earlier git call in this shell would
    # otherwise be reported as a failure of this one.
    GIT_EXIT_CODE=0
    CURRENT_BRANCH=$(run_with_timeout "$git_timeout" git -C "$project_root" rev-parse --abbrev-ref HEAD 2>/dev/null) || GIT_EXIT_CODE=$?
    if [[ $GIT_EXIT_CODE -ne 0 || -z "$CURRENT_BRANCH" ]]; then
        REASON="Git operation failed or timed out.

Cannot verify branch consistency. This may indicate:
- Git is not responding
- Repository is in an invalid state
- Network issues (if remote operations are involved)

Please check git status manually and try again."
        jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - git operation failed" \
            '{"decision": "block", "reason": $reason, "systemMessage": $msg}'
        exit 0
    fi

    if [[ -n "$START_BRANCH" && "$CURRENT_BRANCH" != "$START_BRANCH" ]]; then
        REASON="Git branch changed during RLCR loop.

Started on: $START_BRANCH
Current: $CURRENT_BRANCH

Branch switching is not allowed. Switch back to $START_BRANCH or cancel the loop."
        jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - branch changed" \
            '{"decision": "block", "reason": $reason, "systemMessage": $msg}'
        exit 0
    fi
}
+ +Please copy the plan file to the loop directory: + cp \"$FULL_PLAN_PATH\" \"$BACKUP_PLAN\" + +This backup is required for plan integrity verification." + jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - plan backup missing" \ + '{"decision": "block", "reason": $reason, "systemMessage": $msg}' + exit 0 + fi + + if [[ ! -f "$FULL_PLAN_PATH" ]]; then + REASON="Project plan file has been deleted. + +Original: $PLAN_FILE +Backup available at: $BACKUP_PLAN + +You can restore from backup if needed. Plan file modifications are not allowed during RLCR loop." + jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - plan file deleted" \ + '{"decision": "block", "reason": $reason, "systemMessage": $msg}' + exit 0 + fi + + if [[ "$PLAN_TRACKED" == "true" ]]; then + PLAN_GIT_STATUS=$(run_with_timeout "$git_timeout" git -C "$project_root" status --porcelain "$PLAN_FILE" 2>/dev/null || echo "") + if [[ -n "$PLAN_GIT_STATUS" ]]; then + REASON="Plan file has uncommitted modifications. + +File: $PLAN_FILE +Status: $PLAN_GIT_STATUS + +This RLCR loop was started with --track-plan-file. Plan file modifications are not allowed during the loop." + jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - plan file modified (uncommitted)" \ + '{"decision": "block", "reason": $reason, "systemMessage": $msg}' + exit 0 + fi + fi + + if ! diff -q "$FULL_PLAN_PATH" "$BACKUP_PLAN" &>/dev/null; then + FALLBACK="# Plan File Modified + +The plan file \`$PLAN_FILE\` has been modified since the RLCR loop started. + +**Modifying plan files is forbidden during an active RLCR loop.** + +If you need to change the plan: +1. Cancel the current loop: \`/humanize:cancel-rlcr-loop\` +2. Update the plan file +3. 
Start a new loop: \`/humanize:start-rlcr-loop $PLAN_FILE\` + +Backup available at: \`$BACKUP_PLAN\`" + REASON=$(load_and_render_safe "$template_dir" "block/plan-file-modified.md" "$FALLBACK" \ + "PLAN_FILE=$PLAN_FILE" \ + "BACKUP_PATH=$BACKUP_PLAN") + jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - plan file modified" \ + '{"decision": "block", "reason": $reason, "systemMessage": $msg}' + exit 0 + fi +} + +# Check for incomplete tasks +check_incomplete_tasks() { + local script_dir="$1" + local template_dir="$2" + + TODO_CHECKER="$script_dir/check-todos-from-transcript.py" + + if [[ ! -f "$TODO_CHECKER" ]]; then + return 0 + fi + + TODO_RESULT=$(echo "$HOOK_INPUT" | python3 "$TODO_CHECKER" 2>&1) || TODO_EXIT=$? + TODO_EXIT=${TODO_EXIT:-0} + + if [[ "$TODO_EXIT" -eq 2 ]]; then + REASON="Task checker encountered a parse error. + +Error: $TODO_RESULT + +This may indicate an issue with the hook input or transcript format. +Please try again or cancel the loop if this persists." + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Blocked - task checker parse error" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi + + if [[ "$TODO_EXIT" -eq 1 ]]; then + INCOMPLETE_LIST=$(echo "$TODO_RESULT" | tail -n +2) + + FALLBACK="# Incomplete Tasks + +Complete these tasks before exiting: + +{{INCOMPLETE_LIST}}" + REASON=$(load_and_render_safe "$template_dir" "block/incomplete-todos.md" "$FALLBACK" \ + "INCOMPLETE_LIST=$INCOMPLETE_LIST") + + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Blocked - incomplete tasks detected, please finish all tasks first" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi +} + +# Cache git status output +cache_git_status() { + local git_timeout="$1" + local project_root="$2" + local template_dir="$3" + + GIT_STATUS_CACHED="" + GIT_IS_REPO=false + + if command -v git &>/dev/null && run_with_timeout "$git_timeout" git -C "$project_root" rev-parse 
--git-dir &>/dev/null 2>&1; then + GIT_IS_REPO=true + GIT_STATUS_EXIT=0 + GIT_STATUS_CACHED=$(run_with_timeout "$git_timeout" git -C "$project_root" status --porcelain 2>/dev/null) || GIT_STATUS_EXIT=$? + + if [[ $GIT_STATUS_EXIT -ne 0 ]]; then + cleanup_stale_index_lock + FALLBACK="# Git Status Failed + +Git status operation failed or timed out (exit code {{GIT_STATUS_EXIT}}). + +Cannot verify repository state. Please check git status manually and try again." + REASON=$(load_and_render_safe "$template_dir" "block/git-status-failed.md" "$FALLBACK" \ + "GIT_STATUS_EXIT=$GIT_STATUS_EXIT") + jq -n --arg reason "$REASON" --arg msg "Loop: Blocked - git status failed (exit $GIT_STATUS_EXIT)" \ + '{"decision": "block", "reason": $reason, "systemMessage": $msg}' + exit 0 + fi + fi +} + +# Detect large files +detect_large_files() { + local template_dir="$1" + + if [[ "$GIT_IS_REPO" != "true" ]]; then + return 0 + fi + + local MAX_LINES=2000 + local LARGE_FILES="" + + while IFS= read -r line; do + if [ -z "$line" ]; then + continue + fi + + filename="${line#???}" + case "$filename" in + *" -> "*) filename="${filename##* -> }" ;; + esac + + filename="$PROJECT_ROOT/$filename" + + if [ ! 
-f "$filename" ]; then + continue + fi + + ext="${filename##*.}" + ext_lower=$(to_lower "$ext") + + case "$ext_lower" in + py|js|ts|tsx|jsx|java|c|cpp|cc|cxx|h|hpp|cs|go|rs|rb|php|swift|kt|kts|scala|sh|bash|zsh) + file_type="code" + ;; + md|rst|txt|adoc|asciidoc) + file_type="documentation" + ;; + *) + continue + ;; + esac + + line_count=$(wc -l < "$filename" 2>/dev/null | tr -d ' ') || continue + + [[ "$line_count" =~ ^[0-9]+$ ]] || continue + + if [ "$line_count" -gt "$MAX_LINES" ]; then + LARGE_FILES="${LARGE_FILES} +- \`${filename}\`: ${line_count} lines (${file_type} file)" + fi + done <<< "$GIT_STATUS_CACHED" + + if [ -n "$LARGE_FILES" ]; then + FALLBACK="# Large Files Detected + +Files exceeding {{MAX_LINES}} lines: + +{{LARGE_FILES}} + +Split these into smaller modules before continuing." + REASON=$(load_and_render_safe "$template_dir" "block/large-files.md" "$FALLBACK" \ + "MAX_LINES=$MAX_LINES" \ + "LARGE_FILES=$LARGE_FILES") + + jq -n \ + --arg reason "$REASON" \ + --arg msg "Loop: Blocked - large files detected (>${MAX_LINES} lines), please split into smaller modules" \ + '{ + "decision": "block", + "reason": $reason, + "systemMessage": $msg + }' + exit 0 + fi +} diff --git a/hooks/lib/loop-codex-verdict.sh b/hooks/lib/loop-codex-verdict.sh new file mode 100644 index 00000000..0dd1cde7 --- /dev/null +++ b/hooks/lib/loop-codex-verdict.sh @@ -0,0 +1,174 @@ +#!/usr/bin/env bash +# +# Codex Result Handling and Verdict Extraction +# +# Validates Codex execution results, extracts mainline verdicts, and handles +# COMPLETE/STOP markers. Sets verdict-tracking variables for state updates. 
+
+set -euo pipefail
+
+# Helper function to print Codex failure and block exit for retry
+#
+# Builds a Markdown reason from the error type, the details and the debug
+# file paths, prints it as a "block" decision (JSON) on stdout, then calls
+# exit 0, so it never returns to the caller.
+#
+# Arguments: $1=error_type, $2=details
+# Reads:     CODEX_CMD_FILE, CODEX_STDOUT_FILE, CODEX_STDERR_FILE
+# Sets:      REASON
+codex_failure_exit() {
+    local error_type="$1"
+    local details="$2"
+
+    REASON="# Codex Review Failed
+
+**Error Type:** $error_type
+
+$details
+
+**Debug files:**
+- Command: $CODEX_CMD_FILE
+- Stdout: $CODEX_STDOUT_FILE
+- Stderr: $CODEX_STDERR_FILE
+
+Please retry or use \`/cancel-rlcr-loop\` to end the loop."
+
+    # jq -Rs turns the whole reason text into one JSON string literal.
+    cat <<EOF
+{
+  "decision": "block",
+  "reason": $(echo "$REASON" | jq -Rs .)
+}
+EOF
+    exit 0
+}
+
+# Validate Codex execution results
+#
+# Checks, in order: a non-zero Codex exit code, a missing review result file
+# (after trying to copy a non-empty stdout capture into place), and an empty
+# review result file.
+#
+# Arguments: (none - uses globals: CODEX_EXIT_CODE, CODEX_STDOUT_FILE, CODEX_STDERR_FILE, REVIEW_RESULT_FILE, CODEX_CMD_FILE)
+# Sets:      STDERR_CONTENT, STDOUT_CONTENT
+# Returns:   0 on success, exits with block decision on failure
+validate_codex_execution() {
+    # Check 1: Codex exit code indicates failure
+    if [[ "$CODEX_EXIT_CODE" -ne 0 ]]; then
+        STDERR_CONTENT=""
+        if [[ -f "$CODEX_STDERR_FILE" ]]; then
+            STDERR_CONTENT=$(tail -30 "$CODEX_STDERR_FILE" 2>/dev/null || echo "(unable to read stderr)")
+        fi
+
+        codex_failure_exit "Non-zero exit code ($CODEX_EXIT_CODE)" \
+"Codex exited with code $CODEX_EXIT_CODE.
+This may indicate:
+  - Invalid arguments or configuration
+  - Authentication failure
+  - Network issues
+  - Prompt format issues (e.g., multiline handling)
+
+Stderr output (last 30 lines):
+$STDERR_CONTENT"
+    fi
+
+    # Check if Codex created the review result file (it should write to workspace)
+    # If not, check if it wrote to stdout
+    if [[ ! -f "$REVIEW_RESULT_FILE" ]]; then
+        # Codex might have written output to stdout instead
+        if [[ -s "$CODEX_STDOUT_FILE" ]]; then
+            echo "Codex output found in stdout, copying to review result file..." >&2
+            if ! cp "$CODEX_STDOUT_FILE" "$REVIEW_RESULT_FILE" 2>/dev/null; then
+                codex_failure_exit "Failed to copy stdout to review result file" \
+"Codex wrote output to stdout but copying to review file failed.
+Source: $CODEX_STDOUT_FILE
+Target: $REVIEW_RESULT_FILE
+
+This may indicate permission issues or disk space problems.
+Check if the loop directory is writable."
+            fi
+        fi
+    fi
+
+    # Check 2: Review result file still doesn't exist
+    if [[ ! -f "$REVIEW_RESULT_FILE" ]]; then
+        STDERR_CONTENT=""
+        if [[ -f "$CODEX_STDERR_FILE" ]]; then
+            STDERR_CONTENT=$(tail -30 "$CODEX_STDERR_FILE" 2>/dev/null || echo "(no stderr output)")
+        fi
+
+        STDOUT_CONTENT=""
+        if [[ -f "$CODEX_STDOUT_FILE" ]]; then
+            STDOUT_CONTENT=$(tail -30 "$CODEX_STDOUT_FILE" 2>/dev/null || echo "(no stdout output)")
+        fi
+
+        codex_failure_exit "Review result file not created" \
+"Expected file: $REVIEW_RESULT_FILE
+Codex completed (exit code 0) but did not create the review result file.
+
+This may indicate:
+  - Codex did not understand the prompt
+  - Codex wrote to wrong path
+  - Workspace/permission issues
+
+Stdout (last 30 lines):
+$STDOUT_CONTENT
+
+Stderr (last 30 lines):
+$STDERR_CONTENT"
+    fi
+
+    # Check 3: Review result file is empty
+    if [[ ! -s "$REVIEW_RESULT_FILE" ]]; then
+        codex_failure_exit "Review result file is empty" \
+"File exists but is empty: $REVIEW_RESULT_FILE
+Codex created the file but wrote no content.
+
+This may indicate Codex encountered an internal error."
+    fi
+}
+
+# Extract and process mainline verdict
+#
+# Arguments: (none - uses globals: REVIEW_CONTENT, REVIEW_STARTED,
+#            MAINLINE_STALL_COUNT, LAST_MAINLINE_VERDICT, DRIFT_STATUS,
+#            MARKER_STOP, MARKER_COMPLETE, MAINLINE_VERDICT_*, DRIFT_STATUS_*,
+#            REVIEW_RESULT_FILE, REVIEW_PROMPT_FILE)
+# Sets: LAST_LINE, LAST_LINE_TRIMMED, NEXT_MAINLINE_STALL_COUNT,
+#       NEXT_LAST_MAINLINE_VERDICT, NEXT_DRIFT_STATUS, DRIFT_REPLAN_REQUIRED,
+#       MAINLINE_DRIFT_STOP; EXTRACTED_MAINLINE_VERDICT is assigned only while
+#       REVIEW_STARTED is not "true".
+process_verdict() {
+    # Check if the last non-empty line is exactly "COMPLETE" or "STOP"
+    # The word must be on its own line to avoid false positives like "CANNOT COMPLETE"
+    # Use strict matching: only whitespace before/after the word is allowed
+    #
+    # grep -v exits 1 when every line is blank; with errexit and pipefail that
+    # would abort the hook before any decision is printed, so an all-blank
+    # review is treated as "no last line" instead.
+    LAST_LINE=$(echo "$REVIEW_CONTENT" | grep -v '^[[:space:]]*$' | tail -1) || true
+    LAST_LINE_TRIMMED=$(echo "$LAST_LINE" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
+
+    # Start from the current state; the branches below overwrite what changes.
+    NEXT_MAINLINE_STALL_COUNT="$MAINLINE_STALL_COUNT"
+    NEXT_LAST_MAINLINE_VERDICT="$LAST_MAINLINE_VERDICT"
+    NEXT_DRIFT_STATUS="$DRIFT_STATUS"
+    DRIFT_REPLAN_REQUIRED=false
+    MAINLINE_DRIFT_STOP=false
+
+    if [[ "$REVIEW_STARTED" != "true" ]]; then
+        EXTRACTED_MAINLINE_VERDICT=$(extract_mainline_progress_verdict "$REVIEW_CONTENT")
+
+        # A review ending in the STOP marker is exempt from the verdict requirement.
+        if [[ "$LAST_LINE_TRIMMED" != "$MARKER_STOP" ]] && [[ "$EXTRACTED_MAINLINE_VERDICT" == "$MAINLINE_VERDICT_UNKNOWN" ]]; then
+            echo "Implementation review output is missing Mainline Progress Verdict. Blocking exit for safety." >&2
+            block_missing_mainline_verdict "$REVIEW_RESULT_FILE" "$REVIEW_PROMPT_FILE"
+        fi
+
+        case "$EXTRACTED_MAINLINE_VERDICT" in
+            "$MAINLINE_VERDICT_ADVANCED")
+                NEXT_MAINLINE_STALL_COUNT=0
+                NEXT_LAST_MAINLINE_VERDICT="$MAINLINE_VERDICT_ADVANCED"
+                NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL"
+                ;;
+            "$MAINLINE_VERDICT_STALLED"|"$MAINLINE_VERDICT_REGRESSED")
+                # Two consecutive non-advancing rounds require a replan;
+                # three set the drift-stop flag.
+                NEXT_MAINLINE_STALL_COUNT=$((MAINLINE_STALL_COUNT + 1))
+                NEXT_LAST_MAINLINE_VERDICT="$EXTRACTED_MAINLINE_VERDICT"
+                if [[ "$NEXT_MAINLINE_STALL_COUNT" -ge 2 ]]; then
+                    NEXT_DRIFT_STATUS="$DRIFT_STATUS_REPLAN_REQUIRED"
+                    DRIFT_REPLAN_REQUIRED=true
+                else
+                    NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL"
+                fi
+                if [[ "$NEXT_MAINLINE_STALL_COUNT" -ge 3 ]]; then
+                    MAINLINE_DRIFT_STOP=true
+                fi
+                ;;
+            *)
+                :
+                ;;
+        esac
+
+        # The COMPLETE marker overrides the verdict and clears all drift state.
+        if [[ "$LAST_LINE_TRIMMED" == "$MARKER_COMPLETE" ]]; then
+            NEXT_MAINLINE_STALL_COUNT=0
+            NEXT_LAST_MAINLINE_VERDICT="$MAINLINE_VERDICT_ADVANCED"
+            NEXT_DRIFT_STATUS="$DRIFT_STATUS_NORMAL"
+            DRIFT_REPLAN_REQUIRED=false
+            MAINLINE_DRIFT_STOP=false
+        fi
+    fi
+}
diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 3e54e6ef..9e7d334e 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -379,7 +379,7 @@ find_active_loop() { fi local stored_session_id - stored_session_id=$(sed -n '/^---$/,/^---$/{ /^'"${FIELD_SESSION_ID}"':/{ s/'"${FIELD_SESSION_ID}"': *//; p; } }' "$any_state" 2>/dev/null | tr -d ' ') + stored_session_id=$(awk -v key="${FIELD_SESSION_ID}" 'BEGIN{f=0} /^---$/{f++; next} f==1 && $0 ~ "^"key":"{sub("^"key":[[:space:]]*",""); print; exit}' "$any_state" 2>/dev/null | tr -d ' ') # Empty stored session_id matches any session (backward compat).
if [[ -z "$stored_session_id" ]] || [[ "$stored_session_id" == "$filter_session_id" ]]; then @@ -809,8 +809,8 @@ extract_round_number() { local filename_lower filename_lower=$(to_lower "$filename") - # Use sed for portable regex extraction (works in both bash and zsh) - echo "$filename_lower" | sed -n 's/.*round-\([0-9][0-9]*\)-\(summary\|prompt\|todos\|contract\)\.md$/\1/p' + # Use ERE (-E) so | alternation works on both GNU and BSD sed (macOS) + echo "$filename_lower" | sed -En 's/.*round-([0-9]+)-(summary|prompt|todos|contract)\.md$/\1/p' } # Check if a file is in the allowlist for the active loop @@ -820,6 +820,11 @@ is_allowlisted_file() { local file_path="$1" local active_loop_dir="$2" + # Canonicalize both paths to resolve symlinks (e.g. /var -> /private/var on macOS). + local canonical_file canonical_loop + canonical_file=$(canonicalize_path "$file_path" 2>/dev/null || echo "$file_path") + canonical_loop=$(canonicalize_path "$active_loop_dir" 2>/dev/null || echo "$active_loop_dir") + local allowlist=( "round-1-todos.md" "round-2-todos.md" @@ -828,7 +833,7 @@ is_allowlisted_file() { ) for allowed in "${allowlist[@]}"; do - if [[ "$file_path" == "$active_loop_dir/$allowed" ]]; then + if [[ "$canonical_file" == "$canonical_loop/$allowed" ]]; then return 0 fi done @@ -1522,7 +1527,7 @@ Use Write or Edit on: {{CORRECT_PATH}} Rules: - Keep the **IMMUTABLE SECTION** unchanged -- Do not modify `goal-tracker.md` via Bash +- Do not modify goal-tracker.md via Bash - Do not write to an old loop session's tracker" load_and_render_safe "$TEMPLATE_DIR" "block/goal-tracker-modification.md" "$fallback" \ diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh index a95e81af..b2731d47 100644 --- a/hooks/lib/methodology-analysis.sh +++ b/hooks/lib/methodology-analysis.sh @@ -162,14 +162,10 @@ complete_methodology_analysis() { ;; esac - # Rename methodology-analysis-state.md to the terminal state - local target_name="${exit_reason}-state.md" - mv 
"$LOOP_DIR/methodology-analysis-state.md" "$LOOP_DIR/$target_name" - echo "Methodology analysis complete. State preserved as: $LOOP_DIR/$target_name" >&2 - - # Clean up marker file - rm -f "$LOOP_DIR/.methodology-exit-reason" - + # Validation complete. The caller (stop hook) is responsible for renaming + # methodology-analysis-state.md to the terminal state and cleaning up + # .methodology-exit-reason AFTER the git-clean gate passes, so the active + # state file remains in place until cleanliness is confirmed. return 0 } diff --git a/hooks/lib/project-root.sh b/hooks/lib/project-root.sh index cb23403a..6e602a0f 100644 --- a/hooks/lib/project-root.sh +++ b/hooks/lib/project-root.sh @@ -3,9 +3,19 @@ # Deterministic project-root resolver for all humanize hooks and scripts. # # Resolution priority: -# 1. CLAUDE_PROJECT_DIR (set by Claude Code, stable across `cd` within a session) -# 2. git rev-parse --show-toplevel (nearest enclosing repo) -# 3. Non-zero return. +# 1. linked git worktree toplevel when it differs from CLAUDE_PROJECT_DIR +# 2. CLAUDE_PROJECT_DIR (Claude session root) +# 3. git rev-parse --show-toplevel (nearest enclosing repo) +# 4. Non-zero return. +# +# CLAUDE_PROJECT_DIR is normally the authoritative session root. Hooks and +# helper scripts are often executed from the plugin checkout while targeting a +# different project, so blindly preferring the plugin repo's git toplevel makes +# active loop state and project config disappear. +# +# The exception is a linked git worktree: explore-idea workers can inherit the +# coordinator's CLAUDE_PROJECT_DIR while running inside their own worktree. In +# that case the current checkout is the safer root. 
# # pwd is intentionally NOT used as a fallback: it drifts with `cd` # invocations during a session and silently causes state.md lookups @@ -39,17 +49,37 @@ _HUMANIZE_PROJECT_ROOT_SOURCED=1 # } # resolve_project_root() { - local root="${CLAUDE_PROJECT_DIR:-}" - if [[ -z "$root" ]]; then - root="$(git rev-parse --show-toplevel 2>/dev/null || true)" + local env_root="${CLAUDE_PROJECT_DIR:-}" + local git_root="" + local root="" + local canonical="" + + git_root="$(git rev-parse --show-toplevel 2>/dev/null || true)" + # Canonicalize both candidates so the comparison below is symlink-safe, but + # keep the raw path when canonicalize_path fails or prints nothing. + if [[ -n "$git_root" ]]; then + canonical="$(canonicalize_path "$git_root" || true)" + git_root="${canonical:-$git_root}" + fi + if [[ -n "$env_root" ]]; then + canonical="$(canonicalize_path "$env_root" || true)" + env_root="${canonical:-$env_root}" + fi + + # A regular file at <toplevel>/.git is how the linked-worktree case is + # detected. NOTE(review): submodule checkouts also use a .git file - confirm. + if [[ -n "$git_root" && -n "$env_root" && "$git_root" != "$env_root" && -f "$git_root/.git" ]]; then + root="$git_root" + elif [[ -n "$env_root" ]]; then + root="$env_root" + else + root="$git_root" fi if [[ -z "$root" ]]; then return 1 fi - local canonical - canonical=$(canonicalize_path "$root") - printf '%s\n' "${canonical:-$root}" + printf '%s\n' "$root" } # canonicalize_path_prefix diff --git a/hooks/lib/template-loader.sh b/hooks/lib/template-loader.sh index 13d29f6e..5eef26f6 100644 --- a/hooks/lib/template-loader.sh +++ b/hooks/lib/template-loader.sh @@ -70,7 +70,7 @@ render_template() { # Scans for {{VAR}} patterns and replaces them with values from environment # Replaced content goes directly to output without re-scanning local awk_exit=0 - content=$(env "${env_vars[@]}" awk ' + content=$(env ${env_vars[@]+"${env_vars[@]}"} awk ' BEGIN { # Build lookup table from environment variables with TMPL_VAR_ prefix for (name in ENVIRON) { diff --git a/hooks/loop-bash-validator.sh b/hooks/loop-bash-validator.sh index ede35304..aa455353 100755 --- a/hooks/loop-bash-validator.sh +++ b/hooks/loop-bash-validator.sh @@ -559,9 +559,11 @@ fi # ======================================== if command_modifies_file "$COMMAND_LOWER" "round-[0-9]+-todos\.md"; then - # Require full path to active loop dir to prevent
same-basename bypass from different roots + # Require full path to active loop dir to prevent same-basename bypass from different roots. + # Strip leading /private prefix so canonical paths (/private/var) match user paths (/var) on macOS. ACTIVE_LOOP_DIR_LOWER=$(to_lower "$ACTIVE_LOOP_DIR") - ACTIVE_LOOP_DIR_ESCAPED=$(echo "$ACTIVE_LOOP_DIR_LOWER" | sed 's/[\\.*^$[(){}+?|]/\\&/g') + ACTIVE_LOOP_DIR_LOWER_NORM="${ACTIVE_LOOP_DIR_LOWER#/private}" + ACTIVE_LOOP_DIR_ESCAPED=$(echo "$ACTIVE_LOOP_DIR_LOWER_NORM" | sed 's/[\\.*^$[(){}+?|]/\\&/g') if ! echo "$COMMAND_LOWER" | grep -qE "${ACTIVE_LOOP_DIR_ESCAPED}/round-[12]-todos\.md"; then todos_blocked_message "Bash" >&2 exit 2 diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 0c191d4c..da520a85 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -53,6 +53,13 @@ source "$PLUGIN_ROOT/scripts/portable-timeout.sh" # Source methodology analysis library source "$SCRIPT_DIR/lib/methodology-analysis.sh" +# Source validation gates library +source "$SCRIPT_DIR/lib/loop-codex-gates.sh" + +# Source phase handlers and stop hook helpers +source "$SCRIPT_DIR/lib/loop-codex-handlers.sh" +source "$SCRIPT_DIR/lib/loop-codex-stop-hook-helpers.sh" + # Default timeout for git operations (30 seconds) GIT_TIMEOUT=30 @@ -456,32 +463,6 @@ Complete these tasks before exiting: fi fi -# ======================================== -# Helper: Clean Up Stale index.lock -# ======================================== -# git status (and other git commands) temporarily create .git/index.lock -# while refreshing the index. If a git process is killed mid-operation -# (e.g., by a timeout wrapper), the lock file can be left behind, -# causing subsequent git add/commit to fail with: -# fatal: Unable to create '.git/index.lock': File exists. -# This helper removes the stale lock so Claude's commit won't fail. 
-cleanup_stale_index_lock() { - # Resolve the git dir relative to PROJECT_ROOT, not the hook's cwd, so - # that index.lock cleanup targets the correct repo even when the hook - # executes from a plugin/cache directory rather than the project root. - local project_root="${1:-$PROJECT_ROOT}" - local git_dir - git_dir=$(git -C "$project_root" rev-parse --git-dir 2>/dev/null) || return 0 - # git rev-parse --git-dir may return a relative path; make it absolute. - if [[ "$git_dir" != /* ]]; then - git_dir="$project_root/$git_dir" - fi - if [[ -f "$git_dir/index.lock" ]]; then - echo "Removing stale $git_dir/index.lock" >&2 - rm -f "$git_dir/index.lock" - fi -} - # ======================================== # Cache Git Status Output # ======================================== @@ -654,7 +635,14 @@ Please commit all changes before allowing the loop to exit. exit 0 fi fi - # Analysis complete and tree clean, allow exit + # Analysis complete and tree clean. Now do the terminal rename so the + # active state file stays in place until this cleanliness gate passes. + _meth_exit_reason=$(cat "$LOOP_DIR/.methodology-exit-reason" 2>/dev/null | tr -d '[:space:]' || echo "") + if [[ -n "$_meth_exit_reason" ]]; then + mv "$LOOP_DIR/methodology-analysis-state.md" "$LOOP_DIR/${_meth_exit_reason}-state.md" 2>/dev/null || true + rm -f "$LOOP_DIR/.methodology-exit-reason" + echo "Methodology analysis complete. 
State preserved as: $LOOP_DIR/${_meth_exit_reason}-state.md" >&2 + fi exit 0 else # Analysis not yet complete, block @@ -1256,14 +1244,14 @@ Provider: codex echo "# Review base ($review_base_type): $review_base" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex review ${CODEX_DISABLE_HOOKS_ARGS[*]} --base $review_base ${CODEX_REVIEW_ARGS[*]}" + echo "cat '$prompt_file' | codex review ${CODEX_DISABLE_HOOKS_ARGS[*]+"${CODEX_DISABLE_HOOKS_ARGS[*]}"} --base $review_base ${CODEX_REVIEW_ARGS[*]} -" } > "$CODEX_REVIEW_CMD_FILE" echo "Code review command saved to: $CODEX_REVIEW_CMD_FILE" >&2 echo "Running codex review with timeout ${CODEX_TIMEOUT}s in $PROJECT_ROOT (base: $review_base)..." >&2 CODEX_REVIEW_EXIT_CODE=0 - (cd "$PROJECT_ROOT" && run_with_timeout "$CODEX_TIMEOUT" codex review "${CODEX_DISABLE_HOOKS_ARGS[@]}" --base "$review_base" "${CODEX_REVIEW_ARGS[@]}") \ + (cd "$PROJECT_ROOT" && cat "$prompt_file" | run_with_timeout "$CODEX_TIMEOUT" codex review ${CODEX_DISABLE_HOOKS_ARGS[@]+"${CODEX_DISABLE_HOOKS_ARGS[@]}"} --base "$review_base" "${CODEX_REVIEW_ARGS[@]}" -) \ > "$CODEX_REVIEW_LOG_FILE" 2>&1 || CODEX_REVIEW_EXIT_CODE=$? echo "Code review exit code: $CODEX_REVIEW_EXIT_CODE" >&2 @@ -1682,7 +1670,7 @@ CODEX_PROMPT_CONTENT=$(cat "$REVIEW_PROMPT_FILE") echo "# Working directory: $PROJECT_ROOT" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex exec ${CODEX_DISABLE_HOOKS_ARGS[*]} ${CODEX_EXEC_ARGS[*]} \"<prompt>\"" + echo "codex exec ${CODEX_DISABLE_HOOKS_ARGS[*]+"${CODEX_DISABLE_HOOKS_ARGS[*]}"} ${CODEX_EXEC_ARGS[*]} \"<prompt>\"" echo "" echo "# Prompt content:" echo "$CODEX_PROMPT_CONTENT" @@ -1692,7 +1680,7 @@ echo "Codex command saved to: $CODEX_CMD_FILE" >&2 echo "Running summary review with timeout ${CODEX_TIMEOUT}s..." 
>&2 CODEX_EXIT_CODE=0 -printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex exec "${CODEX_DISABLE_HOOKS_ARGS[@]}" "${CODEX_EXEC_ARGS[@]}" - \ +printf '%s' "$CODEX_PROMPT_CONTENT" | run_with_timeout "$CODEX_TIMEOUT" codex exec ${CODEX_DISABLE_HOOKS_ARGS[@]+"${CODEX_DISABLE_HOOKS_ARGS[@]}"} "${CODEX_EXEC_ARGS[@]}" - \ > "$CODEX_STDOUT_FILE" 2> "$CODEX_STDERR_FILE" || CODEX_EXIT_CODE=$? echo "Codex exit code: $CODEX_EXIT_CODE" >&2 diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index fb9f8e1b..6fb2cd19 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -203,8 +203,9 @@ fi if is_goal_tracker_path "$FILE_PATH_LOWER"; then GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" - NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") - NORMALIZED_GOAL_TRACKER_PATH=$(_normalize_path "$GOAL_TRACKER_PATH") + # Use canonicalize_path to resolve symlinks (e.g. /var -> /private/var on macOS) + NORMALIZED_FILE_PATH=$(canonicalize_path "$FILE_PATH" 2>/dev/null || _normalize_path "$FILE_PATH") + NORMALIZED_GOAL_TRACKER_PATH=$(canonicalize_path "$GOAL_TRACKER_PATH" 2>/dev/null || _normalize_path "$GOAL_TRACKER_PATH") if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_GOAL_TRACKER_PATH" ]]; then goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 diff --git a/hooks/loop-post-bash-hook.sh b/hooks/loop-post-bash-hook.sh index 020fa877..82a4d2f7 100755 --- a/hooks/loop-post-bash-hook.sh +++ b/hooks/loop-post-bash-hook.sh @@ -26,50 +26,32 @@ set -euo pipefail # Read hook JSON input from stdin HOOK_INPUT=$(cat) -# Determine project root using the shared deterministic resolver. -# If neither CLAUDE_PROJECT_DIR nor a git toplevel is available, there -# is no active loop to patch - exit cleanly (pwd is NOT used as a -# fallback because it drifts with `cd` during a session). 
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" source "$SCRIPT_DIR/lib/project-root.sh" -PROJECT_ROOT="$(resolve_project_root)" || exit 0 -# Check for pending session_id signal file -SIGNAL_FILE="$PROJECT_ROOT/.humanize/.pending-session-id" - -if [[ ! -f "$SIGNAL_FILE" ]]; then - # No pending session_id to record - this is the normal case - exit 0 -fi - -# Read the signal file contents -# Line 1: state file path -# Line 2: full resolved path of setup script (command signature) -STATE_FILE_PATH="" -COMMAND_SIGNATURE="" -{ - read -r STATE_FILE_PATH || true - read -r COMMAND_SIGNATURE || true -} < "$SIGNAL_FILE" - -if [[ -z "$STATE_FILE_PATH" ]] || [[ ! -f "$STATE_FILE_PATH" ]]; then - # Signal file is empty or points to non-existent state file - clean up - rm -f "$SIGNAL_FILE" - exit 0 +HOOK_COMMAND="" +HOOK_CWD="" +if command -v jq >/dev/null 2>&1; then + HOOK_COMMAND=$(printf '%s' "$HOOK_INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null || echo "") + HOOK_CWD=$(printf '%s' "$HOOK_INPUT" | jq -r '.cwd // empty' 2>/dev/null || echo "") fi # Verify the Bash command is a real setup script invocation (not arbitrary text) # The command signature is the full resolved path of setup-rlcr-loop.sh. # We require the command to START with this path (quoted or unquoted), # preventing false positives like 'echo setup-rlcr-loop.sh' from consuming the signal. -if [[ -n "$COMMAND_SIGNATURE" ]]; then - HOOK_COMMAND="" - if command -v jq >/dev/null 2>&1; then - HOOK_COMMAND=$(printf '%s' "$HOOK_INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null || echo "") +matches_setup_command_signature() { + local hook_command="$1" + local command_signature="$2" + + # Older signal files did not include a command signature. Preserve the + # previous behavior for those files. + if [[ -z "$command_signature" ]]; then + return 0 fi - if [[ -z "$HOOK_COMMAND" ]]; then - exit 0 + if [[ -z "$hook_command" ]]; then + return 1 fi # Normalize consecutive slashes (e.g.
"PolyArch//scripts" -> "PolyArch/scripts"). @@ -79,8 +61,8 @@ if [[ -n "$COMMAND_SIGNATURE" ]]; then # tool_input.command preserves the original string. Without normalization, # the string comparison below always fails and session_id is never written. # See: https://github.com/PolyArch/humanize/issues/67 - HOOK_COMMAND=$(printf '%s' "$HOOK_COMMAND" | tr -s '/') - COMMAND_SIGNATURE=$(printf '%s' "$COMMAND_SIGNATURE" | tr -s '/') + hook_command=$(printf '%s' "$hook_command" | tr -s '/') + command_signature=$(printf '%s' "$command_signature" | tr -s '/') # Boundary-aware match: command must be a valid setup invocation form. # Requires the script path to be followed by end-of-string or any POSIX @@ -93,17 +75,93 @@ if [[ -n "$COMMAND_SIGNATURE" ]]; then # /full/path/setup-rlcr-loop.sh (unquoted, no args) # Rejects: "/full/path/setup-rlcr-loop.sh"foo (no boundary after quote) # echo /full/path/setup-rlcr-loop.sh (does not start with path) - IS_SETUP="false" - if [[ "$HOOK_COMMAND" == "\"${COMMAND_SIGNATURE}\"" ]] || [[ "$HOOK_COMMAND" == "\"${COMMAND_SIGNATURE}\""[[:space:]]* ]]; then - IS_SETUP="true" - elif [[ "$HOOK_COMMAND" == "${COMMAND_SIGNATURE}" ]] || [[ "$HOOK_COMMAND" == "${COMMAND_SIGNATURE}"[[:space:]]* ]]; then - IS_SETUP="true" + if [[ "$hook_command" == "\"${command_signature}\"" ]] || [[ "$hook_command" == "\"${command_signature}\""[[:space:]]* ]]; then + return 0 + fi + if [[ "$hook_command" == "${command_signature}" ]] || [[ "$hook_command" == "${command_signature}"[[:space:]]* ]]; then + return 0 fi - if [[ "$IS_SETUP" != "true" ]]; then - # This Bash event is not from the setup script - do not consume signal - exit 0 + return 1 +}
+
+# Print the canonical root for a candidate directory: the git toplevel when
+# git reports one for that directory, otherwise the directory itself.
+# Returns 1 when the argument is empty or not a directory.
+resolve_candidate_root() {
+    local start_dir="$1"
+    local toplevel=""
+
+    [[ -n "$start_dir" && -d "$start_dir" ]] || return 1
+
+    toplevel=$(git -C "$start_dir" rev-parse --show-toplevel 2>/dev/null || true)
+    canonicalize_path "${toplevel:-$start_dir}"
+}
+
+# Look for a pending session-id signal under the root of the given directory
+# and accept it only if its recorded command signature matches HOOK_COMMAND.
+# On success sets PROJECT_ROOT and SIGNAL_FILE and returns 0; returns 1 when
+# there is no root, no signal file, or the signature does not match.
+try_select_signal_file() {
+    local search_dir="$1"
+    local root=""
+    local signal_path=""
+    local state_line=""
+    local signature_line=""
+
+    root=$(resolve_candidate_root "$search_dir") || return 1
+    signal_path="$root/.humanize/.pending-session-id"
+    [[ -f "$signal_path" ]] || return 1
+
+    # Line 1 is the state file path (not needed here); line 2 is the signature.
+    {
+        read -r state_line || true
+        read -r signature_line || true
+    } < "$signal_path"
+
+    matches_setup_command_signature "$HOOK_COMMAND" "$signature_line" || return 1
+
+    PROJECT_ROOT="$root"
+    SIGNAL_FILE="$signal_path"
+    return 0
+}
+
+# Locate the pending signal in the project associated with this hook event, +# not merely the shell process cwd. This avoids stale signals from a previous +# `cd` target claiming or blocking the setup command. +PROJECT_ROOT="" +SIGNAL_FILE="" +try_select_signal_file "$HOOK_CWD" \ + || try_select_signal_file "${CLAUDE_PROJECT_DIR:-}" \ + || try_select_signal_file "$(pwd)" \ + || true + +if [[ -z "$SIGNAL_FILE" ]]; then + # No pending session_id to record - this is the normal case + exit 0 +fi + +# Read the signal file contents +# Line 1: state file path +# Line 2: full resolved path of setup script (command signature) +STATE_FILE_PATH="" +COMMAND_SIGNATURE="" +{ + read -r STATE_FILE_PATH || true + read -r COMMAND_SIGNATURE || true +} < "$SIGNAL_FILE" + +if [[ -z "$STATE_FILE_PATH" ]] || [[ ! -f "$STATE_FILE_PATH" ]]; then + # Signal file is empty or points to non-existent state file - clean up + rm -f "$SIGNAL_FILE" + exit 0 +fi + +# Re-check the selected signal before consuming it.
Candidate selection above +# may have skipped stale signals from other roots, but this is the authorization gate. +if ! matches_setup_command_signature "$HOOK_COMMAND" "$COMMAND_SIGNATURE"; then + # This Bash event is not from the setup script - do not consume signal + exit 0 fi # Extract session_id from the hook JSON input diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 1d8f1e31..42c88257 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -252,8 +252,9 @@ fi if is_goal_tracker_path "$FILE_PATH_LOWER"; then GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" - NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") - NORMALIZED_GOAL_TRACKER_PATH=$(_normalize_path "$GOAL_TRACKER_PATH") + # Use canonicalize_path to resolve symlinks (e.g. /var -> /private/var on macOS) + NORMALIZED_FILE_PATH=$(canonicalize_path "$FILE_PATH" 2>/dev/null || _normalize_path "$FILE_PATH") + NORMALIZED_GOAL_TRACKER_PATH=$(canonicalize_path "$GOAL_TRACKER_PATH" 2>/dev/null || _normalize_path "$GOAL_TRACKER_PATH") if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_GOAL_TRACKER_PATH" ]]; then goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 diff --git a/prompt-template/explore/report-template.md b/prompt-template/explore/report-template.md new file mode 100644 index 00000000..d2dfdc37 --- /dev/null +++ b/prompt-template/explore/report-template.md @@ -0,0 +1,122 @@ +# explore-idea Run Report + +**Run ID:** <RUN_ID> +**Base Branch:** <BASE_BRANCH> +**Base Commit:** <BASE_COMMIT> +**Created At:** <CREATED_AT> + +--- + +## Summary + +<SUMMARY_PARAGRAPH> + +--- + +## Tier 1: Best Product Direction + +*Ranked by user value, strategic fit, original direction quality, evidence, and known risks. 
This ranking reflects the quality of the original idea directions, not prototype implementation success.* + +| Rank | Direction | Confidence | Key Evidence | Known Risks | +|------|-----------|------------|--------------|-------------| +<PRODUCT_DIRECTION_RANKING_ROWS> + +### Rationale + +<PRODUCT_DIRECTION_RATIONALE> + +--- + +## Tier 2: Most Implementation-Ready Prototype + +*Ranked by prototype outcome: task status, Codex verdict, test results, commit status, and iteration count.* + +| Rank | Direction | Status | Codex | Tests | Commits | Iterations | +|------|-----------|--------|-------|-------|---------|------------| +<IMPLEMENTATION_RANKING_ROWS> + +### Rationale + +<IMPLEMENTATION_RANKING_RATIONALE> + +--- + +## Worker Results + +<WORKER_RESULT_ENTRIES> + +--- + +## Adoption Paths + +### Continue Winner Branch + +To continue development on the top-ranked prototype: + +```bash +# Navigate to the winner's worktree +cd <WINNER_WORKTREE_PATH> + +# Branch: <WINNER_BRANCH_NAME> +# Commit: <WINNER_COMMIT_SHA> + +# Start RLCR loop from the prototype state +/humanize:start-rlcr-loop --skip-impl +``` + +### Restart From Plan + +Use the winning direction's approach summary as input to `/humanize:gen-plan`: + +```bash +/humanize:gen-plan --input <DRAFT_PATH> --output <plan-path> +``` + +### Cherry-Pick Prototype + +To cherry-pick specific commits from a prototype branch: + +```bash +git cherry-pick <COMMIT_SHA> +# Verify the base branch matches before cherry-picking. 
+``` + +### Discard Non-Adopted Prototypes + +Remove worktrees and branches for directions you are not adopting: + +```bash +<CLEANUP_COMMANDS> +``` + +--- + +## All Worker Details + +<ALL_WORKER_DETAILS> + +--- + +## Cleanup Reference + +All explore run artifacts are stored in: + +``` +.humanize/explore/<RUN_ID>/ + manifest.json — coordinator state and per-worker metadata + dispatch-prompts/ — exact prompts sent to each worker + worker-results.jsonl — machine-readable result rows + report.md — this report +``` + +To remove all local explore artifacts for this run: +```bash +# Remove worktrees +<ALL_WORKTREE_REMOVE_COMMANDS> + +# Remove branches +<ALL_BRANCH_DELETE_COMMANDS> + +# Remove run directory (optional, for cleanup) +# rm -rf ".humanize/explore/<RUN_ID>" +``` diff --git a/prompt-template/explore/worker-prompt.md b/prompt-template/explore/worker-prompt.md new file mode 100644 index 00000000..38d03b94 --- /dev/null +++ b/prompt-template/explore/worker-prompt.md @@ -0,0 +1,155 @@ +# explore-idea Worker: <DIRECTION_NAME> + +You are a prototype worker for the `/humanize:explore-idea` command. +Your job is to implement a scoped prototype for one idea direction, review it with Codex, commit the result locally, and emit a structured JSON result. + +## Run Context + +- Run ID: `<RUN_ID>` +- Direction ID: `<DIRECTION_ID>` +- Dir slug: `<DIR_SLUG>` +- Base branch: `<BASE_BRANCH>` +- Max iterations: `<MAX_WORKER_ITERATIONS>` +- Codex timeout: `<CODEX_TIMEOUT_MIN>` minutes + +## Your Direction + +**Name:** <DIRECTION_NAME> + +**Rationale:** <DIRECTION_RATIONALE> + +**Approach Summary:** +<APPROACH_SUMMARY> + +**Objective Evidence:** +<OBJECTIVE_EVIDENCE> + +**Known Risks:** +<KNOWN_RISKS> + +**Confidence:** <CONFIDENCE> + +**Original Idea:** +<ORIGINAL_IDEA> + +## Hard Constraints (MUST follow — no exceptions) + +1. **Stay in your worktree.** Only modify files inside your assigned worktree directory. Do not create, modify, or delete files outside it. +2. 
**No nested Skills or slash commands.** Do not invoke any `/humanize:*` commands, skills, or skill tool calls. +3. **No nested Agent or Task workers.** Do not spawn sub-agents or task workers. +4. **No git push.** Do not push any branch to any remote. +5. **No access to sibling worktrees.** Do not read from or write to other workers' directories. +6. **Use only `ask-codex.sh` for Codex calls.** No direct `codex` CLI invocations. +7. **Scope Codex calls to this worktree.** Set `export CLAUDE_PROJECT_DIR="$PWD"` before calling `ask-codex.sh`. +8. **Emit result sentinel last.** Your final action must be printing the JSON result between the sentinel markers. + +## Worker Loop (up to <MAX_WORKER_ITERATIONS> iterations) + +### Setup + +1. Verify you are in your worktree. Check that `git rev-parse --show-toplevel` returns a path that matches your assigned worktree (not the coordinator checkout). +2. Anchor to the validated base commit before creating the explore branch: + ```bash + # Do NOT run `git checkout <BASE_BRANCH>`: the coordinator worktree already + # has that branch checked out, and Git forbids two worktrees from checking + # out the same branch simultaneously. The worktree was created at BASE_COMMIT + # in detached HEAD state, so HEAD is already at the correct commit. + ACTUAL_COMMIT=$(git rev-parse HEAD) + if [[ "$ACTUAL_COMMIT" != "<BASE_COMMIT>" ]]; then + echo "HEAD mismatch: expected <BASE_COMMIT>, got $ACTUAL_COMMIT" >&2 + # Emit the failure result immediately and stop; do NOT create the explore branch. + else + git checkout -b "explore/<RUN_ID>/<DIR_SLUG>" + fi + ``` + If HEAD does not match `<BASE_COMMIT>`, emit a failure result with `error: "base commit mismatch"` and stop. +3. Set the Codex project root to this worktree: + ```bash + export CLAUDE_PROJECT_DIR="$PWD" + ``` +4. Verify the root: confirm `scripts/ask-codex.sh` resolves the project root to `$PWD`. If the root points to a different directory (coordinator checkout mismatch), emit a failure result immediately without proceeding. 
+ +### Per-Iteration Steps + +For each iteration (up to `<MAX_WORKER_ITERATIONS>`): + +1. **Explore** — read the relevant files for this direction. Understand the existing patterns. +2. **Implement** — make scoped prototype changes targeting this direction's approach. Keep changes minimal and focused. +3. **Test** — run targeted tests for the files you touched. Do NOT run the full test suite. Examples: + - New script in `scripts/lib/`: run any existing tests for that module (e.g., `bash tests/test-<module>.sh`), or write and run a focused test for the new file. + - New test file in `tests/`: run that specific test file (`bash tests/<your-test>.sh`). + - Modified command in `commands/`: run the corresponding structure test if one exists. + If no targeted test exists for the area you touched, write a minimal test and run it. + Record `tests_passed` and `tests_failed` counts from the targeted test run(s). +4. **Review with Codex**: + ```bash + export CLAUDE_PROJECT_DIR="$PWD" + bash "${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh" \ + --codex-timeout $(( <CODEX_TIMEOUT_MIN> * 60 )) \ + --codex-model "gpt-5.4:xhigh" \ + "Review the prototype changes for the '<DIRECTION_NAME>' direction. Focus on: correctness, fit with existing patterns, and implementation completeness. Reply with LGTM if acceptable, or list specific required changes." + ``` +5. **Apply feedback** — if Codex listed required changes, apply them. If Codex replied LGTM or similar, record `codex_final_verdict: "lgtm"` and stop iterating. + +### Commit + +After the final iteration (or early stop on LGTM), if there are any changes: +```bash +git add -A +git commit -m "prototype: <DIRECTION_NAME> direction (<DIR_SLUG>)" +``` +Record the commit SHA and count. + +If there are no changes to commit, record `commit_status: "none"`. + +## Result Emission + +After completing the loop, print the following JSON object between the sentinel markers as your final output. Do not print anything after the end sentinel. 
+ +``` +=== EXPLORE_RESULT_JSON_BEGIN === +{ + "schema_version": 1, + "run_id": "<RUN_ID>", + "direction_id": "<DIRECTION_ID>", + "dir_slug": "<DIR_SLUG>", + "task_status": "<success|partial|failed>", + "codex_final_verdict": "<lgtm|partial|failed|unavailable>", + "rounds_used": <N>, + "tests_passed": <N>, + "tests_failed": <N>, + "worktree_path": "<absolute path to this worktree>", + "branch_name": "explore/<RUN_ID>/<DIR_SLUG>", + "commit_sha": "<SHA or empty string>", + "commit_count": <N>, + "dirty_state": "<clean|dirty|unknown>", + "commit_status": "<committed|none|wip|failed>", + "summary_markdown": "<Markdown summary of what was implemented and key findings>", + "what_worked": ["<item>"], + "what_didnt": ["<item>"], + "bitlesson_action": "none", + "error": null +} +=== EXPLORE_RESULT_JSON_END === +``` + +**Status enum guidance:** +- `task_status`: + - `success` — prototype implemented, Codex LGTM, tests clean + - `partial` — prototype partially implemented or Codex had remaining issues + - `failed` — could not implement a meaningful prototype +- `codex_final_verdict`: + - `lgtm` — Codex explicitly approved + - `partial` — Codex approved with minor caveats + - `failed` — Codex found blocking issues not resolved + - `unavailable` — Codex call failed or was not reached +- `dirty_state`: + - `clean` — no uncommitted changes at result time + - `dirty` — uncommitted changes remain (WIP state) + - `unknown` — could not determine +- `commit_status`: + - `committed` — changes committed to branch + - `none` — no changes to commit + - `wip` — changes exist but not committed + - `failed` — commit attempted but failed + +If an unrecoverable error occurs before completing the loop, set `task_status: "failed"`, fill `error` with a description, and still emit the result sentinel. 
diff --git a/scripts/ask-codex.sh b/scripts/ask-codex.sh index 47ffeab0..9836cb43 100755 --- a/scripts/ask-codex.sh +++ b/scripts/ask-codex.sh @@ -143,8 +143,8 @@ while [[ $# -gt 0 ]]; do esac done -# Join question parts into a single string -QUESTION="${QUESTION_PARTS[*]}" +# Join question parts into a single string (use ${arr[*]+...} to avoid set -u crash on bash 3.2) +QUESTION="${QUESTION_PARTS[*]+"${QUESTION_PARTS[*]}"}" # ======================================== # Validate Prerequisites @@ -241,8 +241,26 @@ EOF # Build Codex Command # ======================================== +# Probe whether the installed Codex CLI supports --disable codex_hooks to prevent +# nested hook recursion when ask-codex.sh is called from inside a running loop. +# Cache the probe result in the skill directory to avoid repeated probes. +CODEX_DISABLE_HOOKS_ARGS=() +_CODEX_DISABLE_HOOKS_CACHE="$SKILL_DIR/.codex-disable-hooks-supported" +if [[ -f "$_CODEX_DISABLE_HOOKS_CACHE" ]]; then + [[ "$(cat "$_CODEX_DISABLE_HOOKS_CACHE")" == "yes" ]] && CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +else + CODEX_HELP_OUTPUT="$(codex --help </dev/null 2>&1 || true)" + if grep -q -- '--disable' <<< "$CODEX_HELP_OUTPUT"; then + CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) + echo "yes" > "$_CODEX_DISABLE_HOOKS_CACHE" 2>/dev/null || true + else + echo "no" > "$_CODEX_DISABLE_HOOKS_CACHE" 2>/dev/null || true + fi +fi + # Build codex exec arguments (same pattern as loop-codex-stop-hook.sh) -CODEX_EXEC_ARGS=("-m" "$CODEX_MODEL") +# Use ${arr[@]+"${arr[@]}"} to safely expand possibly-empty arrays under set -u (bash 3.2 compat) +CODEX_EXEC_ARGS=(${CODEX_DISABLE_HOOKS_ARGS[@]+"${CODEX_DISABLE_HOOKS_ARGS[@]}"} "-m" "$CODEX_MODEL") if [[ -n "$CODEX_EFFORT" ]]; then CODEX_EXEC_ARGS+=("-c" "model_reasoning_effort=${CODEX_EFFORT}") fi diff --git a/scripts/bitlesson-select.sh b/scripts/bitlesson-select.sh index fd19a445..287bd593 100755 --- a/scripts/bitlesson-select.sh +++ b/scripts/bitlesson-select.sh @@ 
-12,18 +12,6 @@ source "$SCRIPT_DIR/lib/model-router.sh" source "$SCRIPT_DIR/../hooks/lib/project-root.sh" PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" -PROJECT_ROOT="$(resolve_project_root)" || { - echo "Error: Cannot determine project root." >&2 - echo " Set CLAUDE_PROJECT_DIR or run inside a git repository." >&2 - exit 1 -} -MERGED_CONFIG="$(load_merged_config "$PLUGIN_ROOT" "$PROJECT_ROOT")" -BITLESSON_MODEL="$(get_config_value "$MERGED_CONFIG" "bitlesson_model")" -BITLESSON_MODEL="${BITLESSON_MODEL:-haiku}" -CODEX_FALLBACK_MODEL="$(get_config_value "$MERGED_CONFIG" "codex_model")" -CODEX_FALLBACK_MODEL="${CODEX_FALLBACK_MODEL:-$DEFAULT_CODEX_MODEL}" -PROVIDER_MODE="$(get_config_value "$MERGED_CONFIG" "provider_mode")" -PROVIDER_MODE="${PROVIDER_MODE:-auto}" # Source portable timeout wrapper source "$SCRIPT_DIR/portable-timeout.sh" @@ -108,6 +96,28 @@ if ! printf '%s\n' "$BITLESSON_CONTENT" | grep -Eq '^[[:space:]]*##[[:space:]]+L exit 0 fi +# ======================================== +# Detect BitLesson Project Root (for config and -C) +# ======================================== + +BITLESSON_DIR="$(cd "$(dirname "$BITLESSON_FILE")" && pwd -P)" +if git -C "$BITLESSON_DIR" rev-parse --show-toplevel &>/dev/null; then + BITLESSON_PROJECT_ROOT="$(git -C "$BITLESSON_DIR" rev-parse --show-toplevel)" +elif [[ "$(basename "$BITLESSON_DIR")" == ".humanize" ]]; then + BITLESSON_PROJECT_ROOT="$(cd "$BITLESSON_DIR/.." 
&& pwd -P)" +else + BITLESSON_PROJECT_ROOT="$BITLESSON_DIR" +fi +CODEX_PROJECT_ROOT="$BITLESSON_PROJECT_ROOT" + +MERGED_CONFIG="$(load_merged_config "$PLUGIN_ROOT" "$BITLESSON_PROJECT_ROOT")" +BITLESSON_MODEL="$(get_config_value "$MERGED_CONFIG" "bitlesson_model")" +BITLESSON_MODEL="${BITLESSON_MODEL:-haiku}" +CODEX_FALLBACK_MODEL="$(get_config_value "$MERGED_CONFIG" "codex_model")" +CODEX_FALLBACK_MODEL="${CODEX_FALLBACK_MODEL:-$DEFAULT_CODEX_MODEL}" +PROVIDER_MODE="$(get_config_value "$MERGED_CONFIG" "provider_mode")" +PROVIDER_MODE="${PROVIDER_MODE:-auto}" + # ======================================== # Determine Provider from BITLESSON_MODEL # ======================================== @@ -130,17 +140,6 @@ if ! check_provider_dependency "$BITLESSON_PROVIDER" 2>/dev/null; then check_provider_dependency "$BITLESSON_PROVIDER" fi -# ======================================== -# Detect Project Root (for -C) -# ======================================== - -BITLESSON_DIR="$(cd "$(dirname "$BITLESSON_FILE")" && pwd -P)" -if git -C "$BITLESSON_DIR" rev-parse --show-toplevel &>/dev/null; then - CODEX_PROJECT_ROOT="$(git -C "$BITLESSON_DIR" rev-parse --show-toplevel)" -else - CODEX_PROJECT_ROOT="$BITLESSON_DIR" -fi - # ======================================== # Build Selector Prompt # ======================================== @@ -191,15 +190,20 @@ run_selector() { if [[ "$provider" == "codex" ]]; then local codex_exec_args=() + # Capture help output first to avoid pipefail+SIGPIPE interaction when + # grep exits early (after finding a match) before codex finishes writing. 
+ local codex_help_output codex_exec_help_output + codex_help_output=$(codex --help 2>&1) || true + codex_exec_help_output=$(codex exec --help 2>&1) || true # Probe whether the installed Codex CLI supports --disable flag - if codex --help 2>&1 | grep -q -- '--disable'; then + if echo "$codex_help_output" | grep -q -- '--disable'; then codex_exec_args+=("--disable" "codex_hooks") fi # Probe for --skip-git-repo-check and --ephemeral support - if codex exec --help 2>&1 | grep -q -- '--skip-git-repo-check'; then + if echo "$codex_exec_help_output" | grep -q -- '--skip-git-repo-check'; then codex_exec_args+=("--skip-git-repo-check") fi - if codex exec --help 2>&1 | grep -q -- '--ephemeral'; then + if echo "$codex_exec_help_output" | grep -q -- '--ephemeral'; then codex_exec_args+=("--ephemeral") fi codex_exec_args+=( diff --git a/scripts/install-codex-hooks.sh b/scripts/install-codex-hooks.sh index 407fe668..3ac75173 100755 --- a/scripts/install-codex-hooks.sh +++ b/scripts/install-codex-hooks.sh @@ -77,9 +77,18 @@ require_codex_hooks_support() { die "Codex CLI with native hooks support is required. Install Codex 0.114.0+ first." fi - if ! codex features list 2>/dev/null | grep -qE '^codex_hooks[[:space:]]'; then + local codex_features_output + local codex_help_output + + codex_features_output="$(codex features list 2>/dev/null || true)" + if ! grep -qE '^codex_hooks[[:space:]]' <<< "$codex_features_output"; then die "Installed Codex CLI does not expose the codex_hooks feature. Humanize Codex install requires Codex 0.114.0+." fi + + codex_help_output="$(codex --help 2>&1 || true)" + if ! grep -q -- '--disable' <<< "$codex_help_output"; then + die "Installed Codex CLI supports codex_hooks but lacks the --disable flag. Humanize's stop hook uses --disable codex_hooks to prevent recursive hook invocation. Please upgrade Codex." 
# Replace the humanize-rlcr SKILL.md under a Kimi skills directory with the
# Kimi-specific variant (SKILL-kimi.md), substituting every
# {{HUMANIZE_RUNTIME_ROOT}} placeholder with "<target_dir>/humanize".
#
# Arguments:
#   $1 - Kimi skills directory (install_kimi_target passes the directory it
#        has just handed to sync_target)
# Globals read:
#   SKILLS_SOURCE_ROOT - root directory holding humanize-rlcr/SKILL-kimi.md
#   DRY_RUN            - "true" logs the action and returns without writing
# Failure:
#   Calls die() when the source is missing or any write step fails.
overwrite_kimi_rlcr_skill() {
    local target_dir="$1"
    local kimi_src="$SKILLS_SOURCE_ROOT/humanize-rlcr/SKILL-kimi.md"
    local skill_dir="$target_dir/humanize-rlcr"
    local skill_file="$skill_dir/SKILL.md"
    local runtime_root="$target_dir/humanize"

    [[ -f "$kimi_src" ]] || die "missing Kimi RLCR skill source: $kimi_src"
    [[ "$DRY_RUN" == "true" ]] && { log "DRY-RUN overwrite Kimi RLCR skill"; return; }

    mkdir -p "$skill_dir" || die "failed to create Kimi RLCR skill dir: $skill_dir"

    # Create the temp file next to the destination so the final mv is a rename
    # on the same filesystem rather than a copy from the system temp directory.
    local tmp
    tmp="$(mktemp "$skill_file.XXXXXX")" \
        || die "failed to create temp file for Kimi RLCR skill"

    # Literal substitution via index()/substr(): gsub() would interpret "&" and
    # "\" inside the replacement text, corrupting runtime roots containing them.
    _HYDRATE_RUNTIME_ROOT="$runtime_root" \
        awk '
            BEGIN {
                needle = "{{HUMANIZE_RUNTIME_ROOT}}"
                repl = ENVIRON["_HYDRATE_RUNTIME_ROOT"]
                nlen = length(needle)
            }
            {
                out = ""
                rest = $0
                while ((pos = index(rest, needle)) > 0) {
                    out = out substr(rest, 1, pos - 1) repl
                    rest = substr(rest, pos + nlen)
                }
                print out rest
            }
        ' "$kimi_src" > "$tmp" \
        || { rm -f "$tmp"; die "failed to hydrate Kimi RLCR skill"; }

    # mktemp creates owner-only files and mv preserves the mode; give the
    # installed skill ordinary read permissions instead.
    chmod 644 "$tmp" || { rm -f "$tmp"; die "failed to set permissions on Kimi RLCR skill"; }
    mv -f "$tmp" "$skill_file" \
        || { rm -f "$tmp"; die "failed to install Kimi RLCR skill: $skill_file"; }
    log "installed Kimi-specific humanize-rlcr SKILL.md (gate-based)"
}
# Choose the timeout implementation to use and print its name on stdout.
#
# Candidates are tried in order and the first usable one wins:
#   gtimeout -> timeout (only if it identifies itself via --version)
#            -> python3 -> python -> none
#
# Output: exactly one of "gtimeout", "timeout", "python3", "python", "none".
detect_timeout_impl() {
    if command -v gtimeout &>/dev/null; then
        echo "gtimeout"
        return
    fi
    if command -v timeout &>/dev/null; then
        # Require recognizable GNU coreutils output to avoid matching shims
        # (shims typically output nothing for --version and lack "timeout" in output).
        # Capture the output first instead of piping into grep -q, so the
        # verdict depends only on the text and not on the pipeline's exit
        # status when the caller runs with pipefail.
        local version_output
        version_output="$(timeout --version 2>&1 || true)"
        if grep -qiE 'GNU|coreutils|timeout [0-9]' <<< "$version_output"; then
            echo "timeout"
            return
        fi
    fi
    if command -v python3 &>/dev/null; then
        echo "python3"
        return
    fi
    if command -v python &>/dev/null; then
        echo "python"
        return
    fi
    echo "none"
}
# Print the command synopsis on stdout, then terminate the script with
# exit status 1 (the same status used for a missing input argument).
usage() {
    printf '%s\n' \
        "Usage: $0 <path/to/file.directions.json>" \
        "" \
        "Validates a directions.json file against schema version 1."
    exit 1
}
+if jq -e ' + # schema_version must be 1 + .schema_version == 1 + + # required top-level keys must be present and be strings + and ((.title | type) == "string") + and ((.original_idea | type) == "string") + and ((.synthesis_notes | type) == "string") + and has("metadata") + and has("directions") + + # directions array: 1..10 elements + and ((.directions | type) == "array") + and ((.directions | length) >= 1) + and ((.directions | length) <= 10) + + # exactly one primary direction + and ((.directions | map(select(.is_primary == true)) | length) == 1) + + # direction_id: present, is a string, unique, and safe as a whitespace-delimited token + and (.directions | map(has("direction_id") and ((.direction_id | type) == "string")) | all) + and (.directions | map(.direction_id) | all(test("^dir-[0-9]{2}-[a-z0-9-]+$"))) + and ((.directions | map(.direction_id) | unique | length) == (.directions | length)) + + # dir_slug: present, is a string, unique, and branch/path safe (lowercase alphanumeric + hyphens) + and (.directions | map(has("dir_slug") and ((.dir_slug | type) == "string")) | all) + and ((.directions | map(.dir_slug) | unique | length) == (.directions | length)) + and (.directions | map(.dir_slug) | all(. != null and test("^[a-z0-9-]+$"))) + + # source_index: present and must be an integer (not a string) + and (.directions | map(has("source_index") and ((.source_index | type) == "number") and (.source_index == (.source_index | floor))) | all) + and ((.directions | map(.source_index) | unique | length) == (.directions | length)) + + # display_order values must be integers (number type and equal to floor) + and (.directions | map(has("display_order") and ((.display_order | type) == "number") and (.display_order == (.display_order | floor))) | all) + + # metadata.n_returned must equal directions.length + and (.metadata.n_returned == (.directions | length)) + + # confidence must be high, medium, or low for each direction + and (.directions | map(.confidence) | all(. 
== "high" or . == "medium" or . == "low")) + + # each direction must have all required string fields + and (.directions | map( + ((.name | type) == "string") + and ((.rationale | type) == "string") + and ((.raw_phase3_response | type) == "string") + and ((.approach_summary | type) == "string") + and ((.objective_evidence | type) == "array") + and ((.known_risks | type) == "array") + # array items must be strings + and (.objective_evidence | map(type == "string") | all) + and (.known_risks | map(type == "string") | all) + ) | all) +' "$INPUT_FILE" > /dev/null 2>&1; then + echo "VALIDATION_SUCCESS" + exit 0 +else + echo "VALIDATION_FAILED: $INPUT_FILE does not conform to directions.json schema version 1" >&2 + exit 3 +fi diff --git a/scripts/validate-explore-idea-io.sh b/scripts/validate-explore-idea-io.sh new file mode 100755 index 00000000..43a7a788 --- /dev/null +++ b/scripts/validate-explore-idea-io.sh @@ -0,0 +1,376 @@ +#!/usr/bin/env bash +# validate-explore-idea-io.sh +# Validates all inputs for the explore-idea command before any dispatch side effects. +# +# Usage: validate-explore-idea-io.sh <input-path> [OPTIONS] +# +# Input: +# <input-path> Path to a .directions.json file, or a draft .md file with a companion +# .directions.json (resolved as <draft>.directions.json). +# +# Options: +# --directions <ids> Comma-separated direction_id or source_index values. +# Default: first min(6, total) by display_order. +# --concurrency <N> Parallel worker count. Default: 6. Max: 10. +# --max-worker-iterations <N> Per-worker iteration cap. Default: 2. Max: 3. +# --worker-timeout-min <N> Worker timeout in minutes. Default: 60. Max: 60. +# --codex-timeout-min <N> Codex call timeout in minutes. Default: 20. Max: 20. 
+# +# Exit codes: +# 0 - Validation passed; structured output emitted on stdout +# 1 - Missing required input argument +# 2 - Input file not found or unreadable +# 3 - Input path is a .md file but companion .directions.json is missing +# 4 - Input is not .directions.json or .md +# 5 - Directions JSON schema validation failed +# 6 - Invalid arguments (caps exceeded, bad direction selectors, duplicate selectors) +# 7 - Main checkout has uncommitted tracked changes (dirty-checkout hard-fail) +# 8 - Run directory already exists (collision) +# 9 - Required template file missing (plugin configuration error) +# +# On success, emits key-value pairs on stdout followed by VALIDATION_SUCCESS: +# DIRECTIONS_JSON_FILE: <abs-path> +# DRAFT_PATH: <abs-path or empty> +# RUN_ID: YYYY-MM-DD_HH-MM-SS +# RUN_DIR: <abs-path> +# BASE_BRANCH: <branch> +# BASE_COMMIT: <sha> +# SELECTED_DIRECTION_IDS: <space-separated list> +# EFFECTIVE_CONCURRENCY: <N> +# MAX_WORKER_ITERATIONS: <N> +# WORKER_TIMEOUT_MIN: <N> +# CODEX_TIMEOUT_MIN: <N> +# WORKER_PROMPT_TEMPLATE: <abs-path> +# REPORT_TEMPLATE: <abs-path> +# VALIDATION_SUCCESS + +set -euo pipefail + +# ======================================== +# Defaults and caps +# ======================================== + +DEFAULT_DIRECTIONS_COUNT=6 +MAX_DIRECTIONS=10 +DEFAULT_CONCURRENCY=6 +MAX_CONCURRENCY=10 +DEFAULT_MAX_WORKER_ITERATIONS=2 +MAX_WORKER_ITERATIONS_CAP=3 +DEFAULT_WORKER_TIMEOUT_MIN=60 +MAX_WORKER_TIMEOUT_MIN=60 +DEFAULT_CODEX_TIMEOUT_MIN=20 +MAX_CODEX_TIMEOUT_MIN=20 + +# ======================================== +# Parse arguments +# ======================================== + +usage() { + cat >&2 << 'USAGE_EOF' +Usage: validate-explore-idea-io.sh <input-path> [OPTIONS] + +Input: + <input-path> Path to a .directions.json file or a draft .md file with a + companion .directions.json (auto-resolved). 
+ +Options: + --directions <ids> Comma-separated direction_id or source_index values + --concurrency <N> Workers in parallel (default: 6, max: 10) + --max-worker-iterations <N> Iterations per worker (default: 2, max: 3) + --worker-timeout-min <N> Worker timeout minutes (default: 60, max: 60) + --codex-timeout-min <N> Codex timeout minutes (default: 20, max: 20) + -h, --help Show this message +USAGE_EOF + exit 6 +} + +INPUT_PATH="" +DIRECTIONS_FLAG="" +CONCURRENCY="$DEFAULT_CONCURRENCY" +MAX_WORKER_ITERATIONS="$DEFAULT_MAX_WORKER_ITERATIONS" +WORKER_TIMEOUT_MIN="$DEFAULT_WORKER_TIMEOUT_MIN" +CODEX_TIMEOUT_MIN="$DEFAULT_CODEX_TIMEOUT_MIN" + +while [[ $# -gt 0 ]]; do + case "$1" in + --directions) + [[ $# -lt 2 || "$2" == --* ]] && { echo "ERROR: --directions requires a value" >&2; exit 6; } + DIRECTIONS_FLAG="$2"; shift 2 ;; + --concurrency) + [[ $# -lt 2 || "$2" == --* ]] && { echo "ERROR: --concurrency requires a value" >&2; exit 6; } + CONCURRENCY="$2"; shift 2 ;; + --max-worker-iterations) + [[ $# -lt 2 || "$2" == --* ]] && { echo "ERROR: --max-worker-iterations requires a value" >&2; exit 6; } + MAX_WORKER_ITERATIONS="$2"; shift 2 ;; + --worker-timeout-min) + [[ $# -lt 2 || "$2" == --* ]] && { echo "ERROR: --worker-timeout-min requires a value" >&2; exit 6; } + WORKER_TIMEOUT_MIN="$2"; shift 2 ;; + --codex-timeout-min) + [[ $# -lt 2 || "$2" == --* ]] && { echo "ERROR: --codex-timeout-min requires a value" >&2; exit 6; } + CODEX_TIMEOUT_MIN="$2"; shift 2 ;; + -h|--help) usage ;; + --*) + echo "ERROR: Unknown option: $1" >&2; exit 6 ;; + *) + if [[ -z "$INPUT_PATH" ]]; then + INPUT_PATH="$1"; shift + else + echo "ERROR: Unexpected positional argument: $1" >&2; exit 6 + fi ;; + esac +done + +# ======================================== +# Require input +# ======================================== + +if [[ -z "$INPUT_PATH" ]]; then + echo "ERROR: input path is required" >&2 + echo "Use --help for usage." 
# Validate that an option value is a canonical decimal integer in [1, max].
#
# Arguments:
#   $1 - option name, used only in error messages (e.g. "--concurrency")
#   $2 - value supplied for the option
#   $3 - inclusive upper bound
# Returns normally when the value is acceptable; otherwise prints an error to
# stderr and exits the script with status 6 (invalid arguments).
validate_int_cap() {
    local name="$1" value="$2" max="$3"
    if ! [[ "$value" =~ ^[0-9]+$ ]]; then
        echo "ERROR: $name must be a positive integer; got: $value" >&2
        exit 6
    fi
    # Bash arithmetic reads a leading 0 as octal: "074" would compare as 60 and
    # slip under a cap of 60, while "08"/"099" raise an arithmetic error that an
    # `if (( ... ))` treats as false, skipping the range check entirely. The
    # value is later emitted verbatim, so reject the non-canonical spelling.
    if [[ "$value" == 0?* ]]; then
        echo "ERROR: $name must be a decimal integer without leading zeros; got: $value" >&2
        exit 6
    fi
    # The length guard is evaluated first (|| short-circuits) so that digit
    # strings too long for shell arithmetic cannot wrap around into range.
    if (( ${#value} > 9 || value < 1 || value > max )); then
        echo "ERROR: $name must be between 1 and $max; got: $value" >&2
        exit 6
    fi
}
-f "$COMPANION" ]]; then + echo "ERROR: Companion directions.json not found for draft: $INPUT_PATH" >&2 + echo " Expected companion: $COMPANION" >&2 + echo " Please regenerate the idea draft with: /humanize:gen-idea <idea>" >&2 + exit 3 + fi + DIRECTIONS_JSON_FILE="$(realpath "$COMPANION" 2>/dev/null || echo "$COMPANION")" +else + echo "ERROR: Input must be a .directions.json or .md file; got: $INPUT_PATH" >&2 + exit 4 +fi + +# ======================================== +# Locate plugin scripts and templates +# ======================================== + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +if [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then + PLUGIN_ROOT="$CLAUDE_PLUGIN_ROOT" +else + PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +fi + +SCHEMA_VALIDATOR="$PLUGIN_ROOT/scripts/validate-directions-json.sh" +WORKER_PROMPT_TEMPLATE="$PLUGIN_ROOT/prompt-template/explore/worker-prompt.md" +REPORT_TEMPLATE="$PLUGIN_ROOT/prompt-template/explore/report-template.md" + +if [[ ! -f "$WORKER_PROMPT_TEMPLATE" ]]; then + echo "ERROR: Worker prompt template missing: $WORKER_PROMPT_TEMPLATE" >&2 + exit 9 +fi +if [[ ! -f "$REPORT_TEMPLATE" ]]; then + echo "ERROR: Report template missing: $REPORT_TEMPLATE" >&2 + exit 9 +fi + +# ======================================== +# Schema validation +# ======================================== + +if ! command -v jq &>/dev/null; then + echo "ERROR: jq is required but not installed" >&2 + exit 5 +fi + +if ! bash "$SCHEMA_VALIDATOR" "$DIRECTIONS_JSON_FILE" > /dev/null 2>&1; then + echo "ERROR: Directions JSON schema validation failed: $DIRECTIONS_JSON_FILE" >&2 + echo " The file does not conform to directions.json schema version 1." 
>&2 + exit 5 +fi + +# ======================================== +# Load directions from JSON +# ======================================== + +TOTAL_DIRECTIONS=$(jq '.directions | length' "$DIRECTIONS_JSON_FILE") + +# ======================================== +# Direction selection +# ======================================== + +if [[ -z "$DIRECTIONS_FLAG" ]]; then + # Default: first min(6, total) by display_order + SELECT_COUNT=$(( TOTAL_DIRECTIONS < DEFAULT_DIRECTIONS_COUNT ? TOTAL_DIRECTIONS : DEFAULT_DIRECTIONS_COUNT )) + SELECTED_IDS=$(jq -r ' + .directions + | sort_by(.display_order) + | .[:'"$SELECT_COUNT"'] + | map(.direction_id) + | join(" ") + ' "$DIRECTIONS_JSON_FILE") +else + # Parse --directions: comma-separated direction_id or source_index values + IFS=',' read -ra RAW_SELECTORS <<< "$DIRECTIONS_FLAG" + + # Check for duplicates + DEDUPED=$(printf '%s\n' "${RAW_SELECTORS[@]}" | sort | uniq | wc -l | tr -d ' ') + if (( DEDUPED != ${#RAW_SELECTORS[@]} )); then + echo "ERROR: --directions contains duplicate selector values: $DIRECTIONS_FLAG" >&2 + exit 6 + fi + + # Check count cap + if (( ${#RAW_SELECTORS[@]} > MAX_DIRECTIONS )); then + echo "ERROR: --directions selects ${#RAW_SELECTORS[@]} directions; max is $MAX_DIRECTIONS" >&2 + exit 6 + fi + + # Resolve each selector to a direction_id + RESOLVED_IDS=() + for sel in "${RAW_SELECTORS[@]}"; do + if [[ "$sel" =~ ^[0-9]+$ ]]; then + # Numeric source_index + RESOLVED=$(jq -r --argjson idx "$sel" ' + .directions + | map(select(.source_index == $idx)) + | first + | .direction_id // empty + ' "$DIRECTIONS_JSON_FILE") + else + # direction_id string + RESOLVED=$(jq -r --arg id "$sel" ' + .directions + | map(select(.direction_id == $id)) + | first + | .direction_id // empty + ' "$DIRECTIONS_JSON_FILE") + fi + + if [[ -z "$RESOLVED" ]]; then + echo "ERROR: Unknown direction selector: $sel" >&2 + echo " Valid direction_ids: $(jq -r '.directions | map(.direction_id) | join(", ")' "$DIRECTIONS_JSON_FILE")" >&2 + echo " 
Valid source_indexes: $(jq -r '.directions | map(.source_index|tostring) | join(", ")' "$DIRECTIONS_JSON_FILE")" >&2 + exit 6 + fi + RESOLVED_IDS+=("$RESOLVED") + done + + # Check for duplicates after resolution (catches mixed selector forms like "1,dir-01-slug") + RESOLVED_DEDUPED=$(printf '%s\n' "${RESOLVED_IDS[@]}" | sort | uniq | wc -l | tr -d ' ') + if (( RESOLVED_DEDUPED != ${#RESOLVED_IDS[@]} )); then + echo "ERROR: --directions resolves to duplicate direction_ids: $DIRECTIONS_FLAG" >&2 + exit 6 + fi + + SELECTED_IDS="${RESOLVED_IDS[*]}" +fi + +# Count selected directions +read -ra SELECTED_ARRAY <<< "$SELECTED_IDS" +SELECTED_COUNT="${#SELECTED_ARRAY[@]}" + +if (( SELECTED_COUNT > MAX_DIRECTIONS )); then + echo "ERROR: Selected $SELECTED_COUNT directions; max is $MAX_DIRECTIONS" >&2 + exit 6 +fi + +# Effective concurrency is min(requested, selected_count) +EFFECTIVE_CONCURRENCY=$(( CONCURRENCY < SELECTED_COUNT ? CONCURRENCY : SELECTED_COUNT )) + +# ======================================== +# Dirty checkout check (hard-fail) +# ======================================== + +PROJECT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" +DIRTY_FILES="$(git -C "$PROJECT_ROOT" diff --name-only HEAD -- 2>/dev/null || true)" +if [[ -n "$DIRTY_FILES" ]]; then + echo "ERROR: Main checkout has uncommitted tracked changes." >&2 + echo " Commit or stash changes before running explore-idea." >&2 + echo " Dirty files:" >&2 + printf '%s\n' "$DIRTY_FILES" | sed 's/^/ /' >&2 + exit 7 +fi + +# ======================================== +# Generate RUN_ID and check collision +# ======================================== + +RUN_ID="$(date -u +%Y-%m-%d_%H-%M-%S)" +RUN_DIR="$PROJECT_ROOT/.humanize/explore/$RUN_ID" + +if [[ -e "$RUN_DIR" ]]; then + echo "ERROR: Run directory already exists (same-second collision): $RUN_DIR" >&2 + echo " Please wait one second and retry." 
>&2 + exit 8 +fi + +# ======================================== +# Base branch and commit +# ======================================== +# +# Worker base-anchor contract (enforced by worker-prompt.md): +# Workers are created at BASE_COMMIT in detached HEAD state. +# Do NOT run `git checkout <BASE_BRANCH>` in worker setup because the coordinator +# checkout may already have that branch checked out. Each worker asserts +# HEAD == BASE_COMMIT before creating its explore branch. +# A HEAD mismatch is a fatal worker error. +# Workers MUST run only targeted tests for the files they touched, not the full test suite. + +BASE_BRANCH="$(git -C "$PROJECT_ROOT" rev-parse --abbrev-ref HEAD 2>/dev/null || echo "unknown")" +BASE_COMMIT="$(git -C "$PROJECT_ROOT" rev-parse HEAD 2>/dev/null || echo "unknown")" + +# ======================================== +# Emit validation output +# ======================================== + +echo "DIRECTIONS_JSON_FILE: $DIRECTIONS_JSON_FILE" +echo "DRAFT_PATH: $DRAFT_PATH" +echo "RUN_ID: $RUN_ID" +echo "RUN_DIR: $RUN_DIR" +echo "BASE_BRANCH: $BASE_BRANCH" +echo "BASE_COMMIT: $BASE_COMMIT" +echo "SELECTED_DIRECTION_IDS: $SELECTED_IDS" +echo "EFFECTIVE_CONCURRENCY: $EFFECTIVE_CONCURRENCY" +echo "MAX_WORKER_ITERATIONS: $MAX_WORKER_ITERATIONS" +echo "WORKER_TIMEOUT_MIN: $WORKER_TIMEOUT_MIN" +echo "CODEX_TIMEOUT_MIN: $CODEX_TIMEOUT_MIN" +echo "WORKER_PROMPT_TEMPLATE: $WORKER_PROMPT_TEMPLATE" +echo "REPORT_TEMPLATE: $REPORT_TEMPLATE" +echo "VALIDATION_SUCCESS" +exit 0 diff --git a/scripts/validate-gen-idea-io.sh b/scripts/validate-gen-idea-io.sh index 99c4bb1a..5006ff23 100755 --- a/scripts/validate-gen-idea-io.sh +++ b/scripts/validate-gen-idea-io.sh @@ -8,8 +8,9 @@ # 3 - Output parent directory does not exist (user-supplied path only) # 4 - Output file already exists # 5 - No write permission to output directory -# 6 - Invalid arguments (including --n out of range) +# 6 - Invalid arguments (including --n out of range, missing .md suffix) # 7 - Template 
file not found (plugin configuration error) +# 8 - Companion directions.json file already exists set -e @@ -89,13 +90,13 @@ SLUG="" # Detect whether IDEA_INPUT is meant as a file path. The `-f` test below is # the primary gate; this heuristic only matters when that test fails and we # must decide whether to emit INPUT_NOT_FOUND (user meant a path) or treat -# the text as inline. Any whitespace disqualifies the input from path mode, -# so inline ideas that happen to mention a filename like "rename README.md" -# or that contain "/" fall through to inline. Limitation: a real path that -# contains whitespace and does not exist is silently treated as inline. +# the text as inline. Only whitespace-free inputs ending in ".md" trigger +# path mode: slashes alone are not reliable indicators (ideas like "undo/redo" +# or "CI/CD" are valid inline text). Limitation: a real path that contains +# whitespace and does not exist is silently treated as inline. looks_like_path=false if [[ "$IDEA_INPUT" != *[[:space:]]* ]]; then - if [[ "$IDEA_INPUT" == *.md || "$IDEA_INPUT" == */* ]]; then + if [[ "$IDEA_INPUT" == *.md ]]; then looks_like_path=true fi fi @@ -148,8 +149,15 @@ if [[ -z "$OUTPUT_FILE" ]]; then DEFAULT_OUTPUT=true fi +if [[ "${OUTPUT_FILE##*.}" != "md" ]]; then + echo "VALIDATION_ERROR: OUTPUT_NOT_MD" + echo "Output path must have .md suffix for companion JSON derivation; got: $OUTPUT_FILE" + exit 6 +fi + OUTPUT_FILE="$(realpath -m "$OUTPUT_FILE" 2>/dev/null || echo "$OUTPUT_FILE")" OUTPUT_DIR="$(dirname "$OUTPUT_FILE")" +DIRECTIONS_JSON_FILE="${OUTPUT_FILE%.md}.directions.json" if [[ "$DEFAULT_OUTPUT" == true ]]; then mkdir -p "$OUTPUT_DIR" 2>/dev/null || true @@ -167,6 +175,12 @@ if [[ -e "$OUTPUT_FILE" ]]; then exit 4 fi +if [[ -e "$DIRECTIONS_JSON_FILE" ]]; then + echo "VALIDATION_ERROR: COMPANION_EXISTS" + echo "Companion directions.json already exists: $DIRECTIONS_JSON_FILE" + exit 8 +fi + if [[ ! 
-w "$OUTPUT_DIR" ]]; then echo "VALIDATION_ERROR: NO_WRITE_PERMISSION" echo "No write permission: $OUTPUT_DIR" @@ -192,6 +206,7 @@ if [[ "$INPUT_MODE" == "file" ]]; then echo "IDEA_BODY_FILE: $IDEA_BODY_FILE" fi echo "OUTPUT_FILE: $OUTPUT_FILE" +echo "DIRECTIONS_JSON_FILE: $DIRECTIONS_JSON_FILE" echo "SLUG: $SLUG" echo "TEMPLATE_FILE: $TEMPLATE_FILE" echo "N: $N" diff --git a/skills/humanize-rlcr/SKILL-kimi.md b/skills/humanize-rlcr/SKILL-kimi.md new file mode 100644 index 00000000..65046900 --- /dev/null +++ b/skills/humanize-rlcr/SKILL-kimi.md @@ -0,0 +1,128 @@ +--- +name: humanize-rlcr +description: Start RLCR (Ralph-Loop with Codex Review) with hook-equivalent enforcement from skill mode by reusing the existing stop-hook logic. +type: flow +--- + +# Humanize RLCR Loop (Hook-Equivalent) + +Use this flow to run RLCR in environments without native hooks. +Do not re-implement review logic manually. Always call the RLCR stop gate wrapper: + +```bash +"{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh" +``` + +The wrapper executes `hooks/loop-codex-stop-hook.sh`, so skill-mode behavior stays aligned with hook-mode behavior. + +## Runtime Root + +The installer hydrates this skill with an absolute runtime root path: + +```bash +{{HUMANIZE_RUNTIME_ROOT}} +``` + +All commands below assume `{{HUMANIZE_RUNTIME_ROOT}}`. + +## Required Sequence + +### 1. Setup + +Start the loop with the setup script: + +```bash +"{{HUMANIZE_RUNTIME_ROOT}}/scripts/setup-rlcr-loop.sh" $ARGUMENTS +``` + +If setup exits non-zero, stop and report the error. + +### 2. Work Round + +For each round: + +1. Read current loop prompt from `.humanize/rlcr/<timestamp>/round-<N>-prompt.md` (or `finalize` prompt files when in finalize phase). +2. Implement required changes. +3. Commit changes. +4. Write required summary file: + - Normal phase: `.humanize/rlcr/<timestamp>/round-<N>-summary.md` + - Finalize phase: `.humanize/rlcr/<timestamp>/finalize-summary.md` +5. 
Run gate command: + +```bash +GATE_CMD=("{{HUMANIZE_RUNTIME_ROOT}}/scripts/rlcr-stop-gate.sh") +[[ -n "${CLAUDE_SESSION_ID:-}" ]] && GATE_CMD+=(--session-id "$CLAUDE_SESSION_ID") +[[ -n "${CLAUDE_TRANSCRIPT_PATH:-}" ]] && GATE_CMD+=(--transcript-path "$CLAUDE_TRANSCRIPT_PATH") +"${GATE_CMD[@]}" +GATE_EXIT=$? +``` + +6. Handle gate result: + - `0`: loop is allowed to exit (done). + - `10`: blocked by RLCR logic. Follow returned instructions exactly, continue next round. + - `20`: infrastructure error (wrapper/hook/runtime). Report error, do not fake completion. + +## What This Enforces + +By routing through the stop-hook logic, this skill enforces: + +- state/schema validation (`current_round`, `max_iterations`, `review_started`, `base_branch`, etc.) +- branch consistency checks +- plan-file integrity checks (when applicable) +- incomplete Task/Todo blocking +- git-clean requirement before exit +- `--push-every-round` unpushed-commit blocking +- summary presence checks +- max-iteration handling +- full-alignment rounds (`--full-review-round`) +- strict `COMPLETE`/`STOP` marker handling +- review-phase transition guard (`.review-phase-started` marker) +- code-review gating on `[P0-9]` markers +- hard blocking on codex review failure or empty output +- open-question handling when `ask_codex_question=true` + +## Critical Rules + +1. Never manually edit `state.md` or `finalize-state.md`. +2. Never skip a blocked hook result by declaring completion manually. +3. Never run ad-hoc `codex exec` / `codex review` in place of the hook-managed phase transitions. +4. Always use files generated by the loop (`round-*-prompt.md`, `round-*-review-result.md`) as source of truth. 
+ +## Options + +Pass these through `setup-rlcr-loop.sh`: + +| Option | Description | Default | +|--------|-------------|---------| +| `path/to/plan.md` | Plan file path | Required unless `--skip-impl` | +| `--plan-file <path>` | Explicit plan path | - | +| `--track-plan-file` | Enforce tracked plan immutability | false | +| `--max N` | Maximum iterations | 42 | +| `--codex-model MODEL:EFFORT` | Codex model and effort for `codex exec` | gpt-5.4:high | +| `--codex-timeout SECONDS` | Codex timeout | 5400 | +| `--base-branch BRANCH` | Base for review phase | auto-detect | +| `--full-review-round N` | Full alignment interval | 5 | +| `--skip-impl` | Start directly in review path | false | +| `--push-every-round` | Require push each round | false | +| `--claude-answer-codex` | Let Claude answer open questions directly | false | +| `--agent-teams` | Enable agent teams mode | false | +| `--yolo` | Skip quiz and enable --claude-answer-codex | false | +| `--skip-quiz` | Skip Plan Understanding Quiz (implicit in skill mode) | false | + +Review phase `codex review` runs with `gpt-5.4:high`. 
+ +## Usage + +```bash +# Start with plan file +/flow:humanize-rlcr path/to/plan.md + +# Review-only mode +/flow:humanize-rlcr --skip-impl +``` + +## Cancel + +```bash +"{{HUMANIZE_RUNTIME_ROOT}}/scripts/cancel-rlcr-loop.sh" +``` diff --git a/tests/fixtures/directions/valid.directions.json b/tests/fixtures/directions/valid.directions.json new file mode 100644 index 00000000..a76efe50 --- /dev/null +++ b/tests/fixtures/directions/valid.directions.json @@ -0,0 +1,42 @@ +{ + "schema_version": 1, + "title": "Command Pattern Undo Stack", + "original_idea": "add undo/redo to the editor", + "synthesis_notes": "The command-history approach is strongest due to existing repo patterns.", + "metadata": { + "n_requested": 2, + "n_returned": 2, + "timestamp": "20260429-120000", + "draft_path": ".humanize/ideas/undo-redo-20260429-120000.md" + }, + "directions": [ + { + "direction_id": "dir-00-command-history", + "dir_slug": "command-history", + "source_index": 0, + "display_order": 0, + "is_primary": true, + "name": "Command History", + "rationale": "Reuses existing command pattern infrastructure with minimal surface area.", + "raw_phase3_response": "Implement a command stack that records each action as an invertible command object.", + "approach_summary": "Wrap each editor action in a command object with do/undo methods; maintain a bounded history stack.", + "objective_evidence": ["src/editor/actions.ts extends existing Command interface"], + "known_risks": ["Memory pressure from large history stacks"], + "confidence": "high" + }, + { + "direction_id": "dir-01-event-sourcing", + "dir_slug": "event-sourcing", + "source_index": 1, + "display_order": 1, + "is_primary": false, + "name": "Event Sourcing", + "rationale": "Provides full audit log but introduces significant complexity versus command pattern.", + "raw_phase3_response": "Store all mutations as immutable events; replay events to reconstruct state.", + "approach_summary": "Replace mutable state with an append-only event 
log; replay to any point.", + "objective_evidence": ["exploratory, no concrete precedent"], + "known_risks": ["Event schema migration complexity", "Performance degradation on large logs"], + "confidence": "low" + } + ] +} diff --git a/tests/robustness/test-hook-system-robustness.sh b/tests/robustness/test-hook-system-robustness.sh index 1d4a21f5..5a29706b 100755 --- a/tests/robustness/test-hook-system-robustness.sh +++ b/tests/robustness/test-hook-system-robustness.sh @@ -452,7 +452,10 @@ EOF UPDATED_CONTENT=$(jq -Rs . < "$TEST_DIR/goal-tracker-updated.md") JSON='{"tool_name":"Write","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","content":'"$UPDATED_CONTENT"'}}' set +e -RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +# cd into TEST_DIR so git rev-parse fails (temp dir has no git repo) and the +# resolver falls back to CLAUDE_PROJECT_DIR, preventing the real active loop +# from being picked up. +RESULT=$(echo "$JSON" | (cd "$TEST_DIR"; CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-write-validator.sh") 2>&1) EXIT_CODE=$? set -e if [[ $EXIT_CODE -eq 0 ]]; then @@ -498,7 +501,9 @@ echo "" echo "Test 12e: Edit validator allows mutable goal-tracker edits after round 0" JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"| [mainline] Keep AC-1 moving | AC-1 | pending | - |","new_string":"| [mainline] Keep AC-1 moving | AC-1 | in_progress | re-anchored |"}}' set +e -RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +# cd into TEST_DIR so git rev-parse fails and the resolver falls back to +# CLAUDE_PROJECT_DIR, preventing the real active loop from being picked up. +RESULT=$(echo "$JSON" | (cd "$TEST_DIR"; CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh") 2>&1) EXIT_CODE=$? 
set -e if [[ $EXIT_CODE -eq 0 ]]; then @@ -512,7 +517,9 @@ echo "" echo "Test 12ea: Edit validator allows mutable deletions after round 0" JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$HOOK_LOOP_DIR"'/goal-tracker.md","old_string":"| [mainline] Keep AC-1 moving | AC-1 | pending | - |","new_string":""}}' set +e -RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +# cd into TEST_DIR so git rev-parse fails and the resolver falls back to +# CLAUDE_PROJECT_DIR, preventing the real active loop from being picked up. +RESULT=$(echo "$JSON" | (cd "$TEST_DIR"; CLAUDE_PROJECT_DIR="$TEST_DIR" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh") 2>&1) EXIT_CODE=$? set -e if [[ $EXIT_CODE -eq 0 ]]; then @@ -647,7 +654,10 @@ mkdir -p "$TEST_DIR/no-state" # No .humanize directory - should allow exit (no block decision) set +e -OUTPUT=$(echo '{}' | CLAUDE_PROJECT_DIR="$TEST_DIR/no-state" bash "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" 2>&1) +# cd into no-state dir so git rev-parse fails (temp dir has no git repo) and the +# resolver falls back to CLAUDE_PROJECT_DIR; otherwise the real active loop is +# found and the hook blocks instead of allowing exit. +OUTPUT=$(echo '{}' | (cd "$TEST_DIR/no-state"; CLAUDE_PROJECT_DIR="$TEST_DIR/no-state" bash "$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh") 2>&1) EXIT_CODE=$? set -e # Should exit 0 (pass through) when no loop is active, with no block decision diff --git a/tests/robustness/test-plan-file-robustness.sh b/tests/robustness/test-plan-file-robustness.sh index d2f5ee7f..d9aa1816 100755 --- a/tests/robustness/test-plan-file-robustness.sh +++ b/tests/robustness/test-plan-file-robustness.sh @@ -399,7 +399,7 @@ echo "Test 10: Plan file with very long lines" echo "Another normal line." 
} > "$TEST_DIR/long-lines.md" -LINE_COUNT=$(wc -l < "$TEST_DIR/long-lines.md") +LINE_COUNT=$(wc -l < "$TEST_DIR/long-lines.md" | tr -d ' ') if [[ "$LINE_COUNT" == "5" ]]; then pass "Long lines handled correctly ($LINE_COUNT lines)" else diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index 00373b45..e29452e0 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -91,6 +91,15 @@ TEST_SUITES=( # Session ID and Agent Teams tests "test-session-id.sh" "test-agent-teams.sh" + # gen-idea companion JSON tests (PR-A) + "test-validate-gen-idea-io.sh" + "test-directions-json-schema.sh" + "test-gen-idea-dual-write.sh" + # explore-idea tests (PR-B) + "test-validate-explore-idea-io.sh" + "test-worker-result-contract.sh" + "test-explore-manifest.sh" + "test-explore-command-structure.sh" # Ask Codex tests "test-ask-codex.sh" # Bitlesson routing tests @@ -140,6 +149,28 @@ MOCK_CODEX export PATH="$OUTPUT_DIR/mock-bin:$PATH" fi +# Provide a portable `timeout` shim on platforms that lack it (e.g. macOS base install). +# Uses python3 subprocess so stdin is preserved and exit code 124 is returned on timeout. +if ! 
command -v timeout &>/dev/null; then + mkdir -p "$OUTPUT_DIR/mock-bin" + cat > "$OUTPUT_DIR/mock-bin/timeout" << 'TIMEOUT_SHIM' +#!/usr/bin/env python3 +import subprocess, sys +timeout_secs = float(sys.argv[1].rstrip("smhd")) * {"s": 1, "m": 60, "h": 3600, "d": 86400}.get(sys.argv[1][-1:], 1)  # accept GNU-style s/m/h/d duration suffixes +cmd = sys.argv[2:] +try: + result = subprocess.run(cmd, timeout=timeout_secs) + sys.exit(result.returncode) +except subprocess.TimeoutExpired: + sys.exit(124) +except Exception as e: + print(f"timeout shim error: {e}", file=sys.stderr) + sys.exit(1) +TIMEOUT_SHIM + chmod +x "$OUTPUT_DIR/mock-bin/timeout" + export PATH="$OUTPUT_DIR/mock-bin:$PATH" +fi + # Check if a suite needs zsh needs_zsh() { local suite="$1" @@ -159,9 +190,15 @@ format_ms() { echo "${s}.${frac}s" } +# Portable millisecond timestamp (date +%s%3N needs GNU date; BSD/macOS date has no %N) +ms_now() { + python3 -c "import time; print(int(time.time()*1000))" 2>/dev/null \ + || echo "$(date +%s)000" +} + # Launch all test suites in parallel -declare -A PIDS # suite -> PID -declare -A SKIPPED # suite -> reason +# PIDs and skip reasons are stored as per-suite files under OUTPUT_DIR rather than +# associative arrays (declare -A requires bash 4+). ACTIVE_PIDS=() for suite in "${TEST_SUITES[@]}"; do @@ -172,31 +209,31 @@ for suite in "${TEST_SUITES[@]}"; do time_file="$OUTPUT_DIR/${safe_name}.time" if [[ ! -f "$suite_path" ]]; then - SKIPPED["$suite"]="not found" + echo "not found" > "$OUTPUT_DIR/${safe_name}.skip" continue fi if needs_zsh "$suite"; then if ! command -v zsh &>/dev/null; then - SKIPPED["$suite"]="zsh not available" + echo "zsh not available" > "$OUTPUT_DIR/${safe_name}.skip" continue fi ( - t_start=$(date +%s%3N) + t_start=$(ms_now) zsh "$suite_path" >"$out_file" 2>&1 echo $? >"$exit_file" - echo $(( $(date +%s%3N) - t_start )) >"$time_file" + echo $(( $(ms_now) - t_start )) >"$time_file" ) & else ( - t_start=$(date +%s%3N) + t_start=$(ms_now) "$suite_path" >"$out_file" 2>&1 echo $?
>"$exit_file" - echo $(( $(date +%s%3N) - t_start )) >"$time_file" + echo $(( $(ms_now) - t_start )) >"$time_file" ) & fi - PIDS["$suite"]=$! - ACTIVE_PIDS+=("${PIDS[$suite]}") + echo $! > "$OUTPUT_DIR/${safe_name}.pid" + ACTIVE_PIDS+=($!) # Throttle background jobs while [[ "${#ACTIVE_PIDS[@]}" -ge "$MAX_JOBS" ]]; do @@ -209,7 +246,7 @@ for suite in "${TEST_SUITES[@]}"; do still_running+=("$pid") fi done - ACTIVE_PIDS=("${still_running[@]}") + ACTIVE_PIDS=(${still_running[@]+"${still_running[@]}"}) else # Fallback: wait for the oldest PID (less efficient but portable in older bash) wait "${ACTIVE_PIDS[0]}" 2>/dev/null || true @@ -228,12 +265,12 @@ SORT_FILE="$OUTPUT_DIR/sortable.txt" esc=$'\033' for suite in "${TEST_SUITES[@]}"; do - [[ -n "${SKIPPED[$suite]+x}" ]] && continue + safe_name="$(echo "$suite" | tr '/' '_')" + [[ -f "$OUTPUT_DIR/${safe_name}.skip" ]] && continue - pid="${PIDS[$suite]}" - wait "$pid" 2>/dev/null + pid=$(cat "$OUTPUT_DIR/${safe_name}.pid" 2>/dev/null || echo "") + [[ -n "$pid" ]] && wait "$pid" 2>/dev/null - safe_name="$(echo "$suite" | tr '/' '_')" out_file="$OUTPUT_DIR/${safe_name}.out" exit_file="$OUTPUT_DIR/${safe_name}.exit" time_file="$OUTPUT_DIR/${safe_name}.time" @@ -267,8 +304,11 @@ done # Print skipped suites first for suite in "${TEST_SUITES[@]}"; do - if [[ -n "${SKIPPED[$suite]+x}" ]]; then - echo -e "${YELLOW}SKIP${NC}: $suite (${SKIPPED[$suite]})" + safe_name="$(echo "$suite" | tr '/' '_')" + skip_file="$OUTPUT_DIR/${safe_name}.skip" + if [[ -f "$skip_file" ]]; then + skip_reason=$(cat "$skip_file" 2>/dev/null || echo "unknown") + echo -e "${YELLOW}SKIP${NC}: $suite ($skip_reason)" fi done diff --git a/tests/test-ask-codex.sh b/tests/test-ask-codex.sh index 896f282a..8d6b1846 100755 --- a/tests/test-ask-codex.sh +++ b/tests/test-ask-codex.sh @@ -57,11 +57,55 @@ export MOCK_CODEX_EXIT_CODE="" export MOCK_CODEX_STDOUT="" export MOCK_CODEX_STDERR="" -# Reset mock state between tests +# Reset mock state between tests; also 
clears the skill dir so that +# find...sort|tail -1 always picks the single dir from the next invocation. reset_mock() { export MOCK_CODEX_EXIT_CODE="0" export MOCK_CODEX_STDOUT="" export MOCK_CODEX_STDERR="" + rm -rf "$MOCK_PROJECT/.humanize/skill" 2>/dev/null || true +} + +# Override XDG_CACHE_HOME for run_ask_codex_capturing_dir; set to a non-writable path +# to exercise the fallback cache branch (CACHE_DIR=$SKILL_DIR/cache). +RUN_XDG_CACHE_HOME="$TEST_DIR/cache" + +# Helper: run ask-codex with a controllable XDG_CACHE_HOME, capture stderr, and +# derive the exact project-local skill dir for that invocation. +# Sets RUN_EXIT_CODE (int) and RUN_SKILL_DIR (path, empty on resolution failure). +# +# Primary: "ask-codex: response saved to .../output.md" (emitted on success, always +# project-local regardless of which cache layout was used). +# Fallback A: "ask-codex: cache=.../skill-<id>" -> normal layout +# Fallback B: "ask-codex: cache=.../.humanize/skill/<id>/cache" -> fallback layout +# If none of the above match, RUN_SKILL_DIR is set to "" (explicit failure). +run_ask_codex_capturing_dir() { + local run_stderr output_path cache_path skill_basename + RUN_EXIT_CODE=0 + run_stderr=$( + cd "$MOCK_PROJECT" + export CLAUDE_PROJECT_DIR="$MOCK_PROJECT" + export XDG_CACHE_HOME="$RUN_XDG_CACHE_HOME" + PATH="$MOCK_BIN_DIR:$PATH" bash "$ASK_CODEX_SCRIPT" "$@" 2>&1 >/dev/null + ) || RUN_EXIT_CODE=$? 
+ output_path=$(printf '%s\n' "$run_stderr" | grep "^ask-codex: response saved to " | sed 's/^ask-codex: response saved to //') + if [[ -n "$output_path" ]]; then + RUN_SKILL_DIR=$(dirname "$output_path") + return + fi + cache_path=$(printf '%s\n' "$run_stderr" | grep "^ask-codex: cache=" | sed 's/^ask-codex: cache=//') + skill_basename=$(basename "$cache_path") + case "$skill_basename" in + skill-*) + RUN_SKILL_DIR="$MOCK_PROJECT/.humanize/skill/${skill_basename#skill-}" + ;; + cache) + RUN_SKILL_DIR=$(dirname "$cache_path") + ;; + *) + RUN_SKILL_DIR="" + ;; + esac } # Helper: run ask-codex with mock codex in PATH, inside mock project @@ -330,9 +374,10 @@ echo "" # Test: --codex-model MODEL:EFFORT sets both model and effort reset_mock export MOCK_CODEX_STDOUT="model-test" -run_ask_codex --codex-model "custom-model:high" "model test" > /dev/null 2>&1 -LATEST_DIR=$(find "$MOCK_PROJECT/.humanize/skill" -maxdepth 1 -mindepth 1 -type d 2>/dev/null | sort | tail -1) -if [[ -n "$LATEST_DIR" ]] && grep -q "Model: custom-model" "$LATEST_DIR/input.md" && grep -q "Effort: high" "$LATEST_DIR/input.md"; then +run_ask_codex_capturing_dir --codex-model "custom-model:high" "model test" +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ -d "$RUN_SKILL_DIR" ]] \ + && grep -q "Model: custom-model" "$RUN_SKILL_DIR/input.md" \ + && grep -q "Effort: high" "$RUN_SKILL_DIR/input.md"; then pass "--codex-model MODEL:EFFORT parses model and effort" else fail "--codex-model MODEL:EFFORT parses model and effort" @@ -341,9 +386,10 @@ fi # Test: --codex-model MODEL (no effort) uses default effort reset_mock export MOCK_CODEX_STDOUT="effort-default-test" -run_ask_codex --codex-model "solo-model" "effort default test" > /dev/null 2>&1 -LATEST_DIR=$(find "$MOCK_PROJECT/.humanize/skill" -maxdepth 1 -mindepth 1 -type d 2>/dev/null | sort | tail -1) -if [[ -n "$LATEST_DIR" ]] && grep -q "Model: solo-model" "$LATEST_DIR/input.md" && grep -q "Effort: high" "$LATEST_DIR/input.md"; then +run_ask_codex_capturing_dir 
--codex-model "solo-model" "effort default test" +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ -d "$RUN_SKILL_DIR" ]] \ + && grep -q "Model: solo-model" "$RUN_SKILL_DIR/input.md" \ + && grep -q "Effort: high" "$RUN_SKILL_DIR/input.md"; then pass "--codex-model MODEL without effort uses default high" else fail "--codex-model MODEL without effort uses default high" @@ -352,9 +398,9 @@ fi # Test: -- separator treats remaining args as question reset_mock export MOCK_CODEX_STDOUT="separator-test" -run_ask_codex -- --not-a-flag "is question" > /dev/null 2>&1 -LATEST_DIR=$(find "$MOCK_PROJECT/.humanize/skill" -maxdepth 1 -mindepth 1 -type d 2>/dev/null | sort | tail -1) -if [[ -n "$LATEST_DIR" ]] && grep -qF -- "--not-a-flag" "$LATEST_DIR/input.md"; then +run_ask_codex_capturing_dir -- --not-a-flag "is question" +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ -d "$RUN_SKILL_DIR" ]] \ + && grep -qF -- "--not-a-flag" "$RUN_SKILL_DIR/input.md"; then pass "-- separator passes remaining args as question text" else fail "-- separator passes remaining args as question text" @@ -363,14 +409,34 @@ fi # Test: --codex-timeout is recorded in input.md reset_mock export MOCK_CODEX_STDOUT="timeout-val" -run_ask_codex --codex-timeout 123 "timeout value test" > /dev/null 2>&1 -LATEST_DIR=$(find "$MOCK_PROJECT/.humanize/skill" -maxdepth 1 -mindepth 1 -type d 2>/dev/null | sort | tail -1) -if [[ -n "$LATEST_DIR" ]] && grep -q "Timeout: 123s" "$LATEST_DIR/input.md"; then +run_ask_codex_capturing_dir --codex-timeout 123 "timeout value test" +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ -d "$RUN_SKILL_DIR" ]] \ + && grep -q "Timeout: 123s" "$RUN_SKILL_DIR/input.md"; then pass "--codex-timeout value is recorded in input.md" else fail "--codex-timeout value is recorded in input.md" fi +# Test: run_ask_codex_capturing_dir resolves correct skill dir when home cache is not writable +# (exercises the ask-codex.sh fallback branch: CACHE_DIR=$SKILL_DIR/cache) +READONLY_CACHE="$TEST_DIR/readonly-cache" +mkdir -p 
"$READONLY_CACHE" +chmod 444 "$READONLY_CACHE" +reset_mock +export MOCK_CODEX_STDOUT="fallback-cache-test" +RUN_XDG_CACHE_HOME="$READONLY_CACHE" +run_ask_codex_capturing_dir "fallback cache skill dir test" +RUN_XDG_CACHE_HOME="$TEST_DIR/cache" +chmod 755 "$READONLY_CACHE" +if [[ "$RUN_EXIT_CODE" -eq 0 ]] && [[ -d "$RUN_SKILL_DIR" ]] \ + && grep -q "fallback cache skill dir test" "$RUN_SKILL_DIR/input.md"; then + pass "run_ask_codex_capturing_dir resolves skill dir when home cache is not writable" +else + fail "run_ask_codex_capturing_dir resolves skill dir when home cache is not writable" \ + "exit 0 + valid skill dir with input.md" \ + "exit=$RUN_EXIT_CODE skill_dir=$RUN_SKILL_DIR" +fi + # ======================================== # Cache Directory Tests # ======================================== @@ -433,6 +499,117 @@ else fail "skill requires one quoted final argument for free-form text" "quoted final argument guidance" "missing" fi +# ======================================== +# Auto-Probe: Nested Hook Disable Tests +# ======================================== + +echo "" +echo "--- Auto-Probe: Nested Hook Disable Tests ---" +echo "" + +# Setup: create a secondary mock codex binary directory for probe tests, +# so the probe result is not cached from earlier tests. 
+PROBE_BIN_DIR="$TEST_DIR/probe-bin" +PROBE_PROJECT="$TEST_DIR/probe-project" +init_test_git_repo "$PROBE_PROJECT" +mkdir -p "$PROBE_BIN_DIR" + +run_ask_codex_probe() { + ( + cd "$PROBE_PROJECT" + export CLAUDE_PROJECT_DIR="$PROBE_PROJECT" + export XDG_CACHE_HOME="$TEST_DIR/cache-probe" + PATH="$PROBE_BIN_DIR:$PATH" bash "$ASK_CODEX_SCRIPT" "$@" + ) +} + +# Test A: when codex supports --disable, ask-codex.sh injects --disable codex_hooks +# Create a mock codex that echoes "--disable" in its --help output +cat > "$PROBE_BIN_DIR/codex" << 'PROBE_MOCK_SUPPORTS' +#!/usr/bin/env bash +if [[ "${1:-}" == "--help" ]] || echo "$*" | grep -q -- '--help'; then + echo "--disable <feature> Disable a named feature" + for i in $(seq 1 5000); do + printf -- "--noise-%s\n" "$i" + done + exit 0 +fi +if [[ -n "${MOCK_CODEX_STDERR:-}" ]]; then echo "$MOCK_CODEX_STDERR" >&2; fi +if [[ -n "${MOCK_CODEX_STDOUT:-}" ]]; then echo "$MOCK_CODEX_STDOUT"; fi +cat > /dev/null +exit "${MOCK_CODEX_EXIT_CODE:-0}" +PROBE_MOCK_SUPPORTS +chmod +x "$PROBE_BIN_DIR/codex" + +reset_mock +export MOCK_CODEX_STDOUT="probe-test-supports" +run_ask_codex_probe "probe disable test" > /dev/null 2>&1 || true + +# Check that the cached probe result is "yes" in the skill dir +PROBE_SKILL_DIR=$(find "$PROBE_PROJECT/.humanize/skill" -maxdepth 1 -mindepth 1 -type d 2>/dev/null | sort | tail -1) +if [[ -n "$PROBE_SKILL_DIR" ]] && [[ -f "$PROBE_SKILL_DIR/.codex-disable-hooks-supported" ]]; then + PROBE_RESULT=$(cat "$PROBE_SKILL_DIR/.codex-disable-hooks-supported") + if [[ "$PROBE_RESULT" == "yes" ]]; then + pass "auto-probe: cached 'yes' when codex supports --disable" + else + fail "auto-probe: cached 'yes' when codex supports --disable" "yes" "$PROBE_RESULT" + fi +else + fail "auto-probe: probe cache file created" "cache file exists" "not found" +fi + +# Test B: when codex does NOT support --disable, probe result is "no" +PROBE_BIN_NO_DIR="$TEST_DIR/probe-bin-no" +PROBE_PROJECT_NO="$TEST_DIR/probe-project-no" 
+init_test_git_repo "$PROBE_PROJECT_NO" +mkdir -p "$PROBE_BIN_NO_DIR" + +cat > "$PROBE_BIN_NO_DIR/codex" << 'PROBE_MOCK_NO_SUPPORT' +#!/usr/bin/env bash +if [[ "${1:-}" == "--help" ]] || echo "$*" | grep -q -- '--help'; then + echo "Usage: codex exec [options]" + echo " --full-auto Run without prompts" + exit 0 +fi +if [[ -n "${MOCK_CODEX_STDERR:-}" ]]; then echo "$MOCK_CODEX_STDERR" >&2; fi +if [[ -n "${MOCK_CODEX_STDOUT:-}" ]]; then echo "$MOCK_CODEX_STDOUT"; fi +cat > /dev/null +exit "${MOCK_CODEX_EXIT_CODE:-0}" +PROBE_MOCK_NO_SUPPORT +chmod +x "$PROBE_BIN_NO_DIR/codex" + +run_ask_codex_probe_no() { + ( + cd "$PROBE_PROJECT_NO" + export CLAUDE_PROJECT_DIR="$PROBE_PROJECT_NO" + export XDG_CACHE_HOME="$TEST_DIR/cache-probe-no" + PATH="$PROBE_BIN_NO_DIR:$PATH" bash "$ASK_CODEX_SCRIPT" "$@" + ) +} + +reset_mock +export MOCK_CODEX_STDOUT="probe-test-no-support" +run_ask_codex_probe_no "probe no-support test" > /dev/null 2>&1 || true + +PROBE_NO_SKILL_DIR=$(find "$PROBE_PROJECT_NO/.humanize/skill" -maxdepth 1 -mindepth 1 -type d 2>/dev/null | sort | tail -1) +if [[ -n "$PROBE_NO_SKILL_DIR" ]] && [[ -f "$PROBE_NO_SKILL_DIR/.codex-disable-hooks-supported" ]]; then + PROBE_NO_RESULT=$(cat "$PROBE_NO_SKILL_DIR/.codex-disable-hooks-supported") + if [[ "$PROBE_NO_RESULT" == "no" ]]; then + pass "auto-probe: cached 'no' when codex does not support --disable" + else + fail "auto-probe: cached 'no' when codex does not support --disable" "no" "$PROBE_NO_RESULT" + fi +else + fail "auto-probe: probe cache file created for no-support case" "cache file exists" "not found" +fi + +# Test C: ask-codex.sh script contains the probe implementation +if grep -q "codex_hooks" "$ASK_CODEX_SCRIPT" && grep -q "codex-disable-hooks-supported" "$ASK_CODEX_SCRIPT"; then + pass "ask-codex.sh contains nested hook disable auto-probe implementation" +else + fail "ask-codex.sh contains nested hook disable auto-probe implementation" "codex_hooks + probe cache" "not found" +fi + # 
======================================== # Summary # ======================================== diff --git a/tests/test-codex-hook-install.sh b/tests/test-codex-hook-install.sh index 2d70bb2d..59215add 100755 --- a/tests/test-codex-hook-install.sh +++ b/tests/test-codex-hook-install.sh @@ -41,6 +41,17 @@ cat > "$FAKE_BIN/codex" <<'EOF' #!/usr/bin/env bash set -euo pipefail +if [[ "${1:-}" == "--help" ]]; then + cat <<'HELP' +Usage: codex [OPTIONS] [PROMPT] + --disable <feature> Disable a named feature for this invocation +HELP + for i in $(seq 1 5000); do + printf ' --noise-%s\n' "$i" + done + exit 0 +fi + if [[ "${1:-}" == "features" && "${2:-}" == "list" ]]; then cat <<'LIST' codex_hooks under development false @@ -253,6 +264,7 @@ PATH="$FAKE_BIN:$PATH" TEST_CODEX_FEATURE_LOG="$FEATURE_LOG" XDG_CONFIG_HOME="$X --target codex \ --codex-config-dir "$CODEX_HOME_DIR" \ --codex-skills-dir "$CODEX_HOME_DIR/skills" \ + --command-bin-dir "$COMMAND_BIN_DIR" \ > "$TEST_DIR/install-2.log" 2>&1 PY_OUTPUT_2="$( @@ -289,7 +301,9 @@ fi UNSUPPORTED_BIN="$TEST_DIR/bin-unsupported" UNSUPPORTED_HOME="$TEST_DIR/codex-home-unsupported" -mkdir -p "$UNSUPPORTED_BIN" "$UNSUPPORTED_HOME" +UNSUPPORTED_COMMAND_BIN_DIR="$TEST_DIR/command-bin-unsupported" +UNSUPPORTED_XDG_CONFIG_HOME_DIR="$TEST_DIR/xdg-config-unsupported" +mkdir -p "$UNSUPPORTED_BIN" "$UNSUPPORTED_HOME" "$UNSUPPORTED_COMMAND_BIN_DIR" "$UNSUPPORTED_XDG_CONFIG_HOME_DIR" cat > "$UNSUPPORTED_BIN/codex" <<'EOF' #!/usr/bin/env bash @@ -308,11 +322,12 @@ EOF chmod +x "$UNSUPPORTED_BIN/codex" set +e -PATH="$UNSUPPORTED_BIN:$PATH" \ +PATH="$UNSUPPORTED_BIN:$PATH" XDG_CONFIG_HOME="$UNSUPPORTED_XDG_CONFIG_HOME_DIR" \ "$INSTALL_SCRIPT" \ --target codex \ --codex-config-dir "$UNSUPPORTED_HOME" \ --codex-skills-dir "$UNSUPPORTED_HOME/skills" \ + --command-bin-dir "$UNSUPPORTED_COMMAND_BIN_DIR" \ > "$TEST_DIR/install-unsupported.log" 2>&1 UNSUPPORTED_EXIT=$? 
set -e @@ -331,4 +346,167 @@ else "$(cat "$TEST_DIR/install-unsupported.log")" fi +# --- Codex with codex_hooks but without --disable must be rejected --- +# Regression: a Codex build that exposes codex_hooks but lacks --disable cannot +# be safely installed because the stop hook's recursive-invocation guard relies on +# `--disable codex_hooks`. The installer must catch this configuration before +# writing any files. + +NO_DISABLE_BIN="$TEST_DIR/bin-no-disable" +NO_DISABLE_HOME="$TEST_DIR/codex-home-no-disable" +NO_DISABLE_XDG="$TEST_DIR/xdg-no-disable" +mkdir -p "$NO_DISABLE_BIN" "$NO_DISABLE_HOME" + +cat > "$NO_DISABLE_BIN/codex" <<'EOF' +#!/usr/bin/env bash +set -euo pipefail + +if [[ "${1:-}" == "--help" ]]; then + echo "Usage: codex [OPTIONS] [PROMPT]" + exit 0 +fi + +if [[ "${1:-}" == "features" && "${2:-}" == "list" ]]; then + cat <<'LIST' +codex_hooks under development false +LIST + exit 0 +fi + +echo "unexpected fake codex invocation: $*" >&2 +exit 1 +EOF +chmod +x "$NO_DISABLE_BIN/codex" + +set +e +PATH="$NO_DISABLE_BIN:$PATH" XDG_CONFIG_HOME="$NO_DISABLE_XDG" \ + "$INSTALL_SCRIPT" \ + --target codex \ + --codex-config-dir "$NO_DISABLE_HOME" \ + --codex-skills-dir "$NO_DISABLE_HOME/skills" \ + --command-bin-dir "$COMMAND_BIN_DIR" \ + > "$TEST_DIR/install-no-disable.log" 2>&1 +NO_DISABLE_EXIT=$? 
+set -e + +if [[ "$NO_DISABLE_EXIT" -ne 0 ]]; then + pass "Codex install rejects builds with codex_hooks but without --disable" +else + fail "Codex install rejects builds with codex_hooks but without --disable" "non-zero exit" "exit 0" +fi + +if grep -q "\-\-disable" "$TEST_DIR/install-no-disable.log"; then + pass "No-disable Codex failure mentions --disable flag requirement" +else + fail "No-disable Codex failure mentions --disable flag requirement" \ + "error mentioning --disable" \ + "$(cat "$TEST_DIR/install-no-disable.log")" +fi + +# --- Kimi RLCR skill gate test --- +# Regression: after the native-hook SKILL.md was introduced, Kimi installs +# received the same "stop or exit normally / native hook" instructions. +# overwrite_kimi_rlcr_skill() must replace that with the gate-based SKILL.md. + +KIMI_HOME_DIR="$TEST_DIR/kimi-home" +KIMI_SKILLS_DIR="$KIMI_HOME_DIR/skills" +mkdir -p "$KIMI_HOME_DIR" + +PATH="$FAKE_BIN:$PATH" XDG_CONFIG_HOME="$XDG_CONFIG_HOME_DIR" \ + "$INSTALL_SCRIPT" \ + --target kimi \ + --kimi-skills-dir "$KIMI_SKILLS_DIR" \ + --command-bin-dir "$COMMAND_BIN_DIR" \ + > "$TEST_DIR/install-kimi.log" 2>&1 + +KIMI_RLCR_SKILL="$KIMI_SKILLS_DIR/humanize-rlcr/SKILL.md" + +if [[ -f "$KIMI_RLCR_SKILL" ]]; then + pass "Kimi install produces humanize-rlcr/SKILL.md" +else + fail "Kimi install produces humanize-rlcr/SKILL.md" "SKILL.md exists" "missing" +fi + +if grep -q "rlcr-stop-gate.sh" "$KIMI_RLCR_SKILL" 2>/dev/null; then + pass "Kimi humanize-rlcr/SKILL.md uses explicit rlcr-stop-gate.sh gate" +else + fail "Kimi humanize-rlcr/SKILL.md uses explicit rlcr-stop-gate.sh gate" \ + "rlcr-stop-gate.sh present" \ + "$(head -10 "$KIMI_RLCR_SKILL" 2>/dev/null || echo MISSING)" +fi + +if ! 
grep -q "native.*Stop hook\|Stop hook run automatically\|exit normally" "$KIMI_RLCR_SKILL" 2>/dev/null; then + pass "Kimi humanize-rlcr/SKILL.md does not reference native Stop hook" +else + fail "Kimi humanize-rlcr/SKILL.md does not reference native Stop hook" \ + "native hook text absent" "native hook text present" +fi + +# --- --target both provider_mode test --- +# Regression: install_codex_target() was passing $TARGET ("both") to +# install_codex_user_config(), so provider_mode: "codex-only" was never written +# for mixed Codex+Kimi installs. + +BOTH_CODEX_HOME="$TEST_DIR/both-codex-home" +BOTH_KIMI_SKILLS="$TEST_DIR/both-kimi-skills" +BOTH_XDG_CONFIG="$TEST_DIR/both-xdg-config" +BOTH_USER_CONFIG="$BOTH_XDG_CONFIG/humanize/config.json" +mkdir -p "$BOTH_CODEX_HOME" "$BOTH_KIMI_SKILLS" + +PATH="$FAKE_BIN:$PATH" TEST_CODEX_FEATURE_LOG="$TEST_DIR/feature-log-both.log" \ + XDG_CONFIG_HOME="$BOTH_XDG_CONFIG" \ + HUMANIZE_USER_CONFIG_DIR="$BOTH_XDG_CONFIG/humanize" \ + "$INSTALL_SCRIPT" \ + --target both \ + --codex-config-dir "$BOTH_CODEX_HOME" \ + --codex-skills-dir "$BOTH_CODEX_HOME/skills" \ + --kimi-skills-dir "$BOTH_KIMI_SKILLS" \ + --command-bin-dir "$COMMAND_BIN_DIR" \ + > "$TEST_DIR/install-both.log" 2>&1 + +if [[ "$(jq -r '.provider_mode // empty' "$BOTH_USER_CONFIG" 2>/dev/null)" == "codex-only" ]]; then + pass "--target both install writes provider_mode: codex-only" +else + fail "--target both install writes provider_mode: codex-only" \ + "codex-only" "$(jq -c '.' "$BOTH_USER_CONFIG" 2>/dev/null || echo MISSING)" +fi + +# --- --target both with shared skills dir must be rejected --- +# Regression: when KIMI_SKILLS_DIR == CODEX_SKILLS_DIR, install_codex_target +# overwrites the Kimi-specific humanize-rlcr/SKILL.md. The installer must +# reject this configuration before any install work happens. 
+ +SHARED_DIR="$TEST_DIR/shared-skills" +mkdir -p "$SHARED_DIR" + +SHARED_CODEX_HOME="$TEST_DIR/shared-codex-home" +SHARED_XDG_CONFIG="$TEST_DIR/shared-xdg-config" +mkdir -p "$SHARED_CODEX_HOME" + +set +e +PATH="$FAKE_BIN:$PATH" TEST_CODEX_FEATURE_LOG="$TEST_DIR/feature-log-shared.log" \ + XDG_CONFIG_HOME="$SHARED_XDG_CONFIG" \ + "$INSTALL_SCRIPT" \ + --target both \ + --codex-config-dir "$SHARED_CODEX_HOME" \ + --codex-skills-dir "$SHARED_DIR" \ + --kimi-skills-dir "$SHARED_DIR" \ + --command-bin-dir "$COMMAND_BIN_DIR" \ + > "$TEST_DIR/install-shared.log" 2>&1 +SHARED_EXIT=$? +set -e + +if [[ "$SHARED_EXIT" -ne 0 ]]; then + pass "--target both with shared skills dir exits non-zero" +else + fail "--target both with shared skills dir exits non-zero" "non-zero exit" "exit 0" +fi + +if grep -qi "distinct\|same.*dir\|conflict\|identical" "$TEST_DIR/install-shared.log" 2>/dev/null; then + pass "--target both shared-dir error explains conflict" +else + fail "--target both shared-dir error explains conflict" \ + "conflict message" "$(cat "$TEST_DIR/install-shared.log")" +fi + print_test_summary "Codex Hook Install Tests" diff --git a/tests/test-directions-json-schema.sh b/tests/test-directions-json-schema.sh new file mode 100755 index 00000000..20883460 --- /dev/null +++ b/tests/test-directions-json-schema.sh @@ -0,0 +1,247 @@ +#!/usr/bin/env bash +# +# Tests for validate-directions-json.sh — schema version 1 contract enforcement. +# +# Covers all AC-3 positive and negative cases. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +VALIDATE_SCRIPT="$PROJECT_ROOT/scripts/validate-directions-json.sh" +VALID_FIXTURE="$SCRIPT_DIR/fixtures/directions/valid.directions.json" + +echo "==========================================" +echo "validate-directions-json.sh Tests" +echo "==========================================" +echo "" + +if ! 
command -v jq &>/dev/null; then + echo "SKIP: jq not available — skipping all tests" + exit 0 +fi + +setup_test_dir + +# Helper: create a mutated fixture from valid.directions.json +make_fixture() { + local name="$1" + local jq_expr="$2" + local outfile="$TEST_DIR/${name}.directions.json" + jq "$jq_expr" "$VALID_FIXTURE" > "$outfile" + echo "$outfile" +} + +# Helper: run the validator on a fixture file +run_validate() { + bash "$VALIDATE_SCRIPT" "$1" +} + +echo "--- Positive Tests ---" +echo "" + +# PT-1: Valid fixture passes +EXIT_CODE=0 +run_validate "$VALID_FIXTURE" > /dev/null 2>&1 || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 0 ]]; then + pass "valid fixture: exits 0" +else + fail "valid fixture: exits 0" "exit 0" "exit=$EXIT_CODE" +fi + +echo "" +echo "--- Negative Tests ---" +echo "" + +# NT-1: Missing schema_version +F=$(make_fixture "no-schema-version" 'del(.schema_version)') +EXIT_CODE=0 +run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$? +[[ $EXIT_CODE -ne 0 ]] && pass "missing schema_version: exits non-zero" \ + || fail "missing schema_version: exits non-zero" "non-zero" "$EXIT_CODE" + +# NT-2: 11 directions (exceeds max) +F=$(make_fixture "too-many-directions" ' + . as $base | + .directions = [range(11) | $base.directions[0] | .source_index = .] | + .directions |= to_entries | .directions |= map(.value.direction_id = ("dir-" + (.key|tostring) + "-x") | .value.dir_slug = ("slug-" + (.key|tostring)) | .value.source_index = .key | .value) | + .metadata.n_returned = 11 +') +EXIT_CODE=0 +run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$? +[[ $EXIT_CODE -ne 0 ]] && pass "11 directions: exits non-zero" \ + || fail "11 directions: exits non-zero" "non-zero" "$EXIT_CODE" + +# NT-3: Two entries with is_primary: true +F=$(make_fixture "two-primary" '.directions |= map(.is_primary = true)') +EXIT_CODE=0 +run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$? 
+if [[ $EXIT_CODE -ne 0 ]]; then pass "two is_primary: exits non-zero"
+else fail "two is_primary: exits non-zero" "non-zero" "$EXIT_CODE"; fi  # if/else, not "A && B || C": fail must not also run when pass returns non-zero
+
+# NT-4: Zero entries with is_primary: true (every direction forced to is_primary=false); validator must exit non-zero
+F=$(make_fixture "zero-primary" '.directions |= map(.is_primary = false)')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "zero is_primary: exits non-zero"
+else fail "zero is_primary: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-5: Duplicate direction_id (directions[1] copies the id of directions[0]); validator must exit non-zero
+F=$(make_fixture "dup-direction-id" '.directions[1].direction_id = .directions[0].direction_id')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "duplicate direction_id: exits non-zero"
+else fail "duplicate direction_id: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-6: Empty direction_id (empty string); validator must exit non-zero
+F=$(make_fixture "empty-direction-id" '.directions[0].direction_id = ""')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "empty direction_id: exits non-zero"
+else fail "empty direction_id: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-7: Whitespace-only direction_id; validator must exit non-zero
+F=$(make_fixture "whitespace-direction-id" '.directions[0].direction_id = " "')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "whitespace-only direction_id: exits non-zero"
+else fail "whitespace-only direction_id: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-8: direction_id contains spaces (result is checked by the statement that follows this run)
+F=$(make_fixture "spaced-direction-id" '.directions[0].direction_id = "dir 00 command history"')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "direction_id with spaces: exits non-zero"
+else fail "direction_id with spaces: exits non-zero" "non-zero" "$EXIT_CODE"; fi  # if/else, not "A && B || C": fail must not also run when pass returns non-zero
+
+# NT-9: Duplicate dir_slug (directions[1] copies the slug of directions[0]); validator must exit non-zero
+F=$(make_fixture "dup-dir-slug" '.directions[1].dir_slug = .directions[0].dir_slug')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "duplicate dir_slug: exits non-zero"
+else fail "duplicate dir_slug: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-10: Duplicate source_index (directions[1] copies the index of directions[0]); validator must exit non-zero
+F=$(make_fixture "dup-source-index" '.directions[1].source_index = .directions[0].source_index')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "duplicate source_index: exits non-zero"
+else fail "duplicate source_index: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-11: display_order is a string (not integer); validator must exit non-zero
+F=$(make_fixture "display-order-string" '.directions[0].display_order = "zero"')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "display_order string: exits non-zero"
+else fail "display_order string: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-12: dir_slug contains uppercase; validator must exit non-zero
+F=$(make_fixture "dir-slug-uppercase" '.directions[0].dir_slug = "CommandHistory"')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "dir_slug uppercase: exits non-zero"
+else fail "dir_slug uppercase: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-13: dir_slug contains spaces (result is checked by the statement that follows this run)
+F=$(make_fixture "dir-slug-space" '.directions[0].dir_slug = "command history"')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "dir_slug with spaces: exits non-zero"
+else fail "dir_slug with spaces: exits non-zero" "non-zero" "$EXIT_CODE"; fi  # if/else, not "A && B || C": fail must not also run when pass returns non-zero
+
+# NT-14: Missing required per-direction field (name is deleted from directions[0]); validator must exit non-zero
+F=$(make_fixture "missing-name" '.directions[0] |= del(.name)')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "missing direction.name: exits non-zero"
+else fail "missing direction.name: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-15: objective_evidence is not an array (replaced by a plain string); validator must exit non-zero
+F=$(make_fixture "evidence-not-array" '.directions[0].objective_evidence = "single string"')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "objective_evidence not array: exits non-zero"
+else fail "objective_evidence not array: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-16: known_risks is not an array (replaced by a plain string); validator must exit non-zero
+F=$(make_fixture "risks-not-array" '.directions[0].known_risks = "single string"')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "known_risks not array: exits non-zero"
+else fail "known_risks not array: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-17: Invalid confidence value ("maybe"); validator must exit non-zero
+F=$(make_fixture "bad-confidence" '.directions[0].confidence = "maybe"')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "invalid confidence: exits non-zero"
+else fail "invalid confidence: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-18: metadata.n_returned mismatch (set to 99; result is checked by the statement that follows this run)
+F=$(make_fixture "n-returned-mismatch" '.metadata.n_returned = 99')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "n_returned mismatch: exits non-zero"
+else fail "n_returned mismatch: exits non-zero" "non-zero" "$EXIT_CODE"; fi  # if/else, not "A && B || C": fail must not also run when pass returns non-zero
+
+# NT-19: Missing required top-level key (directions); validator must exit non-zero
+F=$(make_fixture "missing-directions-key" 'del(.directions)')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "missing .directions key: exits non-zero"
+else fail "missing .directions key: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-20: Missing required top-level key (title); validator must exit non-zero
+F=$(make_fixture "missing-title-key" 'del(.title)')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "missing .title key: exits non-zero"
+else fail "missing .title key: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-21: Missing required top-level key (original_idea); validator must exit non-zero
+F=$(make_fixture "missing-original-idea" 'del(.original_idea)')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "missing .original_idea key: exits non-zero"
+else fail "missing .original_idea key: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-22: Missing required top-level key (metadata); validator must exit non-zero
+F=$(make_fixture "missing-metadata" 'del(.metadata)')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+if [[ $EXIT_CODE -ne 0 ]]; then pass "missing .metadata key: exits non-zero"
+else fail "missing .metadata key: exits non-zero" "non-zero" "$EXIT_CODE"; fi
+
+# NT-23: Missing direction_id (per-direction required field; result is checked by the statement that follows this run)
+F=$(make_fixture "missing-direction-id" '.directions[0] |= del(.direction_id)')
+EXIT_CODE=0
+run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$?
+[[ $EXIT_CODE -ne 0 ]] && pass "missing direction_id: exits non-zero" \ + || fail "missing direction_id: exits non-zero" "non-zero" "$EXIT_CODE" + +# NT-24: source_index is a string (not integer) +F=$(make_fixture "source-index-string" '.directions[0].source_index = "0"') +EXIT_CODE=0 +run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$? +[[ $EXIT_CODE -ne 0 ]] && pass "string source_index: exits non-zero" \ + || fail "string source_index: exits non-zero" "non-zero" "$EXIT_CODE" + +# NT-25: title is not a string (numeric type) +F=$(make_fixture "title-numeric" '.title = 123') +EXIT_CODE=0 +run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$? +[[ $EXIT_CODE -ne 0 ]] && pass "numeric title: exits non-zero" \ + || fail "numeric title: exits non-zero" "non-zero" "$EXIT_CODE" + +# NT-26: objective_evidence items are not strings (numeric array) +F=$(make_fixture "evidence-items-numeric" '.directions[0].objective_evidence = [1, 2]') +EXIT_CODE=0 +run_validate "$F" > /dev/null 2>&1 || EXIT_CODE=$? +[[ $EXIT_CODE -ne 0 ]] && pass "numeric objective_evidence items: exits non-zero" \ + || fail "numeric objective_evidence items: exits non-zero" "non-zero" "$EXIT_CODE" + +echo "" +print_test_summary "validate-directions-json.sh Test Summary" diff --git a/tests/test-explore-command-structure.sh b/tests/test-explore-command-structure.sh new file mode 100755 index 00000000..4997bac8 --- /dev/null +++ b/tests/test-explore-command-structure.sh @@ -0,0 +1,249 @@ +#!/usr/bin/env bash +# +# Tests for explore-idea command structural requirements. +# +# Verifies the explore-idea command file contains: +# - Required allowed tools +# - All six workflow phases +# - Hard constraints +# - Two-tier report structure +# - Correct validation script invocation +# - Worker dispatch via Agent with isolation: "worktree" +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +EXPLORE_CMD="$PROJECT_ROOT/commands/explore-idea.md" +VALIDATE_IO_SCRIPT="$PROJECT_ROOT/scripts/validate-explore-idea-io.sh" +REPORT_TEMPLATE="$PROJECT_ROOT/prompt-template/explore/report-template.md" + +echo "==========================================" +echo "explore-idea Command Structure Tests" +echo "==========================================" +echo "" + +echo "--- Command File Existence ---" +echo "" + +if [[ -f "$EXPLORE_CMD" ]]; then + pass "commands/explore-idea.md exists" +else + fail "commands/explore-idea.md exists" "file found" "not found" +fi + +if [[ -f "$VALIDATE_IO_SCRIPT" ]]; then + pass "scripts/validate-explore-idea-io.sh exists" +else + fail "scripts/validate-explore-idea-io.sh exists" "file found" "not found" +fi + +echo "" +echo "--- Allowed Tools ---" +echo "" + +# validate-explore-idea-io.sh in allowed-tools +if grep -q "validate-explore-idea-io.sh" "$EXPLORE_CMD"; then + pass "validate-explore-idea-io.sh in allowed-tools" +else + fail "validate-explore-idea-io.sh in allowed-tools" +fi + +# validate-directions-json.sh in allowed-tools +if grep -q "validate-directions-json.sh" "$EXPLORE_CMD"; then + pass "validate-directions-json.sh in allowed-tools" +else + fail "validate-directions-json.sh in allowed-tools" +fi + +# Agent tool in allowed-tools +if grep -q '"Agent"' "$EXPLORE_CMD"; then + pass "Agent tool in allowed-tools" +else + fail "Agent tool in allowed-tools" +fi + +# Write tool in allowed-tools (for manifest and report) +if grep -q '"Write"' "$EXPLORE_CMD"; then + pass "Write tool in allowed-tools" +else + fail "Write tool in allowed-tools" +fi + +# Read tool in allowed-tools +if grep -q '"Read"' "$EXPLORE_CMD"; then + pass "Read tool in allowed-tools" +else + fail "Read tool in allowed-tools" +fi + +# jq in allowed-tools (Phase 5 coordinator JSON parsing) +if grep -q '"Bash(jq \*)"\|Bash(jq' "$EXPLORE_CMD"; then + pass "jq in allowed-tools" +else + fail "jq in allowed-tools" +fi + +# 
AskUserQuestion in allowed-tools (Phase 2 confirmation) +if grep -q '"AskUserQuestion"' "$EXPLORE_CMD"; then + pass "AskUserQuestion in allowed-tools" +else + fail "AskUserQuestion in allowed-tools" +fi + +echo "" +echo "--- Workflow Phases ---" +echo "" + +# All 6 workflow phases present +PHASES=( + "Phase 1" + "Phase 2" + "Phase 3" + "Phase 4" + "Phase 5" + "Phase 6" +) +for phase in "${PHASES[@]}"; do + if grep -q "$phase" "$EXPLORE_CMD"; then + pass "workflow contains $phase" + else + fail "workflow contains $phase" "$phase in command" "not found" + fi +done + +echo "" +echo "--- Hard Constraints ---" +echo "" + +# Hard constraints section exists +if grep -q "Hard Constraints" "$EXPLORE_CMD"; then + pass "Hard Constraints section present" +else + fail "Hard Constraints section present" +fi + +# No remote push constraint +if grep -q "MUST NOT push" "$EXPLORE_CMD" || grep -q "push.*remote" "$EXPLORE_CMD"; then + pass "constraint: no remote push" +else + fail "constraint: no remote push" +fi + +# Manifest written before dispatch +if grep -q "MUST write.*manifest" "$EXPLORE_CMD" || grep -q "BEFORE.*dispatch\|manifest.*BEFORE" "$EXPLORE_CMD"; then + pass "constraint: manifest written before dispatch" +else + fail "constraint: manifest written before dispatch" +fi + +# No nested skills +if grep -q "nested Skills\|nested.*skill" "$EXPLORE_CMD"; then + pass "constraint: no nested skills" +else + fail "constraint: no nested skills" +fi + +# Worker confirmation required before dispatch +if grep -q "explicit.*confirm\|Proceed.*\[y/N\]\|\[y/N\]" "$EXPLORE_CMD"; then + pass "user confirmation required before dispatch" +else + fail "user confirmation required before dispatch" +fi + +echo "" +echo "--- Worker Dispatch Pattern ---" +echo "" + +# Worker dispatch uses isolation: "worktree" +if grep -q 'isolation.*worktree\|worktree.*isolation' "$EXPLORE_CMD"; then + pass "worker dispatch uses isolation: worktree" +else + fail "worker dispatch uses isolation: worktree" +fi + +# 
Single Agent-tool message (parallel dispatch) +if grep -q "single Agent-tool message\|single.*Agent.*message" "$EXPLORE_CMD"; then + pass "parallel dispatch documented as single Agent-tool message" +else + fail "parallel dispatch as single Agent-tool message" +fi + +# Worker branch naming +if grep -q "explore/<RUN_ID>/<dir_slug>" "$EXPLORE_CMD"; then + pass "worker branch naming format documented" +else + fail "worker branch naming format documented" "explore/<RUN_ID>/<dir_slug>" "not found" +fi + +echo "" +echo "--- Result Collection ---" +echo "" + +# Sentinel-based result parsing +if grep -q "EXPLORE_RESULT_JSON_BEGIN" "$EXPLORE_CMD"; then + pass "result collection uses EXPLORE_RESULT_JSON_BEGIN sentinel" +else + fail "result collection uses sentinel markers" +fi + +# worker-results.jsonl append +if grep -q "worker-results.jsonl" "$EXPLORE_CMD"; then + pass "results appended to worker-results.jsonl" +else + fail "results appended to worker-results.jsonl" +fi + +echo "" +echo "--- Report Template Structure ---" +echo "" + +# Two-tier report +if grep -q "Tier 1" "$EXPLORE_CMD" && grep -q "Tier 2" "$EXPLORE_CMD"; then + pass "two-tier report structure documented in command" +else + fail "two-tier report structure in command" "Tier 1 + Tier 2" "not found" +fi + +# Report template placeholders +REPORT_PLACEHOLDERS=( + "<RUN_ID>" + "<BASE_BRANCH>" + "<BASE_COMMIT>" + "<CREATED_AT>" + "<SUMMARY_PARAGRAPH>" + "<WORKER_RESULT_ENTRIES>" +) +for placeholder in "${REPORT_PLACEHOLDERS[@]}"; do + if grep -q "$placeholder" "$REPORT_TEMPLATE"; then + pass "report template contains placeholder $placeholder" + else + fail "report template contains $placeholder" "$placeholder" "not found" + fi +done + +echo "" +echo "--- Validate-explore-idea-io.sh Script Structure ---" +echo "" + +# Script has all required exit codes documented +for code in 1 2 3 4 5 6 7 8 9; do + if grep -q "exit $code" "$VALIDATE_IO_SCRIPT"; then + pass "validate-explore-idea-io.sh has exit $code" + else + fail 
"validate-explore-idea-io.sh has exit $code" + fi +done + +# VALIDATION_SUCCESS emitted on success +if grep -q "VALIDATION_SUCCESS" "$VALIDATE_IO_SCRIPT"; then + pass "validate-explore-idea-io.sh emits VALIDATION_SUCCESS on success" +else + fail "validate-explore-idea-io.sh emits VALIDATION_SUCCESS" +fi + +echo "" +print_test_summary "explore-idea Command Structure Test Summary" diff --git a/tests/test-explore-manifest.sh b/tests/test-explore-manifest.sh new file mode 100755 index 00000000..f3ac06f7 --- /dev/null +++ b/tests/test-explore-manifest.sh @@ -0,0 +1,173 @@ +#!/usr/bin/env bash +# +# Tests for explore-idea manifest and run state structure. +# +# Verifies the manifest.json schema and run directory structure described +# in commands/explore-idea.md and the worker-results.jsonl contract. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +EXPLORE_CMD="$PROJECT_ROOT/commands/explore-idea.md" +WORKER_PROMPT="$PROJECT_ROOT/prompt-template/explore/worker-prompt.md" +REPORT_TEMPLATE="$PROJECT_ROOT/prompt-template/explore/report-template.md" +VALIDATE_IO_SCRIPT="$PROJECT_ROOT/scripts/validate-explore-idea-io.sh" + +echo "==========================================" +echo "explore-idea Manifest and Run State Tests" +echo "==========================================" +echo "" + +echo "--- File Existence ---" +echo "" + +# All required files exist +for f in "$EXPLORE_CMD" "$WORKER_PROMPT" "$REPORT_TEMPLATE"; do + if [[ -f "$f" ]]; then + pass "file exists: $(basename "$f")" + else + fail "file exists: $(basename "$f")" "file found" "not found" + fi +done + +echo "" +echo "--- Manifest JSON Schema (from explore-idea.md) ---" +echo "" + +# manifest.json fields mentioned in command +MANIFEST_FIELDS=( + "run_id" + "created_at" + "directions_json_file" + "draft_path" + "selected_direction_ids" + "base_branch" + "base_commit" + "concurrency" + 
"max_worker_iterations" + "worker_timeout_min" + "codex_timeout_min" + "expected_worker_count" + "runtime_spike_status" + "workers" +) + +for field in "${MANIFEST_FIELDS[@]}"; do + if grep -q "\"$field\"" "$EXPLORE_CMD"; then + pass "manifest.json field documented: $field" + else + fail "manifest.json field documented: $field" "\"$field\" in explore-idea.md" "not found" + fi +done + +echo "" +echo "--- Per-Worker Manifest Entry ---" +echo "" + +WORKER_FIELDS=( + "direction_id" + "dir_slug" + "prompt_path" + "prompt_hash" + "branch_name" + "status" +) + +for field in "${WORKER_FIELDS[@]}"; do + if grep -q "\"$field\"" "$EXPLORE_CMD"; then + pass "per-worker manifest entry documents: $field" + else + fail "per-worker manifest entry documents: $field" "\"$field\"" "not found" + fi +done + +echo "" +echo "--- Run Directory Structure ---" +echo "" + +# Run directory path pattern (defined in validation script, referenced as <RUN_DIR> in command) +if grep -q "\.humanize/explore/" "$VALIDATE_IO_SCRIPT"; then + pass "run directory is under .humanize/explore/ (validate-explore-idea-io.sh)" +else + fail "run directory under .humanize/explore/" ".humanize/explore/" "not found" +fi + +# dispatch-prompts subdirectory +if grep -q "dispatch-prompts" "$EXPLORE_CMD"; then + pass "dispatch-prompts/ subdirectory documented" +else + fail "dispatch-prompts/ subdirectory documented" +fi + +# worker-results.jsonl +if grep -q "worker-results.jsonl" "$EXPLORE_CMD"; then + pass "worker-results.jsonl file documented" +else + fail "worker-results.jsonl file documented" +fi + +# report.md +if grep -q "report.md" "$EXPLORE_CMD"; then + pass "report.md file documented" +else + fail "report.md file documented" +fi + +# .failed sentinel +if grep -q "\.failed" "$EXPLORE_CMD"; then + pass ".failed sentinel file documented for error recovery" +else + fail ".failed sentinel file documented" +fi + +echo "" +echo "--- worker-results.jsonl Schema ---" +echo "" + +# worker-results.jsonl fields 
+JSONL_FIELDS=( + "schema_version" + "run_id" + "direction_id" + "task_status" + "codex_final_verdict" + "tests_passed" + "tests_failed" + "branch_name" + "commit_sha" + "commit_status" + "summary_markdown" +) + +for field in "${JSONL_FIELDS[@]}"; do + if grep -q "\"$field\"" "$EXPLORE_CMD"; then + pass "worker-results.jsonl schema documents: $field" + else + fail "worker-results.jsonl schema documents: $field" "\"$field\"" "not found" + fi +done + +echo "" +echo "--- manifest.json Write Order ---" +echo "" + +# manifest.json must be written BEFORE dispatch +if grep -q "BEFORE" "$EXPLORE_CMD" && grep -q "manifest" "$EXPLORE_CMD"; then + pass "command requires manifest.json written BEFORE dispatch" +else + fail "command requires manifest.json written BEFORE dispatch" +fi + +# report template has required sections +if grep -q "Tier 1" "$REPORT_TEMPLATE" && grep -q "Tier 2" "$REPORT_TEMPLATE"; then + pass "report template contains two-tier ranking sections" +else + fail "report template contains Tier 1 and Tier 2 sections" +fi + +echo "" +print_test_summary "explore-idea Manifest and Run State Test Summary" diff --git a/tests/test-finalize-phase.sh b/tests/test-finalize-phase.sh index 75949696..80e96a7f 100755 --- a/tests/test-finalize-phase.sh +++ b/tests/test-finalize-phase.sh @@ -732,7 +732,9 @@ echo "T-NEG-9b: Codex review log file exists and is empty" # Compute the real cache dir using same logic as loop-codex-stop-hook.sh # Cache path: $XDG_CACHE_HOME/humanize/$SANITIZED_PROJECT_PATH/$LOOP_TIMESTAMP/round-N-codex-review.log LOOP_TIMESTAMP=$(basename "$LOOP_DIR") -SANITIZED_PROJECT_PATH=$(echo "$TEST_DIR" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') +# Canonicalize the test dir so it matches what loop-codex-stop-hook.sh computes via resolve_project_root +CANONICAL_TEST_DIR=$(realpath "$TEST_DIR" 2>/dev/null || echo "$TEST_DIR") +SANITIZED_PROJECT_PATH=$(echo "$CANONICAL_TEST_DIR" | sed 's/[^a-zA-Z0-9._-]/-/g' | sed 's/--*/-/g') 
REVIEW_CACHE_DIR="$XDG_CACHE_HOME/humanize/$SANITIZED_PROJECT_PATH/$LOOP_TIMESTAMP" # Round 5 because we pass CURRENT_ROUND + 1 (4 + 1 = 5) to run_and_handle_code_review REVIEW_LOG="$REVIEW_CACHE_DIR/round-5-codex-review.log" diff --git a/tests/test-gen-idea-dual-write.sh b/tests/test-gen-idea-dual-write.sh new file mode 100755 index 00000000..61742e5f --- /dev/null +++ b/tests/test-gen-idea-dual-write.sh @@ -0,0 +1,128 @@ +#!/usr/bin/env bash +# +# Tests for gen-idea dual-write contract (AC-2). +# +# Verifies the structural contract between validate-gen-idea-io.sh and commands/gen-idea.md: +# - Validation emits DIRECTIONS_JSON_FILE on success +# - Validation prevents write when output already exists (no partial write possible) +# - commands/gen-idea.md contains instructions for dual-write and explore-idea hint +# +# No live Claude invocations — all tests are deterministic shell and file-content checks. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +VALIDATE_SCRIPT="$PROJECT_ROOT/scripts/validate-gen-idea-io.sh" +GEN_IDEA_CMD="$PROJECT_ROOT/commands/gen-idea.md" +VALID_SCHEMA_SCRIPT="$PROJECT_ROOT/scripts/validate-directions-json.sh" + +echo "==========================================" +echo "gen-idea Dual-Write Contract Tests" +echo "==========================================" +echo "" + +setup_test_dir + +# Create mock git repo + plugin root for validate-gen-idea-io.sh +MOCK_REPO="$TEST_DIR/repo" +init_test_git_repo "$MOCK_REPO" +PLUGIN_ROOT="$TEST_DIR/plugin" +mkdir -p "$PLUGIN_ROOT/prompt-template/idea" +touch "$PLUGIN_ROOT/prompt-template/idea/gen-idea-template.md" +export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" + +run_validate() { + (cd "$MOCK_REPO" && bash "$VALIDATE_SCRIPT" "$@") +} + +echo "--- Positive Tests (structural contract) ---" +echo "" + +# PT-1: Validation emits DIRECTIONS_JSON_FILE on success +EXIT_CODE=0 +OUTPUT_DIR="$TEST_DIR/outA" +mkdir -p "$OUTPUT_DIR" +OUTPUT=$(run_validate "test idea" --output "$OUTPUT_DIR/idea.md" 2>&1) || EXIT_CODE=$? 
+if [[ $EXIT_CODE -eq 0 ]] && echo "$OUTPUT" | grep -q "DIRECTIONS_JSON_FILE:"; then + DJSON=$(echo "$OUTPUT" | grep "DIRECTIONS_JSON_FILE:" | sed 's/DIRECTIONS_JSON_FILE: //') + pass "DIRECTIONS_JSON_FILE: $DJSON emitted on success" +else + fail "DIRECTIONS_JSON_FILE emitted on success" "exit 0 + DIRECTIONS_JSON_FILE" "exit=$EXIT_CODE" +fi + +# PT-2: gen-idea.md contains instructions to write companion JSON +if grep -q "DIRECTIONS_JSON_FILE" "$GEN_IDEA_CMD"; then + pass "gen-idea.md references DIRECTIONS_JSON_FILE (dual-write instruction present)" +else + fail "gen-idea.md references DIRECTIONS_JSON_FILE" "DIRECTIONS_JSON_FILE in file" "not found" +fi + +# PT-3: gen-idea.md contains explore-idea hint +if grep -q "explore-idea" "$GEN_IDEA_CMD"; then + pass "gen-idea.md contains explore-idea hint" +else + fail "gen-idea.md contains explore-idea hint" "explore-idea in file" "not found" +fi + +# PT-4: gen-idea.md includes validate-directions-json.sh in allowed-tools +if grep -q "validate-directions-json.sh" "$GEN_IDEA_CMD"; then + pass "gen-idea.md lists validate-directions-json.sh in allowed-tools" +else + fail "gen-idea.md lists validate-directions-json.sh in allowed-tools" "found in allowed-tools" "not found" +fi + +# PT-5: validate-directions-json.sh validates the valid fixture +if command -v jq &>/dev/null; then + VALID_FIXTURE="$SCRIPT_DIR/fixtures/directions/valid.directions.json" + EXIT_CODE=0 + bash "$VALID_SCHEMA_SCRIPT" "$VALID_FIXTURE" > /dev/null 2>&1 || EXIT_CODE=$? 
+ if [[ $EXIT_CODE -eq 0 ]]; then + pass "valid fixture passes validate-directions-json.sh" + else + fail "valid fixture passes validate-directions-json.sh" "exit 0" "exit=$EXIT_CODE" + fi +else + skip "jq not available — skipping schema validation test" +fi + +echo "" +echo "--- Negative Tests (no-write-on-failure contract) ---" +echo "" + +# NT-1: When output already exists, validation exits non-zero (draft cannot be written) +EXIT_CODE=0 +OUTPUT_DIR="$TEST_DIR/outB" +mkdir -p "$OUTPUT_DIR" +touch "$OUTPUT_DIR/existing.md" +OUTPUT=$(run_validate "test idea" --output "$OUTPUT_DIR/existing.md" 2>&1) || EXIT_CODE=$? +if [[ $EXIT_CODE -ne 0 ]]; then + pass "validation fails when draft already exists (no-write contract upheld)" +else + fail "validation fails when draft already exists" "non-zero exit" "exit 0" +fi + +# NT-2: When companion JSON already exists, validation exits non-zero (neither file written) +EXIT_CODE=0 +OUTPUT_DIR="$TEST_DIR/outC" +mkdir -p "$OUTPUT_DIR" +touch "$OUTPUT_DIR/idea.directions.json" +OUTPUT=$(run_validate "test idea" --output "$OUTPUT_DIR/idea.md" 2>&1) || EXIT_CODE=$? 
+if [[ $EXIT_CODE -ne 0 ]]; then + pass "validation fails when companion already exists (no-write contract upheld)" +else + fail "validation fails when companion already exists" "non-zero exit" "exit 0" +fi + +# NT-3: gen-idea.md error handling mentions not writing OUTPUT_FILE on error +if grep -q "DIRECTIONS_JSON_FILE" "$GEN_IDEA_CMD" && grep -q "Error Handling" "$GEN_IDEA_CMD"; then + pass "gen-idea.md Error Handling section present alongside dual-write instructions" +else + fail "gen-idea.md Error Handling section present" "Error Handling section" "not found" +fi + +echo "" +print_test_summary "gen-idea Dual-Write Contract Test Summary" diff --git a/tests/test-gen-plan.sh b/tests/test-gen-plan.sh index b5bcab07..e16f24e1 100755 --- a/tests/test-gen-plan.sh +++ b/tests/test-gen-plan.sh @@ -69,7 +69,7 @@ fi echo "" echo "PT-2: Command description validation" if [[ -f "$GEN_PLAN_CMD" ]]; then - DESC=$(sed -n '/^---$/,/^---$/{ /^description:/{ s/^description:[[:space:]]*//p; q; } }' "$GEN_PLAN_CMD") + DESC=$(awk 'BEGIN{f=0} /^---$/{f++; next} f==1 && /^description:/{sub(/^description:[[:space:]]*/,""); print; exit}' "$GEN_PLAN_CMD") if [[ -n "$DESC" ]]; then pass "gen-plan.md has description: ${DESC:0:50}..." 
else @@ -252,7 +252,7 @@ fi echo "" echo "PT-6: Agent name validation" if [[ -f "$RELEVANCE_AGENT" ]]; then - NAME=$(sed -n '/^---$/,/^---$/{ /^name:/{ s/^name:[[:space:]]*//p; q; } }' "$RELEVANCE_AGENT") + NAME=$(awk 'BEGIN{f=0} /^---$/{f++; next} f==1 && /^name:/{sub(/^name:[[:space:]]*/,""); print; exit}' "$RELEVANCE_AGENT") if [[ "$NAME" == "draft-relevance-checker" ]]; then pass "draft-relevance-checker agent has correct name field" else @@ -266,7 +266,7 @@ fi echo "" echo "PT-7: Agent model specification validation" if [[ -f "$RELEVANCE_AGENT" ]]; then - MODEL=$(sed -n '/^---$/,/^---$/{ /^model:/{ s/^model:[[:space:]]*//p; q; } }' "$RELEVANCE_AGENT") + MODEL=$(awk 'BEGIN{f=0} /^---$/{f++; next} f==1 && /^model:/{sub(/^model:[[:space:]]*/,""); print; exit}' "$RELEVANCE_AGENT") if [[ "$MODEL" == "haiku" ]]; then pass "draft-relevance-checker agent uses haiku model" else @@ -521,7 +521,7 @@ fi # Verify agent has valid model if [[ -f "$RELEVANCE_AGENT" ]]; then - MODEL=$(sed -n '/^---$/,/^---$/{ /^model:/{ s/^model:[[:space:]]*//p; q; } }' "$RELEVANCE_AGENT") + MODEL=$(awk 'BEGIN{f=0} /^---$/{f++; next} f==1 && /^model:/{sub(/^model:[[:space:]]*/,""); print; exit}' "$RELEVANCE_AGENT") if [[ -n "$MODEL" ]]; then if validate_model_name "$MODEL"; then pass "NT-6c: draft-relevance-checker has valid model: $MODEL" diff --git a/tests/test-monitor-runtime.sh b/tests/test-monitor-runtime.sh index e146adaf..1a0f6e97 100755 --- a/tests/test-monitor-runtime.sh +++ b/tests/test-monitor-runtime.sh @@ -344,6 +344,25 @@ _cleanup() { echo "CLEANUP_BY_SIGINT" } +# Probe whether SIGINT is deliverable in this shell context. +# In parallel test runners (background processes), POSIX mandates SIGINT=SIG_IGN; +# bash cannot receive the signal even after installing a trap. +# Detection: install a probe, send SIGINT to self, wait briefly. 
+_sigint_deliverable=false +_probe() { _sigint_deliverable=true; } +trap '_probe' INT 2>/dev/null +kill -INT $$ 2>/dev/null +sleep 0.15 +trap - INT 2>/dev/null + +if [[ "$_sigint_deliverable" == "false" ]]; then + # SIGINT=SIG_IGN in this context (parallel runner background process). + # Runtime delivery cannot be tested here; static verification is in Test 7. + echo "CLEANUP_BY_SIGINT" + echo "SIGINT_HANDLED" + exit 0 +fi + # Set up trap like humanize.sh does trap '_cleanup' INT TERM @@ -354,8 +373,8 @@ trap '_cleanup' INT TERM ) & child_pid=$! -# Wait for signal (up to 1 second) -for i in {1..10}; do +# Wait for signal (up to 5 seconds — wider window absorbs parallel-runner latency) +for i in {1..50}; do sleep 0.1 if [[ "$cleanup_triggered" == "true" ]]; then break @@ -454,8 +473,8 @@ TRAPINT() { ) & child_pid=$! -# Wait for signal (up to 1 second) -for i in {1..10}; do +# Wait for signal (up to 5 seconds — wider window absorbs parallel-runner latency) +for i in {1..50}; do sleep 0.1 if [[ "$cleanup_triggered" == "true" ]]; then break diff --git a/tests/test-refine-plan.sh b/tests/test-refine-plan.sh index c43ba60f..780f51d9 100755 --- a/tests/test-refine-plan.sh +++ b/tests/test-refine-plan.sh @@ -117,7 +117,7 @@ assert_equals() { frontmatter_value() { local file="$1" local key="$2" - sed -n "/^---$/,/^---$/{ /^${key}:[[:space:]]*/{ s/^${key}:[[:space:]]*//p; q; } }" "$file" + awk -v k="$key" 'BEGIN{f=0} /^---$/{f++; next} f==1 && $0 ~ "^"k":"{sub("^"k":[[:space:]]*",""); print; exit}' "$file" } json_first_string_value() { @@ -139,7 +139,7 @@ trim_string() { } collapse_whitespace() { - printf '%s' "$1" | tr '\n' ' ' | sed 's/[[:space:]]\+/ /g; s/^ //; s/ $//' + printf '%s' "$1" | tr -s '[:space:]' ' ' | sed 's/^ //; s/ $//' } VALIDATOR_OUTPUT="" @@ -530,17 +530,20 @@ scan_reference_comments() { } comment_matches_question() { - local text="${1,,}" + local text + text=$(echo "$1" | tr '[:upper:]' '[:lower:]') [[ "$text" == *"why"* || "$text" == 
*"how"* || "$text" == *"what"* || "$text" == *"explain"* || "$text" == *"clarify"* || "$text" == *"unclear"* ]] } comment_matches_change_request() { - local text="${1,,}" + local text + text=$(echo "$1" | tr '[:upper:]' '[:lower:]') [[ "$text" == *"add"* || "$text" == *"remove"* || "$text" == *"delete"* || "$text" == *"rewrite"* || "$text" == *"restore"* || "$text" == *"rename"* || "$text" == *"split"* || "$text" == *"merge"* || "$text" == *"modify"* ]] } comment_matches_research_request() { - local text="${1,,}" + local text + text=$(echo "$1" | tr '[:upper:]' '[:lower:]') [[ "$text" == *"investigate"* || "$text" == *"compare"* || "$text" == *"confirm"* || "$text" == *"current behavior"* || "$text" == *"gather evidence"* || "$text" == *"before deciding"* ]] } @@ -561,7 +564,7 @@ normalize_alt_language() { local raw local lower raw="$(trim_string "$1")" - lower="${raw,,}" + lower=$(echo "$raw" | tr '[:upper:]' '[:lower:]') case "$lower" in chinese|zh) echo "Chinese|zh|variant" ;; diff --git a/tests/test-stop-gate.sh b/tests/test-stop-gate.sh index 08b037b3..0fc1deaf 100755 --- a/tests/test-stop-gate.sh +++ b/tests/test-stop-gate.sh @@ -69,7 +69,7 @@ setup_active_loop_fixture "$T1_DIR/project" set +e ( cd "$T1_DIR/project" - "$GATE_SCRIPT" + CLAUDE_PROJECT_DIR="" "$GATE_SCRIPT" ) > "$T1_DIR/out.txt" 2>&1 EXIT1=$? set -e @@ -125,7 +125,7 @@ git -C "$T3_DIR/project" add -f .humanize/rlcr/2026-03-01_00-00-00/goal-tracker. set +e ( cd "$T3_DIR/project" - "$GATE_SCRIPT" + CLAUDE_PROJECT_DIR="" "$GATE_SCRIPT" ) > "$T3_DIR/out.txt" 2>&1 EXIT3=$? set -e @@ -158,7 +158,7 @@ git -C "$T4_DIR/project" add -f .humanize-backup .humanizeconfig set +e ( cd "$T4_DIR/project" - "$GATE_SCRIPT" + CLAUDE_PROJECT_DIR="" "$GATE_SCRIPT" ) > "$T4_DIR/out.txt" 2>&1 EXIT4=$? set -e @@ -184,7 +184,7 @@ mkdir -p "$T5_DIR/empty-project" set +e ( cd "$T5_DIR/empty-project" - "$GATE_SCRIPT" + CLAUDE_PROJECT_DIR="" "$GATE_SCRIPT" ) > "$T5_DIR/out.txt" 2>&1 EXIT5=$? 
set -e diff --git a/tests/test-unified-codex-config.sh b/tests/test-unified-codex-config.sh index 5948193f..f429da05 100755 --- a/tests/test-unified-codex-config.sh +++ b/tests/test-unified-codex-config.sh @@ -16,6 +16,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" source "$SCRIPT_DIR/test-helpers.sh" +source "$PROJECT_ROOT/scripts/portable-timeout.sh" # Helper: assert_eq DESCRIPTION EXPECTED ACTUAL # Calls pass/fail based on string equality @@ -584,7 +585,7 @@ PLAN_EOF # Run setup-rlcr-loop.sh with --codex-model override setup_exit=0 - output=$(cd "$EXEC_PROJECT" && CLAUDE_PROJECT_DIR="$EXEC_PROJECT" timeout 30 bash "$SETUP_SCRIPT" --codex-model gpt-5.3:xhigh --base-branch master --track-plan-file plan.md 2>&1) || setup_exit=$? + output=$(cd "$EXEC_PROJECT" && CLAUDE_PROJECT_DIR="$EXEC_PROJECT" run_with_timeout 30 bash "$SETUP_SCRIPT" --codex-model gpt-5.3:xhigh --base-branch master --track-plan-file plan.md 2>&1) || setup_exit=$? 
assert_eq "setup execution: setup-rlcr-loop.sh exited successfully" \ "0" "$setup_exit" @@ -735,7 +736,7 @@ MOCK_EOF CLAUDE_PROJECT_DIR="$ASK_CFG_PROJECT" \ XDG_CONFIG_HOME="$TEST_DIR/no-user-config" \ PATH="$MOCK_BIN:$PATH" \ - timeout 30 bash "$ASK_CODEX" "test question" 2>&1 >/dev/null) || true + run_with_timeout 30 bash "$ASK_CODEX" "test question" 2>&1 >/dev/null) || true # Stderr should report config-backed model and effort if echo "$ask_stderr" | grep -q 'model=o3-mini'; then @@ -755,7 +756,7 @@ MOCK_EOF CLAUDE_PROJECT_DIR="$ASK_CFG_PROJECT" \ XDG_CONFIG_HOME="$TEST_DIR/no-user-config" \ PATH="$MOCK_BIN:$PATH" \ - timeout 30 bash "$ASK_CODEX" --codex-model override-model:xhigh "test question" 2>&1 >/dev/null) || true + run_with_timeout 30 bash "$ASK_CODEX" --codex-model override-model:xhigh "test question" 2>&1 >/dev/null) || true if echo "$override_stderr" | grep -q 'model=override-model'; then pass "ask-codex runtime: --codex-model override reported in stderr (override-model)" diff --git a/tests/test-validate-explore-idea-io.sh b/tests/test-validate-explore-idea-io.sh new file mode 100755 index 00000000..f0506a0a --- /dev/null +++ b/tests/test-validate-explore-idea-io.sh @@ -0,0 +1,336 @@ +#!/usr/bin/env bash +# +# Tests for validate-explore-idea-io.sh — explore-idea input validation. +# +# Covers: +# - Exit codes 1-9 for all error conditions +# - Success: emits VALIDATION_SUCCESS + structured key-value output +# - Direction selection: default, --directions by id, --directions by source_index +# - Cap enforcement: concurrency, iterations, timeouts +# - Dirty checkout hard-fail +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +VALIDATE_SCRIPT="$PROJECT_ROOT/scripts/validate-explore-idea-io.sh" +VALID_FIXTURE="$SCRIPT_DIR/fixtures/directions/valid.directions.json" + +echo "==========================================" +echo "validate-explore-idea-io.sh Tests" +echo "==========================================" +echo "" + +if ! command -v jq &>/dev/null; then + skip "jq not available — skipping all tests" + print_test_summary "validate-explore-idea-io.sh Test Summary" + exit 0 +fi + +setup_test_dir + +# Create a mock git repo (clean state) +MOCK_REPO="$TEST_DIR/repo" +init_test_git_repo "$MOCK_REPO" + +# Copy valid fixture into the mock repo and commit it +cp "$VALID_FIXTURE" "$MOCK_REPO/valid.directions.json" +(cd "$MOCK_REPO" && git add valid.directions.json && git commit -q -m "add directions") + +# Create a draft .md alongside the companion +(cd "$MOCK_REPO" && echo "draft content" > draft.md && cp valid.directions.json draft.directions.json && git add draft.md draft.directions.json && git commit -q -m "add draft") + +# Set up plugin root with required templates +PLUGIN_ROOT="$TEST_DIR/plugin" +mkdir -p "$PLUGIN_ROOT/scripts" +mkdir -p "$PLUGIN_ROOT/prompt-template/explore" +cp "$PROJECT_ROOT/scripts/validate-directions-json.sh" "$PLUGIN_ROOT/scripts/" +touch "$PLUGIN_ROOT/prompt-template/explore/worker-prompt.md" +touch "$PLUGIN_ROOT/prompt-template/explore/report-template.md" + +# Helper: run validation inside the mock repo (clean state) +run_validate() { + (cd "$MOCK_REPO" && CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" bash "$VALIDATE_SCRIPT" "$@") +} + +# ---------------------------------------- +# Negative Tests: error exit codes +# ---------------------------------------- + +echo "--- Negative Tests: error exit codes ---" +echo "" + +# Exit 1: missing input +EXIT_CODE=0 +run_validate 2>/dev/null || EXIT_CODE=$? 
+if [[ $EXIT_CODE -eq 1 ]]; then + pass "exit 1 when no input path provided" +else + fail "exit 1 when no input path provided" "exit 1" "exit=$EXIT_CODE" +fi + +# Exit 2: file not found (.directions.json) +EXIT_CODE=0 +run_validate "$MOCK_REPO/nonexistent.directions.json" 2>/dev/null || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 2 ]]; then + pass "exit 2 when .directions.json not found" +else + fail "exit 2 when .directions.json not found" "exit 2" "exit=$EXIT_CODE" +fi + +# Exit 2: draft .md not found +EXIT_CODE=0 +run_validate "$MOCK_REPO/missing.md" 2>/dev/null || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 2 ]]; then + pass "exit 2 when draft .md not found" +else + fail "exit 2 when draft .md not found" "exit 2" "exit=$EXIT_CODE" +fi + +# Exit 3: .md exists but companion .directions.json missing +ORPHAN_MD="$MOCK_REPO/orphan.md" +echo "no companion" > "$ORPHAN_MD" +(cd "$MOCK_REPO" && git add orphan.md && git commit -q -m "add orphan") +EXIT_CODE=0 +run_validate "$ORPHAN_MD" 2>/dev/null || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 3 ]]; then + pass "exit 3 when companion .directions.json missing for .md" +else + fail "exit 3 when companion .directions.json missing" "exit 3" "exit=$EXIT_CODE" +fi + +# Exit 4: unsupported extension +JUNK_FILE="$MOCK_REPO/idea.txt" +echo "txt" > "$JUNK_FILE" +(cd "$MOCK_REPO" && git add idea.txt && git commit -q -m "add txt") +EXIT_CODE=0 +run_validate "$JUNK_FILE" 2>/dev/null || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 4 ]]; then + pass "exit 4 for unsupported file extension" +else + fail "exit 4 for unsupported extension" "exit 4" "exit=$EXIT_CODE" +fi + +# Exit 5: invalid JSON schema +BAD_JSON_FILE="$TEST_DIR/bad.directions.json" +echo '{"schema_version": 99, "directions": []}' > "$BAD_JSON_FILE" +EXIT_CODE=0 +run_validate "$BAD_JSON_FILE" 2>/dev/null || EXIT_CODE=$? 
+if [[ $EXIT_CODE -eq 5 ]]; then + pass "exit 5 for invalid directions.json schema" +else + fail "exit 5 for invalid schema" "exit 5" "exit=$EXIT_CODE" +fi + +# Exit 6: --concurrency above cap +EXIT_CODE=0 +run_validate "$MOCK_REPO/valid.directions.json" --concurrency 11 2>/dev/null || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 6 ]]; then + pass "exit 6 when --concurrency exceeds cap (11 > 10)" +else + fail "exit 6 when concurrency exceeds cap" "exit 6" "exit=$EXIT_CODE" +fi + +# Exit 6: --max-worker-iterations above cap +EXIT_CODE=0 +run_validate "$MOCK_REPO/valid.directions.json" --max-worker-iterations 4 2>/dev/null || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 6 ]]; then + pass "exit 6 when --max-worker-iterations exceeds cap (4 > 3)" +else + fail "exit 6 when max-worker-iterations exceeds cap" "exit 6" "exit=$EXIT_CODE" +fi + +# Exit 6: unknown --directions selector +EXIT_CODE=0 +run_validate "$MOCK_REPO/valid.directions.json" --directions "dir-99-nonexistent" 2>/dev/null || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 6 ]]; then + pass "exit 6 for unknown --directions selector" +else + fail "exit 6 for unknown direction selector" "exit 6" "exit=$EXIT_CODE" +fi + +# Exit 6: mixed selector forms that resolve to the same direction_id (regression for post-resolution dedup) +EXIT_CODE=0 +run_validate "$MOCK_REPO/valid.directions.json" --directions "1,dir-01-event-sourcing" 2>/dev/null || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 6 ]]; then + pass "exit 6 for mixed-form selectors resolving to same direction_id" +else + fail "exit 6 for mixed-form duplicate resolved direction_ids" "exit 6" "exit=$EXIT_CODE" +fi + +# Exit 6: unknown option +EXIT_CODE=0 +run_validate "$MOCK_REPO/valid.directions.json" --bad-option 2>/dev/null || EXIT_CODE=$? 
+if [[ $EXIT_CODE -eq 6 ]]; then + pass "exit 6 for unknown option" +else + fail "exit 6 for unknown option" "exit 6" "exit=$EXIT_CODE" +fi + +# Exit 7: dirty checkout +DIRTY_REPO="$TEST_DIR/dirty-repo" +init_test_git_repo "$DIRTY_REPO" +cp "$VALID_FIXTURE" "$DIRTY_REPO/valid.directions.json" +(cd "$DIRTY_REPO" && git add valid.directions.json && git commit -q -m "add") +cp "$PLUGIN_ROOT/prompt-template/explore/worker-prompt.md" "$DIRTY_REPO/dirty.txt" +# Modify a tracked file to make it dirty +echo "dirty change" >> "$DIRTY_REPO/file.txt" +EXIT_CODE=0 +(cd "$DIRTY_REPO" && CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" bash "$VALIDATE_SCRIPT" "$DIRTY_REPO/valid.directions.json" 2>/dev/null) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 7 ]]; then + pass "exit 7 for dirty checkout with uncommitted tracked changes" +else + fail "exit 7 for dirty checkout" "exit 7" "exit=$EXIT_CODE" +fi + +# Exit 7: dirty checkout with enough files to catch git|grep SIGPIPE regressions +DIRTY_MANY_REPO="$TEST_DIR/dirty-many-repo" +init_test_git_repo "$DIRTY_MANY_REPO" +cp "$VALID_FIXTURE" "$DIRTY_MANY_REPO/valid.directions.json" +( + cd "$DIRTY_MANY_REPO" + mkdir -p dirty-files + for i in $(seq 1 2000); do + printf 'clean\n' > "dirty-files/file-$i.txt" + done + git add valid.directions.json dirty-files + git commit -q -m "add many tracked files" + for i in $(seq 1 2000); do + printf 'dirty\n' >> "dirty-files/file-$i.txt" + done +) +EXIT_CODE=0 +DIRTY_OUTPUT=$( + cd "$DIRTY_MANY_REPO" + CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" bash "$VALIDATE_SCRIPT" "$DIRTY_MANY_REPO/valid.directions.json" 2>&1 +) || EXIT_CODE=$? 
+if [[ $EXIT_CODE -eq 7 ]] \ + && grep -q "Dirty files:" <<<"$DIRTY_OUTPUT" \ + && grep -q "dirty-files/file-1.txt" <<<"$DIRTY_OUTPUT"; then + pass "exit 7 and lists dirty files when many tracked files are modified" +else + fail "exit 7 and lists dirty files when many tracked files are modified" \ + "exit 7 + dirty file list" \ + "exit=$EXIT_CODE output=$DIRTY_OUTPUT" +fi + +# Exit 9: missing worker prompt template +NO_TMPL_PLUGIN="$TEST_DIR/plugin-no-tmpl" +mkdir -p "$NO_TMPL_PLUGIN/scripts" +mkdir -p "$NO_TMPL_PLUGIN/prompt-template/explore" +cp "$PROJECT_ROOT/scripts/validate-directions-json.sh" "$NO_TMPL_PLUGIN/scripts/" +# No worker-prompt.md or report-template.md +EXIT_CODE=0 +(cd "$MOCK_REPO" && CLAUDE_PLUGIN_ROOT="$NO_TMPL_PLUGIN" bash "$VALIDATE_SCRIPT" "$MOCK_REPO/valid.directions.json" 2>/dev/null) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 9 ]]; then + pass "exit 9 when worker prompt template missing" +else + fail "exit 9 when templates missing" "exit 9" "exit=$EXIT_CODE" +fi + +# ---------------------------------------- +# Positive Tests: success output +# ---------------------------------------- + +echo "" +echo "--- Positive Tests: success output ---" +echo "" + +# Success: VALIDATION_SUCCESS emitted +EXIT_CODE=0 +OUTPUT=$(run_validate "$MOCK_REPO/valid.directions.json" 2>/dev/null) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 0 ]] && echo "$OUTPUT" | grep -q "VALIDATION_SUCCESS"; then + pass "exits 0 with VALIDATION_SUCCESS for valid .directions.json" +else + fail "exits 0 with VALIDATION_SUCCESS" "exit 0 + VALIDATION_SUCCESS" "exit=$EXIT_CODE" +fi + +# Success: all required keys present in output +REQUIRED_KEYS=( + "DIRECTIONS_JSON_FILE:" + "RUN_ID:" + "RUN_DIR:" + "BASE_BRANCH:" + "BASE_COMMIT:" + "SELECTED_DIRECTION_IDS:" + "EFFECTIVE_CONCURRENCY:" + "MAX_WORKER_ITERATIONS:" + "WORKER_TIMEOUT_MIN:" + "CODEX_TIMEOUT_MIN:" + "WORKER_PROMPT_TEMPLATE:" + "REPORT_TEMPLATE:" +) +ALL_KEYS_PRESENT=true +for key in "${REQUIRED_KEYS[@]}"; do + if ! 
echo "$OUTPUT" | grep -q "^$key"; then + ALL_KEYS_PRESENT=false + fail "success output contains $key" + break + fi +done +if [[ "$ALL_KEYS_PRESENT" == "true" ]]; then + pass "success output contains all required key-value pairs" +fi + +# Success: .md draft input resolves companion +EXIT_CODE=0 +OUTPUT_MD=$(run_validate "$MOCK_REPO/draft.md" 2>/dev/null) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 0 ]] && echo "$OUTPUT_MD" | grep -q "VALIDATION_SUCCESS"; then + pass "exits 0 for .md input with companion .directions.json" +else + fail "exits 0 for .md input" "exit 0 + VALIDATION_SUCCESS" "exit=$EXIT_CODE" +fi + +# Direction selection by direction_id +EXIT_CODE=0 +OUTPUT_DIR=$(run_validate "$MOCK_REPO/valid.directions.json" --directions "dir-00-command-history" 2>/dev/null) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 0 ]] && echo "$OUTPUT_DIR" | grep -q "dir-00-command-history"; then + pass "--directions by direction_id selects the correct direction" +else + fail "--directions by direction_id" "dir-00-command-history in SELECTED" "exit=$EXIT_CODE" +fi + +# Direction selection by source_index +EXIT_CODE=0 +OUTPUT_IDX=$(run_validate "$MOCK_REPO/valid.directions.json" --directions "1" 2>/dev/null) || EXIT_CODE=$? 
+if [[ $EXIT_CODE -eq 0 ]] && echo "$OUTPUT_IDX" | grep -q "dir-01-event-sourcing"; then + pass "--directions by source_index resolves to correct direction_id" +else + fail "--directions by source_index" "dir-01-event-sourcing in SELECTED" "exit=$EXIT_CODE" +fi + +# Effective concurrency capped to selected count (1 direction selected, concurrency=6 → effective=1); "|| true" lets a missing key reach fail instead of aborting under set -e/pipefail +EFFECTIVE=$(echo "$OUTPUT_DIR" | grep "^EFFECTIVE_CONCURRENCY:" | sed 's/EFFECTIVE_CONCURRENCY: //') || true +if [[ "$EFFECTIVE" == "1" ]]; then + pass "EFFECTIVE_CONCURRENCY capped to selected direction count" +else + fail "EFFECTIVE_CONCURRENCY capped to direction count" "1" "$EFFECTIVE" +fi + +echo "" +echo "--- Static Contract Tests ---" +echo "" + +if grep -q 'Do NOT run `git checkout <BASE_BRANCH>`' "$VALIDATE_SCRIPT" \ + && grep -q "detached HEAD" "$VALIDATE_SCRIPT"; then + pass "worker base-anchor contract documents detached HEAD without checking out BASE_BRANCH" +else + fail "worker base-anchor contract documents detached HEAD without checking out BASE_BRANCH" \ + "detached HEAD + no checkout language" \ + "missing" +fi + +if grep -q 'diff --name-only HEAD --' "$VALIDATE_SCRIPT" \ + && ! grep -q 'diff --name-only HEAD .*| grep -q' "$VALIDATE_SCRIPT"; then + pass "dirty checkout check captures git diff output without grep -q pipeline" +else + fail "dirty checkout check captures git diff output without grep -q pipeline" \ + "capture-first dirty check" \ + "missing" +fi + +echo "" +print_test_summary "validate-explore-idea-io.sh Test Summary" diff --git a/tests/test-validate-gen-idea-io.sh b/tests/test-validate-gen-idea-io.sh new file mode 100755 index 00000000..313fb90f --- /dev/null +++ b/tests/test-validate-gen-idea-io.sh @@ -0,0 +1,162 @@ +#!/usr/bin/env bash +# +# Tests for validate-gen-idea-io.sh — companion JSON derivation and collision detection. 
+# +# Covers: +# - .md suffix enforcement on --output +# - DIRECTIONS_JSON_FILE derivation in stdout on success +# - Companion collision rejection (exit 8) +# - Existing output file rejection still works (exit 4) +# - Subdir companion path derivation +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +VALIDATE_SCRIPT="$PROJECT_ROOT/scripts/validate-gen-idea-io.sh" + +echo "==========================================" +echo "validate-gen-idea-io.sh Tests" +echo "==========================================" +echo "" + +setup_test_dir + +# Create a mock git repo so the script can call git rev-parse +MOCK_REPO="$TEST_DIR/repo" +init_test_git_repo "$MOCK_REPO" + +# Create a valid template tree so exit code 7 does not fire +PLUGIN_ROOT="$TEST_DIR/plugin" +mkdir -p "$PLUGIN_ROOT/prompt-template/idea" +touch "$PLUGIN_ROOT/prompt-template/idea/gen-idea-template.md" +export CLAUDE_PLUGIN_ROOT="$PLUGIN_ROOT" + +# Helper: run the validation script inside the mock repo +run_validate() { + (cd "$MOCK_REPO" && bash "$VALIDATE_SCRIPT" "$@") +} + +# ---------------------------------------- +# PT-1: Success with .md output emits DIRECTIONS_JSON_FILE +# ---------------------------------------- +echo "--- Positive Tests ---" +echo "" + +EXIT_CODE=0 +OUTPUT_DIR="$TEST_DIR/out1" +mkdir -p "$OUTPUT_DIR" +OUTPUT=$(run_validate "test idea text" --output "$OUTPUT_DIR/foo.md" 2>&1) || EXIT_CODE=$? 
+if [[ $EXIT_CODE -eq 0 ]] \ + && echo "$OUTPUT" | grep -q "VALIDATION_SUCCESS" \ + && echo "$OUTPUT" | grep -q "DIRECTIONS_JSON_FILE: "; then + DJSON=$(echo "$OUTPUT" | grep "DIRECTIONS_JSON_FILE:" | sed 's/DIRECTIONS_JSON_FILE: //') + if [[ "$DJSON" == *"foo.directions.json" ]]; then + pass "success: DIRECTIONS_JSON_FILE emitted with .directions.json path" + else + fail "success: DIRECTIONS_JSON_FILE path ends in .directions.json" "*.directions.json" "$DJSON" + fi +else + fail "success: DIRECTIONS_JSON_FILE emitted on valid .md output" "exit 0 + DIRECTIONS_JSON_FILE" "exit=$EXIT_CODE" +fi + +# PT-2: Subdir companion path derived correctly ("|| true": a missing key must reach fail, not abort under set -e/pipefail) +EXIT_CODE=0 +OUTPUT_DIR="$TEST_DIR/out2" +mkdir -p "$OUTPUT_DIR/subdir" +OUTPUT=$(run_validate "test idea text" --output "$OUTPUT_DIR/subdir/bar.md" 2>&1) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 0 ]]; then + DJSON=$(echo "$OUTPUT" | grep "DIRECTIONS_JSON_FILE:" | sed 's/DIRECTIONS_JSON_FILE: //') || true + if [[ "$DJSON" == *"subdir/bar.directions.json" ]]; then + pass "subdir: companion path derived as subdir/bar.directions.json" + else + fail "subdir: companion path includes subdir" "*subdir/bar.directions.json" "$DJSON" + fi +else + fail "subdir: exits 0 for valid subdir output path" "exit 0" "exit=$EXIT_CODE" +fi + +echo "" +echo "--- Negative Tests ---" +echo "" + +# NT-1: No .md suffix — exit 6 +EXIT_CODE=0 +OUTPUT=$(run_validate "test idea text" --output "$TEST_DIR/foo" 2>&1) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 6 ]] && echo "$OUTPUT" | grep -qi "md"; then + pass "no .md suffix: exits 6 with .md error" +else + fail "no .md suffix: exits 6" "exit 6 + md message" "exit=$EXIT_CODE" +fi + +# NT-2: .txt suffix — exit 6 +EXIT_CODE=0 +OUTPUT=$(run_validate "test idea text" --output "$TEST_DIR/foo.txt" 2>&1) || EXIT_CODE=$? 
+if [[ $EXIT_CODE -eq 6 ]]; then + pass ".txt suffix: exits 6" +else + fail ".txt suffix: exits 6" "exit 6" "exit=$EXIT_CODE" +fi + +# NT-3: Companion JSON already exists — exit 8 +EXIT_CODE=0 +OUTPUT_DIR="$TEST_DIR/out3" +mkdir -p "$OUTPUT_DIR" +touch "$OUTPUT_DIR/foo.directions.json" +OUTPUT=$(run_validate "test idea text" --output "$OUTPUT_DIR/foo.md" 2>&1) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 8 ]] && echo "$OUTPUT" | grep -qi "companion"; then + pass "companion exists: exits 8 with companion error" +else + fail "companion exists: exits 8" "exit 8 + companion message" "exit=$EXIT_CODE" +fi + +# NT-4: Output draft already exists — exit 4 (existing behavior preserved) +EXIT_CODE=0 +OUTPUT_DIR="$TEST_DIR/out4" +mkdir -p "$OUTPUT_DIR" +touch "$OUTPUT_DIR/bar.md" +OUTPUT=$(run_validate "test idea text" --output "$OUTPUT_DIR/bar.md" 2>&1) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 4 ]]; then + pass "output exists: exits 4 (existing behavior)" +else + fail "output exists: exits 4" "exit 4" "exit=$EXIT_CODE" +fi + +# NT-5: Missing idea — exit 1 +EXIT_CODE=0 +OUTPUT=$(run_validate 2>&1) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 1 ]]; then + pass "missing idea: exits 1" +else + fail "missing idea: exits 1" "exit 1" "exit=$EXIT_CODE" +fi + +# NT-6: Slash-containing idea treated as inline, not a missing file path +# Regression for: whitespace-free input containing "/" was misclassified as a +# file path and failed with INPUT_NOT_FOUND (exit 2). +EXIT_CODE=0 +OUTPUT_DIR="$TEST_DIR/out5" +mkdir -p "$OUTPUT_DIR" +OUTPUT=$(run_validate "undo/redo" --output "$OUTPUT_DIR/undo-redo.md" 2>&1) || EXIT_CODE=$? 
+if [[ $EXIT_CODE -eq 0 ]] && echo "$OUTPUT" | grep -q "VALIDATION_SUCCESS"; then + pass "slash idea (undo/redo): treated as inline text, exits 0" +else + fail "slash idea (undo/redo): treated as inline text" "exit 0 + VALIDATION_SUCCESS" "exit=$EXIT_CODE" +fi + +# NT-7: Another slash idea — CI/CD +EXIT_CODE=0 +OUTPUT_DIR="$TEST_DIR/out6" +mkdir -p "$OUTPUT_DIR" +OUTPUT=$(run_validate "CI/CD" --output "$OUTPUT_DIR/cicd.md" 2>&1) || EXIT_CODE=$? +if [[ $EXIT_CODE -eq 0 ]] && echo "$OUTPUT" | grep -q "VALIDATION_SUCCESS"; then + pass "slash idea (CI/CD): treated as inline text, exits 0" +else + fail "slash idea (CI/CD): treated as inline text" "exit 0 + VALIDATION_SUCCESS" "exit=$EXIT_CODE" +fi + +echo "" +print_test_summary "validate-gen-idea-io.sh Test Summary" diff --git a/tests/test-worker-result-contract.sh b/tests/test-worker-result-contract.sh new file mode 100755 index 00000000..d2cbaf75 --- /dev/null +++ b/tests/test-worker-result-contract.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +# +# Tests for explore-idea worker result contract. +# +# Verifies the structural contract of the worker prompt template: +# - Template file exists +# - Contains result sentinel markers +# - Contains required placeholder variables +# - Contains required result JSON fields +# - Hard constraints are present +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +WORKER_PROMPT="$PROJECT_ROOT/prompt-template/explore/worker-prompt.md" + +echo "==========================================" +echo "Worker Result Contract Tests" +echo "==========================================" +echo "" + +echo "--- Template Existence ---" +echo "" + +# Template file exists +if [[ -f "$WORKER_PROMPT" ]]; then + pass "worker-prompt.md template exists" +else + fail "worker-prompt.md template exists" "file found" "not found" +fi + +echo "" +echo "--- Sentinel Markers ---" +echo "" + +# Result sentinel begin marker +if grep -q "=== EXPLORE_RESULT_JSON_BEGIN ===" "$WORKER_PROMPT"; then + pass "template contains EXPLORE_RESULT_JSON_BEGIN sentinel" +else + fail "template contains EXPLORE_RESULT_JSON_BEGIN sentinel" +fi + +# Result sentinel end marker +if grep -q "=== EXPLORE_RESULT_JSON_END ===" "$WORKER_PROMPT"; then + pass "template contains EXPLORE_RESULT_JSON_END sentinel" +else + fail "template contains EXPLORE_RESULT_JSON_END sentinel" +fi + +# Sentinels appear in correct order (BEGIN before END); "|| true" keeps set -e/pipefail from aborting when a sentinel is absent +BEGIN_LINE=$(grep -n "=== EXPLORE_RESULT_JSON_BEGIN ===" "$WORKER_PROMPT" | head -1 | cut -d: -f1) || true +END_LINE=$(grep -n "=== EXPLORE_RESULT_JSON_END ===" "$WORKER_PROMPT" | head -1 | cut -d: -f1) || true +if [[ -n "$BEGIN_LINE" && -n "$END_LINE" && "$BEGIN_LINE" -lt "$END_LINE" ]]; then + pass "EXPLORE_RESULT_JSON_BEGIN appears before EXPLORE_RESULT_JSON_END" +else + fail "EXPLORE_RESULT_JSON_BEGIN before END" "begin < end" "begin=$BEGIN_LINE end=$END_LINE" +fi + +echo "" +echo "--- Placeholder Variables ---" +echo "" + +REQUIRED_PLACEHOLDERS=( + "<RUN_ID>" + "<DIRECTION_ID>" + "<DIR_SLUG>" + "<DIRECTION_NAME>" + "<DIRECTION_RATIONALE>" + "<APPROACH_SUMMARY>" + "<OBJECTIVE_EVIDENCE>" + "<KNOWN_RISKS>" + "<CONFIDENCE>" + "<MAX_WORKER_ITERATIONS>" + "<CODEX_TIMEOUT_MIN>" + "<BASE_BRANCH>" + "<BASE_COMMIT>" + "<ORIGINAL_IDEA>" +) + +for placeholder in "${REQUIRED_PLACEHOLDERS[@]}"; do + if grep -q "$placeholder" 
"$WORKER_PROMPT"; then + pass "template contains placeholder $placeholder" + else + fail "template contains placeholder $placeholder" "$placeholder in template" "not found" + fi +done + +echo "" +echo "--- Result JSON Fields ---" +echo "" + +# Required result JSON fields +REQUIRED_FIELDS=( + "schema_version" + "run_id" + "direction_id" + "dir_slug" + "task_status" + "codex_final_verdict" + "rounds_used" + "tests_passed" + "tests_failed" + "worktree_path" + "branch_name" + "commit_sha" + "commit_count" + "dirty_state" + "commit_status" + "summary_markdown" + "what_worked" + "what_didnt" + "bitlesson_action" + "error" +) + +for field in "${REQUIRED_FIELDS[@]}"; do + if grep -q "\"$field\"" "$WORKER_PROMPT"; then + pass "result JSON contains field: $field" + else + fail "result JSON contains field: $field" "\"$field\" in template" "not found" + fi +done + +echo "" +echo "--- Hard Constraints ---" +echo "" + +# Hard constraints section +if grep -q "Hard Constraints" "$WORKER_PROMPT"; then + pass "template has Hard Constraints section" +else + fail "template has Hard Constraints section" +fi + +# No nested Skills constraint +if grep -q "nested Skills" "$WORKER_PROMPT" || grep -q "No nested" "$WORKER_PROMPT"; then + pass "template forbids nested skills/slash commands" +else + fail "template forbids nested skills/slash commands" +fi + +# No git push constraint: require explicitly prohibitive wording, not a passing +# incidental mention of the command. 
+if grep -q "No git push" "$WORKER_PROMPT" && grep -qi "Do not push .*remote" "$WORKER_PROMPT"; then + pass "template forbids git push" +else + fail "template forbids git push" "explicit no-push phrasing" "missing" +fi + +# ask-codex.sh scope constraint +if grep -q "CLAUDE_PROJECT_DIR" "$WORKER_PROMPT"; then + pass "template requires CLAUDE_PROJECT_DIR scoping for Codex calls" +else + fail "template requires CLAUDE_PROJECT_DIR scoping" +fi + +# Branch naming format +if grep -q "explore/<RUN_ID>/<DIR_SLUG>" "$WORKER_PROMPT"; then + pass "template enforces branch naming format explore/<RUN_ID>/<DIR_SLUG>" +else + fail "template enforces branch naming format" "explore/<RUN_ID>/<DIR_SLUG>" "not found" +fi + +echo "" +print_test_summary "Worker Result Contract Test Summary"