diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 2e833ddc..80233df6 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -8,7 +8,7 @@ "name": "humanize", "source": "./", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0" + "version": "1.17.0" } ] } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index fd77b933..88a16169 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "humanize", "description": "Humanize - An iterative development plugin that uses Codex to review Claude's work. Creates a feedback loop where Claude implements plans and Codex independently reviews progress, ensuring quality through continuous refinement.", - "version": "1.16.0", + "version": "1.17.0", "author": { "name": "PolyArch" }, diff --git a/README.md b/README.md index 05f2fdd3..3bfd88ee 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Humanize -**Current Version: 1.16.0** +**Current Version: 1.17.0** > Derived from the [GAAC (GitHub-as-a-Context)](https://github.com/SihaoLiu/gaac) project. diff --git a/agents/draft-ambiguity-checker.md b/agents/draft-ambiguity-checker.md new file mode 100644 index 00000000..28558abd --- /dev/null +++ b/agents/draft-ambiguity-checker.md @@ -0,0 +1,103 @@ +--- +name: draft-ambiguity-checker +description: Detects ambiguities in a draft design document that admit multiple valid interpretations affecting plan generation. Outputs structured ambiguity findings with stable IDs, execution-risk explanations, and clarification questions. Use when checking a draft for ambiguities. 
+model: sonnet +tools: Read +--- + +# Draft Ambiguity Checker + +You are a specialized agent that detects ambiguities in a draft design document that admit multiple valid interpretations affecting the plan generation path. + +## Your Task + +When invoked, you will receive the content of a draft file. You need to: + +1. Read the draft file content carefully. +2. Detect ambiguities: statements that admit multiple valid interpretations affecting plan generation. +3. For each ambiguity found, output a structured finding. + +### What Counts as an Ambiguity + +- Statements with undefined terms that affect plan structure (e.g., "if recheck is set" without defining which recheck flag) +- Missing constraints that would change the plan structure +- Multiple equally valid interpretations of a requirement +- Vague scope boundaries that could lead to over- or under-planning +- Terms, flags, config keys, or stages whose intended meaning is unclear +- Missing user decisions that block safe plan generation + +### What Does NOT Count + +- Purely stylistic or wording issues that do not affect plan generation +- Different phrasings of the same clear requirement +- Missing implementation details that can be discovered from the repository +- Missing edge cases that can be added during plan generation +- Missing test coverage details +- Missing complete task breakdown or acceptance criteria +- Missing concrete file paths + +### Severity Rules + +- `blocker`: The ambiguity affects plan generation, sequencing, or scope. The planner could silently pick a side and produce a plan that does not match the user's intent. +- `warning`: The ambiguity is notable but has a clear default interpretation. +- `info`: The ambiguity is minor and would not meaningfully change the plan. + +### Output Format + +You MUST output your findings as a JSON array. 
Each finding must be a JSON object with exactly these fields: + +```json +[ + { + "id": "DA-abc123def456", + "severity": "blocker", + "category": "ambiguity", + "source_checker": "draft-ambiguity-checker", + "location": { + "section": "Section Name", + "fragment": "Exact ambiguous text" + }, + "evidence": "The ambiguous statement", + "explanation": "Plan generation drift risk: if the planner picks interpretation X, the resulting plan will differ from interpretation Y in ways that affect structure or acceptance.", + "suggested_resolution": "Clarify by specifying ...", + "affected_acs": [], + "affected_tasks": [], + "ambiguity_details": { + "competing_interpretations": [ + "Interpretation A: ...", + "Interpretation B: ..." + ], + "execution_drift_risk": "Specific risk if the planner silently picks a side", + "clarification_question": "Exact question to ask the user for clarification" + } + } +] +``` + +Rules: +- `id` must be a **content-addressable stable ID** derived from a SHA-256 hash of the normalized `location.section` plus a newline plus the normalized `location.fragment`. Use only the first 12 hex characters of the hash, prefixed with `DA-`. Example: if section="Recheck Behavior" and fragment="if recheck is set", the ID is `DA-` followed by the first 12 hex characters of that hash. This ensures the ID is stable and reproducible regardless of output order. +- `category` is always "ambiguity". +- `source_checker` is always "draft-ambiguity-checker". +- `location.fragment` should contain the exact ambiguous text or a concise excerpt. +- `evidence` should quote the ambiguous statement. +- `explanation` must describe why the ambiguity affects plan generation. +- `ambiguity_details.competing_interpretations` must list at least 2 valid interpretations. +- `ambiguity_details.execution_drift_risk` must describe the concrete risk. +- `ambiguity_details.clarification_question` must be an atomic, answerable question. 
+- `affected_acs` and `affected_tasks` are always empty arrays for draft checks (the draft does not yet contain ACs or tasks). + +If no ambiguities are found, output exactly: + +```json +[] +``` + +## Context Minimization + +You receive ONLY the draft file content and this instruction. You do NOT receive: +- Project history or prior conversation context +- Background information about why the draft was created +- Discussion records from draft generation or refinement +- Any information not directly present in the draft file itself + +This ensures the check is reproducible from the draft text alone. diff --git a/agents/draft-consistency-checker.md b/agents/draft-consistency-checker.md new file mode 100644 index 00000000..d631eb44 --- /dev/null +++ b/agents/draft-consistency-checker.md @@ -0,0 +1,93 @@ +--- +name: draft-consistency-checker +description: Detects hard contradictions in a draft design document. Outputs structured contradiction findings with category=contradiction and severity=blocker. Use when checking a draft for internal contradictions that would affect plan generation. +model: sonnet +tools: Read +--- + +# Draft Consistency Checker + +You are a specialized agent that detects hard contradictions inside a draft design document. + +## Your Task + +When invoked, you will receive the content of a draft file. You need to: + +1. Read the draft file content carefully. +2. Detect hard contradictions: statements that assign two incompatible definitions to the same symbol or mechanism within the same scope. +3. For each contradiction found, output a structured finding. 
+ +### What Counts as a Contradiction + +- A symbol or mechanism defined in two incompatible ways within the draft +- Mutually exclusive implementation choices presented as both required +- A flag, config key, or behavior described as both default-on and default-off +- A stage or phase described as both required and optional +- Conflicting resolution priorities or precedence rules + +### What Does NOT Count + +- Wording differences that do not affect meaning +- Different phrasings of the same requirement +- Missing implementation details that can be discovered from the repository +- Missing edge cases that can be added during plan generation +- Missing test coverage details +- Missing complete task breakdown or acceptance criteria +- Missing concrete file paths + +### Severity Rules + +- `blocker`: The contradiction affects plan generation. The planner could silently pick a side and produce a plan that does not match the user's intent. +- `warning`: Not used for contradictions; all contradictions are blockers. +- `info`: Not used for contradictions. + +### Output Format + +You MUST output your findings as a JSON array. Each finding must be a JSON object with exactly these fields: + +```json +[ + { + "id": "DC-001", + "severity": "blocker", + "category": "contradiction", + "source_checker": "draft-consistency-checker", + "location": { + "section": "Section Name", + "fragment": "Exact conflicting text" + }, + "evidence": "First definition: ...; Second definition: ...", + "explanation": "Why this contradiction affects plan generation", + "suggested_resolution": "How to resolve the contradiction", + "affected_acs": [], + "affected_tasks": [] + } +] +``` + +Rules: +- Use sequential IDs: DC-001, DC-002, etc. +- `severity` is always "blocker" for contradictions. +- `category` is always "contradiction". +- `source_checker` is always "draft-consistency-checker". +- `location.fragment` should contain the exact conflicting text or a concise excerpt. 
+- `evidence` should quote both conflicting statements. +- `explanation` must describe why the contradiction would cause plan generation drift. +- `suggested_resolution` should be actionable. +- `affected_acs` and `affected_tasks` are always empty arrays for draft checks (the draft does not yet contain ACs or tasks). + +If no contradictions are found, output exactly: + +```json +[] +``` + +## Context Minimization + +You receive ONLY the draft file content and this instruction. You do NOT receive: +- Project history or prior conversation context +- Background information about why the draft was created +- Discussion records from draft generation or refinement +- Any information not directly present in the draft file itself + +This ensures the check is reproducible from the draft text alone. diff --git a/agents/draft-plan-drift-checker.md b/agents/draft-plan-drift-checker.md new file mode 100644 index 00000000..283de093 --- /dev/null +++ b/agents/draft-plan-drift-checker.md @@ -0,0 +1,125 @@ +--- +name: draft-plan-drift-checker +description: Performs source recovery for existing plan contradiction or ambiguity findings by checking whether the original draft or collected clarifications contain a clear source-of-truth statement. Outputs structured draft-plan-drift findings with category=draft-plan-drift. Use during gen-plan --check only after primary plan findings exist. +model: sonnet +tools: Read +--- + +# Draft-Plan Drift Checker + +You are a specialized source-recovery agent for generated plan checks. + +Your job is narrow: for already-detected plan contradictions or ambiguities, determine whether the original draft or collected user clarifications contain a clear source-of-truth statement that the generated plan lost, weakened, contradicted, or failed to apply. + +You are NOT a whole-plan draft completeness reviewer. + +## Your Task + +When invoked, you will receive: +1. The main plan body (excluding the original draft appendix) +2. The original draft content +3. 
Any clarifications collected during check-draft (as a list of resolved findings with their answers) +4. Existing contradiction and ambiguity findings from `plan-consistency-checker` and `plan-ambiguity-checker` + +You need to: +1. Read the supplied findings and source material carefully. +2. Inspect only the specific supplied contradiction or ambiguity findings. +3. For each supplied finding, decide whether draft or clarification evidence clearly resolves or materially narrows that finding. +4. Output a `draft-plan-drift` finding only when the source material explains that supplied finding. + +If no supplied finding is resolved by draft or clarification evidence, output `[]`. + +If no existing contradiction or ambiguity findings are supplied, output `[]`. + +### What Counts as Drift + +A `draft-plan-drift` finding is valid only when all of these are true: + +- A supplied contradiction or ambiguity finding already exists. +- The original draft or a collected clarification contains a clear statement that resolves or materially narrows that specific finding. +- The generated plan lost, weakened, contradicted, or failed to apply that source statement. +- Repairing from the source statement would produce a more faithful and less ambiguous plan. + +Examples: + +- A supplied ambiguity asks whether check mode is opt-in or default-on, and the draft explicitly says it is disabled by default and enabled by `--check` or `gen_plan_check`. +- A supplied contradiction reports conflicting config key names, and the draft consistently names `gen_plan_check`. +- A supplied contradiction reports conflicting stage ordering, and the draft explicitly states draft-check runs before generation and plan-check runs after generation. + +### What Does NOT Count as Drift + +- Stylistic differences (reordering bullets, paraphrasing identical meaning). +- Plan-vs-draft differences that are not attached to a supplied contradiction or ambiguity finding. 
+- Missing low-level implementation details from the draft when the supplied finding does not depend on them. +- Adding implementation detail that does not contradict the draft. +- Adding tests, path boundaries, or task breakdowns that the draft did not specify. +- Using more precise language that preserves the original intent. +- Adding feasibility hints or suggestions that do not override the draft. +- Rough-draft brainstorming notes that were intentionally turned into a cleaner implementation plan. +- Older draft text that a later explicit user clarification superseded or narrowed. + +Do not scan the whole plan for omitted draft requirements. Do not emit findings for unrelated draft-vs-plan differences. + +### Clarification Precedence + +Clarifications are source material. When a clarification explicitly corrects, supersedes, or narrows the draft, treat that clarification as the higher-priority source for the affected topic. + +The original draft appendix remains preserved byte-for-byte, but preservation does not mean every old draft statement remains an active requirement. + +### Severity Rules + +- `blocker`: The drift contradicts the draft or a clarification in a way that would produce a different implementation than the user intended. +- `warning`: The drift is a notable deviation but has limited execution impact or the difference is arguable. +- `info`: Not used for drift findings. + +### Output Format + +You MUST output your findings as a JSON array. 
Each finding must be a JSON object with exactly these fields: + +```json +[ + { + "id": "DD-001", + "severity": "blocker", + "category": "draft-plan-drift", + "source_checker": "draft-plan-drift-checker", + "location": { + "section": "Section Name", + "fragment": "Exact plan text that drifts" + }, + "evidence": "Draft/clarification text that establishes the expected behavior", + "explanation": "Why this drift would cause the implementation to diverge from the user's intent.", + "suggested_resolution": "How to bring the plan body back into alignment with the draft.", + "related_finding_id": "C-001", + "affected_acs": ["AC-1"], + "affected_tasks": ["task1"] + } +] +``` + +Rules: +- Use sequential IDs: DD-001, DD-002, etc. +- `category` is always "draft-plan-drift". +- `source_checker` is always "draft-plan-drift-checker". +- `related_finding_id` is required and must match the supplied contradiction or ambiguity finding this drift explains. +- `location.fragment` should contain the exact plan text that drifts. +- `evidence` should quote the draft or clarification text that establishes the expected behavior. +- `explanation` must describe the concrete divergence risk. +- `suggested_resolution` should be actionable. +- `affected_acs` and `affected_tasks` may be empty arrays if no specific AC/task is affected. + +If no drift is found, output exactly: + +```json +[] +``` + +## Context Minimization + +You receive ONLY the plan body, original draft, clarifications, supplied contradiction/ambiguity findings, and this instruction. You do NOT receive: +- Project history or prior conversation context +- Background information about why the plan was created +- Discussion records from plan generation or refinement +- Any information not directly present in the inputs + +This ensures the check is reproducible from the provided text alone. 
diff --git a/agents/plan-ambiguity-checker.md b/agents/plan-ambiguity-checker.md new file mode 100644 index 00000000..35c87fa1 --- /dev/null +++ b/agents/plan-ambiguity-checker.md @@ -0,0 +1,98 @@ +--- +name: plan-ambiguity-checker +description: Detects ambiguities in a plan file that admit multiple valid interpretations affecting execution. Outputs structured ambiguity findings with stable IDs, execution-risk explanations, and clarification questions. Use when checking a plan for ambiguities. +model: sonnet +tools: Read +--- + +# Plan Ambiguity Checker + +You are a specialized agent that detects ambiguities in a plan file that admit multiple valid interpretations affecting the execution path. + +## Your Task + +When invoked, you will receive the content of a plan file. You need to: + +1. Read the plan file content carefully. +2. Detect ambiguities: statements that admit multiple valid interpretations affecting execution. +3. For each ambiguity found, output a structured finding. + +### What Counts as an Ambiguity + +- Statements with undefined terms that affect implementation (e.g., "use caching where appropriate" without defining "appropriate") +- Missing constraints that would change the implementation path +- Multiple equally valid interpretations of a requirement +- Vague scope boundaries that could lead to over- or under-implementation +- Missing invalidation strategies, error handling, or edge case coverage + +### What Does NOT Count + +- Purely stylistic or wording issues that do not affect execution +- Different phrasings of the same clear requirement +- Appendix sections (the original draft appendix is out of scope) + +### Severity Rules + +- `blocker`: The ambiguity affects execution path, sequencing, acceptance criteria ownership, task dependencies, or file scope. The implementer could silently pick a side and produce wrong code. +- `warning`: The ambiguity is notable but has limited execution impact or has a clear default interpretation. 
+- `info`: The ambiguity is minor and would not meaningfully change the implementation. + +### Output Format + +You MUST output your findings as a JSON array. Each finding must be a JSON object with exactly these fields: + +```json +[ + { + "id": "A-abc123def456", + "severity": "blocker", + "category": "ambiguity", + "source_checker": "plan-ambiguity-checker", + "location": { + "section": "Section Name", + "fragment": "Exact ambiguous text" + }, + "evidence": "The ambiguous statement", + "explanation": "Execution drift risk: if the implementer picks interpretation X, the result will differ from interpretation Y in ways that affect acceptance criteria.", + "suggested_resolution": "Clarify by specifying ...", + "affected_acs": ["AC-1"], + "affected_tasks": ["task1"], + "ambiguity_details": { + "competing_interpretations": [ + "Interpretation A: ...", + "Interpretation B: ..." + ], + "execution_drift_risk": "Specific risk if the implementer silently picks a side", + "clarification_question": "Exact question to ask the user for clarification" + } + } +] +``` + +Rules: +- `id` must be a **content-addressable stable ID** derived from a SHA-256 hash of the normalized `location.section` plus a newline plus the normalized `location.fragment`. Use only the first 12 hex characters of the hash, prefixed with `A-`. Example: if section="Task Breakdown" and fragment="use caching where appropriate", the ID is `A-` followed by the first 12 hex characters of that hash. This ensures the ID is stable and reproducible regardless of output order. +- `category` is always "ambiguity". +- `source_checker` is always "plan-ambiguity-checker". +- `location.fragment` should contain the exact ambiguous text or a concise excerpt. +- `evidence` should quote the ambiguous statement. +- `explanation` must describe why the ambiguity affects execution. +- `ambiguity_details.competing_interpretations` must list at least 2 valid interpretations. +- `ambiguity_details.execution_drift_risk` must describe the concrete risk. 
+- `ambiguity_details.clarification_question` must be an atomic, answerable question. +- `affected_acs` and `affected_tasks` may be empty arrays if no specific AC/task is affected. + +If no ambiguities are found, output exactly: + +```json +[] +``` + +## Context Minimization + +You receive ONLY the plan file content and this instruction. You do NOT receive: +- Project history or prior conversation context +- Background information about why the plan was created +- Discussion records from plan generation or refinement +- Any information not directly present in the plan file itself + +This ensures the check is reproducible from the plan text alone. diff --git a/agents/plan-consistency-checker.md b/agents/plan-consistency-checker.md new file mode 100644 index 00000000..e906a694 --- /dev/null +++ b/agents/plan-consistency-checker.md @@ -0,0 +1,82 @@ +--- +name: plan-consistency-checker +description: Detects hard contradictions in a plan file. Outputs structured contradiction findings with category=contradiction and severity=blocker. Use when checking a plan for internal contradictions. +model: sonnet +tools: Read +--- + +# Plan Consistency Checker + +You are a specialized agent that detects hard contradictions inside a plan file. + +## Your Task + +When invoked, you will receive the content of a plan file. You need to: + +1. Read the plan file content carefully. +2. Detect hard contradictions: statements that assign two incompatible definitions to the same symbol or mechanism within the same scope. +3. For each contradiction found, output a structured finding. 
+ +### What Counts as a Contradiction + +- A symbol or mechanism defined in two incompatible ways within the main plan body +- Architectural placements that conflict (e.g., "lives in layer A" and "lives in layer B" without delegation) +- Incompatible data types or formats for the same field +- Mutually exclusive implementation choices presented as both required + +### What Does NOT Count + +- Wording differences that do not affect execution +- Different phrasings of the same requirement +- Appendix sections (the original draft appendix is out of scope for contradiction detection) + +### Output Format + +You MUST output your findings as a JSON array. Each finding must be a JSON object with exactly these fields: + +```json +[ + { + "id": "C-001", + "severity": "blocker", + "category": "contradiction", + "source_checker": "plan-consistency-checker", + "location": { + "section": "Section Name", + "fragment": "Exact conflicting text" + }, + "evidence": "First definition: ...; Second definition: ...", + "explanation": "Why this contradiction affects execution", + "suggested_resolution": "How to resolve the contradiction", + "affected_acs": ["AC-1"], + "affected_tasks": ["task1"] + } +] +``` + +Rules: +- Use sequential IDs: C-001, C-002, etc. +- `severity` is always "blocker" for contradictions. +- `category` is always "contradiction". +- `source_checker` is always "plan-consistency-checker". +- `location.fragment` should contain the exact conflicting text or a concise excerpt. +- `evidence` should quote both conflicting statements. +- `explanation` must describe why the contradiction would cause execution drift. +- `suggested_resolution` should be actionable. +- `affected_acs` and `affected_tasks` may be empty arrays if no specific AC/task is affected. + +If no contradictions are found, output exactly: + +```json +[] +``` + +## Context Minimization + +You receive ONLY the plan file content and this instruction. 
You do NOT receive: +- Project history or prior conversation context +- Background information about why the plan was created +- Discussion records from plan generation or refinement +- Any information not directly present in the plan file itself + +This ensures the check is reproducible from the plan text alone. diff --git a/commands/gen-idea.md b/commands/gen-idea.md index 2ef61e82..941fd2fb 100644 --- a/commands/gen-idea.md +++ b/commands/gen-idea.md @@ -47,7 +47,7 @@ Do not interpret or rewrite the idea text here. Pass `$ARGUMENTS` through to Pha Run: ```bash -"${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh" $ARGUMENTS +"${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-idea-io.sh" --raw-arguments "$ARGUMENTS" ``` Handle exit codes: diff --git a/commands/gen-plan.md b/commands/gen-plan.md index 3b97435e..ff3255e2 100644 --- a/commands/gen-plan.md +++ b/commands/gen-plan.md @@ -1,10 +1,13 @@ --- description: "Generate implementation plan from draft document" -argument-hint: "--input --output [--auto-start-rlcr-if-converged] [--discussion|--direct]" +argument-hint: "--input --output [--auto-start-rlcr-if-converged] [--discussion|--direct] [--check|--no-check]" allowed-tools: - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-gen-plan-io.sh:*)" - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh:*)" - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/setup-rlcr-loop.sh:*)" + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/lib/plan-check-common.sh:*)" + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/lib/gen-plan-check-mode.sh:*)" + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/plan-check.sh:*)" - "Read" - "Glob" - "Grep" @@ -24,6 +27,16 @@ This command MUST ONLY generate a plan document during the planning phases. 
It M Permitted writes (before any optional auto-start) are limited to: - The plan output file (`--output`) - Optional translated language variant (only when `ALT_PLAN_LANGUAGE` is configured) +- When `EFFECTIVE_CHECK_MODE=true`: + - `.humanize/gen-plan-check//draft-findings.json` + - `.humanize/gen-plan-check//plan-findings.json` + - `.humanize/gen-plan-check//report.md` + - `.humanize/gen-plan-check//resolution.json` + - `.humanize/gen-plan-check//backup/.bak` + - Ephemeral temp files under `.humanize/gen-plan-check//tmp/` (the `tmp/` directory is recursively deleted before the command exits on both success and failure) + - Ephemeral atomic-write temp files in the same directory as `--output`, named `.plan-check-write.*` (created by `plan_check_atomic_write` and immediately moved over the target file) + +The persistent layout under `.humanize/gen-plan-check//` is flat. No `draft/` or `plan/` subdirectories are permitted. If `--auto-start-rlcr-if-converged` is enabled, the command MAY immediately start the RLCR loop by running `/humanize:start-rlcr-loop `, but only in `discussion` mode when `PLAN_CONVERGENCE_STATUS=converged` and there are no pending user decisions. All coding happens in that subsequent command/loop, not during plan generation. @@ -53,6 +66,8 @@ Parse `$ARGUMENTS` and set: - `AUTO_START_RLCR_IF_CONVERGED=false` otherwise - `GEN_PLAN_MODE_DISCUSSION=true` if `--discussion` is present - `GEN_PLAN_MODE_DIRECT=true` if `--direct` is present +- `CHECK_FLAG=true` if `--check` is present +- `NO_CHECK_FLAG=true` if `--no-check` is present - If both `--discussion` and `--direct` are present simultaneously, report error "Cannot use --discussion and --direct together" and stop `AUTO_START_RLCR_IF_CONVERGED=true` allows skipping manual plan review and starting implementation immediately (by invoking `/humanize:start-rlcr-loop `), but only when `GEN_PLAN_MODE=discussion`, plan convergence is achieved, and no pending user decisions remain. 
In `direct` mode this condition is never satisfied. @@ -123,7 +138,17 @@ Also detect whether `alternative_plan_language` is explicitly present in `MERGED - CLI flag: if `GEN_PLAN_MODE_DISCUSSION=true`, set `GEN_PLAN_MODE=discussion`; if `GEN_PLAN_MODE_DIRECT=true`, set `GEN_PLAN_MODE=direct` - Merged config `gen_plan_mode` field (if valid) - Default: `discussion` -6. Malformed optional user or project config files should be reported as warnings by `load_merged_config` and must NOT stop execution. In those cases, continue with the remaining valid layers and the same effective defaults (`ALT_PLAN_LANGUAGE=""`, `ALT_PLAN_LANG_CODE=""`, and `GEN_PLAN_MODE=discussion`) when no higher-precedence value is available. +6. Resolve `EFFECTIVE_CHECK_MODE`: + - Source `${CLAUDE_PLUGIN_ROOT}/scripts/lib/gen-plan-check-mode.sh`. + - Extract `CONFIG_GEN_PLAN_CHECK_RAW` from `MERGED_CONFIG_JSON` using `get_config_value MERGED gen_plan_check`. + - Call `_gen_plan_resolve_check_mode "$CHECK_FLAG" "$NO_CHECK_FLAG" "$CONFIG_GEN_PLAN_CHECK_RAW"`. + - The helper implements the following priority (highest to lowest): + - `--no-check` flag forces `EFFECTIVE_CHECK_MODE=false` + - `--check` flag forces `EFFECTIVE_CHECK_MODE=true` + - Merged config `gen_plan_check` value (`true` or `false`) + - Default: `EFFECTIVE_CHECK_MODE=false` + - If the merged config contains an invalid value (anything other than `true`, `false`, or empty/missing), the helper prints: `Warning: unsupported gen_plan_check "". Expected true or false. Check mode is disabled unless --check is passed.` and sets `EFFECTIVE_CHECK_MODE=false`. An explicit `null` in any config layer is silently stripped by `load_merged_config` before the resolver sees it, so no warning is emitted for `null`. +7. Malformed optional user or project config files should be reported as warnings by `load_merged_config` and must NOT stop execution. 
In those cases, continue with the remaining valid layers and the same effective defaults (`ALT_PLAN_LANGUAGE=""`, `ALT_PLAN_LANG_CODE=""`, and `GEN_PLAN_MODE=discussion`) when no higher-precedence value is available. `ALT_PLAN_LANGUAGE` and `ALT_PLAN_LANG_CODE` control whether a translated language variant of the output file is written in Phase 8. When `ALT_PLAN_LANGUAGE` is non-empty, a variant file with the `_` suffix is generated. @@ -173,11 +198,117 @@ After IO validation passes, check if the draft is relevant to this repository. - Show the reason from the relevance check - Stop the command -4. **If RELEVANT**: Create the output plan file by copying the template and appending the draft: +4. **If RELEVANT**: + - If `EFFECTIVE_CHECK_MODE=true`: Continue to Phase 2.5. Output file creation is deferred until after check-draft resolves. + - If `EFFECTIVE_CHECK_MODE=false`: Create the output plan file immediately by copying the template and appending the draft: + ```bash + { + cat "$TEMPLATE_FILE" + printf '\n--- Original Design Draft Start ---\n' + cat "$INPUT_FILE" + printf '\n--- Original Design Draft End ---\n' + } > "$OUTPUT_FILE" + ``` + The bytes between the LF after the start marker and the LF before the end marker are exactly the bytes of `$INPUT_FILE`. Then continue to Phase 3. + +--- + +## Phase 2.5: Check-Draft (Conditional) + +This phase runs only when `EFFECTIVE_CHECK_MODE=true`. + +### Step 1: Initialize Report Directory + +Create a timestamped directory for check-mode artifacts: + +```bash +source "${CLAUDE_PLUGIN_ROOT}/scripts/lib/plan-check-common.sh" +CHECK_REPORT_DIR="$(plan_check_init_report_dir "${PROJECT_ROOT}/.humanize/gen-plan-check")" +mkdir -p "${CHECK_REPORT_DIR}/tmp" +``` + +Set a trap to recursively remove `${CHECK_REPORT_DIR}/tmp/` on both success and failure paths before the command exits. + +### Step 2: Run Draft Checkers + +Invoke the draft checker agents via `ask-codex.sh`. Each agent receives the raw draft content. + +1. 
Run draft-consistency-checker: ```bash - cp "$TEMPLATE_FILE" "$OUTPUT_FILE" && echo "" >> "$OUTPUT_FILE" && echo "--- Original Design Draft Start ---" >> "$OUTPUT_FILE" && echo "" >> "$OUTPUT_FILE" && cat "$INPUT_FILE" >> "$OUTPUT_FILE" && echo "" >> "$OUTPUT_FILE" && echo "--- Original Design Draft End ---" >> "$OUTPUT_FILE" + "${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh" "" ``` - Then continue to Phase 3. + Expected output: JSON array of contradiction findings with `source_checker: draft-consistency-checker`. + +2. Run draft-ambiguity-checker: + ```bash + "${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh" "" + ``` + Expected output: JSON array of ambiguity findings with `source_checker: draft-ambiguity-checker`. + +If `ask-codex.sh` fails (missing CLI, timeout, or runtime error), do not silently continue: +- Use AskUserQuestion with options: Retry or Abort. +- If the user chooses Abort: stop the command. +- If the user chooses Retry: re-run the failed checker once. If it fails again, persist a `runtime-error` diagnostic finding with severity `info` and category `runtime-error`, and treat the check-draft phase as having unresolved blockers for auto-start gating. + +### Step 3: Merge and Persist Draft Findings + +Merge findings from both agents into a single array. Persist: +- `${CHECK_REPORT_DIR}/draft-findings.json` +- Draft section appended to `${CHECK_REPORT_DIR}/report.md` + +### Step 4: Process Blocker Findings + +For each finding with `severity=blocker`: + +1. Present the finding to the user via AskUserQuestion with exactly these options: + - Provide an answer that resolves the blocker + - Abort the command + (No `skip` option is offered.) + +2. If the user provides an answer: + - Record the clarification in an in-memory structure: `clarifications[id] = {source: "user", answer: ..., finding_id: ...}` + +3. 
If the user does not provide an answer (or AskUserQuestion is not available): + - The leader agent must decide via Source-of-Truth precedence using the original draft text and repository context. + - Record the decision: `clarifications[id] = {source: "agent", answer: ..., rationale: ..., finding_id: ...}` + +4. Clarifications are stored in memory for downstream phases. They supplement the original draft but NEVER overwrite it. + +5. Do NOT create any `## Pending User Decisions` entry for check-draft blockers. + +### Step 5: Handle Abort + +If the user chooses `abort` at any point during Step 4: +- Stop the command before any output plan file is created. +- Retain diagnostic artifacts already written under `${CHECK_REPORT_DIR}/` (specifically `draft-findings.json`, `report.md`, and `resolution.json` if persisted) for post-mortem inspection. +- Recursively remove `${CHECK_REPORT_DIR}/tmp/`. +- Report: "Draft check aborted. No output plan was created. Diagnostic artifacts are available at ${CHECK_REPORT_DIR}/" + +### Step 6: Create Output Plan File + +Only after all blocker findings are resolved (user-answered or agent-fallback-resolved), create the output plan file by copying the template and appending the draft: + +```bash +{ + cat "$TEMPLATE_FILE" + printf '\n--- Original Design Draft Start ---\n' + cat "$INPUT_FILE" + printf '\n--- Original Design Draft End ---\n' +} > "$OUTPUT_FILE" +``` + +The bytes between the LF after the start marker and the LF before the end marker are exactly the bytes of `$INPUT_FILE`. + +This deferred creation ensures that if check-draft aborts, no output file exists on disk. + +### Step 7: Feed Clarifications into Downstream Phases + +Include the collected clarifications in the context fed to: +- Phase 3 (Codex First-Pass Analysis) +- Phase 4 (Claude Candidate Plan) +- All downstream planning phases + +Treat clarifications as source material alongside the original draft. 
When an explicit user clarification corrects, supersedes, or narrows older draft text, the clarification is the higher-priority source for that topic. Preserve the original draft appendix bytes, but do not treat superseded draft statements as active plan requirements. --- @@ -300,7 +431,7 @@ Set convergence state explicitly: ## Phase 6: Issue and Disagreement Resolution -> **Critical**: The draft document contains the most valuable human input. During issue resolution, NEVER discard or override any original draft content. All clarifications should be treated as incremental additions that supplement the draft, not replacements. Keep track of both the original draft statements and the clarified information. +> **Critical**: The draft document and explicit user clarifications are high-priority source material. Preserve the original draft appendix bytes exactly, track both original draft statements and clarifications, and treat newer explicit clarifications as authoritative when they supersede or narrow older draft text. ### Step 1: Manual Review Gate @@ -520,7 +651,7 @@ When `alternative_plan_language` is empty, absent, set to `"English"`, or set to 10. **Code Style Constraint**: The generated plan MUST include a section or note instructing that implementation code and comments should NOT contain plan-specific progress terminology such as "AC-", "Milestone", "Step", "Phase", or similar workflow markers. These terms belong in the plan document, not in the resulting codebase. -11. **Draft Completeness Requirement**: The generated plan MUST incorporate ALL information from the input draft document without omission. The draft represents the most valuable human input and must be fully preserved. Any clarifications obtained through Phase 6 should be added incrementally to the draft's original content, never replacing or losing any original requirements. The final plan must be a superset of the draft information plus all clarified details. +11. 
**Active Source Fidelity**: The generated plan MUST preserve active source-of-truth requirements from the draft and clarifications. Preserve the original draft appendix bytes exactly, but do not force rough-draft notes or clarification-superseded statements into the generated plan body. When an explicit user clarification corrects, supersedes, or narrows older draft text, the clarification takes precedence for that topic. 12. **Debate Traceability**: The plan MUST include Codex-first findings, Claude/Codex agreements, resolved disagreements, and unresolved decisions. Unresolved opposite opinions MUST be recorded in `## Pending User Decisions` for explicit user decision. @@ -541,12 +672,138 @@ Use the **Edit tool** (not Write) to update the plan file with the generated con - Keep the original draft section intact at the bottom of the file - The final file should contain both the structured plan AND the original draft for reference +### Step 1.5: Check-Plan and Repair (Conditional) + +This step runs only when `EFFECTIVE_CHECK_MODE=true`. + +> **Note**: If `EFFECTIVE_CHECK_MODE=false`, skip this entire step and proceed directly to Step 2. + +#### Step 1.5.1: Run Deterministic Schema Validation + +Invoke the deterministic schema validator on the plan file using the canonical template: + +```bash +source "${CLAUDE_PLUGIN_ROOT}/scripts/lib/plan-check-common.sh" +SCHEMA_FINDINGS=$(TMPDIR="${CHECK_REPORT_DIR}/tmp" plan_check_validate_schema "$OUTPUT_FILE" "${CLAUDE_PLUGIN_ROOT}/prompt-template/plan/gen-plan-template.md") +``` + +This checks for required core sections, AC IDs, optional task tags, dependencies, and circular dependencies when `## Task Breakdown` is present. The returned `SCHEMA_FINDINGS` is a comma-separated string of JSON finding objects. + +#### Step 1.5.2: Run Semantic Checkers + +Invoke the primary semantic checker agents via `ask-codex.sh`. + +1. Run plan-consistency-checker on the plan body. +2. Run plan-ambiguity-checker on the plan body. 
+ +After both primary checker results are available, collect the contradiction and ambiguity findings returned by `plan-consistency-checker` and `plan-ambiguity-checker` into `PRIMARY_PLAN_FINDINGS`. + +Run `draft-plan-drift-checker` only if `PRIMARY_PLAN_FINDINGS` is non-empty. When invoked, pass all of the following explicit context: +- The plan body +- The original draft content +- The collected clarifications +- `PRIMARY_PLAN_FINDINGS` as the existing plan findings to inspect + +If `PRIMARY_PLAN_FINDINGS` is empty, skip `draft-plan-drift-checker` and treat its result as `[]`. + +`draft-plan-drift-checker` is a secondary source-recovery pass for existing contradiction or ambiguity findings. It must not run as an independent whole-plan draft completeness audit. + +Each invocation receives the appropriate content and returns a JSON array of findings. + +If `ask-codex.sh` fails (missing CLI, timeout, or runtime error), do not silently continue: +- Use AskUserQuestion with options: Retry or Abort. +- If the user chooses Abort: stop the command. +- If the user chooses Retry: re-run the failed checker once. If it fails again, persist a `runtime-error` diagnostic finding with severity `info` and category `runtime-error`, and treat any unresolved semantic findings as blockers for auto-start gating. + +#### Step 1.5.3: Merge Findings and Persist + +Merge `SCHEMA_FINDINGS`, the primary semantic checker findings, and any conditional draft-plan drift findings into a single JSON array and write to: +- `${CHECK_REPORT_DIR}/plan-findings.json` + +Also append the plan section to `${CHECK_REPORT_DIR}/report.md`. + +#### Step 1.5.4: Source-of-Truth Precedence Repair + +For each finding with `severity=blocker`: + +1. 
Determine the resolution source in this priority order: + - **Explicit user answers** collected during check-draft (the in-memory clarifications structure) + - **Original draft text** (the inner byte range between `--- Original Design Draft Start ---` and `--- Original Design Draft End ---`) + - **Repository facts** discovered during planning + - **Safe leader-agent judgment** for purely structural or wording fixes + - Generated plan text (lowest priority) + +2. If the resolution source is clarifications, draft text, or repository facts (high-priority sources): + - Compute the repair patch. + - Apply the rewrite **silently** (no diff preview, no AskUserQuestion): + ```bash + plan_check_backup_plan "$OUTPUT_FILE" "$CHECK_REPORT_DIR" + plan_check_atomic_write "$OUTPUT_FILE" "$patched_content" + ``` + - Record the resolution in an in-memory structure with citation to the source. + +3. If the resolution source is leader-agent judgment: + - Compute the repair patch. + - Present a diff preview to the user. + - Use AskUserQuestion to confirm: "Apply this repair?" + - If confirmed: + ```bash + plan_check_backup_plan "$OUTPUT_FILE" "$CHECK_REPORT_DIR" + plan_check_atomic_write "$OUTPUT_FILE" "$patched_content" + ``` + - Record the resolution with rationale. + +4. If the resolution cannot be determined from any high-priority source: + - Use AskUserQuestion to ask the user for a product-level decision. + - Record the answer and apply the repair through the same backup + atomic write path. + +5. **Appendix preservation constraint**: No repair may modify any byte in the inner appendix byte range (the content between the `--- Original Design Draft Start ---` and `--- Original Design Draft End ---` markers). If a repair would touch the appendix, it must be rejected or adjusted to preserve the original draft bytes exactly. 
+ +#### Step 1.5.5: Persist Resolution Records + +After all repair decisions are made, persist: +- `${CHECK_REPORT_DIR}/resolution.json` containing the resolution records for every blocker finding. + +#### Step 1.5.6: Optional Recheck + +Check whether a recheck should run: + +```bash +source "${CLAUDE_PLUGIN_ROOT}/scripts/lib/config-loader.sh" +RECHECK_CONFIG="$(get_config_value "$MERGED_CONFIG_JSON" plan_check_recheck)" +``` + +Recheck runs **exactly once** if and only if **all** of the following are true: +- `RECHECK_CONFIG` is `true` +- At least one accepted repair during Step 1.5.4 changed plan bytes + +If recheck runs: +- Run `plan_check_validate_schema` again on the repaired plan with `TMPDIR="${CHECK_REPORT_DIR}/tmp"`. +- Run `plan-consistency-checker` and `plan-ambiguity-checker` again on the repaired plan. +- Collect contradiction and ambiguity findings from those two recheck results. +- Run `draft-plan-drift-checker` during recheck only when the recheck primary findings collection is non-empty, and pass that collection as explicit context. +- Merge findings and update `${CHECK_REPORT_DIR}/plan-findings.json`. +- **This recheck is check-only**: findings are recorded but no further repair is performed. +- Update the unresolved-blocker state for the auto-start gate. + +If recheck does not run (recheck disabled or zero byte-changing repairs), proceed without recheck. + +#### Step 1.5.7: Finalize Unresolved Blocker State + +After repair (and optional recheck), compute the final unresolved-blocker state: + +- `UNRESOLVED_DRAFT_BLOCKERS`: count of draft-check blockers that remain unresolved after check-draft. +- `UNRESOLVED_PLAN_BLOCKERS`: count of plan-check blockers (categories: `contradiction`, `ambiguity`, `schema`, `dependency`, `appendix-drift`, `draft-plan-drift`) with `severity=blocker` that remain unresolved after repair and optional recheck. `runtime-error` findings do not count toward this total. 
+- `RECHECK_NEW_BLOCKERS`: count of new blockers introduced during the recheck pass. + +This state is used by the auto-start gate in Phase 8 Step 5. + ### Step 2: Comprehensive Review After updating, **read the complete plan file** and verify: - The plan is complete and comprehensive - All sections are consistent with each other -- The structured plan aligns with the original draft content +- The structured plan does not contradict high-priority source material, including resolved clarifications and active draft requirements - Claude/Codex disagreement handling is explicit and correctly reflected - No contradictions exist between different parts of the document @@ -589,6 +846,8 @@ Algorithm: - The variant file is a translated reading view of the same plan; it must not add new information not present in the main file. - The original draft section at the bottom should be kept as-is (not re-translated). +When `EFFECTIVE_CHECK_MODE=true`, the translated variant reflects the **final repaired plan content** after Step 1.5 (Check-Plan and Repair). The variant is produced only after all repairs and optional recheck have completed. + If `ALT_PLAN_LANGUAGE` is empty (the default), do NOT create a translated variant file. ### Step 5: Optional Direct Work Start @@ -598,6 +857,16 @@ If all of the following are true: - `PLAN_CONVERGENCE_STATUS=converged` - `GEN_PLAN_MODE=discussion` - There are no pending decisions with status `PENDING` +- When `EFFECTIVE_CHECK_MODE=true`, all of the following must also hold: + - `UNRESOLVED_DRAFT_BLOCKERS` is 0 (no unresolved draft-check blockers) + - `UNRESOLVED_PLAN_BLOCKERS` is 0 (no unresolved plan-check blockers after repair and optional recheck) + - `RECHECK_NEW_BLOCKERS` is 0 (no new blockers introduced during recheck) + +If any check-mode gate is violated, skip auto-start and report the specific rule: +- `Auto-start skipped: unresolved-draft-blocker (finding IDs)` when draft-check blockers remain. 
+- `Auto-start skipped: unresolved-plan-check-blocker (count: finding ID category)` when plan-check blockers remain. +- `Auto-start skipped: recheck-failure (new blockers)` when recheck introduces new blockers. +- `Auto-start skipped: existing-rlcr-constraint (reason)` when existing constraints fail. Then start work immediately by running: @@ -629,6 +898,7 @@ Report to the user: - Number of unresolved user decisions (if any) - Whether language was unified (if applicable) - Whether direct work start was attempted, and its result +- When `EFFECTIVE_CHECK_MODE=true`: path to check-mode artifacts under `.humanize/gen-plan-check//` and summary of draft-check / plan-check / repair / recheck status --- diff --git a/commands/plan-check.md b/commands/plan-check.md new file mode 100644 index 00000000..f240be41 --- /dev/null +++ b/commands/plan-check.md @@ -0,0 +1,365 @@ +--- +description: "Check a plan file for contradictions, ambiguities, and schema compliance" +argument-hint: "--plan path/to/plan.md [--recheck] [--alt-language lang]" +allowed-tools: + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/lib/config-loader.sh:*)" + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/validate-plan-check-io.sh:*)" + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/plan-check.sh:*)" + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/lib/plan-check-common.sh:*)" + - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh:*)" + - "Bash(source ${CLAUDE_PLUGIN_ROOT}/scripts/lib/config-loader.sh:*)" + - "Bash(source ${CLAUDE_PLUGIN_ROOT}/scripts/lib/plan-check-common.sh:*)" + - "Bash(mktemp:*)" + - "Bash(diff:*)" + - "Read" + - "Write" + - "Edit" + - "Task" + - "AskUserQuestion" +--- + +# Plan Check + +Analyze a plan file for internal contradictions, ambiguities, and structural schema compliance. + +## Workflow Overview + +1. **Argument Parsing**: Parse `--plan`, `--recheck`, `--alt-language`, and `-h/--help` +2. **Load Project Config**: Resolve merged Humanize config defaults for `plan_check_recheck` +3. 
**IO Validation**: Validate input plan file and output directory
+4. **Load Plan**: Read the plan file content
+5. **Check Pipeline**: Spawn a dedicated sub-agent to execute the full check pipeline
+6. **Report Generation**: Assemble and write `report.md` and `findings.json`
+7. **Contradiction Resolution**: Present contradictions to the user and collect resolutions
+8. **Ambiguity Clarification**: Present ambiguities atomically and collect clarifications
+9. **Rewrite**: Ask whether to apply resolutions to the plan file in-place
+10. **Display Results**: Output the final report to the terminal
+
+---
+
+## Phase 1: Argument Parsing
+
+Parse `$ARGUMENTS` and set:
+- `PLAN_FILE`: value following `--plan` (required)
+- `RECHECK_REQUESTED=true` if `--recheck` is present; `RECHECK_REQUESTED=false` otherwise
+- `ALT_LANGUAGE`: value following `--alt-language` if present
+
+If `-h` or `--help` is present, print:
+```
+Usage: /humanize:plan-check --plan <plan-file> [--recheck] [--alt-language <language>]
+
+Options:
+  --plan <plan-file>         Path to the plan file to check (required)
+  --recheck                  Re-run plan-check after an accepted rewrite (default: disabled)
+  --alt-language <language>  Generate an additional report in the specified language
+  -h, --help                 Show this help message
+```
+
+If `--plan` is missing, report error "--plan is required" and stop.
+
+---
+
+## Phase 2: Load Project Config
+
+Resolve configuration using `${CLAUDE_PLUGIN_ROOT}/scripts/lib/config-loader.sh`. Reuse that behavior; do not read `.humanize/config.json` directly.
+
+### Config Merge Semantics
+
+1. Source `${CLAUDE_PLUGIN_ROOT}/scripts/lib/config-loader.sh`.
+2. Determine `PROJECT_ROOT` from the directory where the command was invoked.
+3. Call `load_merged_config "${CLAUDE_PLUGIN_ROOT}" "${PROJECT_ROOT}"` to obtain `MERGED_CONFIG_JSON`.
+4.
`load_merged_config` merges these layers in order:
+   - Required default config: `${CLAUDE_PLUGIN_ROOT}/config/default_config.json`
+   - Optional user config: `${XDG_CONFIG_HOME:-$HOME/.config}/humanize/config.json`
+   - Optional project config: `${HUMANIZE_CONFIG:-$PROJECT_ROOT/.humanize/config.json}`
+5. Later layers override earlier layers. Malformed optional JSON objects are warnings and ignored. A malformed required default config, missing `jq`, or any other fatal `load_merged_config` failure is a configuration error and must stop the command.
+
+### Recheck Resolution
+
+Use `get_config_value` against `MERGED_CONFIG_JSON` to read:
+
+- `CONFIG_PLAN_CHECK_RECHECK_RAW` from `plan_check_recheck`
+
+Resolve `EFFECTIVE_RECHECK` using this priority:
+1. CLI flag: if `RECHECK_REQUESTED=true`, set `EFFECTIVE_RECHECK=true`.
+2. Merged config `plan_check_recheck`, when it is exactly `true` or `false` (case-insensitive).
+3. Default: `false`.
+
+If `CONFIG_PLAN_CHECK_RECHECK_RAW` is present but is not `true` or `false`, log:
+`Warning: unsupported plan_check_recheck "<value>". Expected true or false. Recheck after rewrite is disabled unless --recheck is passed.`
+
+`--recheck` is a positive override only. If config sets `plan_check_recheck=true`, the command rechecks after accepted rewrites without requiring a flag.
+ +--- + +## Phase 3: IO Validation + +Run `${CLAUDE_PLUGIN_ROOT}/scripts/validate-plan-check-io.sh` with: +- `--plan "$PLAN_FILE"` +- `--recheck` when `EFFECTIVE_RECHECK=true` +- `--alt-language "$ALT_LANGUAGE"` when `ALT_LANGUAGE` is non-empty + +Capture its exit code: +- `0`: success, continue +- `1`: input missing -- report error and stop +- `2`: input empty -- report error and stop +- `3`: output dir missing and cannot be created -- report error and stop +- `4`: output exists -- report error and stop +- `5`: no write permission -- report error and stop +- `6`: invalid args -- report error and stop +- Any other code: report unexpected validation failure and stop + +--- + +## Phase 4: Load Plan + +Use the Read tool to read the plan file at `PLAN_FILE`. + +If the file cannot be read, report error and stop. + +--- + +## Phase 5: Check Pipeline (Sub-Agent) + +Spawn a dedicated sub-agent via the Task tool to execute the check pipeline. + +### Sub-Agent Payload Boundary + +The command layer (Claude) must: +1. Create a temporary path with `tmp_plan="$(mktemp)"`, then write `PLAN_CONTENT` to that path using the `Write` tool. +2. Pass ONLY these two pieces of information to the sub-agent: + - The temporary plan file path (`$tmp_plan`) + - The plan content as text (for semantic checks that do not need file access) +3. The sub-agent does NOT receive the original plan file path, project history, prior conversation context, or background information. + +### Sub-Agent Parameters + +``` +- model: "sonnet" +- prompt: | + You are the plan-check pipeline executor. + + Your task is to analyze the provided plan file for: + 1. Structural schema compliance (deterministic) + 2. Internal contradictions (semantic) + 3. 
Execution-affecting ambiguities (semantic) + + ## Input + + You receive exactly two inputs: + - `PLAN_TEMP_PATH`: path to a temporary file containing the plan content + - `PLAN_CONTENT`: the plan content as plain text + + You do NOT receive project history, prior conversation context, or background information. + + ## Deterministic Checks + + Source `${CLAUDE_PLUGIN_ROOT}/scripts/lib/plan-check-common.sh` and run: + - `plan_check_validate_schema "$PLAN_TEMP_PATH" "${CLAUDE_PLUGIN_ROOT}/prompt-template/plan/gen-plan-template.md"` + - If the template is unavailable, skip schema validation and produce a single info-level finding explaining the skip + - `## Task Breakdown` is optional; validate task tags, Target ACs, and dependencies only when the section is present + - Write findings from deterministic checks to a temporary JSON file + + ## Semantic Checks + + Run the following semantic checks via Codex using `${CLAUDE_PLUGIN_ROOT}/scripts/ask-codex.sh`: + + 1. Contradiction check: Invoke the plan-consistency-checker agent logic. + Pass `PLAN_CONTENT` (the plan text, not the file path). Expect a JSON array of contradiction findings. + Each finding must have: id, severity=blocker, category=contradiction, location, evidence, explanation, suggested_resolution, affected_acs, affected_tasks. + + 2. Ambiguity check: Invoke the plan-ambiguity-checker agent logic. + Pass `PLAN_CONTENT` (the plan text, not the file path). Expect a JSON array of ambiguity findings. + Each finding must have: id, severity, category=ambiguity, location, evidence, explanation, suggested_resolution, affected_acs, affected_tasks, ambiguity_details. + + If ask-codex.sh fails or returns malformed JSON, retry once. If still failing after retry, produce a single `runtime-error` info-level finding for that checker and continue. The runtime-error finding must have category `runtime-error`, severity `info`, and explain that the semantic check was skipped due to malformed agent output. 
+ + ## Merge and Deduplicate + + Merge all findings (deterministic + semantic) into a single JSON array. + Sort by severity: blocker first, then warning, then info. + Assign sequential F-IDs if any finding lacks a stable ID. + + ## Output + + Return ONLY a JSON object with this exact structure: + { + "findings": [...], + "summary": { + "total": N, + "blockers": N, + "warnings": N, + "infos": N, + "status": "pass" | "fail" + } + } + + The `findings` array must contain all findings from deterministic and semantic checks. + `status` is "fail" if any blocker exists, otherwise "pass". +``` + +### Parse Sub-Agent Output + +1. Extract the JSON object from the sub-agent output. The sub-agent may wrap the JSON in markdown code fences; strip them if present. +2. Validate that the output contains a `findings` array and a `summary` object. +3. Post-process ambiguity findings to ensure stable content-addressable IDs: + - Pipe the `findings` array through `plan_check_postprocess_ambiguity_ids()` from `plan-check-common.sh`. + - This replaces any ambiguity IDs with a deterministic SHA-256 hash of `section + "\n" + fragment`, regardless of what the sub-agent returned. +4. If parsing fails or the output is malformed, report "Sub-agent returned malformed findings. Falling back to deterministic-only report." and use only the deterministic findings if available. + +--- + +## Phase 6: Report Generation + +Create a timestamped report directory under `.humanize/plan-check//`. + +Use `${CLAUDE_PLUGIN_ROOT}/scripts/plan-check.sh` to assemble the report: +```bash +${CLAUDE_PLUGIN_ROOT}/scripts/plan-check.sh \ + --plan "$PLAN_FILE" \ + --report-dir "$REPORT_DIR" \ + --findings-file "$FINDINGS_FILE" +``` + +Where: +- `REPORT_DIR` is the timestamped directory +- `FINDINGS_FILE` is a temporary file containing the merged findings JSON array + +If `--alt-language` is specified, write an additional `report..md` using the same structure but with section headers translated. 
### Print Initial Findings Report
+
+After `scripts/plan-check.sh` has written `report.md` and `findings.json`, print the initial findings report to the terminal as a contiguous block. Read the generated `report.md` and print its contents. This ensures the user sees the full findings before any resolution or clarification questions are asked, and that terminal output matches the file report (AC-5).
+
+Format:
+```
+=== Initial Plan Check Findings ===
+Plan: <plan file path>
+Status: <pass|fail>
+Blockers: <count> Warnings: <count> Infos: <count>
+
+<full contents of report.md>
+```
+
+---
+
+## Phase 7: Contradiction Resolution
+
+If any findings have `category=contradiction` and `severity=blocker`, present them to the user for resolution.
+
+For each contradiction finding:
+1. Display the contradiction details:
+   - ID, section, fragment, evidence, explanation
+   - The competing definitions or conflicting statements
+2. Use `AskUserQuestion` with:
+   - Question: "How do you want to resolve contradiction <finding ID>?"
+   - Options:
+     - "Accept first definition" (if two definitions are present)
+     - "Accept second definition"
+     - "Provide custom resolution"
+   - If "Provide custom resolution" is selected, prompt for free-text input.
+3. Record the resolution:
+   - Create a resolution record with the finding ID, the selected option, and any custom text.
+   - Mark the finding as resolved.
+
+If no contradictions exist, skip this phase.
+
+---
+
+## Phase 8: Ambiguity Clarification
+
+If any findings have `category=ambiguity` and `severity=blocker`, present them to the user one by one for clarification.
+
+For each ambiguity finding:
+1. Display the ambiguity details:
+   - ID, section, fragment, evidence, explanation
+   - Competing interpretations
+   - Execution drift risk
+2. Use `AskUserQuestion` to ask the `ambiguity_details.clarification_question`.
+   - Options:
+     - Provide a specific answer (as free-text if the question is open-ended)
+     - "Skip this ambiguity"
+3.
Record the clarification:
+   - If answered: create a clarification record with the finding ID and the user's answer.
+   - If skipped: the ambiguity remains a blocker.
+
+The check does not pass until all ambiguity questions are answered. Skipped ambiguities remain as blockers.
+
+If no blocker ambiguities exist, skip this phase.
+
+---
+
+## Phase 9: Resolution Report
+
+After all contradictions are resolved and all ambiguities are clarified (or skipped):
+
+1. Build the resolutions array from the collected resolutions and clarifications.
+2. Use `plan_check_build_resolved_json()` to assemble a resolution report JSON including:
+   - Original findings
+   - Resolutions array
+   - Updated summary with resolved status
+3. Write the resolution report to `$REPORT_DIR/resolution.json`.
+4. Append a resolution summary to `$REPORT_DIR/report.md`.
+
+---
+
+## Phase 10: In-Place Rewrite
+
+If all blockers are resolved (no remaining unresolved contradictions or unskipped ambiguities):
+
+1. Ask the user via `AskUserQuestion`:
+   - Question: "Do you want to apply the resolutions and clarifications to the plan file?"
+   - Options:
+     - "Yes, rewrite the plan file"
+     - "No, keep the plan file unchanged"
+2. If the user agrees:
+   - Generate a revised plan content that incorporates the resolutions and clarifications into the relevant sections.
+   - Show a diff preview: `diff -u "$PLAN_FILE" <(echo "$revised_content")` or equivalent.
+   - Ask for final confirmation: "Apply these changes?"
+   - If confirmed:
+     - Create a backup using `plan_check_backup_plan()`: `.humanize/plan-check/<timestamp>/backup/<plan-filename>.bak`
+     - Write atomically using `plan_check_atomic_write()` in the same directory as the plan file.
+     - If `EFFECTIVE_RECHECK=true`, rerun the check pipeline from Phase 5 on the rewritten plan.
+3. If the user declines, the plan file remains unchanged.
+ +--- + +## Phase 11: Display Final Resolution Report + +After the resolution report is written, print the final resolution report to the terminal as a contiguous block. This ensures terminal output and file report are consistent (AC-5). + +Format: +``` +=== Plan Check Resolution Report === +Plan: +Final Status: +Unresolved Blockers: +Total Resolutions: + + +``` + +If unresolved blockers remain (unresolved contradictions or skipped ambiguities), state: +"The plan has unresolved blockers. Review the findings report and resolve all contradictions and ambiguities before implementation." + +If all blockers are resolved, state: +"All blockers resolved. The plan is ready for implementation." + +--- + +## Phase 12: Exit + +Exit with code 0 if no unresolved blockers remain, or with code 1 if unresolved blockers exist. + +--- + +## Deterministic-Only Fallback + +If the sub-agent fails entirely (no usable output), fall back to deterministic-only validation: + +1. Source `${CLAUDE_PLUGIN_ROOT}/scripts/lib/plan-check-common.sh` +2. Run `plan_check_validate_schema "$PLAN_FILE" "${CLAUDE_PLUGIN_ROOT}/prompt-template/plan/gen-plan-template.md"` +3. Collect findings, write report via `scripts/plan-check.sh` +4. Display results and exit with appropriate code + +This ensures the command always produces a report even when semantic agents are unavailable. 
diff --git a/config/default_config.json b/config/default_config.json index af41af12..0d0e4da5 100644 --- a/config/default_config.json +++ b/config/default_config.json @@ -4,5 +4,7 @@ "bitlesson_model": "haiku", "agent_teams": false, "alternative_plan_language": "", - "gen_plan_mode": "discussion" + "gen_plan_mode": "discussion", + "plan_check_recheck": false, + "gen_plan_check": false } diff --git a/docs/install-for-codex.md b/docs/install-for-codex.md index 2c70a1cc..bc36708d 100644 --- a/docs/install-for-codex.md +++ b/docs/install-for-codex.md @@ -23,7 +23,7 @@ Or use the unified installer directly: ``` This will: -- Sync `humanize`, `humanize-gen-plan`, `humanize-refine-plan`, and `humanize-rlcr` into `${CODEX_HOME:-~/.codex}/skills` +- Sync `humanize`, `humanize-gen-plan`, `humanize-plan-check`, `humanize-refine-plan`, and `humanize-rlcr` into `${CODEX_HOME:-~/.codex}/skills` - Copy runtime dependencies into `${CODEX_HOME:-~/.codex}/skills/humanize` - Install/update native Humanize Stop hooks in `${CODEX_HOME:-~/.codex}/hooks.json` - Enable the experimental `codex_hooks` feature in `${CODEX_HOME:-~/.codex}/config.toml` when `codex` is available @@ -42,6 +42,7 @@ ls -la "${CODEX_HOME:-$HOME/.codex}/skills" Expected directories: - `humanize` - `humanize-gen-plan` +- `humanize-plan-check` - `humanize-refine-plan` - `humanize-rlcr` @@ -56,6 +57,7 @@ Runtime dependencies in `humanize/`: Installed files/directories: - `${CODEX_HOME:-~/.codex}/skills/humanize/SKILL.md` - `${CODEX_HOME:-~/.codex}/skills/humanize-gen-plan/SKILL.md` +- `${CODEX_HOME:-~/.codex}/skills/humanize-plan-check/SKILL.md` - `${CODEX_HOME:-~/.codex}/skills/humanize-refine-plan/SKILL.md` - `${CODEX_HOME:-~/.codex}/skills/humanize-rlcr/SKILL.md` - `${CODEX_HOME:-~/.codex}/skills/humanize/scripts/` diff --git a/docs/install-for-kimi.md b/docs/install-for-kimi.md index c947ffac..d26246f5 100644 --- a/docs/install-for-kimi.md +++ b/docs/install-for-kimi.md @@ -24,7 +24,7 @@ From the Humanize repo 
root, run: ``` This command will: -- Sync `humanize`, `humanize-gen-plan`, `humanize-refine-plan`, and `humanize-rlcr` into `~/.config/agents/skills` +- Sync `humanize`, `humanize-gen-plan`, `humanize-plan-check`, `humanize-refine-plan`, and `humanize-rlcr` into `~/.config/agents/skills` - Copy runtime dependencies into `~/.config/agents/skills/humanize` Common installer script (all targets): @@ -47,9 +47,10 @@ cd /path/to/humanize # Create the skills directory if it doesn't exist mkdir -p ~/.config/agents/skills -# Copy all four skills +# Copy all Humanize skills cp -r skills/humanize ~/.config/agents/skills/ cp -r skills/humanize-gen-plan ~/.config/agents/skills/ +cp -r skills/humanize-plan-check ~/.config/agents/skills/ cp -r skills/humanize-refine-plan ~/.config/agents/skills/ cp -r skills/humanize-rlcr ~/.config/agents/skills/ @@ -63,14 +64,14 @@ cp -r config ~/.config/agents/skills/humanize/ cp -r agents ~/.config/agents/skills/humanize/ # Hydrate runtime root placeholders inside SKILL.md files -for skill in humanize humanize-gen-plan humanize-refine-plan humanize-rlcr; do +for skill in humanize humanize-gen-plan humanize-plan-check humanize-refine-plan humanize-rlcr; do sed -i.bak "s|{{HUMANIZE_RUNTIME_ROOT}}|$HOME/.config/agents/skills/humanize|g" \ "$HOME/.config/agents/skills/$skill/SKILL.md" done # Strip user-invocable flag from SKILL.md files for runtime visibility # (This matches the behavior of scripts/install-skill.sh) -for skill in humanize humanize-gen-plan humanize-refine-plan humanize-rlcr; do +for skill in humanize humanize-gen-plan humanize-plan-check humanize-refine-plan humanize-rlcr; do awk ' BEGIN { in_fm = 0; fm_done = 0 } /^---[[:space:]]*$/ { @@ -99,6 +100,7 @@ ls -la ~/.config/agents/skills/ # Should show: # humanize/ # humanize-gen-plan/ +# humanize-plan-check/ # humanize-refine-plan/ # humanize-rlcr/ ``` diff --git a/docs/usage.md b/docs/usage.md index 313ae7ce..716432da 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -63,6 +63,7 @@ 
The quiz is advisory, not a gate. You always have the option to proceed. But tha | `/cancel-rlcr-loop` | Cancel active loop | | `/gen-plan --input --output ` | Generate structured plan from draft | | `/refine-plan --input ` | Refine an annotated plan and generate a QA ledger | +| `/plan-check --plan ` | Check a plan for contradictions, ambiguities, and schema compliance | | `/ask-codex [question]` | One-shot consultation with Codex | ## Command Reference @@ -112,6 +113,8 @@ OPTIONS: (discussion mode only; ignored in --direct) --discussion Use discussion mode (iterative Claude/Codex convergence rounds) --direct Use direct mode (skip convergence rounds, proceed immediately to plan) + --check Enable integrated draft-check and plan-check mode for this invocation + --no-check Disable integrated check mode for this invocation (overrides --check and config) -h, --help Show help message ``` @@ -120,14 +123,59 @@ The gen-plan command transforms rough draft documents into structured implementa Workflow: 1. Validates input/output paths 2. Checks if draft is relevant to the repository -3. Analyzes draft for clarity, consistency, completeness, and functionality -4. Engages user to resolve any issues found -5. Generates a structured plan.md with acceptance criteria -6. Optionally starts `/humanize:start-rlcr-loop` if `--auto-start-rlcr-if-converged` conditions are met +3. When check mode is enabled, runs `check-draft` to detect contradictions and ambiguities that would affect plan generation +4. Analyzes draft for clarity, consistency, completeness, and functionality +5. Engages user to resolve any issues found +6. Generates a structured plan.md with acceptance criteria +7. When check mode is enabled, runs `check-plan` against the generated plan and repairs discrepancies using the original draft as the source of truth +8. 
Optionally starts `/humanize:start-rlcr-loop` if `--auto-start-rlcr-if-converged` conditions are met If reviewers later annotate the generated plan with comment blocks, run `/humanize:refine-plan --input ` before starting or resuming implementation. +#### Check Mode + +Check mode adds pre-generation draft checking and post-generation plan checking with targeted repair. It is **disabled by default**. + +Enable check mode via CLI flag: +```bash +/humanize:gen-plan --input draft.md --output plan.md --check +``` + +Disable check mode for one invocation even if config enables it: +```bash +/humanize:gen-plan --input draft.md --output plan.md --no-check +``` + +Enable check mode by default via config (`gen_plan_check`): +```json +{ + "gen_plan_check": true +} +``` + +**Resolution priority** (highest to lowest): +1. `--no-check` flag forces disabled +2. `--check` flag forces enabled +3. Merged config `gen_plan_check` value (`true` or `false`) +4. Default: disabled + +If the merged config contains an invalid value (anything other than `true` or `false`), a warning is printed and check mode falls back to disabled unless `--check` is passed. + +**Check mode behavior**: +- `check-draft`: Runs after the relevance check and before plan generation. Semantic contradiction and ambiguity checks run through native sub-agents. Blocker findings are presented to the user for clarification before the plan is generated. +- `check-plan`: Runs after the plan body is written. Deterministic schema validation runs locally; semantic consistency, ambiguity, and conditional draft-vs-plan drift checks run through native sub-agents. Findings are repaired using source-of-truth precedence: explicit user answers > original draft text > repository facts > safe leader-agent judgment. +- `repair`: High-priority-source rewrites (clarifications, draft text, repo facts) are applied silently. Leader-agent-judgment rewrites require a diff preview and user confirmation. 
+- `recheck`: Controlled by the existing `plan_check_recheck` config. When enabled and at least one repair changed plan bytes, `check-plan` runs once more in check-only mode (no further repair). + +**Artifacts**: When check mode is enabled, diagnostic artifacts are written to `.humanize/gen-plan-check//`: +- `draft-findings.json` -- findings from the draft-check phase +- `plan-findings.json` -- findings from the plan-check phase +- `report.md` -- combined human-readable report +- `resolution.json` -- resolution records for repaired findings +- `backup/` -- backup of the plan before each accepted rewrite +- `tmp/` -- ephemeral temp files (deleted before the command exits) + ### refine-plan ``` @@ -253,6 +301,59 @@ for getting a second opinion, reviewing a design, or asking domain-specific ques Responses are saved to `.humanize/skill//` with `input.md`, `output.md`, and `metadata.md` for reference. +### plan-check + +``` +/humanize:plan-check --plan [OPTIONS] + +OPTIONS: + --plan Path to the plan file to check (required) + --recheck Re-run plan-check after an accepted rewrite (default: disabled) + --alt-language Generate an additional report in the specified language + -h, --help Show help message +``` + +The `plan-check` command analyzes a plan file for internal contradictions, +ambiguities, and structural schema compliance before implementation begins. + +**What it checks:** +- **Contradictions**: Statements that assign two incompatible definitions to the same symbol or mechanism +- **Ambiguities**: Statements that admit multiple valid interpretations affecting execution +- **Schema compliance**: Required sections, duplicate AC IDs, invalid task tags, orphaned dependencies, circular dependencies, appendix drift + +**How it works:** +1. Runs deterministic schema validation using the canonical `gen-plan-template.md` +2. Spawns a sub-agent to perform semantic contradiction and ambiguity detection +3. 
Generates a structured findings report under `.humanize/plan-check//` +4. Presents contradictions to the user for interactive resolution +5. Presents ambiguities atomically for clarification +6. Optionally rewrites the plan file in-place after user confirmation + +By default, `plan-check` does not re-run after an accepted rewrite. Use `--recheck` +for a single run, or set `"plan_check_recheck": true` in Humanize config to make +recheck the default for the project or user. + +**Report artifacts:** +- `report.md` -- Human-readable findings report +- `findings.json` -- Machine-readable findings (schema v1.0) +- `resolution.json` -- Post-resolution report with user clarifications (if applicable) +- `backup/plan.md.bak` -- Backup of the original plan (if rewrite is performed) + +**Exit codes:** +- `0` -- No unresolved blockers remain +- `1` -- Unresolved blockers exist + +**Example:** +```bash +/humanize:plan-check --plan .humanize/plans/my-plan.md +``` + +In Codex, use the installed skill form: + +```bash +$humanize-plan-check --plan .humanize/plans/my-plan.md +``` + ## Configuration Humanize uses a 4-layer config hierarchy (lowest to highest priority): @@ -272,6 +373,7 @@ Current built-in keys: | `agent_teams` | `false` | Project-level default for agent teams workflow | | `alternative_plan_language` | `""` | Optional translated plan variant language; supported values include `Chinese`, `Korean`, `Japanese`, `Spanish`, `French`, `German`, `Portuguese`, `Russian`, `Arabic`, or ISO codes like `zh` | | `gen_plan_mode` | `discussion` | Default plan-generation mode | +| `plan_check_recheck` | `false` | Re-run `plan-check` after an accepted rewrite; CLI `--recheck` enables this for a single run | ### Codex Model Configuration diff --git a/hooks/lib/loop-common.sh b/hooks/lib/loop-common.sh index 5726b23b..154d6bde 100755 --- a/hooks/lib/loop-common.sh +++ b/hooks/lib/loop-common.sh @@ -1030,6 +1030,23 @@ _normalize_path() { echo "$1" | sed 's|/\./|/|g; s|//|/|g' } +# Compare 
paths using the same normalization and parent-canonicalization used by +# loop-file validators. This accepts symlinked project prefixes without +# dereferencing the target filename itself. +loop_paths_match() { + local left="$1" + local right="$2" + local normalized_left normalized_right + local canonical_left canonical_right + + normalized_left=$(_normalize_path "$left") + normalized_right=$(_normalize_path "$right") + canonical_left=$(canonicalize_path_prefix "$normalized_left") + canonical_right=$(canonicalize_path_prefix "$normalized_right") + + [[ "${canonical_left:-$normalized_left}" == "${canonical_right:-$normalized_right}" ]] +} + # Check if cancel operation is authorized via signal file # Usage: is_cancel_authorized "$active_loop_dir" "$command_lower" # Returns: 0 if authorized, non-zero otherwise @@ -1522,7 +1539,7 @@ Use Write or Edit on: {{CORRECT_PATH}} Rules: - Keep the **IMMUTABLE SECTION** unchanged -- Do not modify `goal-tracker.md` via Bash +- Do not modify \`goal-tracker.md\` via Bash - Do not write to an old loop session's tracker" load_and_render_safe "$TEMPLATE_DIR" "block/goal-tracker-modification.md" "$fallback" \ diff --git a/hooks/lib/methodology-analysis.sh b/hooks/lib/methodology-analysis.sh index a95e81af..d6edd384 100644 --- a/hooks/lib/methodology-analysis.sh +++ b/hooks/lib/methodology-analysis.sh @@ -99,19 +99,19 @@ When done, write a completion note to $LOOP_DIR/methodology-analysis-done.md." return 0 } -# Complete the methodology analysis phase +# Check whether the methodology analysis phase is ready to complete # -# Checks the completion artifact, reads the original exit reason, renames the -# state file to the appropriate terminal state, and cleans up marker files. +# Checks the completion artifact, analysis report, and original exit reason +# without changing loop state. 
# # Globals read: # LOOP_DIR - path to the loop directory # # Returns: -# 0 - completion successful, caller should exit 0 (allow exit) +# 0 - completion gates passed # 1 - incomplete (done marker missing/empty, report missing, or exit reason invalid) # -complete_methodology_analysis() { +methodology_analysis_ready_to_complete() { local done_file="$LOOP_DIR/methodology-analysis-done.md" local report_file="$LOOP_DIR/methodology-analysis-report.md" @@ -162,6 +162,30 @@ complete_methodology_analysis() { ;; esac + return 0 +} + +# Complete the methodology analysis phase +# +# Checks readiness, then renames the state file to the appropriate terminal +# state and cleans up marker files. +# +# Globals read: +# LOOP_DIR - path to the loop directory +# +# Returns: +# 0 - completion successful, caller should exit 0 (allow exit) +# 1 - incomplete (done marker missing/empty, report missing, or exit reason invalid) +# +complete_methodology_analysis() { + if ! methodology_analysis_ready_to_complete; then + return 1 + fi + + local exit_reason + exit_reason=$(cat "$LOOP_DIR/.methodology-exit-reason" 2>/dev/null || echo "") + exit_reason=$(echo "$exit_reason" | tr -d '[:space:]') + # Rename methodology-analysis-state.md to the terminal state local target_name="${exit_reason}-state.md" mv "$LOOP_DIR/methodology-analysis-state.md" "$LOOP_DIR/$target_name" diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 0c191d4c..b3462cd0 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -615,7 +615,7 @@ fi # if .humanize is tracked, which would block exit before reaching this handler. if [[ "$IS_METHODOLOGY_ANALYSIS_PHASE" == "true" ]]; then - if complete_methodology_analysis; then + if methodology_analysis_ready_to_complete; then # Before allowing the terminal state transition, re-verify the # working tree is clean. 
The main git-clean gate below is skipped # in the methodology branch, so without this check, tracked edits @@ -654,6 +654,9 @@ Please commit all changes before allowing the loop to exit. exit 0 fi fi + if ! complete_methodology_analysis; then + block_methodology_analysis_incomplete + fi # Analysis complete and tree clean, allow exit exit 0 else diff --git a/hooks/loop-edit-validator.sh b/hooks/loop-edit-validator.sh index fb9f8e1b..9a177021 100755 --- a/hooks/loop-edit-validator.sh +++ b/hooks/loop-edit-validator.sh @@ -203,10 +203,8 @@ fi if is_goal_tracker_path "$FILE_PATH_LOWER"; then GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" - NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") - NORMALIZED_GOAL_TRACKER_PATH=$(_normalize_path "$GOAL_TRACKER_PATH") - if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_GOAL_TRACKER_PATH" ]]; then + if ! loop_paths_match "$FILE_PATH" "$GOAL_TRACKER_PATH"; then goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 exit 2 fi @@ -272,4 +270,24 @@ Edit: {{CORRECT_PATH}}" fi fi +# ======================================== +# Validate Directory Path +# ======================================== + +if [[ -n "${CLAUDE_FILENAME:-}" ]]; then + CORRECT_PATH="$ACTIVE_LOOP_DIR/$CLAUDE_FILENAME" + _EDIT_FILE_REAL=$(canonicalize_path_prefix "$FILE_PATH") + _EDIT_CORRECT_REAL=$(canonicalize_path_prefix "$CORRECT_PATH") + if [[ "${_EDIT_FILE_REAL:-$FILE_PATH}" != "${_EDIT_CORRECT_REAL:-$CORRECT_PATH}" ]]; then + FALLBACK="# Wrong Directory Path + +You tried to {{ACTION}} {{FILE_PATH}} but the correct path is {{CORRECT_PATH}}" + load_and_render_safe "$TEMPLATE_DIR" "block/wrong-directory-path.md" "$FALLBACK" \ + "ACTION=edit" \ + "FILE_PATH=$FILE_PATH" \ + "CORRECT_PATH=$CORRECT_PATH" >&2 + exit 2 + fi +fi + exit 0 diff --git a/hooks/loop-read-validator.sh b/hooks/loop-read-validator.sh index b812288a..6146530f 100755 --- a/hooks/loop-read-validator.sh +++ b/hooks/loop-read-validator.sh @@ -226,10 +226,8 @@ fi if [[ 
"$IS_GOAL_TRACKER" == "true" ]] && [[ "$IN_HUMANIZE_LOOP_DIR" == "true" ]]; then CORRECT_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" - NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") - NORMALIZED_CORRECT_PATH=$(_normalize_path "$CORRECT_PATH") - if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_CORRECT_PATH" ]]; then + if ! loop_paths_match "$FILE_PATH" "$CORRECT_PATH"; then FALLBACK="# Wrong Goal Tracker Path Read the active loop goal tracker instead: {{CORRECT_PATH}}" diff --git a/hooks/loop-write-validator.sh b/hooks/loop-write-validator.sh index 1d8f1e31..364cd23b 100755 --- a/hooks/loop-write-validator.sh +++ b/hooks/loop-write-validator.sh @@ -252,10 +252,8 @@ fi if is_goal_tracker_path "$FILE_PATH_LOWER"; then GOAL_TRACKER_PATH="$ACTIVE_LOOP_DIR/goal-tracker.md" - NORMALIZED_FILE_PATH=$(_normalize_path "$FILE_PATH") - NORMALIZED_GOAL_TRACKER_PATH=$(_normalize_path "$GOAL_TRACKER_PATH") - if [[ "$NORMALIZED_FILE_PATH" != "$NORMALIZED_GOAL_TRACKER_PATH" ]]; then + if ! loop_paths_match "$FILE_PATH" "$GOAL_TRACKER_PATH"; then goal_tracker_blocked_message "$CURRENT_ROUND" "$GOAL_TRACKER_PATH" >&2 exit 2 fi diff --git a/scripts/ask-codex.sh b/scripts/ask-codex.sh index fee439a8..423199d6 100755 --- a/scripts/ask-codex.sh +++ b/scripts/ask-codex.sh @@ -242,6 +242,11 @@ EOF # ======================================== # Build codex exec arguments (same pattern as loop-codex-stop-hook.sh) +CODEX_DISABLE_HOOKS_ARGS=() +if codex --help 2>&1 | grep -q -- '--disable'; then + CODEX_DISABLE_HOOKS_ARGS=(--disable codex_hooks) +fi + CODEX_EXEC_ARGS=("-m" "$CODEX_MODEL") if [[ -n "$CODEX_EFFORT" ]]; then CODEX_EXEC_ARGS+=("-c" "model_reasoning_effort=${CODEX_EFFORT}") @@ -269,7 +274,7 @@ CODEX_STDERR_FILE="$CACHE_DIR/codex-run.log" echo "# Working directory: $PROJECT_ROOT" echo "# Timeout: $CODEX_TIMEOUT seconds" echo "" - echo "codex exec ${CODEX_EXEC_ARGS[*]} \"\"" + echo "codex exec ${CODEX_DISABLE_HOOKS_ARGS[*]} ${CODEX_EXEC_ARGS[*]} \"\"" echo "" echo "# Prompt content:" 
echo "$QUESTION" @@ -294,7 +299,7 @@ epoch_to_iso() { START_TIME=$(date +%s) CODEX_EXIT_CODE=0 -printf '%s' "$QUESTION" | run_with_timeout "$CODEX_TIMEOUT" codex exec "${CODEX_EXEC_ARGS[@]}" - \ +printf '%s' "$QUESTION" | run_with_timeout "$CODEX_TIMEOUT" codex exec "${CODEX_DISABLE_HOOKS_ARGS[@]}" "${CODEX_EXEC_ARGS[@]}" - \ > "$CODEX_STDOUT_FILE" 2> "$CODEX_STDERR_FILE" || CODEX_EXIT_CODE=$? END_TIME=$(date +%s) diff --git a/scripts/bitlesson-select.sh b/scripts/bitlesson-select.sh index fd19a445..38f08142 100755 --- a/scripts/bitlesson-select.sh +++ b/scripts/bitlesson-select.sh @@ -11,6 +11,14 @@ source "$SCRIPT_DIR/lib/config-loader.sh" source "$SCRIPT_DIR/lib/model-router.sh" source "$SCRIPT_DIR/../hooks/lib/project-root.sh" +# Source portable timeout wrapper +source "$SCRIPT_DIR/portable-timeout.sh" + +# Source shared loop library before config fallback expansion so +# DEFAULT_CODEX_MODEL is initialized under set -u. +HOOKS_LIB_DIR="$(cd "$SCRIPT_DIR/../hooks/lib" && pwd)" +source "$HOOKS_LIB_DIR/loop-common.sh" + PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" PROJECT_ROOT="$(resolve_project_root)" || { echo "Error: Cannot determine project root." >&2 @@ -25,13 +33,6 @@ CODEX_FALLBACK_MODEL="${CODEX_FALLBACK_MODEL:-$DEFAULT_CODEX_MODEL}" PROVIDER_MODE="$(get_config_value "$MERGED_CONFIG" "provider_mode")" PROVIDER_MODE="${PROVIDER_MODE:-auto}" -# Source portable timeout wrapper -source "$SCRIPT_DIR/portable-timeout.sh" - -# Source shared loop library (kept for consistency with ask-codex.sh) -HOOKS_LIB_DIR="$(cd "$SCRIPT_DIR/../hooks/lib" && pwd)" -source "$HOOKS_LIB_DIR/loop-common.sh" - usage() { cat <<'USAGE_EOF' >&2 Usage: diff --git a/scripts/install-codex-hooks.sh b/scripts/install-codex-hooks.sh index 407fe668..a3474b31 100755 --- a/scripts/install-codex-hooks.sh +++ b/scripts/install-codex-hooks.sh @@ -77,7 +77,12 @@ require_codex_hooks_support() { die "Codex CLI with native hooks support is required. Install Codex 0.114.0+ first." fi - if ! 
codex features list 2>/dev/null | grep -qE '^codex_hooks[[:space:]]'; then + local features_output + if ! features_output="$(codex features list 2>/dev/null)"; then + die "Installed Codex CLI does not expose the codex_hooks feature. Humanize Codex install requires Codex 0.114.0+." + fi + + if ! grep -Eq '^codex_hooks[[:space:]]' <<<"$features_output"; then die "Installed Codex CLI does not expose the codex_hooks feature. Humanize Codex install requires Codex 0.114.0+." fi } @@ -188,8 +193,6 @@ log "codex config dir: $CODEX_CONFIG_DIR" log "runtime root: $RUNTIME_ROOT" log "hooks file: $HOOKS_FILE" -require_codex_hooks_support - if [[ "$DRY_RUN" == "true" ]]; then log "DRY-RUN merge $HOOKS_TEMPLATE -> $HOOKS_FILE" if [[ "$ENABLE_FEATURE" == "true" ]]; then @@ -198,6 +201,8 @@ if [[ "$DRY_RUN" == "true" ]]; then exit 0 fi +require_codex_hooks_support + merge_hooks_json "$HOOKS_FILE" "$HOOKS_TEMPLATE" "$RUNTIME_ROOT" enable_feature "$CODEX_CONFIG_DIR" diff --git a/scripts/install-skill.sh b/scripts/install-skill.sh index 3106201d..4fb20a9e 100755 --- a/scripts/install-skill.sh +++ b/scripts/install-skill.sh @@ -3,7 +3,7 @@ # Install/upgrade Humanize skills for Kimi and/or Codex. 
 #
 # What this does:
-#   1) Sync skills/{humanize,humanize-gen-plan,humanize-rlcr} to target skills dir(s)
+#   1) Sync Humanize skills to target skills dir(s)
 #   2) Copy runtime dependencies into <skills_dir>/humanize/{scripts,hooks,prompt-template}
 #   3) Hydrate SKILL.md command paths with concrete runtime root paths
 #
@@ -39,6 +39,7 @@ DRY_RUN="false"
 SKILL_NAMES=(
   "humanize"
   "humanize-gen-plan"
+  "humanize-plan-check"
   "humanize-refine-plan"
   "humanize-rlcr"
 )
@@ -273,17 +274,17 @@ install_codex_user_config() {
     local user_config_file="$user_config_dir/config.json"
     local default_config_file="$runtime_root/config/default_config.json"
 
+    if [[ "$DRY_RUN" == "true" ]]; then
+        log "DRY-RUN seed Codex-friendly BitLesson config in $user_config_file"
+        return
+    fi
+
     [[ -f "$default_config_file" ]] || die "missing default config: $default_config_file"
 
     if ! command -v python3 >/dev/null 2>&1; then
         die "python3 is required to update Humanize user config for Codex installs"
     fi
 
-    if [[ "$DRY_RUN" == "true" ]]; then
-        log "DRY-RUN seed Codex-friendly BitLesson config in $user_config_file"
-        return
-    fi
-
     mkdir -p "$user_config_dir"
     python3 - "$default_config_file" "$user_config_file" "$install_target" <<'PY'
 
diff --git a/scripts/lib/gen-plan-check-mode.sh b/scripts/lib/gen-plan-check-mode.sh
new file mode 100755
index 00000000..d2a66fef
--- /dev/null
+++ b/scripts/lib/gen-plan-check-mode.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# gen-plan-check-mode.sh
+# Resolves the effective check-mode flag for gen-plan from CLI flags and merged config.
+#
+# Usage (sourced):
+#   source "${CLAUDE_PLUGIN_ROOT}/scripts/lib/gen-plan-check-mode.sh"
+#   _gen_plan_resolve_check_mode "$CHECK_FLAG" "$NO_CHECK_FLAG" "$CONFIG_GEN_PLAN_CHECK_RAW"
+#   echo "Effective check mode: $EFFECTIVE_CHECK_MODE"
+#
+# Resolution priority (highest to lowest):
+#   1. --no-check flag -> EFFECTIVE_CHECK_MODE=false
+#   2. --check flag -> EFFECTIVE_CHECK_MODE=true
+#   3. Merged config gen_plan_check value (true/false)
+#   4. Default -> EFFECTIVE_CHECK_MODE=false
+#
+# Invalid config values warn and fall back to disabled unless --check is passed.
+
+# Source guard
+[[ -n "${_GEN_PLAN_CHECK_MODE_LOADED:-}" ]] && return 0 2>/dev/null || true
+_GEN_PLAN_CHECK_MODE_LOADED=1
+
+set -euo pipefail
+
+# Resolve effective check mode from CLI flags and merged config.
+# Inputs:
+#   $1 - CHECK_FLAG ("true" or "false")
+#   $2 - NO_CHECK_FLAG ("true" or "false")
+#   $3 - CONFIG_GEN_PLAN_CHECK_RAW (raw config value string, may be empty)
+# Outputs:
+#   Sets global EFFECTIVE_CHECK_MODE to "true" or "false"
+#   Prints warnings to stderr when appropriate
+_gen_plan_resolve_check_mode() {
+    local check_flag="${1:-false}"
+    local no_check_flag="${2:-false}"
+    local config_raw="${3:-}"
+
+    # Priority 1: --no-check always wins
+    if [[ "$no_check_flag" == "true" ]]; then
+        EFFECTIVE_CHECK_MODE="false"
+        return 0
+    fi
+
+    # Priority 2: --check flag
+    if [[ "$check_flag" == "true" ]]; then
+        EFFECTIVE_CHECK_MODE="true"
+        return 0
+    fi
+
+    # Priority 3: merged config value
+    local config_normalized=""
+    if [[ -n "$config_raw" ]]; then
+        config_normalized="$(printf '%s' "$config_raw" | tr '[:upper:]' '[:lower:]')"
+    fi
+
+    if [[ "$config_normalized" == "true" ]]; then
+        EFFECTIVE_CHECK_MODE="true"
+        return 0
+    fi
+
+    if [[ "$config_normalized" == "false" || -z "$config_raw" ]]; then
+        EFFECTIVE_CHECK_MODE="false"
+        return 0
+    fi
+
+    # Invalid config value: warn and fall back to disabled
+    # (config_raw is non-empty and not true/false)
+    echo "Warning: unsupported gen_plan_check \"${config_raw}\". Expected true or false. Check mode is disabled unless --check is passed." >&2
+    EFFECTIVE_CHECK_MODE="false"
+    return 0
+}
diff --git a/scripts/lib/plan-check-common.sh b/scripts/lib/plan-check-common.sh
new file mode 100644
index 00000000..d74fa348
--- /dev/null
+++ b/scripts/lib/plan-check-common.sh
@@ -0,0 +1,1203 @@
+#!/usr/bin/env bash
+# plan-check-common.sh
+# Shared utilities for the plan-check command.
+# Deterministic only: NO agents, NO LLMs, NO user interaction. +# +# Provides: +# - Report directory initialization +# - findings.json schema v1.0 assembly +# - Markdown report formatting +# - Backup and atomic write helpers +# - Deterministic schema validation (required core sections, AC IDs, optional +# task tags/dependencies when Task Breakdown is present, appendix drift) +# +set -euo pipefail + +# Source guard +[[ -n "${_PLAN_CHECK_COMMON_LOADED:-}" ]] && return 0 2>/dev/null || true +_PLAN_CHECK_COMMON_LOADED=1 + +# ======================================== +# Internal Helpers +# ======================================== + +_plan_check_warn() { + echo "Warning: $*" >&2 +} + +_plan_check_error() { + echo "Error: $*" >&2 +} + +_plan_check_json_string() { + python3 -c 'import json, sys; print(json.dumps(sys.argv[1]))' "${1:-}" +} + +_plan_check_json_array() { + python3 -c 'import json, sys; print(json.dumps(sys.argv[1:]))' "$@" +} + +_plan_check_build_finding() { + local fid="$1" + local severity="$2" + local category="$3" + local source_checker="$4" + local section="$5" + local fragment="$6" + local evidence="$7" + local explanation="$8" + local suggested_resolution="$9" + local affected_acs_json="${10:-[]}" + local affected_tasks_json="${11:-[]}" + + python3 - "$fid" "$severity" "$category" "$source_checker" "$section" "$fragment" "$evidence" "$explanation" "$suggested_resolution" "$affected_acs_json" "$affected_tasks_json" <<'PY' +import json +import sys + +( + fid, + severity, + category, + source_checker, + section, + fragment, + evidence, + explanation, + suggested_resolution, + affected_acs_json, + affected_tasks_json, +) = sys.argv[1:12] + +try: + affected_acs = json.loads(affected_acs_json) +except Exception: + affected_acs = [] +try: + affected_tasks = json.loads(affected_tasks_json) +except Exception: + affected_tasks = [] +if not isinstance(affected_acs, list): + affected_acs = [] +if not isinstance(affected_tasks, list): + affected_tasks = [] + 
+print(json.dumps({ + "id": fid, + "severity": severity, + "category": category, + "source_checker": source_checker, + "location": { + "section": section, + "fragment": fragment, + }, + "evidence": evidence, + "explanation": explanation, + "suggested_resolution": suggested_resolution, + "affected_acs": affected_acs, + "affected_tasks": affected_tasks, +}, separators=(",", ":"))) +PY +} + +# Global finding ID counter (incremented by _plan_check_next_fid) +_PLAN_CHECK_FID_COUNTER=0 +_PLAN_CHECK_LAST_FID="" + +# Generate the next finding ID. +# Increments the global counter and stores the result in _PLAN_CHECK_LAST_FID. +# Does NOT use stdout to avoid subshell issues with local+command substitution. +_plan_check_next_fid() { + _PLAN_CHECK_FID_COUNTER=$((_PLAN_CHECK_FID_COUNTER + 1)) + _PLAN_CHECK_LAST_FID="$(printf 'F-%03d' "$_PLAN_CHECK_FID_COUNTER")" +} + +# ======================================== +# Report Directory +# ======================================== + +# Create a timestamped report directory under the given base path. +# Outputs the directory path on stdout. 
+plan_check_init_report_dir() { + local base_dir="${1:-}" + if [[ -z "$base_dir" ]]; then + _plan_check_error "plan_check_init_report_dir requires a base directory path" + return 1 + fi + local ts + ts="$(date +%Y-%m-%d_%H-%M-%S)" + local report_dir="$base_dir/$ts" + mkdir -p "$report_dir/backup" + printf '%s\n' "$report_dir" +} + +# ======================================== +# findings.json Assembly (Schema v1.0) +# ======================================== +# +# Schema: +# { +# "version": "1.0", +# "check_run": { "timestamp": "ISO8601", "plan_path": "...", "plan_hash": "SHA256", +# "model": "...", "config": {}, "exit_code": 0 }, +# "findings": [ +# { "id": "F-001", "severity": "blocker|warning|info", +# "category": "contradiction|ambiguity|schema|dependency|appendix-drift|rewrite-risk|runtime-error", +# "source_checker": "plan-consistency-checker|plan-ambiguity-checker|plan-schema-validator", +# "location": { "section": "...", "fragment": "..." }, +# "evidence": "...", "explanation": "...", "suggested_resolution": "...", +# "affected_acs": ["AC-1"], "affected_tasks": ["task1"] } +# ], +# "summary": { "total": 0, "blockers": 0, "warnings": 0, "infos": 0, +# "status": "pass|fail|needs_clarification" } +# } + +plan_check_assemble_findings_json() { + local plan_path="${1:-}" + local plan_hash="${2:-}" + local model="${3:-}" + local config_json="${4:-}" + if [[ -z "$config_json" ]]; then + config_json="{}" + fi + local exit_code="${5:-0}" + local findings_json_array="${6:-[]}" + + local timestamp + timestamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + + # Count severities via JSON parsing to avoid false matches in non-severity fields + local total blockers warnings infos status + local severity_counts + severity_counts="$(python3 -c ' +import json, sys +try: + findings = json.loads(sys.argv[1]) + if not isinstance(findings, list): + findings = [] +except Exception: + findings = [] +blockers = sum(1 for f in findings if f.get("severity") == "blocker") +warnings = sum(1 for f in 
findings if f.get("severity") == "warning") +infos = sum(1 for f in findings if f.get("severity") == "info") +print(f"{len(findings)} {blockers} {warnings} {infos}") +' "$findings_json_array")" + total="$(printf '%s' "$severity_counts" | awk '{print $1}')" + blockers="$(printf '%s' "$severity_counts" | awk '{print $2}')" + warnings="$(printf '%s' "$severity_counts" | awk '{print $3}')" + infos="$(printf '%s' "$severity_counts" | awk '{print $4}')" + + if [[ "$blockers" -gt 0 ]]; then + status="fail" + else + status="pass" + fi + + local timestamp_json plan_path_json plan_hash_json model_json status_json + timestamp_json="$(_plan_check_json_string "$timestamp")" + plan_path_json="$(_plan_check_json_string "$plan_path")" + plan_hash_json="$(_plan_check_json_string "$plan_hash")" + model_json="$(_plan_check_json_string "$model")" + status_json="$(_plan_check_json_string "$status")" + + cat << JSON_EOF +{ + "version": "1.0", + "check_run": { + "timestamp": $timestamp_json, + "plan_path": $plan_path_json, + "plan_hash": $plan_hash_json, + "model": $model_json, + "config": $config_json, + "exit_code": $exit_code + }, + "findings": $findings_json_array, + "summary": { + "total": $total, + "blockers": $blockers, + "warnings": $warnings, + "infos": $infos, + "status": $status_json + } +} +JSON_EOF +} + +# Build a findings.json object that includes a resolutions array. +# Computes final status from unresolved blockers (original blockers minus resolved/answered). 
+# Usage: plan_check_build_resolved_json +plan_check_build_resolved_json() { + local plan_path="${1:-}" + local plan_hash="${2:-}" + local model="${3:-}" + local config_json="${4:-}" + if [[ -z "$config_json" ]]; then + config_json="{}" + fi + local exit_code="${5:-0}" + local findings_json_array="${6:-[]}" + local resolutions_json_array="${7:-[]}" + + local timestamp + timestamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + + # Use Python for accurate resolution-aware summary computation + python3 -c ' +import json, sys + +findings = json.loads(sys.argv[1]) +resolutions = json.loads(sys.argv[2]) +plan_path = sys.argv[3] +plan_hash = sys.argv[4] +model = sys.argv[5] +config = sys.argv[6] +exit_code = int(sys.argv[7]) +timestamp = sys.argv[8] + +total = len(findings) +blockers = sum(1 for f in findings if f.get("severity") == "blocker") +warnings = sum(1 for f in findings if f.get("severity") == "warning") +infos = sum(1 for f in findings if f.get("severity") == "info") + +# Build map: finding_id -> category +finding_categories = {f.get("id", ""): f.get("category", "") for f in findings} + +# Determine which blocker findings are resolved or answered. +# Category-aware: only contradiction findings can be cleared by contradiction_resolution, +# and only ambiguity findings by ambiguity_answer. Schema/dependency/rewrite-risk/runtime-error +# blockers remain unresolved regardless of resolution records. 
+valid_resolutions = { + ("contradiction", "contradiction_resolution"), + ("ambiguity", "ambiguity_answer"), + ("draft-plan-drift", "drift_resolution"), +} +resolved_ids = set() +for r in resolutions: + rid = r.get("finding_id", "") + rtype = r.get("resolution_type", "") + fcat = finding_categories.get(rid, "") + if rid and (fcat, rtype) in valid_resolutions: + resolved_ids.add(rid) + +unresolved_blockers = sum( + 1 for f in findings + if f.get("severity") == "blocker" and f.get("id", "") not in resolved_ids +) + +status = "pass" if unresolved_blockers == 0 else "fail" + +# Annotate each finding with its resolution state +for f in findings: + fid = f.get("id", "") + f["resolution_state"] = "resolved" if fid in resolved_ids else "unresolved" + +output = { + "version": "1.0", + "check_run": { + "timestamp": timestamp, + "plan_path": plan_path, + "plan_hash": plan_hash, + "model": model, + "config": json.loads(config), + "exit_code": exit_code + }, + "findings": findings, + "resolutions": resolutions, + "summary": { + "total": total, + "blockers": blockers, + "warnings": warnings, + "infos": infos, + "unresolved_blockers": unresolved_blockers, + "status": status + } +} + +print(json.dumps(output, indent=2)) +' "$findings_json_array" "$resolutions_json_array" "$plan_path" "$plan_hash" "$model" "$config_json" "$exit_code" "$timestamp" +} + +# ======================================== +# Markdown Report Formatting +# ======================================== + +plan_check_format_report_header() { + local plan_path="$1" + local timestamp + timestamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + cat << MD_EOF +# Plan Check Report + +- **Plan**: \`$plan_path\` +- **Timestamp**: $timestamp +- **Checker**: plan-schema-validator + +MD_EOF +} + +plan_check_format_finding_md() { + local id="$1" + local severity="$2" + local category="$3" + local section="$4" + local fragment="$5" + local evidence="$6" + local explanation="$7" + local suggested_resolution="$8" + + cat << MD_EOF +## $id 
($severity) + +- **Category**: $category +- **Section**: $section +- **Fragment**: $fragment +- **Evidence**: $evidence +- **Explanation**: $explanation +- **Suggested Resolution**: $suggested_resolution + +MD_EOF +} + +plan_check_format_resolution_report() { + local plan_path="$1" + local timestamp + timestamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + cat << MD_EOF +# Plan Check Resolution Report + +- **Plan**: \`$plan_path\` +- **Timestamp**: $timestamp +- **Status**: All blockers resolved + +MD_EOF +} + +# ======================================== +# Backup and Atomic Write +# ======================================== + +# Create a backup of the plan file in the report directory. +plan_check_backup_plan() { + local plan_path="$1" + local report_dir="$2" + if [[ ! -f "$plan_path" ]]; then + _plan_check_error "Cannot backup missing file: $plan_path" + return 1 + fi + mkdir -p "$report_dir/backup" + local backup_path="$report_dir/backup/$(basename "$plan_path").bak" + cp "$plan_path" "$backup_path" + printf '%s\n' "$backup_path" +} + +# Atomically write content to a file using a temp file in the same directory. +plan_check_atomic_write() { + local target_path="$1" + local content="$2" + local target_dir + target_dir="$(dirname "$target_path")" + local tmpfile + tmpfile="$(mktemp "$target_dir/.plan-check-write.XXXXXX")" + if ! printf '%s\n' "$content" > "$tmpfile"; then + rm -f "$tmpfile" + return 1 + fi + + if [[ -e "$target_path" ]]; then + if chmod --reference="$target_path" "$tmpfile" 2>/dev/null; then + : + else + local target_mode + target_mode="$(stat -c '%a' "$target_path" 2>/dev/null || stat -f '%Lp' "$target_path" 2>/dev/null || true)" + if [[ -n "$target_mode" ]]; then + chmod "$target_mode" "$tmpfile" 2>/dev/null || true + fi + fi + chown --reference="$target_path" "$tmpfile" 2>/dev/null || true + fi + + if ! 
mv "$tmpfile" "$target_path"; then + rm -f "$tmpfile" + return 1 + fi +} + +# ======================================== +# Schema Template Parsing +# ======================================== + +# Parse the canonical schema template and extract required core sections. +# Outputs one section name per line on stdout. +# Returns non-zero if the template is malformed (missing required markers). +_plan_check_parse_schema_template() { + local template_path="$1" + local required_markers=("Goal Description" "Acceptance Criteria" "Path Boundaries") + local found_markers=() + + # Extract ## headings from the template + local sections + sections="$(sed -n -E 's/^##[[:space:]]+(.+)$/\1/p' "$template_path")" + + # Check for required markers + for marker in "${required_markers[@]}"; do + if printf '%s\n' "$sections" | grep -qF "$marker"; then + found_markers+=("$marker") + fi + done + + # All required markers must be present; any missing heading means the template is malformed + if [[ ${#found_markers[@]} -ne ${#required_markers[@]} ]]; then + return 1 + fi + + printf '%s\n' "${found_markers[@]}" + return 0 +} + +# Normalize a Target AC cell by extracting AC-N or AC-N.M tokens and expanding ranges. +# Outputs one AC ID per line on stdout. 
+_plan_check_normalize_target_acs() { + local target_ac="$1" + + # Strip markdown bold + target_ac="$(printf '%s' "$target_ac" | sed 's/\*\*//g')" + + # Replace commas with newlines for uniform processing + target_ac="$(printf '%s' "$target_ac" | tr ',' '\n')" + + # Process each token + while IFS= read -r token; do + # Trim whitespace + token="$(printf '%s' "$token" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" + [[ -z "$token" || "$token" == "-" ]] && continue + + # Check for range pattern: "AC-N through AC-M" + local start_ac end_ac + if [[ "$token" =~ AC-([0-9]+)[[:space:]]+through[[:space:]]+AC-([0-9]+) ]]; then + start_ac="${BASH_REMATCH[1]}" + end_ac="${BASH_REMATCH[2]}" + local i + for ((i = start_ac; i <= end_ac; i++)); do + printf 'AC-%d\n' "$i" + done + elif [[ "$token" =~ ^AC-[0-9]+(\.[0-9]+)?$ ]]; then + printf '%s\n' "$token" + fi + done <<< "$target_ac" +} + +# Extract defined AC IDs from the Acceptance Criteria section of a main plan body. +# Accepts bullet definitions such as `- AC-1:` / `- **AC-1**:` and sub-criteria +# such as `- AC-1.1:`. Mentions outside this section are intentionally ignored. +_plan_check_extract_defined_ac_ids() { + local main_body="$1" + + printf '%s\n' "$main_body" \ + | awk ' + /^##[[:space:]]+Acceptance Criteria[[:space:]]*$/ { in_ac=1; next } + in_ac && /^##[[:space:]]+/ { exit } + in_ac { print } + ' \ + | sed -n -E 's/^[[:space:]]*-[[:space:]]+(\*\*)?(AC-[0-9]+(\.[0-9]+)?)(\*\*)?[[:space:]]*:.*/\2/p' +} + +_plan_check_is_table_separator() { + local line="${1:-}" + [[ "$line" == *"|"* ]] || return 1 + + local cells=() + IFS='|' read -r -a cells <<< "$line" + + local saw_cell=0 + local cell + local trimmed + for cell in "${cells[@]}"; do + trimmed="$(printf '%s' "$cell" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" + [[ -z "$trimmed" ]] && continue + if [[ ! 
"$trimmed" =~ ^:?-{3,}:?$ ]]; then + return 1 + fi + saw_cell=1 + done + + [[ "$saw_cell" -eq 1 ]] +} + +# Post-process ambiguity findings to assign/verify content-addressable stable IDs. +# Reads findings JSON array from stdin, outputs corrected JSON array on stdout. +plan_check_postprocess_ambiguity_ids() { + python3 -c ' +import json, hashlib, sys + +raw = sys.stdin.read() +try: + findings = json.loads(raw) +except Exception: + sys.stdout.write(raw) + sys.exit(0) + +for f in findings: + if f.get("category") == "ambiguity": + section = f.get("location", {}).get("section", "") + fragment = f.get("location", {}).get("fragment", "") + normalized = (section + "\n" + fragment).strip() + h = hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:12] + f["id"] = "A-" + h + +json.dump(findings, sys.stdout, indent=2) +' 2>/dev/null || cat +} + +# ======================================== +# Deterministic Schema Validation +# ======================================== + +# Required core sections for a plan file (default fallback if template parsing fails). +# Task Breakdown is intentionally optional: Codex-generated gen-plan output may +# omit it, while Claude-generated plans can still include and validate it. +_PLAN_CHECK_REQUIRED_SECTIONS=( + "Goal Description" + "Acceptance Criteria" + "Path Boundaries" +) + +# Check if a section header exists in the plan file. +_plan_check_has_section() { + local plan_path="$1" + local section="$2" + grep -qF "## $section" "$plan_path" 2>/dev/null +} + +# Extract only the main plan body (before the original draft appendix). +_plan_check_extract_main_body() { + local plan_path="$1" + if grep -qF -- "--- Original Design Draft Start ---" "$plan_path" 2>/dev/null; then + sed '/^--- Original Design Draft Start ---$/,$d' "$plan_path" + else + cat "$plan_path" + fi +} + +# Extract only the appendix content between the draft markers. 
+# Returns the inner byte range: content after "--- Original Design Draft Start ---\n" +# and before "\n--- Original Design Draft End ---". +# If markers are missing, returns empty string with a non-fatal info log. +_plan_check_extract_appendix() { + local plan_path="$1" + python3 -c ' +import sys +path = sys.argv[1] +start_marker = b"--- Original Design Draft Start ---\n" +end_marker = b"\n--- Original Design Draft End ---" +try: + content = open(path, "rb").read() +except Exception: + sys.stderr.write("Info: could not read plan file\n") + sys.exit(0) +s = content.find(start_marker) +e = content.find(end_marker) +if s == -1 or e == -1 or e < s: + sys.stderr.write("Info: draft appendix markers not found in " + path + "\n") + sys.exit(0) +inner = content[s + len(start_marker):e] +sys.stdout.buffer.write(inner) +' "$plan_path" +} + +# Append a single finding JSON object to the findings file. +_plan_check_append_finding() { + local findings_file="$1" + local finding="$2" + printf '%s\n' "$finding" >> "$findings_file" +} + +# Validate that all required sections are present in the main plan body. +# Usage: plan_check_validate_required_sections [section1 section2 ...] +# If no sections are provided after findings_file, uses _PLAN_CHECK_REQUIRED_SECTIONS. +plan_check_validate_required_sections() { + local plan_path="$1" + local findings_file="$2" + shift 2 + + local sections_to_check=("$@") + if [[ ${#sections_to_check[@]} -eq 0 ]]; then + sections_to_check=("${_PLAN_CHECK_REQUIRED_SECTIONS[@]}") + fi + + # Extract main body to temp file for reliable repeated scanning + local main_body_file + main_body_file="$(mktemp)" + _plan_check_extract_main_body "$plan_path" > "$main_body_file" + + for section in "${sections_to_check[@]}"; do + if ! 
grep -qF "## $section" "$main_body_file" 2>/dev/null; then + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + local finding + finding="$(_plan_check_build_finding \ + "$fid" "blocker" "schema" "plan-schema-validator" \ + "$section" "" \ + "Required section '$section' is missing from the main plan body." \ + "The canonical plan template requires this section in the main plan body (before the appendix). Appendix sections do not satisfy this requirement." \ + "Add the '$section' section to the main plan body." \ + "[]" "[]")" + _plan_check_append_finding "$findings_file" "$finding" + fi + done + + rm -f "$main_body_file" +} + +# Extract all defined AC IDs from the Acceptance Criteria section and detect duplicates. +# Supports canonical `- AC-N:` / `- AC-N.M:` syntax and bold `- **AC-N**:` / `- **AC-N.M**:` syntax. +# Usage: plan_check_validate_ac_ids +plan_check_validate_ac_ids() { + local plan_path="$1" + local findings_file="$2" + + # Only check definitions in the main plan body's Acceptance Criteria section, + # excluding the appendix and incidental AC mentions in other sections. + local main_body + main_body="$(_plan_check_extract_main_body "$plan_path")" + + local ac_ids + ac_ids="$(_plan_check_extract_defined_ac_ids "$main_body" | sort)" + + if [[ -z "$ac_ids" ]]; then + return 0 + fi + + # Find duplicates + local duplicates + duplicates="$(printf '%s\n' "$ac_ids" | uniq -d)" + + while IFS= read -r dup; do + [[ -z "$dup" ]] && continue + local count + count="$(printf '%s\n' "$ac_ids" | grep -Fxc "$dup")" + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + local finding + finding="$(_plan_check_build_finding \ + "$fid" "blocker" "schema" "plan-schema-validator" \ + "Acceptance Criteria" "$dup" \ + "AC ID '$dup' appears $count times in the main plan body." \ + "Duplicate acceptance criterion IDs create ambiguity about which criteria tasks must satisfy." \ + "Rename duplicate AC IDs so each is unique." 
\ + "[]" "[]")" + _plan_check_append_finding "$findings_file" "$finding" + done <<< "$duplicates" +} + +# Validate that each task's Target AC references an existing AC ID. +# Usage: plan_check_validate_target_acs +plan_check_validate_target_acs() { + local plan_path="$1" + local findings_file="$2" + + # Extract only AC IDs defined in the Acceptance Criteria section. Incidental + # bold mentions elsewhere must not satisfy task Target AC validation. + local main_body + main_body="$(_plan_check_extract_main_body "$plan_path")" + + local ac_ids + ac_ids="$(_plan_check_extract_defined_ac_ids "$main_body" | sort -u)" + + # Extract task rows and their Target AC + local in_task_section=0 + local in_table=0 + + while IFS= read -r line; do + if [[ "$line" =~ ^##+[[:space:]]+Task[[:space:]]+Breakdown ]]; then + in_task_section=1 + continue + fi + if [[ "$in_task_section" -eq 1 && "$line" =~ ^##[[:space:]] && ! "$line" =~ ^### ]]; then + break + fi + if [[ "$in_task_section" -eq 1 ]] && _plan_check_is_table_separator "$line"; then + in_table=1 + continue + fi + if [[ "$in_task_section" -eq 1 && "$line" =~ Task[[:space:]]+ID ]]; then + continue + fi + if [[ "$in_task_section" -eq 1 && "$in_table" -eq 1 && "$line" =~ ^\| ]]; then + local cols + cols="$(printf '%s' "$line" | tr '|' '\n' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" + local col_array=() + while IFS= read -r col; do + [[ -z "$col" ]] && continue + col_array+=("$col") + done <<< "$cols" + + # Expected columns: Task ID, Description, Target AC, Tag, Depends On + if [[ ${#col_array[@]} -lt 3 ]]; then + continue + fi + + local tid="${col_array[0]}" + local target_ac="${col_array[2]}" + + # Normalize target AC using range expansion + local normalized_acs + normalized_acs="$(_plan_check_normalize_target_acs "$target_ac")" + if [[ -z "$normalized_acs" ]]; then + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + local finding + finding="$(_plan_check_build_finding \ + "$fid" "blocker" "dependency" 
"plan-schema-validator" \ + "Task Breakdown" "$tid" \ + "Task '$tid' has no parsable Target AC value: '$target_ac'." \ + "Every task must reference one or more acceptance criteria that exist in the main plan body." \ + "Set the Target AC for task '$tid' to one or more existing AC IDs." \ + "[]" "$(_plan_check_json_array "$tid")")" + _plan_check_append_finding "$findings_file" "$finding" + continue + fi + + while IFS= read -r single_ac; do + [[ -z "$single_ac" ]] && continue + + # Check if this AC exists + local found=0 + local ac + while IFS= read -r ac; do + [[ "$ac" == "$single_ac" ]] && found=1 + done <<< "$ac_ids" + + if [[ "$found" -eq 0 ]]; then + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + local finding + finding="$(_plan_check_build_finding \ + "$fid" "blocker" "dependency" "plan-schema-validator" \ + "Task Breakdown" "$tid" \ + "Task '$tid' targets nonexistent AC '$single_ac'." \ + "Every task must reference an acceptance criterion that exists in the main plan body." \ + "Create AC '$single_ac' in the Acceptance Criteria section or change the target of task '$tid'." \ + "[]" "$(_plan_check_json_array "$tid")")" + _plan_check_append_finding "$findings_file" "$finding" + fi + done <<< "$normalized_acs" + fi + done <<< "$main_body" +} + +# Validate task tags in the Task Breakdown table. +# Usage: plan_check_validate_task_tags +plan_check_validate_task_tags() { + local plan_path="$1" + local findings_file="$2" + local main_body + main_body="$(_plan_check_extract_main_body "$plan_path")" + + # Find the Task Breakdown section and extract table rows + local in_task_section=0 + local in_table=0 + local task_rows=() + + while IFS= read -r line; do + # Detect Task Breakdown section + if [[ "$line" =~ ^##+[[:space:]]+Task[[:space:]]+Breakdown ]]; then + in_task_section=1 + continue + fi + # Stop at next ## section (but not ### subsections within Task Breakdown) + if [[ "$in_task_section" -eq 1 && "$line" =~ ^##[[:space:]] && ! 
"$line" =~ ^### ]]; then + break + fi + # Detect table separator row + if [[ "$in_task_section" -eq 1 ]] && _plan_check_is_table_separator "$line"; then + in_table=1 + continue + fi + # Skip header row (contains "Task ID") + if [[ "$in_task_section" -eq 1 && "$line" =~ Task[[:space:]]+ID ]]; then + continue + fi + # Collect table rows + if [[ "$in_task_section" -eq 1 && "$in_table" -eq 1 && "$line" =~ ^\| ]]; then + task_rows+=("$line") + fi + done <<< "$main_body" + + # Parse each row + for row in "${task_rows[@]}"; do + # Split by | and trim whitespace + local cols + cols="$(printf '%s' "$row" | tr '|' '\n' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" + local col_array=() + while IFS= read -r col; do + [[ -z "$col" ]] && continue + col_array+=("$col") + done <<< "$cols" + + # Expected columns: Task ID, Description, Target AC, Tag, Depends On + if [[ ${#col_array[@]} -lt 4 ]]; then + continue + fi + + local tid="${col_array[0]}" + local tag="${col_array[3]}" + + # Strip markdown formatting from tag + tag="${tag//\`/}" + tag="${tag// /}" + + if [[ "$tag" != "coding" && "$tag" != "analyze" ]]; then + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + local finding + finding="$(_plan_check_build_finding \ + "$fid" "blocker" "schema" "plan-schema-validator" \ + "Task Breakdown" "$tid" \ + "Task '$tid' has invalid routing tag: '$tag'." \ + "Each task must carry exactly one routing tag: either 'coding' (implemented by Claude) or 'analyze' (executed via Codex)." \ + "Set the tag to 'coding' or 'analyze' for task '$tid'." \ + "[]" "$(_plan_check_json_array "$tid")")" + _plan_check_append_finding "$findings_file" "$finding" + fi + done +} + +# Validate task dependencies: orphaned references and circular dependencies. 
+# Usage: plan_check_validate_dependencies +plan_check_validate_dependencies() { + local plan_path="$1" + local findings_file="$2" + local main_body + main_body="$(_plan_check_extract_main_body "$plan_path")" + + # Extract all task IDs and their dependencies from the Task Breakdown table + local in_task_section=0 + local in_table=0 + declare -A task_deps + declare -A all_tasks + + while IFS= read -r line; do + if [[ "$line" =~ ^##+[[:space:]]+Task[[:space:]]+Breakdown ]]; then + in_task_section=1 + continue + fi + if [[ "$in_task_section" -eq 1 && "$line" =~ ^##[[:space:]] && ! "$line" =~ ^### ]]; then + break + fi + if [[ "$in_task_section" -eq 1 ]] && _plan_check_is_table_separator "$line"; then + in_table=1 + continue + fi + if [[ "$in_task_section" -eq 1 && "$line" =~ Task[[:space:]]+ID ]]; then + continue + fi + if [[ "$in_task_section" -eq 1 && "$in_table" -eq 1 && "$line" =~ ^\| ]]; then + local cols + cols="$(printf '%s' "$line" | tr '|' '\n' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" + local col_array=() + while IFS= read -r col; do + [[ -z "$col" ]] && continue + col_array+=("$col") + done <<< "$cols" + + if [[ ${#col_array[@]} -lt 5 ]]; then + continue + fi + + local tid="${col_array[0]}" + local deps="${col_array[4]}" + + all_tasks["$tid"]=1 + task_deps["$tid"]="$deps" + fi + done <<< "$main_body" + + # Check for orphaned dependencies + for tid in "${!task_deps[@]}"; do + local deps="${task_deps[$tid]}" + # Skip if deps is "-" or empty + [[ "$deps" == "-" || -z "$deps" ]] && continue + + # Split deps by comma + local dep_array=() + IFS=',' read -ra dep_array <<< "$deps" + for dep in "${dep_array[@]}"; do + dep="$(printf '%s' "$dep" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" + [[ -z "$dep" ]] && continue + if [[ -z "${all_tasks[$dep]:-}" ]]; then + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + local finding + finding="$(_plan_check_build_finding \ + "$fid" "blocker" "dependency" "plan-schema-validator" \ + "Task Breakdown" "$tid" \ + 
"Task '$tid' depends on nonexistent task '$dep'." \ + "Every task dependency must reference a task ID that exists in the Task Breakdown table." \ + "Create task '$dep' or remove the dependency from '$tid'." \ + "[]" "$(_plan_check_json_array "$tid")")" + _plan_check_append_finding "$findings_file" "$finding" + fi + done + done + + # Check for circular dependencies using DFS + local visited=() + local rec_stack=() + + _plan_check_has_cycle() { + local node="$1" + visited+=("$node") + rec_stack+=("$node") + + local deps="${task_deps[$node]:-}" + if [[ "$deps" != "-" && -n "$deps" ]]; then + local dep_array=() + IFS=',' read -ra dep_array <<< "$deps" + for dep in "${dep_array[@]}"; do + dep="$(printf '%s' "$dep" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" + [[ -z "$dep" ]] && continue + + local in_visited=0 + for v in "${visited[@]}"; do + if [[ "$v" == "$dep" ]]; then + in_visited=1 + break + fi + done + + local in_rec=0 + for r in "${rec_stack[@]}"; do + if [[ "$r" == "$dep" ]]; then + in_rec=1 + break + fi + done + + if [[ "$in_visited" -eq 0 ]]; then + if _plan_check_has_cycle "$dep"; then + return 0 + fi + elif [[ "$in_rec" -eq 1 ]]; then + return 0 + fi + done + fi + + # Remove from rec_stack + local new_stack=() + for r in "${rec_stack[@]}"; do + if [[ "$r" != "$node" ]]; then + new_stack+=("$r") + fi + done + rec_stack=("${new_stack[@]}") + return 1 + } + + for tid in "${!all_tasks[@]}"; do + local in_visited=0 + for v in "${visited[@]}"; do + if [[ "$v" == "$tid" ]]; then + in_visited=1 + break + fi + done + if [[ "$in_visited" -eq 0 ]]; then + if _plan_check_has_cycle "$tid"; then + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + local finding + finding="$(_plan_check_build_finding \ + "$fid" "blocker" "dependency" "plan-schema-validator" \ + "Task Breakdown" "$tid" \ + "Circular dependency detected starting from task '$tid'." \ + "Tasks form a cycle in their dependency graph, making it impossible to determine an execution order." 
\ + "Break the cycle by removing at least one dependency or restructuring tasks." \ + "[]" "$(_plan_check_json_array "$tid")")" + _plan_check_append_finding "$findings_file" "$finding" + fi + fi + done +} + +# Check for drift between main plan and original draft appendix. +# Usage: plan_check_check_appendix_drift +plan_check_check_appendix_drift() { + local plan_path="$1" + local findings_file="$2" + + # Check if appendix markers exist + if ! grep -qF -- "--- Original Design Draft Start ---" "$plan_path" 2>/dev/null; then + return 0 + fi + + if ! grep -qF -- "--- Original Design Draft End ---" "$plan_path" 2>/dev/null; then + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + local finding + finding="$(_plan_check_build_finding \ + "$fid" "info" "appendix-drift" "plan-schema-validator" \ + "Appendix" "" \ + "Appendix start marker found but no end marker." \ + "The original draft appendix section is malformed, making drift detection impossible." \ + "Add the '--- Original Design Draft End ---' marker or remove the start marker." \ + "[]" "[]")" + _plan_check_append_finding "$findings_file" "$finding" + return 0 + fi + + # This is a simplified drift check: we just note that an appendix exists + # A more thorough check would compare section content, but that requires + # semantic comparison beyond the scope of deterministic validation. + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + local finding + finding="$(_plan_check_build_finding \ + "$fid" "info" "appendix-drift" "plan-schema-validator" \ + "Appendix" "" \ + "Original draft appendix section is present." \ + "An appendix section exists. Reviewers should verify that the main plan has not diverged from the original draft in ways that invalidate design decisions." \ + "Review the appendix and main plan for inconsistencies." \ + "[]" "[]")" + _plan_check_append_finding "$findings_file" "$finding" +} + +# Main schema validation entry point. 
+# Outputs a comma-separated list of JSON finding objects. +# Usage: plan_check_validate_schema [template_path] +plan_check_validate_schema() { + local plan_path="$1" + local template_path="${2:-}" + local all_findings="" + + # Reset global counter + _PLAN_CHECK_FID_COUNTER=0 + + # Check if template path is provided and readable + if [[ -n "$template_path" && ! -f "$template_path" ]]; then + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + all_findings="$(_plan_check_build_finding \ + "$fid" "info" "runtime-error" "plan-schema-validator" \ + "" "" \ + "Canonical schema source not found: $template_path" \ + "The canonical gen-plan template is unavailable or malformed. Schema validation is skipped; only semantic review will be performed." \ + "Ensure prompt-template/plan/gen-plan-template.md exists and is readable." \ + "[]" "[]")" + printf '%s' "$all_findings" + return 0 + fi + + local findings_file + findings_file="$(mktemp)" + + # Parse required sections from canonical template if available + local parsed_sections=() + local template_malformed=0 + if [[ -n "$template_path" && -f "$template_path" ]]; then + local parsed + if parsed="$(_plan_check_parse_schema_template "$template_path" 2>/dev/null)" && [[ -n "$parsed" ]]; then + while IFS= read -r sec; do + [[ -n "$sec" ]] && parsed_sections+=("$sec") + done <<< "$parsed" + else + template_malformed=1 + fi + fi + + # If template is malformed, emit runtime-error finding and skip deterministic schema checks + if [[ "$template_malformed" -eq 1 ]]; then + _plan_check_next_fid + local fid="$_PLAN_CHECK_LAST_FID" + local finding + finding="$(_plan_check_build_finding \ + "$fid" "info" "runtime-error" "plan-schema-validator" \ + "" "" \ + "Canonical schema source is malformed or unparseable: $template_path" \ + "The canonical gen-plan template does not contain the required core schema markers. Deterministic schema validation is skipped; only semantic review will be performed." 
\ + "Ensure prompt-template/plan/gen-plan-template.md is a valid plan template with the required core sections." \ + "[]" "[]")" + _plan_check_append_finding "$findings_file" "$finding" + else + # Run each validator (direct calls, no subshell, so counter increments persist) + if [[ ${#parsed_sections[@]} -gt 0 ]]; then + plan_check_validate_required_sections "$plan_path" "$findings_file" "${parsed_sections[@]}" + else + plan_check_validate_required_sections "$plan_path" "$findings_file" + fi + plan_check_validate_ac_ids "$plan_path" "$findings_file" + plan_check_validate_target_acs "$plan_path" "$findings_file" + plan_check_validate_task_tags "$plan_path" "$findings_file" + plan_check_validate_dependencies "$plan_path" "$findings_file" + plan_check_check_appendix_drift "$plan_path" "$findings_file" + fi + + # Combine findings from file + while IFS= read -r finding; do + [[ -z "$finding" ]] && continue + if [[ -z "$all_findings" ]]; then + all_findings="$finding" + else + all_findings="$all_findings,$finding" + fi + done < "$findings_file" + + rm -f "$findings_file" + printf '%s' "$all_findings" +} + +# ======================================== +# Config Resolution +# ======================================== + +# Resolve the canonical schema template path. +plan_check_resolve_schema_template() { + local plugin_root="${1:-}" + if [[ -n "$plugin_root" ]]; then + local path="$plugin_root/prompt-template/plan/gen-plan-template.md" + if [[ -f "$path" ]]; then + printf '%s\n' "$path" + return 0 + fi + fi + # Fallback to script-relative path + local script_dir + script_dir="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" + local path="$script_dir/../../prompt-template/plan/gen-plan-template.md" + if [[ -f "$path" ]]; then + printf '%s\n' "$path" + return 0 + fi + return 1 +} + +# Resolve alt-language code from merged config. 
+plan_check_resolve_alt_language() { + local merged_config="$1" + if [[ -z "$merged_config" ]]; then + printf '' + return 0 + fi + printf '%s' "$merged_config" | jq -r '.alternative_plan_language // empty' +} + +# Resolve whether plan-check should re-run after an accepted rewrite. +plan_check_resolve_recheck() { + local merged_config="$1" + if [[ -z "$merged_config" ]]; then + printf 'false' + return 0 + fi + + printf '%s' "$merged_config" | jq -r ' + if has("plan_check_recheck") then + .plan_check_recheck + | if type == "boolean" then tostring + elif type == "string" then ascii_downcase + else "false" + end + | if . == "true" or . == "false" then . else "false" end + else + "false" + end + ' +} diff --git a/scripts/plan-check.sh b/scripts/plan-check.sh new file mode 100755 index 00000000..6804ed33 --- /dev/null +++ b/scripts/plan-check.sh @@ -0,0 +1,377 @@ +#!/usr/bin/env bash +# plan-check.sh +# Report assembler and writer for the plan-check command. +# Deterministic only: NO agents, NO LLMs, NO user interaction. +# +# Receives structured findings from the command layer (via stdin or file) +# and assembles/writes report.md and findings.json to the report directory. +# +# Usage: +# plan-check.sh --plan --report-dir [--findings-file ] +# +# If --findings-file is provided, reads findings from that file. +# Otherwise reads findings JSON array from stdin. 
+# +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" + +# Source shared library +source "$SCRIPT_DIR/lib/plan-check-common.sh" + +usage() { + cat << 'USAGE_EOF' +Usage: plan-check.sh --plan --report-dir <.report-dir> [--findings-file ] + +Options: + --plan Path to the plan file that was checked (required) + --report-dir Directory where report.md and findings.json will be written (required) + --findings-file Path to a file containing the findings JSON array (optional, defaults to stdin) + -h, --help Show this help message +USAGE_EOF + exit 1 +} + +PLAN_FILE="" +REPORT_DIR="" +FINDINGS_FILE="" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --plan) + if [[ $# -lt 2 || "$2" == --* ]]; then + echo "ERROR: --plan requires a value" >&2 + usage + fi + PLAN_FILE="$2" + shift 2 + ;; + --report-dir) + if [[ $# -lt 2 || "$2" == --* ]]; then + echo "ERROR: --report-dir requires a value" >&2 + usage + fi + REPORT_DIR="$2" + shift 2 + ;; + --findings-file) + if [[ $# -lt 2 || "$2" == --* ]]; then + echo "ERROR: --findings-file requires a value" >&2 + usage + fi + FINDINGS_FILE="$2" + shift 2 + ;; + -h|--help) + usage + ;; + *) + echo "ERROR: Unknown option: $1" >&2 + usage + ;; + esac +done + +# Validate required arguments +if [[ -z "$PLAN_FILE" ]]; then + echo "ERROR: --plan is required" >&2 + usage +fi + +if [[ -z "$REPORT_DIR" ]]; then + echo "ERROR: --report-dir is required" >&2 + usage +fi + +# Resolve absolute paths +PLAN_FILE="$(realpath -m "$PLAN_FILE" 2>/dev/null || echo "$PLAN_FILE")" +REPORT_DIR="$(realpath -m "$REPORT_DIR" 2>/dev/null || echo "$REPORT_DIR")" + +# Ensure report directory exists +if [[ ! -d "$REPORT_DIR" ]]; then + echo "ERROR: Report directory does not exist: $REPORT_DIR" >&2 + exit 1 +fi + +# Read findings JSON array +if [[ -n "$FINDINGS_FILE" ]]; then + if [[ ! 
-f "$FINDINGS_FILE" ]]; then + echo "ERROR: Findings file does not exist: $FINDINGS_FILE" >&2 + exit 1 + fi + FINDINGS_JSON="$(cat "$FINDINGS_FILE")" +else + FINDINGS_JSON="$(cat)" +fi + +# Validate findings JSON with Python: must be an array of valid finding objects. +# Malformed arrays keep valid findings and append a runtime-error diagnostic. +VALIDATED_FINDINGS="$(python3 -c ' +import json, sys + +raw = sys.stdin.read().strip() +if not raw: + print("[]") + sys.exit(0) + +try: + data = json.loads(raw) +except Exception: + print(json.dumps([{ + "id": "F-RUNTIME-001", + "severity": "info", + "category": "runtime-error", + "source_checker": "plan-schema-validator", + "location": {"section": "", "fragment": ""}, + "evidence": "Findings input is not valid JSON.", + "explanation": "The command layer produced malformed findings that could not be parsed. Only the deterministic schema validation results are reliable.", + "suggested_resolution": "Review the sub-agent output and retry the check.", + "affected_acs": [], + "affected_tasks": [] + }])) + sys.exit(0) + +if not isinstance(data, list): + print(json.dumps([{ + "id": "F-RUNTIME-001", + "severity": "info", + "category": "runtime-error", + "source_checker": "plan-schema-validator", + "location": {"section": "", "fragment": ""}, + "evidence": "Findings input is not a JSON array.", + "explanation": "The command layer produced findings in an unexpected format. 
Expected a JSON array of finding objects.", + "suggested_resolution": "Review the sub-agent output and retry the check.", + "affected_acs": [], + "affected_tasks": [] + }])) + sys.exit(0) + +valid_severities = {"blocker", "warning", "info"} +valid_categories = {"contradiction", "ambiguity", "schema", "dependency", "appendix-drift", "rewrite-risk", "runtime-error", "draft-plan-drift"} +required_fields = {"id", "severity", "category", "source_checker", "location", "evidence", "explanation", "suggested_resolution"} +primary_finding_categories = {"contradiction", "ambiguity"} +valid_checkers = {"plan-consistency-checker", "plan-ambiguity-checker", "plan-schema-validator", "draft-consistency-checker", "draft-ambiguity-checker", "draft-plan-drift-checker"} + +def common_validation_error(item): + if not isinstance(item, dict): + return "item is not an object" + missing = required_fields - set(item.keys()) + if missing: + return "missing required fields: " + ", ".join(sorted(missing)) + if not isinstance(item.get("severity"), str) or item.get("severity") not in valid_severities: + return "invalid severity" + if not isinstance(item.get("category"), str) or item.get("category") not in valid_categories: + return "invalid category" + + string_fields = ["id", "source_checker", "evidence", "explanation", "suggested_resolution"] + for sf in string_fields: + if not isinstance(item.get(sf), str): + return "invalid scalar field: " + sf + if item.get("source_checker") not in valid_checkers: + return "unknown source_checker" + + if not isinstance(item.get("affected_acs"), list): + return "affected_acs is not an array" + if not all(isinstance(v, str) for v in item.get("affected_acs")): + return "affected_acs contains a non-string value" + if not isinstance(item.get("affected_tasks"), list): + return "affected_tasks is not an array" + if not all(isinstance(v, str) for v in item.get("affected_tasks")): + return "affected_tasks contains a non-string value" + + loc = item.get("location") + 
if not isinstance(loc, dict): + return "location is not an object" + if not isinstance(loc.get("section"), str): + return "location.section is not a string" + if not isinstance(loc.get("fragment"), str): + return "location.fragment is not a string" + + if item.get("category") == "ambiguity": + details = item.get("ambiguity_details") + if not isinstance(details, dict): + return "ambiguity_details is missing or not an object" + interpretations = details.get("competing_interpretations") + if not isinstance(interpretations, list): + return "competing_interpretations is not an array" + non_empty = [s for s in interpretations if isinstance(s, str) and s.strip()] + if len(non_empty) < 2: + return "fewer than 2 non-empty competing interpretations" + if not isinstance(details.get("execution_drift_risk"), str) or not details.get("execution_drift_risk", "").strip(): + return "empty ambiguity execution_drift_risk" + if not isinstance(details.get("clarification_question"), str) or not details.get("clarification_question", "").strip(): + return "empty ambiguity clarification_question" + + return None + +finding_categories = {} +for item in data: + if common_validation_error(item) is None and item.get("category") in primary_finding_categories: + finding_categories[item.get("id")] = item.get("category") + +valid_items = [] +invalid_items = [] +for idx, item in enumerate(data): + error = common_validation_error(item) + if error is None and item.get("category") == "draft-plan-drift": + related_id = item.get("related_finding_id") + if not isinstance(related_id, str) or not related_id.strip(): + error = "missing draft-plan-drift related_finding_id" + elif finding_categories.get(related_id) not in primary_finding_categories: + error = "draft-plan-drift related_finding_id does not reference a valid primary finding" + + if error is None: + valid_items.append(item) + else: + invalid_items.append({"index": idx, "reason": error}) + +if invalid_items: + used_ids = {item.get("id") for item in 
valid_items if isinstance(item.get("id"), str)} + runtime_number = 1 + while True: + runtime_id = f"F-RUNTIME-{runtime_number:03d}" + if runtime_id not in used_ids: + break + runtime_number += 1 + invalid_indexes = [entry["index"] for entry in invalid_items] + invalid_reasons = "; ".join("{}: {}".format(entry["index"], entry["reason"]) for entry in invalid_items) + runtime_finding = { + "id": runtime_id, + "severity": "info", + "category": "runtime-error", + "source_checker": "plan-schema-validator", + "location": {"section": "", "fragment": ""}, + "evidence": f"Findings array contains {len(invalid_items)} invalid item(s) at index(es): {invalid_indexes}.", + "explanation": "Invalid findings were filtered out while valid findings were preserved. Reasons: " + invalid_reasons, + "suggested_resolution": "Review the sub-agent output and ensure all findings conform to the full schema.", + "affected_acs": [], + "affected_tasks": [] + } + print(json.dumps(valid_items + [runtime_finding])) + sys.exit(0) + +print(json.dumps(valid_items)) +' <<< "$FINDINGS_JSON")" + +FINDINGS_JSON="$VALIDATED_FINDINGS" + +# Compute plan hash (SHA256) +PLAN_HASH="" +if [[ -f "$PLAN_FILE" ]]; then + PLAN_HASH="$(sha256sum "$PLAN_FILE" 2>/dev/null | awk '{print $1}')" +fi + +# Assemble findings.json +FINDINGS_JSON_OUTPUT="$(plan_check_assemble_findings_json "$PLAN_FILE" "$PLAN_HASH" "plan-schema-validator" "{}" 0 "$FINDINGS_JSON")" + +# Validate the assembled output is parseable JSON before writing +if ! python3 -c 'import json,sys; json.load(sys.stdin)' <<< "$FINDINGS_JSON_OUTPUT" 2>/dev/null; then + echo "ERROR: Assembled findings.json is not valid JSON" >&2 + exit 1 +fi + +# Write findings.json +printf '%s\n' "$FINDINGS_JSON_OUTPUT" > "$REPORT_DIR/findings.json" + +# Assemble report.md +{ + plan_check_format_report_header "$PLAN_FILE" + + # Write findings + if [[ "$FINDINGS_JSON" == "[]" ]]; then + echo "No findings detected." 
+ echo "" + else + # Parse each finding and format it + # We use a simple approach: the findings are already JSON objects in an array + # We rely on the caller to provide well-formed findings + echo "## Findings" + echo "" + + # Count findings by severity for summary via JSON parsing + severity_counts="$(python3 -c ' +import json, sys +try: + findings = json.loads(sys.argv[1]) + if not isinstance(findings, list): + findings = [] +except Exception: + findings = [] +blockers = sum(1 for f in findings if f.get("severity") == "blocker") +warnings = sum(1 for f in findings if f.get("severity") == "warning") +infos = sum(1 for f in findings if f.get("severity") == "info") +print(f"{blockers} {warnings} {infos}") +' "$FINDINGS_JSON")" + blockers="$(printf '%s' "$severity_counts" | awk '{print $1}')" + warnings="$(printf '%s' "$severity_counts" | awk '{print $2}')" + infos="$(printf '%s' "$severity_counts" | awk '{print $3}')" + + echo "- Blockers: $blockers" + echo "- Warnings: $warnings" + echo "- Infos: $infos" + echo "" + + # For each finding, try to extract key fields and format them + # Since we're in bash, we do a best-effort extraction using pattern matching + # The command layer is expected to produce well-formed findings + + # Use a temp file to iterate + tmp_findings="$(mktemp)" + printf '%s\n' "$FINDINGS_JSON" > "$tmp_findings" + + # Parse findings using a simple state machine + # This is best-effort; the command layer should validate the JSON + python3 -c " +import json, sys + +try: + findings = json.load(open('$tmp_findings')) +except: + findings = [] + +for f in findings: + fid = f.get('id', 'F-???') + severity = f.get('severity', 'unknown') + category = f.get('category', 'unknown') + section = f.get('location', {}).get('section', '') + fragment = f.get('location', {}).get('fragment', '') + evidence = f.get('evidence', '') + explanation = f.get('explanation', '') + suggested = f.get('suggested_resolution', '') + related = f.get('related_finding_id', '') + + 
print(f'### {fid} ({severity})') + print(f'') + print(f'- **Category**: {category}') + if related: + print(f'- **Related Finding**: {related}') + print(f'- **Section**: {section}') + if fragment: + print(f'- **Fragment**: {fragment}') + print(f'- **Evidence**: {evidence}') + print(f'- **Explanation**: {explanation}') + print(f'- **Suggested Resolution**: {suggested}') + print(f'') +" 2>/dev/null || { + # Fallback: just dump the raw JSON + echo "Raw findings:" + echo "\`\`\`json" + cat "$tmp_findings" + echo "\`\`\`" + } + + rm -f "$tmp_findings" + fi + + echo "---" + echo "" + echo "*Report generated by plan-schema-validator*" +} > "$REPORT_DIR/report.md" + +echo "=== plan-check Report ===" +echo "Plan: $PLAN_FILE" +echo "Report directory: $REPORT_DIR" +echo "Findings written to: $REPORT_DIR/findings.json" +echo "Report written to: $REPORT_DIR/report.md" +exit 0 diff --git a/scripts/validate-gen-idea-io.sh b/scripts/validate-gen-idea-io.sh index 99c4bb1a..1097ed42 100755 --- a/scripts/validate-gen-idea-io.sh +++ b/scripts/validate-gen-idea-io.sh @@ -25,9 +25,98 @@ usage() { } IDEA_INPUT="" +IDEA_PARTS=() N=6 OUTPUT_FILE="" +if [[ $# -eq 2 && "$1" == "--raw-arguments" ]]; then + RAW_ARGUMENTS="$2" + PARSED_ARGS_FILE="$(mktemp)" + if ! 
python3 - "$RAW_ARGUMENTS" > "$PARSED_ARGS_FILE" <<'PY' +import sys + +raw = sys.argv[1] +length = len(raw) +tokens = [] + + +def read_token(pos): + if pos >= length: + return "", pos + if raw[pos] in ("'", '"'): + quote = raw[pos] + end = raw.find(quote, pos + 1) + if end != -1: + return raw[pos + 1:end], end + 1 + end = pos + while end < length and not raw[end].isspace(): + end += 1 + return raw[pos:end], end + + +idx = 0 +while idx < length: + while idx < length and raw[idx].isspace(): + idx += 1 + if idx >= length: + break + start = idx + value, idx = read_token(idx) + tokens.append((value, start, idx)) + + +def strip_outer_quotes(text): + if len(text) >= 2 and text[0] == text[-1] and text[0] in ("'", '"'): + return text[1:-1] + return text + + +args = [] +skip_spans = [] +idx = 0 +while idx < len(tokens): + token, start, end = tokens[idx] + if token in ("--n", "--output"): + args.append(token) + skip_spans.append((start, end)) + if idx + 1 < len(tokens): + value, value_start, value_end = tokens[idx + 1] + args.append(value) + skip_spans.append((value_start, value_end)) + idx += 2 + else: + idx += 1 + continue + if token in ("-h", "--help") or token.startswith("--"): + args.append(token) + skip_spans.append((start, end)) + idx += 1 + +chars = list(raw) +for start, end in skip_spans: + for pos in range(start, end): + chars[pos] = " " +idea = strip_outer_quotes(" ".join("".join(chars).strip().split())) +if idea: + args.append(idea) + +for arg in args: + sys.stdout.buffer.write(arg.encode("utf-8") + b"\0") +PY + then + cat "$PARSED_ARGS_FILE" 2>/dev/null || true + rm -f "$PARSED_ARGS_FILE" + usage + fi + + PARSED_ARGS=() + while IFS= read -r -d '' arg; do + PARSED_ARGS+=("$arg") + done < "$PARSED_ARGS_FILE" + rm -f "$PARSED_ARGS_FILE" + set -- "${PARSED_ARGS[@]}" +fi + while [[ $# -gt 0 ]]; do case "$1" in --n) @@ -54,17 +143,16 @@ while [[ $# -gt 0 ]]; do usage ;; *) - if [[ -z "$IDEA_INPUT" ]]; then - IDEA_INPUT="$1" - shift - else - echo "ERROR: Unexpected 
positional argument: $1" - usage - fi + IDEA_PARTS+=("$1") + shift ;; esac done +if (( ${#IDEA_PARTS[@]} > 0 )); then + IDEA_INPUT="${IDEA_PARTS[*]}" +fi + if [[ -z "$IDEA_INPUT" ]]; then echo "VALIDATION_ERROR: MISSING_IDEA" echo "No idea provided. Pass inline text or a .md file path as the first argument." diff --git a/scripts/validate-gen-plan-io.sh b/scripts/validate-gen-plan-io.sh index 90eac475..443966b9 100755 --- a/scripts/validate-gen-plan-io.sh +++ b/scripts/validate-gen-plan-io.sh @@ -14,7 +14,7 @@ set -e usage() { - echo "Usage: $0 --input --output [--auto-start-rlcr-if-converged] [--discussion|--direct]" + echo "Usage: $0 --input --output [--auto-start-rlcr-if-converged] [--discussion|--direct] [--check|--no-check]" echo "" echo "Options:" echo " --input Path to the input draft file (required)" @@ -22,6 +22,8 @@ usage() { echo " --auto-start-rlcr-if-converged Enable direct RLCR start after converged planning (discussion mode only)" echo " --discussion Use discussion mode (iterative Claude/Codex convergence rounds)" echo " --direct Use direct mode (skip convergence rounds, proceed immediately to plan)" + echo " --check Enable integrated draft-check and plan-check mode for this invocation" + echo " --no-check Disable integrated check mode for this invocation (overrides --check and config)" echo " -h, --help Show this help message" exit 6 } @@ -31,6 +33,8 @@ OUTPUT_FILE="" AUTO_START_RLCR_IF_CONVERGED="false" GEN_PLAN_MODE_DISCUSSION="false" GEN_PLAN_MODE_DIRECT="false" +CHECK_FLAG="false" +NO_CHECK_FLAG="false" # Parse arguments while [[ $# -gt 0 ]]; do @@ -63,6 +67,14 @@ while [[ $# -gt 0 ]]; do GEN_PLAN_MODE_DIRECT="true" shift ;; + --check) + CHECK_FLAG="true" + shift + ;; + --no-check) + NO_CHECK_FLAG="true" + shift + ;; -h|--help) usage ;; diff --git a/scripts/validate-plan-check-io.sh b/scripts/validate-plan-check-io.sh new file mode 100755 index 00000000..3b0f9c13 --- /dev/null +++ b/scripts/validate-plan-check-io.sh @@ -0,0 +1,139 @@ 
+#!/usr/bin/env bash +# validate-plan-check-io.sh +# Validates input and output paths for the plan-check command +# Exit codes: +# 0 - Success, all validations passed +# 1 - Input file does not exist +# 2 - Input file is empty +# 3 - Report directory does not exist and cannot be created +# 4 - Report output path already exists and is not a directory +# 5 - No write permission to output directory +# 6 - Invalid arguments +# +set -e + +usage() { + cat << 'USAGE_EOF' +Usage: validate-plan-check-io.sh --plan [--recheck] [--alt-language ] + +Options: + --plan Path to the plan file to check (required) + --recheck Re-run plan-check after an accepted rewrite + --alt-language Language code for translated report variants + -h, --help Show this help message +USAGE_EOF + exit 6 +} + +PLAN_FILE="" +RECHECK="false" +ALT_LANGUAGE="" + +# Parse arguments +while [[ $# -gt 0 ]]; do + case $1 in + --plan) + if [[ $# -lt 2 || "$2" == --* ]]; then + echo "ERROR: --plan requires a value" >&2 + usage + fi + PLAN_FILE="$2" + shift 2 + ;; + --recheck) + RECHECK="true" + shift + ;; + --alt-language) + if [[ $# -lt 2 || "$2" == --* ]]; then + echo "ERROR: --alt-language requires a value" >&2 + usage + fi + ALT_LANGUAGE="$2" + shift 2 + ;; + -h|--help) + usage + ;; + *) + echo "ERROR: Unknown option: $1" >&2 + usage + ;; + esac +done + +# Validate required arguments +if [[ -z "$PLAN_FILE" ]]; then + echo "ERROR: --plan is required" >&2 + usage +fi + +# Resolve to absolute path +PLAN_FILE=$(realpath -m "$PLAN_FILE" 2>/dev/null || echo "$PLAN_FILE") +PLAN_DIR=$(dirname "$PLAN_FILE") + +# Determine report output directory +if PROJECT_ROOT=$(git -C "$PLAN_DIR" rev-parse --show-toplevel 2>/dev/null); then + REPORT_DIR="$PROJECT_ROOT/.humanize/plan-check" +else + REPORT_DIR="$PLAN_DIR/.humanize/plan-check" +fi + +echo "=== plan-check IO Validation ===" +echo "Plan file: $PLAN_FILE" +echo "Report directory: $REPORT_DIR" + +# Check 1: Input file exists +if [[ ! 
-f "$PLAN_FILE" ]]; then + echo "VALIDATION_ERROR: INPUT_NOT_FOUND" + echo "The plan file does not exist: $PLAN_FILE" + echo "Please ensure the plan file exists before running plan-check." + exit 1 +fi + +# Check 2: Input file is not empty +if [[ ! -s "$PLAN_FILE" ]]; then + echo "VALIDATION_ERROR: INPUT_EMPTY" + echo "The plan file is empty: $PLAN_FILE" + echo "Please add content to your plan file before running plan-check." + exit 2 +fi + +# Check 3: Output path must not already exist as a non-directory +if [[ -e "$REPORT_DIR" && ! -d "$REPORT_DIR" ]]; then + echo "VALIDATION_ERROR: OUTPUT_EXISTS" + echo "The report output path already exists and is not a directory: $REPORT_DIR" + echo "Please remove the file or choose a different output path." + exit 4 +fi + +# Check 4: Output directory exists or can be created +if [[ ! -d "$REPORT_DIR" ]]; then + # Try to create it + if ! mkdir -p "$REPORT_DIR" 2>/dev/null; then + echo "VALIDATION_ERROR: OUTPUT_DIR_NOT_FOUND" + echo "The report directory does not exist and cannot be created: $REPORT_DIR" + echo "Please create the directory or ensure write permission." + exit 3 + fi +fi + +# Check 5: Write permission to output directory +if [[ ! -w "$REPORT_DIR" ]]; then + echo "VALIDATION_ERROR: NO_WRITE_PERMISSION" + echo "No write permission for the report directory: $REPORT_DIR" + echo "Please check directory permissions." + exit 5 +fi + +# All checks passed +INPUT_LINE_COUNT=$(wc -l < "$PLAN_FILE" | tr -d ' ') +echo "VALIDATION_SUCCESS" +echo "Plan file: $PLAN_FILE ($INPUT_LINE_COUNT lines)" +echo "Report directory: $REPORT_DIR" +echo "Recheck: $RECHECK" +if [[ -n "$ALT_LANGUAGE" ]]; then + echo "Alt language: $ALT_LANGUAGE" +fi +echo "IO validation passed." 
+exit 0 diff --git a/skills/humanize-gen-plan/SKILL.md b/skills/humanize-gen-plan/SKILL.md index 1fd58d25..ee65f7ad 100644 --- a/skills/humanize-gen-plan/SKILL.md +++ b/skills/humanize-gen-plan/SKILL.md @@ -18,7 +18,7 @@ The installer hydrates this skill with an absolute runtime root path: ```mermaid flowchart TD - BEGIN([BEGIN]) --> VALIDATE[Validate input/output paths
Run: {{HUMANIZE_RUNTIME_ROOT}}/scripts/validate-gen-plan-io.sh --input <draft> --output <plan>] + BEGIN([BEGIN]) --> VALIDATE[Validate input/output paths and mode flags
Run: {{HUMANIZE_RUNTIME_ROOT}}/scripts/validate-gen-plan-io.sh --input <draft> --output <plan> [--check|--no-check]] VALIDATE --> CHECK{Validation passed?} CHECK -->|No| REPORT_ERROR[Report validation error
Stop] REPORT_ERROR --> END_FAIL([END]) @@ -52,6 +52,51 @@ flowchart TD - `--input ` - The draft document - `--output ` - Where to write the plan +**Optional Arguments:** +- `--check` - Enable integrated draft-check before plan generation and plan-check with targeted repair after plan generation. This is a request to run the semantic checkers through native Codex sub-agents. +- `--no-check` - Disable integrated check mode for this invocation, overriding `--check` and config. +- `--discussion` - Use iterative discussion mode. +- `--direct` - Use direct mode. +- `--auto-start-rlcr-if-converged` - Start RLCR automatically when discussion mode converges and check-mode gates pass. + +Check mode can also be enabled by the merged `gen_plan_check` config key. Effective priority is `--no-check` > `--check` > `gen_plan_check` > default disabled. + +## Check Mode Delegation Contract + +When effective check mode is true, treat it as an explicit user request for sub-agent based checking. Use Codex native `spawn_agent` / `wait_agent` for semantic draft and plan checks. Do not satisfy check mode by reading checker prompt files and performing all semantic checks only in the parent session. + +Payload boundary for checker sub-agents: +- Spawn checker agents with `fork_context=false`. +- Pass only the checker instructions and the exact draft or plan content needed for that checker. +- Do not pass prior conversation history, project history, or unrelated repository context. +- Close completed checker agents after collecting their final output. + +### Draft-Check Phase + +Run this phase after relevance passes and before creating the output plan when `EFFECTIVE_CHECK_MODE=true`. + +1. Initialize `.humanize/gen-plan-check//` with `plan_check_init_report_dir`. +2. Spawn one checker sub-agent for `draft-consistency-checker` and one for `draft-ambiguity-checker`. 
+ - The draft consistency checker receives the raw draft and the intent of `{{HUMANIZE_RUNTIME_ROOT}}/agents/draft-consistency-checker.md`. + - The draft ambiguity checker receives the raw draft and the intent of `{{HUMANIZE_RUNTIME_ROOT}}/agents/draft-ambiguity-checker.md`. +3. Wait for both checker agents and require each to return a JSON array matching the `findings.json` schema. +4. If a checker fails or returns malformed JSON, retry that checker once with a fresh sub-agent. If it still fails, persist one `runtime-error` info finding for that checker and treat check mode as having unresolved blockers for auto-start gating. +5. Merge draft findings into `${CHECK_REPORT_DIR}/draft-findings.json`. +6. Resolve blocker findings with user clarification before generating the plan. Do not create the output file when the user aborts draft-check. + +### Plan-Check Phase + +Run this phase after the plan body has been written when `EFFECTIVE_CHECK_MODE=true`. + +1. Run deterministic schema validation locally with `plan_check_validate_schema`. +2. Spawn one checker sub-agent for `plan-consistency-checker` and one for `plan-ambiguity-checker`. + - The plan consistency checker receives the generated plan body and the intent of `{{HUMANIZE_RUNTIME_ROOT}}/agents/plan-consistency-checker.md`. + - The plan ambiguity checker receives the generated plan body and the intent of `{{HUMANIZE_RUNTIME_ROOT}}/agents/plan-ambiguity-checker.md`. +3. If the primary plan findings are non-empty, spawn a `draft-plan-drift-checker` sub-agent with only the plan body, original draft content, collected clarifications, and primary findings. +4. Merge schema findings, primary semantic findings, and conditional draft-plan drift findings into `${CHECK_REPORT_DIR}/plan-findings.json`. +5. Repair blocker findings using source-of-truth precedence: explicit user answers, original draft text, repository facts discovered during planning, safe leader-agent judgment, then generated plan text. +6. 
If `plan_check_recheck` is enabled and repair changed bytes, repeat plan-check once in check-only mode using the same sub-agent contract. + ## Plan Structure Output The generated plan includes: @@ -110,7 +155,10 @@ Minimum viable implementation ```bash # Start the flow -/flow:humanize-gen-plan +/flow:humanize-gen-plan --input .humanize/drafts/example.md --output .humanize/plans/example.md + +# Start with integrated check mode +/flow:humanize-gen-plan --input .humanize/drafts/example.md --output .humanize/plans/example.md --check # The flow will ask for: # - Input draft file path diff --git a/skills/humanize-plan-check/SKILL.md b/skills/humanize-plan-check/SKILL.md new file mode 100644 index 00000000..94fc3c19 --- /dev/null +++ b/skills/humanize-plan-check/SKILL.md @@ -0,0 +1,122 @@ +--- +name: humanize-plan-check +description: Check a Humanize plan file for contradictions, ambiguities, and schema compliance, then write a structured report under .humanize/plan-check. +type: flow +argument-hint: "--plan [--recheck] [--alt-language lang]" +user-invocable: false +--- + +# Humanize Plan Check + +Use this flow as the Codex entrypoint for checking an existing Humanize plan. +It mirrors the `/humanize:plan-check` Claude command, but it uses Codex native +`spawn_agent` / `wait_agent` instead of Claude slash-command directories, +`Task`, or `AskUserQuestion`. + +## Runtime Root + +The installer hydrates this skill with an absolute runtime root path: + +```bash +{{HUMANIZE_RUNTIME_ROOT}} +``` + +All commands below assume `{{HUMANIZE_RUNTIME_ROOT}}`. + +## Usage + +```bash +$humanize-plan-check --plan .humanize/plans/example.md +$humanize-plan-check --plan .humanize/plans/example.md --recheck +``` + +Options: +- `--plan `: plan file to check. Required. +- `--recheck`: re-run plan-check after an accepted rewrite. +- `--alt-language `: accepted for parity with the Claude command. +- `-h`, `--help`: show usage and stop. + +## Workflow + +1. Parse `$ARGUMENTS`. 
+ - Require `--plan `. + - Treat `--recheck` as a positive override. + - Preserve `--alt-language ` when present. +2. Load config and shared helpers: + ```bash + source "{{HUMANIZE_RUNTIME_ROOT}}/scripts/lib/config-loader.sh" + source "{{HUMANIZE_RUNTIME_ROOT}}/scripts/lib/plan-check-common.sh" + PROJECT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" + MERGED_CONFIG_JSON="$(load_merged_config "{{HUMANIZE_RUNTIME_ROOT}}" "$PROJECT_ROOT")" + ``` +3. Resolve effective recheck: + - If `--recheck` was supplied, set `EFFECTIVE_RECHECK=true`. + - Otherwise set `EFFECTIVE_RECHECK="$(plan_check_resolve_recheck "$MERGED_CONFIG_JSON")"`. +4. Validate IO by running: + ```bash + VALIDATE_ARGS=(--plan "$PLAN_FILE") + [[ "$EFFECTIVE_RECHECK" == "true" ]] && VALIDATE_ARGS+=(--recheck) + [[ -n "$ALT_LANGUAGE" ]] && VALIDATE_ARGS+=(--alt-language "$ALT_LANGUAGE") + "{{HUMANIZE_RUNTIME_ROOT}}/scripts/validate-plan-check-io.sh" \ + "${VALIDATE_ARGS[@]}" + ``` + Stop on validation failure and report the script output. +5. Read the plan file. Keep the exact path for report metadata. +6. Create the timestamped report directory: + - Parse `Report directory: ...` from validation output as `REPORT_BASE`. + - Run `REPORT_DIR="$(plan_check_init_report_dir "$REPORT_BASE")"`. +7. Run deterministic schema checks: + ```bash + SCHEMA_TEMPLATE="$(plan_check_resolve_schema_template "{{HUMANIZE_RUNTIME_ROOT}}" || true)" + SCHEMA_FINDINGS="$(plan_check_validate_schema "$PLAN_FILE" "$SCHEMA_TEMPLATE")" + ``` + `## Task Breakdown` is optional; when present, task tags, Target ACs, and dependencies are still validated. Wrap non-empty `SCHEMA_FINDINGS` as a JSON array; otherwise use `[]`. +8. Run semantic checks through native Codex sub-agents. + - Treat `$humanize-plan-check` as an explicit request to delegate semantic checking. + - Use `spawn_agent` / `wait_agent`; do not call the Claude `Task` tool. + - Do not call nested `codex exec` unless the user explicitly asks. 
+ - Spawn checker agents with `fork_context=false` and pass only the plan content plus checker instructions. Do not pass project history, prior conversation context, or background information. + - Produce JSON-array findings matching the `findings.json` schema used by `plan-check.sh`. + - Run two semantic passes: + - contradiction pass using the intent of `{{HUMANIZE_RUNTIME_ROOT}}/agents/plan-consistency-checker.md` + - ambiguity pass using the intent of `{{HUMANIZE_RUNTIME_ROOT}}/agents/plan-ambiguity-checker.md` + - If a semantic pass fails or returns malformed JSON, retry that checker once with a fresh sub-agent. If it still cannot be completed, add one `runtime-error` info finding for that checker and continue. + - Close completed checker agents after collecting their final output. +9. Merge deterministic and semantic findings into one JSON array. + - Sort blockers first, then warnings, then infos. + - Pipe the merged array through `plan_check_postprocess_ambiguity_ids`. + - Write it to `${REPORT_DIR}/findings_array.json`. +10. Generate the report: + ```bash + "{{HUMANIZE_RUNTIME_ROOT}}/scripts/plan-check.sh" \ + --plan "$PLAN_FILE" \ + --report-dir "$REPORT_DIR" \ + --findings-file "$REPORT_DIR/findings_array.json" + ``` +11. Print `${REPORT_DIR}/report.md` to the user and summarize: + - report path + - blockers, warnings, infos + - whether unresolved blockers remain +12. If blocker findings exist, ask the user whether to resolve them now. + - For contradictions, collect the chosen resolution. + - For ambiguities, collect a concrete clarification. + - Write `${REPORT_DIR}/resolution.json` using `plan_check_build_resolved_json`. +13. If the user wants an in-place rewrite: + - Preview the intended diff first. + - Ask for explicit confirmation before writing. + - Run `plan_check_backup_plan "$PLAN_FILE" "$REPORT_DIR"`. + - Run `plan_check_atomic_write "$PLAN_FILE" "$REWRITTEN_PLAN"`. 
+ - If `EFFECTIVE_RECHECK=true`, repeat the check workflow on the rewritten plan. + +## Output Contract + +Successful runs create: +- `.humanize/plan-check//report.md` +- `.humanize/plan-check//findings.json` +- `.humanize/plan-check//findings_array.json` +- `.humanize/plan-check//resolution.json` when user resolutions are collected +- `.humanize/plan-check//backup/.bak` when an in-place rewrite is accepted + +Exit-facing status: +- Pass when there are no unresolved blocker findings. +- Fail when blocker findings remain unresolved. diff --git a/tests/fixtures/gen-plan-check/default-draft.md b/tests/fixtures/gen-plan-check/default-draft.md new file mode 100644 index 00000000..1b9f6008 --- /dev/null +++ b/tests/fixtures/gen-plan-check/default-draft.md @@ -0,0 +1,2 @@ +Draft line 1. +Draft line 2. diff --git a/tests/fixtures/gen-plan-check/default-expected.md b/tests/fixtures/gen-plan-check/default-expected.md new file mode 100644 index 00000000..eedd6793 --- /dev/null +++ b/tests/fixtures/gen-plan-check/default-expected.md @@ -0,0 +1,10 @@ +# Plan + +## Goal +Goal text. + +--- Original Design Draft Start --- +Draft line 1. +Draft line 2. + +--- Original Design Draft End --- diff --git a/tests/fixtures/gen-plan-check/default-template.md b/tests/fixtures/gen-plan-check/default-template.md new file mode 100644 index 00000000..63368299 --- /dev/null +++ b/tests/fixtures/gen-plan-check/default-template.md @@ -0,0 +1,4 @@ +# Plan + +## Goal +Goal text. 
diff --git a/tests/robustness/test-hook-system-robustness.sh b/tests/robustness/test-hook-system-robustness.sh index 1d4a21f5..138468bc 100755 --- a/tests/robustness/test-hook-system-robustness.sh +++ b/tests/robustness/test-hook-system-robustness.sh @@ -461,6 +461,24 @@ else fail "Goal-tracker mutable write" "exit 0" "exit $EXIT_CODE, result: $RESULT" fi +# Test 12ca: Write validator allows active goal-tracker update through symlinked project root +echo "" +echo "Test 12ca: Write validator allows symlinked active goal-tracker path" +SYMLINK_PARENT="$TEST_DIR/symlink-root" +SYMLINK_PROJECT="$SYMLINK_PARENT/project" +mkdir -p "$SYMLINK_PARENT" +ln -s "$TEST_DIR" "$SYMLINK_PROJECT" +JSON='{"tool_name":"Write","tool_input":{"file_path":"'"$SYMLINK_PROJECT"'/.humanize/rlcr/2026-01-19_12-00-00/goal-tracker.md","content":'"$UPDATED_CONTENT"'}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$SYMLINK_PROJECT" bash "$PROJECT_ROOT/hooks/loop-write-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Write allows symlinked active goal-tracker path" +else + fail "Goal-tracker symlinked active write" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + # Test 12d: Write validator blocks immutable goal-tracker changes after round 0 echo "" echo "Test 12d: Write validator blocks immutable goal-tracker changes after round 0" @@ -493,6 +511,12 @@ else fail "Goal-tracker immutable write" "exit 2" "exit $EXIT_CODE, result: $RESULT" fi +if ! 
echo "$RESULT" | grep -qF "command not found" && echo "$RESULT" | grep -qF '`goal-tracker.md`'; then + pass "Goal-tracker blocked fallback preserves literal backticks" +else + fail "Goal-tracker blocked fallback preserves literal backticks" "literal backticks and no command-not-found noise" "result: $RESULT" +fi + # Test 12e: Edit validator allows mutable goal-tracker edits after round 0 echo "" echo "Test 12e: Edit validator allows mutable goal-tracker edits after round 0" @@ -507,6 +531,20 @@ else fail "Goal-tracker mutable edit" "exit 0" "exit $EXIT_CODE, result: $RESULT" fi +# Test 12eb: Edit validator allows active goal-tracker edit through symlinked project root +echo "" +echo "Test 12eb: Edit validator allows symlinked active goal-tracker path" +JSON='{"tool_name":"Edit","tool_input":{"file_path":"'"$SYMLINK_PROJECT"'/.humanize/rlcr/2026-01-19_12-00-00/goal-tracker.md","old_string":"| [mainline] Keep AC-1 moving | AC-1 | pending | - |","new_string":"| [mainline] Keep AC-1 moving | AC-1 | in_progress | re-anchored |"}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$SYMLINK_PROJECT" bash "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? 
+set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Edit allows symlinked active goal-tracker path" +else + fail "Goal-tracker symlinked active edit" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + # Test 12f: Edit validator blocks immutable goal-tracker edits after round 0 echo "" echo "Test 12ea: Edit validator allows mutable deletions after round 0" @@ -549,6 +587,20 @@ else fail "Goal-tracker old-session read" "exit 2" "exit $EXIT_CODE, result: $RESULT" fi +# Test 12ga: Read validator allows active goal-tracker read through symlinked project root +echo "" +echo "Test 12ga: Read validator allows symlinked active goal-tracker path" +JSON='{"tool_name":"Read","tool_input":{"file_path":"'"$SYMLINK_PROJECT"'/.humanize/rlcr/2026-01-19_12-00-00/goal-tracker.md"}}' +set +e +RESULT=$(echo "$JSON" | CLAUDE_PROJECT_DIR="$SYMLINK_PROJECT" bash "$PROJECT_ROOT/hooks/loop-read-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 0 ]]; then + pass "Read allows symlinked active goal-tracker path" +else + fail "Goal-tracker symlinked active read" "exit 0" "exit $EXIT_CODE, result: $RESULT" +fi + # Test 12h: Unrelated dangerous commands are allowed through (sandbox handles security) echo "" echo "Test 12h: Unrelated dangerous commands allowed through (sandbox responsibility)" diff --git a/tests/run-all-tests.sh b/tests/run-all-tests.sh index 00373b45..e5008df1 100755 --- a/tests/run-all-tests.sh +++ b/tests/run-all-tests.sh @@ -62,6 +62,8 @@ TEST_SUITES=( "test-bash-validator-patterns.sh" "test-todo-checker.sh" "test-plan-file-validation.sh" + "test-plan-check.sh" + "test-template-references.sh" "test-state-exit-naming.sh" "test-stop-gate.sh" @@ -81,6 +83,7 @@ TEST_SUITES=( "test-monitor-e2e-deletion.sh" "test-monitor-e2e-sigint.sh" "test-gen-plan.sh" + "test-gen-plan-check.sh" "test-refine-plan.sh" "test-task-tag-routing.sh" "test-config-merge.sh" diff --git a/tests/test-allowlist-validators.sh b/tests/test-allowlist-validators.sh index fc5c2c98..df2d5851 100755 --- 
a/tests/test-allowlist-validators.sh +++ b/tests/test-allowlist-validators.sh @@ -261,8 +261,24 @@ else fail "Edit validator round-5-contract.md" "exit 0" "exit $EXIT_CODE, output: $RESULT" fi -# Test 13c: Edit validator blocks stale round contract -echo "Test 13c: Edit validator blocks round-0-contract.md" +# Test 13c: Edit validator blocks same-round stale loop contract +echo "Test 13c: Edit validator blocks same-round stale loop contract" +STALE_LOOP_DIR="$TEST_DIR/.humanize/rlcr/2024-01-01_11-00-00" +mkdir -p "$STALE_LOOP_DIR" +touch "$STALE_LOOP_DIR/round-5-contract.md" +HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$STALE_LOOP_DIR'/round-5-contract.md"}}' +set +e +RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) +EXIT_CODE=$? +set -e +if [[ $EXIT_CODE -eq 2 ]] && echo "$RESULT" | grep -qi "correct path"; then + pass "Edit validator blocks same-round stale loop contract" +else + fail "Edit validator same-round stale loop contract" "exit 2 with correct path error" "exit $EXIT_CODE, output: $RESULT" +fi + +# Test 13d: Edit validator blocks stale round contract +echo "Test 13d: Edit validator blocks round-0-contract.md" HOOK_INPUT='{"tool_name": "Edit", "tool_input": {"file_path": "'$LOOP_DIR'/round-0-contract.md"}}' set +e RESULT=$(echo "$HOOK_INPUT" | "$PROJECT_ROOT/hooks/loop-edit-validator.sh" 2>&1) diff --git a/tests/test-ask-codex.sh b/tests/test-ask-codex.sh index 896f282a..8cc976cf 100755 --- a/tests/test-ask-codex.sh +++ b/tests/test-ask-codex.sh @@ -40,6 +40,13 @@ cat > "$MOCK_BIN_DIR/codex" << 'MOCK_EOF' #!/usr/bin/env bash # Mock codex binary for testing ask-codex.sh # Controlled via environment variables. +if [[ "${1:-}" == "--help" ]]; then + echo " --disable Disable a specific Codex hook (e.g. 
codex_hooks)" + exit 0 +fi +if [[ -n "${MOCK_CODEX_ARGS_FILE:-}" ]]; then + printf '%s\n' "$@" > "$MOCK_CODEX_ARGS_FILE" +fi if [[ -n "${MOCK_CODEX_STDERR:-}" ]]; then echo "$MOCK_CODEX_STDERR" >&2 fi @@ -52,17 +59,14 @@ exit "${MOCK_CODEX_EXIT_CODE:-0}" MOCK_EOF chmod +x "$MOCK_BIN_DIR/codex" -# Export mock variables so child processes (the mock codex) can see them -export MOCK_CODEX_EXIT_CODE="" -export MOCK_CODEX_STDOUT="" -export MOCK_CODEX_STDERR="" - # Reset mock state between tests reset_mock() { export MOCK_CODEX_EXIT_CODE="0" export MOCK_CODEX_STDOUT="" export MOCK_CODEX_STDERR="" + export MOCK_CODEX_ARGS_FILE="" } +reset_mock # Helper: run ask-codex with mock codex in PATH, inside mock project run_ask_codex() { @@ -70,6 +74,7 @@ run_ask_codex() { cd "$MOCK_PROJECT" export CLAUDE_PROJECT_DIR="$MOCK_PROJECT" export XDG_CACHE_HOME="$TEST_DIR/cache" + export XDG_CONFIG_HOME="$TEST_DIR/config" PATH="$MOCK_BIN_DIR:$PATH" bash "$ASK_CODEX_SCRIPT" "$@" ) } @@ -209,6 +214,21 @@ else fail "successful run exits 0" "exit 0" "exit=$EXIT_CODE" fi +# Test: nested codex exec disables Codex hooks +reset_mock +export MOCK_CODEX_STDOUT="hook-disable-test" +CODEX_ARGS_FILE="$TEST_DIR/ask-codex-args.txt" +export MOCK_CODEX_ARGS_FILE="$CODEX_ARGS_FILE" +run_ask_codex "hook disable test" > /dev/null 2>&1 +if [[ -f "$CODEX_ARGS_FILE" ]] \ + && grep -qx -- "exec" "$CODEX_ARGS_FILE" \ + && grep -qx -- "--disable" "$CODEX_ARGS_FILE" \ + && grep -qx -- "codex_hooks" "$CODEX_ARGS_FILE"; then + pass "nested codex exec disables codex_hooks" +else + fail "nested codex exec disables codex_hooks" "exec args include --disable codex_hooks" "$(cat "$CODEX_ARGS_FILE" 2>/dev/null || echo missing)" +fi + # ======================================== # Error Handling Tests # ======================================== diff --git a/tests/test-bitlesson-select-routing.sh b/tests/test-bitlesson-select-routing.sh index 68ecfa13..d13de71f 100755 --- a/tests/test-bitlesson-select-routing.sh +++ 
b/tests/test-bitlesson-select-routing.sh @@ -412,6 +412,55 @@ else fail "codex-only provider mode forces codex routing" "exit=0 + mock codex rationale" "exit=$exit_code, stdout=$stdout_out" fi +echo "" +echo "--- Test 8b: Empty codex_model falls back without nounset abort ---" +echo "" + +setup_test_dir +create_real_bitlesson "$TEST_DIR" +mkdir -p "$TEST_DIR/.humanize" +printf '{"bitlesson_model": "haiku", "codex_model": "", "provider_mode": "codex-only"}' > "$TEST_DIR/.humanize/config.json" +EMPTY_FALLBACK_BIN="$TEST_DIR/empty-fallback-bin" +mkdir -p "$EMPTY_FALLBACK_BIN" +cat > "$EMPTY_FALLBACK_BIN/codex" <<'EOF' +#!/usr/bin/env bash +if [[ "${1:-}" == "--help" ]] || [[ "${2:-}" == "--help" ]]; then + echo " --disable Disable a feature" + exit 0 +fi +printf '%s\n' "$@" > "${TEST_CAPTURE_ARGS:?}" +cat > /dev/null +cat <<'OUT' +LESSON_IDS: NONE +RATIONALE: Empty codex_model fell back to the default Codex model. +OUT +EOF +chmod +x "$EMPTY_FALLBACK_BIN/codex" + +CAPTURE_ARGS="$TEST_DIR/empty-codex-model-args.txt" +exit_code=0 +stdout_out="" +stdout_out=$(TEST_CAPTURE_ARGS="$CAPTURE_ARGS" CLAUDE_PROJECT_DIR="$TEST_DIR" XDG_CONFIG_HOME="$TEST_DIR/no-user" \ + PATH="$EMPTY_FALLBACK_BIN:$SAFE_BASE_PATH" \ + bash "$BITLESSON_SELECT" \ + --task "Initialize tracker" \ + --paths "plans/plan.md" \ + --bitlesson-file "$TEST_DIR/bitlesson.md" 2>/dev/null) || exit_code=$? 
+ +captured_args="$(cat "$CAPTURE_ARGS" 2>/dev/null || true)" +expected_default_model="$(python3 -c 'import json, sys; print(json.load(open(sys.argv[1]))["codex_model"])' "$PROJECT_ROOT/config/default_config.json")" + +if [[ $exit_code -eq 0 ]] \ + && echo "$stdout_out" | grep -q "LESSON_IDS: NONE" \ + && echo "$captured_args" | grep -q -- "-m" \ + && echo "$captured_args" | grep -q -- "$expected_default_model"; then + pass "empty codex_model falls back to DEFAULT_CODEX_MODEL" +else + fail "empty codex_model falls back to DEFAULT_CODEX_MODEL" \ + "exit=0 + -m $expected_default_model" \ + "exit=$exit_code, stdout=$stdout_out, args=$captured_args" +fi + echo "" echo "--- Test 9: Placeholder BitLesson file short-circuits to NONE ---" echo "" diff --git a/tests/test-codex-hook-install.sh b/tests/test-codex-hook-install.sh index da20fb96..4e11002f 100755 --- a/tests/test-codex-hook-install.sh +++ b/tests/test-codex-hook-install.sh @@ -10,6 +10,7 @@ PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" source "$SCRIPT_DIR/test-helpers.sh" INSTALL_SCRIPT="$PROJECT_ROOT/scripts/install-skill.sh" +HOOK_INSTALL_SCRIPT="$PROJECT_ROOT/scripts/install-codex-hooks.sh" echo "==========================================" echo "Codex Hook Install Tests" @@ -26,6 +27,11 @@ if ! command -v python3 >/dev/null 2>&1; then exit 1 fi +if [[ ! 
-x "$HOOK_INSTALL_SCRIPT" ]]; then + echo "FATAL: install-codex-hooks.sh not found at $HOOK_INSTALL_SCRIPT" >&2 + exit 1 +fi + setup_test_dir FAKE_BIN="$TEST_DIR/bin" @@ -42,9 +48,10 @@ cat > "$FAKE_BIN/codex" <<'EOF' set -euo pipefail if [[ "${1:-}" == "features" && "${2:-}" == "list" ]]; then - cat <<'LIST' -codex_hooks under development false -LIST + printf 'codex_hooks under development false\n' + for ((i = 0; i < 20000; i++)); do + printf 'feature_%05d stable false\n' "$i" + done exit 0 fi @@ -127,6 +134,12 @@ else fail "Codex install keeps humanize-rlcr entrypoint skill" "skills/humanize-rlcr/SKILL.md exists" "missing" fi +if [[ -f "$CODEX_HOME_DIR/skills/humanize-plan-check/SKILL.md" ]]; then + pass "Codex install keeps humanize-plan-check entrypoint skill" +else + fail "Codex install keeps humanize-plan-check entrypoint skill" "skills/humanize-plan-check/SKILL.md exists" "missing" +fi + if [[ -f "$HOOKS_FILE" ]]; then pass "Codex install writes hooks.json" else @@ -331,4 +344,52 @@ else "$(cat "$TEST_DIR/install-unsupported.log")" fi +DRY_RUN_BIN="$TEST_DIR/bin-dry-run" +DRY_RUN_HOME="$TEST_DIR/codex-home-dry-run" +DRY_RUN_CALL_LOG="$TEST_DIR/dry-run-codex-calls.log" +DRY_RUN_LOG="$TEST_DIR/install-codex-hooks-dry-run.log" +mkdir -p "$DRY_RUN_BIN" "$DRY_RUN_HOME" + +cat > "$DRY_RUN_BIN/codex" <<'EOF' +#!/usr/bin/env bash +set -euo pipefail +printf 'called: %s\n' "$*" >> "${DRY_RUN_CODEX_CALL_LOG:?}" +exit 99 +EOF +chmod +x "$DRY_RUN_BIN/codex" + +set +e +PATH="$DRY_RUN_BIN:/usr/bin:/bin:/usr/sbin:/sbin" \ + DRY_RUN_CODEX_CALL_LOG="$DRY_RUN_CALL_LOG" \ + "$HOOK_INSTALL_SCRIPT" \ + --codex-config-dir "$DRY_RUN_HOME" \ + --runtime-root "$DRY_RUN_HOME/skills/humanize" \ + --dry-run \ + > "$DRY_RUN_LOG" 2>&1 +DRY_RUN_EXIT=$? 
+set -e + +if [[ "$DRY_RUN_EXIT" -eq 0 ]]; then + pass "Codex hooks installer dry-run succeeds without probing codex_hooks support" +else + fail "Codex hooks installer dry-run succeeds without probing codex_hooks support" \ + "exit 0" "exit $DRY_RUN_EXIT, output=$(cat "$DRY_RUN_LOG")" +fi + +if grep -q "DRY-RUN merge" "$DRY_RUN_LOG" \ + && grep -q "DRY-RUN enable codex_hooks feature" "$DRY_RUN_LOG"; then + pass "Codex hooks installer dry-run prints planned actions" +else + fail "Codex hooks installer dry-run prints planned actions" \ + "DRY-RUN merge and enable messages" \ + "$(cat "$DRY_RUN_LOG")" +fi + +if [[ ! -f "$DRY_RUN_CALL_LOG" ]]; then + pass "Codex hooks installer dry-run does not invoke codex" +else + fail "Codex hooks installer dry-run does not invoke codex" \ + "no codex invocations" "$(cat "$DRY_RUN_CALL_LOG")" +fi + print_test_summary "Codex Hook Install Tests" diff --git a/tests/test-config-merge.sh b/tests/test-config-merge.sh index df877222..679a2e0c 100755 --- a/tests/test-config-merge.sh +++ b/tests/test-config-merge.sh @@ -63,6 +63,13 @@ else fail "default-only: gen_plan_mode is set from defaults" "non-empty value" "empty" fi +val=$(get_config_value "$merged" "plan_check_recheck") +if [[ "$val" == "false" ]]; then + pass "default-only: plan_check_recheck defaults to false" +else + fail "default-only: plan_check_recheck defaults to false" "false" "$val" +fi + # ======================================== # Test 2: Project config overrides a default key # ======================================== @@ -88,6 +95,15 @@ else fail "project override: non-overridden keys still use defaults" "false" "$val" fi +printf '{"plan_check_recheck": true}' > "$PROJECT_DIR/.humanize/config.json" +merged=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-config2" load_merged_config "$PROJECT_ROOT" "$PROJECT_DIR" 2>/dev/null) +val=$(get_config_value "$merged" "plan_check_recheck") +if [[ "$val" == "true" ]]; then + pass "project override: plan_check_recheck can enable recheck" +else + 
fail "project override: plan_check_recheck can enable recheck" "true" "$val" +fi + # ======================================== # Test 3: Project config wins over user config (priority order) # ======================================== @@ -197,4 +213,93 @@ else "$val_g + $val_a + $val_b" fi +# ======================================== +# Test 8: gen_plan_check default is false +# ======================================== + +setup_test_dir +PROJECT_DIR="$TEST_DIR/gen-plan-check-default" +mkdir -p "$PROJECT_DIR" + +merged=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-cfg-gpc" load_merged_config "$PROJECT_ROOT" "$PROJECT_DIR" 2>/dev/null) + +val=$(get_config_value "$merged" "gen_plan_check") +if [[ "$val" == "false" ]]; then + pass "gen_plan_check default: defaults to false" +else + fail "gen_plan_check default: defaults to false" "false" "$val" +fi + +# ======================================== +# Test 9: Project config can override gen_plan_check to true +# ======================================== + +setup_test_dir +PROJECT_DIR="$TEST_DIR/gen-plan-check-override-true" +mkdir -p "$PROJECT_DIR/.humanize" +printf '{"gen_plan_check": true}' > "$PROJECT_DIR/.humanize/config.json" + +merged=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-cfg-gpc2" load_merged_config "$PROJECT_ROOT" "$PROJECT_DIR" 2>/dev/null) + +val=$(get_config_value "$merged" "gen_plan_check") +if [[ "$val" == "true" ]]; then + pass "gen_plan_check override: project config can set true" +else + fail "gen_plan_check override: project config can set true" "true" "$val" +fi + +# ======================================== +# Test 10: Project config can override gen_plan_check to false +# ======================================== + +setup_test_dir +PROJECT_DIR="$TEST_DIR/gen-plan-check-override-false" +mkdir -p "$PROJECT_DIR/.humanize" +printf '{"gen_plan_check": false}' > "$PROJECT_DIR/.humanize/config.json" + +merged=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-cfg-gpc3" load_merged_config "$PROJECT_ROOT" "$PROJECT_DIR" 2>/dev/null) + 
+val=$(get_config_value "$merged" "gen_plan_check") +if [[ "$val" == "false" ]]; then + pass "gen_plan_check override: project config can set false" +else + fail "gen_plan_check override: project config can set false" "false" "$val" +fi + +# ======================================== +# Test 11: Invalid gen_plan_check value is present in merged config +# ======================================== + +setup_test_dir +PROJECT_DIR="$TEST_DIR/gen-plan-check-invalid" +mkdir -p "$PROJECT_DIR/.humanize" +printf '{"gen_plan_check": "yes"}' > "$PROJECT_DIR/.humanize/config.json" + +merged=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-cfg-gpc4" load_merged_config "$PROJECT_ROOT" "$PROJECT_DIR" 2>/dev/null) + +val=$(get_config_value "$merged" "gen_plan_check") +if [[ "$val" == "yes" ]]; then + pass "gen_plan_check invalid: invalid value is preserved in merged config (resolver handles fallback)" +else + fail "gen_plan_check invalid: invalid value should be preserved in merged config" "yes" "$val" +fi + +# ======================================== +# Test 12: Null gen_plan_check is stripped by merged config loader +# ======================================== + +setup_test_dir +PROJECT_DIR="$TEST_DIR/gen-plan-check-null" +mkdir -p "$PROJECT_DIR/.humanize" +printf '{"gen_plan_check": null}' > "$PROJECT_DIR/.humanize/config.json" + +merged=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-cfg-gpc5" load_merged_config "$PROJECT_ROOT" "$PROJECT_DIR" 2>/dev/null) + +val=$(get_config_value "$merged" "gen_plan_check") +if [[ "$val" == "false" ]]; then + pass "gen_plan_check null: null is silently stripped, default false remains" +else + fail "gen_plan_check null: null should be silently stripped, leaving default false" "false" "$val" +fi + print_test_summary "Config Merge Tests" diff --git a/tests/test-gen-idea.sh b/tests/test-gen-idea.sh new file mode 100755 index 00000000..913765a4 --- /dev/null +++ b/tests/test-gen-idea.sh @@ -0,0 +1,119 @@ +#!/usr/bin/env bash +set -u + +PROJECT_ROOT="$(cd "$(dirname 
"${BASH_SOURCE[0]}")/.." && pwd)" +VALIDATE_SCRIPT="$PROJECT_ROOT/scripts/validate-gen-idea-io.sh" +GEN_IDEA_CMD="$PROJECT_ROOT/commands/gen-idea.md" + +PASSED=0 +FAILED=0 + +pass() { + echo "PASS: $1" + PASSED=$((PASSED + 1)) +} + +fail() { + echo "FAIL: $1" + echo " expected: $2" + echo " actual: $3" + FAILED=$((FAILED + 1)) +} + +TEST_DIR="$(mktemp -d)" +trap 'rm -rf "$TEST_DIR"' EXIT + +echo "=== Test: gen-idea validation ===" + +if grep -q -- '--raw-arguments "$ARGUMENTS"' "$GEN_IDEA_CMD"; then + pass "gen-idea.md passes raw arguments through parser mode" +else + fail "gen-idea.md passes raw arguments through parser mode" 'contains --raw-arguments "$ARGUMENTS"' "missing" +fi + +OUT_DIR="$TEST_DIR/out dir" +mkdir -p "$OUT_DIR" +RAW_OUTPUT="$OUT_DIR/idea draft.md" +RAW_ARGS="--n 4 --output '$RAW_OUTPUT' \"idea with spaces\"" +RAW_RESULT="$("$VALIDATE_SCRIPT" --raw-arguments "$RAW_ARGS" 2>&1)" +RAW_EXIT=$? + +if [[ "$RAW_EXIT" -eq 0 ]] && \ + echo "$RAW_RESULT" | grep -q '^N: 4$' && \ + echo "$RAW_RESULT" | grep -qF "OUTPUT_FILE: $RAW_OUTPUT" && \ + echo "$RAW_RESULT" | grep -q '^idea with spaces$'; then + pass "validate-gen-idea-io: raw argument mode preserves flags and quoted inline idea" +else + fail "validate-gen-idea-io: raw argument mode preserves flags and quoted inline idea" "exit 0, N 4, output path, idea body" "exit $RAW_EXIT; output: $RAW_RESULT" +fi + +UNQUOTED_OUTPUT="$OUT_DIR/unquoted idea.md" +UNQUOTED_RAW_ARGS="--n 4 --output '$UNQUOTED_OUTPUT' add undo/redo to editor" +UNQUOTED_RESULT="$("$VALIDATE_SCRIPT" --raw-arguments "$UNQUOTED_RAW_ARGS" 2>&1)" +UNQUOTED_EXIT=$? 
+if [[ "$UNQUOTED_EXIT" -eq 0 ]] && \ + echo "$UNQUOTED_RESULT" | grep -q '^N: 4$' && \ + echo "$UNQUOTED_RESULT" | grep -qF "OUTPUT_FILE: $UNQUOTED_OUTPUT" && \ + echo "$UNQUOTED_RESULT" | grep -q '^add undo/redo to editor$'; then + pass "validate-gen-idea-io: raw argument mode preserves unquoted multi-word inline idea" +else + fail "validate-gen-idea-io: raw argument mode preserves unquoted multi-word inline idea" "exit 0, N 4, output path, idea body" "exit $UNQUOTED_EXIT; output: $UNQUOTED_RESULT" +fi + +APOSTROPHE_OUTPUT="$OUT_DIR/apostrophe idea.md" +APOSTROPHE_RAW_ARGS="--n 4 --output '$APOSTROPHE_OUTPUT' don't use global state" +APOSTROPHE_RESULT="$("$VALIDATE_SCRIPT" --raw-arguments "$APOSTROPHE_RAW_ARGS" 2>&1)" +APOSTROPHE_EXIT=$? +if [[ "$APOSTROPHE_EXIT" -eq 0 ]] && \ + echo "$APOSTROPHE_RESULT" | grep -q '^N: 4$' && \ + echo "$APOSTROPHE_RESULT" | grep -qF "OUTPUT_FILE: $APOSTROPHE_OUTPUT" && \ + echo "$APOSTROPHE_RESULT" | grep -q "^don't use global state$"; then + pass "validate-gen-idea-io: raw argument mode treats apostrophes as idea text" +else + fail "validate-gen-idea-io: raw argument mode treats apostrophes as idea text" "exit 0, N 4, output path, idea body" "exit $APOSTROPHE_EXIT; output: $APOSTROPHE_RESULT" +fi + +POSTFIX_OUTPUT="$OUT_DIR/postfix idea.md" +POSTFIX_RAW_ARGS="add undo/redo to editor --n 5 --output '$POSTFIX_OUTPUT'" +POSTFIX_RESULT="$("$VALIDATE_SCRIPT" --raw-arguments "$POSTFIX_RAW_ARGS" 2>&1)" +POSTFIX_EXIT=$? 
+if [[ "$POSTFIX_EXIT" -eq 0 ]] && \ + echo "$POSTFIX_RESULT" | grep -q '^N: 5$' && \ + echo "$POSTFIX_RESULT" | grep -qF "OUTPUT_FILE: $POSTFIX_OUTPUT" && \ + echo "$POSTFIX_RESULT" | grep -q '^add undo/redo to editor$'; then + pass "validate-gen-idea-io: raw argument mode preserves idea-first option parsing" +else + fail "validate-gen-idea-io: raw argument mode preserves idea-first option parsing" "exit 0, N 5, output path, idea body" "exit $POSTFIX_EXIT; output: $POSTFIX_RESULT" +fi + +DIRECT_OUTPUT="$TEST_DIR/direct.md" +DIRECT_RESULT="$("$VALIDATE_SCRIPT" "direct idea with spaces" --n 3 --output "$DIRECT_OUTPUT" 2>&1)" +DIRECT_EXIT=$? +if [[ "$DIRECT_EXIT" -eq 0 ]] && \ + echo "$DIRECT_RESULT" | grep -q '^N: 3$' && \ + echo "$DIRECT_RESULT" | grep -q '^direct idea with spaces$'; then + pass "validate-gen-idea-io: direct argv mode still accepts separate arguments" +else + fail "validate-gen-idea-io: direct argv mode still accepts separate arguments" "exit 0, N 3, idea body" "exit $DIRECT_EXIT; output: $DIRECT_RESULT" +fi + +UNMATCHED_RESULT="$("$VALIDATE_SCRIPT" --raw-arguments '--n 4 "unterminated idea' 2>&1)" +UNMATCHED_EXIT=$? 
+if [[ "$UNMATCHED_EXIT" -eq 0 ]] && \ + echo "$UNMATCHED_RESULT" | grep -q '^N: 4$' && \ + echo "$UNMATCHED_RESULT" | grep -q '^"unterminated idea$'; then + pass "validate-gen-idea-io: raw argument mode treats unmatched idea quote as text" +else + fail "validate-gen-idea-io: raw argument mode treats unmatched idea quote as text" "exit 0, N 4, idea body with leading quote" "exit $UNMATCHED_EXIT; output: $UNMATCHED_RESULT" +fi + +echo "" +echo "=== gen-idea Test Summary ===" +echo "Passed: $PASSED" +echo "Failed: $FAILED" + +if [[ "$FAILED" -ne 0 ]]; then + exit 1 +fi + +exit 0 diff --git a/tests/test-gen-plan-check.sh b/tests/test-gen-plan-check.sh new file mode 100755 index 00000000..9904eda4 --- /dev/null +++ b/tests/test-gen-plan-check.sh @@ -0,0 +1,2520 @@ +#!/usr/bin/env bash +# +# Integration tests for gen-plan --check mode +# +# Covers: draft-check pass/blocker, plan-check pass/blocker, repair, +# recheck, auto-start gating, tmp cleanup, and path-leak detection. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" +source "$SCRIPT_DIR/test-helpers.sh" + +CONFIG_LOADER="$PROJECT_ROOT/scripts/lib/config-loader.sh" +CHECK_MODE_LIB="$PROJECT_ROOT/scripts/lib/gen-plan-check-mode.sh" +PLAN_CHECK_COMMON="$PROJECT_ROOT/scripts/lib/plan-check-common.sh" +VALIDATE_SCRIPT="$PROJECT_ROOT/scripts/validate-gen-plan-io.sh" + +GEN_PLAN_CMD="$PROJECT_ROOT/commands/gen-plan.md" + +TESTS_PASSED=0 +TESTS_FAILED=0 + +echo "========================================" +echo "gen-plan Check Mode Integration Tests" +echo "========================================" +echo "" + +# ======================================== +# Test: Resolver priority table +# ======================================== + +echo "--- Resolver priority table ---" + +source "$CHECK_MODE_LIB" + +# Default: false +_gen_plan_resolve_check_mode "false" "false" "" +if [[ "$EFFECTIVE_CHECK_MODE" == "false" ]]; then + pass "resolver: default is false" +else + fail "resolver: default is false" "false" "$EFFECTIVE_CHECK_MODE" +fi + +# --check alone: true +_gen_plan_resolve_check_mode "true" "false" "" +if [[ "$EFFECTIVE_CHECK_MODE" == "true" ]]; then + pass "resolver: --check alone enables" +else + fail "resolver: --check alone enables" "true" "$EFFECTIVE_CHECK_MODE" +fi + +# Config true: true +_gen_plan_resolve_check_mode "false" "false" "true" +if [[ "$EFFECTIVE_CHECK_MODE" == "true" ]]; then + pass "resolver: config true enables" +else + fail "resolver: config true enables" "true" "$EFFECTIVE_CHECK_MODE" +fi + +# --check + config true: true (idempotent) +_gen_plan_resolve_check_mode "true" "false" "true" +if [[ "$EFFECTIVE_CHECK_MODE" == "true" ]]; then + pass "resolver: --check + config true is true" +else + fail "resolver: --check + config true is true" "true" "$EFFECTIVE_CHECK_MODE" +fi + +# --no-check overrides --check: false +_gen_plan_resolve_check_mode "true" "true" "true" +if [[ "$EFFECTIVE_CHECK_MODE" == "false" ]]; then + pass "resolver: --no-check overrides --check" +else + fail "resolver: --no-check overrides 
--check" "false" "$EFFECTIVE_CHECK_MODE" +fi + +# --no-check overrides config: false +_gen_plan_resolve_check_mode "false" "true" "true" +if [[ "$EFFECTIVE_CHECK_MODE" == "false" ]]; then + pass "resolver: --no-check overrides config" +else + fail "resolver: --no-check overrides config" "false" "$EFFECTIVE_CHECK_MODE" +fi + +# Invalid config value warns and falls back to false +WARN_OUTPUT=$("$CHECK_MODE_LIB" &1 || true) +_gen_plan_resolve_check_mode "false" "false" "yes" 2>/tmp/gen-plan-check-warn.log +if [[ "$EFFECTIVE_CHECK_MODE" == "false" ]]; then + pass "resolver: invalid config falls back to false" +else + fail "resolver: invalid config falls back to false" "false" "$EFFECTIVE_CHECK_MODE" +fi +if grep -q "Warning: unsupported gen_plan_check" /tmp/gen-plan-check-warn.log; then + pass "resolver: invalid config emits warning" +else + fail "resolver: invalid config emits warning" "warning emitted" "no warning" +fi +rm -f /tmp/gen-plan-check-warn.log + +# --check wins over invalid config (no warning should affect outcome) +_gen_plan_resolve_check_mode "true" "false" "yes" 2>/dev/null +if [[ "$EFFECTIVE_CHECK_MODE" == "true" ]]; then + pass "resolver: --check wins over invalid config" +else + fail "resolver: --check wins over invalid config" "true" "$EFFECTIVE_CHECK_MODE" +fi + +# Null config value is treated as absent (default false) +_gen_plan_resolve_check_mode "false" "false" "" +if [[ "$EFFECTIVE_CHECK_MODE" == "false" ]]; then + pass "resolver: empty config is default false" +else + fail "resolver: empty config is default false" "false" "$EFFECTIVE_CHECK_MODE" +fi + +# ======================================== +# Test: Default config contains gen_plan_check +# ======================================== + +echo "" +echo "--- Default config ---" + +DEFAULT_CONFIG="$PROJECT_ROOT/config/default_config.json" +if [[ -f "$DEFAULT_CONFIG" ]]; then + val=$(jq -r '.gen_plan_check' "$DEFAULT_CONFIG") + if [[ "$val" == "false" ]]; then + pass "default config: 
gen_plan_check is false" + else + fail "default config: gen_plan_check is false" "false" "$val" + fi +else + fail "default config: file exists" "exists" "missing" +fi + +# ======================================== +# Test: gen-plan.md mentions check-mode phases +# ======================================== + +echo "" +echo "--- gen-plan.md check-mode phases ---" + +if grep -q "Phase 2.5: Check-Draft" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: Check-Draft phase exists" +else + fail "gen-plan.md: Check-Draft phase exists" "present" "missing" +fi + +if grep -q "Step 1.5: Check-Plan and Repair" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: Check-Plan and Repair step exists" +else + fail "gen-plan.md: Check-Plan and Repair step exists" "present" "missing" +fi + +if grep -q "EFFECTIVE_CHECK_MODE" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: references EFFECTIVE_CHECK_MODE" +else + fail "gen-plan.md: references EFFECTIVE_CHECK_MODE" "present" "missing" +fi + +if grep -q "draft-consistency-checker" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: references draft-consistency-checker" +else + fail "gen-plan.md: references draft-consistency-checker" "present" "missing" +fi + +if grep -q "draft-ambiguity-checker" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: references draft-ambiguity-checker" +else + fail "gen-plan.md: references draft-ambiguity-checker" "present" "missing" +fi + +if grep -q "draft-plan-drift-checker" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: references draft-plan-drift-checker" +else + fail "gen-plan.md: references draft-plan-drift-checker" "present" "missing" +fi + +if grep -q "Run plan-consistency-checker on the plan body" "$GEN_PLAN_CMD" && \ + grep -q "Run plan-ambiguity-checker on the plan body" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: runs primary semantic checkers before drift lookup" +else + fail "gen-plan.md: runs primary semantic checkers before drift lookup" "present" "missing" +fi + +if grep -q "Run \`draft-plan-drift-checker\` only if 
\`PRIMARY_PLAN_FINDINGS\` is non-empty" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: drift checker is conditional on primary findings" +else + fail "gen-plan.md: drift checker is conditional on primary findings" "present" "missing" +fi + +if grep -q "\`PRIMARY_PLAN_FINDINGS\` as the existing plan findings to inspect" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: passes existing plan findings to drift checker" +else + fail "gen-plan.md: passes existing plan findings to drift checker" "present" "missing" +fi + +if grep -q "secondary source-recovery pass" "$GEN_PLAN_CMD" && \ + grep -q "must not run as an independent whole-plan draft completeness audit" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: documents drift as secondary source recovery" +else + fail "gen-plan.md: documents drift as secondary source recovery" "present" "missing" +fi + +if grep -q "Active Source Fidelity" "$GEN_PLAN_CMD" && \ + grep -q "the clarification takes precedence for that topic" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: documents active source fidelity and clarification precedence" +else + fail "gen-plan.md: documents active source fidelity and clarification precedence" "present" "missing" +fi + +if grep -q "MUST incorporate ALL information" "$GEN_PLAN_CMD"; then + fail "gen-plan.md: does not require all draft information verbatim" "absent" "MUST incorporate ALL information" +else + pass "gen-plan.md: does not require all draft information verbatim" +fi + +if grep -q "superset of the draft" "$GEN_PLAN_CMD"; then + fail "gen-plan.md: does not require plan to be a draft superset" "absent" "superset of the draft" +else + pass "gen-plan.md: does not require plan to be a draft superset" +fi + +if grep -q "NEVER discard or override any original draft content" "$GEN_PLAN_CMD"; then + fail "gen-plan.md: does not forbid clarification supersession" "absent" "NEVER discard or override any original draft content" +else + pass "gen-plan.md: does not forbid clarification supersession" +fi + +if grep -q 
"plan_check_backup_plan" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: references plan_check_backup_plan" +else + fail "gen-plan.md: references plan_check_backup_plan" "present" "missing" +fi + +if grep -q "plan_check_atomic_write" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: references plan_check_atomic_write" +else + fail "gen-plan.md: references plan_check_atomic_write" "present" "missing" +fi + +if grep -q "unresolved-draft-blocker" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: auto-start skip message uses unresolved-draft-blocker" +else + fail "gen-plan.md: auto-start skip message uses unresolved-draft-blocker" "present" "missing" +fi + +if grep -q "unresolved-plan-check-blocker" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: auto-start skip message uses unresolved-plan-check-blocker" +else + fail "gen-plan.md: auto-start skip message uses unresolved-plan-check-blocker" "present" "missing" +fi + +if grep -q "recheck-failure" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: auto-start skip message uses recheck-failure" +else + fail "gen-plan.md: auto-start skip message uses recheck-failure" "present" "missing" +fi + +if grep -q ".humanize/gen-plan-check" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: references artifact directory" +else + fail "gen-plan.md: references artifact directory" "present" "missing" +fi + +if grep -q "tmp/" "$GEN_PLAN_CMD"; then + pass "gen-plan.md: references tmp/ cleanup" +else + fail "gen-plan.md: references tmp/ cleanup" "present" "missing" +fi + +# ======================================== +# Test: Agent files exist and have correct metadata +# ======================================== + +echo "" +echo "--- Agent files ---" + +DRAFT_CONSISTENCY="$PROJECT_ROOT/agents/draft-consistency-checker.md" +DRAFT_AMBIGUITY="$PROJECT_ROOT/agents/draft-ambiguity-checker.md" +DRAFT_DRIFT="$PROJECT_ROOT/agents/draft-plan-drift-checker.md" + +for agent_file in "$DRAFT_CONSISTENCY" "$DRAFT_AMBIGUITY" "$DRAFT_DRIFT"; do + basename_file=$(basename "$agent_file") + if [[ -f 
"$agent_file" ]]; then + pass "agent: $basename_file exists" + else + fail "agent: $basename_file exists" "exists" "missing" + continue + fi + + if grep -q '^---$' "$agent_file"; then + pass "agent: $basename_file has YAML frontmatter" + else + fail "agent: $basename_file has YAML frontmatter" "present" "missing" + fi + + if grep -q "model:" "$agent_file"; then + pass "agent: $basename_file has model field" + else + fail "agent: $basename_file has model field" "present" "missing" + fi +done + +# Check ID prefixes +if grep -q '"id": "DC-' "$DRAFT_CONSISTENCY"; then + pass "draft-consistency-checker: uses DC- prefix" +else + fail "draft-consistency-checker: uses DC- prefix" "present" "missing" +fi + +if grep -q '"id": "DA-' "$DRAFT_AMBIGUITY"; then + pass "draft-ambiguity-checker: uses DA- prefix" +else + fail "draft-ambiguity-checker: uses DA- prefix" "present" "missing" +fi + +if grep -q '"id": "DD-' "$DRAFT_DRIFT"; then + pass "draft-plan-drift-checker: uses DD- prefix" +else + fail "draft-plan-drift-checker: uses DD- prefix" "present" "missing" +fi + +if grep -q '"category": "draft-plan-drift"' "$DRAFT_DRIFT"; then + pass "draft-plan-drift-checker: category is draft-plan-drift" +else + fail "draft-plan-drift-checker: category is draft-plan-drift" "present" "missing" +fi + +if grep -q "Inspect only the specific supplied contradiction or ambiguity findings" "$DRAFT_DRIFT"; then + pass "draft-plan-drift-checker: only inspects supplied findings" +else + fail "draft-plan-drift-checker: only inspects supplied findings" "present" "missing" +fi + +if grep -q "Do not scan the whole plan for omitted draft requirements" "$DRAFT_DRIFT"; then + pass "draft-plan-drift-checker: prohibits whole-plan completeness review" +else + fail "draft-plan-drift-checker: prohibits whole-plan completeness review" "present" "missing" +fi + +if grep -q "Plan-vs-draft differences that are not attached to a supplied contradiction or ambiguity finding" "$DRAFT_DRIFT"; then + pass 
"draft-plan-drift-checker: unrelated draft differences are out of scope" +else + fail "draft-plan-drift-checker: unrelated draft differences are out of scope" "present" "missing" +fi + +if grep -q '"related_finding_id": "C-001"' "$DRAFT_DRIFT"; then + pass "draft-plan-drift-checker: includes related_finding_id field" +else + fail "draft-plan-drift-checker: includes related_finding_id field" "present" "missing" +fi + +# ======================================== +# Test: plan-check.sh whitelist extensions +# ======================================== + +echo "" +echo "--- plan-check.sh whitelist ---" + +PLAN_CHECK_SCRIPT="$PROJECT_ROOT/scripts/plan-check.sh" + +if grep -q '"draft-plan-drift"' "$PLAN_CHECK_SCRIPT"; then + pass "plan-check.sh: valid_categories includes draft-plan-drift" +else + fail "plan-check.sh: valid_categories includes draft-plan-drift" "present" "missing" +fi + +if grep -q '"draft-consistency-checker"' "$PLAN_CHECK_SCRIPT"; then + pass "plan-check.sh: valid_checkers includes draft-consistency-checker" +else + fail "plan-check.sh: valid_checkers includes draft-consistency-checker" "present" "missing" +fi + +if grep -q '"draft-ambiguity-checker"' "$PLAN_CHECK_SCRIPT"; then + pass "plan-check.sh: valid_checkers includes draft-ambiguity-checker" +else + fail "plan-check.sh: valid_checkers includes draft-ambiguity-checker" "present" "missing" +fi + +if grep -q '"draft-plan-drift-checker"' "$PLAN_CHECK_SCRIPT"; then + pass "plan-check.sh: valid_checkers includes draft-plan-drift-checker" +else + fail "plan-check.sh: valid_checkers includes draft-plan-drift-checker" "present" "missing" +fi + +# ======================================== +# Test: plan-check-common.sh valid_resolutions extension +# ======================================== + +echo "" +echo "--- plan-check-common.sh extensions ---" + +if grep -q '("draft-plan-drift", "drift_resolution")' "$PLAN_CHECK_COMMON"; then + pass "plan-check-common.sh: valid_resolutions includes draft-plan-drift" +else + 
fail "plan-check-common.sh: valid_resolutions includes draft-plan-drift" "present" "missing" +fi + +# ======================================== +# Test: _plan_check_extract_appendix helper +# ======================================== + +echo "" +echo "--- _plan_check_extract_appendix helper ---" + +# Need to source with set +e because plan-check-common.sh has set -e +set +e +source "$PLAN_CHECK_COMMON" +set -e + +# Create a plan with appendix +TMP_PLAN=$(mktemp) +cat > "$TMP_PLAN" <<'EOF' +# Plan Title + +## Goal Description +Some goal. + +--- Original Design Draft Start --- + +Original draft line 1. +Original draft line 2. + +--- Original Design Draft End --- +EOF + +APPENDIX=$(_plan_check_extract_appendix "$TMP_PLAN") +if echo "$APPENDIX" | grep -q "Original draft line 1"; then + pass "extract_appendix: returns inner appendix content" +else + fail "extract_appendix: returns inner appendix content" "contains draft text" "$APPENDIX" +fi + +# Create a plan without appendix +TMP_PLAN_NO_APPENDIX=$(mktemp) +cat > "$TMP_PLAN_NO_APPENDIX" <<'EOF' +# Plan Title + +## Goal Description +No appendix. +EOF + +APPENDIX_EMPTY=$(_plan_check_extract_appendix "$TMP_PLAN_NO_APPENDIX" 2>/dev/null) +if [[ -z "$APPENDIX_EMPTY" ]]; then + pass "extract_appendix: returns empty for missing markers" +else + fail "extract_appendix: returns empty for missing markers" "empty" "$APPENDIX_EMPTY" +fi + +rm -f "$TMP_PLAN" "$TMP_PLAN_NO_APPENDIX" + +# ======================================== +# Test: draft-plan-drift final schema validation +# ======================================== + +echo "" +echo "--- draft-plan-drift final schema validation ---" + +DRIFT_SCHEMA_DIR=$(mktemp -d) +DRIFT_PLAN="$DRIFT_SCHEMA_DIR/plan.md" +cat > "$DRIFT_PLAN" <<'EOF' +# Plan + +## Goal Description +Validate drift findings. 
+EOF + +cat > "$DRIFT_SCHEMA_DIR/valid-drift.json" <<'EOF' +[ + {"id":"C-001","severity":"warning","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"conflicting key names","explanation":"two key names appear","suggested_resolution":"use one key","affected_acs":[],"affected_tasks":[]}, + {"id":"DD-001","severity":"warning","category":"draft-plan-drift","source_checker":"draft-plan-drift-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"clarification says use gen_plan_check","explanation":"source material resolves the primary contradiction","suggested_resolution":"use gen_plan_check","related_finding_id":"C-001","affected_acs":[],"affected_tasks":[]} +] +EOF + +mkdir -p "$DRIFT_SCHEMA_DIR/report-valid" +bash "$PLAN_CHECK_SCRIPT" \ + --plan "$DRIFT_PLAN" \ + --report-dir "$DRIFT_SCHEMA_DIR/report-valid" \ + --findings-file "$DRIFT_SCHEMA_DIR/valid-drift.json" > /dev/null 2>&1 + +valid_drift_result=$(python3 -c ' +import json, sys +d=json.load(open(sys.argv[1])) +cats=[f.get("category") for f in d["findings"]] +ids=[f.get("id") for f in d["findings"]] +print("ok" if "draft-plan-drift" in cats and "runtime-error" not in cats and "DD-001" in ids else "bad") +' "$DRIFT_SCHEMA_DIR/report-valid/findings.json") +if [[ "$valid_drift_result" == "ok" ]]; then + pass "plan-check.sh: valid drift finding with related_finding_id is accepted" +else + fail "plan-check.sh: valid drift finding with related_finding_id is accepted" "ok" "$valid_drift_result" +fi + +if grep -q "Related Finding.*C-001" "$DRIFT_SCHEMA_DIR/report-valid/report.md"; then + pass "plan-check.sh: report includes related finding for drift" +else + fail "plan-check.sh: report includes related finding for drift" "Related Finding C-001" "missing" +fi + +cat > "$DRIFT_SCHEMA_DIR/missing-related.json" <<'EOF' +[ + 
{"id":"C-001","severity":"warning","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"conflicting key names","explanation":"two key names appear","suggested_resolution":"use one key","affected_acs":[],"affected_tasks":[]}, + {"id":"DD-001","severity":"warning","category":"draft-plan-drift","source_checker":"draft-plan-drift-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"clarification says use gen_plan_check","explanation":"source material resolves the primary contradiction","suggested_resolution":"use gen_plan_check","affected_acs":[],"affected_tasks":[]} +] +EOF + +mkdir -p "$DRIFT_SCHEMA_DIR/report-missing" +bash "$PLAN_CHECK_SCRIPT" \ + --plan "$DRIFT_PLAN" \ + --report-dir "$DRIFT_SCHEMA_DIR/report-missing" \ + --findings-file "$DRIFT_SCHEMA_DIR/missing-related.json" > /dev/null 2>&1 +missing_related_result=$(python3 -c 'import json,sys; d=json.load(open(sys.argv[1])); cats=[f.get("category") for f in d["findings"]]; ids=[f.get("id") for f in d["findings"]]; print("ok" if "runtime-error" in cats and "C-001" in ids else "bad")' "$DRIFT_SCHEMA_DIR/report-missing/findings.json") +if [[ "$missing_related_result" == "ok" ]]; then + pass "plan-check.sh: missing drift related_finding_id becomes runtime-error" +else + fail "plan-check.sh: missing drift related_finding_id becomes runtime-error" "runtime-error plus preserved C-001" "$missing_related_result" +fi + +cat > "$DRIFT_SCHEMA_DIR/unknown-related.json" <<'EOF' +[ + {"id":"C-001","severity":"warning","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"conflicting key names","explanation":"two key names appear","suggested_resolution":"use one key","affected_acs":[],"affected_tasks":[]}, + 
{"id":"DD-001","severity":"warning","category":"draft-plan-drift","source_checker":"draft-plan-drift-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"clarification says use gen_plan_check","explanation":"source material resolves the primary contradiction","suggested_resolution":"use gen_plan_check","related_finding_id":"C-999","affected_acs":[],"affected_tasks":[]} +] +EOF + +mkdir -p "$DRIFT_SCHEMA_DIR/report-unknown" +bash "$PLAN_CHECK_SCRIPT" \ + --plan "$DRIFT_PLAN" \ + --report-dir "$DRIFT_SCHEMA_DIR/report-unknown" \ + --findings-file "$DRIFT_SCHEMA_DIR/unknown-related.json" > /dev/null 2>&1 +unknown_related_result=$(python3 -c 'import json,sys; d=json.load(open(sys.argv[1])); cats=[f.get("category") for f in d["findings"]]; ids=[f.get("id") for f in d["findings"]]; print("ok" if "runtime-error" in cats and "C-001" in ids else "bad")' "$DRIFT_SCHEMA_DIR/report-unknown/findings.json") +if [[ "$unknown_related_result" == "ok" ]]; then + pass "plan-check.sh: unknown drift related_finding_id becomes runtime-error" +else + fail "plan-check.sh: unknown drift related_finding_id becomes runtime-error" "runtime-error plus preserved C-001" "$unknown_related_result" +fi + +cat > "$DRIFT_SCHEMA_DIR/wrong-category-related.json" <<'EOF' +[ + {"id":"S-001","severity":"warning","category":"schema","source_checker":"plan-schema-validator","location":{"section":"Goal","fragment":"config key"},"evidence":"schema issue","explanation":"not a primary semantic finding","suggested_resolution":"fix schema","affected_acs":[],"affected_tasks":[]}, + {"id":"DD-001","severity":"warning","category":"draft-plan-drift","source_checker":"draft-plan-drift-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"clarification says use gen_plan_check","explanation":"source material resolves the primary contradiction","suggested_resolution":"use gen_plan_check","related_finding_id":"S-001","affected_acs":[],"affected_tasks":[]} +] +EOF + +mkdir -p 
"$DRIFT_SCHEMA_DIR/report-wrong-category" +bash "$PLAN_CHECK_SCRIPT" \ + --plan "$DRIFT_PLAN" \ + --report-dir "$DRIFT_SCHEMA_DIR/report-wrong-category" \ + --findings-file "$DRIFT_SCHEMA_DIR/wrong-category-related.json" > /dev/null 2>&1 +wrong_category_result=$(python3 -c 'import json,sys; d=json.load(open(sys.argv[1])); cats=[f.get("category") for f in d["findings"]]; ids=[f.get("id") for f in d["findings"]]; print("ok" if "runtime-error" in cats and "S-001" in ids else "bad")' "$DRIFT_SCHEMA_DIR/report-wrong-category/findings.json") +if [[ "$wrong_category_result" == "ok" ]]; then + pass "plan-check.sh: wrong-category drift related_finding_id becomes runtime-error" +else + fail "plan-check.sh: wrong-category drift related_finding_id becomes runtime-error" "runtime-error plus preserved S-001" "$wrong_category_result" +fi + +resolved_drift_findings='[{"id":"C-001","severity":"warning","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"conflicting key names","explanation":"two key names appear","suggested_resolution":"use one key","affected_acs":[],"affected_tasks":[]},{"id":"DD-001","severity":"blocker","category":"draft-plan-drift","source_checker":"draft-plan-drift-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"clarification says use gen_plan_check","explanation":"source material resolves the primary contradiction","suggested_resolution":"use gen_plan_check","related_finding_id":"C-001","affected_acs":[],"affected_tasks":[]}]' +resolved_drift_resolutions='[{"finding_id":"DD-001","resolution_type":"drift_resolution","resolution":"Use gen_plan_check from clarification"}]' +resolved_drift_json="$(plan_check_build_resolved_json "$DRIFT_PLAN" "abc" "test-model" "{}" 0 "$resolved_drift_findings" "$resolved_drift_resolutions")" +resolved_drift_status="$(echo "$resolved_drift_json" | python3 -c 'import json,sys; d=json.load(sys.stdin); 
print(d["summary"]["status"])')" +resolved_drift_unresolved="$(echo "$resolved_drift_json" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["unresolved_blockers"])')" +resolved_drift_state="$(echo "$resolved_drift_json" | python3 -c 'import json,sys; d=json.load(sys.stdin); print([f for f in d["findings"] if f["id"]=="DD-001"][0]["resolution_state"])')" +if [[ "$resolved_drift_status" == "pass" && "$resolved_drift_unresolved" == "0" && "$resolved_drift_state" == "resolved" ]]; then + pass "plan-check-common.sh: drift_resolution resolves draft-plan-drift blocker" +else + fail "plan-check-common.sh: drift_resolution resolves draft-plan-drift blocker" "pass/0/resolved" "$resolved_drift_status/$resolved_drift_unresolved/$resolved_drift_state" +fi + +rm -rf "$DRIFT_SCHEMA_DIR" + +# ======================================== +# Test: docs/usage.md mentions check mode +# ======================================== + +echo "" +echo "--- docs/usage.md check mode docs ---" + +USAGE_MD="$PROJECT_ROOT/docs/usage.md" + +if grep -q '\--check' "$USAGE_MD"; then + pass "usage.md: mentions --check" +else + fail "usage.md: mentions --check" "present" "missing" +fi + +if grep -q '\--no-check' "$USAGE_MD"; then + pass "usage.md: mentions --no-check" +else + fail "usage.md: mentions --no-check" "present" "missing" +fi + +if grep -q 'gen_plan_check' "$USAGE_MD"; then + pass "usage.md: mentions gen_plan_check" +else + fail "usage.md: mentions gen_plan_check" "present" "missing" +fi + +if grep -q 'check-draft' "$USAGE_MD"; then + pass "usage.md: mentions check-draft" +else + fail "usage.md: mentions check-draft" "present" "missing" +fi + +if grep -q 'check-plan' "$USAGE_MD"; then + pass "usage.md: mentions check-plan" +else + fail "usage.md: mentions check-plan" "present" "missing" +fi + +if grep -q '.humanize/gen-plan-check' "$USAGE_MD"; then + pass "usage.md: mentions artifact directory" +else + fail "usage.md: mentions artifact directory" "present" "missing" +fi + 
+# ======================================== +# Test: Version bump +# ======================================== + +echo "" +echo "--- Version bump ---" + +PLUGIN_JSON="$PROJECT_ROOT/.claude-plugin/plugin.json" +MARKETPLACE_JSON="$PROJECT_ROOT/.claude-plugin/marketplace.json" +README_MD="$PROJECT_ROOT/README.md" + +PLUGIN_VER=$(grep -o '"version":[[:space:]]*"[^"]*"' "$PLUGIN_JSON" | grep -o '"[^"]*"$' | tr -d '"') +MARKETPLACE_VER=$(grep -o '"version":[[:space:]]*"[^"]*"' "$MARKETPLACE_JSON" | grep -o '"[^"]*"$' | tr -d '"') +README_VER=$(grep -o 'Current Version:[[:space:]]*[0-9.]*' "$README_MD" | grep -o '[0-9.]*$') + +if [[ "$PLUGIN_VER" == "1.17.0" ]]; then + pass "version: plugin.json is 1.17.0" +else + fail "version: plugin.json is 1.17.0" "1.17.0" "$PLUGIN_VER" +fi + +if [[ "$MARKETPLACE_VER" == "1.17.0" ]]; then + pass "version: marketplace.json is 1.17.0" +else + fail "version: marketplace.json is 1.17.0" "1.17.0" "$MARKETPLACE_VER" +fi + +if [[ "$README_VER" == "1.17.0" ]]; then + pass "version: README.md is 1.17.0" +else + fail "version: README.md is 1.17.0" "1.17.0" "$README_VER" +fi + +if [[ "$PLUGIN_VER" == "$MARKETPLACE_VER" && "$PLUGIN_VER" == "$README_VER" ]]; then + pass "version: all three files are in sync" +else + fail "version: all three files are in sync" "sync" "plugin=$PLUGIN_VER marketplace=$MARKETPLACE_VER readme=$README_VER" +fi + +# ======================================== +# Mocked Harness Setup +# ======================================== + +echo "" +echo "--- Mocked flow tests ---" + +set +e +source "$PLAN_CHECK_COMMON" 2>/dev/null +set -e + +# State directory for the current scenario +FLOW_STATE_DIR="" + +# Reset state for a new scenario +_flow_reset_state() { + FLOW_STATE_DIR="$1" + mkdir -p "$FLOW_STATE_DIR" + export FLOW_STATE_DIR + cat > "$FLOW_STATE_DIR/state.json" <<'EOF' 
+{"checker_calls":[],"auq_calls":[],"backup_paths":[],"atomic_writes":[],"resolution_records":[],"skip_message":"","cleanup_done":false,"artifacts":[],"repair_sources":[],"recheck_ran":false} +EOF + cat > "$FLOW_STATE_DIR/write-log.json" <<'EOF' +[] +EOF +} + +# Setup PATH wrappers for cp, mv, mktemp to observe all writes +_flow_setup_path_wrappers() { + local wrap_dir="$FLOW_STATE_DIR/wrappers" + mkdir -p "$wrap_dir" + local real_cp real_mv real_mktemp + real_cp=$(command -v cp) + real_mv=$(command -v mv) + real_mktemp=$(command -v mktemp) + + # cp wrapper: log destination then delegate + cat > "$wrap_dir/cp" < "$wrap_dir/mv" < "$wrap_dir/mktemp" < "$path" +} + +# Append file through flow instrumentation +_flow_append_file() { + local path="$1" + local content="$2" + _flow_log_write "append" "$path" + printf '%s' "$content" >> "$path" +} + +# Copy file through flow instrumentation +_flow_copy_file() { + local src="$1" dst="$2" + _flow_log_write "copy" "$dst" + cp "$src" "$dst" +} + +# Mock ask-codex.sh that logs invocations and returns predefined findings +mock_ask_codex() { + local checker_name="$1" + local pass_num="${2:-1}" + echo "invoked: $checker_name pass=$pass_num" >> "$FLOW_STATE_DIR/codex.log" + python3 -c ' +import json,sys +path=sys.argv[1] +checker=sys.argv[2] +pass_num=sys.argv[3] +with open(path) as f: state=json.load(f) +state["checker_calls"].append({"checker":checker,"pass":int(pass_num)}) +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" "$checker_name" "$pass_num" + if [[ "$checker_name" == "draft-consistency-checker" ]]; then + printf '%s' "$MOCK_DRAFT_CONSISTENCY_FINDINGS" + elif [[ "$checker_name" == "draft-ambiguity-checker" ]]; then + printf '%s' "$MOCK_DRAFT_AMBIGUITY_FINDINGS" + elif [[ "$checker_name" == "plan-consistency-checker" ]]; then + printf '%s' "$MOCK_PLAN_CONSISTENCY_FINDINGS" + elif [[ "$checker_name" == "plan-ambiguity-checker" ]]; then + printf '%s' "$MOCK_PLAN_AMBIGUITY_FINDINGS" + elif [[ 
"$checker_name" == "draft-plan-drift-checker" ]]; then
    printf '%s' "$MOCK_PLAN_DRIFT_FINDINGS"
  else
    printf '[]'
  fi
}

# Mocked AskUserQuestion: record the question and its option list into the
# flow state file, then answer with the canned $MOCK_AUQ_RESPONSE.
mock_ask_user_question() {
  local prompt_text="$1"
  shift
  local choices=("$@")
  local choices_json
  choices_json=$(python3 -c 'import json,sys; print(json.dumps(sys.argv[1:]))' "${choices[@]}")
  python3 -c '
import json,sys
path=sys.argv[1]
question=sys.argv[2]
options=json.loads(sys.argv[3])
with open(path) as f: state=json.load(f)
state["auq_calls"].append({"question":question,"options":options})
with open(path,"w") as f: json.dump(state,f)
' "$FLOW_STATE_DIR/state.json" "$prompt_text" "$choices_json"
  printf '%s' "$MOCK_AUQ_RESPONSE"
}

# Concatenate two JSON finding arrays ($1 and $2) and print the merged array.
_merge_findings() {
  python3 -c 'import json,sys; a=json.loads(sys.argv[1]); b=json.loads(sys.argv[2]); print(json.dumps(a+b))' "$1" "$2"
}

# Print how many findings in the JSON array $1 carry severity "blocker".
_count_blockers() {
  python3 -c 'import json,sys; print(sum(1 for f in json.loads(sys.argv[1]) if f.get("severity")=="blocker"))' "$1"
}

# Translate a finding ($1, JSON object) to the resolution_type the final
# schema accepts for its category; unknown categories yield an empty string.
_resolution_type_for_finding() {
  python3 -c '
import json,sys
category=json.loads(sys.argv[1]).get("category","")
mapping={
    "contradiction": "contradiction_resolution",
    "ambiguity": "ambiguity_answer",
    "draft-plan-drift": "drift_resolution",
}
print(mapping.get(category, ""))
' "$1"
}

# Resolve source-of-truth for a finding
_resolve_source_of_truth() {
  local finding="$1"
  local clarifications="$2"
  local draft_text="$3"
  local fid
  fid=$(python3 -c 'import json,sys; print(json.loads(sys.argv[1]).get("id",""))' "$finding")
  local has_clarification
  has_clarification=$(python3 -c '
import json,sys
clarifications=json.loads(sys.argv[1])
fid=sys.argv[2]
print("true" if any(c.get("finding_id")==fid for c in clarifications) else
"false") +' "$clarifications" "$fid") + if [[ "$has_clarification" == "true" ]]; then + printf 'clarification' + return + fi + # Check finding's explicit resolution_source hint (for test harness) + local explicit_source + explicit_source=$(python3 -c 'import json,sys; print(json.loads(sys.argv[1]).get("resolution_source",""))' "$finding") + if [[ -n "$explicit_source" ]]; then + printf '%s' "$explicit_source" + return + fi + # Default to leader_judgment for unclarified findings in mock + printf 'leader_judgment' +} + +# Validate all writes in the log against AC-20 allow-list +_flow_validate_ac20() { + local report_dir="$1" + local output_file="$2" + local variant_file="${3:-}" + local violations=0 + local entry_count + entry_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))))' "$FLOW_STATE_DIR/write-log.json") + local i=0 + while [[ $i -lt $entry_count ]]; do + local wpath + wpath=$(python3 -c ' +import json,sys +path=sys.argv[1] +idx=int(sys.argv[2]) +with open(path) as f: data=json.load(f) +print(data[idx]["path"]) +' "$FLOW_STATE_DIR/write-log.json" "$i") + local allowed="false" + # Exact allowed paths per AC-20 + if [[ "$wpath" == "$output_file" ]]; then + allowed="true" + elif [[ -n "$variant_file" && "$wpath" == "$variant_file" ]]; then + allowed="true" + elif [[ "$wpath" == "$report_dir/draft-findings.json" ]]; then + allowed="true" + elif [[ "$wpath" == "$report_dir/plan-findings.json" ]]; then + allowed="true" + elif [[ "$wpath" == "$report_dir/report.md" ]]; then + allowed="true" + elif [[ "$wpath" == "$report_dir/resolution.json" ]]; then + allowed="true" + elif [[ "$wpath" == "$report_dir/backup/$(basename "$output_file").bak" ]]; then + allowed="true" + elif [[ "$wpath" == "$report_dir/tmp/"* ]]; then + allowed="true" + elif [[ "$wpath" == "$(dirname "$output_file")/.plan-check-write."* ]]; then + allowed="true" + fi + if [[ "$allowed" == "false" ]]; then + violations=$((violations + 1)) + echo "AC-20 violation: $wpath" >> 
"$FLOW_STATE_DIR/ac20-violations.log"
    fi
    i=$((i + 1))
  done
  # Emit the violation count; callers treat a non-zero value as AC-20 failure.
  printf '%d' "$violations"
}

# Helper: apply a mock repair by replacing PRE_REPAIR_BODY_SENTINEL with POST_REPAIR_BODY_SENTINEL
# $1: path of the generated plan file to repair in place.
# NOTE(review): plan_check_atomic_write is presumably provided by
# plan-check-common.sh, sourced earlier in this script — confirm.
# NOTE(review): the $(...) capture strips trailing newlines, so the repaired
# file may lose its final newline — confirm if byte-exactness matters.
_apply_mock_repair() {
  local output_file="$1"
  local repaired_content
  repaired_content=$(python3 -c '
import sys
path = sys.argv[1]
with open(path, "r") as f:
    content = f.read()
content = content.replace("PRE_REPAIR_BODY_SENTINEL", "POST_REPAIR_BODY_SENTINEL")
print(content)
' "$output_file")
  plan_check_atomic_write "$output_file" "$repaired_content"
}

# Run the full mocked check-mode flow
# Drives draft check -> draft-blocker resolution -> plan assembly -> plan check,
# recording every checker call, AskUserQuestion call, and write into the
# $FLOW_STATE_DIR state files.
#   $1 report_dir       directory receiving findings/report artifacts
#   $2 draft_file       input draft document
#   $3 output_file      generated plan (draft appended as appendix)
#   $4 plan_body_file   plan body used to build $3
#   $5 recheck_enabled  "true" enables the one-shot recheck phase (default "false")
#   $6 alt_lang         optional language suffix for a translated variant
# Returns 1 when the user aborts during draft-blocker resolution, 0 otherwise.
_run_mock_check_mode_flow() {
  local report_dir="$1"
  local draft_file="$2"
  local output_file="$3"
  local plan_body_file="$4"
  local recheck_enabled="${5:-false}"
  local alt_lang="${6:-}"

  mkdir -p "$report_dir/tmp"
  # Save PATH so the cp/mv/mktemp wrappers can be removed before returning.
  local _flow_original_path="$PATH"
  _flow_setup_path_wrappers
  local clarifications="[]"
  local unresolved_draft=0
  local unresolved_plan=0
  local recheck_new=0
  local repairs_changed_bytes="false"
  local abort_occurred="false"

  # ========================================
  # Phase 1: Draft Check
  # ========================================
  # Both draft checkers run once; "[]" means that checker produced no findings.
  local cf af
  cf=$(mock_ask_codex "draft-consistency-checker" 1)
  af=$(mock_ask_codex "draft-ambiguity-checker" 1)

  local draft_merged="[]"
  [[ "$cf" != "[]" ]] && draft_merged="$cf"
  [[ "$af" != "[]" ]] && draft_merged="$(_merge_findings "$draft_merged" "$af")"

  local draft_json
  draft_json=$(plan_check_assemble_findings_json "$draft_file" "abc123" "test-model" "{}" 0 "$draft_merged")
  _flow_write_file "$report_dir/draft-findings.json" "$draft_json"

  # Process draft blockers
  local draft_blockers
  draft_blockers=$(python3 -c 'import json,sys; print(json.dumps([f for f in json.loads(sys.stdin.read()) if f.get("severity")=="blocker"]))' <<< "$draft_merged")
  local draft_blocker_count
  draft_blocker_count=$(_count_blockers "$draft_blockers")

  local resolved_draft_count=0
local i=0 + while [[ $i -lt $draft_blocker_count ]]; do + local finding explanation response resolvable + finding=$(python3 -c 'import json,sys; print(json.dumps(json.loads(sys.argv[1])[int(sys.argv[2])]))' "$draft_blockers" "$i") + explanation=$(python3 -c 'import json,sys; print(json.loads(sys.argv[1]).get("explanation",""))' "$finding") + resolvable=$(python3 -c 'import json,sys; print(str(json.loads(sys.argv[1]).get("resolvable",True)).lower())' "$finding") + response=$(mock_ask_user_question "$explanation" "Provide an answer that resolves the blocker" "Abort the command") + local fid + fid=$(python3 -c 'import json,sys; print(json.loads(sys.argv[1]).get("id",""))' "$finding") + if [[ "$response" == "Abort the command" || "$response" == "abort" ]]; then + abort_occurred="true" + break + elif [[ "$resolvable" == "false" ]]; then + # Blocker remains unresolved + true + elif [[ -n "$response" && "$response" != "Abort the command" ]]; then + resolved_draft_count=$((resolved_draft_count + 1)) + clarifications=$(python3 -c ' +import json,sys +clarifications=json.loads(sys.argv[1]) +fid=sys.argv[2] +answer=sys.argv[3] +clarifications.append({"finding_id":fid,"source":"user","answer":answer}) +print(json.dumps(clarifications)) +' "$clarifications" "$fid" "$response") + else + # No answer: leader-agent fallback with rationale + resolved_draft_count=$((resolved_draft_count + 1)) + clarifications=$(python3 -c ' +import json,sys +clarifications=json.loads(sys.argv[1]) +fid=sys.argv[2] +clarifications.append({"finding_id":fid,"source":"agent","answer":"fallback-resolved","rationale":"Leader agent decided via source-of-truth precedence"}) +print(json.dumps(clarifications)) +' "$clarifications" "$fid") + fi + i=$((i + 1)) + done + + unresolved_draft=$((draft_blocker_count - resolved_draft_count)) + + if [[ "$abort_occurred" == "true" ]]; then + rm -rf "$report_dir/tmp" + python3 -c ' +import json,sys +path=sys.argv[1] +with open(path) as f: state=json.load(f) 
+state["cleanup_done"]=True +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" + export PATH="$_flow_original_path" + return 1 + fi + + # Create output plan file + { + cat "$plan_body_file" + printf '\n--- Original Design Draft Start ---\n' + cat "$draft_file" + printf '\n--- Original Design Draft End ---\n' + } > "$output_file" + _flow_log_write "write" "$output_file" + + # ======================================== + # Phase 2: Plan Check + # ======================================== + local schema_out + if [[ -n "${MOCK_SCHEMA_FINDINGS:-}" ]]; then + schema_out="$MOCK_SCHEMA_FINDINGS" + else + schema_out=$(TMPDIR="$report_dir/tmp" plan_check_validate_schema "$output_file" "$PROJECT_ROOT/prompt-template/plan/gen-plan-template.md" 2>/dev/null) + fi + local sem_cons sem_amb sem_drift primary_plan_findings + sem_cons=$(mock_ask_codex "plan-consistency-checker" 1) + sem_amb=$(mock_ask_codex "plan-ambiguity-checker" 1) + primary_plan_findings="[]" + [[ "$sem_cons" != "[]" ]] && primary_plan_findings="$(_merge_findings "$primary_plan_findings" "$sem_cons")" + [[ "$sem_amb" != "[]" ]] && primary_plan_findings="$(_merge_findings "$primary_plan_findings" "$sem_amb")" + if [[ "$primary_plan_findings" != "[]" ]]; then + sem_drift=$(mock_ask_codex "draft-plan-drift-checker" 1) + else + sem_drift="[]" + fi + + local plan_merged="[]" + if [[ -n "$schema_out" && "$schema_out" != "[]" ]]; then + plan_merged="[$schema_out]" + fi + [[ "$sem_cons" != "[]" ]] && plan_merged="$(_merge_findings "$plan_merged" "$sem_cons")" + [[ "$sem_amb" != "[]" ]] && plan_merged="$(_merge_findings "$plan_merged" "$sem_amb")" + [[ "$sem_drift" != "[]" ]] && plan_merged="$(_merge_findings "$plan_merged" "$sem_drift")" + + local plan_json + plan_json=$(plan_check_assemble_findings_json "$output_file" "hash123" "test-model" "{}" 0 "$plan_merged") + _flow_write_file "$report_dir/plan-findings.json" "$plan_json" + _flow_append_file "$report_dir/report.md" "# Plan Check Report\n\n" 
+ + # Process plan blockers + local plan_blockers + plan_blockers=$(python3 -c 'import json,sys; print(json.dumps([f for f in json.loads(sys.stdin.read()) if f.get("severity")=="blocker"]))' <<< "$plan_merged") + local plan_blocker_count + plan_blocker_count=$(_count_blockers "$plan_blockers") + + local resolution_records="[]" + i=0 + while [[ $i -lt $plan_blocker_count ]]; do + local finding source_of_truth resolution_type + finding=$(python3 -c 'import json,sys; print(json.dumps(json.loads(sys.argv[1])[int(sys.argv[2])]))' "$plan_blockers" "$i") + source_of_truth=$(_resolve_source_of_truth "$finding" "$clarifications" "draft content") + resolution_type=$(_resolution_type_for_finding "$finding") + local fid + fid=$(python3 -c 'import json,sys; print(json.loads(sys.argv[1]).get("id",""))' "$finding") + + python3 -c ' +import json,sys +path=sys.argv[1] +source=sys.argv[2] +with open(path) as f: state=json.load(f) +state["repair_sources"].append(source) +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" "$source_of_truth" + + if [[ "$source_of_truth" == "clarification" || "$source_of_truth" == "draft_text" ]]; then + # High-priority source: silent repair + local backup_path + backup_path=$(plan_check_backup_plan "$output_file" "$report_dir") + python3 -c ' +import json,sys +path=sys.argv[1] +bp=sys.argv[2] +with open(path) as f: state=json.load(f) +state["backup_paths"].append(bp) +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" "$backup_path" + _apply_mock_repair "$output_file" + _flow_log_write "write" "$output_file" + python3 -c ' +import json,sys +path=sys.argv[1] +aw=sys.argv[2] +with open(path) as f: state=json.load(f) +state["atomic_writes"].append(aw) +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" "$output_file" + repairs_changed_bytes="true" + resolution_records=$(python3 -c ' +import json,sys +records=json.loads(sys.argv[1]) +fid=sys.argv[2] +source=sys.argv[3] 
+rtype=sys.argv[4] +records.append({"finding_id":fid,"resolution_type":rtype,"source":source,"applied":True}) +print(json.dumps(records)) +' "$resolution_records" "$fid" "$source_of_truth" "$resolution_type") + elif [[ "$source_of_truth" == "leader_judgment" ]]; then + # Leader judgment: diff preview + AskUserQuestion + local confirm_response + confirm_response=$(mock_ask_user_question "Apply this repair?" "yes" "no") + if [[ "$confirm_response" == "yes" ]]; then + local backup_path + backup_path=$(plan_check_backup_plan "$output_file" "$report_dir") + python3 -c ' +import json,sys +path=sys.argv[1] +bp=sys.argv[2] +with open(path) as f: state=json.load(f) +state["backup_paths"].append(bp) +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" "$backup_path" + _apply_mock_repair "$output_file" + _flow_log_write "write" "$output_file" + python3 -c ' +import json,sys +path=sys.argv[1] +aw=sys.argv[2] +with open(path) as f: state=json.load(f) +state["atomic_writes"].append(aw) +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" "$output_file" + repairs_changed_bytes="true" + resolution_records=$(python3 -c ' +import json,sys +records=json.loads(sys.argv[1]) +fid=sys.argv[2] +rtype=sys.argv[3] +records.append({"finding_id":fid,"resolution_type":rtype,"source":"leader_judgment","applied":True}) +print(json.dumps(records)) +' "$resolution_records" "$fid" "$resolution_type") + else + resolution_records=$(python3 -c ' +import json,sys +records=json.loads(sys.argv[1]) +fid=sys.argv[2] +rtype=sys.argv[3] +records.append({"finding_id":fid,"resolution_type":rtype,"source":"leader_judgment","applied":False}) +print(json.dumps(records)) +' "$resolution_records" "$fid" "$resolution_type") + fi + fi + i=$((i + 1)) + done + + _flow_write_file "$report_dir/resolution.json" "$resolution_records" + python3 -c ' +import json,sys +path=sys.argv[1] +records=json.loads(sys.argv[2]) +with open(path) as f: state=json.load(f) 
+state["resolution_records"]=records +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" "$resolution_records" + + # Compute unresolved plan blockers + unresolved_plan=$(python3 -c ' +import json,sys +records=json.loads(sys.argv[1]) +unresolved=sum(1 for r in records if not r.get("applied",False)) +print(unresolved) +' "$resolution_records") + + # ======================================== + # Phase 3: Optional Recheck + # ======================================== + if [[ "$recheck_enabled" == "true" && "$repairs_changed_bytes" == "true" ]]; then + # Recheck runs exactly once, check-only + local recheck_schema + recheck_schema=$(TMPDIR="$report_dir/tmp" plan_check_validate_schema "$output_file" "$PROJECT_ROOT/prompt-template/plan/gen-plan-template.md" 2>/dev/null) + local recheck_cons recheck_amb recheck_primary + recheck_cons=$(mock_ask_codex "plan-consistency-checker" 2) + recheck_amb=$(mock_ask_codex "plan-ambiguity-checker" 2) + recheck_primary="[]" + [[ "$recheck_cons" != "[]" ]] && recheck_primary="$(_merge_findings "$recheck_primary" "$recheck_cons")" + [[ "$recheck_amb" != "[]" ]] && recheck_primary="$(_merge_findings "$recheck_primary" "$recheck_amb")" + if [[ "$recheck_primary" != "[]" ]]; then + mock_ask_codex "draft-plan-drift-checker" 2 >/dev/null + fi + # Simulate recheck new blockers if MOCK_RECHECK_NEW_BLOCKERS is set + if [[ "${MOCK_RECHECK_NEW_BLOCKERS:-0}" -gt 0 ]]; then + recheck_new="$MOCK_RECHECK_NEW_BLOCKERS" + fi + python3 -c ' +import json,sys +path=sys.argv[1] +with open(path) as f: state=json.load(f) +state["recheck_ran"]=True +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" + fi + + # ======================================== + # Phase 4: Translation (after repair) + # ======================================== + local variant_path="" + if [[ -n "$alt_lang" ]]; then + variant_path="${output_file%.md}_${alt_lang}.md" + cp "$output_file" "$variant_path" + _flow_log_write "write" 
"$variant_path" + fi + + # ======================================== + # Phase 5: Auto-start gating + # ======================================== + local skip_msg="" + if [[ "$unresolved_draft" -gt 0 ]]; then + skip_msg="Auto-start skipped: unresolved-draft-blocker" + elif [[ "$unresolved_plan" -gt 0 ]]; then + skip_msg="Auto-start skipped: unresolved-plan-check-blocker" + elif [[ "$recheck_new" -gt 0 ]]; then + skip_msg="Auto-start skipped: recheck-failure" + fi + python3 -c ' +import json,sys +path=sys.argv[1] +msg=sys.argv[2] +with open(path) as f: state=json.load(f) +state["skip_message"]=msg +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" "$skip_msg" + + # ======================================== + # Phase 6: Cleanup + # ======================================== + rm -rf "$report_dir/tmp" + python3 -c ' +import json,sys +path=sys.argv[1] +with open(path) as f: state=json.load(f) +state["cleanup_done"]=True +with open(path,"w") as f: json.dump(state,f) +' "$FLOW_STATE_DIR/state.json" + + export PATH="$_flow_original_path" + return 0 +} + +# ======================================== +# Mocked Flow Scenario Tests +# ======================================== + +# Scenario 1: Draft-check pass +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "Draft line 1.\nDraft line 2.\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nGoal text.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +if [[ -f "$SCENARIO_DIR/output.md" ]]; then + pass "flow: draft-check pass creates output" +else + fail "flow: draft-check pass creates 
output" "created" "missing" +fi + +auq_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))["auq_calls"]))' "$FLOW_STATE_DIR/state.json") +if [[ "$auq_count" -eq 0 ]]; then + pass "flow: draft-check pass does not call AskUserQuestion" +else + fail "flow: draft-check pass does not call AskUserQuestion" "0" "$auq_count" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 2: Draft-check blocker + user clarification +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS='[{"id":"DC-001","severity":"blocker","category":"contradiction","source_checker":"draft-consistency-checker","location":{"section":"Goal","fragment":"X"},"evidence":"X contradicts Y","explanation":"Draft says both X and Y","suggested_resolution":"Clarify","affected_acs":[],"affected_tasks":[]}]' +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="Use X, not Y" + +printf "Draft line 1.\nDraft line 2.\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nGoal text.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +if [[ -f "$SCENARIO_DIR/output.md" ]]; then + pass "flow: draft-check blocker + clarification continues" +else + fail "flow: draft-check blocker + clarification continues" "created" "missing" +fi + +auq_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))["auq_calls"]))' "$FLOW_STATE_DIR/state.json") +if [[ "$auq_count" -eq 1 ]]; then + pass "flow: draft-check blocker calls AskUserQuestion exactly once" +else + fail "flow: draft-check blocker calls AskUserQuestion exactly once" "1" "$auq_count" +fi + +# Verify clarification was recorded +clar_count=$(python3 -c 'import json,sys; 
data=json.load(open(sys.argv[1])); calls=data.get("auq_calls",[]); print(len(calls))' "$FLOW_STATE_DIR/state.json") +if [[ "$clar_count" -eq 1 ]]; then + pass "flow: clarification AskUserQuestion recorded" +else + fail "flow: clarification AskUserQuestion recorded" "1" "$clar_count" +fi + +# Verify AskUserQuestion options are exactly answer and abort, no skip +auq_options=$(python3 -c 'import json,sys; data=json.load(open(sys.argv[1])); opts=data["auq_calls"][0]["options"]; print(json.dumps(opts))' "$FLOW_STATE_DIR/state.json") +if echo "$auq_options" | grep -q "Provide an answer that resolves the blocker" && echo "$auq_options" | grep -q "Abort the command"; then + pass "flow: draft-check AskUserQuestion offers answer and abort options" +else + fail "flow: draft-check AskUserQuestion offers answer and abort options" "[answer, abort]" "$auq_options" +fi +if echo "$auq_options" | grep -qi "skip"; then + fail "flow: draft-check AskUserQuestion does not offer skip option" +else + pass "flow: draft-check AskUserQuestion does not offer skip option" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 3: Draft-check blocker + no answer + leader fallback +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS='[{"id":"DC-001","severity":"blocker","category":"contradiction","source_checker":"draft-consistency-checker","location":{"section":"Goal","fragment":"X"},"evidence":"X contradicts Y","explanation":"Draft says both X and Y","suggested_resolution":"Clarify","affected_acs":[],"affected_tasks":[]}]' +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "Draft line 1.\nDraft line 2.\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nGoal text.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" 
"$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +if [[ -f "$SCENARIO_DIR/output.md" ]]; then + pass "flow: draft-check no-answer fallback continues" +else + fail "flow: draft-check no-answer fallback continues" "created" "missing" +fi + +auq_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))["auq_calls"]))' "$FLOW_STATE_DIR/state.json") +if [[ "$auq_count" -eq 1 ]]; then + pass "flow: draft-check no-answer still calls AskUserQuestion once" +else + fail "flow: draft-check no-answer still calls AskUserQuestion once" "1" "$auq_count" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 4: Draft-check abort flow +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +mkdir -p "$REPORT_DIR/tmp" +touch "$REPORT_DIR/tmp/work.tmp" +MOCK_DRAFT_CONSISTENCY_FINDINGS='[{"id":"DC-001","severity":"blocker","category":"contradiction","source_checker":"draft-consistency-checker","location":{"section":"Goal","fragment":"X"},"evidence":"X contradicts Y","explanation":"Draft says both X and Y","suggested_resolution":"Clarify","affected_acs":[],"affected_tasks":[]}]' +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="abort" + +printf "Draft line 1.\nDraft line 2.\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nGoal text.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" || true + +if [[ ! -f "$SCENARIO_DIR/output.md" ]]; then + pass "flow: draft-check abort does not create output" +else + fail "flow: draft-check abort does not create output" "not created" "created" +fi + +if [[ ! 
-d "$REPORT_DIR/tmp" ]]; then + pass "flow: draft-check abort cleans tmp" +else + fail "flow: draft-check abort cleans tmp" "removed" "still present" +fi + +if [[ -f "$REPORT_DIR/draft-findings.json" ]]; then + pass "flow: draft-check abort retains diagnostics" +else + fail "flow: draft-check abort retains diagnostics" "retained" "missing" +fi + +cleanup_done=$(python3 -c 'import json,sys; print(json.load(open(sys.argv[1]))["cleanup_done"])' "$FLOW_STATE_DIR/state.json") +if [[ "$cleanup_done" == "True" ]]; then + pass "flow: draft-check abort sets cleanup flag" +else + fail "flow: draft-check abort sets cleanup flag" "True" "$cleanup_done" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 5: Plan-check pass +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS='[{"id":"DD-001","severity":"blocker","category":"draft-plan-drift","source_checker":"draft-plan-drift-checker","location":{"section":"Goal","fragment":"missing draft detail"},"evidence":"Draft-only detail","explanation":"This should not be used when primary plan findings are empty","suggested_resolution":"Do not call drift checker","related_finding_id":"C-001","affected_acs":[],"affected_tasks":[]}]' +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "Draft line 1.\nDraft line 2.\n" > "$SCENARIO_DIR/draft.md" +{ + printf "# Test Plan\n\n## Goal Description\nTest goal.\n\n## Acceptance Criteria\n\n- AC-1: Test criterion\n - Positive Tests:\n - test passes\n - Negative Tests:\n - test fails\n\n## Path Boundaries\n\n### Upper Bound\nMaximum scope.\n\n### Lower Bound\nMinimum scope.\n\n### Allowed Choices\n- Can use: bash\n- Cannot use: python\n\n## Dependencies and Sequence\n\n### Milestones\n1. 
M1: Do thing\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n\n## Claude-Codex Deliberation\n\n### Agreements\n- Both agree.\n\n### Resolved Disagreements\n- None.\n\n### Convergence Status\n- Final Status: converged\n\n## Pending User Decisions\n\n## Implementation Notes\n\n### Code Style Requirements\n- No AC- references in code.\n" +} > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +plan_blockers=$(python3 -c 'import json,sys; data=json.load(open(sys.argv[1])); print(len(data.get("repair_sources",[])))' "$FLOW_STATE_DIR/state.json") +if [[ "$plan_blockers" -eq 0 ]]; then + pass "flow: plan-check pass has zero blockers" +else + fail "flow: plan-check pass has zero blockers" "0" "$plan_blockers" +fi + +drift_calls=$(python3 -c 'import json,sys; data=json.load(open(sys.argv[1])); print(sum(1 for c in data["checker_calls"] if c["checker"]=="draft-plan-drift-checker"))' "$FLOW_STATE_DIR/state.json") +if [[ "$drift_calls" -eq 0 ]]; then + pass "flow: no primary plan findings skips draft-plan-drift-checker" +else + fail "flow: no primary plan findings skips draft-plan-drift-checker" "0" "$drift_calls" +fi + +if grep -q "DD-001" "$REPORT_DIR/plan-findings.json"; then + fail "flow: no primary plan findings excludes drift findings" +else + pass "flow: no primary plan findings excludes drift findings" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 5b: Primary ambiguity enables drift source-recovery pass +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" 
+MOCK_PLAN_AMBIGUITY_FINDINGS='[{"id":"A-001","severity":"warning","category":"ambiguity","source_checker":"plan-ambiguity-checker","location":{"section":"Goal","fragment":"check mode"},"evidence":"Plan does not say whether check mode is opt-in","explanation":"The plan can be read as default-on or opt-in","suggested_resolution":"Use the draft default-off behavior","affected_acs":[],"affected_tasks":[],"ambiguity_details":{"competing_interpretations":["check mode is default-on","check mode is opt-in"],"execution_drift_risk":"medium","clarification_question":"Is check mode opt-in?"}}]' +MOCK_PLAN_DRIFT_FINDINGS='[{"id":"DD-001","severity":"warning","category":"draft-plan-drift","source_checker":"draft-plan-drift-checker","location":{"section":"Goal","fragment":"check mode"},"evidence":"Draft says check mode is disabled by default and enabled by --check.","explanation":"The draft resolves the supplied ambiguity.","suggested_resolution":"State that check mode is opt-in.","related_finding_id":"A-001","affected_acs":[],"affected_tasks":[]}]' +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "Check mode is disabled by default and enabled by --check.\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nDocument check mode.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +drift_calls=$(python3 -c 'import json,sys; data=json.load(open(sys.argv[1])); print(sum(1 for c in data["checker_calls"] if c["checker"]=="draft-plan-drift-checker"))' "$FLOW_STATE_DIR/state.json") +if [[ "$drift_calls" -eq 1 ]]; then + pass "flow: primary ambiguity enables draft-plan-drift-checker" +else + fail "flow: primary ambiguity enables draft-plan-drift-checker" "1" "$drift_calls" +fi + +if grep -q "DD-001" "$REPORT_DIR/plan-findings.json"; then + pass "flow: primary ambiguity merges drift source-recovery finding" +else + fail "flow: primary ambiguity merges drift 
source-recovery finding" "DD-001 present" "missing" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 5c: Primary contradiction can produce clarification-backed drift +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS='[{"id":"DC-001","severity":"blocker","category":"contradiction","source_checker":"draft-consistency-checker","location":{"section":"Goal","fragment":"old key vs new key"},"evidence":"Draft says use old_key but clarification is needed","explanation":"Config key source is unclear","suggested_resolution":"Clarify key name","affected_acs":[],"affected_tasks":[]}]' +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS='[{"id":"C-001","severity":"warning","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"Plan still names both old_key and new_key","explanation":"The generated plan conflicts on the key name","suggested_resolution":"Use the clarified key name","affected_acs":[],"affected_tasks":[]}]' +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS='[{"id":"DD-001","severity":"warning","category":"draft-plan-drift","source_checker":"draft-plan-drift-checker","location":{"section":"Goal","fragment":"config key"},"evidence":"Clarification DC-001 answer: Use new_key only.","explanation":"The newer clarification resolves the supplied contradiction and supersedes the older draft key.","suggested_resolution":"Use new_key only.","related_finding_id":"C-001","affected_acs":[],"affected_tasks":[]}]' +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="Use new_key only." 
+ +printf "Original draft says use old_key until clarified.\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nUse old_key and new_key.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +drift_calls=$(python3 -c 'import json,sys; data=json.load(open(sys.argv[1])); print(sum(1 for c in data["checker_calls"] if c["checker"]=="draft-plan-drift-checker"))' "$FLOW_STATE_DIR/state.json") +if [[ "$drift_calls" -eq 1 ]]; then + pass "flow: primary contradiction enables clarification-backed drift lookup" +else + fail "flow: primary contradiction enables clarification-backed drift lookup" "1" "$drift_calls" +fi + +if grep -q "Clarification DC-001 answer: Use new_key only." "$REPORT_DIR/plan-findings.json" && \ + grep -q '"related_finding_id": "C-001"' "$REPORT_DIR/plan-findings.json"; then + pass "flow: clarification-backed drift is tied to primary contradiction" +else + fail "flow: clarification-backed drift is tied to primary contradiction" "clarification evidence and related_finding_id" "missing" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 5d: Plan aligned with newer clarification does not drift only because older draft differs +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS='[{"id":"DC-001","severity":"blocker","category":"contradiction","source_checker":"draft-consistency-checker","location":{"section":"Goal","fragment":"old key vs new key"},"evidence":"Draft says use old_key but clarification is needed","explanation":"Config key source is unclear","suggested_resolution":"Clarify key name","affected_acs":[],"affected_tasks":[]}]' +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" 
+MOCK_PLAN_DRIFT_FINDINGS='[{"id":"DD-001","severity":"warning","category":"draft-plan-drift","source_checker":"draft-plan-drift-checker","location":{"section":"Goal","fragment":"new_key"},"evidence":"Older draft says old_key.","explanation":"This would be invalid because the newer clarification supersedes the old draft text.","suggested_resolution":"Use old_key.","related_finding_id":"C-001","affected_acs":[],"affected_tasks":[]}]' +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="Use new_key only." + +printf "Original draft says use old_key until clarified.\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nUse new_key only.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +drift_calls=$(python3 -c 'import json,sys; data=json.load(open(sys.argv[1])); print(sum(1 for c in data["checker_calls"] if c["checker"]=="draft-plan-drift-checker"))' "$FLOW_STATE_DIR/state.json") +if [[ "$drift_calls" -eq 0 ]]; then + pass "flow: plan aligned with newer clarification skips older-draft drift" +else + fail "flow: plan aligned with newer clarification skips older-draft drift" "0" "$drift_calls" +fi + +if grep -q "DD-001" "$REPORT_DIR/plan-findings.json"; then + fail "flow: older draft text alone does not produce drift after clarification" "no DD-001" "DD-001 present" +else + pass "flow: older draft text alone does not produce drift after clarification" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 6: Plan-check blocker + high-priority silent repair +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS='[{"id":"C-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"default off vs on"},"evidence":"Plan says 
default-on but draft says default-off","explanation":"Plan contradicts draft","suggested_resolution":"Use draft default-off","resolution_source":"draft_text","affected_acs":[],"affected_tasks":[]}]' +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "draft content line 1.\ndraft content line 2.\n" > "$SCENARIO_DIR/draft.md" +{ + printf "# Test Plan\n\n## Goal Description\nTest goal.\n\n## Acceptance Criteria\n\n- AC-1: Test criterion\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n" +} > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +auq_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))["auq_calls"]))' "$FLOW_STATE_DIR/state.json") +if [[ "$auq_count" -eq 0 ]]; then + pass "flow: high-priority repair is silent (no AskUserQuestion)" +else + fail "flow: high-priority repair is silent (no AskUserQuestion)" "0" "$auq_count" +fi + +backup_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))["backup_paths"]))' "$FLOW_STATE_DIR/state.json") +if [[ "$backup_count" -eq 1 ]]; then + pass "flow: high-priority repair creates backup" +else + fail "flow: high-priority repair creates backup" "1" "$backup_count" +fi + +# Verify appendix preserved +_plan_check_extract_appendix "$SCENARIO_DIR/output.md" > "$SCENARIO_DIR/extracted.md" +if cmp -s "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/extracted.md"; then + pass "flow: high-priority repair preserves appendix" +else + fail "flow: high-priority repair preserves appendix" "identical" "differ" +fi + +# Verify repair source recorded +first_source=$(python3 -c 'import json,sys; data=json.load(open(sys.argv[1])); print(data["repair_sources"][0])' "$FLOW_STATE_DIR/state.json") +if [[ 
"$first_source" == "draft_text" ]]; then + pass "flow: high-priority repair source is draft_text" +else + fail "flow: high-priority repair source is draft_text" "draft_text" "$first_source" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 6b: Draft-plan-drift blocker + high-priority silent repair preserves appendix +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS='[{"id":"C-001","severity":"warning","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"default off vs on"},"evidence":"Plan says default-on but draft says default-off","explanation":"Primary finding gates draft drift analysis","suggested_resolution":"Use draft default-off","affected_acs":[],"affected_tasks":[]}]' +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS='[{"id":"DD-001","severity":"blocker","category":"draft-plan-drift","source_checker":"draft-plan-drift-checker","location":{"section":"Goal","fragment":"default off vs on"},"evidence":"Original draft says the feature is default-off.","explanation":"The draft resolves the supplied primary contradiction.","suggested_resolution":"Rewrite generated plan body to default-off.","resolution_source":"draft_text","related_finding_id":"C-001","affected_acs":[],"affected_tasks":[]}]' +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "Original draft unique bytes line A.\nOriginal draft unique bytes line B.\n" > "$SCENARIO_DIR/draft.md" +{ + printf "# Test Plan\n\n## Goal Description\nPRE_REPAIR_BODY_SENTINEL\n\n## Acceptance Criteria\n\n- AC-1: Test criterion\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n" +} > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow 
"$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +drift_calls=$(python3 -c 'import json,sys; data=json.load(open(sys.argv[1])); print(sum(1 for c in data["checker_calls"] if c["checker"]=="draft-plan-drift-checker"))' "$FLOW_STATE_DIR/state.json") +if [[ "$drift_calls" -eq 1 ]]; then + pass "flow: drift repair calls draft-plan-drift-checker" +else + fail "flow: drift repair calls draft-plan-drift-checker" "1" "$drift_calls" +fi + +auq_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))["auq_calls"]))' "$FLOW_STATE_DIR/state.json") +if [[ "$auq_count" -eq 0 ]]; then + pass "flow: drift repair is silent (no AskUserQuestion)" +else + fail "flow: drift repair is silent (no AskUserQuestion)" "0" "$auq_count" +fi + +backup_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))["backup_paths"]))' "$FLOW_STATE_DIR/state.json") +if [[ "$backup_count" -eq 1 ]]; then + pass "flow: drift repair creates exactly one backup" +else + fail "flow: drift repair creates exactly one backup" "1" "$backup_count" +fi + +atomic_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))["atomic_writes"]))' "$FLOW_STATE_DIR/state.json") +if [[ "$atomic_count" -eq 1 ]]; then + pass "flow: drift repair records exactly one atomic write" +else + fail "flow: drift repair records exactly one atomic write" "1" "$atomic_count" +fi + +if grep -q "POST_REPAIR_BODY_SENTINEL" "$SCENARIO_DIR/output.md" && \ + ! 
grep -q "PRE_REPAIR_BODY_SENTINEL" "$SCENARIO_DIR/output.md"; then + pass "flow: drift repair rewrites generated plan body" +else + fail "flow: drift repair rewrites generated plan body" "POST without PRE" "missing post or stale pre" +fi + +_plan_check_extract_appendix "$SCENARIO_DIR/output.md" > "$SCENARIO_DIR/extracted.md" +if cmp -s "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/extracted.md"; then + pass "flow: drift repair preserves appendix byte-for-byte" +else + fail "flow: drift repair preserves appendix byte-for-byte" "identical" "differ" +fi + +drift_resolution_record=$(python3 -c ' +import json,sys +records=json.load(open(sys.argv[1])) +match=[ + r for r in records + if r.get("finding_id")=="DD-001" + and r.get("resolution_type")=="drift_resolution" + and r.get("source")=="draft_text" + and r.get("applied") is True +] +print("ok" if len(match)==1 else "bad") +' "$REPORT_DIR/resolution.json") +if [[ "$drift_resolution_record" == "ok" ]]; then + pass "flow: drift repair resolution.json records drift_resolution" +else + fail "flow: drift repair resolution.json records drift_resolution" "ok" "$drift_resolution_record" +fi + +drift_state_record=$(python3 -c ' +import json,sys +state=json.load(open(sys.argv[1])) +records=state.get("resolution_records", []) +match=[ + r for r in records + if r.get("finding_id")=="DD-001" + and r.get("resolution_type")=="drift_resolution" + and r.get("source")=="draft_text" + and r.get("applied") is True +] +print("ok" if len(match)==1 else "bad") +' "$FLOW_STATE_DIR/state.json") +if [[ "$drift_state_record" == "ok" ]]; then + pass "flow: drift repair state records drift_resolution" +else + fail "flow: drift repair state records drift_resolution" "ok" "$drift_state_record" +fi + +drift_related_id=$(python3 -c ' +import json,sys +data=json.load(open(sys.argv[1])) +match=[ + f for f in data.get("findings", []) + if f.get("id")=="DD-001" + and f.get("category")=="draft-plan-drift" + and f.get("related_finding_id")=="C-001" +] +print("ok" 
if len(match)==1 else "bad") +' "$REPORT_DIR/plan-findings.json") +if [[ "$drift_related_id" == "ok" ]]; then + pass "flow: drift repair plan findings preserve related_finding_id" +else + fail "flow: drift repair plan findings preserve related_finding_id" "ok" "$drift_related_id" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 7: Plan-check blocker + leader judgment + diff + confirmation +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS='[{"id":"C-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"wording"},"evidence":"Wording is ambiguous","explanation":"Leader judgment required for wording fix","suggested_resolution":"Rephrase","affected_acs":[],"affected_tasks":[]}]' +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="yes" + +printf "draft content line 1.\ndraft content line 2.\n" > "$SCENARIO_DIR/draft.md" +{ + printf "# Test Plan\n\n## Goal Description\nTest goal.\n\n## Acceptance Criteria\n\n- AC-1: Test criterion\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n" +} > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +auq_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))["auq_calls"]))' "$FLOW_STATE_DIR/state.json") +if [[ "$auq_count" -eq 1 ]]; then + pass "flow: leader-judgment calls AskUserQuestion for confirmation" +else + fail "flow: leader-judgment calls AskUserQuestion for confirmation" "1" "$auq_count" +fi + +backup_count=$(python3 -c 'import json,sys; 
print(len(json.load(open(sys.argv[1]))["backup_paths"]))' "$FLOW_STATE_DIR/state.json") +if [[ "$backup_count" -eq 1 ]]; then + pass "flow: leader-judgment confirmed repair creates backup" +else + fail "flow: leader-judgment confirmed repair creates backup" "1" "$backup_count" +fi + +first_source=$(python3 -c 'import json,sys; data=json.load(open(sys.argv[1])); print(data["repair_sources"][0])' "$FLOW_STATE_DIR/state.json") +if [[ "$first_source" == "leader_judgment" ]]; then + pass "flow: leader-judgment repair source recorded" +else + fail "flow: leader-judgment repair source recorded" "leader_judgment" "$first_source" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 8: Recheck runs exactly once +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS='[{"id":"C-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"wording"},"evidence":"Wording is ambiguous","explanation":"Leader judgment required for wording fix","suggested_resolution":"Rephrase","affected_acs":[],"affected_tasks":[]}]' +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="yes" + +printf "draft content line 1.\ndraft content line 2.\n" > "$SCENARIO_DIR/draft.md" +{ + printf "# Test Plan\n\n## Goal Description\nTest goal.\n\n## Acceptance Criteria\n\n- AC-1: Test criterion\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n" +} > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "true" "" + +recheck_ran=$(python3 -c 'import json,sys; 
print(json.load(open(sys.argv[1]))["recheck_ran"])' "$FLOW_STATE_DIR/state.json")
+if [[ "$recheck_ran" == "True" ]]; then
+  pass "flow: recheck runs when enabled and repairs changed bytes"
+else
+  fail "flow: recheck runs when enabled and repairs changed bytes" "True" "$recheck_ran"
+fi
+
+# Verify recheck added exactly 3 extra checker calls (schema validation is real, not a mock_ask_codex call, so it is not counted)
+# Mock codex calls: draft-consistency, draft-ambiguity, plan-consistency (pass1), plan-ambiguity (pass1), drift (pass1), plan-consistency (pass2), plan-ambiguity (pass2), drift (pass2)
+# Total mock checker calls should be 8 (2 draft + 3 plan initial + 3 plan recheck)
+checker_calls=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1]))["checker_calls"]))' "$FLOW_STATE_DIR/state.json")
+if [[ "$checker_calls" -eq 8 ]]; then
+  pass "flow: recheck adds exactly 3 extra checker calls"
+else
+  fail "flow: recheck adds exactly 3 extra checker calls" "8" "$checker_calls"
+fi
+
+rm -rf "$SCENARIO_DIR"
+
+# Scenario 9: Recheck skipped when repairs did not change bytes
+SCENARIO_DIR=$(mktemp -d)
+REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR")
+_flow_reset_state "$SCENARIO_DIR/flow"
+MOCK_DRAFT_CONSISTENCY_FINDINGS="[]"
+MOCK_DRAFT_AMBIGUITY_FINDINGS="[]"
+MOCK_PLAN_CONSISTENCY_FINDINGS='[{"id":"C-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"wording"},"evidence":"Wording is ambiguous","explanation":"Leader judgment required for wording fix","suggested_resolution":"Rephrase","affected_acs":[],"affected_tasks":[]}]'
+MOCK_PLAN_AMBIGUITY_FINDINGS="[]"
+MOCK_PLAN_DRIFT_FINDINGS="[]"
+MOCK_SCHEMA_FINDINGS="[]"
+MOCK_AUQ_RESPONSE="no"
+
+printf "draft content line 1.\ndraft content line 2.\n" > "$SCENARIO_DIR/draft.md"
+{
+  printf "# Test Plan\n\n## Goal Description\nTest goal.\n\n## Acceptance Criteria\n\n- AC-1: Test criterion\n\n## Task Breakdown\n\n| Task ID |
Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n" +} > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "true" "" + +recheck_ran=$(python3 -c 'import json,sys; print(json.load(open(sys.argv[1]))["recheck_ran"])' "$FLOW_STATE_DIR/state.json") +if [[ "$recheck_ran" == "False" ]]; then + pass "flow: recheck skipped when user declines repair" +else + fail "flow: recheck skipped when user declines repair" "False" "$recheck_ran" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 10: Auto-start gating via driver +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "draft content\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nGoal.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +skip_msg=$(python3 -c 'import json,sys; print(json.load(open(sys.argv[1]))["skip_message"])' "$FLOW_STATE_DIR/state.json") +if [[ -z "$skip_msg" ]]; then + pass "flow: auto-start allowed when all blockers clear" +else + fail "flow: auto-start allowed when all blockers clear" "allowed" "blocked: $skip_msg" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 11: Auto-start blocked by unresolved plan blocker +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" 
+MOCK_PLAN_CONSISTENCY_FINDINGS='[{"id":"C-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"wording"},"evidence":"Wording is ambiguous","explanation":"Leader judgment required for wording fix","suggested_resolution":"Rephrase","affected_acs":[],"affected_tasks":[]}]' +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="no" + +printf "draft content\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nGoal.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +skip_msg=$(python3 -c 'import json,sys; print(json.load(open(sys.argv[1]))["skip_message"])' "$FLOW_STATE_DIR/state.json") +if [[ "$skip_msg" == "Auto-start skipped: unresolved-plan-check-blocker" ]]; then + pass "flow: auto-start skip message for plan-check-blocker" +else + fail "flow: auto-start skip message for plan-check-blocker" "unresolved-plan-check-blocker" "$skip_msg" +fi + +if echo "$skip_msg" | grep -qi "AC-"; then + fail "flow: auto-start skip message contains no AC- prefix" +else + pass "flow: auto-start skip message contains no AC- prefix" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 12: tmp cleanup on success +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "draft content\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nGoal.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +if [[ ! 
-d "$REPORT_DIR/tmp" ]]; then + pass "flow: tmp/ cleaned up on success path" +else + fail "flow: tmp/ cleaned up on success path" "removed" "still present" +fi + +cleanup_done=$(python3 -c 'import json,sys; print(json.load(open(sys.argv[1]))["cleanup_done"])' "$FLOW_STATE_DIR/state.json") +if [[ "$cleanup_done" == "True" ]]; then + pass "flow: cleanup flag set on success" +else + fail "flow: cleanup flag set on success" "True" "$cleanup_done" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 13: Translation after repair via driver with alt_lang=zh +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS='[{"id":"C-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"wording"},"evidence":"Wording is ambiguous","explanation":"Leader judgment required for wording fix","suggested_resolution":"Rephrase","resolution_source":"draft_text","affected_acs":[],"affected_tasks":[]}]' +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "draft content line 1.\ndraft content line 2.\n" > "$SCENARIO_DIR/draft.md" +{ + printf "# Test Plan\n\n## Goal Description\nPRE_REPAIR_BODY_SENTINEL\n\n## Acceptance Criteria\n\n- AC-1: Test criterion\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n" +} > "$SCENARIO_DIR/body.md" + +output_path="$SCENARIO_DIR/output.md" +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$output_path" "$SCENARIO_DIR/body.md" "false" "zh" + +variant_path="${output_path%.md}_zh.md" + +# Variant path uses documented suffix +if [[ "$variant_path" == "$SCENARIO_DIR/output_zh.md" ]]; then + pass "flow: 
translation variant path uses documented suffix" +else + fail "flow: translation variant path uses documented suffix" "$SCENARIO_DIR/output_zh.md" "$variant_path" +fi + +# Variant exists after driver completes +if [[ -f "$variant_path" ]]; then + pass "flow: translation variant created after repair" +else + fail "flow: translation variant created after repair" "created" "missing" +fi + +# Variant contains repaired bytes (sentinel replaced by _apply_mock_repair) +if grep -q "POST_REPAIR_BODY_SENTINEL" "$variant_path"; then + pass "flow: translation variant contains repaired bytes" +else + fail "flow: translation variant contains repaired bytes" "contains POST_REPAIR_BODY_SENTINEL" "missing" +fi + +# Variant does not contain stale pre-repair bytes +if grep -q "PRE_REPAIR_BODY_SENTINEL" "$variant_path"; then + fail "flow: translation variant does not contain stale pre-repair bytes" +else + pass "flow: translation variant does not contain stale pre-repair bytes" +fi + +# Appendix preserved in variant +_plan_check_extract_appendix "$variant_path" > "$SCENARIO_DIR/extracted.md" +if cmp -s "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/extracted.md"; then + pass "flow: translation variant preserves appendix" +else + fail "flow: translation variant preserves appendix" "identical" "differ" +fi + +# Atomic write recorded in state +atomic_count=$(python3 -c 'import json,sys; print(len(json.load(open(sys.argv[1])).get("atomic_writes",[])))' "$FLOW_STATE_DIR/state.json") +if [[ "$atomic_count" -eq 1 ]]; then + pass "flow: translation repair records atomic write" +else + fail "flow: translation repair records atomic write" "1" "$atomic_count" +fi + +# AC-20 validation on repaired flow: zero violations including helper-produced paths +violations=$(_flow_validate_ac20 "$REPORT_DIR" "$output_path" "$variant_path") +if [[ "$violations" -eq 0 ]]; then + pass "ac20: repaired translation flow produces zero write violations" +else + fail "ac20: repaired translation flow produces zero write 
violations" "0" "$violations" +fi + +# Assert write-log contains helper-observed cp (backup), mktemp (temp), and mv (output) paths +log_has_backup=$(python3 -c ' +import json,sys +path=sys.argv[1] +with open(path) as f: data=json.load(f) +has_cp = any(e.get("op")=="copy" and ".bak" in e.get("path","") for e in data) +print("true" if has_cp else "false") +' "$FLOW_STATE_DIR/write-log.json") +if [[ "$log_has_backup" == "true" ]]; then + pass "ac20: write-log observes backup copy from helper" +else + fail "ac20: write-log observes backup copy from helper" "true" "false" +fi + +log_has_temp=$(python3 -c ' +import json,sys +path=sys.argv[1] +with open(path) as f: data=json.load(f) +has_temp = any(e.get("op")=="temp" and ".plan-check-write." in e.get("path","") for e in data) +print("true" if has_temp else "false") +' "$FLOW_STATE_DIR/write-log.json") +if [[ "$log_has_temp" == "true" ]]; then + pass "ac20: write-log observes temp file from helper mktemp" +else + fail "ac20: write-log observes temp file from helper mktemp" "true" "false" +fi + +log_has_mv=$(python3 -c ' +import json,sys +path=sys.argv[1] +with open(path) as f: data=json.load(f) +has_mv = any(e.get("op")=="move" for e in data) +print("true" if has_mv else "false") +' "$FLOW_STATE_DIR/write-log.json") +if [[ "$log_has_mv" == "true" ]]; then + pass "ac20: write-log observes move from helper mv" +else + fail "ac20: write-log observes move from helper mv" "true" "false" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 14: Auto-start blocked by unresolved draft blocker +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS='[{"id":"DC-001","severity":"blocker","category":"contradiction","source_checker":"draft-consistency-checker","location":{"section":"Goal","fragment":"X"},"evidence":"X contradicts Y","explanation":"Draft says both X and 
Y","suggested_resolution":"Clarify","resolvable":false,"affected_acs":[],"affected_tasks":[]}]' +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "draft content\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nGoal.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +skip_msg=$(python3 -c 'import json,sys; print(json.load(open(sys.argv[1]))["skip_message"])' "$FLOW_STATE_DIR/state.json") +if [[ "$skip_msg" == "Auto-start skipped: unresolved-draft-blocker" ]]; then + pass "flow: auto-start skip message for draft-blocker" +else + fail "flow: auto-start skip message for draft-blocker" "unresolved-draft-blocker" "$skip_msg" +fi + +if echo "$skip_msg" | grep -qi "AC-"; then + fail "flow: draft-blocker skip message contains no AC- prefix" +else + pass "flow: draft-blocker skip message contains no AC- prefix" +fi + +rm -rf "$SCENARIO_DIR" + +# Scenario 15: Auto-start blocked by recheck-failure +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS='[{"id":"C-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Goal","fragment":"wording"},"evidence":"Wording is ambiguous","explanation":"Leader judgment required for wording fix","suggested_resolution":"Rephrase","resolution_source":"draft_text","affected_acs":[],"affected_tasks":[]}]' +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" +MOCK_RECHECK_NEW_BLOCKERS=1 + +printf "draft content line 1.\ndraft content line 2.\n" > "$SCENARIO_DIR/draft.md" +{ + printf "# 
Test Plan\n\n## Goal Description\nTest goal.\n\n## Acceptance Criteria\n\n- AC-1: Test criterion\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n" +} > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "true" "" + +skip_msg=$(python3 -c 'import json,sys; print(json.load(open(sys.argv[1]))["skip_message"])' "$FLOW_STATE_DIR/state.json") +if [[ "$skip_msg" == "Auto-start skipped: recheck-failure" ]]; then + pass "flow: auto-start skip message for recheck-failure" +else + fail "flow: auto-start skip message for recheck-failure" "recheck-failure" "$skip_msg" +fi + +if echo "$skip_msg" | grep -qi "AC-"; then + fail "flow: recheck-failure skip message contains no AC- prefix" +else + pass "flow: recheck-failure skip message contains no AC- prefix" +fi + +rm -rf "$SCENARIO_DIR" + +# ======================================== +# AC-20 Write Validation Tests +# ======================================== + +echo "" +echo "--- AC-20 write validation ---" + +# Positive: Normal flow writes only to allowed paths +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "draft content\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nGoal.\n" > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +violations=$(_flow_validate_ac20 "$REPORT_DIR" "$SCENARIO_DIR/output.md" "") +if [[ "$violations" -eq 0 ]]; then + pass "ac20: normal flow produces zero write 
violations" +else + fail "ac20: normal flow produces zero write violations" "0" "$violations" +fi + +rm -rf "$SCENARIO_DIR" + +# Positive: Real schema validation temp files are routed through REPORT_DIR/tmp +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +# Leave MOCK_SCHEMA_FINDINGS unset so real plan_check_validate_schema runs +unset MOCK_SCHEMA_FINDINGS +MOCK_AUQ_RESPONSE="" + +printf "draft content\n" > "$SCENARIO_DIR/draft.md" +{ + printf "# Test Plan\n\n## Goal Description\nGoal.\n\n## Acceptance Criteria\n\n- AC-1: Test criterion\n\n## Path Boundaries\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n" +} > "$SCENARIO_DIR/body.md" + +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$SCENARIO_DIR/output.md" "$SCENARIO_DIR/body.md" "false" "" + +# Assert at least one temp entry was recorded under REPORT_DIR/tmp +schema_temps_inside=$(python3 -c ' +import json,sys +path=sys.argv[1] +report_dir=sys.argv[2] +with open(path) as f: data=json.load(f) +good = [e["path"] for e in data if e.get("op")=="temp" and e["path"].startswith(report_dir + "/tmp/")] +print(len(good)) +' "$FLOW_STATE_DIR/write-log.json" "$REPORT_DIR") +if [[ "$schema_temps_inside" -ge 1 ]]; then + pass "ac20: schema validation records at least one temp under REPORT_DIR/tmp" +else + fail "ac20: schema validation records at least one temp under REPORT_DIR/tmp" ">=1" "$schema_temps_inside" +fi + +# Assert no temp entries escaped REPORT_DIR/tmp +schema_temps_outside=$(python3 -c ' +import json,sys +path=sys.argv[1] +report_dir=sys.argv[2] +with open(path) as f: data=json.load(f) +bad = [e["path"] for e in 
data if e.get("op")=="temp" and not e["path"].startswith(report_dir + "/tmp/")] +print(len(bad)) +' "$FLOW_STATE_DIR/write-log.json" "$REPORT_DIR") +if [[ "$schema_temps_outside" -eq 0 ]]; then + pass "ac20: schema validation temps are under REPORT_DIR/tmp" +else + fail "ac20: schema validation temps are under REPORT_DIR/tmp" "0 outside" "$schema_temps_outside outside" +fi + +violations=$(_flow_validate_ac20 "$REPORT_DIR" "$SCENARIO_DIR/output.md" "") +if [[ "$violations" -eq 0 ]]; then + pass "ac20: real schema validation flow produces zero write violations" +else + fail "ac20: real schema validation flow produces zero write violations" "0" "$violations" +fi + +rm -rf "$SCENARIO_DIR" + +# Positive: Translation flow writes to allowed paths (output + variant) +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +MOCK_DRAFT_CONSISTENCY_FINDINGS="[]" +MOCK_DRAFT_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_CONSISTENCY_FINDINGS="[]" +MOCK_PLAN_AMBIGUITY_FINDINGS="[]" +MOCK_PLAN_DRIFT_FINDINGS="[]" +MOCK_SCHEMA_FINDINGS="[]" +MOCK_AUQ_RESPONSE="" + +printf "draft content\n" > "$SCENARIO_DIR/draft.md" +printf "# Plan\n\n## Goal\nGoal.\n" > "$SCENARIO_DIR/body.md" + +output_path="$SCENARIO_DIR/output.md" +_run_mock_check_mode_flow "$REPORT_DIR" "$SCENARIO_DIR/draft.md" "$output_path" "$SCENARIO_DIR/body.md" "false" "zh" + +variant_path="${output_path%.md}_zh.md" +violations=$(_flow_validate_ac20 "$REPORT_DIR" "$output_path" "$variant_path") +if [[ "$violations" -eq 0 ]]; then + pass "ac20: translation flow produces zero write violations" +else + fail "ac20: translation flow produces zero write violations" "0" "$violations" +fi + +rm -rf "$SCENARIO_DIR" + +# Negative: Detects forbidden path $REPORT_DIR/findings.json (wrong filename) +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +_flow_log_write "write" "$REPORT_DIR/findings.json" 
+violations=$(_flow_validate_ac20 "$REPORT_DIR" "$SCENARIO_DIR/output.md" "") +if [[ "$violations" -eq 1 ]]; then + pass "ac20: flags $REPORT_DIR/findings.json as violation" +else + fail "ac20: flags $REPORT_DIR/findings.json as violation" "1" "$violations" +fi +rm -rf "$SCENARIO_DIR" + +# Negative: Detects forbidden path $REPORT_DIR/draft/foo.json (no draft/ subdir) +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +_flow_log_write "write" "$REPORT_DIR/draft/foo.json" +violations=$(_flow_validate_ac20 "$REPORT_DIR" "$SCENARIO_DIR/output.md" "") +if [[ "$violations" -eq 1 ]]; then + pass "ac20: flags $REPORT_DIR/draft/ as violation" +else + fail "ac20: flags $REPORT_DIR/draft/ as violation" "1" "$violations" +fi +rm -rf "$SCENARIO_DIR" + +# Negative: Detects forbidden path $REPORT_DIR/plan/bar.json (no plan/ subdir) +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +_flow_log_write "write" "$REPORT_DIR/plan/bar.json" +violations=$(_flow_validate_ac20 "$REPORT_DIR" "$SCENARIO_DIR/output.md" "") +if [[ "$violations" -eq 1 ]]; then + pass "ac20: flags $REPORT_DIR/plan/ as violation" +else + fail "ac20: flags $REPORT_DIR/plan/ as violation" "1" "$violations" +fi +rm -rf "$SCENARIO_DIR" + +# Negative: Detects write outside allow-list +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +_flow_log_write "write" "$SCENARIO_DIR/secret_leak.md" +violations=$(_flow_validate_ac20 "$REPORT_DIR" "$SCENARIO_DIR/output.md" "") +if [[ "$violations" -eq 1 ]]; then + pass "ac20: flags write outside allow-list as violation" +else + fail "ac20: flags write outside allow-list as violation" "1" "$violations" +fi +rm -rf "$SCENARIO_DIR" + +# Negative: Detects forbidden backup path $REPORT_DIR/backup/other.bak +SCENARIO_DIR=$(mktemp -d) 
+REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +_flow_log_write "write" "$REPORT_DIR/backup/other.bak" +violations=$(_flow_validate_ac20 "$REPORT_DIR" "$SCENARIO_DIR/output.md" "") +if [[ "$violations" -eq 1 ]]; then + pass "ac20: flags $REPORT_DIR/backup/other.bak as violation" +else + fail "ac20: flags $REPORT_DIR/backup/other.bak as violation" "1" "$violations" +fi +rm -rf "$SCENARIO_DIR" + +# Negative: Detects forbidden nested backup path $REPORT_DIR/backup/nested/file.bak +SCENARIO_DIR=$(mktemp -d) +REPORT_DIR=$(plan_check_init_report_dir "$SCENARIO_DIR") +_flow_reset_state "$SCENARIO_DIR/flow" +mkdir -p "$REPORT_DIR/backup/nested" +_flow_log_write "write" "$REPORT_DIR/backup/nested/file.bak" +violations=$(_flow_validate_ac20 "$REPORT_DIR" "$SCENARIO_DIR/output.md" "") +if [[ "$violations" -eq 1 ]]; then + pass "ac20: flags $REPORT_DIR/backup/nested/file.bak as violation" +else + fail "ac20: flags $REPORT_DIR/backup/nested/file.bak as violation" "1" "$violations" +fi +rm -rf "$SCENARIO_DIR" + +# ======================================== +# Regression Tests +# ======================================== + +echo "" +echo "--- Regression tests ---" + +# Regression 1: Schema-template path in gen-plan.md +if grep -q 'plan_check_validate_schema.*prompt-template/plan/gen-plan-template.md' "$GEN_PLAN_CMD"; then + pass "regression: schema validation uses canonical template path" +else + fail "regression: schema validation uses canonical template path" "present" "missing" +fi + +# Regression 2: Default-mode full output baseline using fixtures +FIXTURE_DIR="$SCRIPT_DIR/fixtures/gen-plan-check" +if [[ -d "$FIXTURE_DIR" ]]; then + TEST_REG_DIR=$(mktemp -d) + cp "$FIXTURE_DIR/default-draft.md" "$TEST_REG_DIR/draft.md" + cp "$FIXTURE_DIR/default-template.md" "$TEST_REG_DIR/template.md" + + { + cat "$TEST_REG_DIR/template.md" + printf '\n--- Original Design Draft Start ---\n' + cat "$TEST_REG_DIR/draft.md" + printf '\n--- 
Original Design Draft End ---\n' + } > "$TEST_REG_DIR/plan.md" + + if cmp -s "$TEST_REG_DIR/plan.md" "$FIXTURE_DIR/default-expected.md"; then + pass "regression: default-mode full output matches fixture baseline" + else + fail "regression: default-mode full output matches fixture baseline" "identical" "differ" + fi + rm -rf "$TEST_REG_DIR" +else + skip "regression: default-mode full output matches fixture baseline" "fixture dir not found" +fi + +# Regression 3: Appendix preservation through accepted repair path +TEST_REG_DIR=$(mktemp -d) +printf "draft content line 1.\ndraft content line 2.\n" > "$TEST_REG_DIR/draft.md" +{ + printf "# Plan\n\n## Goal\nOriginal goal.\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n" + printf '\n--- Original Design Draft Start ---\n' + cat "$TEST_REG_DIR/draft.md" + printf '\n--- Original Design Draft End ---\n' +} > "$TEST_REG_DIR/plan.md" + +REPORT_DIR=$(plan_check_init_report_dir "$TEST_REG_DIR") + +# Accepted repair path: backup + atomic write +BACKUP_RESULT=$(plan_check_backup_plan "$TEST_REG_DIR/plan.md" "$REPORT_DIR") +{ + printf "# Plan\n\n## Goal\nRepaired goal.\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do thing | AC-1 | coding | - |\n" + printf '\n--- Original Design Draft Start ---\n' + cat "$TEST_REG_DIR/draft.md" + printf '\n--- Original Design Draft End ---\n' +} > "$TEST_REG_DIR/repaired.md" + +plan_check_atomic_write "$TEST_REG_DIR/plan.md" "$(cat "$TEST_REG_DIR/repaired.md")" + +_plan_check_extract_appendix "$TEST_REG_DIR/plan.md" > "$TEST_REG_DIR/extracted.md" +if cmp -s "$TEST_REG_DIR/draft.md" "$TEST_REG_DIR/extracted.md"; then + pass "regression: appendix preserved through accepted repair path" +else + fail "regression: appendix preserved through accepted repair path" 
"identical" "differ" +fi +if [[ -f "$BACKUP_RESULT" ]]; then + pass "regression: accepted repair creates backup at flat path" +else + fail "regression: accepted repair creates backup at flat path" "exists" "missing" +fi +rm -rf "$TEST_REG_DIR" + +# Regression 5: Draft without trailing newline +TEST_REG_DIR=$(mktemp -d) +printf "no trailing newline" > "$TEST_REG_DIR/draft.md" +{ + printf "# Plan\n\n## Goal\nGoal text.\n" + printf '\n--- Original Design Draft Start ---\n' + cat "$TEST_REG_DIR/draft.md" + printf '\n--- Original Design Draft End ---\n' +} > "$TEST_REG_DIR/plan.md" + +_plan_check_extract_appendix "$TEST_REG_DIR/plan.md" > "$TEST_REG_DIR/extracted.md" +if cmp -s "$TEST_REG_DIR/draft.md" "$TEST_REG_DIR/extracted.md"; then + pass "regression: draft without trailing newline preserved exactly" +else + fail "regression: draft without trailing newline preserved exactly" "identical" "differ" +fi +rm -rf "$TEST_REG_DIR" + +# Regression 6: plan_check_validate_schema runs deterministically with canonical template +if [[ -f "$PROJECT_ROOT/prompt-template/plan/gen-plan-template.md" ]]; then + TEST_REG_DIR=$(mktemp -d) + cat > "$TEST_REG_DIR/valid_plan.md" <<'EOF' +# Test Plan + +## Goal Description +Test goal. + +## Acceptance Criteria + +- AC-1: Test criterion + - Positive Tests: + - test passes + - Negative Tests: + - test fails + +## Path Boundaries + +### Upper Bound +Maximum scope. + +### Lower Bound +Minimum scope. + +### Allowed Choices +- Can use: bash +- Cannot use: python + +## Dependencies and Sequence + +### Milestones +1. M1: Do thing + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do thing | AC-1 | coding | - | + +## Claude-Codex Deliberation + +### Agreements +- Both agree. + +### Resolved Disagreements +- None. 
+ +### Convergence Status +- Final Status: converged + +## Pending User Decisions + +## Implementation Notes + +### Code Style Requirements +- No AC- references in code. + +--- Original Design Draft Start --- + +draft content + +--- Original Design Draft End --- +EOF + + SCHEMA_OUT=$(plan_check_validate_schema "$TEST_REG_DIR/valid_plan.md" "$PROJECT_ROOT/prompt-template/plan/gen-plan-template.md" 2>/dev/null) + if echo "$SCHEMA_OUT" | grep -q "runtime-error"; then + fail "regression: schema validation with canonical template should not emit runtime-error" "no runtime-error" "has runtime-error" + else + pass "regression: schema validation runs deterministically with canonical template" + fi + rm -rf "$TEST_REG_DIR" +else + skip "regression: schema validation with canonical template" "canonical template not found" +fi + +# Regression 7: Backup path flat layout +TEST_BACKUP_DIR=$(mktemp -d) +printf "plan content\n" > "$TEST_BACKUP_DIR/plan.md" +BACKUP_RESULT=$(plan_check_backup_plan "$TEST_BACKUP_DIR/plan.md" "$TEST_BACKUP_DIR") +if [[ -f "$TEST_BACKUP_DIR/backup/plan.md.bak" ]]; then + pass "regression: backup path is /backup/.bak" +else + fail "regression: backup path is /backup/.bak" "exists" "missing" +fi +if [[ -d "$TEST_BACKUP_DIR/backup/backup" ]]; then + fail "regression: backup path never nests backup/backup/" "absent" "present" +else + pass "regression: backup path never nests backup/backup/" +fi +rm -rf "$TEST_BACKUP_DIR" + +# ======================================== +# Summary +# ======================================== + +print_test_summary "gen-plan Check Mode Integration Tests" diff --git a/tests/test-gen-plan.sh b/tests/test-gen-plan.sh index b5bcab07..526d608c 100755 --- a/tests/test-gen-plan.sh +++ b/tests/test-gen-plan.sh @@ -128,6 +128,29 @@ else fail "gen-plan command allows ask-codex script" "ask-codex script reference" "missing" fi +GEN_PLAN_SKILL="$PROJECT_ROOT/skills/humanize-gen-plan/SKILL.md" +if [[ -f "$GEN_PLAN_SKILL" ]] && grep -q -- 
"--check" "$GEN_PLAN_SKILL" && grep -q -- "--no-check" "$GEN_PLAN_SKILL"; then + pass "humanize-gen-plan skill documents check-mode flags" +else + fail "humanize-gen-plan skill documents check-mode flags" "--check and --no-check in SKILL.md" "missing" +fi + +if [[ -f "$GEN_PLAN_SKILL" ]] && grep -q "gen_plan_check" "$GEN_PLAN_SKILL"; then + pass "humanize-gen-plan skill documents gen_plan_check config" +else + fail "humanize-gen-plan skill documents gen_plan_check config" "gen_plan_check in SKILL.md" "missing" +fi + +if [[ -f "$GEN_PLAN_SKILL" ]] \ + && grep -q "spawn_agent" "$GEN_PLAN_SKILL" \ + && grep -q "fork_context=false" "$GEN_PLAN_SKILL" \ + && grep -q "draft-consistency-checker" "$GEN_PLAN_SKILL" \ + && grep -q "plan-consistency-checker" "$GEN_PLAN_SKILL"; then + pass "humanize-gen-plan check mode requires native checker sub-agents" +else + fail "humanize-gen-plan check mode sub-agent contract" "spawn_agent, fork_context=false, draft and plan checkers" "missing" +fi + if [[ -f "$GEN_PLAN_CMD" ]] && grep -q -- "--auto-start-rlcr-if-converged" "$GEN_PLAN_CMD"; then pass "gen-plan command exposes auto-start-if-converged option" else @@ -693,6 +716,82 @@ if [[ -x "$VALIDATE_SCRIPT" ]]; then fail "validate script rejects --discussion and --direct together" "mutual exclusion error" "no error produced" fi + # Test: --check flag is accepted + EXIT_CODE=0 + "$VALIDATE_SCRIPT" --input "$SCRIPT_TEST_DIR/valid.md" --output "$SCRIPT_TEST_DIR/check-output.md" --check 2>/dev/null || EXIT_CODE=$? + if [[ $EXIT_CODE -eq 0 ]]; then + pass "validate-gen-plan-io: --check accepted" + else + fail "validate-gen-plan-io: --check should be accepted" "0" "$EXIT_CODE" + fi + + # Test: --no-check flag is accepted + EXIT_CODE=0 + "$VALIDATE_SCRIPT" --input "$SCRIPT_TEST_DIR/valid.md" --output "$SCRIPT_TEST_DIR/no-check-output.md" --no-check 2>/dev/null || EXIT_CODE=$? 
+ if [[ $EXIT_CODE -eq 0 ]]; then + pass "validate-gen-plan-io: --no-check accepted" + else + fail "validate-gen-plan-io: --no-check should be accepted" "0" "$EXIT_CODE" + fi + + # Test: --check + --discussion is compatible + EXIT_CODE=0 + "$VALIDATE_SCRIPT" --input "$SCRIPT_TEST_DIR/valid.md" --output "$SCRIPT_TEST_DIR/check-discussion.md" --check --discussion 2>/dev/null || EXIT_CODE=$? + if [[ $EXIT_CODE -eq 0 ]]; then + pass "validate-gen-plan-io: --check + --discussion compatible" + else + fail "validate-gen-plan-io: --check + --discussion should be compatible" "0" "$EXIT_CODE" + fi + + # Test: --check + --direct is compatible + EXIT_CODE=0 + "$VALIDATE_SCRIPT" --input "$SCRIPT_TEST_DIR/valid.md" --output "$SCRIPT_TEST_DIR/check-direct.md" --check --direct 2>/dev/null || EXIT_CODE=$? + if [[ $EXIT_CODE -eq 0 ]]; then + pass "validate-gen-plan-io: --check + --direct compatible" + else + fail "validate-gen-plan-io: --check + --direct should be compatible" "0" "$EXIT_CODE" + fi + + # Test: --no-check + --discussion is compatible + EXIT_CODE=0 + "$VALIDATE_SCRIPT" --input "$SCRIPT_TEST_DIR/valid.md" --output "$SCRIPT_TEST_DIR/no-check-discussion.md" --no-check --discussion 2>/dev/null || EXIT_CODE=$? + if [[ $EXIT_CODE -eq 0 ]]; then + pass "validate-gen-plan-io: --no-check + --discussion compatible" + else + fail "validate-gen-plan-io: --no-check + --discussion should be compatible" "0" "$EXIT_CODE" + fi + + # Test: --check with value exits 6 + EXIT_CODE=0 + "$VALIDATE_SCRIPT" --input "$SCRIPT_TEST_DIR/valid.md" --output "$SCRIPT_TEST_DIR/out.md" --check value 2>/dev/null || EXIT_CODE=$? + if [[ $EXIT_CODE -eq 6 ]]; then + pass "validate-gen-plan-io: --check value exits 6" + else + fail "validate-gen-plan-io: --check value should exit 6" "6" "$EXIT_CODE" + fi + + # Test: --no-check with value exits 6 + EXIT_CODE=0 + "$VALIDATE_SCRIPT" --input "$SCRIPT_TEST_DIR/valid.md" --output "$SCRIPT_TEST_DIR/out.md" --no-check value 2>/dev/null || EXIT_CODE=$? 
+ if [[ $EXIT_CODE -eq 6 ]]; then + pass "validate-gen-plan-io: --no-check value exits 6" + else + fail "validate-gen-plan-io: --no-check value should exit 6" "6" "$EXIT_CODE" + fi + + # Test: Usage text mentions --check and --no-check + OUTPUT=$("$VALIDATE_SCRIPT" --help 2>&1) || true + if echo "$OUTPUT" | grep -q -- '--check'; then + pass "validate-gen-plan-io: usage mentions --check" + else + fail "validate-gen-plan-io: usage should mention --check" "mentioned" "missing" + fi + if echo "$OUTPUT" | grep -q -- '--no-check'; then + pass "validate-gen-plan-io: usage mentions --no-check" + else + fail "validate-gen-plan-io: usage should mention --no-check" "mentioned" "missing" + fi + # Test: Help option should exit 6 EXIT_CODE=0 "$VALIDATE_SCRIPT" --help 2>/dev/null || EXIT_CODE=$? diff --git a/tests/test-methodology-analysis.sh b/tests/test-methodology-analysis.sh new file mode 100755 index 00000000..778a2401 --- /dev/null +++ b/tests/test-methodology-analysis.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +set -u + +PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +source "$PROJECT_ROOT/hooks/lib/methodology-analysis.sh" + +PASSED=0 +FAILED=0 + +pass() { + echo "PASS: $1" + PASSED=$((PASSED + 1)) +} + +fail() { + echo "FAIL: $1" + echo " expected: $2" + echo " actual: $3" + FAILED=$((FAILED + 1)) +} + +TEST_DIR="$(mktemp -d)" +trap 'rm -rf "$TEST_DIR"' EXIT + +echo "=== Test: methodology analysis completion ===" + +LOOP_DIR="$TEST_DIR/loop" +mkdir -p "$LOOP_DIR" +echo "active state" > "$LOOP_DIR/methodology-analysis-state.md" +echo "complete" > "$LOOP_DIR/.methodology-exit-reason" +echo "analysis report" > "$LOOP_DIR/methodology-analysis-report.md" +echo "done" > "$LOOP_DIR/methodology-analysis-done.md" + +if methodology_analysis_ready_to_complete; then + if [[ -f "$LOOP_DIR/methodology-analysis-state.md" && ! 
-e "$LOOP_DIR/complete-state.md" ]]; then + pass "methodology readiness check does not rename active state" + else + fail "methodology readiness check does not rename active state" "active state remains, terminal state absent" "$(ls "$LOOP_DIR")" + fi +else + fail "methodology readiness check succeeds" "exit 0" "non-zero" +fi + +if complete_methodology_analysis; then + if [[ ! -e "$LOOP_DIR/methodology-analysis-state.md" && -f "$LOOP_DIR/complete-state.md" && ! -e "$LOOP_DIR/.methodology-exit-reason" ]]; then + pass "methodology completion finalizes state after readiness" + else + fail "methodology completion finalizes state after readiness" "terminal state exists and marker removed" "$(ls -a "$LOOP_DIR")" + fi +else + fail "methodology completion succeeds" "exit 0" "non-zero" +fi + +echo "" +echo "=== Methodology Analysis Test Summary ===" +echo "Passed: $PASSED" +echo "Failed: $FAILED" + +if [[ "$FAILED" -ne 0 ]]; then + exit 1 +fi + +exit 0 diff --git a/tests/test-plan-check.sh b/tests/test-plan-check.sh new file mode 100755 index 00000000..be5ca4d4 --- /dev/null +++ b/tests/test-plan-check.sh @@ -0,0 +1,1726 @@ +#!/usr/bin/env bash +# +# Fixture-style tests for the plan-check command pipeline. +# +# Tests deterministic validation, report assembly, and edge cases. +# + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +# Colors +GREEN='\033[0;32m' +RED='\033[0;31m' +YELLOW='\033[1;33m' +NC='\033[0m' +TESTS_PASSED=0 +TESTS_FAILED=0 +TESTS_SKIPPED=0 + +pass() { echo -e "${GREEN}PASS${NC}: $1"; TESTS_PASSED=$((TESTS_PASSED + 1)); } +fail() { echo -e "${RED}FAIL${NC}: $1"; if [[ $# -ge 2 ]]; then echo " Expected: $2"; fi; if [[ $# -ge 3 ]]; then echo " Got: $3"; fi; TESTS_FAILED=$((TESTS_FAILED + 1)); } +skip() { echo -e "${YELLOW}SKIP${NC}: $1 - $2"; TESTS_SKIPPED=$((TESTS_SKIPPED + 1)); } + +print_test_summary() { + local title="${1:-Test Summary}" + echo "" + echo "========================================" + echo "$title" + echo "========================================" + echo -e "Passed: ${GREEN}$TESTS_PASSED${NC}" + echo -e "Failed: ${RED}$TESTS_FAILED${NC}" + if [[ $TESTS_SKIPPED -gt 0 ]]; then + echo -e "Skipped: ${YELLOW}$TESTS_SKIPPED${NC}" + fi + echo "" + if [[ $TESTS_FAILED -eq 0 ]]; then + echo -e "${GREEN}All tests passed!${NC}" + return 0 + else + echo -e "${RED}Some tests failed!${NC}" + return 1 + fi +} + +# Setup +TEST_DIR=$(mktemp -d) +trap "rm -rf $TEST_DIR" EXIT + +source "$PROJECT_ROOT/scripts/lib/plan-check-common.sh" +# plan-check-common.sh sets 'set -e'; restore test-script behavior +set +e +source "$SCRIPT_DIR/test-helpers.sh" + +REPORT_DIR="$TEST_DIR/reports" +mkdir -p "$REPORT_DIR" + +# Helper: run schema validation and return findings JSON array +collect_schema_findings() { + local plan_file="$1" + local template_file="${2:-$PROJECT_ROOT/prompt-template/plan/gen-plan-template.md}" + local findings + findings="$(plan_check_validate_schema "$plan_file" "$template_file")" + if [[ -z "$findings" ]]; then + echo "[]" + else + echo "[$findings]" + fi +} + +# Helper: run schema validation in a fresh strict-mode shell. This catches +# regressions where optional greps abort only under set -euo pipefail. 
+collect_schema_findings_strict() { + local plan_file="$1" + local output_file="$2" + local template_file="${3:-$PROJECT_ROOT/prompt-template/plan/gen-plan-template.md}" + + bash -euo pipefail -c ' + project_root="$1" + plan_file="$2" + template_file="$3" + output_file="$4" + source "$project_root/scripts/lib/plan-check-common.sh" + findings="$(plan_check_validate_schema "$plan_file" "$template_file")" + if [[ -z "$findings" ]]; then + printf "[]" > "$output_file" + else + printf "[%s]" "$findings" > "$output_file" + fi + ' _ "$PROJECT_ROOT" "$plan_file" "$template_file" "$output_file" +} + +assert_strict_template_runtime_error() { + local plan_file="$1" + local template_file="$2" + local output_file="$3" + local pass_label="$4" + local fail_label="$5" + + local strict_exit=0 + collect_schema_findings_strict "$plan_file" "$output_file" "$template_file" || strict_exit=$? + + local strict_findings + strict_findings="$(cat "$output_file" 2>/dev/null || echo "[]")" + local strict_runtime + strict_runtime="$(count_category "$strict_findings" runtime-error)" + local strict_schema + strict_schema="$(count_category "$strict_findings" schema)" + + if [[ "$strict_exit" -eq 0 && "$strict_runtime" -ge 1 && "$strict_schema" -eq 0 ]]; then + pass "$pass_label" + else + fail "$fail_label" "exit 0, 1 runtime-error, 0 schema" "exit=$strict_exit, $strict_runtime runtime-error, $strict_schema schema" + fi +} + +# Helper: count findings by category +count_category() { + local findings="$1" + local category="$2" + printf '%s' "$findings" | python3 -c "import json,sys; d=json.load(sys.stdin); print(sum(1 for f in d if f.get('category')=='$category'))" +} + +# Helper: count findings by severity +count_severity() { + local findings="$1" + local severity="$2" + printf '%s' "$findings" | python3 -c "import json,sys; d=json.load(sys.stdin); print(sum(1 for f in d if f.get('severity')=='$severity'))" +} + +echo "=== Test: Plan Check Schema Validation ===" +echo "" + +# Test 0a: 
plan_check_resolve_recheck defaults to false +recheck_default="$(plan_check_resolve_recheck '{}')" +if [[ "$recheck_default" == "false" ]]; then + pass "plan_check_resolve_recheck defaults to false" +else + fail "plan_check_resolve_recheck default" "false" "$recheck_default" +fi + +# Test 0b: plan_check_resolve_recheck accepts true config +recheck_enabled="$(plan_check_resolve_recheck '{"plan_check_recheck": true}')" +if [[ "$recheck_enabled" == "true" ]]; then + pass "plan_check_resolve_recheck accepts true config" +else + fail "plan_check_resolve_recheck true config" "true" "$recheck_enabled" +fi + +# Test 0c: plan_check_resolve_recheck treats invalid config as false +recheck_invalid="$(plan_check_resolve_recheck '{"plan_check_recheck": "sometimes"}')" +if [[ "$recheck_invalid" == "false" ]]; then + pass "plan_check_resolve_recheck treats invalid config as false" +else + fail "plan_check_resolve_recheck invalid config" "false" "$recheck_invalid" +fi + +# Test 1: Valid plan produces no blockers +cat > "$TEST_DIR/valid-plan.md" << 'EOF' +# Valid Plan + +## Goal Description +A valid test plan. + +## Acceptance Criteria + +- **AC-1**: First criterion + - Positive Tests: + - Test passes + - Negative Tests: + - Test fails + +## Path Boundaries + +### Upper Bound +Complete implementation. + +### Lower Bound +Minimum viable implementation. 
+ +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | coding | - | + +## Claude-Codex Deliberation +EOF + +echo "Test 1: Valid plan produces no blockers" +findings="$(collect_schema_findings "$TEST_DIR/valid-plan.md")" +blockers="$(count_severity "$findings" blocker)" +if [[ "$blockers" -eq 0 ]]; then + pass "Valid plan has no blockers" +else + fail "Valid plan should have no blockers" "0 blockers" "$blockers blockers" +fi + +# Test 1b: Task Breakdown is optional for Codex-generated plans +cat > "$TEST_DIR/valid-plan-no-tasks.md" << 'EOF' +# Valid Plan Without Tasks + +## Goal Description +A valid test plan without an explicit task list. + +## Acceptance Criteria + +- **AC-1**: First criterion + - Positive Tests: + - Test passes + - Negative Tests: + - Test fails + +## Path Boundaries + +### Upper Bound +Complete implementation. + +### Lower Bound +Minimum viable implementation. +EOF + +echo "Test 1b: Valid plan without Task Breakdown produces no blockers" +findings="$(collect_schema_findings "$TEST_DIR/valid-plan-no-tasks.md")" +blockers="$(count_severity "$findings" blocker)" +if [[ "$blockers" -eq 0 ]]; then + pass "Task Breakdown is optional for schema validation" +else + fail "Plan without Task Breakdown should have no blockers" "0 blockers" "$blockers blockers" +fi + +# Test 2: Missing required section produces schema finding +cat > "$TEST_DIR/missing-section.md" << 'EOF' +# Missing Section Plan + +## Goal Description +A plan missing Path Boundaries. 
+ +## Acceptance Criteria + +- **AC-1**: First criterion + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | coding | - | +EOF + +echo "Test 2: Missing required section produces schema finding" +findings="$(collect_schema_findings "$TEST_DIR/missing-section.md")" +schema_count="$(count_category "$findings" schema)" +if [[ "$schema_count" -ge 1 ]]; then + pass "Missing section detected" +else + fail "Missing section should be detected" "at least 1 schema finding" "$schema_count schema findings" +fi + +# Test 3: Duplicate canonical AC IDs +cat > "$TEST_DIR/duplicate-ac.md" << 'EOF' +# Duplicate AC Plan + +## Goal Description +A plan with duplicate ACs. + +## Acceptance Criteria + +- **AC-1**: First criterion +- **AC-1**: Duplicate criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | coding | - | +EOF + +echo "Test 3: Duplicate canonical AC IDs detected" +findings="$(collect_schema_findings "$TEST_DIR/duplicate-ac.md")" +schema_count="$(count_category "$findings" schema)" +if [[ "$schema_count" -ge 1 ]]; then + pass "Duplicate AC detected" +else + fail "Duplicate AC should be detected" "at least 1 schema finding" "$schema_count schema findings" +fi + +# Test 4: Nonexistent Target AC produces dependency finding +cat > "$TEST_DIR/bad-target-ac.md" << 'EOF' +# Bad Target AC Plan + +## Goal Description +A plan with a bad target. + +## Acceptance Criteria + +- **AC-1**: First criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. 
+ +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-99 | coding | - | +EOF + +echo "Test 4: Nonexistent Target AC detected" +findings="$(collect_schema_findings "$TEST_DIR/bad-target-ac.md")" +dep_count="$(count_category "$findings" dependency)" +if [[ "$dep_count" -ge 1 ]]; then + pass "Nonexistent Target AC detected" +else + fail "Nonexistent Target AC should be detected" "at least 1 dependency finding" "$dep_count dependency findings" +fi + +# Test 5: Target AC range "AC-1 through AC-7" passes validation +cat > "$TEST_DIR/range-target-ac.md" << 'EOF' +# Range Target AC Plan + +## Goal Description +A plan with range target. + +## Acceptance Criteria + +- **AC-1**: First criterion +- **AC-2**: Second criterion +- **AC-3**: Third criterion +- **AC-4**: Fourth criterion +- **AC-5**: Fifth criterion +- **AC-6**: Sixth criterion +- **AC-7**: Seventh criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do everything | AC-1 through AC-7 | coding | - | +EOF + +echo "Test 5: Target AC range 'AC-1 through AC-7' passes" +findings="$(collect_schema_findings "$TEST_DIR/range-target-ac.md")" +dep_count="$(count_category "$findings" dependency)" +if [[ "$dep_count" -eq 0 ]]; then + pass "Target AC range accepted" +else + fail "Target AC range should be accepted" "0 dependency findings" "$dep_count dependency findings" +fi + +# Test 5b: Existing AC-X.Y sub-criterion target passes validation +cat > "$TEST_DIR/sub-ac-target.md" << 'EOF' +# Sub-AC Target Plan + +## Goal Description +A plan with a sub-criterion target. + +## Acceptance Criteria + +- **AC-1**: First criterion + - AC-1.1: Sub-criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. 
+ +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do the sub-criterion work | AC-1.1 | coding | - | +EOF + +echo "Test 5b: Existing AC-X.Y sub-criterion target passes" +findings="$(collect_schema_findings "$TEST_DIR/sub-ac-target.md")" +dep_count="$(count_category "$findings" dependency)" +if [[ "$dep_count" -eq 0 ]]; then + pass "Existing AC-X.Y sub-criterion target accepted" +else + fail "Existing AC-X.Y sub-criterion target should be accepted" "0 dependency findings" "$dep_count dependency findings" +fi + +# Test 5c: Nonexistent AC-X.Y sub-criterion target is detected +cat > "$TEST_DIR/bad-sub-ac-target.md" << 'EOF' +# Bad Sub-AC Target Plan + +## Goal Description +A plan with a missing sub-criterion target. + +## Acceptance Criteria + +- **AC-1**: First criterion + - AC-1.1: Sub-criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do the missing sub-criterion work | AC-1.9 | coding | - | +EOF + +echo "Test 5c: Nonexistent AC-X.Y sub-criterion target detected" +findings="$(collect_schema_findings "$TEST_DIR/bad-sub-ac-target.md")" +dep_count="$(count_category "$findings" dependency)" +if [[ "$dep_count" -ge 1 ]]; then + pass "Nonexistent AC-X.Y sub-criterion target detected" +else + fail "Nonexistent AC-X.Y sub-criterion target should be detected" "at least 1 dependency finding" "$dep_count dependency findings" +fi + +# Test 5d: Bold AC mention outside Acceptance Criteria does not define a target +cat > "$TEST_DIR/bold-mention-target-ac.md" << 'EOF' +# Bold Mention Target Plan + +## Goal Description +A plan whose task text mentions **AC-99** without defining it. + +## Acceptance Criteria + +- **AC-1**: First criterion + +## Path Boundaries + +### Upper Bound +Complete. 
+ +### Lower Bound +Minimum. + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Preserve the literal **AC-99** note in docs | AC-99 | coding | - | +EOF + +echo "Test 5d: Bold non-definition AC mention does not satisfy target validation" +findings="$(collect_schema_findings "$TEST_DIR/bold-mention-target-ac.md")" +dep_count="$(count_category "$findings" dependency)" +if [[ "$dep_count" -ge 1 ]]; then + pass "Bold non-definition AC mention ignored" +else + fail "Bold non-definition AC mention should not define Target AC" "at least 1 dependency finding" "$dep_count dependency findings" +fi + +# Test 5e: Empty or malformed Target AC is detected +cat > "$TEST_DIR/unparsable-target-ac.md" << 'EOF' +# Unparsable Target AC Plan + +## Goal Description +A plan whose task has no valid Target AC token. + +## Acceptance Criteria + +- **AC-1**: First criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | - | coding | - | +EOF + +echo "Test 5e: Unparsable Target AC detected" +findings="$(collect_schema_findings "$TEST_DIR/unparsable-target-ac.md")" +dep_count="$(count_category "$findings" dependency)" +if [[ "$dep_count" -ge 1 ]]; then + pass "Unparsable Target AC detected" +else + fail "Unparsable Target AC should be detected" "at least 1 dependency finding" "$dep_count dependency findings" +fi + +# Test 6: Invalid routing tag produces schema finding +cat > "$TEST_DIR/bad-tag.md" << 'EOF' +# Bad Tag Plan + +## Goal Description +A plan with a bad tag. + +## Acceptance Criteria + +- **AC-1**: First criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. 
+ +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | review | - | +EOF + +echo "Test 6: Invalid routing tag detected" +findings="$(collect_schema_findings "$TEST_DIR/bad-tag.md")" +schema_count="$(count_category "$findings" schema)" +if [[ "$schema_count" -ge 1 ]]; then + pass "Invalid routing tag detected" +else + fail "Invalid routing tag should be detected" "at least 1 schema finding" "$schema_count schema findings" +fi + +# Test 6b: Spaced/aligned Markdown separators still enable task validation +cat > "$TEST_DIR/spaced-separator-task-table.md" << 'EOF' +# Spaced Separator Task Table Plan + +## Goal Description +A plan with a common spaced and aligned Markdown table separator. + +## Acceptance Criteria + +- **AC-1**: First criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +| :--- | :--- | :---: | ---: | --- | +| task1 | Do something | AC-99 | review | missing-task | +EOF + +echo "Test 6b: Spaced Markdown table separator preserves task validation" +findings="$(collect_schema_findings "$TEST_DIR/spaced-separator-task-table.md")" +schema_count="$(count_category "$findings" schema)" +dep_count="$(count_category "$findings" dependency)" +if [[ "$schema_count" -ge 1 && "$dep_count" -ge 1 ]]; then + pass "Spaced Markdown table separator preserves task validation" +else + fail "Spaced Markdown table separator should preserve task validation" "schema>=1 and dependency>=1" "schema=$schema_count, dependency=$dep_count" +fi + +# Test 7: Circular dependency detected +cat > "$TEST_DIR/circular-deps.md" << 'EOF' +# Circular Dependency Plan + +## Goal Description +A plan with circular deps. + +## Acceptance Criteria + +- **AC-1**: First criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. 
+ +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | coding | task2 | +| task2 | Do another thing | AC-1 | coding | task1 | +EOF + +echo "Test 7: Circular dependency detected" +findings="$(collect_schema_findings "$TEST_DIR/circular-deps.md")" +dep_count="$(count_category "$findings" dependency)" +if [[ "$dep_count" -ge 1 ]]; then + pass "Circular dependency detected" +else + fail "Circular dependency should be detected" "at least 1 dependency finding" "$dep_count dependency findings" +fi + +# Test 8: Malformed template produces runtime-error info finding and skips schema checks +cat > "$TEST_DIR/malformed-template.md" << 'EOF' +not a plan schema +EOF + +cat > "$TEST_DIR/malformed-template-plan.md" << 'EOF' +# Plan + +## Goal Description +A plan. + +## Acceptance Criteria + +- **AC-1**: First criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. 
+ +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | coding | - | +EOF + +echo "Test 8: Malformed template produces runtime-error info finding" +findings="$(collect_schema_findings "$TEST_DIR/malformed-template-plan.md" "$TEST_DIR/malformed-template.md")" +runtime_count="$(count_category "$findings" runtime-error)" +schema_count="$(count_category "$findings" schema)" +if [[ "$runtime_count" -ge 1 && "$schema_count" -eq 0 ]]; then + pass "Malformed template produces runtime-error and skips schema checks" +else + fail "Malformed template handling" "1 runtime-error, 0 schema" "$runtime_count runtime-error, $schema_count schema" +fi + +assert_strict_template_runtime_error \ + "$TEST_DIR/malformed-template-plan.md" \ + "$TEST_DIR/malformed-template.md" \ + "$TEST_DIR/malformed-template-strict.json" \ + "Malformed template returns runtime-error under strict shell" \ + "Malformed template strict handling" + +# Test 9: plan-check.sh rejects malformed findings input +echo "Test 9: plan-check.sh rejects malformed JSON input" +mkdir -p "$TEST_DIR/report9" +echo '{not-json' > "$TEST_DIR/bad-findings.json" +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report9" \ + --findings-file "$TEST_DIR/bad-findings.json" > /dev/null 2>&1 +exit_code=$? 
+category="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report9/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$exit_code" -eq 0 && "$category" == "runtime-error" ]]; then + pass "Malformed findings produce runtime-error finding" +else + fail "Malformed findings handling" "exit 0 with runtime-error category" "exit $exit_code, category=$category" +fi + +# Test 10: plan-check.sh rejects non-array findings input +echo "Test 10: plan-check.sh rejects non-array JSON input" +mkdir -p "$TEST_DIR/report10" +echo '{}' > "$TEST_DIR/object-findings.json" +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report10" \ + --findings-file "$TEST_DIR/object-findings.json" > /dev/null 2>&1 +exit_code=$? +category="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report10/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$exit_code" -eq 0 && "$category" == "runtime-error" ]]; then + pass "Non-array findings produce runtime-error finding" +else + fail "Non-array findings handling" "exit 0 with runtime-error category" "exit $exit_code, category=$category" +fi + +# Test 11: Appendix drift produces info finding +cat > "$TEST_DIR/appendix-drift.md" << 'EOF' +# Appendix Drift Plan + +## Goal Description +A plan with appendix. + +## Acceptance Criteria + +- **AC-1**: First criterion + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | coding | - | + +--- Original Design Draft Start --- + +Some original draft content. 
+ +--- Original Design Draft End --- +EOF + +echo "Test 11: Appendix drift produces info finding" +findings="$(collect_schema_findings "$TEST_DIR/appendix-drift.md")" +drift_count="$(count_category "$findings" appendix-drift)" +if [[ "$drift_count" -ge 1 ]]; then + pass "Appendix drift detected" +else + fail "Appendix drift should be detected" "at least 1 appendix-drift finding" "$drift_count appendix-drift findings" +fi + +# Test 12: Ambiguity ID post-processing produces stable hash IDs +echo "Test 12: Ambiguity ID post-processing" +cat > "$TEST_DIR/ambiguity-findings.json" << 'EOF' +[ + { + "id": "A-001", + "severity": "blocker", + "category": "ambiguity", + "source_checker": "plan-ambiguity-checker", + "location": {"section": "Task Breakdown", "fragment": "use caching where appropriate"}, + "evidence": "ambiguous caching instruction", + "explanation": "no invalidation strategy defined", + "suggested_resolution": "define cache invalidation", + "affected_acs": [], + "affected_tasks": [] + } +] +EOF +processed="$(python3 -c "import json,sys; print(json.dumps(json.load(sys.stdin)))" < <(cat "$TEST_DIR/ambiguity-findings.json" | plan_check_postprocess_ambiguity_ids))" +id="$(python3 -c "import json,sys; d=json.load(sys.stdin); print(d[0]['id'])" <<< "$processed")" +if [[ "$id" == A-* && "$id" != "A-001" ]]; then + pass "Ambiguity ID is stable hash" +else + fail "Ambiguity ID post-processing" "stable hash ID starting with A-" "$id" +fi + +# Test 12b: Malformed ambiguity findings are passed through unchanged +echo "Test 12b: Ambiguity post-processing preserves malformed input" +malformed_findings='[{"category":"ambiguity",' +processed_malformed="$(printf '%s' "$malformed_findings" | plan_check_postprocess_ambiguity_ids)" +if [[ "$processed_malformed" == "$malformed_findings" ]]; then + pass "Ambiguity post-processing preserves malformed input" +else + fail "Ambiguity post-processing malformed input fallback" "$malformed_findings" "$processed_malformed" +fi + +# Test 
13: plan-check.sh produces valid findings.json with valid input +echo "Test 13: plan-check.sh produces valid findings.json" +mkdir -p "$TEST_DIR/report13" +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report13" \ + --findings-file "$TEST_DIR/ambiguity-findings.json" > /dev/null 2>&1 +if python3 -c "import json; json.load(open('$TEST_DIR/report13/findings.json'))" 2>/dev/null; then + pass "Valid findings produce parseable findings.json" +else + fail "Valid findings should produce parseable findings.json" +fi + +# Test 13b: plan-check.sh escapes JSON metacharacters in plan path metadata +echo "Test 13b: plan-check.sh escapes JSON metacharacters in plan path metadata" +QUOTED_PLAN="$TEST_DIR/quoted \"plan.md" +cp "$TEST_DIR/valid-plan.md" "$QUOTED_PLAN" +mkdir -p "$TEST_DIR/report13b" +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$QUOTED_PLAN" \ + --report-dir "$TEST_DIR/report13b" \ + --findings-file "$TEST_DIR/ambiguity-findings.json" > /dev/null 2>&1 +quoted_exit=$? 
+quoted_path="$(FINDINGS_JSON="$TEST_DIR/report13b/findings.json" python3 -c 'import json, os; d=json.load(open(os.environ["FINDINGS_JSON"])); print(d["check_run"]["plan_path"])' 2>/dev/null)" +if [[ "$quoted_exit" -eq 0 && "$quoted_path" == "$QUOTED_PLAN" && -f "$TEST_DIR/report13b/report.md" ]]; then + pass "Plan path with JSON metacharacters produces parseable findings.json and report.md" +else + fail "Plan path with JSON metacharacters should remain valid JSON" "exit 0, matching plan_path, report.md exists" "exit=$quoted_exit, plan_path=$quoted_path, report_exists=$([[ -f "$TEST_DIR/report13b/report.md" ]] && echo yes || echo no)" +fi + +# Test 14: Resolved contradiction produces status=pass +echo "Test 14: Resolved contradiction produces status=pass" +findings14='[{"id":"F-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Task Breakdown","fragment":""},"evidence":"conflict","explanation":"two defs","suggested_resolution":"pick one","affected_acs":[],"affected_tasks":[]}]' +resolutions14='[{"finding_id":"F-001","resolution_type":"contradiction_resolution","resolution":"accepted first definition"}]' +result14="$(plan_check_build_resolved_json "$TEST_DIR/valid-plan.md" "abc" "test" "{}" 0 "$findings14" "$resolutions14")" +status14="$(echo "$result14" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["status"])')" +unresolved14="$(echo "$result14" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["unresolved_blockers"])')" +if [[ \"$status14\" == \"pass\" && \"$unresolved14\" == \"0\" ]]; then + pass "Resolved contradiction produces pass" +else + fail "Resolved contradiction should produce pass" "status=pass, unresolved_blockers=0" "status=$status14, unresolved_blockers=$unresolved14" +fi + +# Test 15: Answered ambiguity produces status=pass +echo "Test 15: Answered ambiguity produces status=pass" 
+findings15='[{"id":"A-abc123","severity":"blocker","category":"ambiguity","source_checker":"plan-ambiguity-checker","location":{"section":"Task Breakdown","fragment":"use caching"},"evidence":"vague","explanation":"no strategy","suggested_resolution":"define strategy","affected_acs":[],"affected_tasks":[],"ambiguity_details":{"competing_interpretations":["A","B"],"execution_drift_risk":"high","clarification_question":"what cache?"}}]' +resolutions15='[{"finding_id":"A-abc123","resolution_type":"ambiguity_answer","answer":"use LRU cache with 5-minute TTL"}]' +result15="$(plan_check_build_resolved_json "$TEST_DIR/valid-plan.md" "abc" "test" "{}" 0 "$findings15" "$resolutions15")" +status15="$(echo "$result15" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["status"])')" +unresolved15="$(echo "$result15" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["unresolved_blockers"])')" +if [[ \"$status15\" == \"pass\" && \"$unresolved15\" == \"0\" ]]; then + pass "Answered ambiguity produces pass" +else + fail "Answered ambiguity should produce pass" "status=pass, unresolved_blockers=0" "status=$status15, unresolved_blockers=$unresolved15" +fi + +# Test 16: Skipped ambiguity produces status=fail +echo "Test 16: Skipped ambiguity produces status=fail" +resolutions16='[{"finding_id":"A-abc123","resolution_type":"ambiguity_skipped"}]' +result16="$(plan_check_build_resolved_json "$TEST_DIR/valid-plan.md" "abc" "test" "{}" 0 "$findings15" "$resolutions16")" +status16="$(echo "$result16" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["status"])')" +unresolved16="$(echo "$result16" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["unresolved_blockers"])')" +if [[ \"$status16\" == \"fail\" && \"$unresolved16\" == \"1\" ]]; then + pass "Skipped ambiguity produces fail" +else + fail "Skipped ambiguity should produce fail" "status=fail, unresolved_blockers=1" "status=$status16, 
unresolved_blockers=$unresolved16" +fi + +# Test 17: Rewrite backup and atomic write +echo "Test 17: Rewrite backup and atomic write" +mkdir -p "$TEST_DIR/rewrite-report/backup" +cp "$TEST_DIR/valid-plan.md" "$TEST_DIR/original-plan.md" +backup_path="$(plan_check_backup_plan "$TEST_DIR/original-plan.md" "$TEST_DIR/rewrite-report")" +if [[ -f "$backup_path" ]]; then + pass "Backup created at $backup_path" +else + fail "Backup should be created" "backup file exists" "missing" +fi +new_content="# Modified Plan\n\n## Goal Description\nModified." +plan_check_atomic_write "$TEST_DIR/original-plan.md" "$new_content" +if grep -q "Modified" "$TEST_DIR/original-plan.md"; then + pass "Atomic write succeeded" +else + fail "Atomic write should modify the file" "file contains Modified" "missing" +fi +mode17="$(stat -c '%a' "$TEST_DIR/original-plan.md" 2>/dev/null || stat -f '%Lp' "$TEST_DIR/original-plan.md" 2>/dev/null || true)" +if [[ "$mode17" == "644" ]]; then + pass "Atomic write preserves file mode" +else + fail "Atomic write should preserve file mode" "644" "$mode17" +fi + +# Test 18: plan-check.sh respects valid ambiguity with full schema +echo "Test 18: plan-check.sh accepts valid ambiguity with full schema" +mkdir -p "$TEST_DIR/report18" +cat > "$TEST_DIR/full-ambiguity.json" << 'EOF' +[ + { + "id": "A-abc123", + "severity": "blocker", + "category": "ambiguity", + "source_checker": "plan-ambiguity-checker", + "location": {"section": "Task Breakdown", "fragment": "use caching"}, + "evidence": "vague", + "explanation": "no strategy", + "suggested_resolution": "define strategy", + "affected_acs": [], + "affected_tasks": [], + "ambiguity_details": { + "competing_interpretations": ["A", "B"], + "execution_drift_risk": "high", + "clarification_question": "what cache?" 
+ } + } +] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report18" \ + --findings-file "$TEST_DIR/full-ambiguity.json" > /dev/null 2>&1 +if python3 -c "import json; d=json.load(open('$TEST_DIR/report18/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null | grep -q "ambiguity"; then + pass "Valid ambiguity with full schema accepted" +else + fail "Valid ambiguity with full schema should be accepted" +fi + +# Test 19: Partial template may omit optional Task Breakdown heading +echo "Test 19: Partial template missing optional Task Breakdown validates" +cat > "$TEST_DIR/partial-template.md" << 'EOF' +# Partial Template + +## Goal Description +Test. + +## Acceptance Criteria +- AC-1: test + +## Path Boundaries + +### Upper Bound +Complete. +EOF +findings19="$(collect_schema_findings "$TEST_DIR/valid-plan.md" "$TEST_DIR/partial-template.md")" +runtime19="$(count_category "$findings19" runtime-error)" +schema19="$(count_category "$findings19" schema)" +blockers19="$(count_severity "$findings19" blocker)" +if [[ "$runtime19" -eq 0 && "$schema19" -eq 0 && "$blockers19" -eq 0 ]]; then + pass "Partial template without optional Task Breakdown validates" +else + fail "Partial template without optional Task Breakdown" "0 runtime-error, 0 schema, 0 blockers" "$runtime19 runtime-error, $schema19 schema, $blockers19 blockers" +fi + +# Test 19b: Partial template missing a required core heading triggers runtime-error +echo "Test 19b: Partial template missing core heading triggers runtime-error" +cat > "$TEST_DIR/partial-template-missing-core.md" << 'EOF' +# Partial Template + +## Goal Description +Test. 
+ +## Acceptance Criteria +- AC-1: test +EOF + +findings19b="$(collect_schema_findings "$TEST_DIR/valid-plan.md" "$TEST_DIR/partial-template-missing-core.md")" +runtime19b="$(count_category "$findings19b" runtime-error)" +schema19b="$(count_category "$findings19b" schema)" +if [[ "$runtime19b" -ge 1 && "$schema19b" -eq 0 ]]; then + pass "Partial template missing core heading triggers runtime-error and skips schema checks" +else + fail "Partial template missing core heading" "1 runtime-error, 0 schema" "$runtime19b runtime-error, $schema19b schema" +fi + +assert_strict_template_runtime_error \ + "$TEST_DIR/valid-plan.md" \ + "$TEST_DIR/partial-template-missing-core.md" \ + "$TEST_DIR/partial-template-missing-core-strict.json" \ + "Partial template missing core heading returns runtime-error under strict shell" \ + "Partial template missing core heading strict handling" + +# Test 20: plan-check.sh rejects missing affected_acs +echo "Test 20: plan-check.sh rejects missing affected_acs" +mkdir -p "$TEST_DIR/report20" +cat > "$TEST_DIR/missing-affected.json" << 'EOF' +[{"id":"F-001","severity":"blocker","category":"schema","source_checker":"plan-schema-validator","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix"}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report20" \ + --findings-file "$TEST_DIR/missing-affected.json" > /dev/null 2>&1 +cat20="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report20/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$cat20" == "runtime-error" ]]; then + pass "Missing affected_acs produces runtime-error" +else + fail "Missing affected_acs should produce runtime-error" "runtime-error" "$cat20" +fi + +# Test 21: plan-check.sh rejects missing ambiguity_details +echo "Test 21: plan-check.sh rejects missing ambiguity_details" +mkdir -p "$TEST_DIR/report21" +cat > 
"$TEST_DIR/missing-details.json" << 'EOF' +[{"id":"A-abc123","severity":"blocker","category":"ambiguity","source_checker":"plan-ambiguity-checker","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[]}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report21" \ + --findings-file "$TEST_DIR/missing-details.json" > /dev/null 2>&1 +cat21="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report21/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$cat21" == "runtime-error" ]]; then + pass "Missing ambiguity_details produces runtime-error" +else + fail "Missing ambiguity_details should produce runtime-error" "runtime-error" "$cat21" +fi + +# Test 22: Schema blocker + contradiction_resolution still fails (category-aware) +echo "Test 22: Schema blocker cannot be cleared by contradiction resolution" +findings22='[{"id":"F-001","severity":"blocker","category":"schema","source_checker":"plan-schema-validator","location":{"section":"Task Breakdown","fragment":""},"evidence":"missing section","explanation":"required","suggested_resolution":"add it","affected_acs":[],"affected_tasks":[]}]' +resolutions22='[{"finding_id":"F-001","resolution_type":"contradiction_resolution","resolution":"fixed"}]' +result22="$(plan_check_build_resolved_json "$TEST_DIR/valid-plan.md" "abc" "test" "{}" 0 "$findings22" "$resolutions22")" +status22="$(echo "$result22" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["status"])')" +unresolved22="$(echo "$result22" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["unresolved_blockers"])')" +if [[ "$status22" == "fail" && "$unresolved22" == "1" ]]; then + pass "Schema blocker remains unresolved with contradiction resolution" +else + fail "Schema blocker should remain unresolved" "status=fail, 
unresolved=1" "status=$status22, unresolved=$unresolved22" +fi + +# Test 23: plan-check.sh rejects invalid scalar type for id +echo "Test 23: plan-check.sh rejects invalid scalar type for id" +mkdir -p "$TEST_DIR/report23" +cat > "$TEST_DIR/invalid-id.json" << 'EOF' +[{"id":[],"severity":"blocker","category":"schema","source_checker":"plan-schema-validator","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[]}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report23" \ + --findings-file "$TEST_DIR/invalid-id.json" > /dev/null 2>&1 +cat23="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report23/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$cat23" == "runtime-error" ]]; then + pass "Invalid scalar type for id produces runtime-error" +else + fail "Invalid scalar type for id should produce runtime-error" "runtime-error" "$cat23" +fi + +# Test 24: plan-check.sh rejects unknown source_checker +echo "Test 24: plan-check.sh rejects unknown source_checker" +mkdir -p "$TEST_DIR/report24" +cat > "$TEST_DIR/invalid-checker.json" << 'EOF' +[{"id":"F-001","severity":"blocker","category":"schema","source_checker":"unknown-checker","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[]}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report24" \ + --findings-file "$TEST_DIR/invalid-checker.json" > /dev/null 2>&1 +cat24="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report24/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$cat24" == "runtime-error" ]]; then + pass "Unknown source_checker produces runtime-error" +else + fail "Unknown source_checker should 
produce runtime-error" "runtime-error" "$cat24" +fi + +# Test 25: plan-check.sh rejects ambiguity with only 1 competing interpretation +echo "Test 25: plan-check.sh rejects ambiguity with only 1 interpretation" +mkdir -p "$TEST_DIR/report25" +cat > "$TEST_DIR/invalid-interpretations.json" << 'EOF' +[{"id":"A-abc123","severity":"blocker","category":"ambiguity","source_checker":"plan-ambiguity-checker","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[],"ambiguity_details":{"competing_interpretations":["A"],"execution_drift_risk":"high","clarification_question":"what?"}}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report25" \ + --findings-file "$TEST_DIR/invalid-interpretations.json" > /dev/null 2>&1 +cat25="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report25/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$cat25" == "runtime-error" ]]; then + pass "Only 1 interpretation produces runtime-error" +else + fail "Only 1 interpretation should produce runtime-error" "runtime-error" "$cat25" +fi + +# Test 26: plan-check.sh rejects empty ambiguity clarification question +echo "Test 26: plan-check.sh rejects empty ambiguity clarification question" +mkdir -p "$TEST_DIR/report26" +cat > "$TEST_DIR/invalid-question.json" << 'EOF' +[{"id":"A-abc123","severity":"blocker","category":"ambiguity","source_checker":"plan-ambiguity-checker","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[],"ambiguity_details":{"competing_interpretations":["A","B"],"execution_drift_risk":"high","clarification_question":""}}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report26" \ + --findings-file 
"$TEST_DIR/invalid-question.json" > /dev/null 2>&1 +cat26="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report26/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$cat26" == "runtime-error" ]]; then + pass "Empty clarification question produces runtime-error" +else + fail "Empty clarification question should produce runtime-error" "runtime-error" "$cat26" +fi + +# Test 27: Diff preview can be generated before rewrite +echo "Test 27: Diff preview can be generated before rewrite" +cp "$TEST_DIR/valid-plan.md" "$TEST_DIR/diff-plan.md" +new_content="# Modified Plan\n\n## Goal Description\nModified.\n\n## Acceptance Criteria\n\n- **AC-1**: First criterion\n\n## Path Boundaries\n\n### Upper Bound\nComplete.\n\n### Lower Bound\nMinimum.\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do something | AC-1 | coding | - |" +diff_output="$(diff -u "$TEST_DIR/diff-plan.md" <(echo -e "$new_content") 2>/dev/null || true)" +if echo "$diff_output" | grep -q "Modified"; then + pass "Diff preview shows changes" +else + fail "Diff preview should show changes" "diff contains Modified" "missing" +fi + +# Test 28: plan-check.sh accepts valid contradiction with full schema +echo "Test 28: plan-check.sh accepts valid contradiction with full schema" +mkdir -p "$TEST_DIR/report28" +cat > "$TEST_DIR/full-contradiction.json" << 'EOF' +[{"id":"F-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Task Breakdown","fragment":""},"evidence":"conflict","explanation":"two defs","suggested_resolution":"pick one","affected_acs":[],"affected_tasks":[]}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report28" \ + --findings-file "$TEST_DIR/full-contradiction.json" > /dev/null 2>&1 +if python3 -c "import json; 
d=json.load(open('$TEST_DIR/report28/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null | grep -q "contradiction"; then + pass "Valid contradiction with full schema accepted" +else + fail "Valid contradiction with full schema should be accepted" +fi + +# Test 29: Dependency blocker + contradiction_resolution still fails +echo "Test 29: Dependency blocker cannot be cleared by contradiction resolution" +findings29='[{"id":"F-001","severity":"blocker","category":"dependency","source_checker":"plan-schema-validator","location":{"section":"Task Breakdown","fragment":"task1"},"evidence":"circular","explanation":"cycle","suggested_resolution":"break it","affected_acs":[],"affected_tasks":["task1"]}]' +resolutions29='[{"finding_id":"F-001","resolution_type":"contradiction_resolution","resolution":"fixed"}]' +result29="$(plan_check_build_resolved_json "$TEST_DIR/valid-plan.md" "abc" "test" "{}" 0 "$findings29" "$resolutions29")" +status29="$(echo "$result29" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["status"])')" +unresolved29="$(echo "$result29" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["unresolved_blockers"])')" +if [[ "$status29" == "fail" && "$unresolved29" == "1" ]]; then + pass "Dependency blocker remains unresolved with contradiction resolution" +else + fail "Dependency blocker should remain unresolved" "status=fail, unresolved=1" "status=$status29, unresolved=$unresolved29" +fi + +# Test 30: Ambiguity + wrong resolution type remains unresolved +echo "Test 30: Ambiguity + wrong resolution type remains unresolved" +findings30='[{"id":"A-abc123","severity":"blocker","category":"ambiguity","source_checker":"plan-ambiguity-checker","location":{"section":"Task Breakdown","fragment":"use caching"},"evidence":"vague","explanation":"no strategy","suggested_resolution":"define 
strategy","affected_acs":[],"affected_tasks":[],"ambiguity_details":{"competing_interpretations":["A","B"],"execution_drift_risk":"high","clarification_question":"what cache?"}}]' +resolutions30='[{"finding_id":"A-abc123","resolution_type":"contradiction_resolution","resolution":"wrong type"}]' +result30="$(plan_check_build_resolved_json "$TEST_DIR/valid-plan.md" "abc" "test" "{}" 0 "$findings30" "$resolutions30")" +status30="$(echo "$result30" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["status"])')" +unresolved30="$(echo "$result30" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["summary"]["unresolved_blockers"])')" +if [[ "$status30" == "fail" && "$unresolved30" == "1" ]]; then + pass "Ambiguity with wrong resolution type remains unresolved" +else + fail "Ambiguity with wrong resolution type should remain unresolved" "status=fail, unresolved=1" "status=$status30, unresolved=$unresolved30" +fi + +# Test 31: plan-check.sh rejects array severity +mkdir -p "$TEST_DIR/report31" +cat > "$TEST_DIR/array-severity.json" << 'EOF' +[{"id":"F-001","severity":[],"category":"schema","source_checker":"plan-schema-validator","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[]}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report31" \ + --findings-file "$TEST_DIR/array-severity.json" > /dev/null 2>&1 +cat31="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report31/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$cat31" == "runtime-error" ]]; then + pass "Array severity produces runtime-error" +else + fail "Array severity should produce runtime-error" "runtime-error" "$cat31" +fi + +# Test 32: plan-check.sh rejects object category +echo "Test 32: plan-check.sh rejects object category" +mkdir -p "$TEST_DIR/report32" +cat 
> "$TEST_DIR/object-category.json" << 'EOF' +[{"id":"F-001","severity":"blocker","category":{},"source_checker":"plan-schema-validator","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[]}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report32" \ + --findings-file "$TEST_DIR/object-category.json" > /dev/null 2>&1 +cat32="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report32/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$cat32" == "runtime-error" ]]; then + pass "Object category produces runtime-error" +else + fail "Object category should produce runtime-error" "runtime-error" "$cat32" +fi + +# Test 33: plan-check.sh rejects number severity +echo "Test 33: plan-check.sh rejects number severity" +mkdir -p "$TEST_DIR/report33" +cat > "$TEST_DIR/number-severity.json" << 'EOF' +[{"id":"F-001","severity":123,"category":"schema","source_checker":"plan-schema-validator","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[]}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report33" \ + --findings-file "$TEST_DIR/number-severity.json" > /dev/null 2>&1 +cat33="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report33/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$cat33" == "runtime-error" ]]; then + pass "Number severity produces runtime-error" +else + fail "Number severity should produce runtime-error" "runtime-error" "$cat33" +fi + +# Helper: simulate the full rewrite flow with actual helpers and filesystem assertions +simulate_rewrite_flow() { + local plan_file="$1" + local report_dir="$2" + local revised_content="$3" + local 
user_choice="$4" + local final_confirm="$5" + local recheck_enabled="$6" + + local diff_generated=0 + local backup_path="" + local atomic_write_performed=0 + local recheck_count=0 + local recheck_blockers="" + local pre_hash="" + local post_hash="" + + pre_hash="$(sha256sum "$plan_file" | awk '{print $1}')" + + if [[ "$user_choice" == "accept" ]]; then + diff -u "$plan_file" <(printf '%s\n' "$revised_content") > "$report_dir/diff.txt" 2>/dev/null || true + diff_generated=1 + + if [[ "$final_confirm" == "yes" ]]; then + backup_path="$(plan_check_backup_plan "$plan_file" "$report_dir")" + + plan_check_atomic_write "$plan_file" "$revised_content" + atomic_write_performed=1 + + if [[ "$recheck_enabled" == "true" ]]; then + recheck_count=1 + # Run actual schema validation on rewritten plan + local recheck_findings + recheck_findings="$(collect_schema_findings "$plan_file")" + recheck_blockers="$(count_severity "$recheck_findings" blocker)" + fi + fi + fi + + post_hash="$(sha256sum "$plan_file" | awk '{print $1}')" + + python3 -c 'import json,sys; d=json.loads(sys.stdin.read().strip()); d["pre_hash"]=d.pop("_pre"); d["post_hash"]=d.pop("_post"); print(json.dumps(d))' <<< "$(printf '{"diff_generated":%d,"backup_path":"%s","atomic_write_performed":%d,"recheck_count":%d,"recheck_blockers":"%s","_pre":"%s","_post":"%s"}\n' "$diff_generated" "$backup_path" "$atomic_write_performed" "$recheck_count" "$recheck_blockers" "$pre_hash" "$post_hash")" +} + +# Helper: simulate semantic retry-once behavior and validate findings through plan-check.sh +simulate_semantic_retry() { + local first_output="$1" + local second_output="$2" + local report_dir="$3" + local valid_plan="$4" + + local retry_count=0 + local accepted_findings="[]" + + retry_count=1 + if [[ "$first_output" == "valid" ]]; then + 
accepted_findings='[{"id":"F-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[]}]' + else + retry_count=2 + if [[ "$second_output" == "valid" ]]; then + accepted_findings='[{"id":"F-001","severity":"blocker","category":"contradiction","source_checker":"plan-consistency-checker","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[]}]' + else + accepted_findings='[{"id":"F-RUNTIME-001","severity":"info","category":"runtime-error","source_checker":"plan-schema-validator","location":{"section":"","fragment":""},"evidence":"Semantic check failed after retry","explanation":"The semantic checker produced malformed output after one retry","suggested_resolution":"Review the sub-agent output and retry the check","affected_acs":[],"affected_tasks":[]}]' + fi + fi + + # Validate accepted findings through plan-check.sh + local findings_file="$report_dir/findings.json" + mkdir -p "$report_dir" + printf '%s\n' "$accepted_findings" > "$findings_file" + bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$valid_plan" \ + --report-dir "$report_dir" \ + --findings-file "$findings_file" > /dev/null 2>&1 || true + + printf '{"retry_count":%d,"findings":%s}\n' "$retry_count" "$accepted_findings" +} + +# Test 34: Accept rewrite + confirm final defaults to no recheck +echo "Test 34: Accept+confirm defaults to no recheck" +mkdir -p "$TEST_DIR/report34" +# Start with an invalid plan; default behavior should still skip the recheck. +cat > "$TEST_DIR/flow-plan-34.md" << 'EOF' +# Invalid Plan + +## Goal Description +A plan. 
+ +## Acceptance Criteria + +- **AC-1**: First criterion + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | coding | - | +EOF +# Fixed content adds missing Path Boundaries +fixed34="# Fixed Plan\n\n## Goal Description\nA plan.\n\n## Acceptance Criteria\n\n- **AC-1**: First criterion\n\n## Path Boundaries\n\n### Upper Bound\nComplete.\n\n### Lower Bound\nMinimum.\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do something | AC-1 | coding | - |" +result34="$(simulate_rewrite_flow "$TEST_DIR/flow-plan-34.md" "$TEST_DIR/report34" "$fixed34" "accept" "yes" "false")" +diff34="$(echo "$result34" | python3 -c 'import json,sys; print(json.load(sys.stdin)["diff_generated"])')" +backup_path34="$(echo "$result34" | python3 -c 'import json,sys; print(json.load(sys.stdin)["backup_path"])')" +atomic34="$(echo "$result34" | python3 -c 'import json,sys; print(json.load(sys.stdin)["atomic_write_performed"])')" +recheck34="$(echo "$result34" | python3 -c 'import json,sys; print(json.load(sys.stdin)["recheck_count"])')" +recheck_blockers34="$(echo "$result34" | python3 -c 'import json,sys; print(json.load(sys.stdin)["recheck_blockers"])')" +pre34="$(echo "$result34" | python3 -c 'import json,sys; print(json.load(sys.stdin)["pre_hash"])')" +post34="$(echo "$result34" | python3 -c 'import json,sys; print(json.load(sys.stdin)["post_hash"])')" +if [[ "$diff34" == "1" && -f "$backup_path34" && "$atomic34" == "1" && "$recheck34" == "0" && "$recheck_blockers34" == "" && "$pre34" != "$post34" ]]; then + pass "Accept+confirm default: diff, backup exists, atomic write, recheck=0, plan changed" +else + fail "Accept+confirm default flow" "diff=1, backup exists, atomic=1, recheck=0, recheck_blockers empty, hash changed" "diff=$diff34, backup=$backup_path34, atomic=$atomic34, 
recheck=$recheck34, recheck_blockers=$recheck_blockers34, pre=$pre34, post=$post34" +fi + +# Test 35: Accept rewrite + confirm final + --recheck runs recheck +echo "Test 35: Accept+confirm with --recheck produces recheck=1" +mkdir -p "$TEST_DIR/report35" +cat > "$TEST_DIR/flow-plan-35.md" << 'EOF' +# Invalid Plan + +## Goal Description +A plan. + +## Acceptance Criteria + +- **AC-1**: First criterion + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | coding | - | +EOF +fixed35="# Fixed Plan\n\n## Goal Description\nA plan.\n\n## Acceptance Criteria\n\n- **AC-1**: First criterion\n\n## Path Boundaries\n\n### Upper Bound\nComplete.\n\n### Lower Bound\nMinimum.\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do something | AC-1 | coding | - |" +result35="$(simulate_rewrite_flow "$TEST_DIR/flow-plan-35.md" "$TEST_DIR/report35" "$fixed35" "accept" "yes" "true")" +atomic35="$(echo "$result35" | python3 -c 'import json,sys; print(json.load(sys.stdin)["atomic_write_performed"])')" +recheck35="$(echo "$result35" | python3 -c 'import json,sys; print(json.load(sys.stdin)["recheck_count"])')" +recheck_blockers35="$(echo "$result35" | python3 -c 'import json,sys; print(json.load(sys.stdin)["recheck_blockers"])')" +if [[ "$atomic35" == "1" && "$recheck35" == "1" && "$recheck_blockers35" == "0" ]]; then + pass "Accept+confirm with --recheck: atomic=1, recheck=1, recheck_blockers=0" +else + fail "Accept+confirm --recheck flow" "atomic=1, recheck=1, recheck_blockers=0" "atomic=$atomic35, recheck=$recheck35, recheck_blockers=$recheck_blockers35" +fi + +# Test 36: Decline rewrite leaves plan unchanged and creates no backup +echo "Test 36: Decline leaves plan unchanged, no backup, no atomic write, no recheck" +mkdir -p "$TEST_DIR/report36" +cp 
"$TEST_DIR/valid-plan.md" "$TEST_DIR/flow-plan-36.md" +fixed36="# Modified Plan\n\n## Goal Description\nModified.\n\n## Acceptance Criteria\n\n- **AC-1**: First criterion\n\n## Path Boundaries\n\n### Upper Bound\nComplete.\n\n### Lower Bound\nMinimum.\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On |\n|---------|-------------|-----------|-----|------------|\n| task1 | Do something | AC-1 | coding | - |" +result36="$(simulate_rewrite_flow "$TEST_DIR/flow-plan-36.md" "$TEST_DIR/report36" "$fixed36" "decline" "no" "false")" +backup_path36="$(echo "$result36" | python3 -c 'import json,sys; print(json.load(sys.stdin)["backup_path"])')" +atomic36="$(echo "$result36" | python3 -c 'import json,sys; print(json.load(sys.stdin)["atomic_write_performed"])')" +recheck36="$(echo "$result36" | python3 -c 'import json,sys; print(json.load(sys.stdin)["recheck_count"])')" +pre36="$(echo "$result36" | python3 -c 'import json,sys; print(json.load(sys.stdin)["pre_hash"])')" +post36="$(echo "$result36" | python3 -c 'import json,sys; print(json.load(sys.stdin)["post_hash"])')" +if [[ -z "$backup_path36" && "$atomic36" == "0" && "$recheck36" == "0" && "$pre36" == "$post36" ]]; then + pass "Decline: no backup, no atomic write, no recheck, plan hash unchanged" +else + fail "Decline flow" "backup empty, atomic=0, recheck=0, hash unchanged" "backup=$backup_path36, atomic=$atomic36, recheck=$recheck36, pre=$pre36, post=$post36" +fi + +# Test 37: Accept then reject final confirmation leaves plan unchanged +echo "Test 37: Accept+reject-final leaves plan unchanged, no backup, no atomic write, no recheck" +mkdir -p "$TEST_DIR/report37" +cp "$TEST_DIR/valid-plan.md" "$TEST_DIR/flow-plan-37.md" +fixed37="# Modified Plan\n\n## Goal Description\nModified.\n\n## Acceptance Criteria\n\n- **AC-1**: First criterion\n\n## Path Boundaries\n\n### Upper Bound\nComplete.\n\n### Lower Bound\nMinimum.\n\n## Task Breakdown\n\n| Task ID | Description | Target AC | Tag | Depends On 
|\n|---------|-------------|-----------|-----|------------|\n| task1 | Do something | AC-1 | coding | - |" +result37="$(simulate_rewrite_flow "$TEST_DIR/flow-plan-37.md" "$TEST_DIR/report37" "$fixed37" "accept" "no" "false")" +backup_path37="$(echo "$result37" | python3 -c 'import json,sys; print(json.load(sys.stdin)["backup_path"])')" +atomic37="$(echo "$result37" | python3 -c 'import json,sys; print(json.load(sys.stdin)["atomic_write_performed"])')" +recheck37="$(echo "$result37" | python3 -c 'import json,sys; print(json.load(sys.stdin)["recheck_count"])')" +pre37="$(echo "$result37" | python3 -c 'import json,sys; print(json.load(sys.stdin)["pre_hash"])')" +post37="$(echo "$result37" | python3 -c 'import json,sys; print(json.load(sys.stdin)["post_hash"])')" +if [[ -z "$backup_path37" && "$atomic37" == "0" && "$recheck37" == "0" && "$pre37" == "$post37" ]]; then + pass "Accept+reject-final: no backup, no atomic write, no recheck, plan hash unchanged" +else + fail "Accept+reject-final flow" "backup empty, atomic=0, recheck=0, hash unchanged" "backup=$backup_path37, atomic=$atomic37, recheck=$recheck37, pre=$pre37, post=$post37" +fi + +# Test 38: Semantic retry-once: malformed then valid yields retry=2 and accepted findings +echo "Test 38: Semantic retry malformed-then-valid yields retry=2 and accepted findings" +mkdir -p "$TEST_DIR/report38" +result38="$(simulate_semantic_retry "malformed" "valid" "$TEST_DIR/report38" "$TEST_DIR/valid-plan.md")" +retry38="$(echo "$result38" | python3 -c 'import json,sys; print(json.load(sys.stdin)["retry_count"])')" +count38="$(echo "$result38" | python3 -c 'import json,sys; print(len(json.load(sys.stdin)["findings"]))')" +severity38="$(echo "$result38" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["findings"][0]["severity"])')" +# Validate findings passed through plan-check.sh +valid38="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report38/findings.json')); print(d['findings'][0]['category'])" 
2>/dev/null || echo 'ERROR')" +if [[ "$retry38" == "2" && "$count38" == "1" && "$severity38" == "blocker" && "$valid38" == "contradiction" ]]; then + pass "Semantic retry malformed-then-valid: retry=2, accepted blocker findings, validated by plan-check.sh" +else + fail "Semantic retry malformed-then-valid" "retry=2, count=1, severity=blocker, valid category" "retry=$retry38, count=$count38, severity=$severity38, valid=$valid38" +fi + +# Test 39: Semantic retry-once: malformed then malformed yields retry=2 and runtime-error +echo "Test 39: Semantic retry malformed-then-malformed yields retry=2 and runtime-error" +mkdir -p "$TEST_DIR/report39" +result39="$(simulate_semantic_retry "malformed" "malformed" "$TEST_DIR/report39" "$TEST_DIR/valid-plan.md")" +retry39="$(echo "$result39" | python3 -c 'import json,sys; print(json.load(sys.stdin)["retry_count"])')" +count39="$(echo "$result39" | python3 -c 'import json,sys; print(len(json.load(sys.stdin)["findings"]))')" +severity39="$(echo "$result39" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["findings"][0]["severity"])')" +category39="$(echo "$result39" | python3 -c 'import json,sys; d=json.load(sys.stdin); print(d["findings"][0]["category"])')" +# Validate runtime-error finding has all required schema fields via plan-check.sh +valid39_cat="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report39/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +valid39_id="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report39/findings.json')); print(d['findings'][0]['id'])" 2>/dev/null || echo 'ERROR')" +if [[ "$retry39" == "2" && "$count39" == "1" && "$severity39" == "info" && "$category39" == "runtime-error" && "$valid39_cat" == "runtime-error" && "$valid39_id" == "F-RUNTIME-001" ]]; then + pass "Semantic retry malformed-then-malformed: retry=2, runtime-error info, all schema fields valid" +else + fail "Semantic retry malformed-then-malformed" "retry=2, count=1, 
severity=info, category=runtime-error, valid id/cat" "retry=$retry39, count=$count39, severity=$severity39, category=$category39, valid_cat=$valid39_cat, valid_id=$valid39_id" +fi + +# Test 40: Null severity produces runtime-error +echo "Test 40: plan-check.sh rejects null severity" +mkdir -p "$TEST_DIR/report40" +cat > "$TEST_DIR/null-severity.json" << 'EOF' +[{"id":"F-001","severity":null,"category":"schema","source_checker":"plan-schema-validator","location":{"section":"Test","fragment":""},"evidence":"test","explanation":"test","suggested_resolution":"fix","affected_acs":[],"affected_tasks":[]}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report40" \ + --findings-file "$TEST_DIR/null-severity.json" > /dev/null 2>&1 +cat40="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report40/findings.json')); print(d['findings'][0]['category'])" 2>/dev/null || echo 'ERROR')" +if [[ "$cat40" == "runtime-error" ]]; then + pass "Null severity produces runtime-error" +else + fail "Null severity should produce runtime-error" "runtime-error" "$cat40" +fi + +# Test 41: Mixed valid and malformed findings preserve valid blockers +echo "Test 41: Mixed valid and malformed findings preserve valid blockers" +mkdir -p "$TEST_DIR/report41" +cat > "$TEST_DIR/mixed-valid-invalid.json" << 'EOF' +[ + {"id":"F-SCHEMA-001","severity":"blocker","category":"schema","source_checker":"plan-schema-validator","location":{"section":"Task Breakdown","fragment":"task1"},"evidence":"schema blocker","explanation":"schema blocker remains valid","suggested_resolution":"fix schema","affected_acs":[],"affected_tasks":["task1"]}, + {"id":"F-BAD-001","severity":null,"category":"schema","source_checker":"plan-schema-validator","location":{"section":"Task Breakdown","fragment":"task2"},"evidence":"bad semantic finding","explanation":"bad item","suggested_resolution":"fix bad item","affected_acs":[],"affected_tasks":["task2"]} +] +EOF 
+bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report41" \ + --findings-file "$TEST_DIR/mixed-valid-invalid.json" > /dev/null 2>&1 +mixed41="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report41/findings.json')); cats=[f['category'] for f in d['findings']]; ids=[f['id'] for f in d['findings']]; print('ok' if 'F-SCHEMA-001' in ids and 'runtime-error' in cats and d['summary']['blockers'] == 1 and d['summary']['status'] == 'fail' else f'bad ids={ids} cats={cats} summary={d[\"summary\"]}')" 2>/dev/null || echo 'ERROR')" +if [[ "$mixed41" == "ok" ]]; then + pass "Mixed valid/invalid findings keep valid blocker and append runtime-error" +else + fail "Mixed valid/invalid findings should keep valid blocker" "ok" "$mixed41" +fi + +# Test 42: Command spec line order: rewrite prompt precedes diff preview precedes final confirmation precedes backup/atomic write precedes recheck gate +echo "Test 42: Command spec rewrite line order" +CMD_SPEC="$PROJECT_ROOT/commands/plan-check.md" +rewrite_line="$(grep -n -i 'rewrite the plan file' "$CMD_SPEC" | head -1 | cut -d: -f1)" +diff_line="$(grep -n -i 'diff preview' "$CMD_SPEC" | head -1 | cut -d: -f1)" +confirm_line="$(grep -n -i 'apply these changes' "$CMD_SPEC" | head -1 | cut -d: -f1)" +backup_line="$(grep -n 'plan_check_backup_plan' "$CMD_SPEC" | head -1 | cut -d: -f1)" +atomic_line="$(grep -n 'plan_check_atomic_write' "$CMD_SPEC" | head -1 | cut -d: -f1)" +recheck_line="$(grep -n 'EFFECTIVE_RECHECK=true' "$CMD_SPEC" | tail -1 | cut -d: -f1)" +if [[ -n "$rewrite_line" && -n "$diff_line" && -n "$confirm_line" && -n "$backup_line" && -n "$atomic_line" && -n "$recheck_line" ]]; then + if [[ "$rewrite_line" -lt "$diff_line" && "$diff_line" -lt "$confirm_line" && "$confirm_line" -lt "$backup_line" && "$backup_line" -lt "$atomic_line" && "$atomic_line" -lt "$recheck_line" ]]; then + pass "Command spec rewrite line order is correct" + else + fail "Command spec rewrite 
line order" "rewrite < diff < confirm < backup < atomic < recheck" "rewrite=$rewrite_line, diff=$diff_line, confirm=$confirm_line, backup=$backup_line, atomic=$atomic_line, recheck=$recheck_line" + fi +else + fail "Command spec rewrite line order: missing keywords" "all keywords present" "rewrite=$rewrite_line, diff=$diff_line, confirm=$confirm_line, backup=$backup_line, atomic=$atomic_line, recheck=$recheck_line" +fi + +# Test 43: Command spec restricts shell tools while allowing repair flow writes +echo "Test 43: Command spec restricts shell tools while allowing repair flow writes" +if ! grep -q '^ - "Bash"$' "$CMD_SPEC" && \ + grep -q '^ - "Bash(mktemp:\*)"$' "$CMD_SPEC" && \ + grep -q '^ - "Bash(diff:\*)"$' "$CMD_SPEC" && \ + grep -q '^ - "Bash(${CLAUDE_PLUGIN_ROOT}/scripts/plan-check.sh:\*)"$' "$CMD_SPEC" && \ + grep -q '^ - "Write"$' "$CMD_SPEC" && \ + grep -q '^ - "Edit"$' "$CMD_SPEC"; then + pass "Command spec restricts Bash and keeps required write/diff tools" +else + fail "Command spec shell tool restrictions" "no bare Bash; mktemp, diff, plan-check.sh, Write, Edit allowed" "missing or unrestricted" +fi + +# Test 44: Regression: info finding with severity strings in non-severity fields must not distort counts +echo "Test 44: Info finding with severity strings in non-severity fields reports correct counts" +mkdir -p "$TEST_DIR/report42" +cat > "$TEST_DIR/mixed-section.json" << 'EOF' +[{"id":"F-INFO-001","severity":"info","category":"appendix-drift","source_checker":"plan-schema-validator","location":{"section":"blocker warning info","fragment":""},"evidence":"appendix present","explanation":"review appendix","suggested_resolution":"check drift","affected_acs":[],"affected_tasks":[]}] +EOF +bash "$PROJECT_ROOT/scripts/plan-check.sh" \ + --plan "$TEST_DIR/valid-plan.md" \ + --report-dir "$TEST_DIR/report42" \ + --findings-file "$TEST_DIR/mixed-section.json" > /dev/null 2>&1 +blockers42="$(python3 -c "import json; 
d=json.load(open('$TEST_DIR/report42/findings.json')); print(d['summary']['blockers'])" 2>/dev/null || echo 'ERROR')" +warnings42="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report42/findings.json')); print(d['summary']['warnings'])" 2>/dev/null || echo 'ERROR')" +infos42="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report42/findings.json')); print(d['summary']['infos'])" 2>/dev/null || echo 'ERROR')" +status42="$(python3 -c "import json; d=json.load(open('$TEST_DIR/report42/findings.json')); print(d['summary']['status'])" 2>/dev/null || echo 'ERROR')" +# Assert report.md markdown summary +md_blockers42="$(grep -oE 'Blockers: [0-9]+' "$TEST_DIR/report42/report.md" | awk '{print $2}')" +md_warnings42="$(grep -oE 'Warnings: [0-9]+' "$TEST_DIR/report42/report.md" | awk '{print $2}')" +md_infos42="$(grep -oE 'Infos: [0-9]+' "$TEST_DIR/report42/report.md" | awk '{print $2}')" +if [[ "$blockers42" == "0" && "$warnings42" == "0" && "$infos42" == "1" && "$status42" == "pass" && "$md_blockers42" == "0" && "$md_warnings42" == "0" && "$md_infos42" == "1" ]]; then + pass "Info finding with severity strings in non-severity fields: blockers=0, warnings=0, infos=1, status=pass, report.md correct" +else + fail "Info finding regression" "blockers=0, warnings=0, infos=1, status=pass, md matches" "blockers=$blockers42, warnings=$warnings42, infos=$infos42, status=$status42, md_blockers=$md_blockers42, md_warnings=$md_warnings42, md_infos=$md_infos42" +fi + +# Test 45: Command spec sub-agent contract asserts retry-once and runtime-error fallback +echo "Test 45: Command spec semantic retry-once contract" +CMD_SPEC="$PROJECT_ROOT/commands/plan-check.md" +retry_line="$(grep -n -i 'retry once' "$CMD_SPEC" | head -1 | cut -d: -f1)" +continue_line="$(grep -n -i 'runtime-error.*finding' "$CMD_SPEC" | grep -i 'continue' | head -1 | cut -d: -f1)" +if [[ -n "$retry_line" && -n "$continue_line" ]]; then + pass "Command spec contains retry-once and 
continue-with-runtime-error contract" +else + fail "Command spec retry contract" "retry-once and continue-with-runtime-error lines present" "retry=$retry_line, continue=$continue_line" +fi + +# Test 46: validate-plan-check-io.sh returns exit 4 when output path exists as a file +echo "Test 46: validate-plan-check-io.sh exit 4 when output path exists as file" +mkdir -p "$TEST_DIR/io-test/.humanize" +touch "$TEST_DIR/io-test/.humanize/plan-check" +echo "# test plan" > "$TEST_DIR/io-test/plan.md" +bash "$PROJECT_ROOT/scripts/validate-plan-check-io.sh" \ + --plan "$TEST_DIR/io-test/plan.md" > /dev/null 2>&1 +exit44=$? +if [[ "$exit44" == "4" ]]; then + pass "validate-plan-check-io.sh returns exit 4 for existing output file" +else + fail "validate-plan-check-io.sh exit 4" "exit 4" "exit $exit44" +fi + +# Test 47: validate-plan-check-io.sh uses project-level report dir for nested plan paths +echo "Test 47: validate-plan-check-io.sh uses project-level report dir for nested plan paths" +mkdir -p "$TEST_DIR/report-root-repo/docs" +git -C "$TEST_DIR/report-root-repo" init -q +echo "# test plan" > "$TEST_DIR/report-root-repo/docs/plan.md" +OUTPUT45="$(bash "$PROJECT_ROOT/scripts/validate-plan-check-io.sh" \ + --plan "$TEST_DIR/report-root-repo/docs/plan.md" 2>&1)" +exit45=$? 
+expected_report45="$TEST_DIR/report-root-repo/.humanize/plan-check" +wrong_report45="$TEST_DIR/report-root-repo/docs/.humanize/plan-check" +if [[ "$exit45" == "0" && "$OUTPUT45" == *"Report directory: $expected_report45"* && "$OUTPUT45" != *"$wrong_report45"* ]]; then + pass "validate-plan-check-io.sh uses project-level report dir for nested plan paths" +else + fail "validate-plan-check-io.sh nested plan report dir" "$expected_report45" "exit $exit45; output: $OUTPUT45" +fi + +# Test 48: validate-plan-check-io.sh accepts --recheck and reports enabled +echo "Test 48: validate-plan-check-io.sh accepts --recheck" +mkdir -p "$TEST_DIR/io-recheck" +echo "# test plan" > "$TEST_DIR/io-recheck/plan.md" +OUTPUT46="$(bash "$PROJECT_ROOT/scripts/validate-plan-check-io.sh" \ + --plan "$TEST_DIR/io-recheck/plan.md" \ + --recheck 2>&1)" +exit46=$? +if [[ "$exit46" == "0" && "$OUTPUT46" == *"Recheck: true"* ]]; then + pass "validate-plan-check-io.sh accepts --recheck" +else + fail "validate-plan-check-io.sh --recheck" "exit 0 and Recheck: true" "exit $exit46; output: $OUTPUT46" +fi + +# Test 49: validate-plan-check-io.sh rejects removed --no-recheck flag +echo "Test 49: validate-plan-check-io.sh rejects --no-recheck" +OUTPUT47="$(bash "$PROJECT_ROOT/scripts/validate-plan-check-io.sh" \ + --plan "$TEST_DIR/io-recheck/plan.md" \ + --no-recheck 2>&1)" +exit47=$? +if [[ "$exit47" == "6" && "$OUTPUT47" == *"Unknown option: --no-recheck"* ]]; then + pass "validate-plan-check-io.sh rejects --no-recheck" +else + fail "validate-plan-check-io.sh rejects --no-recheck" "exit 6 and unknown option" "exit $exit47; output: $OUTPUT47" +fi + +# Test 50: Regression: canonical-only AC syntax must not abort in strict mode +echo "Test 50: Canonical-only AC syntax validates under strict mode" +cat > "$TEST_DIR/canonical-only-ac.md" << 'EOF' +# Canonical AC Plan + +## Goal Description +A plan that uses the canonical AC syntax from the gen-plan template. 
+ +## Acceptance Criteria + +- AC-1: First criterion + - Positive Tests: + - Test passes + - Negative Tests: + - Test fails + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | coding | - | +EOF +strict47="$TEST_DIR/canonical-only-ac-findings.json" +collect_schema_findings_strict "$TEST_DIR/canonical-only-ac.md" "$strict47" > /dev/null 2>&1 +exit47=$? +findings47="$(cat "$strict47" 2>/dev/null || echo '[]')" +blockers47="$(count_severity "$findings47" blocker)" +if [[ "$exit47" == "0" && "$blockers47" == "0" ]]; then + pass "Canonical-only AC syntax does not abort and has no blockers" +else + fail "Canonical-only AC syntax strict validation" "exit 0 and 0 blockers" "exit $exit47, blockers=$blockers47, findings=$findings47" +fi + +# Test 51: Regression: appendix task tables must not affect main plan schema validation +echo "Test 51: Appendix task tables are ignored by schema validators" +cat > "$TEST_DIR/appendix-task-table.md" << 'EOF' +# Appendix Task Table Plan + +## Goal Description +A valid plan with a stale draft task table in the appendix. + +## Acceptance Criteria + +- AC-1: First criterion + - Positive Tests: + - Test passes + - Negative Tests: + - Test fails + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task1 | Do something | AC-1 | coding | - | + +--- Original Design Draft Start --- + +Draft notes. 
+ +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| stale-task | Should be ignored | AC-99 | review | missing-task | + +--- Original Design Draft End --- +EOF +strict48="$TEST_DIR/appendix-task-table-findings.json" +collect_schema_findings_strict "$TEST_DIR/appendix-task-table.md" "$strict48" > /dev/null 2>&1 +exit48=$? +findings48="$(cat "$strict48" 2>/dev/null || echo '[]')" +blockers48="$(count_severity "$findings48" blocker)" +if [[ "$exit48" == "0" && "$blockers48" == "0" ]]; then + pass "Appendix task table rows do not produce schema/dependency blockers" +else + fail "Appendix task table strict validation" "exit 0 and 0 blockers" "exit $exit48, blockers=$blockers48, findings=$findings48" +fi + +# Test 52: Schema findings escape plan-derived JSON metacharacters +echo "Test 52: Schema findings escape plan-derived JSON metacharacters" +cat > "$TEST_DIR/quoted-task-id.md" << 'EOF' +# Quoted Task ID Plan + +## Goal Description +A malformed plan with JSON metacharacters in task fields. + +## Acceptance Criteria + +- AC-1: First criterion + - Positive Tests: + - Test passes + - Negative Tests: + - Test fails + +## Path Boundaries + +### Upper Bound +Complete. + +### Lower Bound +Minimum. + +## Task Breakdown + +| Task ID | Description | Target AC | Tag | Depends On | +|---------|-------------|-----------|-----|------------| +| task"1 | Do something | AC-99 | review"tag | missing"dep | +EOF +strict49="$TEST_DIR/quoted-task-id-findings.json" +collect_schema_findings_strict "$TEST_DIR/quoted-task-id.md" "$strict49" > /dev/null 2>&1 +exit49=$? 
+findings49="$(cat "$strict49" 2>/dev/null || echo '[]')" +quoted_task49="$(printf '%s' "$findings49" | python3 -c 'import json,sys; d=json.load(sys.stdin); matches=[f for f in d if f.get("location", {}).get("fragment") == "task\"1"]; print(matches[0]["affected_tasks"][0] if matches else "MISSING")' 2>/dev/null || echo 'ERROR')" +blockers49="$(count_severity "$findings49" blocker 2>/dev/null || echo ERROR)" +if [[ "$exit49" == "0" && "$quoted_task49" == 'task"1' && "$blockers49" != "ERROR" && "$blockers49" -ge 1 ]]; then + pass "Schema findings preserve quoted task ID as valid JSON" +else + fail "Schema findings should preserve quoted task ID as valid JSON" "exit 0, task\"1 in affected_tasks, parseable blockers" "exit $exit49, quoted_task=$quoted_task49, blockers=$blockers49, findings=$findings49" +fi + +# Test 53: Codex independent plan-check skill is wired into installer and docs +echo "Test 53: Codex independent plan-check skill wiring" +PLAN_CHECK_SKILL="$PROJECT_ROOT/skills/humanize-plan-check/SKILL.md" +INSTALL_SKILL_SCRIPT="$PROJECT_ROOT/scripts/install-skill.sh" +CODEX_INSTALL_DOC="$PROJECT_ROOT/docs/install-for-codex.md" +KIMI_INSTALL_DOC="$PROJECT_ROOT/docs/install-for-kimi.md" +USAGE_DOC="$PROJECT_ROOT/docs/usage.md" + +if [[ -f "$PLAN_CHECK_SKILL" ]]; then + pass "humanize-plan-check skill exists" +else + fail "humanize-plan-check skill exists" "$PLAN_CHECK_SKILL" "missing" +fi + +if grep -q '{{HUMANIZE_RUNTIME_ROOT}}/scripts/validate-plan-check-io.sh' "$PLAN_CHECK_SKILL" \ + && grep -q '{{HUMANIZE_RUNTIME_ROOT}}/scripts/plan-check.sh' "$PLAN_CHECK_SKILL" \ + && grep -q '.humanize/plan-check//report.md' "$PLAN_CHECK_SKILL"; then + pass "humanize-plan-check skill documents runtime scripts and report output" +else + fail "humanize-plan-check skill content" "runtime validation/report references" "missing" +fi + +if grep -q "spawn_agent" "$PLAN_CHECK_SKILL" \ + && grep -q "fork_context=false" "$PLAN_CHECK_SKILL" \ + && grep -q "plan-consistency-checker" 
"$PLAN_CHECK_SKILL" \ + && grep -q "plan-ambiguity-checker" "$PLAN_CHECK_SKILL" \ + && ! grep -q "Run semantic checks directly in this Codex session" "$PLAN_CHECK_SKILL"; then + pass "humanize-plan-check skill requires native checker sub-agents" +else + fail "humanize-plan-check sub-agent contract" "spawn_agent/fork_context and no direct-session semantic pass" "missing or direct-session fallback present" +fi + +if sed -n '/^SKILL_NAMES=(/,/^)/p' "$INSTALL_SKILL_SCRIPT" | grep -qF '"humanize-plan-check"'; then + pass "install-skill.sh includes humanize-plan-check in SKILL_NAMES" +else + fail "install-skill.sh includes humanize-plan-check in SKILL_NAMES" '"humanize-plan-check"' "missing from SKILL_NAMES" +fi + +if grep -q 'humanize-plan-check' "$CODEX_INSTALL_DOC" && grep -q 'humanize-plan-check' "$KIMI_INSTALL_DOC" && grep -q '\$humanize-plan-check --plan' "$USAGE_DOC"; then + pass "docs mention humanize-plan-check install and Codex usage" +else + fail "docs mention humanize-plan-check" "Codex/Kimi install docs and usage example" "missing" +fi + +echo "" +print_test_summary "Plan Check Test Summary" diff --git a/tests/test-unified-codex-config.sh b/tests/test-unified-codex-config.sh index 51e1e9b6..18d6b421 100755 --- a/tests/test-unified-codex-config.sh +++ b/tests/test-unified-codex-config.sh @@ -135,7 +135,7 @@ if [[ ! 
-f "$LOOP_COMMON" ]]; then skip "loop-common.sh tests require loop-common.sh" "file not found" else # Test default values load correctly - result=$(bash -c " + result=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-config-defaults" bash -c " source '$LOOP_COMMON' 2>/dev/null echo \"\$DEFAULT_CODEX_MODEL|\$DEFAULT_CODEX_EFFORT\" " 2>/dev/null || echo "ERROR") @@ -147,7 +147,7 @@ else "high" "$(echo "$result" | cut -d'|' -f2)" # Verify no reviewer constants or defaults exist - result=$(bash -c " + result=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-config-defaults" bash -c " source '$LOOP_COMMON' 2>/dev/null echo \"\${FIELD_LOOP_REVIEWER_MODEL:-ABSENT}|\${DEFAULT_LOOP_REVIEWER_MODEL:-ABSENT}\" " 2>/dev/null || echo "ERROR") @@ -281,7 +281,7 @@ agent_teams: false --- STATE_EOF - result=$(bash -c " + result=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-config-defaults" bash -c " source '$LOOP_COMMON' 2>/dev/null parse_state_file '$TEST_DIR/codex-state.md' EXEC_MODEL=\"\${STATE_CODEX_MODEL:-\$DEFAULT_CODEX_MODEL}\" @@ -316,7 +316,7 @@ agent_teams: false --- BARE_EOF - result=$(bash -c " + result=$(XDG_CONFIG_HOME="$TEST_DIR/no-user-config-defaults" bash -c " source '$LOOP_COMMON' 2>/dev/null parse_state_file '$TEST_DIR/bare-state.md' EXEC_MODEL=\"\${STATE_CODEX_MODEL:-\$DEFAULT_CODEX_MODEL}\"