diff --git a/hooks/loop-codex-stop-hook.sh b/hooks/loop-codex-stop-hook.sh index 0c191d4c..f0138f28 100755 --- a/hooks/loop-codex-stop-hook.sh +++ b/hooks/loop-codex-stop-hook.sh @@ -1063,6 +1063,25 @@ COMMIT_HISTORY_SECTION=$(load_and_render_safe "$TEMPLATE_DIR" "codex/commit-hist "COMMIT_HISTORY=$COMMIT_HISTORY" \ "RECENT_ROUND_FILES=$RECENT_ROUND_FILES") +# ======================================== +# Build the Session Invariants block +# ======================================== +# Enumerates the loop's session-wide byte-locks and immutables so the +# reviewer prompt can route findings whose only fix mutates a locked +# artifact through the canonical cancel/amend/restart path rather than +# re-issuing the same critique each round. This addresses the +# stagnation-class methodology gap where a reviewer kept demanding +# edits to a plan file the loop forbids in-place edits to. +SESSION_INVARIANTS="" +if [[ "$PLAN_TRACKED" == "true" ]]; then + SESSION_INVARIANTS+="- **Plan file byte-lock**: \`${PLAN_FILE}\` is byte-locked for the entire session via \`--track-plan-file\`. The stop hook enforces byte-identity against the snapshot taken at session open. Any finding whose only fix is to edit this file's prose CANNOT be addressed in-loop and MUST be tagged \`out-of-loop\` rather than \`must-fix\`. +" +fi +SESSION_INVARIANTS+="- **Working branch fixed**: the working branch \`${START_BRANCH}\` is constant across all rounds. Findings whose only fix requires a branch switch CANNOT be addressed in-loop. +- **Witness lattice / approved-path discipline**: methodology-level constants and seals declared in the project's CLAUDE.md / AGENTS.md (e.g., sealed traits, approved-path factories, schema freezes) are session invariants. Findings whose only fix mutates one of these constants CANNOT be addressed in-loop without an explicit successor ADR or plan amendment. 
+ +**Canonical resolution for invariant-locked findings**: \`/humanize:cancel-rlcr-loop\`, then amend the locked artifact in a fresh commit, then \`/humanize:start-rlcr-loop --track-plan-file <plan-file>\` to restart. The implementer cannot do this from inside the loop." + # Build the review prompt FULL_ALIGNMENT_FALLBACK="# Full Alignment Review (Round {{CURRENT_ROUND}}) @@ -1100,6 +1119,7 @@ if [[ "$FULL_ALIGNMENT_CHECK" == "true" ]]; then "DOCS_PATH=$DOCS_PATH" \ "GOAL_TRACKER_UPDATE_SECTION=$GOAL_TRACKER_UPDATE_SECTION" \ "COMMIT_HISTORY_SECTION=$COMMIT_HISTORY_SECTION" \ + "SESSION_INVARIANTS=$SESSION_INVARIANTS" \ "COMPLETED_ITERATIONS=$COMPLETED_ITERATIONS" \ "LOOP_TIMESTAMP=$LOOP_TIMESTAMP" \ "PREV_ROUND=$PREV_ROUND" \ @@ -1117,6 +1137,7 @@ else "DOCS_PATH=$DOCS_PATH" \ "GOAL_TRACKER_UPDATE_SECTION=$GOAL_TRACKER_UPDATE_SECTION" \ "COMMIT_HISTORY_SECTION=$COMMIT_HISTORY_SECTION" \ + "SESSION_INVARIANTS=$SESSION_INVARIANTS" \ "COMPLETED_ITERATIONS=$COMPLETED_ITERATIONS" \ "LOOP_TIMESTAMP=$LOOP_TIMESTAMP" \ "PREV_ROUND=$PREV_ROUND" \ diff --git a/prompt-template/claude/finalize-phase-prompt.md b/prompt-template/claude/finalize-phase-prompt.md index 2ee14176..92e6b1eb 100644 --- a/prompt-template/claude/finalize-phase-prompt.md +++ b/prompt-template/claude/finalize-phase-prompt.md @@ -50,3 +50,25 @@ Your summary should include: - Files modified during the Finalize Phase - Confirmation that tests still pass - Any notes about the refactoring decisions + +## Required: Outcome Classification + +The **first content line** of your finalize summary MUST be one of these three classifications, formatted exactly as shown: + +``` +Outcome: no-op (already-minimal) +``` +``` +Outcome: cosmetic (formatting only) +``` +``` +Outcome: substantive (logic edits) +``` + +Pick the one that matches what actually happened: + +- **`no-op (already-minimal)`** — the code was already at minimal complexity for its constraints; refactor agent (or you) made no edits. 
This is a positive signal — it means the prior rounds did not ship over-complex artifacts. Common in re-acceptance sessions where the substantive work landed in a prior session. +- **`cosmetic (formatting only)`** — only formatting / whitespace / comment-only changes. No logic touched. +- **`substantive (logic edits)`** — actual logic changes were made (extracted helpers, consolidated branches, removed dead code, etc.). For sessions whose Codex review approved COMPLETE before Finalize, a `substantive` outcome warrants a one-sentence justification of why the prior rounds shipped non-minimal artifacts. + +Why this classification matters: it lets future audits and methodology analyses tell at a glance whether the Finalize Phase added real value, was a no-op (the expected outcome in well-shaped rounds), or surfaced complexity the implementation rounds left behind. A `no-op` outcome is **not failure** — it is positive evidence that the prior rounds' exit point was already at local minimum complexity. diff --git a/prompt-template/claude/next-round-prompt.md b/prompt-template/claude/next-round-prompt.md index fd1b1cfe..149339c8 100644 --- a/prompt-template/claude/next-round-prompt.md +++ b/prompt-template/claude/next-round-prompt.md @@ -73,3 +73,27 @@ If you cannot safely reconcile the tracker yourself, include an optional "Goal T - blocking side issues - queued side issues - Only mainline gaps and blocking side issues should drive the next code changes + +## Optional: Blocked By Methodology Invariant Block + +If a Codex finding's only fix would mutate a session-byte-locked artifact (e.g., a plan file under `--track-plan-file`, a sealed witness lattice, a frozen wire-protocol), you cannot address it from inside the loop. Re-running the round shape over and over will not unstick it; the methodology will eventually trigger the stagnation circuit breaker. 
+ +When you recognise this class of impasse, include the following block in your round summary so the reviewer treats it as a high-confidence signal rather than re-issuing the same critique: + +```markdown +## Blocked By Methodology Invariant + +- Invariant: <invariant-name> (e.g., "plan-file-byte-lock", "witness-lattice-seven-impl-seal", "bus-v1-byte-freeze") +- Findings blocked: + - <finding 1> + - <finding 2> +- Canonical resolution: <cancel/amend/restart steps> +- Why I cannot act in-loop: <one-sentence reason> +``` + +When this block is present, the reviewer is asked to: +1. Acknowledge the block instead of re-issuing the listed findings as `must-fix`. +2. Tag the listed findings as `out-of-loop` rather than `must-fix`. +3. Recommend the canonical resolution path. + +Use this block conservatively. It is the implementer's escape hatch when the methodology's invariants prevent in-round action — it is NOT a way to defer ordinary follow-up work. Each finding in the block must be one that cannot be addressed without amending a byte-locked artifact. diff --git a/prompt-template/codex/full-alignment-review.md b/prompt-template/codex/full-alignment-review.md index 4367810e..8d98cec7 100644 --- a/prompt-template/codex/full-alignment-review.md +++ b/prompt-template/codex/full-alignment-review.md @@ -65,6 +65,18 @@ Queued Side Issues: N The `Mainline Progress Verdict` line is mandatory. If you omit it, the Humanize stop hook will block the round and require the review to be rerun. +### Out-of-Loop Findings (Loop-Aware Classification) + +If a finding's only fix would mutate a session-byte-locked artifact (see Session Invariants below), tag it `out-of-loop` rather than `must-fix` and recommend the canonical resolution path (cancel/amend/restart) instead of asking the implementer to address it in the next round. Re-issuing the same critique each round will not unstick the loop and will trigger the stagnation circuit breaker after a few iterations. 
+ +### Session Invariants + +The implementer is operating under the following session-wide invariants: + +{{SESSION_INVARIANTS}} + +If your top-priority finding falls into the `out-of-loop` class, say so explicitly in your review and stop demanding the implementer act on it in-loop. The next state-changing action must come from outside the current loop session. + ## Part 3: Implementation Review - Conduct a deep critical review of the implementation diff --git a/prompt-template/codex/regular-review.md b/prompt-template/codex/regular-review.md index 4d4a8680..8d004a26 100644 --- a/prompt-template/codex/regular-review.md +++ b/prompt-template/codex/regular-review.md @@ -52,6 +52,35 @@ You MUST classify your findings into these lanes: - **Mainline Gaps**: plan-derived work or AC progress that is missing, incomplete, or regressing - **Blocking Side Issues**: bugs or implementation issues that block the current mainline objective from succeeding safely - **Queued Side Issues**: valid non-blocking follow-up issues that should be documented but must NOT take over the next round +- **Out-of-Loop Findings**: findings whose only fix would mutate a session-byte-locked artifact (see Session Invariants below). Tag these `out-of-loop` rather than `must-fix` and recommend the canonical resolution path (cancel/amend/restart) instead of asking the implementer to address them in the next round. + +### Session Invariants (Loop-Aware) + +The implementer is operating under the following session-wide invariants. **Findings whose only fix would mutate a byte-locked artifact MUST be tagged `out-of-loop` rather than `must-fix`** — re-issuing the same critique each round will not unstick the loop and will trigger the stagnation circuit breaker after a few iterations. Route invariant-locked findings to the canonical resolution path instead of demanding in-round fixes. 
+ +{{SESSION_INVARIANTS}} + +If your top-priority finding falls into the `out-of-loop` class, say so explicitly in your review and stop demanding the implementer act on it in-loop. The next state-changing action must come from outside the current loop session. + +### Implementer's `## Blocked By Methodology Invariant` Block + +The implementer's summary may contain an optional `## Blocked By Methodology Invariant` block listing findings the implementer believes cannot be addressed in-loop because of a session invariant. The block has this shape: + +```markdown +## Blocked By Methodology Invariant + +- Invariant: +- Findings blocked: +- Canonical resolution: +- Why I cannot act in-loop: +``` + +When this block is present: +1. **Verify the implementer's claim**: confirm the listed findings are genuinely byte-locked behind the named invariant. If the implementer is using the block to defer ordinary follow-up work that they could in fact address in-loop, push back on that — only true invariant-locked findings belong here. +2. **For verified-blocked findings**: tag them `out-of-loop` in your review rather than `must-fix`, acknowledge the implementer's correct identification of the impasse class, and recommend the canonical resolution. Do NOT re-issue them as round-N+1 mainline gaps. +3. **For findings the implementer wrongly classified as blocked**: leave them in the appropriate `must-fix` lane and explain why they are in fact addressable in-loop. + +This signal exists to break the stagnation pattern where the reviewer keeps demanding edits the implementer is structurally forbidden from making. When the implementer correctly identifies that pattern via this block, your job is to validate it and route to the canonical resolution, not to litigate it for another round. 
Also include a one-line verdict: ``` diff --git a/scripts/setup-rlcr-loop.sh b/scripts/setup-rlcr-loop.sh index 15326bc4..1661a80e 100755 --- a/scripts/setup-rlcr-loop.sh +++ b/scripts/setup-rlcr-loop.sh @@ -814,11 +814,55 @@ if [[ -z "$BASE_COMMIT" ]]; then fi echo "Base commit SHA captured: $BASE_COMMIT" >&2 +# ======================================== +# Detect inherited-delta session +# ======================================== +# A new session is "inherited-delta" when the most recent prior session in +# .humanize/rlcr/ was working from a different base_commit than the one +# captured above — i.e., commits landed between the prior session's start +# and this session's start (whether by the prior session's own work, by +# off-loop amendments, or both). Re-acceptance sessions on prior work that +# hit the stagnation circuit breaker are the canonical example. +# +# Persisting this signal at session open lets the round-0 contract + +# the reviewer prompt route the session correctly without relying on +# the implementer's narration. +LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" + +INHERITED_DELTA="false" +PRIOR_TIMESTAMP="" +PRIOR_BASE_COMMIT="" +PRIOR_EXIT_STATE="unknown" + +if [[ -d "$LOOP_BASE_DIR" ]]; then + # Get the most recent prior session dir (any name; sorted lexicographically + # because session timestamps are zero-padded and sortable as strings). + PRIOR_SESSION_DIR=$(find "$LOOP_BASE_DIR" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | sort | tail -1 || true) + if [[ -n "$PRIOR_SESSION_DIR" ]] && [[ -d "$PRIOR_SESSION_DIR" ]]; then + PRIOR_TIMESTAMP=$(basename "$PRIOR_SESSION_DIR") + # Read base_commit from whichever state file exists (state, finalize, complete, methodology). 
+ for state_candidate in state.md finalize-state.md complete-state.md methodology-analysis-state.md; do + prior_state="$PRIOR_SESSION_DIR/$state_candidate" + if [[ -f "$prior_state" ]]; then + PRIOR_BASE_COMMIT=$(awk -F': *' '/^base_commit:/ {gsub(/^[[:space:]]+|[[:space:]]+$/, "", $2); print $2; exit}' "$prior_state" | tr -d '"' | tr -d "'") + PRIOR_EXIT_STATE="${state_candidate%.md}" + break + fi + done + + if [[ -n "$PRIOR_BASE_COMMIT" ]] && [[ "$PRIOR_BASE_COMMIT" != "$BASE_COMMIT" ]]; then + INHERITED_DELTA="true" + echo "Inherited-delta session detected: prior session $PRIOR_TIMESTAMP started at $PRIOR_BASE_COMMIT, this session starts at $BASE_COMMIT" >&2 + fi + fi +fi + # ======================================== # Setup State Directory # ======================================== -LOOP_BASE_DIR="$PROJECT_ROOT/.humanize/rlcr" +# LOOP_BASE_DIR was already defined above for the inherited-delta detection; +# reusing here without redefinition. # Create timestamp for this loop session TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) @@ -902,10 +946,67 @@ bitlesson_allow_empty_none: $BITLESSON_ALLOW_EMPTY_NONE mainline_stall_count: 0 last_mainline_verdict: unknown drift_status: normal +inherited_delta: $INHERITED_DELTA started_at: $(date -u +%Y-%m-%dT%H:%M:%SZ) --- EOF +# Generate session-lineage.md when this session opens with inherited delta +# from a prior session. The file gives future audits + the round-0 contract +# a machine-readable record of where the substantive work came from, instead +# of relying on the implementer's narrative across artifacts. 
+if [[ "$INHERITED_DELTA" == "true" ]]; then + INHERITED_COMMIT_LOG="" + if commit_range_log=$(run_with_timeout "$GIT_TIMEOUT" git -C "$PROJECT_ROOT" log --oneline "${PRIOR_BASE_COMMIT}..${BASE_COMMIT}" 2>/dev/null); then + INHERITED_COMMIT_LOG="$commit_range_log" + fi + if [[ -z "$INHERITED_COMMIT_LOG" ]]; then + INHERITED_COMMIT_LOG="(no commit subjects available — base commits are not in a single ancestry chain on this branch, or git log timed out)" + fi + + cat > "$LOOP_DIR/session-lineage.md" << LINEAGE_EOF +# Session Lineage — inherited-delta session + +This session opened with substantive work already at HEAD, inherited from a prior RLCR session +plus any off-loop commits that landed between sessions. The methodology classifies this as an +\`inherited-delta\` session: the round-0 contract should declare which prior-session changes are +being re-presented and which off-loop edits occurred, rather than re-running an +implementation-grade round shape over a thin re-acceptance task. + +## Prior session + +- Timestamp: \`$PRIOR_TIMESTAMP\` +- Base commit at prior start: \`$PRIOR_BASE_COMMIT\` +- Exit state file: \`$PRIOR_EXIT_STATE.md\` (whichever state file was found in the prior session directory) + +## This session + +- Base commit at start: \`$BASE_COMMIT\` +- Started at: $(date -u +%Y-%m-%dT%H:%M:%SZ) + +## Inherited commits + +Commit range \`${PRIOR_BASE_COMMIT}..${BASE_COMMIT}\` (between prior session base and this session base): + +\`\`\` +$INHERITED_COMMIT_LOG +\`\`\` + +## Why a new session is needed (please fill in) + +[Stub — the implementer should record why this is a new session rather than a continuation of +the prior session. Common reasons: +- Prior session hit the stagnation circuit breaker (cancel/amend/restart canonical resolution). +- Off-loop plan amendment (commit between sessions edited a byte-locked artifact). +- New acceptance criteria; the prior session closed cleanly but new scope landed. +- Prior session ended via max-iteration cap. 
+ +Replace this block with one paragraph describing the actual reason. Keep it brief — the audit +trail benefits from concise prose.] +LINEAGE_EOF + echo "session-lineage.md generated for inherited-delta session" >&2 +fi + # Create signal file for PostToolUse hook to record session_id # The hook will read the session_id from its JSON input and patch state.md # Format: line 1 = state file path, line 2 = command marker for verification diff --git a/tests/test-blocked-by-invariant-block.sh b/tests/test-blocked-by-invariant-block.sh new file mode 100755 index 00000000..4463b791 --- /dev/null +++ b/tests/test-blocked-by-invariant-block.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# +# Test that the implementer prompt documents the optional +# Blocked By Methodology Invariant block, and the reviewer prompt +# documents how to recognise + route it. +# +# Positive Test Cases: +# - T-POS-1: next-round prompt documents the optional block + format +# - T-POS-2: next-round prompt lists the four required block fields +# - T-POS-3: next-round prompt warns against using block for ordinary follow-up +# - T-POS-4: regular-review prompt instructs reviewer to recognise the block +# - T-POS-5: regular-review prompt instructs reviewer to verify-then-route +# - T-POS-6: regular-review prompt instructs reviewer to push back on misuse +# + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +GREEN='\033[0;32m' +RED='\033[0;31m' +NC='\033[0m' +TESTS_PASSED=0 +TESTS_FAILED=0 + +pass() { echo -e "${GREEN}PASS${NC}: $1"; TESTS_PASSED=$((TESTS_PASSED + 1)); } +fail() { echo -e "${RED}FAIL${NC}: $1"; echo " $2"; TESTS_FAILED=$((TESTS_FAILED + 1)); } + +NEXT_ROUND="$PROJECT_ROOT/prompt-template/claude/next-round-prompt.md" +REGULAR="$PROJECT_ROOT/prompt-template/codex/regular-review.md" + +# T-POS-1: next-round prompt documents the optional block +if grep -qF "## Blocked By Methodology Invariant" "$NEXT_ROUND"; then + pass "T-POS-1: next-round prompt names the optional block" +else + fail "T-POS-1: next-round prompt missing block name" "expected literal '## Blocked By Methodology Invariant' heading" +fi + +# T-POS-2: four required block fields +for field in "Invariant:" "Findings blocked:" "Canonical resolution:" "Why I cannot act in-loop:"; do + if grep -qF "$field" "$NEXT_ROUND"; then + pass "T-POS-2: block field documented: $field" + else + fail "T-POS-2: block field missing: $field" "expected literal '$field' in template" + fi +done + +# T-POS-3: misuse warning +if grep -qiE "use this block conservatively|NOT a way to defer|conservatively" "$NEXT_ROUND"; then + pass "T-POS-3: next-round warns against block misuse" +else + fail "T-POS-3: misuse warning missing" "expected language warning the implementer not to abuse the block" +fi + +# T-POS-4: reviewer recognises the block +if grep -qF "## Blocked By Methodology Invariant" "$REGULAR"; then + pass "T-POS-4: regular-review references the block" +else + fail "T-POS-4: regular-review missing block reference" "expected '## Blocked By Methodology Invariant' in template" +fi + +# T-POS-5: reviewer verify-then-route language +if grep -qiE "verify the implementer.s claim|confirm the listed findings" "$REGULAR"; then + pass "T-POS-5: reviewer instructed to verify-then-route" +else + fail "T-POS-5: verify-then-route guidance missing" "expected verification step in reviewer prompt" +fi + +# T-POS-6: push 
back on misuse +if grep -qiE "push back|wrongly classified|leave them in" "$REGULAR"; then + pass "T-POS-6: reviewer instructed to push back on misuse" +else + fail "T-POS-6: push-back guidance missing" "expected explicit push-back language for false-blocked findings" +fi + +echo "" +echo "Total: $TESTS_PASSED passed, $TESTS_FAILED failed" +[[ "$TESTS_FAILED" -eq 0 ]] || exit 1 +exit 0 diff --git a/tests/test-finalize-outcome-classification.sh b/tests/test-finalize-outcome-classification.sh new file mode 100755 index 00000000..6f6635a7 --- /dev/null +++ b/tests/test-finalize-outcome-classification.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# +# Test that the finalize-phase prompt template documents the required +# Outcome classification line (no-op / cosmetic / substantive). +# +# Positive Test Cases: +# - T-POS-1: template contains "Outcome: no-op (already-minimal)" exact form +# - T-POS-2: template contains "Outcome: cosmetic (formatting only)" exact form +# - T-POS-3: template contains "Outcome: substantive (logic edits)" exact form +# - T-POS-4: template explains that no-op is NOT failure +# +# Negative Test Cases: +# - T-NEG-1: template does not regress placeholders ({{FINALIZE_SUMMARY_FILE}}, etc.) +# + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +GREEN='\033[0;32m' +RED='\033[0;31m' +NC='\033[0m' +TESTS_PASSED=0 +TESTS_FAILED=0 + +pass() { echo -e "${GREEN}PASS${NC}: $1"; TESTS_PASSED=$((TESTS_PASSED + 1)); } +fail() { echo -e "${RED}FAIL${NC}: $1"; echo " $2"; TESTS_FAILED=$((TESTS_FAILED + 1)); } + +TEMPLATE_FILE="$PROJECT_ROOT/prompt-template/claude/finalize-phase-prompt.md" + +if [[ ! 
-f "$TEMPLATE_FILE" ]]; then + fail "template file exists" "expected file at $TEMPLATE_FILE" + echo "Total: $TESTS_PASSED passed, $TESTS_FAILED failed" + exit 1 +fi + +# T-POS-1 +if grep -qF "Outcome: no-op (already-minimal)" "$TEMPLATE_FILE"; then + pass "T-POS-1: no-op classification line documented" +else + fail "T-POS-1: no-op classification line missing" "expected 'Outcome: no-op (already-minimal)' literal in template" +fi + +# T-POS-2 +if grep -qF "Outcome: cosmetic (formatting only)" "$TEMPLATE_FILE"; then + pass "T-POS-2: cosmetic classification line documented" +else + fail "T-POS-2: cosmetic classification line missing" "expected 'Outcome: cosmetic (formatting only)' literal in template" +fi + +# T-POS-3 +if grep -qF "Outcome: substantive (logic edits)" "$TEMPLATE_FILE"; then + pass "T-POS-3: substantive classification line documented" +else + fail "T-POS-3: substantive classification line missing" "expected 'Outcome: substantive (logic edits)' literal in template" +fi + +# T-POS-4: rationale clarifying no-op is NOT failure (regex tolerates backticks / bold around tokens) +if grep -qiE "no.?op[^a-z]+.*not[^a-z]+.*failure|not[^a-z]+.*failure[^a-z]+.*no.?op" "$TEMPLATE_FILE"; then + pass "T-POS-4: no-op-is-not-failure rationale documented" +else + fail "T-POS-4: no-op-is-not-failure rationale missing" "expected language clarifying that no-op is positive evidence, not failure" +fi + +# T-NEG-1: placeholders preserved +for placeholder in "{{FINALIZE_SUMMARY_FILE}}" "{{PLAN_FILE}}" "{{GOAL_TRACKER_FILE}}" "{{BASE_BRANCH}}" "{{START_BRANCH}}"; do + if grep -qF "$placeholder" "$TEMPLATE_FILE"; then + pass "T-NEG-1: placeholder $placeholder preserved" + else + fail "T-NEG-1: placeholder $placeholder missing" "template must keep all existing placeholders" + fi +done + +echo "" +echo "Total: $TESTS_PASSED passed, $TESTS_FAILED failed" +[[ "$TESTS_FAILED" -eq 0 ]] || exit 1 +exit 0 diff --git a/tests/test-session-invariants-injection.sh 
b/tests/test-session-invariants-injection.sh new file mode 100755 index 00000000..2913d15a --- /dev/null +++ b/tests/test-session-invariants-injection.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# +# Test that the reviewer prompt templates contain the {{SESSION_INVARIANTS}} +# placeholder and the loop-aware out-of-loop finding classification language. +# This is the static side of the change — the dynamic injection is exercised +# via the stop-hook, which is covered by the broader hook integration tests. +# +# Positive Test Cases: +# - T-POS-1: regular-review template contains {{SESSION_INVARIANTS}} placeholder +# - T-POS-2: full-alignment-review template contains {{SESSION_INVARIANTS}} placeholder +# - T-POS-3: regular-review template documents the out-of-loop finding lane +# - T-POS-4: full-alignment-review template documents the out-of-loop finding lane +# - T-POS-5: stop-hook builds SESSION_INVARIANTS from PLAN_TRACKED + START_BRANCH +# - T-POS-6: stop-hook passes SESSION_INVARIANTS into both load_and_render_safe calls +# + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +GREEN='\033[0;32m' +RED='\033[0;31m' +NC='\033[0m' +TESTS_PASSED=0 +TESTS_FAILED=0 + +pass() { echo -e "${GREEN}PASS${NC}: $1"; TESTS_PASSED=$((TESTS_PASSED + 1)); } +fail() { echo -e "${RED}FAIL${NC}: $1"; echo " $2"; TESTS_FAILED=$((TESTS_FAILED + 1)); } + +REGULAR="$PROJECT_ROOT/prompt-template/codex/regular-review.md" +FULL_ALIGN="$PROJECT_ROOT/prompt-template/codex/full-alignment-review.md" +STOP_HOOK="$PROJECT_ROOT/hooks/loop-codex-stop-hook.sh" + +# T-POS-1 +if grep -qF "{{SESSION_INVARIANTS}}" "$REGULAR"; then + pass "T-POS-1: regular-review.md contains SESSION_INVARIANTS placeholder" +else + fail "T-POS-1: regular-review.md missing SESSION_INVARIANTS placeholder" "expected literal {{SESSION_INVARIANTS}} in template" +fi + +# T-POS-2 +if grep -qF "{{SESSION_INVARIANTS}}" "$FULL_ALIGN"; then + pass "T-POS-2: full-alignment-review.md contains SESSION_INVARIANTS placeholder" +else + fail "T-POS-2: full-alignment-review.md missing SESSION_INVARIANTS placeholder" "expected literal {{SESSION_INVARIANTS}} in template" +fi + +# T-POS-3 +if grep -qiE "out.of.loop" "$REGULAR" && grep -qiE "tag these .out.of.loop|tag .out.of.loop" "$REGULAR"; then + pass "T-POS-3: regular-review.md documents out-of-loop finding lane" +else + fail "T-POS-3: regular-review.md missing out-of-loop guidance" "expected out-of-loop tagging instruction" +fi + +# T-POS-4 +if grep -qiE "out.of.loop" "$FULL_ALIGN"; then + pass "T-POS-4: full-alignment-review.md documents out-of-loop finding lane" +else + fail "T-POS-4: full-alignment-review.md missing out-of-loop guidance" "expected out-of-loop tagging instruction" +fi + +# T-POS-5: stop-hook builds the invariants string from existing state vars +if grep -qF 'SESSION_INVARIANTS=""' "$STOP_HOOK" \ + && grep -qE 'PLAN_TRACKED.*==.*"true"' "$STOP_HOOK" \ + && grep -qE 'SESSION_INVARIANTS\+=.*Plan file byte-lock' "$STOP_HOOK" \ + && grep -qE 'SESSION_INVARIANTS\+=.*Working branch fixed' "$STOP_HOOK"; then + pass "T-POS-5: stop-hook 
builds SESSION_INVARIANTS from state vars" +else + fail "T-POS-5: stop-hook does not build SESSION_INVARIANTS as expected" "expected initialization + plan-tracked branch + working-branch line" +fi + +# T-POS-6: SESSION_INVARIANTS passed into both render calls +session_inv_count=$(grep -cF "SESSION_INVARIANTS=\$SESSION_INVARIANTS" "$STOP_HOOK" || true) +if [[ "$session_inv_count" -ge 2 ]]; then + pass "T-POS-6: stop-hook passes SESSION_INVARIANTS to both review-prompt renders ($session_inv_count occurrences)" +else + fail "T-POS-6: stop-hook missing SESSION_INVARIANTS in load_and_render_safe call(s)" "expected at least 2 occurrences (full-alignment + regular), got $session_inv_count" +fi + +echo "" +echo "Total: $TESTS_PASSED passed, $TESTS_FAILED failed" +[[ "$TESTS_FAILED" -eq 0 ]] || exit 1 +exit 0 diff --git a/tests/test-session-lineage-inherited-delta.sh b/tests/test-session-lineage-inherited-delta.sh new file mode 100755 index 00000000..781be940 --- /dev/null +++ b/tests/test-session-lineage-inherited-delta.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash +# +# Test that setup-rlcr-loop.sh detects inherited-delta sessions and generates +# a session-lineage.md when the most recent prior session's base_commit +# differs from the current session's base_commit. +# +# Positive Test Cases: +# - T-POS-1: inherited_delta field is added to state.md +# - T-POS-2: detection logic uses prior session base_commit comparison +# - T-POS-3: session-lineage.md generation is gated on INHERITED_DELTA == true +# - T-POS-4: lineage file contains commit-range git log between prior + current base +# - T-POS-5: lineage file contains stub for "why a new session is needed" +# +# Negative Test Cases: +# - T-NEG-1: detection block is positioned after BASE_COMMIT capture +# - T-NEG-2: state.md still contains all the existing fields +# + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." 
&& pwd)" + +GREEN='\033[0;32m' +RED='\033[0;31m' +NC='\033[0m' +TESTS_PASSED=0 +TESTS_FAILED=0 + +pass() { echo -e "${GREEN}PASS${NC}: $1"; TESTS_PASSED=$((TESTS_PASSED + 1)); } +fail() { echo -e "${RED}FAIL${NC}: $1"; echo " $2"; TESTS_FAILED=$((TESTS_FAILED + 1)); } + +SETUP="$PROJECT_ROOT/scripts/setup-rlcr-loop.sh" + +# T-POS-1: state.md template includes inherited_delta field +if grep -qE "^inherited_delta: \\\$INHERITED_DELTA" "$SETUP"; then + pass "T-POS-1: inherited_delta field is added to state.md template" +else + fail "T-POS-1: inherited_delta field missing from state.md template" "expected line 'inherited_delta: \$INHERITED_DELTA' inside the heredoc" +fi + +# T-POS-2: detection compares prior session base_commit +if grep -qE 'PRIOR_BASE_COMMIT.*!=.*BASE_COMMIT' "$SETUP" && grep -qF 'INHERITED_DELTA="true"' "$SETUP"; then + pass "T-POS-2: detection compares prior + current base_commit" +else + fail "T-POS-2: prior-vs-current base_commit comparison missing" "expected explicit comparison setting INHERITED_DELTA=true" +fi + +# T-POS-3: lineage file generation is gated on inherited-delta +if grep -qE 'if \[\[ "\$INHERITED_DELTA" == "true" \]\]; then' "$SETUP" \ + && grep -qF 'session-lineage.md' "$SETUP"; then + pass "T-POS-3: session-lineage.md generation gated on INHERITED_DELTA == true" +else + fail "T-POS-3: lineage gate or filename missing" "expected gate + literal session-lineage.md filename" +fi + +# T-POS-4: lineage file embeds git log of inherited commit range +if grep -qE 'git -C .* log --oneline "\$\{PRIOR_BASE_COMMIT\}\.\.\$\{BASE_COMMIT\}"' "$SETUP" \ + || grep -qE 'log --oneline "\$\{PRIOR_BASE_COMMIT\}\.\.\$\{BASE_COMMIT\}"' "$SETUP"; then + pass "T-POS-4: lineage embeds git log for prior_base..current_base range" +else + fail "T-POS-4: lineage commit-range git log missing" "expected git log --oneline \\\${PRIOR_BASE_COMMIT}..\\\${BASE_COMMIT}" +fi + +# T-POS-5: lineage stub for "why a new session is needed" +if grep -qiE "why a new session is 
needed" "$SETUP"; then + pass "T-POS-5: lineage stub asks why a new session is needed" +else + fail "T-POS-5: lineage stub for new-session reason missing" "expected literal 'why a new session is needed' prompt in template" +fi + +# T-NEG-1: detection block runs after BASE_COMMIT capture +base_commit_line=$(grep -nE 'BASE_COMMIT=\$\(run_with_timeout' "$SETUP" | head -1 | cut -d: -f1) +detection_line=$(grep -n 'Detect inherited-delta session' "$SETUP" | head -1 | cut -d: -f1) +if [[ -n "$base_commit_line" ]] && [[ -n "$detection_line" ]] && [[ "$detection_line" -gt "$base_commit_line" ]]; then + pass "T-NEG-1: detection block ($detection_line) runs after BASE_COMMIT capture ($base_commit_line)" +else + fail "T-NEG-1: detection block positioning wrong" "BASE_COMMIT capture: line $base_commit_line; detection: line $detection_line" +fi + +# T-NEG-2: state.md template still has the original fields +for field in "current_round:" "max_iterations:" "plan_file:" "base_commit:" "base_branch:" "started_at:"; do + if grep -qE "^${field}" "$SETUP"; then + pass "T-NEG-2: state.md still contains $field" + else + fail "T-NEG-2: state.md regressed $field" "expected line starting with $field in state.md heredoc" + fi +done + +# Syntax check +if bash -n "$SETUP" 2>/dev/null; then + pass "T-NEG-3: setup-rlcr-loop.sh passes bash syntax check" +else + fail "T-NEG-3: setup-rlcr-loop.sh syntax error" "bash -n returned non-zero" +fi + +echo "" +echo "Total: $TESTS_PASSED passed, $TESTS_FAILED failed" +[[ "$TESTS_FAILED" -eq 0 ]] || exit 1 +exit 0