diff --git a/.changeset/rea-0-10-2-audit-integrity.md b/.changeset/rea-0-10-2-audit-integrity.md new file mode 100644 index 0000000..15b9c80 --- /dev/null +++ b/.changeset/rea-0-10-2-audit-integrity.md @@ -0,0 +1,233 @@ +--- +'@bookedsolid/rea': patch +--- + +Governance recovery + audit integrity + base-branch resolution + audit-chain corruption-tolerance (Defects S + P + N + T + U) + +This patch ships five fixes on one branch. S, P, and N were the 0.10.1 scope +that never shipped standalone — the release was superseded by 0.10.2 after T +and U surfaced on the same working tree. All five fixes are independent but +ship together because the push-gate and audit-helper surfaces they touch +overlap enough that landing T and U as a follow-up patch would have required +a second Codex pass over code already under review. + +## Defect S — TOFU drift recovery CLI (HIGH — governance recovery path) + +Before this patch, the only documented recovery path for a TOFU drift fail-close +was `REA_ACCEPT_DRIFT= rea serve` — a startup env var. When the gateway is +spawned indirectly (Claude Code via `.mcp.json`, a systemd unit, any wrapper that +owns the spawn), the operator has no env-injection surface. A legitimate +registry edit therefore silently removed the drifted server from +`__rea__health.downstreams` and `tools/list`, with the only manual workaround +being to hand-edit `.rea/fingerprints.json`. + +New CLI surface: + +- **`rea tofu list [--json]`** — prints every server declared in + `.rea/registry.yaml` with its current-vs-stored fingerprint verdict + (`first-seen` | `unchanged` | `drifted`) and short fingerprints. +- **`rea tofu accept [--reason ]`** — recomputes the current + canonical fingerprint for `` and writes it to `.rea/fingerprints.json`. + Emits a `tofu.drift_accepted_by_cli` (or `tofu.first_seen_accepted_by_cli`) + audit record with BOTH the stored and current fingerprints plus the optional + reason, so the accept decision is on the hash chain. + +The drift banner printed by `src/registry/tofu-gate.ts` now names the new CLI +path first and demotes `REA_ACCEPT_DRIFT` to a one-shot bypass. `rea doctor`'s +drift warning replaces the env-var pointer with `rea tofu list` / +`rea tofu accept `. + +## Defect P — `codex.review` audit record forgery surface (CRITICAL — integrity) + +Before this patch, any caller with filesystem access to `node_modules` could +write a valid, hash-chained `codex.review` entry claiming any `reviewer` and any +`verdict` for any `head_sha` by importing `node_modules/@bookedsolid/rea/dist/audit/append.js` +and calling `appendAuditRecord()` directly with `tool_name: 'codex.review'`. +Downstream auditors — and the push-review cache gate — could not distinguish +"real Codex CLI ran and passed" from "agent wrote an emit-audit script claiming +Codex passed." The governance promise of `codex.review` was a convention, not an +enforced contract. + +New mechanism: `AuditRecord` gains a required `emission_source` field +(`"rea-cli"` | `"codex-cli"` | `"other"`) that is part of the hashed record +body. + +- The public `appendAuditRecord()` helper stamps every record with + `emission_source: "other"`. The field is NOT part of the public + `AppendAuditInput` shape — external consumers cannot self-assert `"rea-cli"`. +- New `appendCodexReviewAuditRecord()` helper is the ONLY write path that stamps + `"rea-cli"` for `codex.review` records. `tool_name` and `server_name` are + fixed inside the helper and excluded from the input type, so callers cannot + route a generic record through the codex-certification path. Exclusively + reachable through the `rea audit record codex-review` CLI (classified as a + Write-tier Bash invocation by `reaCommandTier`, defect E). +- The push-review cache gate's jq predicate now requires + `.emission_source == "rea-cli" or .emission_source == "codex-cli"` for + `codex.review` lookups. Records emitted through the generic helper (tagged + `"other"`) or legacy pre-0.10.2 records (field missing) are rejected. + +**Upgrade effect:** The first push on each branch after upgrading to 0.10.2 will +require a fresh `rea audit record codex-review` invocation, because legacy +`codex.review` audit records predate `emission_source`. Subsequent pushes hit +the cache as normal. + +**CI impact:** Non-interactive pipelines that invoke the pre-push gate +(e.g. `rea push`, husky pre-push in CI runners) will see one failed push per +branch after upgrade. Bridge with `REA_SKIP_CODEX_REVIEW=` as the narrow +one-push waiver, or pre-stamp the branch tip with +`rea audit record codex-review --head-sha --branch --target +--verdict pass --finding-count 0 --also-set-cache` before upgrading. Consumers +who proxied Codex through a gateway-registered MCP and relied on middleware- +written records to satisfy the gate should note that those legacy records also +predate `emission_source` and are rejected until re-emitted. + +Regression tests at `src/audit/emission-source.test.ts`: public helper stamps +`"other"` even for `tool_name: "codex.review"`, dedicated helper stamps +`"rea-cli"` and forces canonical tool/server names, `emission_source` is part of +the computed hash (flipping the field breaks the chain). + +## Defect N — base-branch resolution consults `branch..base` (MEDIUM, partial) + +Before this patch, `hooks/_lib/push-review-core.sh`'s new-branch base resolution +fell through to `origin/HEAD` when the local branch had no upstream set yet, +without consulting operator-configured per-branch base tracking. A feature +branch targeting `dev` in a main-as-production repo was therefore reviewed +against `origin/main` silently, producing a diff that spanned every commit +between `main` and the feature — often thousands of lines for a handful of real +changes. + +This patch adds a per-branch git-config consultation: +`git config branch..base ` is now read BEFORE the `origin/HEAD` +fallback. When set, the gate diffs against the configured ref (preferring the +remote-tracking form for server-authoritative anchoring) and echoes it as the +`Target:` label. Without a config entry, behavior is unchanged. `configured_base` +is reset to empty at the top of every refspec-loop iteration so multi-refspec +pushes (e.g. `git push --all`) cannot leak state from an earlier iteration's +config lookup (Codex 0.10.1 finding #1). + +**Scope note:** This is the opt-in half of N. The fail-loud-when-no-base and +general-label-fix halves remain deferred to defect G's TypeScript port of +`push-review-core.sh`, where the merge-base-anchor / refspec-target separation +can be properly expressed without breaking the existing cache-key contract (an +inline bash attempt was reverted during this patch after it silently invalidated +consumer cache entries for bare pushes). + +## Defect T — audit writer serialization self-check (MEDIUM — integrity) + +Before this patch, `appendAuditRecord()` called +`fs.appendFile(auditFile, JSON.stringify(record) + '\n')` unconditionally. If a +future regression introduced a non-JSON-safe field into `AuditRecord` (BigInt, +circular reference, undefined in array position, hostile `metadata` value that +survives TypeScript typing but breaks JSON round-trip), the writer would +produce an unparseable line on disk and only surface the failure at +`rea audit verify` time — or, worse, when push-review-core.sh's jq scan +silently failed to find a legitimate `codex.review` record past the corruption +(which is precisely defect U). + +This patch adds a pre-append `JSON.parse` self-check. The helper now verifies +the serialized line round-trips before it touches `.rea/audit.jsonl`; a throw +aborts the append without writing and the on-disk chain tail is unchanged. +The diagnostic names the offending `tool_name`/`server_name` so the caller can +localize the regression. This is defense-in-depth against a class of bug that +would otherwise corrupt the hash chain at write time. + +The self-check is scoped to the public `appendAuditRecord()` / +`appendCodexReviewAuditRecord()` entry points (both flow through +`doAppend()`). The gateway middleware write at +`src/gateway/middleware/audit.ts` and rotation-marker emission at +`src/gateway/audit/rotator.ts` still use raw `JSON.stringify()` + +`appendFile()` / `writeFile()` without a self-check. Widening T to cover +those paths requires a shared serialization helper and is tracked as a +followup — this patch closes the two entry points that every external +consumer (Helix, Codex CLI, ad-hoc CLI scripts) actually reaches. + +Separately, `rea audit verify` now collects every unparseable line across +every file in the walk instead of aborting at the first one. Each failure is +reported as `audit.jsonl:LINE[:COL] `, and chain verification +continues over the parseable subset — a genuine hash tamper on a surviving +record still surfaces alongside the parse failures. The exit code is 1 if +there is any parse failure OR any chain failure; a fully clean file still +reports "Audit chain verified". Empty lines mid-file are a distinct parse +failure class (not silently skipped). Operators bisecting a corrupt audit +file now see every affected line number in one pass. + +Tamper diagnostics include BOTH the parseable-subset record index (for +audit-tooling consumers that walk `records[]`) AND the 1-based original-file +line number (for operator workflows that `sed -n Np` or editor:LINE into the +offending record). The two diverge whenever a malformed line precedes the +tamper — the file line is the authoritative jump target. + +Regression tests at `src/audit/append.test.ts` (self-check intercept) and +`__tests__/cli/audit-verify-mixed-malformed.test.ts` (five scenarios covering +mixed-corruption, tamper-alongside-parse-failure, tamper-AFTER-malformed-line +with divergent subset/file indices, clean-file success, and mid-file empty +lines). + +## Defect U — push-review-core.sh audit scan tolerates malformed lines (HIGH — availability) + +Before this patch, `hooks/_lib/push-review-core.sh` used +`jq -e --arg sha "$sha" '' "$_audit"` to test whether a +`codex.review` receipt existed for the push's head SHA. jq interprets the +file as a single JSON stream; a single malformed line anywhere in +`.rea/audit.jsonl` (one stray backslash-u-non-hex sequence, one truncated +write) makes jq exit with status 2 BEFORE the `select` ever runs against any +record. Every legitimate codex.review receipt past the corruption becomes +unreachable. The failure is total: every subsequent push that requires a +cache hit is silently blocked until the corrupt line is hand-edited out of +the audit file. One stray byte locks the gate closed. + +This patch rewrites the scan as +`jq -R --arg sha "$sha" 'fromjson? | select()' "$_audit" 2>/dev/null | grep -q .`. +`-R` takes each line as a raw string; `fromjson?` is the error-suppressing +parser — malformed lines yield empty output instead of failing the pipeline. +The `select` filter runs against every successfully parsed record. The +predicate (tool_name, head_sha, verdict, emission_source) is unchanged, so +defect P's forgery-rejection guarantee still holds line-by-line. + +Mirrored in both `hooks/_lib/push-review-core.sh` (upstream source) and +`.claude/hooks/_lib/push-review-core.sh` (this repo's dogfood install). The +two other jq scans in the file — `cache_result` inspection at approximately +lines 432 and 612, and the cache hit/pass predicate at approximately line +1107 — operate on single-value `printf`'d JSON strings, not on audit.jsonl, +and are left as `jq -e`. + +Regression test at `__tests__/hooks/push-review-fromjson-tolerance.test.ts` +drives the exact new pipeline against a scratch audit.jsonl with a malformed +line sandwiched between two valid `codex.review` records with distinct +head_sha values. Both records are findable. The forgery-rejection case +(a hand-written line with `emission_source: "other"` on the far side of a +malformed line) is also covered — tolerance for malformed lines must not +weaken the predicate into "anything passes". + +## Followups (not in this patch) + +- **G** (push-review-core.sh TS port) — 1154 LOC of shell + jq + awk with 10 + integration test suites that shell out in real git subprocesses. Requires a + clean-room TS implementation with ≥90% unit coverage and a thin bash shim. + Tracked separately for 0.11.0. +- **Widen T to gateway middleware + rotation markers** — both paths + (`src/gateway/middleware/audit.ts` line ~148 and `src/gateway/audit/rotator.ts` + line ~253) still write raw `JSON.stringify(record)+'\n'` without the + self-check. No known exploit today (TypeScript input types rule out non-JSON + field shapes, and proxied MCP metadata is already redacted), but a shared + serialization helper would make the T guarantee universal. Tracked for a + future pass. +- Shell-level integration test for defect P's gate predicate (forged record + with `emission_source: "other"` fails the cache gate). The existing test + suite passes end-to-end post-patch; a dedicated P integration fixture can be + added as part of the G rewrite. +- Codex 0.10.1 finding #2: proxied-MCP records through the gateway middleware + stamp `"rea-cli"` (technically correct — rea is the writer), which means an + MCP server named `codex` exposing a tool named `review` could produce + gate-satisfying records via the middleware path if a future middleware also + populated `metadata.head_sha`/`metadata.verdict`. Today no such middleware + exists and `ctx.metadata` is `{}` by default, so the residual surface is + narrow. Track for a future pass: either add a distinct `"rea-gateway"` + discriminator, or narrow the jq predicate to require a CLI-only metadata + shape. +- Codex 0.10.1 finding #3: `rea tofu accept` writes the fingerprint before + appending the audit record. If audit append fails, the on-disk fingerprint + is updated but unaudited, and a re-run short-circuits on the `stored === + current` guard. Track for a future pass — reverse the order (audit first, + then fingerprint) or explicitly document the recovery procedure in the + error message. diff --git a/.claude/hooks/_lib/push-review-core.sh b/.claude/hooks/_lib/push-review-core.sh index cf02936..b1b4bfa 100755 --- a/.claude/hooks/_lib/push-review-core.sh +++ b/.claude/hooks/_lib/push-review-core.sh @@ -719,12 +719,20 @@ pr_core_run() { # fail-closed and require an explicit review. local SOURCE_SHA="" MERGE_BASE="" TARGET_BRANCH="" SOURCE_REF="" local HAS_DELETE=0 BEST_COUNT=0 - local rec local_sha remote_sha local_ref remote_ref target mb mb_status count count_status + local rec local_sha remote_sha local_ref remote_ref target resolved_base mb mb_status count count_status for rec in "${REFSPEC_RECORDS[@]}"; do IFS='|' read -r local_sha remote_sha local_ref remote_ref <<<"$rec" target="${remote_ref#refs/heads/}" target="${target#refs/for/}" [[ -z "$target" ]] && target="main" + # Defect N: track the SEMANTIC base (the ref the diff was anchored on) + # distinctly from `target` (the pushed remote ref). For a tracked branch + # they coincide; for a new branch, `target` is the branch name being + # created — which is NOT what we reviewed against, so `Target:` must + # echo `resolved_base` instead. Default to `target` for the tracked + # case; the new-branch path overrides with the resolved default_ref + # short name below. + resolved_base="$target" if [[ "$local_sha" == "$ZERO_SHA" ]]; then HAS_DELETE=1 @@ -774,25 +782,81 @@ pr_core_run() { # # argv_remote is set from the adapter's argv (git passes the remote name # as $1 on pre-push); defaults to "origin" when absent (BUG-008 sniff). - local default_ref default_ref_status - default_ref=$(cd "$REA_ROOT" && git symbolic-ref "refs/remotes/${argv_remote}/HEAD" 2>/dev/null) - default_ref_status=$? - if [[ "$default_ref_status" -ne 0 || -z "$default_ref" ]]; then - # symbolic-ref failed (common on shallow or mirror clones where - # origin/HEAD was never set). Probe the common default-branch names in - # order: main, then master. Both are remote-tracking refs and still - # server-authoritative; the order matters only for projects that still - # default to `master` (older internal forks), where without this - # fallback the first push of a new branch would fail closed. - if cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/remotes/${argv_remote}/main" >/dev/null 2>&1; then - default_ref="refs/remotes/${argv_remote}/main" - elif cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/remotes/${argv_remote}/master" >/dev/null 2>&1; then - default_ref="refs/remotes/${argv_remote}/master" - else - default_ref="" + # + # Defect N (0.10.1): BEFORE falling back to the remote's default branch, + # consult per-branch config `branch..base`. A feature branch + # targeting `dev` in a main-as-production repo would otherwise resolve + # against `origin/main` silently, producing a diff that spans the entire + # dev→main history — reviewers see "Scope: 28690 lines" for a 4-file + # change. The git-config route uses local branch knowledge that is + # authoritative for this working copy (set via `git branch --set-upstream`, + # or by CI tooling that tracks the intended target). This is consulted + # BEFORE origin/HEAD because the latter is a server-default that may + # mis-represent the reviewer's actual intent for this specific branch. + local default_ref default_ref_status configured_base source_branch + source_branch="${local_ref#refs/heads/}" + default_ref="" + # Codex 0.10.1 finding #1: `local` is function-scoped, not loop- + # iteration-scoped — without an explicit reset, iteration N inherits + # iteration N-1's configured_base and falsely promotes resolved_base + # when the current refspec's local_ref does NOT begin with refs/heads/ + # (tag push, gerrit-style refs/for/, etc.). Reset before every + # potential assignment so each iteration sees a clean slate. + configured_base="" + + if [[ -n "$source_branch" && "$source_branch" != "HEAD" ]]; then + configured_base=$(cd "$REA_ROOT" && git config --get "branch.${source_branch}.base" 2>/dev/null || echo "") + if [[ -n "$configured_base" ]]; then + # Prefer the REMOTE-TRACKING form so the gate still anchors on a + # server-authoritative ref (see the local-ref hijack explanation + # above). Fall back to the local short ref only if the remote + # counterpart doesn't exist, with a visible WARN on stderr — the + # local ref is less trustworthy and the reviewer should know. + if cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/remotes/${argv_remote}/${configured_base}" >/dev/null 2>&1; then + default_ref="refs/remotes/${argv_remote}/${configured_base}" + elif cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/heads/${configured_base}" >/dev/null 2>&1; then + default_ref="refs/heads/${configured_base}" + printf 'WARN: branch.%s.base=%s resolved to local ref; remote counterpart %s/%s missing — reviewer-side diff may be stale\n' \ + "$source_branch" "$configured_base" "$argv_remote" "$configured_base" >&2 + fi + fi + fi + + if [[ -z "$default_ref" ]]; then + default_ref=$(cd "$REA_ROOT" && git symbolic-ref "refs/remotes/${argv_remote}/HEAD" 2>/dev/null) + default_ref_status=$? + if [[ "$default_ref_status" -ne 0 || -z "$default_ref" ]]; then + # symbolic-ref failed (common on shallow or mirror clones where + # origin/HEAD was never set). Probe the common default-branch names in + # order: main, then master. Both are remote-tracking refs and still + # server-authoritative; the order matters only for projects that still + # default to `master` (older internal forks), where without this + # fallback the first push of a new branch would fail closed. + if cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/remotes/${argv_remote}/main" >/dev/null 2>&1; then + default_ref="refs/remotes/${argv_remote}/main" + elif cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/remotes/${argv_remote}/master" >/dev/null 2>&1; then + default_ref="refs/remotes/${argv_remote}/master" + else + default_ref="" + fi fi fi if [[ -n "$default_ref" ]]; then + # Defect N: if operator-configured `branch..base` resolved the + # ref we're about to diff against, overwrite `resolved_base` with the + # short name so TARGET_BRANCH (and the Target: label) reflect the + # actual review anchor. Without an explicit config override, leave + # `resolved_base` at the refspec target — this preserves the cache + # contract for new-branch pushes where remote_ref is the same as the + # source branch (the common case) and for bare pushes that + # argv-resolve via `@{upstream}`. Only operators who opted into a + # per-branch base get the label promoted, keeping the change + # backward-compatible for every other path. + if [[ -n "$configured_base" ]]; then + resolved_base="${default_ref#refs/remotes/${argv_remote}/}" + resolved_base="${resolved_base#refs/heads/}" + [[ -z "$resolved_base" ]] && resolved_base="$default_ref" + fi mb=$(cd "$REA_ROOT" && git merge-base "$default_ref" "$local_sha" 2>/dev/null || echo "") if [[ -z "$mb" ]]; then # default_ref resolved but merge-base came back empty (unrelated @@ -867,13 +931,40 @@ pr_core_run() { if [[ "$CODEX_WAIVER_ACTIVE" == "1" ]]; then _codex_ok=1 elif [[ -f "$_audit" ]]; then - if jq -e --arg sha "$local_sha" ' - select( - .tool_name == "codex.review" - and .metadata.head_sha == $sha - and (.metadata.verdict == "pass" or .metadata.verdict == "concerns") - ) - ' "$_audit" >/dev/null 2>&1; then + # Defect P (0.10.1): require .emission_source == "rea-cli" or + # "codex-cli" so agents cannot forge a codex.review record by + # directly calling appendAuditRecord() from an ad-hoc .mjs script + # (the generic helper stamps "other"). Legacy records (pre-0.10.1) + # have no emission_source field and are rejected — the first push + # on an upgraded consumer requires a fresh `rea audit record + # codex-review` (or Codex CLI emission) which stamps "rea-cli". + # + # Defect T/U (0.10.2): read the audit file as raw lines and parse + # each with `fromjson?`. Before 0.10.2 this scan used + # `jq -e '' "$_audit"` which feeds the file as a single + # JSON stream — a single malformed line (literal backslash-u + # followed by non-hex characters inside a string, for example) + # makes jq bail on the stream with exit 2 and the `select` never + # runs against ANY record, including legitimate codex.review + # entries further down the file. The failure is total: every + # cached codex.review receipt becomes unreachable until the + # corrupt line is hand-edited out. `-R` flips jq into raw-input + # mode (one string per line), and `fromjson?` is the error- + # suppressing parser — malformed lines silently yield empty + # output. The `select` filter then inspects each successfully + # parsed record exactly as before, and `grep -q .` detects + # whether ANY record survived the filter. Lines 1107 and the + # earlier cache_result scans at :432/:612 operate on a single + # printf'd JSON string, not audit.jsonl, so they remain `jq -e`. + if jq -R --arg sha "$local_sha" ' + fromjson? + | select( + .tool_name == "codex.review" + and .metadata.head_sha == $sha + and (.metadata.verdict == "pass" or .metadata.verdict == "concerns") + and (.emission_source == "rea-cli" or .emission_source == "codex-cli") + ) + ' "$_audit" 2>/dev/null | grep -q .; then _codex_ok=1 fi fi @@ -918,7 +1009,12 @@ pr_core_run() { if [[ -z "$SOURCE_SHA" ]] || [[ "$count" -gt "$BEST_COUNT" ]]; then SOURCE_SHA="$local_sha" MERGE_BASE="$mb" - TARGET_BRANCH="$target" + # Defect N: use `resolved_base` (the actual merge-base anchor we + # diffed against), not `target` (the pushed-ref name). For tracked + # branches these are the same; for new branches without an upstream + # the distinction is the difference between "Target: " + # (misleading) and "Target: main" (or whichever base was resolved). + TARGET_BRANCH="$resolved_base" SOURCE_REF="$local_ref" BEST_COUNT="$count" fi diff --git a/__tests__/cli/audit-verify-mixed-malformed.test.ts b/__tests__/cli/audit-verify-mixed-malformed.test.ts new file mode 100644 index 0000000..f29e368 --- /dev/null +++ b/__tests__/cli/audit-verify-mixed-malformed.test.ts @@ -0,0 +1,282 @@ +/** + * Tests for `rea audit verify` collect-all-errors mode (Defect T / 0.10.2). + * + * Before 0.10.2 `rea audit verify` aborted at the first unparseable line with + * a one-shot `Cannot parse JSON at audit.jsonl line N` error and exit 1. A + * single corrupt line blocked verification over every legitimate record that + * followed — which in the defect-T incident meant a stray backslash-u + * sequence on one line hid several clean hash-chain tails from the operator. + * + * 0.10.2 contract: + * + * 1. The command walks the entire file (and all `--since` rotated files). + * 2. EVERY malformed line is reported with `audit.jsonl:LINE[:COL] `. + * 3. Chain verification runs over the parseable subset. A tamper on the + * parseable subset is still reported, alongside the parse failures. + * 4. Exit code is 1 if there is ANY parse failure OR any chain failure. + * 5. A fully clean file still reports "Audit chain verified" and exits 0. + * + * The test drives `runAuditVerify` in-process rather than shelling out — + * that mirrors the `runAuditRecordCodexReview` test pattern and lets us + * assert against captured stderr without worrying about CLI argv parsing. + */ + +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { runAuditVerify } from '../../src/cli/audit.js'; +import { appendAuditRecord } from '../../src/audit/append.js'; + +interface CapturedIo { + stdout: string; + stderr: string; + restore: () => void; +} + +function captureIo(): CapturedIo { + const captured = { stdout: '', stderr: '' }; + const stdoutSpy = vi + .spyOn(process.stdout, 'write') + .mockImplementation((chunk: string | Uint8Array): boolean => { + captured.stdout += typeof chunk === 'string' ? chunk : chunk.toString(); + return true; + }); + const stderrSpy = vi + .spyOn(process.stderr, 'write') + .mockImplementation((chunk: string | Uint8Array): boolean => { + captured.stderr += typeof chunk === 'string' ? chunk : chunk.toString(); + return true; + }); + const logSpy = vi.spyOn(console, 'log').mockImplementation((...args: unknown[]) => { + captured.stdout += args.map(String).join(' ') + '\n'; + }); + const errSpy = vi.spyOn(console, 'error').mockImplementation((...args: unknown[]) => { + captured.stderr += args.map(String).join(' ') + '\n'; + }); + return { + get stdout() { + return captured.stdout; + }, + get stderr() { + return captured.stderr; + }, + restore: () => { + stdoutSpy.mockRestore(); + stderrSpy.mockRestore(); + logSpy.mockRestore(); + errSpy.mockRestore(); + }, + } as CapturedIo; +} + +/** + * Wrap runAuditVerify so a `process.exit(code)` turns into a thrown marker + * we can assert on. The real handler calls `process.exit(1)` on any failure, + * which would tear down the test runner if we let it through. + */ +async function runVerify(): Promise<{ exitCode: number | null }> { + let exitCode: number | null = null; + const exitSpy = vi + .spyOn(process, 'exit') + .mockImplementation(((code?: number) => { + exitCode = code ?? 0; + throw new Error(`__exit__${exitCode}`); + }) as never); + try { + await runAuditVerify({}); + } catch (e) { + if (!(e instanceof Error) || !e.message.startsWith('__exit__')) throw e; + } finally { + exitSpy.mockRestore(); + } + return { exitCode }; +} + +describe('runAuditVerify — defect T collect-all-errors mode', () => { + let baseDir: string; + let previousCwd: string; + let io: CapturedIo; + + beforeEach(async () => { + baseDir = await fs.mkdtemp(path.join(os.tmpdir(), 'rea-verify-T-')); + baseDir = await fs.realpath(baseDir); + await fs.mkdir(path.join(baseDir, '.rea'), { recursive: true }); + previousCwd = process.cwd(); + process.chdir(baseDir); + io = captureIo(); + }); + + afterEach(async () => { + io.restore(); + process.chdir(previousCwd); + await fs.rm(baseDir, { recursive: true, force: true }); + }); + + it('reports every malformed line with line number and exits 1 (mixed valid + malformed)', async () => { + // Emit two real records so the prev_hash chain has real entries. + await appendAuditRecord(baseDir, { tool_name: 'real.one', server_name: 'unit' }); + await appendAuditRecord(baseDir, { tool_name: 'real.two', server_name: 'unit' }); + + // Splice two malformed lines between the valid records and at the tail. + // Direct write is the only way to simulate external corruption — the + // public helper's defect-T self-check prevents the same shape from + // reaching disk through it. + const auditFile = path.join(baseDir, '.rea', 'audit.jsonl'); + const original = await fs.readFile(auditFile, 'utf8'); + const [lineOne, lineTwo] = original.split('\n').filter((l) => l.length > 0); + const corrupted = + [lineOne, '{not-json-at-all', lineTwo, '{"incomplete":', ''].join('\n'); + await fs.writeFile(auditFile, corrupted); + + const { exitCode } = await runVerify(); + expect(exitCode).toBe(1); + + // Every malformed line is named, with its 1-based file line number. + // File layout: 1=real.one, 2=malformed, 3=real.two, 4=malformed, 5=empty-trailing. + // The trailing empty line is dropped by the leading \n-trim; line 2 and + // line 4 are the two failures. (If the splitter kept a mid-file blank, + // that would land here too — the verifier is explicit about it.) + expect(io.stderr).toContain('2 unparseable line(s) detected'); + expect(io.stderr).toMatch(/audit\.jsonl:2\b/); + expect(io.stderr).toMatch(/audit\.jsonl:4\b/); + + // The "verified … clean" success banner must NOT appear — any failure + // class suppresses it. + expect(io.stdout).not.toMatch(/Audit chain verified/); + }); + + it('chain-verifies the parseable subset and reports a tamper alongside parse failures', async () => { + // Three real records, then corrupt a byte inside the SECOND record's + // stored hash, and splice one malformed line AFTER the tamper. Both + // failures must surface in a single run. + await appendAuditRecord(baseDir, { tool_name: 'real.one', server_name: 'unit' }); + await appendAuditRecord(baseDir, { tool_name: 'real.two', server_name: 'unit' }); + await appendAuditRecord(baseDir, { tool_name: 'real.three', server_name: 'unit' }); + + const auditFile = path.join(baseDir, '.rea', 'audit.jsonl'); + const originalLines = (await fs.readFile(auditFile, 'utf8')) + .split('\n') + .filter((l) => l.length > 0); + expect(originalLines).toHaveLength(3); + + // Corrupt the second record's hash. Any byte inside `"hash":""` + // works; flipping the final hex character is the smallest possible + // tamper. + const tampered = originalLines[1]!.replace(/"hash":"([0-9a-f]+)"/, (_match, h: string) => { + const flipped = h.slice(0, -1) + (h.endsWith('0') ? '1' : '0'); + return `"hash":"${flipped}"`; + }); + expect(tampered).not.toBe(originalLines[1]); + + const corrupted = [ + originalLines[0], + tampered, + originalLines[2], + '{"unterminated', // intentional parse failure at line 4 + '', + ].join('\n'); + await fs.writeFile(auditFile, corrupted); + + const { exitCode } = await runVerify(); + expect(exitCode).toBe(1); + + // Parse failure block: one line reported. + expect(io.stderr).toContain('1 unparseable line(s) detected'); + expect(io.stderr).toMatch(/audit\.jsonl:4\b/); + + // Chain tamper block: the tamper on line 2 surfaces as index 1 within + // the parseable subset AND line 2 in the original file. The reason + // names hash-vs-recomputed OR prev_hash-mismatch (either is acceptable — + // flipping a stored hash breaks the self-hash check, but the tamper + // also breaks the NEXT record's prev_hash anchor; either is the first + // failure seen). + expect(io.stderr).toContain('TAMPER DETECTED'); + expect(io.stderr).toMatch(/Record index:\s+\d+ \(0-based within parseable subset\)/); + expect(io.stderr).toMatch(/File line:\s+\d+ \(1-based in audit\.jsonl\)/); + }); + + it('reports the ORIGINAL file line for a tamper that sits after a malformed line (concern 2)', async () => { + // Regression case for Codex concern #2 on the T/U 0.10.2 pass: when a + // malformed line appears BEFORE the tampered record, the parseable- + // subset index diverges from the original file line number. Operators + // jumping to the failure with an editor or `sed -n Np` need the file + // line, not the subset index. + // + // Layout: + // file line 1: real.one (valid, parseable-subset index 0) + // file line 2: MALFORMED (parse failure) + // file line 3: real.two (valid, parseable-subset index 1) — TAMPERED + // file line 4: real.three (valid, parseable-subset index 2) + // + // Without the recordLineMap fix, the chain failure would report + // "Record index: 1" which is ambiguous — subset position 1 is file + // line 3, not file line 1. With the fix we also print "File line: 3". + await appendAuditRecord(baseDir, { tool_name: 'real.one', server_name: 'unit' }); + await appendAuditRecord(baseDir, { tool_name: 'real.two', server_name: 'unit' }); + await appendAuditRecord(baseDir, { tool_name: 'real.three', server_name: 'unit' }); + + const auditFile = path.join(baseDir, '.rea', 'audit.jsonl'); + const originalLines = (await fs.readFile(auditFile, 'utf8')) + .split('\n') + .filter((l) => l.length > 0); + expect(originalLines).toHaveLength(3); + + // Tamper real.two (originalLines[1]). + const tampered = originalLines[1]!.replace( + /"hash":"([0-9a-f]+)"/, + (_m, h: string) => `"hash":"${h.slice(0, -1)}${h.endsWith('0') ? '1' : '0'}"`, + ); + expect(tampered).not.toBe(originalLines[1]); + + // Splice a malformed line BEFORE the tamper. + const corrupted = [ + originalLines[0], + '{not-json', + tampered, + originalLines[2], + '', + ].join('\n'); + await fs.writeFile(auditFile, corrupted); + + const { exitCode } = await runVerify(); + expect(exitCode).toBe(1); + + // Parse failure on file line 2. + expect(io.stderr).toMatch(/audit\.jsonl:2\b/); + // Tamper: parseable-subset index 1 (zero-based among the 3 parseable + // records), BUT original-file line 3. Both must appear. + expect(io.stderr).toContain('TAMPER DETECTED'); + expect(io.stderr).toMatch(/Record index:\s+1\b/); + expect(io.stderr).toMatch(/File line:\s+3\b/); + }); + + it('reports a clean file on exit 0 with no parse or chain findings', async () => { + await appendAuditRecord(baseDir, { tool_name: 'real.one', server_name: 'unit' }); + await appendAuditRecord(baseDir, { tool_name: 'real.two', server_name: 'unit' }); + + const { exitCode } = await runVerify(); + expect(exitCode).toBeNull(); + expect(io.stdout).toMatch(/Audit chain verified: 2 records/); + expect(io.stderr).not.toContain('unparseable'); + expect(io.stderr).not.toContain('TAMPER'); + }); + + it('treats mid-file empty lines as parse failures (distinct from record absence)', async () => { + await appendAuditRecord(baseDir, { tool_name: 'real.one', server_name: 'unit' }); + await appendAuditRecord(baseDir, { tool_name: 'real.two', server_name: 'unit' }); + + const auditFile = path.join(baseDir, '.rea', 'audit.jsonl'); + const originalLines = (await fs.readFile(auditFile, 'utf8')) + .split('\n') + .filter((l) => l.length > 0); + // Splice an empty line between the two records. + const corrupted = [originalLines[0], '', originalLines[1], ''].join('\n'); + await fs.writeFile(auditFile, corrupted); + + const { exitCode } = await runVerify(); + expect(exitCode).toBe(1); + expect(io.stderr).toContain('1 unparseable line(s) detected'); + expect(io.stderr).toMatch(/audit\.jsonl:2\b/); + }); +}); diff --git a/__tests__/hooks/push-review-fromjson-tolerance.test.ts b/__tests__/hooks/push-review-fromjson-tolerance.test.ts new file mode 100644 index 0000000..0fad6de --- /dev/null +++ b/__tests__/hooks/push-review-fromjson-tolerance.test.ts @@ -0,0 +1,210 @@ +/** + * Integration test for Defect U (0.10.2): push-review-core.sh's Codex-review + * jq scan must tolerate malformed lines in .rea/audit.jsonl. + * + * Before 0.10.2 the scan was: + * + * jq -e --arg sha "$sha" ' + * select(.tool_name == "codex.review" and .metadata.head_sha == $sha + * and (.metadata.verdict == "pass" or .metadata.verdict == "concerns") + * and (.emission_source == "rea-cli" or .emission_source == "codex-cli")) + * ' "$_audit" >/dev/null 2>&1 + * + * jq parses the file as a single JSON stream. A single unparseable line + * makes jq exit with status 2 BEFORE `select` runs against any record, so + * every legitimate codex.review receipt past the corruption becomes + * unreachable. One stray backslash sequence locks the push gate closed. + * + * After 0.10.2 the scan is: + * + * jq -R --arg sha "$sha" ' + * fromjson? + * | select() + * ' "$_audit" 2>/dev/null | grep -q . + * + * `-R` takes each line as a raw string; `fromjson?` is the error-suppressing + * parser. Malformed lines yield empty output instead of failing the pipeline. + * The `select` filter runs against every successfully parsed record. + * + * This test exercises the exact pipeline against a .rea/audit.jsonl that + * sandwiches a malformed line between two valid codex.review records with + * different head_sha values. Both must be findable. + */ + +import { spawnSync } from 'node:child_process'; +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { appendCodexReviewAuditRecord, Tier, InvocationStatus } from '../../src/audit/append.js'; + +function jqExists(): boolean { + const res = spawnSync('jq', ['--version'], { encoding: 'utf8' }); + return res.status === 0; +} + +/** + * Run the EXACT jq pipeline push-review-core.sh uses after defect U. Returns + * true iff at least one audit line satisfies the Codex-review predicate for + * the given head_sha. Matches the shell logic line-for-line: + * + * - `-R` (raw input) + `fromjson?` (error-suppressing parse) + * - select on tool_name / head_sha / verdict / emission_source + * - downstream `grep -q .` turns "any output" into exit 0 + * + * stderr is captured so jq's per-line parse errors don't pollute the test + * runner's output — the hook pipes 2>/dev/null for the same reason. + */ +function runCodexOkScan(auditFile: string, headSha: string): boolean { + const filter = ` + fromjson? + | select( + .tool_name == "codex.review" + and .metadata.head_sha == $sha + and (.metadata.verdict == "pass" or .metadata.verdict == "concerns") + and (.emission_source == "rea-cli" or .emission_source == "codex-cli") + ) + `; + const jq = spawnSync('jq', ['-R', '--arg', 'sha', headSha, filter, auditFile], { + encoding: 'utf8', + }); + if (jq.status !== 0 && jq.status !== 1) { + // jq-1 is "no matches" under -R (grep-q-equivalent); jq-0 is "matches + // produced output". Anything else means the file couldn't be opened or + // a filter syntax error — both are test setup bugs, not the predicate + // behavior we're measuring. + throw new Error( + `jq exited with status ${jq.status ?? '?'}: stderr=${jq.stderr ?? ''}`, + ); + } + const stdout = jq.stdout ?? ''; + // `grep -q .` returns 0 iff at least one non-empty line was emitted. Match + // that behavior in JS. + return stdout.split('\n').some((l) => l.length > 0); +} + +describe('push-review-core.sh Codex-review jq scan — defect U tolerance', () => { + let baseDir: string; + + beforeEach(async () => { + baseDir = await fs.mkdtemp(path.join(os.tmpdir(), 'rea-fromjson-U-')); + baseDir = await fs.realpath(baseDir); + await fs.mkdir(path.join(baseDir, '.rea'), { recursive: true }); + }); + + afterEach(async () => { + await fs.rm(baseDir, { recursive: true, force: true }); + }); + + it('finds a valid codex.review record when the audit file has no corruption', async () => { + if (!jqExists()) return; + const sha = 'a'.repeat(40); + await appendCodexReviewAuditRecord(baseDir, { + tier: Tier.Read, + status: InvocationStatus.Allowed, + metadata: { head_sha: sha, target: 'main', verdict: 'pass', finding_count: 0 }, + }); + const auditFile = path.join(baseDir, '.rea', 'audit.jsonl'); + expect(runCodexOkScan(auditFile, sha)).toBe(true); + }); + + it('finds BOTH valid codex.review records when a malformed line sits between them (defect U)', async () => { + if (!jqExists()) return; + const shaBefore = 'b'.repeat(40); + const shaAfter = 'c'.repeat(40); + + // First valid record. + await appendCodexReviewAuditRecord(baseDir, { + tier: Tier.Read, + status: InvocationStatus.Allowed, + metadata: { head_sha: shaBefore, target: 'main', verdict: 'pass', finding_count: 0 }, + }); + // Second valid record — distinct head_sha, same canonical shape. + await appendCodexReviewAuditRecord(baseDir, { + tier: Tier.Read, + status: InvocationStatus.Allowed, + metadata: { + head_sha: shaAfter, + target: 'main', + verdict: 'concerns', + finding_count: 2, + }, + }); + + // Splice a malformed line BETWEEN the two records. Direct write is the + // only way to simulate external corruption — defect T's self-check + // prevents the helper from producing this shape. + const auditFile = path.join(baseDir, '.rea', 'audit.jsonl'); + const rawBytes = (await fs.readFile(auditFile, 'utf8')) + .split('\n') + .filter((l) => l.length > 0); + expect(rawBytes).toHaveLength(2); + // Classic "backslash-u followed by non-hex" — the exact failure shape + // that triggered defect T/U in the field. jq on the whole-file stream + // sees this as an unterminated escape and bails with status 2. + const malformed = '{"tool_name":"codex.review","bogus":"\\uZZZZ"}'; + const corrupted = [rawBytes[0], malformed, rawBytes[1], ''].join('\n'); + await fs.writeFile(auditFile, corrupted); + + // Sanity: pre-0.10.2 pipeline (`jq -e '' file`) would bail on + // this file. We don't assert the old behavior here — the test exists + // to lock in the NEW behavior. Both head_shas must be findable. + expect(runCodexOkScan(auditFile, shaBefore)).toBe(true); + expect(runCodexOkScan(auditFile, shaAfter)).toBe(true); + + // And a nonexistent sha must still return false — tolerance for + // malformed lines must not weaken the predicate into "anything passes". + expect(runCodexOkScan(auditFile, 'd'.repeat(40))).toBe(false); + }); + + it('rejects forged records with emission_source="other" even past a malformed line', async () => { + if (!jqExists()) return; + // This locks the defect-P guarantee through the defect-U scan. An + // attacker who hand-wrote a tool_name=codex.review line with + // emission_source="other" into .rea/audit.jsonl must still fail the + // predicate, even if a malformed line precedes it (which would + // otherwise mask their forgery via the old whole-file parse). + const legitSha = 'e'.repeat(40); + const forgedSha = 'f'.repeat(40); + await appendCodexReviewAuditRecord(baseDir, { + tier: Tier.Read, + status: InvocationStatus.Allowed, + metadata: { + head_sha: legitSha, + target: 'main', + verdict: 'pass', + finding_count: 0, + }, + }); + + const auditFile = path.join(baseDir, '.rea', 'audit.jsonl'); + const legitLine = (await fs.readFile(auditFile, 'utf8')).split('\n')[0]!; + // A hand-rolled line that would satisfy every predicate clause EXCEPT + // `.emission_source == "rea-cli" or "codex-cli"`. Attackers can stamp + // "other" via the public appendAuditRecord() helper, so this is the + // concrete shape the gate guards against. + const forged = JSON.stringify({ + tool_name: 'codex.review', + server_name: 'codex', + emission_source: 'other', + metadata: { + head_sha: forgedSha, + target: 'main', + verdict: 'pass', + finding_count: 0, + }, + }); + const malformed = '{"tool_name":"codex.review","bogus":"\\uZZZZ"}'; + await fs.writeFile(auditFile, [legitLine, malformed, forged, ''].join('\n')); + + expect(runCodexOkScan(auditFile, legitSha)).toBe(true); + expect(runCodexOkScan(auditFile, forgedSha)).toBe(false); + }); + + it('returns false when the file contains only malformed lines', async () => { + if (!jqExists()) return; + const auditFile = path.join(baseDir, '.rea', 'audit.jsonl'); + await fs.writeFile(auditFile, ['{not json', '{"unterminated', ''].join('\n')); + expect(runCodexOkScan(auditFile, 'z'.repeat(40))).toBe(false); + }); +}); diff --git a/hooks/_lib/push-review-core.sh b/hooks/_lib/push-review-core.sh index cf02936..b1b4bfa 100755 --- a/hooks/_lib/push-review-core.sh +++ b/hooks/_lib/push-review-core.sh @@ -719,12 +719,20 @@ pr_core_run() { # fail-closed and require an explicit review. local SOURCE_SHA="" MERGE_BASE="" TARGET_BRANCH="" SOURCE_REF="" local HAS_DELETE=0 BEST_COUNT=0 - local rec local_sha remote_sha local_ref remote_ref target mb mb_status count count_status + local rec local_sha remote_sha local_ref remote_ref target resolved_base mb mb_status count count_status for rec in "${REFSPEC_RECORDS[@]}"; do IFS='|' read -r local_sha remote_sha local_ref remote_ref <<<"$rec" target="${remote_ref#refs/heads/}" target="${target#refs/for/}" [[ -z "$target" ]] && target="main" + # Defect N: track the SEMANTIC base (the ref the diff was anchored on) + # distinctly from `target` (the pushed remote ref). For a tracked branch + # they coincide; for a new branch, `target` is the branch name being + # created — which is NOT what we reviewed against, so `Target:` must + # echo `resolved_base` instead. Default to `target` for the tracked + # case; the new-branch path overrides with the resolved default_ref + # short name below. + resolved_base="$target" if [[ "$local_sha" == "$ZERO_SHA" ]]; then HAS_DELETE=1 @@ -774,25 +782,81 @@ pr_core_run() { # # argv_remote is set from the adapter's argv (git passes the remote name # as $1 on pre-push); defaults to "origin" when absent (BUG-008 sniff). - local default_ref default_ref_status - default_ref=$(cd "$REA_ROOT" && git symbolic-ref "refs/remotes/${argv_remote}/HEAD" 2>/dev/null) - default_ref_status=$? - if [[ "$default_ref_status" -ne 0 || -z "$default_ref" ]]; then - # symbolic-ref failed (common on shallow or mirror clones where - # origin/HEAD was never set). Probe the common default-branch names in - # order: main, then master. Both are remote-tracking refs and still - # server-authoritative; the order matters only for projects that still - # default to `master` (older internal forks), where without this - # fallback the first push of a new branch would fail closed. - if cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/remotes/${argv_remote}/main" >/dev/null 2>&1; then - default_ref="refs/remotes/${argv_remote}/main" - elif cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/remotes/${argv_remote}/master" >/dev/null 2>&1; then - default_ref="refs/remotes/${argv_remote}/master" - else - default_ref="" + # + # Defect N (0.10.1): BEFORE falling back to the remote's default branch, + # consult per-branch config `branch..base`. A feature branch + # targeting `dev` in a main-as-production repo would otherwise resolve + # against `origin/main` silently, producing a diff that spans the entire + # dev→main history — reviewers see "Scope: 28690 lines" for a 4-file + # change. The git-config route uses local branch knowledge that is + # authoritative for this working copy (set via `git branch --set-upstream`, + # or by CI tooling that tracks the intended target). This is consulted + # BEFORE origin/HEAD because the latter is a server-default that may + # mis-represent the reviewer's actual intent for this specific branch. + local default_ref default_ref_status configured_base source_branch + source_branch="${local_ref#refs/heads/}" + default_ref="" + # Codex 0.10.1 finding #1: `local` is function-scoped, not loop- + # iteration-scoped — without an explicit reset, iteration N inherits + # iteration N-1's configured_base and falsely promotes resolved_base + # when the current refspec's local_ref does NOT begin with refs/heads/ + # (tag push, gerrit-style refs/for/, etc.). Reset before every + # potential assignment so each iteration sees a clean slate. + configured_base="" + + if [[ -n "$source_branch" && "$source_branch" != "HEAD" ]]; then + configured_base=$(cd "$REA_ROOT" && git config --get "branch.${source_branch}.base" 2>/dev/null || echo "") + if [[ -n "$configured_base" ]]; then + # Prefer the REMOTE-TRACKING form so the gate still anchors on a + # server-authoritative ref (see the local-ref hijack explanation + # above). Fall back to the local short ref only if the remote + # counterpart doesn't exist, with a visible WARN on stderr — the + # local ref is less trustworthy and the reviewer should know. + if cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/remotes/${argv_remote}/${configured_base}" >/dev/null 2>&1; then + default_ref="refs/remotes/${argv_remote}/${configured_base}" + elif cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/heads/${configured_base}" >/dev/null 2>&1; then + default_ref="refs/heads/${configured_base}" + printf 'WARN: branch.%s.base=%s resolved to local ref; remote counterpart %s/%s missing — reviewer-side diff may be stale\n' \ + "$source_branch" "$configured_base" "$argv_remote" "$configured_base" >&2 + fi + fi + fi + + if [[ -z "$default_ref" ]]; then + default_ref=$(cd "$REA_ROOT" && git symbolic-ref "refs/remotes/${argv_remote}/HEAD" 2>/dev/null) + default_ref_status=$? + if [[ "$default_ref_status" -ne 0 || -z "$default_ref" ]]; then + # symbolic-ref failed (common on shallow or mirror clones where + # origin/HEAD was never set). Probe the common default-branch names in + # order: main, then master. Both are remote-tracking refs and still + # server-authoritative; the order matters only for projects that still + # default to `master` (older internal forks), where without this + # fallback the first push of a new branch would fail closed. + if cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/remotes/${argv_remote}/main" >/dev/null 2>&1; then + default_ref="refs/remotes/${argv_remote}/main" + elif cd "$REA_ROOT" && git rev-parse --verify --quiet "refs/remotes/${argv_remote}/master" >/dev/null 2>&1; then + default_ref="refs/remotes/${argv_remote}/master" + else + default_ref="" + fi fi fi if [[ -n "$default_ref" ]]; then + # Defect N: if operator-configured `branch..base` resolved the + # ref we're about to diff against, overwrite `resolved_base` with the + # short name so TARGET_BRANCH (and the Target: label) reflect the + # actual review anchor. Without an explicit config override, leave + # `resolved_base` at the refspec target — this preserves the cache + # contract for new-branch pushes where remote_ref is the same as the + # source branch (the common case) and for bare pushes that + # argv-resolve via `@{upstream}`. Only operators who opted into a + # per-branch base get the label promoted, keeping the change + # backward-compatible for every other path. + if [[ -n "$configured_base" ]]; then + resolved_base="${default_ref#refs/remotes/${argv_remote}/}" + resolved_base="${resolved_base#refs/heads/}" + [[ -z "$resolved_base" ]] && resolved_base="$default_ref" + fi mb=$(cd "$REA_ROOT" && git merge-base "$default_ref" "$local_sha" 2>/dev/null || echo "") if [[ -z "$mb" ]]; then # default_ref resolved but merge-base came back empty (unrelated @@ -867,13 +931,40 @@ pr_core_run() { if [[ "$CODEX_WAIVER_ACTIVE" == "1" ]]; then _codex_ok=1 elif [[ -f "$_audit" ]]; then - if jq -e --arg sha "$local_sha" ' - select( - .tool_name == "codex.review" - and .metadata.head_sha == $sha - and (.metadata.verdict == "pass" or .metadata.verdict == "concerns") - ) - ' "$_audit" >/dev/null 2>&1; then + # Defect P (0.10.1): require .emission_source == "rea-cli" or + # "codex-cli" so agents cannot forge a codex.review record by + # directly calling appendAuditRecord() from an ad-hoc .mjs script + # (the generic helper stamps "other"). Legacy records (pre-0.10.1) + # have no emission_source field and are rejected — the first push + # on an upgraded consumer requires a fresh `rea audit record + # codex-review` (or Codex CLI emission) which stamps "rea-cli". + # + # Defect T/U (0.10.2): read the audit file as raw lines and parse + # each with `fromjson?`. Before 0.10.2 this scan used + # `jq -e '' "$_audit"` which feeds the file as a single + # JSON stream — a single malformed line (literal backslash-u + # followed by non-hex characters inside a string, for example) + # makes jq bail on the stream with exit 2 and the `select` never + # runs against ANY record, including legitimate codex.review + # entries further down the file. The failure is total: every + # cached codex.review receipt becomes unreachable until the + # corrupt line is hand-edited out. `-R` flips jq into raw-input + # mode (one string per line), and `fromjson?` is the error- + # suppressing parser — malformed lines silently yield empty + # output. The `select` filter then inspects each successfully + # parsed record exactly as before, and `grep -q .` detects + # whether ANY record survived the filter. Lines 1107 and the + # earlier cache_result scans at :432/:612 operate on a single + # printf'd JSON string, not audit.jsonl, so they remain `jq -e`. + if jq -R --arg sha "$local_sha" ' + fromjson? + | select( + .tool_name == "codex.review" + and .metadata.head_sha == $sha + and (.metadata.verdict == "pass" or .metadata.verdict == "concerns") + and (.emission_source == "rea-cli" or .emission_source == "codex-cli") + ) + ' "$_audit" 2>/dev/null | grep -q .; then _codex_ok=1 fi fi @@ -918,7 +1009,12 @@ pr_core_run() { if [[ -z "$SOURCE_SHA" ]] || [[ "$count" -gt "$BEST_COUNT" ]]; then SOURCE_SHA="$local_sha" MERGE_BASE="$mb" - TARGET_BRANCH="$target" + # Defect N: use `resolved_base` (the actual merge-base anchor we + # diffed against), not `target` (the pushed-ref name). For tracked + # branches these are the same; for new branches without an upstream + # the distinction is the difference between "Target: " + # (misleading) and "Target: main" (or whichever base was resolved). + TARGET_BRANCH="$resolved_base" SOURCE_REF="$local_ref" BEST_COUNT="$count" fi diff --git a/src/audit/append.test.ts b/src/audit/append.test.ts index 4ce8d85..c245784 100644 --- a/src/audit/append.test.ts +++ b/src/audit/append.test.ts @@ -156,6 +156,71 @@ describe('appendAuditRecord — hash-chain normalization (finding #6)', () => { } }); + it('throws before writing when JSON.stringify output fails self-check (defect T)', async () => { + // Defect T contract: appendAuditRecord must verify its own serialization + // round-trips through JSON.parse BEFORE the line touches .rea/audit.jsonl. + // No public caller can currently produce an unparseable line (TypeScript + // input shapes rule it out), so we simulate the failure mode by + // monkey-patching JSON.stringify to emit a known-bad line for one call. + // + // The contract we assert: + // 1. The helper throws with a diagnostic naming tool_name/server_name. + // 2. `.rea/audit.jsonl` does NOT contain the malformed line — in fact, + // the file remains absent (this was the first write on a fresh repo). + // 3. The hash chain on disk is untouched; a subsequent valid write + // lands cleanly. + const originalStringify = JSON.stringify.bind(JSON); + const spy = (value: unknown, ...rest: unknown[]): string => { + // Only intercept the SECOND JSON.stringify call in the append path — + // the one that serializes the fully-formed `record` (includes `hash`) + // for on-disk write. The FIRST call is computeHash serializing + // `recordBase` (has `tool_name` but no `hash`) — we must let that + // return real JSON so the hash computes correctly; otherwise the + // helper throws inside computeHash instead of the line self-check, + // testing a different code path. + if ( + typeof value === 'object' && + value !== null && + (value as Record).tool_name === 'T-self-check-target' && + typeof (value as Record).hash === 'string' + ) { + // Emit a definitively-unparseable string. JSON.parse rejects a bare + // trailing backslash-quote sequence, which is the canonical + // "escape without target" failure — exactly the kind of byte + // sequence defect T surfaced in the wild. + return '{"broken":"\\}'; + } + return originalStringify(value, ...(rest as [])); + }; + (JSON as { stringify: typeof JSON.stringify }).stringify = spy; + + try { + await expect( + appendAuditRecord(baseDir, { + tool_name: 'T-self-check-target', + server_name: 'unit', + metadata: { defect: 'T' }, + }), + ).rejects.toThrow(/Audit append aborted.*T-self-check-target.*No data was written/s); + } finally { + (JSON as { stringify: typeof JSON.stringify }).stringify = originalStringify; + } + + // The audit file must not exist — the throw fired before fs.appendFile. + const auditFile = path.join(baseDir, '.rea', 'audit.jsonl'); + await expect(fs.stat(auditFile)).rejects.toMatchObject({ code: 'ENOENT' }); + + // A subsequent valid write lands cleanly and chains from GENESIS. + const ok = await appendAuditRecord(baseDir, { + tool_name: 'after-self-check', + server_name: 'unit', + }); + const lines = await readAuditLines(baseDir); + expect(lines).toHaveLength(1); + expect(lines[0]!.tool_name).toBe('after-self-check'); + expect(lines[0]!.hash).toBe(ok.hash); + }); + it('serializes appends across a symlinked baseDir and its real path', async () => { // Create a symlink that resolves to baseDir. Callers passing the link and // callers passing the real path must share the same queue. diff --git a/src/audit/append.ts b/src/audit/append.ts index 8035f99..48006cc 100644 --- a/src/audit/append.ts +++ b/src/audit/append.ts @@ -37,7 +37,7 @@ import fs from 'node:fs/promises'; import path from 'node:path'; import { Tier, InvocationStatus } from '../policy/types.js'; import type { Policy } from '../policy/types.js'; -import type { AuditRecord } from '../gateway/middleware/audit-types.js'; +import type { AuditRecord, EmissionSource } from '../gateway/middleware/audit-types.js'; import { GENESIS_HASH, computeHash, @@ -46,6 +46,7 @@ import { withAuditLock, } from './fs.js'; import { maybeRotate } from '../gateway/audit/rotator.js'; +import { CODEX_REVIEW_SERVER_NAME, CODEX_REVIEW_TOOL_NAME } from './codex-event.js'; const REA_DIR = '.rea'; const AUDIT_FILE = 'audit.jsonl'; @@ -120,6 +121,7 @@ async function resolveBaseDir(baseDir: string): Promise { async function doAppend( resolvedBase: string, input: AppendAuditInput, + emissionSource: EmissionSource, ): Promise { const reaDir = path.join(resolvedBase, REA_DIR); const auditFile = path.join(reaDir, AUDIT_FILE); @@ -146,6 +148,7 @@ async function doAppend( autonomy_level: input.autonomy_level ?? 'unknown', duration_ms: input.duration_ms ?? 0, prev_hash: effectivePrev, + emission_source: emissionSource, }; if (input.error) recordBase.error = input.error; if (input.redacted_fields?.length) recordBase.redacted_fields = input.redacted_fields; @@ -157,6 +160,35 @@ async function doAppend( const record: AuditRecord = { ...recordBase, hash }; const line = JSON.stringify(record) + '\n'; + // Defect T (0.10.2): serialization self-check. A valid AuditRecord + the + // trailing newline should always round-trip through JSON.parse, but we + // verify that invariant BEFORE the line touches the hash-chain file. A + // throw here aborts the append WITHOUT writing anything — the caller sees + // the failure and the on-disk chain tail is unchanged. This is + // defense-in-depth against the class of regression that would otherwise + // write an unparseable line to `.rea/audit.jsonl` and only surface at + // `rea audit verify` time (or, worse, when push-review-core.sh's jq scan + // silently fails to find a legitimate `codex.review` record past the + // corruption). The concrete failure modes guarded against: + // + // - A future refactor introducing a non-JSON-safe field into + // AuditRecord (BigInt, circular ref, undefined-in-array, etc.) that + // slips past TypeScript. + // - A hostile `metadata` value whose serialized form produces output + // JSON.parse rejects (currently impossible given our input types, + // but the check is cheap and the recovery cost is high). + try { + JSON.parse(line); + } catch (e) { + throw new Error( + `Audit append aborted: JSON.stringify produced an unparseable line ` + + `for tool_name=${JSON.stringify(record.tool_name)} ` + + `server_name=${JSON.stringify(record.server_name)}. ` + + `Underlying parser error: ${(e as Error).message}. ` + + `No data was written to ${auditFile}.`, + ); + } + await fs.appendFile(auditFile, line); await fsyncFile(auditFile); @@ -164,17 +196,10 @@ async function doAppend( }); } -/** - * Append a structured audit record to `${baseDir}/.rea/audit.jsonl` with a - * hash chained against the tail of the existing log. - * - * @param baseDir - Repo/project root (the directory that contains `.rea/`). - * @param input - Event data. `tool_name` and `server_name` are required. - * @returns The full written record, including the computed `hash`. - */ -export async function appendAuditRecord( +async function enqueueAppend( baseDir: string, input: AppendAuditInput, + emissionSource: EmissionSource, ): Promise { // Canonicalize the baseDir so every caller targeting the same on-disk // directory lands on the same queue key, regardless of whether they passed @@ -191,7 +216,7 @@ export async function appendAuditRecord( /* previous write's error is owned by that caller */ }) .then(async () => { - record = await doAppend(resolvedBase, input); + record = await doAppend(resolvedBase, input, emissionSource); }); writeQueues.set( key, @@ -216,7 +241,66 @@ export async function appendAuditRecord( return record; } -export type { AuditRecord } from '../gateway/middleware/audit-types.js'; +/** + * Append a structured audit record to `${baseDir}/.rea/audit.jsonl` with a + * hash chained against the tail of the existing log. + * + * ## emission_source (defect P) + * + * Records written through this public helper are ALWAYS stamped with + * `emission_source: "other"`. External consumers (Helix, ad-hoc scripts, + * plugins) have no way to self-assert `"rea-cli"` or `"codex-cli"` through + * this entry point — the parameter is not part of the public + * {@link AppendAuditInput} shape. Records emitted by the rea CLI itself use + * the dedicated {@link appendCodexReviewAuditRecord} helper, which is the + * ONLY path that stamps `"rea-cli"`. + * + * The push-review cache gate rejects `codex.review` records whose + * `emission_source` is `"other"` (or missing, for legacy records), so + * forging a `codex.review` record through this helper produces a line that + * is on the hash chain but does NOT satisfy the gate. + * + * @param baseDir - Repo/project root (the directory that contains `.rea/`). + * @param input - Event data. `tool_name` and `server_name` are required. + * @returns The full written record, including the computed `hash`. + */ +export async function appendAuditRecord( + baseDir: string, + input: AppendAuditInput, +): Promise { + return enqueueAppend(baseDir, input, 'other'); +} + +/** + * Append a `tool_name: "codex.review"` audit record certifying that a Codex + * adversarial review ran on a specific commit SHA (defect P). + * + * This is the ONLY write path in `@bookedsolid/rea` that produces + * `emission_source: "rea-cli"` for `codex.review` records. Consumers MUST + * reach this helper through the `rea audit record codex-review` CLI (which + * is classified as a Write-tier Bash invocation by `reaCommandTier`, defect + * E). Any other code path calling the generic {@link appendAuditRecord} + * with `tool_name: "codex.review"` lands with `emission_source: "other"` + * and does NOT satisfy the push-review cache gate — closing the forgery + * surface that `.reports/hook-patches/emit-audit-*.mjs` scripts exploited + * before this patch. + * + * `tool_name` and `server_name` are fixed to the canonical values + * (`"codex.review"` / `"codex"`) and are NOT accepted as caller inputs — + * the type excludes them so the contract is self-documenting. + */ +export async function appendCodexReviewAuditRecord( + baseDir: string, + input: Omit, +): Promise { + return enqueueAppend( + baseDir, + { ...input, tool_name: CODEX_REVIEW_TOOL_NAME, server_name: CODEX_REVIEW_SERVER_NAME }, + 'rea-cli', + ); +} + +export type { AuditRecord, EmissionSource } from '../gateway/middleware/audit-types.js'; export { Tier, InvocationStatus } from '../policy/types.js'; export { CODEX_REVIEW_TOOL_NAME, diff --git a/src/audit/emission-source.test.ts b/src/audit/emission-source.test.ts new file mode 100644 index 0000000..f82b39f --- /dev/null +++ b/src/audit/emission-source.test.ts @@ -0,0 +1,123 @@ +/** + * Defect P regression — emission_source discriminator for audit records. + * + * These tests pin down the three invariants that close the + * `appendAuditRecord()` forgery surface: + * + * 1. The public `appendAuditRecord()` helper stamps `emission_source: + * "other"` on every record. External consumers cannot self-assert + * "rea-cli" through this entry point (the field is NOT part of the + * public AppendAuditInput shape). + * + * 2. The dedicated `appendCodexReviewAuditRecord()` helper stamps + * `"rea-cli"` and forces the canonical tool_name / server_name so + * callers cannot route a generic record through the codex-certification + * path by accident or on purpose. + * + * 3. The hash chain includes `emission_source` in its computed hash, so + * the field cannot be altered post-hoc without breaking the chain. + */ + +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + appendAuditRecord, + appendCodexReviewAuditRecord, + CODEX_REVIEW_SERVER_NAME, + CODEX_REVIEW_TOOL_NAME, + type AuditRecord, +} from './append.js'; +import { computeHash } from './fs.js'; + +describe('emission_source — public appendAuditRecord stamps "other"', () => { + let baseDir: string; + + beforeEach(async () => { + baseDir = await fs.realpath(await fs.mkdtemp(path.join(os.tmpdir(), 'rea-emission-src-'))); + }); + + afterEach(async () => { + await fs.rm(baseDir, { recursive: true, force: true }); + }); + + it('stamps emission_source: "other" on every record written through the public helper', async () => { + const rec = await appendAuditRecord(baseDir, { + tool_name: 'helix.plan', + server_name: 'helix', + }); + expect(rec.emission_source).toBe('other'); + + const raw = await fs.readFile(path.join(baseDir, '.rea', 'audit.jsonl'), 'utf8'); + const parsed = JSON.parse(raw.trim()) as AuditRecord; + expect(parsed.emission_source).toBe('other'); + }); + + it('stamps "other" even when the tool_name is codex.review — forgery through the generic helper is visible to the gate', async () => { + const rec = await appendAuditRecord(baseDir, { + tool_name: CODEX_REVIEW_TOOL_NAME, + server_name: CODEX_REVIEW_SERVER_NAME, + metadata: { head_sha: 'deadbeef', verdict: 'pass' }, + }); + expect(rec.tool_name).toBe('codex.review'); + expect(rec.emission_source).toBe('other'); + }); +}); + +describe('emission_source — appendCodexReviewAuditRecord stamps "rea-cli"', () => { + let baseDir: string; + + beforeEach(async () => { + baseDir = await fs.realpath(await fs.mkdtemp(path.join(os.tmpdir(), 'rea-emission-codex-'))); + }); + + afterEach(async () => { + await fs.rm(baseDir, { recursive: true, force: true }); + }); + + it('stamps "rea-cli" and the canonical tool_name/server_name — the helper does not accept caller overrides', async () => { + const rec = await appendCodexReviewAuditRecord(baseDir, { + metadata: { head_sha: 'cafef00d', verdict: 'pass' }, + }); + expect(rec.emission_source).toBe('rea-cli'); + expect(rec.tool_name).toBe('codex.review'); + expect(rec.server_name).toBe('codex'); + }); + + it('persists emission_source: "rea-cli" to disk so the jq gate predicate matches', async () => { + await appendCodexReviewAuditRecord(baseDir, { + metadata: { head_sha: 'abc', verdict: 'pass' }, + }); + const raw = await fs.readFile(path.join(baseDir, '.rea', 'audit.jsonl'), 'utf8'); + const parsed = JSON.parse(raw.trim()) as AuditRecord; + expect(parsed.emission_source).toBe('rea-cli'); + expect(parsed.tool_name).toBe(CODEX_REVIEW_TOOL_NAME); + }); +}); + +describe('emission_source — hash chain integrity', () => { + let baseDir: string; + + beforeEach(async () => { + baseDir = await fs.realpath(await fs.mkdtemp(path.join(os.tmpdir(), 'rea-emission-hash-'))); + }); + + afterEach(async () => { + await fs.rm(baseDir, { recursive: true, force: true }); + }); + + it('includes emission_source in the computed hash — flipping the field breaks the chain', async () => { + const record = await appendAuditRecord(baseDir, { + tool_name: 'any', + server_name: 'unit', + }); + const { hash: _hash, ...baseFields } = record; + void _hash; + expect(computeHash(baseFields)).toBe(record.hash); + // Flipping emission_source MUST produce a different hash — i.e. the field + // is part of the hashed recordBase, not an afterthought. + const forged = { ...baseFields, emission_source: 'rea-cli' as const }; + expect(computeHash(forged)).not.toBe(record.hash); + }); +}); diff --git a/src/cli/audit.ts b/src/cli/audit.ts index 529f39c..8d55db4 100644 --- a/src/cli/audit.ts +++ b/src/cli/audit.ts @@ -15,9 +15,7 @@ import fs from 'node:fs/promises'; import path from 'node:path'; import { forceRotate } from '../gateway/audit/rotator.js'; import { - appendAuditRecord, - CODEX_REVIEW_SERVER_NAME, - CODEX_REVIEW_TOOL_NAME, + appendCodexReviewAuditRecord, type CodexVerdict, } from '../audit/append.js'; import { computeHash, GENESIS_HASH } from '../audit/fs.js'; @@ -87,34 +85,106 @@ export async function runAuditRotate(_options: AuditRotateOptions): Promise { +interface ParseFailure { + file: string; + /** 1-based line number within the file (matches editor / awk / jq output). */ + lineNumber: number; + /** 1-based column of the parser's reported fault, if the error message names one. */ + column?: number | undefined; + /** Underlying `JSON.parse` error message. */ + message: string; +} + +/** + * Best-effort column extractor. Node's JSON.parse error messages include a + * `position N` that is a 0-based character offset into the parsed string. + * When we parse a single JSONL line, that offset maps directly to a column. + * Returns undefined when the position token is absent — the line number + * alone is still useful. + */ +function extractColumnFromParserError(message: string): number | undefined { + const m = /position (\d+)/.exec(message); + if (m === null) return undefined; + const n = Number.parseInt(m[1] ?? '', 10); + if (!Number.isFinite(n) || n < 0) return undefined; + return n + 1; +} + +/** + * Load a JSONL audit file as a record array + per-line raw text + a list of + * per-line parse failures, so we can re-hash against the exact serialization + * that was written AND report every malformed line in one pass (defect T). + * + * Unparseable lines are a DISTINCT failure class from hash-chain tampers: + * + * - Malformed lines are collected into `parseFailures` and dropped from + * `records`. `rawLines` still contains the full original line array, so + * callers can cross-reference. `recordLineMap[i]` holds the 1-based file + * line number of `records[i]`. + * - The chain-verify pass runs only over the parseable subset. A caller + * that wants to report the verification result as partial checks + * `parseFailures.length > 0`. + * + * Throws only on read errors; returns an empty shape for an empty file. + */ +async function loadRecords(filePath: string): Promise<{ + records: AuditRecord[]; + /** 1-based line numbers for each entry in `records`. Same length as `records`. */ + recordLineMap: number[]; + rawLines: string[]; + parseFailures: ParseFailure[]; +}> { const raw = await fs.readFile(filePath, 'utf8'); // Drop a single trailing newline but preserve blank lines inside the file // so index numbers line up with real record positions. const trimmedTail = raw.replace(/\n$/, ''); - if (trimmedTail.length === 0) return { records: [], rawLines: [] }; + if (trimmedTail.length === 0) { + return { records: [], recordLineMap: [], rawLines: [], parseFailures: [] }; + } const rawLines = trimmedTail.split('\n'); - const records: AuditRecord[] = rawLines.map((line, i) => { + const records: AuditRecord[] = []; + const recordLineMap: number[] = []; + const parseFailures: ParseFailure[] = []; + const basename = path.basename(filePath); + for (let i = 0; i < rawLines.length; i++) { + const line = rawLines[i]!; + // Empty lines mid-file are not records but also not parseable — JSON.parse('') + // throws. Treat as a parse failure so verify surfaces them explicitly. try { - return JSON.parse(line) as AuditRecord; + const parsed = JSON.parse(line) as AuditRecord; + records.push(parsed); + recordLineMap.push(i + 1); } catch (e) { - throw new Error( - `Cannot parse JSON at ${path.basename(filePath)} line ${i + 1}: ${(e as Error).message}`, - ); + const msg = (e as Error).message; + const col = extractColumnFromParserError(msg); + parseFailures.push({ + file: basename, + lineNumber: i + 1, + ...(col !== undefined ? { column: col } : {}), + message: msg, + }); } - }); - return { records, rawLines }; + } + return { records, recordLineMap, rawLines, parseFailures }; } interface VerifyFailure { file: string; - lineIndex: number; // 0-based within the file + /** 0-based position within the parseable subset of this file. */ + recordIndex: number; + /** + * 1-based line number in the original file (survives parse-failure skips + * via loadRecords.recordLineMap). Matches editor/awk/jq output directly. + * Defect T: when a malformed line precedes the tampered record, recordIndex + * and fileLineNumber diverge — operators need the latter to jq/grep to the + * right place. + */ + fileLineNumber: number; reason: string; expected?: string; actual?: string; @@ -123,15 +193,18 @@ interface VerifyFailure { function verifyChain( fileBasename: string, records: AuditRecord[], + recordLineMap: number[], expectedStartPrev: string, ): VerifyFailure | null { let prev = expectedStartPrev; for (let i = 0; i < records.length; i++) { const r = records[i]!; + const fileLineNumber = recordLineMap[i] ?? i + 1; if (r.prev_hash !== prev) { return { file: fileBasename, - lineIndex: i, + recordIndex: i, + fileLineNumber, reason: 'prev_hash does not match previous record', expected: prev, actual: r.prev_hash, @@ -144,7 +217,8 @@ function verifyChain( if (recomputed !== hash) { return { file: fileBasename, - lineIndex: i, + recordIndex: i, + fileLineNumber, reason: 'stored hash does not match recomputed hash over record body', expected: recomputed, actual: hash, @@ -225,39 +299,100 @@ export async function runAuditVerify(options: AuditVerifyOptions): Promise process.exit(1); } + // Defect T (0.10.2): collect-all-errors mode. We no longer abort at the + // first unparseable line — `rea audit verify` now walks every file, lists + // EVERY malformed line with its number + parser message, and attempts + // chain verification over the parseable subset. Unparseable lines are a + // distinct failure class from hash-chain tampers; both contribute to a + // non-zero exit, but they are reported separately so an operator can tell + // "JSONL corruption" from "someone edited a hash". let expectedPrev = GENESIS_HASH; let totalRecords = 0; + const allParseFailures: ParseFailure[] = []; + let chainFailure: VerifyFailure | null = null; + let chainFailureFile: string | null = null; + for (const filePath of filesToVerify) { - let records: AuditRecord[]; + let loaded: Awaited>; try { - ({ records } = await loadRecords(filePath)); + loaded = await loadRecords(filePath); } catch (e) { err(`${(e as Error).message}`); process.exit(1); } - const basename = path.basename(filePath); - const failure = verifyChain(basename, records, expectedPrev); - if (failure !== null) { - err(`Audit chain TAMPER DETECTED in ${failure.file}`); - console.error(` Record index: ${failure.lineIndex} (0-based within file)`); - console.error(` Reason: ${failure.reason}`); - if (failure.expected !== undefined) { - console.error(` Expected: ${failure.expected}`); - } - if (failure.actual !== undefined) { - console.error(` Actual: ${failure.actual}`); + const { records, recordLineMap, parseFailures } = loaded; + allParseFailures.push(...parseFailures); + + // Chain verify over the parseable subset only. If an earlier file had a + // chain failure we stop verifying further files — advancing `expectedPrev` + // past an unknown tail would produce misleading secondary failures. + // recordLineMap threads the 1-based original-file line number through so + // the failure diagnostic names the editor/jq position directly, not the + // parseable-subset index which diverges from the file whenever a + // malformed line precedes the tamper. + if (chainFailure === null) { + const failure = verifyChain( + path.basename(filePath), + records, + recordLineMap, + expectedPrev, + ); + if (failure !== null) { + chainFailure = failure; + chainFailureFile = filePath; + } else if (records.length > 0) { + expectedPrev = records[records.length - 1]!.hash; } - process.exit(1); } - // Advance the cross-file anchor for the next file. - if (records.length > 0) { - expectedPrev = records[records.length - 1]!.hash; - } totalRecords += records.length; } + // Report parse failures first — they're independent of the chain result. + if (allParseFailures.length > 0) { + err( + `Audit verify: ${allParseFailures.length} unparseable line(s) detected. ` + + `Chain verification was performed over the parseable subset only.`, + ); + for (const f of allParseFailures) { + const loc = + f.column !== undefined + ? `${f.file}:${f.lineNumber}:${f.column}` + : `${f.file}:${f.lineNumber}`; + console.error(` ${loc} ${f.message}`); + } + } + + // Then report any chain failure found on the parseable subset. + if (chainFailure !== null) { + err(`Audit chain TAMPER DETECTED in ${chainFailure.file}`); + // File-line-number is the operator-facing anchor — jump straight to the + // offending line with `sed -n "${n}p" audit.jsonl` or editor:LINE. The + // parseable-subset index is kept for audit-tooling consumers that walk + // the records[] array. + console.error( + ` File line: ${chainFailure.fileLineNumber} (1-based in ${chainFailure.file})`, + ); + console.error( + ` Record index: ${chainFailure.recordIndex} (0-based within parseable subset)`, + ); + console.error(` Reason: ${chainFailure.reason}`); + if (chainFailure.expected !== undefined) { + console.error(` Expected: ${chainFailure.expected}`); + } + if (chainFailure.actual !== undefined) { + console.error(` Actual: ${chainFailure.actual}`); + } + if (chainFailureFile !== null) { + console.error(` File path: ${path.relative(baseDir, chainFailureFile)}`); + } + } + + if (allParseFailures.length > 0 || chainFailure !== null) { + process.exit(1); + } + log( `Audit chain verified: ${totalRecords} records across ${filesToVerify.length} file(s) — clean.`, ); @@ -324,9 +459,12 @@ export async function runAuditRecordCodexReview( metadata.summary = options.summary; } - await appendAuditRecord(baseDir, { - tool_name: CODEX_REVIEW_TOOL_NAME, - server_name: CODEX_REVIEW_SERVER_NAME, + // Defect P: stamps emission_source: "rea-cli" so the record satisfies the + // push-review gate's new integrity predicate. Legacy records (without + // emission_source) and records written through the generic + // appendAuditRecord() helper (emission_source: "other") are rejected. + // tool_name/server_name are fixed inside the helper. + await appendCodexReviewAuditRecord(baseDir, { tier: Tier.Read, status: InvocationStatus.Allowed, ...(options.sessionId !== undefined ? { session_id: options.sessionId } : {}), diff --git a/src/cli/doctor.ts b/src/cli/doctor.ts index 48d52b5..5da6db1 100644 --- a/src/cli/doctor.ts +++ b/src/cli/doctor.ts @@ -122,7 +122,7 @@ export async function checkFingerprintStore(baseDir: string): Promise to accept)`, + detail: `${parts.join(', ')} — next \`rea serve\` will block drift (run \`rea tofu list\` for detail, \`rea tofu accept \` to rebase after a legitimate registry edit)`, }; } diff --git a/src/cli/index.ts b/src/cli/index.ts index 2c56997..c73cf71 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -15,6 +15,7 @@ import { runFreeze, runUnfreeze } from './freeze.js'; import { runInit } from './init.js'; import { runServe } from './serve.js'; import { runStatus } from './status.js'; +import { runTofuAccept, runTofuList } from './tofu.js'; import { runUpgrade } from './upgrade.js'; import { err, getPkgVersion } from './utils.js'; @@ -263,6 +264,35 @@ async function main(): Promise { await runCacheList({ ...(opts.branch !== undefined ? { branch: opts.branch } : {}) }); }); + const tofu = program + .command('tofu') + .description( + 'TOFU fingerprint operations (G7) — inspect and rebase `.rea/fingerprints.json` when a legitimate registry edit has triggered drift fail-close. Emits audit records.', + ); + + tofu + .command('list') + .description( + 'Print every server declared in `.rea/registry.yaml` with its current-vs-stored fingerprint verdict (first-seen | unchanged | drifted).', + ) + .option('--json', 'emit JSON instead of the human-readable table') + .action(async (opts: { json?: boolean }) => { + await runTofuList({ ...(opts.json === true ? { json: true } : {}) }); + }); + + tofu + .command('accept ') + .description( + 'Rebase the stored fingerprint for to match the current canonical shape in `.rea/registry.yaml`. Use after a deliberate registry edit (vault added, command path renamed, env-key set changed). Emits a `tofu.drift_accepted_by_cli` audit record; next `rea serve` will classify as unchanged.', + ) + .option( + '--reason ', + 'free-text note captured in the audit record (recommended when accepting drift — explains WHY the canonical shape changed)', + ) + .action(async (name: string, opts: { reason?: string }) => { + await runTofuAccept({ name, ...(opts.reason !== undefined ? { reason: opts.reason } : {}) }); + }); + program .command('doctor') .description('Validate the install: policy parses, .rea/ layout, hooks, Codex plugin.') diff --git a/src/cli/tofu.test.ts b/src/cli/tofu.test.ts new file mode 100644 index 0000000..6e871b5 --- /dev/null +++ b/src/cli/tofu.test.ts @@ -0,0 +1,246 @@ +/** + * CLI-level tests for `rea tofu` (defect S). + * + * Focus areas: + * 1. `classifyRows` — pure classifier matches verdicts from the G7 primitive. + * 2. `runTofuAccept` — rebases the stored fingerprint and emits an audit + * record on drift; no-op when stored already matches current. + * 3. `runTofuAccept` — rejects unknown server names (process.exit(1)). + * 4. `runTofuList` — emits JSON with classification rows when `--json` is set. + * + * Pattern matches `cache.test.ts`: mkdtemp + process.chdir + spy stdout/stderr. + */ + +import fs from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { fingerprintServer } from '../registry/fingerprint.js'; +import { + FINGERPRINT_STORE_VERSION, + loadFingerprintStore, +} from '../registry/fingerprints-store.js'; +import { invalidateRegistryCache } from '../registry/loader.js'; +import type { RegistryServer } from '../registry/types.js'; +import { classifyRows, runTofuAccept, runTofuList } from './tofu.js'; + +async function writeRegistry(baseDir: string, servers: RegistryServer[]): Promise { + const yamlLines: string[] = ['version: "1"', 'servers:']; + for (const s of servers) { + yamlLines.push(` - name: ${s.name}`); + yamlLines.push(` command: ${s.command}`); + yamlLines.push(` args:`); + for (const a of s.args ?? []) { + yamlLines.push(` - ${JSON.stringify(a)}`); + } + yamlLines.push(` enabled: ${s.enabled !== false ? 'true' : 'false'}`); + } + yamlLines.push(''); + await fs.writeFile(path.join(baseDir, '.rea', 'registry.yaml'), yamlLines.join('\n'), 'utf8'); +} + +async function writeStore(baseDir: string, servers: Record): Promise { + await fs.writeFile( + path.join(baseDir, '.rea', 'fingerprints.json'), + JSON.stringify({ version: FINGERPRINT_STORE_VERSION, servers }, null, 2) + '\n', + 'utf8', + ); +} + +function server(name: string, overrides: Partial = {}): RegistryServer { + return { + name, + command: 'node', + args: ['-e', `"${name}"`], + env: {}, + enabled: true, + ...overrides, + }; +} + +function captureIO(fn: () => Promise): Promise<{ + stdout: string; + stderr: string; + logs: string[]; + errors: string[]; +}> { + const stdoutChunks: string[] = []; + const stderrChunks: string[] = []; + const logs: string[] = []; + const errors: string[] = []; + + const writeSpy = vi.spyOn(process.stdout, 'write').mockImplementation((chunk: unknown) => { + stdoutChunks.push(typeof chunk === 'string' ? chunk : String(chunk)); + return true; + }); + const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((chunk: unknown) => { + stderrChunks.push(typeof chunk === 'string' ? chunk : String(chunk)); + return true; + }); + const logSpy = vi.spyOn(console, 'log').mockImplementation((...args: unknown[]) => { + logs.push(args.map((a) => (typeof a === 'string' ? a : JSON.stringify(a))).join(' ')); + }); + const errSpy = vi.spyOn(console, 'error').mockImplementation((...args: unknown[]) => { + errors.push(args.map((a) => (typeof a === 'string' ? a : JSON.stringify(a))).join(' ')); + }); + + return fn() + .then(() => ({ + stdout: stdoutChunks.join(''), + stderr: stderrChunks.join(''), + logs, + errors, + })) + .finally(() => { + writeSpy.mockRestore(); + stderrSpy.mockRestore(); + logSpy.mockRestore(); + errSpy.mockRestore(); + }); +} + +describe('classifyRows', () => { + it('flags unchanged, drifted, and first-seen verdicts in one pass', () => { + const same = server('a'); + const altered = server('b', { args: ['-e', '"altered"'] }); + const fresh = server('c'); + const rows = classifyRows([same, altered, fresh], { + a: fingerprintServer(same), + b: fingerprintServer(server('b')), + }); + const byName = Object.fromEntries(rows.map((r) => [r.name, r.verdict])); + expect(byName).toEqual({ + a: 'unchanged', + b: 'drifted', + c: 'first-seen', + }); + }); +}); + +describe('rea tofu accept', () => { + let baseDir: string; + let previousCwd: string; + + beforeEach(async () => { + baseDir = await fs.realpath(await fs.mkdtemp(path.join(os.tmpdir(), 'rea-tofu-cli-'))); + await fs.mkdir(path.join(baseDir, '.rea'), { recursive: true }); + previousCwd = process.cwd(); + process.chdir(baseDir); + invalidateRegistryCache(); + }); + + afterEach(async () => { + process.chdir(previousCwd); + invalidateRegistryCache(); + await fs.rm(baseDir, { recursive: true, force: true }); + }); + + it('rebases stored fingerprint to current canonical on drift and records audit', async () => { + const pristine = server('obsidian'); + const edited = server('obsidian', { args: ['-e', '"edited"'] }); + await writeRegistry(baseDir, [edited]); + await writeStore(baseDir, { obsidian: fingerprintServer(pristine) }); + + const { logs } = await captureIO(() => + runTofuAccept({ name: 'obsidian', reason: 'added vault path' }), + ); + + const store = await loadFingerprintStore(baseDir); + expect(store.servers.obsidian).toBe(fingerprintServer(edited)); + expect(logs.join('\n')).toMatch(/tofu: accepted "obsidian"/); + + const auditPath = path.join(baseDir, '.rea', 'audit.jsonl'); + const auditRaw = await fs.readFile(auditPath, 'utf8'); + const auditLines = auditRaw.trim().split('\n'); + const lastEntry = JSON.parse(auditLines[auditLines.length - 1]!) as { + tool_name: string; + metadata: { event: string; server: string; reason: string }; + }; + expect(lastEntry.tool_name).toBe('rea.tofu'); + expect(lastEntry.metadata.event).toBe('tofu.drift_accepted_by_cli'); + expect(lastEntry.metadata.server).toBe('obsidian'); + expect(lastEntry.metadata.reason).toBe('added vault path'); + }); + + it('records first-seen event when store has no prior entry', async () => { + const s = server('discord-ops'); + await writeRegistry(baseDir, [s]); + await writeStore(baseDir, {}); + + await captureIO(() => runTofuAccept({ name: 'discord-ops' })); + + const store = await loadFingerprintStore(baseDir); + expect(store.servers['discord-ops']).toBe(fingerprintServer(s)); + + const auditRaw = await fs.readFile(path.join(baseDir, '.rea', 'audit.jsonl'), 'utf8'); + const last = JSON.parse(auditRaw.trim().split('\n').pop()!) as { + metadata: { event: string; stored_fingerprint: string | null }; + }; + expect(last.metadata.event).toBe('tofu.first_seen_accepted_by_cli'); + expect(last.metadata.stored_fingerprint).toBeNull(); + }); + + it('is a no-op when stored already matches current fingerprint', async () => { + const s = server('obsidian'); + await writeRegistry(baseDir, [s]); + await writeStore(baseDir, { obsidian: fingerprintServer(s) }); + + const { logs } = await captureIO(() => runTofuAccept({ name: 'obsidian' })); + expect(logs.join('\n')).toMatch(/already matches stored fingerprint/); + + const auditPath = path.join(baseDir, '.rea', 'audit.jsonl'); + await expect(fs.access(auditPath)).rejects.toThrow(); + }); + + it('rejects unknown server names via process.exit(1)', async () => { + await writeRegistry(baseDir, [server('obsidian')]); + await writeStore(baseDir, {}); + + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(((code?: number) => { + throw new Error(`process.exit(${code})`); + }) as typeof process.exit); + + await expect( + captureIO(() => runTofuAccept({ name: 'does-not-exist' })), + ).rejects.toThrow(/process\.exit\(1\)/); + + exitSpy.mockRestore(); + }); +}); + +describe('rea tofu list --json', () => { + let baseDir: string; + let previousCwd: string; + + beforeEach(async () => { + baseDir = await fs.realpath(await fs.mkdtemp(path.join(os.tmpdir(), 'rea-tofu-list-'))); + await fs.mkdir(path.join(baseDir, '.rea'), { recursive: true }); + previousCwd = process.cwd(); + process.chdir(baseDir); + invalidateRegistryCache(); + }); + + afterEach(async () => { + process.chdir(previousCwd); + invalidateRegistryCache(); + await fs.rm(baseDir, { recursive: true, force: true }); + }); + + it('prints every row with verdict + short fingerprints', async () => { + const same = server('a'); + const drifted = server('b', { args: ['-e', '"changed"'] }); + await writeRegistry(baseDir, [same, drifted]); + await writeStore(baseDir, { + a: fingerprintServer(same), + b: fingerprintServer(server('b')), + }); + + const { stdout } = await captureIO(() => runTofuList({ json: true })); + const parsed = JSON.parse(stdout) as { + servers: Array<{ name: string; verdict: string; current: string; stored: string | null }>; + }; + expect(parsed.servers).toHaveLength(2); + const byName = Object.fromEntries(parsed.servers.map((s) => [s.name, s.verdict])); + expect(byName).toEqual({ a: 'unchanged', b: 'drifted' }); + }); +}); diff --git a/src/cli/tofu.ts b/src/cli/tofu.ts new file mode 100644 index 0000000..10076b6 --- /dev/null +++ b/src/cli/tofu.ts @@ -0,0 +1,183 @@ +/** + * `rea tofu` — operator-facing recovery surface for TOFU fingerprint drift + * (defect S). + * + * The TOFU gate in `src/registry/tofu-gate.ts` fail-closes on drift: an + * enabled downstream whose canonical fingerprint no longer matches the stored + * baseline is silently dropped from the spawn set. The only documented + * recovery path used to be `REA_ACCEPT_DRIFT=` as a startup env var, + * which is useless when the gateway is spawned indirectly (e.g. by Claude + * Code via `.mcp.json`) — there is no operator-reachable env in that path. + * + * This module provides two verbs: + * + * - `list` — print every declared server's current-vs-stored + * fingerprint verdict so the operator can see drift + * before reaching for `accept`. + * - `accept ` — recompute the current fingerprint for `` and + * write it to `.rea/fingerprints.json`. Emits a + * `tofu.drift_accepted_by_cli` audit record so the + * action is on the hash chain. + * + * Both verbs are pure CLI surface — they do NOT speak to a running `rea + * serve`. The next gateway boot re-runs `applyTofuGate` against the updated + * store and classifies the server as `unchanged` with no banner. + * + * ## Trust model + * + * `accept` updates the stored baseline to match whatever the YAML currently + * says. It is a **deliberate operator action**: anyone who can run `rea` + * could already edit `.rea/fingerprints.json` by hand. The CLI is an + * audit-recording wrapper over that capability, not a privilege expansion. + * + * The audit record captures BOTH fingerprints (stored + current) and the + * registry canonical shape at accept-time, so a forensic re-hash of the + * registry after the fact can confirm the operator accepted the shape they + * intended to accept. + */ + +import { appendAuditRecord } from '../audit/append.js'; +import { InvocationStatus, Tier } from '../policy/types.js'; +import { fingerprintServer } from '../registry/fingerprint.js'; +import { + FINGERPRINT_STORE_VERSION, + loadFingerprintStore, + saveFingerprintStore, +} from '../registry/fingerprints-store.js'; +import { loadRegistry } from '../registry/loader.js'; +import type { RegistryServer } from '../registry/types.js'; +import { err, log } from './utils.js'; + +export type TofuVerdictLabel = 'first-seen' | 'unchanged' | 'drifted'; + +export interface TofuRow { + name: string; + enabled: boolean; + current: string; + stored: string | undefined; + verdict: TofuVerdictLabel; +} + +/** Pure classifier used by both `list` and `accept` — keep free of I/O. */ +export function classifyRows( + servers: RegistryServer[], + stored: Record, +): TofuRow[] { + return servers.map((s) => { + const current = fingerprintServer(s); + const prior = stored[s.name]; + let verdict: TofuVerdictLabel; + if (prior === undefined) verdict = 'first-seen'; + else if (prior === current) verdict = 'unchanged'; + else verdict = 'drifted'; + return { + name: s.name, + enabled: s.enabled !== false, + current, + stored: prior, + verdict, + }; + }); +} + +export interface RunTofuListOptions { + json?: boolean; +} + +export async function runTofuList(options: RunTofuListOptions = {}): Promise { + const baseDir = process.cwd(); + const registry = loadRegistry(baseDir); + const store = await loadFingerprintStore(baseDir); + const rows = classifyRows(registry.servers, store.servers); + + if (options.json === true) { + process.stdout.write(JSON.stringify({ servers: rows }, null, 2) + '\n'); + return; + } + + if (rows.length === 0) { + log('No servers declared in .rea/registry.yaml.'); + return; + } + + log('TOFU fingerprint status:'); + log(''); + for (const row of rows) { + const shortCur = row.current.slice(0, 12); + const shortPrior = row.stored !== undefined ? row.stored.slice(0, 12) : '—'; + const flag = row.enabled ? '' : ' (disabled)'; + log( + ` ${row.verdict.padEnd(10)} ${row.name.padEnd(20)} stored=${shortPrior} current=${shortCur}${flag}`, + ); + } + log(''); + const drifted = rows.filter((r) => r.verdict === 'drifted'); + if (drifted.length > 0) { + log( + ` ${drifted.length} drifted — run \`rea tofu accept \` to rebase the stored fingerprint (emits an audit record).`, + ); + } +} + +export interface RunTofuAcceptOptions { + name: string; + reason?: string; +} + +export async function runTofuAccept(options: RunTofuAcceptOptions): Promise { + const baseDir = process.cwd(); + const registry = loadRegistry(baseDir); + const server = registry.servers.find((s) => s.name === options.name); + if (server === undefined) { + err( + `Server "${options.name}" is not declared in .rea/registry.yaml. Run \`rea tofu list\` to see declared servers.`, + ); + process.exit(1); + } + + const current = fingerprintServer(server); + const store = await loadFingerprintStore(baseDir); + const stored = store.servers[server.name]; + + if (stored === current) { + log( + `tofu: "${server.name}" already matches stored fingerprint (${current.slice(0, 12)}…) — no change written.`, + ); + return; + } + + const nextStore = { + version: FINGERPRINT_STORE_VERSION as typeof FINGERPRINT_STORE_VERSION, + servers: { ...store.servers, [server.name]: current }, + }; + await saveFingerprintStore(baseDir, nextStore); + + const event = stored === undefined ? 'tofu.first_seen_accepted_by_cli' : 'tofu.drift_accepted_by_cli'; + try { + await appendAuditRecord(baseDir, { + tool_name: 'rea.tofu', + server_name: 'rea', + tier: Tier.Write, + status: InvocationStatus.Allowed, + metadata: { + event, + server: server.name, + stored_fingerprint: stored ?? null, + current_fingerprint: current, + ...(options.reason !== undefined ? { reason: options.reason } : {}), + }, + }); + } catch (auditErr) { + err( + `tofu: fingerprint updated, but audit append failed — operator MUST investigate: ${ + auditErr instanceof Error ? auditErr.message : String(auditErr) + }`, + ); + process.exit(2); + } + + const shortPrior = stored !== undefined ? stored.slice(0, 12) : '(first-seen)'; + log( + `tofu: accepted "${server.name}" — stored=${shortPrior} → current=${current.slice(0, 12)}. Next \`rea serve\` will classify as unchanged.`, + ); +} diff --git a/src/gateway/audit/rotator.ts b/src/gateway/audit/rotator.ts index a4b1f26..b4297bd 100644 --- a/src/gateway/audit/rotator.ts +++ b/src/gateway/audit/rotator.ts @@ -260,6 +260,10 @@ export async function performRotation( autonomy_level: 'system', duration_ms: 0, prev_hash: tailHash, + // Defect P: rotation markers are written by rea itself, not by an + // external caller of appendAuditRecord() — tag as rea-cli so the + // hash chain remains consistent under the post-P schema. + emission_source: 'rea-cli', metadata: { rotated_from: path.basename(rotatedPath), rotated_at: now.toISOString(), diff --git a/src/gateway/middleware/audit-types.ts b/src/gateway/middleware/audit-types.ts index 640ff76..40556ee 100644 --- a/src/gateway/middleware/audit-types.ts +++ b/src/gateway/middleware/audit-types.ts @@ -1,5 +1,33 @@ import type { Tier, InvocationStatus } from '../../policy/types.js'; +/** + * Emission-path discriminator for the audit record (defect P). + * + * The push-review gate trusts `tool_name: "codex.review"` records to certify + * a real Codex adversarial review ran on the given commit SHA. Before this + * field existed, any script with filesystem access to `node_modules` could + * call `appendAuditRecord(...)` with a `codex.review` tool name and forge + * the certification — the governance promise was a convention, not enforced. + * + * `emission_source` tags the code path that wrote the record: + * + * - `"rea-cli"` — emitted by the `rea` CLI itself (e.g. `rea audit + * record codex-review`). The rea CLI is classified by + * `reaCommandTier()` (defect E) and is an audited, + * policy-governed entry point. + * - `"codex-cli"` — emitted by the Codex adversarial review path itself, + * the authoritative source. + * - `"other"` — every other caller of the public + * `appendAuditRecord()` helper (consumer plugins, + * ad-hoc scripts, tests). Legitimate for event types + * OTHER than `codex.review`; REJECTED by the + * push-review cache gate for `codex.review` lookups. + * + * The field is part of the hashed record body — it cannot be altered after + * the fact without breaking the chain. + */ +export type EmissionSource = 'rea-cli' | 'codex-cli' | 'other'; + export interface AuditRecord { timestamp: string; session_id: string; @@ -22,6 +50,14 @@ export interface AuditRecord { * the redaction middleware runs on `ctx.arguments`, not on metadata. */ metadata?: Record; + /** + * Defect P (0.10.1). Discriminates the emission path: `"rea-cli"` for + * rea's own CLI, `"codex-cli"` for the Codex adversarial reviewer, + * `"other"` for every other caller of the public audit helper. Required + * field; the push-review gate refuses to accept `codex.review` records + * whose source is `"other"` (or missing, for pre-0.10.1 legacy records). + */ + emission_source: EmissionSource; hash: string; prev_hash: string; } diff --git a/src/gateway/middleware/audit.ts b/src/gateway/middleware/audit.ts index 52f11b8..6c841ba 100644 --- a/src/gateway/middleware/audit.ts +++ b/src/gateway/middleware/audit.ts @@ -117,6 +117,12 @@ export function createAuditMiddleware( autonomy_level: autonomyLevel, duration_ms, prev_hash: prevHash, + // Defect P: gateway middleware records every proxied tool call. + // rea itself is the writer — tag as rea-cli so the schema is + // consistent. "rea-cli" here is a misnomer (the gateway isn't a + // CLI) but is part of the stable 0.10.1 discriminator set; + // semantically it means "written by @bookedsolid/rea itself". + emission_source: 'rea-cli', }; if (ctx.error) { diff --git a/src/registry/tofu-gate.ts b/src/registry/tofu-gate.ts index 95a2236..1bdd37e 100644 --- a/src/registry/tofu-gate.ts +++ b/src/registry/tofu-gate.ts @@ -182,7 +182,10 @@ async function emitSideEffects( boxLine(` current: ${c.current.slice(0, 16)}…`), boxLine(''), boxLine(' The server will NOT connect. Other servers remain up.'), - boxLine(' To accept (once): REA_ACCEPT_DRIFT= rea serve'), + boxLine(' After a legitimate registry edit:'), + boxLine(` rea tofu accept ${c.server} --reason ""`), + boxLine(' One-shot bypass (not recommended):'), + boxLine(` REA_ACCEPT_DRIFT=${c.server} rea serve`), ` ╚${'═'.repeat(BOX_INNER_WIDTH)}╝`, '', ].join('\n'),