From f5d17c7b9c096a5366c0d80fd488de2bc28055c4 Mon Sep 17 00:00:00 2001 From: Bingran You Date: Wed, 8 Apr 2026 16:36:24 -0700 Subject: [PATCH] docs: capture Claude Code test framework contracts --- .../ssh-remote-session-and-auth-proxy.md | 2 +- ...structured-io-and-headless-session-loop.md | 2 +- integrations/mcp/NODE.md | 1 + ...d-auth-conformance-and-idp-test-seeding.md | 50 +++++++++ .../oauth-step-up-and-client-registration.md | 2 +- platform-services/NODE.md | 1 + platform-services/auth-config-and-policy.md | 2 +- ...rate-limit-scenarios-and-test-contracts.md | 2 +- ...ngs-change-detection-and-runtime-reload.md | 2 +- ...tibility-and-invalid-field-preservation.md | 53 +++++++++ ...service-sequencing-and-capability-gates.md | 2 +- .../usage-analytics-and-migrations.md | 3 +- reconstruction-guardrails/NODE.md | 1 + .../NODE.md | 22 ++++ .../evidence-levels-and-missing-artifacts.md | 49 +++++++++ ...d-asset-provenance-and-acceptance-rules.md | 67 ++++++++++++ ...ment-fixtures-and-ci-fail-closed-policy.md | 74 +++++++++++++ .../test-framework-overview.md | 78 ++++++++++++++ ...s-reset-hooks-and-injected-dependencies.md | 101 ++++++++++++++++++ .../sed-command-validation-contracts.md | 2 +- .../permissions/yolo-classifier-contracts.md | 2 +- .../tool-catalog/tool-families.md | 4 +- .../tool-catalog/tool-pool-assembly.md | 3 +- ...rmission-prompt-shell-and-worker-states.md | 3 +- 24 files changed, 515 insertions(+), 13 deletions(-) create mode 100644 integrations/mcp/federated-auth-conformance-and-idp-test-seeding.md create mode 100644 platform-services/settings-schema-compatibility-and-invalid-field-preservation.md create mode 100644 reconstruction-guardrails/verification-and-native-test-oracles/NODE.md create mode 100644 reconstruction-guardrails/verification-and-native-test-oracles/evidence-levels-and-missing-artifacts.md create mode 100644 reconstruction-guardrails/verification-and-native-test-oracles/native-test-derived-asset-provenance-and-acceptance-rules.md create mode 100644 reconstruction-guardrails/verification-and-native-test-oracles/test-environment-fixtures-and-ci-fail-closed-policy.md create mode 100644 reconstruction-guardrails/verification-and-native-test-oracles/test-framework-overview.md create mode 100644 reconstruction-guardrails/verification-and-native-test-oracles/test-seams-reset-hooks-and-injected-dependencies.md diff --git a/integrations/clients/ssh-remote-session-and-auth-proxy.md b/integrations/clients/ssh-remote-session-and-auth-proxy.md index 7efda28..b014c4a 100644 --- a/integrations/clients/ssh-remote-session-and-auth-proxy.md +++ b/integrations/clients/ssh-remote-session-and-auth-proxy.md @@ -1,7 +1,7 @@ --- title: "SSH Remote Session and Auth Proxy" owners: [] -soft_links: [/integrations/clients/direct-connect-session-bootstrap-and-environment-selection.md, /integrations/clients/remote-session-message-adaptation-and-viewer-state.md, /product-surface/startup-entrypoint-routing-and-session-handoff.md, /platform-services/provider-specific-api-clients-and-auth-routing.md] +soft_links: [/integrations/clients/direct-connect-session-bootstrap-and-environment-selection.md, /integrations/clients/remote-session-message-adaptation-and-viewer-state.md, /product-surface/startup-entrypoint-routing-and-session-handoff.md, /platform-services/provider-specific-api-clients-and-auth-routing.md, /reconstruction-guardrails/verification-and-native-test-oracles/test-seams-reset-hooks-and-injected-dependencies.md] --- # SSH Remote Session and Auth Proxy diff --git a/integrations/clients/structured-io-and-headless-session-loop.md b/integrations/clients/structured-io-and-headless-session-loop.md index 281cb91..6a32e68 100644 --- a/integrations/clients/structured-io-and-headless-session-loop.md +++ b/integrations/clients/structured-io-and-headless-session-loop.md @@ -1,7 +1,7 @@ --- title: "Structured I/O and Headless Session Loop" owners: [] -soft_links: [/integrations/clients/sdk-control-protocol.md, /runtime-orchestration/turn-flow/structured-output-enforcement-and-artifact-projection.md, /integrations/clients/sdk-hook-event-transport.md, /runtime-orchestration/turn-flow/unified-command-queue-and-drain.md, /collaboration-and-agents/bridge-contract.md, /platform-services/startup-service-sequencing-and-capability-gates.md, /product-surface/session-state-and-breakpoints.md] +soft_links: [/integrations/clients/sdk-control-protocol.md, /runtime-orchestration/turn-flow/structured-output-enforcement-and-artifact-projection.md, /integrations/clients/sdk-hook-event-transport.md, /runtime-orchestration/turn-flow/unified-command-queue-and-drain.md, /collaboration-and-agents/bridge-contract.md, /platform-services/startup-service-sequencing-and-capability-gates.md, /product-surface/session-state-and-breakpoints.md, /reconstruction-guardrails/verification-and-native-test-oracles/test-environment-fixtures-and-ci-fail-closed-policy.md] --- # Structured I/O and Headless Session Loop diff --git a/integrations/mcp/NODE.md b/integrations/mcp/NODE.md index 06ae110..3db2eac 100644 --- a/integrations/mcp/NODE.md +++ b/integrations/mcp/NODE.md @@ -13,6 +13,7 @@ Relevant leaves: - **[server-contract.md](server-contract.md)** — Umbrella contract from layered MCP server config through admission, connection, live surface exposure, interactive follow-up flows, and recovery. - **[config-layering-policy-and-dedup.md](config-layering-policy-and-dedup.md)** — How layered MCP sources, managed policy, project approval, and cross-source dedup produce the live server set. - **[oauth-step-up-and-client-registration.md](oauth-step-up-and-client-registration.md)** — How MCP OAuth discovery, callback handling, step-up scope requests, and secure reauth behave. +- **[federated-auth-conformance-and-idp-test-seeding.md](federated-auth-conformance-and-idp-test-seeding.md)** — How the federated XAA path, SEP-990 expectations, and mock-IdP token seeding behave as a conformance-oriented auth contract. - **[connection-and-recovery-contract.md](connection-and-recovery-contract.md)** — Transport types, session recovery, and runtime failure boundaries for MCP servers. - **[mcp-surface-state-assembly-and-live-refresh.md](mcp-surface-state-assembly-and-live-refresh.md)** — How connected MCP servers populate and replace session-state tools, commands, skills, and resources, and how `list_changed` refreshes avoid stale slices. - **[channel-servers-and-permission-relay.md](channel-servers-and-permission-relay.md)** — Channel admission gates, structured inbound message injection, and optional MCP-mediated permission relay behavior. diff --git a/integrations/mcp/federated-auth-conformance-and-idp-test-seeding.md b/integrations/mcp/federated-auth-conformance-and-idp-test-seeding.md new file mode 100644 index 0000000..54ed005 --- /dev/null +++ b/integrations/mcp/federated-auth-conformance-and-idp-test-seeding.md @@ -0,0 +1,50 @@ +--- +title: "Federated Auth Conformance and IdP Test Seeding" +owners: [bingran-you] +soft_links: + - /integrations/mcp/oauth-step-up-and-client-registration.md + - /integrations/mcp/server-contract.md + - /platform-services/auth-config-and-policy.md + - /reconstruction-guardrails/verification-and-native-test-oracles/test-seams-reset-hooks-and-injected-dependencies.md +--- + +# Federated Auth Conformance and IdP Test Seeding + +The federated MCP auth path has a conformance-sensitive verification contract that is more specific than ordinary OAuth flow coverage. The current snapshot shows a dedicated XAA-style path whose testing posture must preserve both wire-level expectations and deterministic mock-IdP entrypoints. + +## Federated auth is not generic OAuth + +Equivalent behavior should preserve: + +- a distinct federated path instead of silently collapsing back to ordinary per-server consent +- stable auth-method expectations where conformance relies on a specific token-exchange style +- persistence of the federated auth server identity and related secure-storage slots across reconnects and refreshes + +The main clean-room point is that this path has stricter interoperability expectations than a generic "browser login happened" check. + +## Mock-IdP seeding is a supported verification seam + +The snapshot shows a direct token-seeding path for cases where the mock identity provider does not serve the full browser authorization surface. + +Equivalent behavior should preserve: + +- a way to inject a pre-obtained federated identity token for controlled verification contexts +- storage of that injected token in the same cache slot the ordinary login path later reads +- expiry handling derived from the token itself when possible, so seeded tokens behave like real cached credentials + +This is an important seam because conformance and e2e runs need a deterministic entrypoint that still exercises the real downstream cache and exchange path. + +## Wire-level conformance expectations + +Equivalent behavior should preserve: + +- token-exchange defaults that match the expected conformance posture unless a server explicitly requires another method +- clear failure when those expectations are violated, instead of silently falling through to a different auth style +- the distinction between configuration-time secure secrets and login-time seeded tokens + +## Failure modes + +- **federated fallback surprise**: the runtime silently uses a different auth path than the server contract intended +- **seed-slot mismatch**: injected test tokens do not land in the same cache identity ordinary auth later reads +- **wire drift**: token exchange changes method or request shape and quietly stops matching conformance expectations +- **fake auth success**: tests seed a token into a bypass-only slot and stop exercising the real federated credential path diff --git a/integrations/mcp/oauth-step-up-and-client-registration.md b/integrations/mcp/oauth-step-up-and-client-registration.md index 6666be6..0eee1ad 100644 --- a/integrations/mcp/oauth-step-up-and-client-registration.md +++ b/integrations/mcp/oauth-step-up-and-client-registration.md @@ -1,7 +1,7 @@ --- title: "OAuth, Step-Up, and Client Registration" owners: [] -soft_links: [/integrations/mcp/server-contract.md, /integrations/mcp/config-layering-policy-and-dedup.md, /platform-services/auth-config-and-policy.md, /integrations/clients/structured-io-and-headless-session-loop.md] +soft_links: [/integrations/mcp/server-contract.md, /integrations/mcp/config-layering-policy-and-dedup.md, /platform-services/auth-config-and-policy.md, /integrations/clients/structured-io-and-headless-session-loop.md, /integrations/mcp/federated-auth-conformance-and-idp-test-seeding.md] --- # OAuth, Step-Up, and Client Registration diff --git a/platform-services/NODE.md b/platform-services/NODE.md index 2d17bfc..2f667d2 100644 --- a/platform-services/NODE.md +++ b/platform-services/NODE.md @@ -24,6 +24,7 @@ Relevant leaves: - **[privacy-level-resolution.md](privacy-level-resolution.md)** — Environment-driven privacy postures and how they suppress telemetry or broader optional traffic across the product. - **[consumer-privacy-policy-flow.md](consumer-privacy-policy-flow.md)** — Cache-first consumer privacy-policy eligibility, reminder timing, `/privacy-settings`, and headless enforcement behavior. - **[settings-change-detection-and-runtime-reload.md](settings-change-detection-and-runtime-reload.md)** — How file-watch and programmatic settings changes are gated, fanned out, and hot-applied across interactive state, headless state, permissions, env, sandbox, and plugin hooks. +- **[settings-schema-compatibility-and-invalid-field-preservation.md](settings-schema-compatibility-and-invalid-field-preservation.md)** — How settings schema evolution stays backward compatible, how invalid or unknown fields are preserved, and how compatibility tests act as the oracle for safe schema changes. - **[usage-analytics-and-migrations.md](usage-analytics-and-migrations.md)** — Telemetry, quotas, updates, and local evolution over time. - **[claude-ai-limits-and-extra-usage-state.md](claude-ai-limits-and-extra-usage-state.md)** — The shared Claude.ai quota state machine, warning and recovery surfaces, `/usage`, `/extra-usage`, and the extra-usage couplings that affect fast mode, prompt caching, and 1M model access. - **[doctor-command-and-health-diagnostics.md](doctor-command-and-health-diagnostics.md)** — How `/doctor`, `claude doctor`, shared install health checks, and persistent diagnostics aggregate warnings across settings, sandboxing, plugins, MCP, and context. diff --git a/platform-services/auth-config-and-policy.md b/platform-services/auth-config-and-policy.md index 1e69d64..d3d3290 100644 --- a/platform-services/auth-config-and-policy.md +++ b/platform-services/auth-config-and-policy.md @@ -1,7 +1,7 @@ --- title: "Auth, Config, and Policy" owners: [] -soft_links: [/integrations/mcp/server-contract.md, /collaboration-and-agents/remote-and-bridge-flows.md] +soft_links: [/integrations/mcp/server-contract.md, /collaboration-and-agents/remote-and-bridge-flows.md, /platform-services/settings-schema-compatibility-and-invalid-field-preservation.md] --- # Auth, Config, and Policy diff --git a/platform-services/mock-rate-limit-scenarios-and-test-contracts.md b/platform-services/mock-rate-limit-scenarios-and-test-contracts.md index d743383..98eaa38 100644 --- a/platform-services/mock-rate-limit-scenarios-and-test-contracts.md +++ b/platform-services/mock-rate-limit-scenarios-and-test-contracts.md @@ -1,7 +1,7 @@ --- title: "Mock Rate Limit Scenarios and Test Contracts" owners: [bingran-you] -soft_links: [/platform-services/claude-ai-limits-and-extra-usage-state.md] +soft_links: [/platform-services/claude-ai-limits-and-extra-usage-state.md, /reconstruction-guardrails/verification-and-native-test-oracles/native-test-derived-asset-provenance-and-acceptance-rules.md] native_source: services/mockRateLimits.ts verification_status: native_test_derived --- diff --git a/platform-services/settings-change-detection-and-runtime-reload.md b/platform-services/settings-change-detection-and-runtime-reload.md index 2d5a606..990818d 100644 --- a/platform-services/settings-change-detection-and-runtime-reload.md +++ b/platform-services/settings-change-detection-and-runtime-reload.md @@ -1,7 +1,7 @@ --- title: "Settings Change Detection and Runtime Reload" owners: [] -soft_links: [/platform-services/policy-and-managed-settings-lifecycle.md, /platform-services/sync-and-managed-state.md, /tools-and-permissions/permissions/permission-rule-loading-and-persistence.md, /tools-and-permissions/permissions/sandbox-selection-and-bypass-guards.md, /integrations/plugins/plugin-runtime-contract.md, /ui-and-experience/shell-and-input/keybinding-customization-and-context-resolution.md] +soft_links: [/platform-services/policy-and-managed-settings-lifecycle.md, /platform-services/sync-and-managed-state.md, /tools-and-permissions/permissions/permission-rule-loading-and-persistence.md, /tools-and-permissions/permissions/sandbox-selection-and-bypass-guards.md, /integrations/plugins/plugin-runtime-contract.md, /ui-and-experience/shell-and-input/keybinding-customization-and-context-resolution.md, /platform-services/settings-schema-compatibility-and-invalid-field-preservation.md] --- # Settings Change Detection and Runtime Reload diff --git a/platform-services/settings-schema-compatibility-and-invalid-field-preservation.md b/platform-services/settings-schema-compatibility-and-invalid-field-preservation.md new file mode 100644 index 0000000..1d295bb --- /dev/null +++ b/platform-services/settings-schema-compatibility-and-invalid-field-preservation.md @@ -0,0 +1,53 @@ +--- +title: "Settings Schema Compatibility and Invalid-Field Preservation" +owners: [bingran-you] +soft_links: + - /platform-services/auth-config-and-policy.md + - /platform-services/settings-change-detection-and-runtime-reload.md + - /platform-services/doctor-command-and-health-diagnostics.md + - /reconstruction-guardrails/verification-and-native-test-oracles/test-framework-overview.md +--- + +# Settings Schema Compatibility and Invalid-Field Preservation + +Settings evolution in Claude Code is guarded as a stable product contract, not as an incidental implementation detail. The visible snapshot treats backward compatibility tests as the oracle for safe schema changes and preserves invalid or unknown content rather than rewriting user files into a narrower shape. + +## Compatibility contract + +Equivalent behavior should preserve: + +- additive evolution as the default posture for public settings +- a dedicated compatibility lane guarding settings-file changes +- rejection of breaking schema changes as a product-level regression, not only as a typing concern + +The important design point is that settings compatibility is not left to convention alone. It is explicitly treated as something tests must defend. + +## Invalid and unknown field preservation + +Equivalent behavior should preserve: + +- unknown or invalid settings surviving on disk even when they are not currently usable +- selective filtering of bad sub-rules where the system can safely preserve the rest of the file +- type coercion or passthrough behavior where that protects durable compatibility instead of silently narrowing the accepted shape +- user-facing repair surfaces being able to report problems without having already destroyed the offending data + +This means the system prefers "ignore and report" over "drop and rewrite" for many invalid settings scenarios. + +## Why this belongs in platform services + +This is not just a test note. It shapes: + +- configuration loading behavior +- validation reporting +- hot reload semantics +- migration safety +- doctor and diagnostics surfaces + +The test lane is the oracle, but the contract itself is a platform behavior. + +## Failure modes + +- **breaking-tightening regression**: a schema change makes existing user files invalid without an intentional migration path +- **destructive cleanup**: invalid or unknown fields are removed from disk and users lose the information needed to repair them +- **all-or-nothing rejection**: one malformed rule causes the whole settings file to become unusable when narrower preservation was possible +- **testless drift**: schema evolution continues without compatibility tests defending the user-facing contract diff --git a/platform-services/startup-service-sequencing-and-capability-gates.md b/platform-services/startup-service-sequencing-and-capability-gates.md index f98d26a..a7c1791 100644 --- a/platform-services/startup-service-sequencing-and-capability-gates.md +++ b/platform-services/startup-service-sequencing-and-capability-gates.md @@ -1,7 +1,7 @@ --- title: "Startup Service Sequencing and Capability Gates" owners: [] -soft_links: [/platform-services/interactive-startup-and-project-activation.md, /platform-services/policy-and-managed-settings-lifecycle.md, /platform-services/trust-and-capability-hydration.md, /platform-services/bootstrap-and-service-failures.md, /platform-services/user-settings-sync-contract.md, /platform-services/usage-analytics-and-migrations.md, /integrations/clients/structured-io-and-headless-session-loop.md] +soft_links: [/platform-services/interactive-startup-and-project-activation.md, /platform-services/policy-and-managed-settings-lifecycle.md, /platform-services/trust-and-capability-hydration.md, /platform-services/bootstrap-and-service-failures.md, /platform-services/user-settings-sync-contract.md, /platform-services/usage-analytics-and-migrations.md, /integrations/clients/structured-io-and-headless-session-loop.md, /reconstruction-guardrails/verification-and-native-test-oracles/test-environment-fixtures-and-ci-fail-closed-policy.md] --- # Startup Service Sequencing and Capability Gates diff --git a/platform-services/usage-analytics-and-migrations.md b/platform-services/usage-analytics-and-migrations.md index 8221b38..ba1dbba 100644 --- a/platform-services/usage-analytics-and-migrations.md +++ b/platform-services/usage-analytics-and-migrations.md @@ -1,7 +1,7 @@ --- title: "Usage, Analytics, and Migrations" owners: [] -soft_links: [/platform-services/startup-service-sequencing-and-capability-gates.md, /platform-services/bootstrap-and-service-failures.md, /platform-services/claude-ai-limits-and-extra-usage-state.md, /product-surface/interaction-modes.md, /runtime-orchestration/state/build-profiles.md] +soft_links: [/platform-services/startup-service-sequencing-and-capability-gates.md, /platform-services/bootstrap-and-service-failures.md, /platform-services/claude-ai-limits-and-extra-usage-state.md, /product-surface/interaction-modes.md, /runtime-orchestration/state/build-profiles.md, /reconstruction-guardrails/verification-and-native-test-oracles/test-environment-fixtures-and-ci-fail-closed-policy.md] --- # Usage, Analytics, and Migrations @@ -59,6 +59,7 @@ Equivalent behavior should preserve: - heavier telemetry initialization waiting until trust-gated environment/config effects are ready - some telemetry paths deferring further until remote managed settings have loaded, so org-controlled env/config overlays affect telemetry posture before it starts - privacy or essential-traffic posture being able to suppress outbound analytics work without suppressing the whole product +- test posture being able to suppress telemetry and adjacent feedback-style side effects entirely, so automated runs do not emit nonessential traffic or pick up timing noise from support systems - telemetry or diagnostics failure remaining non-fatal to the interactive session ## Local migrations evolve persisted behavior diff --git a/reconstruction-guardrails/NODE.md b/reconstruction-guardrails/NODE.md index 8e58c9b..41313b7 100644 --- a/reconstruction-guardrails/NODE.md +++ b/reconstruction-guardrails/NODE.md @@ -15,3 +15,4 @@ Use these leaves before adding any new knowledge: - **[knowledge-lifecycle.md](knowledge-lifecycle.md)** — How extracted knowledge moves from observation to durable tree state, and how bad knowledge is detected. - **[rebuild-phasing.md](rebuild-phasing.md)** — Implementation order for a clean-room rebuild, including what must ship together. - **[tree-expansion-strategy.md](tree-expansion-strategy.md)** — How this repository should deepen from high-level coverage into reconstruction-ready subsystem contracts over repeated passes. +- **[verification-and-native-test-oracles/](verification-and-native-test-oracles/NODE.md)** — How cross-domain test framework knowledge, fixture oracles, and native-test-derived contract assets should be normalized without becoming a source mirror. diff --git a/reconstruction-guardrails/verification-and-native-test-oracles/NODE.md b/reconstruction-guardrails/verification-and-native-test-oracles/NODE.md new file mode 100644 index 0000000..8127895 --- /dev/null +++ b/reconstruction-guardrails/verification-and-native-test-oracles/NODE.md @@ -0,0 +1,22 @@ +--- +title: "Verification and Native Test Oracles" +owners: [bingran-you] +soft_links: + - /reconstruction-guardrails/source-boundary.md + - /reconstruction-guardrails/knowledge-lifecycle.md + - /platform-services/NODE.md + - /tools-and-permissions/NODE.md + - /integrations/NODE.md +--- + +# Verification and Native Test Oracles + +This subdomain captures cross-cutting knowledge about how the observed Claude Code build verifies itself. It exists because the tree already has domain-owned test contracts, but it still lacked one place to describe the shared verification architecture that spans runtime posture, fixtures, seams, and evidence quality. + +Relevant leaves: + +- **[test-framework-overview.md](test-framework-overview.md)** — The layered shape of the current test system, including the visible tier model and the boundary between confirmed and inferred runner details. +- **[test-environment-fixtures-and-ci-fail-closed-policy.md](test-environment-fixtures-and-ci-fail-closed-policy.md)** — How test posture suppresses side effects, how fixture replay works, and why missing recordings fail closed in CI. +- **[test-seams-reset-hooks-and-injected-dependencies.md](test-seams-reset-hooks-and-injected-dependencies.md)** — The narrow seams the product uses to keep hard behaviors testable without turning the whole runtime into a debug harness. +- **[native-test-derived-asset-provenance-and-acceptance-rules.md](native-test-derived-asset-provenance-and-acceptance-rules.md)** — How native test knowledge should be normalized into clean-room contract assets and how those assets should be linked back to their owning domains. +- **[evidence-levels-and-missing-artifacts.md](evidence-levels-and-missing-artifacts.md)** — What this source snapshot proves, what it only strongly suggests, and which missing artifacts still block exact runner-level reproduction. diff --git a/reconstruction-guardrails/verification-and-native-test-oracles/evidence-levels-and-missing-artifacts.md b/reconstruction-guardrails/verification-and-native-test-oracles/evidence-levels-and-missing-artifacts.md new file mode 100644 index 0000000..812c745 --- /dev/null +++ b/reconstruction-guardrails/verification-and-native-test-oracles/evidence-levels-and-missing-artifacts.md @@ -0,0 +1,49 @@ +--- +title: "Evidence Levels and Missing Artifacts" +owners: [bingran-you] +soft_links: + - /reconstruction-guardrails/source-boundary.md + - /reconstruction-guardrails/knowledge-lifecycle.md + - /reconstruction-guardrails/verification-and-native-test-oracles/test-framework-overview.md +--- + +# Evidence Levels and Missing Artifacts + +This repository should distinguish between what the current source snapshot proves, what it strongly suggests, and what it does not expose yet. + +## Confirmed from the current snapshot + +The snapshot is sufficient to confirm all of these: + +- there are distinct unit or regression, integration, end-to-end, conformance, and compatibility lanes +- `NODE_ENV=test` is a real runtime posture +- fixture and VCR replay are first-class testing mechanisms +- narrow seams such as injected dependencies, exported testing helpers, resets, and test-only helper surfaces are part of the current design + +## Strongly suggested but not fully proven + +The tree can safely treat these as strong signals rather than as closed facts: + +- the TypeScript runner environment is Bun-oriented in at least part of the stack +- repo-level scripts wrap at least some runner commands instead of every lane being invoked directly + +## Still missing for exact runner-level reproduction + +The current snapshot does not fully expose: + +- the top-level repository manifest and script table +- the complete test directory layout +- the full committed fixture corpus +- the CI workflow and any sharding or coverage rules + +Those artifacts are the main blockers for claiming exact reproduction of upstream test plumbing. + +## Clean-room rule + +While those artifacts are missing, the tree should: + +- document the confirmed architecture and tier model +- preserve clear evidence labels for inferred versus confirmed details +- refuse to guess exact runner wiring that the snapshot did not show + +This is a knowledge-quality rule, not a refusal to make progress. The visible framework is already rich enough to guide a clean-room rebuild of the verification architecture itself. diff --git a/reconstruction-guardrails/verification-and-native-test-oracles/native-test-derived-asset-provenance-and-acceptance-rules.md b/reconstruction-guardrails/verification-and-native-test-oracles/native-test-derived-asset-provenance-and-acceptance-rules.md new file mode 100644 index 0000000..3967433 --- /dev/null +++ b/reconstruction-guardrails/verification-and-native-test-oracles/native-test-derived-asset-provenance-and-acceptance-rules.md @@ -0,0 +1,67 @@ +--- +title: "Native-Test-Derived Asset Provenance and Acceptance Rules" +owners: [bingran-you] +soft_links: + - /reconstruction-guardrails/source-boundary.md + - /reconstruction-guardrails/knowledge-lifecycle.md + - /platform-services/mock-rate-limit-scenarios-and-test-contracts.md + - /tools-and-permissions/filesystem-and-shell/sed-command-validation-contracts.md + - /tools-and-permissions/permissions/yolo-classifier-contracts.md +--- + +# Native-Test-Derived Asset Provenance and Acceptance Rules + +The tree already contains subsystem leaves whose primary value comes from upstream-native test knowledge. That pattern is useful, but it needs guardrails so the tree stays a reconstruction spec instead of drifting into source mirroring. + +## Current pattern + +The current repo already uses domain-owned leaves for native-test-derived assets such as: + +- rate-limit mock scenario contracts +- sed command validation contracts +- YOLO classifier contracts + +This is the right ownership model: the acceptance oracle lives with the subsystem that owns the behavior. + +## Provenance rules + +When a leaf is derived primarily from upstream-native tests or testing-oriented helpers, it should preserve: + +- the owning concern domain +- explicit provenance markers such as `native_source` where that is clear and useful +- a verification marker such as `verification_status: native_test_derived` when the leaf is intentionally restating upstream test oracles +- behavior and acceptance guidance, not copied prompts, copied code, or repo-internal execution trivia + +## What these leaves should contain + +A good native-test-derived leaf should usually include: + +- scope boundary +- the contract or family of contracts being protected +- important state or parser edge cases +- the likely reconstruction mistakes another team would make without this oracle +- acceptance criteria when the evidence is strong enough to phrase them safely + +## What they should not become + +These leaves should not become: + +- hidden inventories of upstream test files +- runner-specific setup notes that are only useful inside the original repo +- copied schemas or copied implementation detail whose only purpose is literal reimplementation + +## Relationship to the broader framework + +The broader test framework docs in this subdomain explain cross-cutting posture, fixture strategy, and seam design. + +Native-test-derived leaves answer a different question: + +- what exact nuanced behaviors did upstream consider important enough to defend with tests? + +Both layers are necessary. The framework without the oracles is too generic. The oracles without the framework feel isolated and accidental. + +## Failure modes + +- **orphaned oracle**: a native-test-derived leaf has no framework links and reads like trivia +- **source-shaped leakage**: a contract leaf starts copying implementation detail instead of restating behavior +- **ownership drift**: test-derived assets collect in one generic place and stop reinforcing the domain that actually owns the behavior diff --git a/reconstruction-guardrails/verification-and-native-test-oracles/test-environment-fixtures-and-ci-fail-closed-policy.md b/reconstruction-guardrails/verification-and-native-test-oracles/test-environment-fixtures-and-ci-fail-closed-policy.md new file mode 100644 index 0000000..b501871 --- /dev/null +++ b/reconstruction-guardrails/verification-and-native-test-oracles/test-environment-fixtures-and-ci-fail-closed-policy.md @@ -0,0 +1,74 @@ +--- +title: "Test Environment, Fixtures, and CI Fail-Closed Policy" +owners: [bingran-you] +soft_links: + - /platform-services/startup-service-sequencing-and-capability-gates.md + - /platform-services/usage-analytics-and-migrations.md + - /integrations/clients/structured-io-and-headless-session-loop.md + - /runtime-orchestration/turn-flow/api-request-assembly-retry-and-prompt-cache-stability.md +--- + +# Test Environment, Fixtures, and CI Fail-Closed Policy + +`NODE_ENV=test` is a real runtime posture in the current Claude Code build. It suppresses nonessential side effects, avoids certain expensive or cyclic startup work, and activates deterministic fixture replay for API-dependent tests. + +## Test posture is system-wide, not local + +The visible contract is that test mode can suppress or simplify behaviors such as: + +- telemetry and feedback-like side effects +- exit-time bookkeeping that would otherwise write or emit support data +- selected startup enrichments or background loops whose value is production-only +- environment-sensitive helpers that would otherwise introduce nondeterministic noise + +The important point is not one specific branch. It is that the runtime treats tests as a supported operating posture with different side-effect rules. + +## Fixture replay is a first-class oracle + +The snapshot exposes a VCR-style replay layer for API-dependent behavior. + +That layer preserves: + +- explicit activation in test posture +- hash-based fixture naming from normalized inputs +- replay from a configurable fixture root +- rehydration back into runtime-shaped results rather than raw text blobs +- input dehydration and path normalization so equivalent tests keep hitting the same recordings across machines + +## CI must fail closed on missing recordings + +Equivalent behavior should preserve: + +- missing fixtures failing the run in CI by default +- explicit opt-in recording refresh instead of silent fixture regeneration +- a clear distinction between replay mode and record mode + +This is one of the most important stability contracts in the visible framework. It keeps network-backed tests deterministic and makes fixture refresh a deliberate maintenance act. + +## Transcript and hash stability matter + +The broader runtime also treats transcript shape as part of fixture stability. + +Equivalent behavior should preserve: + +- careful normalization before hashing +- avoidance of unnecessary transcript-shape churn in replay-sensitive flows +- deterministic identity or placeholder handling where raw runtime IDs would otherwise destabilize recordings + +The visible testing architecture therefore depends on transcript semantics, not only on a file cache. + +## Reconstruction rule + +If a clean-room rebuild keeps external API-backed tests, it should preserve all of these: + +- a dedicated test posture +- deterministic fixture hashing and hydration +- fail-closed CI behavior for missing recordings +- explicit recording refresh + +## Failure modes + +- **test-production blur**: automated tests still emit nonessential production side effects +- **machine-bound fixtures**: path, cwd, or tempdir differences cause needless cache misses +- **silent CI rewrite**: missing fixtures regenerate during CI and hide behavioral drift +- **hash instability**: transcript or input normalization changes break recordings even when behavior did not meaningfully change diff --git a/reconstruction-guardrails/verification-and-native-test-oracles/test-framework-overview.md b/reconstruction-guardrails/verification-and-native-test-oracles/test-framework-overview.md new file mode 100644 index 0000000..086c524 --- /dev/null +++ b/reconstruction-guardrails/verification-and-native-test-oracles/test-framework-overview.md @@ -0,0 +1,78 @@ +--- +title: "Test Framework Overview" +owners: [bingran-you] +soft_links: + - /reconstruction-guardrails/verification-and-native-test-oracles/test-environment-fixtures-and-ci-fail-closed-policy.md + - /reconstruction-guardrails/verification-and-native-test-oracles/test-seams-reset-hooks-and-injected-dependencies.md + - /reconstruction-guardrails/verification-and-native-test-oracles/native-test-derived-asset-provenance-and-acceptance-rules.md + - /platform-services/mock-rate-limit-scenarios-and-test-contracts.md + - /tools-and-permissions/filesystem-and-shell/sed-command-validation-contracts.md + - /tools-and-permissions/permissions/yolo-classifier-contracts.md + - /platform-services/settings-schema-compatibility-and-invalid-field-preservation.md + - /integrations/mcp/federated-auth-conformance-and-idp-test-seeding.md +--- + +# Test Framework Overview + +The current Claude Code snapshot does not expose one self-contained `tests/` or runner manifest that answers everything. What it does expose is a layered testing architecture that spans runtime posture, fixtures, dedicated end-to-end harnesses, conformance-sensitive auth flows, and domain-owned contract oracles. + +## Confirmed layers + +The snapshot clearly shows all of these verification layers: + +- a script-wrapped suite entry layer, because at least one compatibility contract is tied to a named `npm run test:file ...` path rather than to a raw helper invocation +- ordinary module-level regression lanes, including `.test.ts`-style coverage +- integration lanes, including `.int.test.ts` behavior for cross-component runtime state +- end-to-end coverage for permission prompts and remote-control plumbing +- conformance-sensitive auth coverage for federated MCP and XAA-style flows +- runtime test posture via `NODE_ENV=test` +- fixture and VCR-style replay for API-dependent scenarios +- module-state isolation through exported reset, seed, and cleanup helpers for caches, watchers, registries, and other sticky services +- domain-owned contract assets derived from upstream-native tests + +## Stable tier model + +A faithful rebuild should preserve these tiers as distinct concerns: + +- fast unit and regression feedback +- integration tests for service sequencing, settings state, and resume-sensitive runtime interactions +- end-to-end coverage for transport, auth proxy, permission UI, and remote-local handoff +- conformance coverage where wire-level or provider-level expectations matter +- compatibility tests for durable public file formats such as settings + +Collapsing all of those into one broad suite would lose one of the main architectural signals in the current product: different behaviors are protected by different oracles. + +## Runner boundary + +The tree can safely claim: + +- there is a script-oriented entry layer +- the product code is written to coexist with a Bun-flavored module-mocking environment +- the visible framework depends on more than a generic "run tests" command + +The tree should not overclaim: + +- the exact full upstream runner manifest +- the complete CI orchestration or sharding plan +- the full top-level command matrix for every lane + +Those details remain partially hidden in this snapshot and are tracked separately in [evidence-levels-and-missing-artifacts.md](evidence-levels-and-missing-artifacts.md). + +## Reconstruction rule + +Equivalent implementations should preserve the verification architecture itself: + +- separate tiers with different speed and realism tradeoffs +- deterministic test posture rather than production side effects leaking into automated runs +- fixture-backed replay where live external dependencies would otherwise make tests flaky +- narrow seams for stateful or transport-sensitive code +- reliable reset and teardown for singleton or watcher state that would otherwise bleed across lanes +- domain-owned acceptance oracles for nuanced behavior contracts + +## Failure modes + +- **tier collapse**: all behaviors are tested through one slow or one shallow lane +- **runner overclaim**: the rebuild hardcodes unconfirmed upstream runner plumbing as if it were observed fact +- **fixture blindness**: API-dependent flows lose deterministic replay and become network-coupled +- **state bleed**: caches, timers, registries, or persistent singleton state leak across tests and make failures order-dependent +- **oracle drift**: domain-specific edge-case contracts stop being represented after the broad framework is documented diff --git a/reconstruction-guardrails/verification-and-native-test-oracles/test-seams-reset-hooks-and-injected-dependencies.md b/reconstruction-guardrails/verification-and-native-test-oracles/test-seams-reset-hooks-and-injected-dependencies.md new file mode 100644 index 0000000..0b6568b --- /dev/null +++ b/reconstruction-guardrails/verification-and-native-test-oracles/test-seams-reset-hooks-and-injected-dependencies.md @@ -0,0 +1,101 @@ +--- +title: "Test Seams, Reset Hooks, and Injected Dependencies" +owners: [bingran-you] +soft_links: + - /tools-and-permissions/tool-catalog/tool-families.md + - /tools-and-permissions/tool-catalog/tool-pool-assembly.md + - /ui-and-experience/dialogs-and-approvals/permission-prompt-shell-and-worker-states.md + - /integrations/clients/ssh-remote-session-and-auth-proxy.md + - /integrations/clients/structured-io-and-headless-session-loop.md + - /integrations/mcp/federated-auth-conformance-and-idp-test-seeding.md + - /platform-services/settings-change-detection-and-runtime-reload.md + - /runtime-orchestration/sessions/session-artifacts-and-sharing.md +--- + +# Test Seams, Reset Hooks, and Injected Dependencies + +The current Claude Code build does not rely only on coarse top-down black-box tests. It also exposes narrow, product-owned seams that make stateful or transport-sensitive behavior testable without turning the runtime into a generic debug shell. + +## Seam families visible in the snapshot + +The snapshot shows several recurring seam patterns: + +- targeted dependency injection where module-spy boilerplate would otherwise be brittle or cyclic +- helper functions explicitly exported for testing, especially around parsing, serialization, cache placement, and runtime edge behavior +- reset or clear hooks for stateful services and caches +- admission-sensitive helper surfaces that only exist under test posture +- local harness modes that exercise real remote or auth plumbing without requiring the full external environment + +## Why the seams matter + +These seams are not random internal conveniences. They reveal the kinds of behavior the product itself considers hard to validate otherwise: + +- permission prompts and approval surfaces +- auth-proxy or transport wiring +- stateful caches and watchers +- parser and serializer edge cases +- resume- and transcript-sensitive flows + +## Resettable singleton state is part of the seam contract + +The source snapshot repeatedly exposes reset or seed helpers for long-lived runtime state. That is part of the test framework, not merely local cleanup style. + +Equivalent behavior should preserve: + +- explicit reset or teardown hooks for sticky services such as telemetry clients, settings watchers, session artifacts, registries, probe caches, and async startup promises +- awaitable cleanup where filesystem watchers, polling timers, or delayed work could otherwise outlive the test that created them +- the ability to seed controlled state through those hooks when the ordinary startup path would be too expensive, too flaky, or feature-gated away under the test runner +- reset boundaries that are narrow enough to isolate one subsystem without forcing a whole-process restart for every regression lane + +Without this layer, the same product architecture becomes much harder to test deterministically because singleton state starts leaking across cases and shards. + +## Production-like harnesses beat bypass-only fakes + +The strongest seams in the snapshot still drive the real downstream contract instead of writing into a disposable mock-only side channel. + +Equivalent behavior should preserve: + +- approval-oriented harnesses that still surface the normal permission dialog and permission-decision pipeline +- transport-oriented harnesses that can exercise remote auth proxying and session plumbing without requiring a real remote host for every run +- federated-auth harnesses that can inject deterministic credentials into the same secure-storage slots the ordinary login and refresh paths later read +- resume- and transcript-oriented setters that operate on the same session artifact model and ingress plumbing production uses + +This is the important distinction between a useful seam and a fake e2e shortcut: the seam shortens setup cost, but it should still validate the real runtime path on the other side. + +## Feature-gated flows still need narrow test entrypoints + +Some runtime capabilities are gated away in ordinary test posture or non-production builds. The visible framework compensates with small, purpose-built hooks rather than by widening those product features permanently. + +Equivalent behavior should preserve: + +- narrow helper entrypoints for feature-gated services whose normal boot path is unavailable under the test runner +- test access that restores just enough state to exercise the downstream behavior, rather than globally disabling capability gates everywhere +- a clear difference between test harness admission and end-user feature exposure + +## Scope discipline + +Equivalent behavior should preserve the discipline of the current seams: + +- they should stay narrow and purpose-built +- they should still exercise the real runtime contract +- they should not become broad backdoors that bypass ordinary policy or orchestration logic + +One visible example of this discipline is the difference between a test-only permission probe and an ordinary user-facing tool family. The former is a verification harness, not a product capability surface. + +## Reconstruction rule + +A clean-room rebuild should keep: + +- some form of dependency seam for core flows that would otherwise require invasive module spying +- explicit reset hooks for sticky state +- at least one approval-oriented end-to-end harness +- at least one transport-oriented local harness for remote or federated flows +- harness entrypoints that reuse production storage, permission, and session channels instead of writing into bypass-only test slots + +## Failure modes + +- **black-box rigidity**: everything must be tested through the very top of the product, so failures become slow and hard to isolate +- **seam sprawl**: helper hooks grow into broad escape hatches that stop validating the real runtime +- **hidden state bleed**: caches, watchers, or registries persist across tests with no reliable reset path +- **gate blindness**: a rebuild turns off product gates globally in tests and stops exercising the real admission logic +- **fake e2e**: remote or approval flows are "tested" only through mocks and stop covering real orchestration diff --git a/tools-and-permissions/filesystem-and-shell/sed-command-validation-contracts.md b/tools-and-permissions/filesystem-and-shell/sed-command-validation-contracts.md index 5331343..aca5338 100644 --- a/tools-and-permissions/filesystem-and-shell/sed-command-validation-contracts.md +++ b/tools-and-permissions/filesystem-and-shell/sed-command-validation-contracts.md @@ -1,7 +1,7 @@ --- title: "Sed Command Validation Contracts" owners: [bingran-you] -soft_links: [/tools-and-permissions/filesystem-and-shell/shell-command-parsing-and-classifier-flow.md, /tools-and-permissions/permissions/permission-decision-pipeline.md] +soft_links: [/tools-and-permissions/filesystem-and-shell/shell-command-parsing-and-classifier-flow.md, /tools-and-permissions/permissions/permission-decision-pipeline.md, /reconstruction-guardrails/verification-and-native-test-oracles/native-test-derived-asset-provenance-and-acceptance-rules.md] native_source: tools/BashTool/sedValidation.ts verification_status: native_test_derived --- diff --git a/tools-and-permissions/permissions/yolo-classifier-contracts.md b/tools-and-permissions/permissions/yolo-classifier-contracts.md index f42de36..48cf209 100644 --- a/tools-and-permissions/permissions/yolo-classifier-contracts.md +++ b/tools-and-permissions/permissions/yolo-classifier-contracts.md @@ -1,7 +1,7 @@ --- title: "YOLO Classifier Contracts" owners: [bingran-you] -soft_links: [/tools-and-permissions/permissions/permission-decision-pipeline.md, /tools-and-permissions/permissions/permission-model.md] +soft_links: [/tools-and-permissions/permissions/permission-decision-pipeline.md, /tools-and-permissions/permissions/permission-model.md, /reconstruction-guardrails/verification-and-native-test-oracles/native-test-derived-asset-provenance-and-acceptance-rules.md] native_source: utils/permissions/yoloClassifier.ts verification_status: native_test_derived --- diff --git a/tools-and-permissions/tool-catalog/tool-families.md b/tools-and-permissions/tool-catalog/tool-families.md index f4be066..9253d10 100644 --- a/tools-and-permissions/tool-catalog/tool-families.md +++ b/tools-and-permissions/tool-catalog/tool-families.md @@ -1,7 +1,7 @@ --- title: "Tool Families" owners: [] -soft_links: [/integrations/mcp/server-contract.md, /runtime-orchestration/tasks/task-model.md, /tools-and-permissions/agent-and-task-control/control-plane-tools.md, /tools-and-permissions/agent-and-task-control/task-and-team-control-tool-contracts.md, /tools-and-permissions/permissions/config-permission-and-sandbox-admin-surfaces.md] +soft_links: [/integrations/mcp/server-contract.md, /runtime-orchestration/tasks/task-model.md, /tools-and-permissions/agent-and-task-control/control-plane-tools.md, /tools-and-permissions/agent-and-task-control/task-and-team-control-tool-contracts.md, /tools-and-permissions/permissions/config-permission-and-sandbox-admin-surfaces.md, /reconstruction-guardrails/verification-and-native-test-oracles/test-seams-reset-hooks-and-injected-dependencies.md] --- # Tool Families @@ -27,3 +27,5 @@ Across all families, tools should share a common contract: - results that can be stitched back into the same conversation turn The important reconstruction point is that not all tools act on the outside world. Some mutate the runtime's own work graph, settings, or approval posture, and those families still need the same schema, observability, and safety guarantees. + +Helper or test-only tools can still exist in the catalog, but they should be treated as admission- and posture-sensitive helper surfaces rather than as a separate public product family. diff --git a/tools-and-permissions/tool-catalog/tool-pool-assembly.md b/tools-and-permissions/tool-catalog/tool-pool-assembly.md index e3f08ae..5bc3f3e 100644 --- a/tools-and-permissions/tool-catalog/tool-pool-assembly.md +++ b/tools-and-permissions/tool-catalog/tool-pool-assembly.md @@ -1,7 +1,7 @@ --- title: "Tool Pool Assembly" owners: [] -soft_links: [/integrations/mcp/connection-and-recovery-contract.md, /tools-and-permissions/permissions/permission-model.md, /runtime-orchestration/turn-flow/turn-assembly-and-recovery.md] +soft_links: [/integrations/mcp/connection-and-recovery-contract.md, /tools-and-permissions/permissions/permission-model.md, /runtime-orchestration/turn-flow/turn-assembly-and-recovery.md, /reconstruction-guardrails/verification-and-native-test-oracles/test-seams-reset-hooks-and-injected-dependencies.md] --- # Tool Pool Assembly @@ -32,6 +32,7 @@ This layered assembly is essential because "which tools exist" is part of the se - built-in tools should win on name collisions with extension tools - server-prefix deny rules must be able to remove an entire MCP namespace - special helper tools may exist in the runtime without being exposed like ordinary user tools +- test-only or compatibility helper tools may be admitted only under narrow postures such as test mode, but they should still flow through the same catalog, enablement, and exposure rules instead of bypassing tool-pool assembly entirely - some surfaces need the merged tool set, while other logic only needs built-ins - a rebuild should preserve the distinction between the exhaustive catalog and the currently exposed pool diff --git a/ui-and-experience/dialogs-and-approvals/permission-prompt-shell-and-worker-states.md b/ui-and-experience/dialogs-and-approvals/permission-prompt-shell-and-worker-states.md index 73237c7..749b7d0 100644 --- a/ui-and-experience/dialogs-and-approvals/permission-prompt-shell-and-worker-states.md +++ b/ui-and-experience/dialogs-and-approvals/permission-prompt-shell-and-worker-states.md @@ -1,7 +1,7 @@ --- title: "Permission Prompt Shell and Worker States" owners: [] -soft_links: [/ui-and-experience/feedback-and-notifications/interaction-feedback.md, /tools-and-permissions/permissions/permission-model.md, /tools-and-permissions/permissions/permission-decision-pipeline.md, /tools-and-permissions/filesystem-and-shell/shell-command-parsing-and-classifier-flow.md, /collaboration-and-agents/teammate-mailbox-and-permission-bridge.md, /runtime-orchestration/tasks/monitor-task-families-and-watch-lifecycle.md] +soft_links: [/ui-and-experience/feedback-and-notifications/interaction-feedback.md, /tools-and-permissions/permissions/permission-model.md, /tools-and-permissions/permissions/permission-decision-pipeline.md, /tools-and-permissions/filesystem-and-shell/shell-command-parsing-and-classifier-flow.md, /collaboration-and-agents/teammate-mailbox-and-permission-bridge.md, /runtime-orchestration/tasks/monitor-task-families-and-watch-lifecycle.md, /reconstruction-guardrails/verification-and-native-test-oracles/test-seams-reset-hooks-and-injected-dependencies.md] --- # Permission Prompt Shell and Worker States @@ -26,6 +26,7 @@ Equivalent behavior should preserve: - file edit and file write requests using diff-oriented file dialogs, while glob, grep, and file-read requests share a filesystem prompt - bash and PowerShell each using shell-specific approval surfaces rather than a generic tool card - enter-plan and exit-plan requests using plan-specific renderers, and notebook edit, web fetch, skill, and ask-user requests each getting their own specialized surface +- approval-oriented end-to-end harnesses reusing that same router and dialog shell, so a test-only approval probe still validates ordinary prompt behavior instead of a separate fake UI - review-artifact, workflow, and monitor permission UIs being feature-gated and falling back to the generic renderer when their specialized implementation is unavailable - unknown tools still rendering through a safe fallback prompt instead of failing to surface approval state