diff --git a/.changeset/workflow-audit-skill.md b/.changeset/workflow-audit-skill.md
new file mode 100644
index 0000000000..586c3ae2ba
--- /dev/null
+++ b/.changeset/workflow-audit-skill.md
@@ -0,0 +1,5 @@
+---
+"workflow": patch
+---
+
+Add `workflow-audit` review skill and update skill surface validation and docs
diff --git a/.changeset/workflow-skill-runtime-gate.md b/.changeset/workflow-skill-runtime-gate.md
new file mode 100644
index 0000000000..52c2d8e6ee
--- /dev/null
+++ b/.changeset/workflow-skill-runtime-gate.md
@@ -0,0 +1,5 @@
+---
+"workflow": patch
+---
+
+Add runtime verification for workflow skill golden fixtures
diff --git a/.changeset/workflow-skills-blueprints.md b/.changeset/workflow-skills-blueprints.md
new file mode 100644
index 0000000000..42eaa2444a
--- /dev/null
+++ b/.changeset/workflow-skills-blueprints.md
@@ -0,0 +1,6 @@
+---
+'workflow': patch
+'@workflow/core': patch
+---
+
+Align workflow skills docs and README with two-stage teach-then-build contract
diff --git a/.gitignore b/.gitignore
index ee38f9164c..89f769d12d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,6 +27,9 @@ packages/swc-plugin-workflow/build-hash.json
# SWC plugin cache
.swc
+# Workflow vitest compiler output
+.workflow-vitest
+
# claude local settings
.claude/settings.local.json
@@ -37,6 +40,9 @@ packages/swc-plugin-workflow/build-hash.json
.DS_Store
+# Generated workflow context file (created by workflow-teach skill)
+.workflow.md
+
# Generated manifest files copied to static asset directories by builders
workbench/nextjs-*/public/.well-known/workflow
workbench/sveltekit/static/.well-known/workflow
diff --git a/docs/content/docs/getting-started/meta.json b/docs/content/docs/getting-started/meta.json
index d0e189dd73..fcd2350082 100644
--- a/docs/content/docs/getting-started/meta.json
+++ b/docs/content/docs/getting-started/meta.json
@@ -9,7 +9,8 @@
"nitro",
"nuxt",
"sveltekit",
- "vite"
+ "vite",
+ "workflow-skills"
],
"defaultOpen": true
}
diff --git a/docs/content/docs/getting-started/workflow-skills.mdx b/docs/content/docs/getting-started/workflow-skills.mdx
new file mode 100644
index 0000000000..7cca554cda
--- /dev/null
+++ b/docs/content/docs/getting-started/workflow-skills.mdx
@@ -0,0 +1,388 @@
+---
+title: Workflow Skills
+description: Use AI skills to design and build durable workflows through a two-stage teach-then-build loop.
+type: guide
+summary: Use AI skills to capture project context and build workflows interactively.
+prerequisites:
+ - /docs/getting-started
+related:
+ - /docs/foundations/workflows-and-steps
+ - /docs/testing
+ - /docs/api-reference/workflow-vitest
+---
+
+Workflow skills are AI-assisted commands that guide you through creating durable
+workflows. Start from a specific problem with a scenario command, or use the
+two-stage loop to capture your project context once and build correct workflows
+interactively.
+
+## Scenario Commands: Start from What You're Building
+
+If you know what kind of workflow you need, start here:
+
+| Command | What it builds |
+|---------|---------------|
+| `/workflow-approval` | Approval with expiry, escalation, and deterministic hooks |
+| `/workflow-webhook` | External webhook ingestion with duplicate handling and compensation |
+| `/workflow-saga` | Partial-success side effects and compensation |
+| `/workflow-timeout` | Correctness depends on sleep/wake-up behavior |
+| `/workflow-idempotency` | Retries and replay can duplicate effects |
+| `/workflow-observe` | Operators need progress streams and terminal signals |
+
+Scenario commands reuse `.workflow.md` when present and fall back to a focused
+context capture when not. They apply domain-specific guardrails and terminate
+with the same `verification_plan_ready` contract as `/workflow-build`.
+
+### Example Prompts
+
+```
+/workflow-saga reserve inventory, charge payment, compensate on shipping failure
+```
+
+```
+/workflow-timeout wait 24h for approval, then expire
+```
+
+```
+/workflow-idempotency make duplicate webhook delivery safe
+```
+
+```
+/workflow-observe stream operator progress and final status
+```
+
+## Review Existing Workflows
+
+When you already have workflow code and need to assess correctness before changing it, run:
+
+```
+/workflow-audit
+```
+
+For example:
+
+> Audit our purchase approval workflow for timeout holes, replay safety, and missing suspension tests.
+
+The audit skill reads `.workflow.md` when present, inspects the workflow code and tests,
+scores 12 durable-workflow checks, tags issues P0-P3, and recommends the single best next skill.
+
+## The Manual Path: Teach, Then Build
+
+For workflows that don't fit a scenario command, use the two-stage loop:
+
+
+ Workflow skills require an AI coding assistant that supports user-invocable
+ skills, such as [Claude Code](https://claude.ai/code) or
+ [Cursor](https://cursor.com).
+
+
+### Two-Stage Loop: Teach, Then Build
+
+| Stage | Command | Purpose | Output |
+|-------|---------|---------|--------|
+| 1 | `/workflow-teach` | Capture project context | `.workflow.md` |
+| 2 | `/workflow-build` | Build workflow code interactively | TypeScript code + tests |
+
+The `workflow` skill is an always-on API reference available at any point.
+
+
+
+
+## Install the Skills Bundle
+
+Copy the skills into your project. Choose the bundle that matches your AI tool:
+
+**Claude Code:**
+
+```bash
+cp -r node_modules/workflow/dist/workflow-skills/claude-code/.claude/skills/* .claude/skills/
+```
+
+**Cursor:**
+
+```bash
+cp -r node_modules/workflow/dist/workflow-skills/cursor/.cursor/skills/* .cursor/skills/
+```
+
+After copying, you should see 11 skill directories:
+
+- **Core skills:** `workflow` (always-on reference), `workflow-teach` (stage 1),
+ and `workflow-build` (stage 2)
+- **Scenario skills:** `workflow-approval` (approval with expiry and escalation),
+ `workflow-webhook` (webhook ingestion with duplicate handling),
+ `workflow-saga` (multi-step compensation),
+ `workflow-timeout` (expiry and wake-up correctness),
+ `workflow-idempotency` (replay-safe side effects),
+ and `workflow-observe` (operator streams and terminal signals)
+- **Review skill:** `workflow-audit` (score an existing workflow or design before touching code)
+- **Optional helper:** `workflow-init` (first-time project setup before
+ `workflow` is installed as a dependency)
+
+
+
+
+## Teach Your Project Context
+
+Run `/workflow-teach` to start an interactive interview that captures your
+project's domain knowledge:
+
+```bash
+/workflow-teach
+```
+
+The skill scans your repository and asks about:
+
+- **Trigger surfaces** — API routes, webhooks, queues, cron jobs
+- **External systems** — databases, payment providers, notification services
+- **Business invariants** — rules that must never be violated
+- **Idempotency requirements** — which operations must be safe to retry
+- **Timeout and approval rules** — human-in-the-loop constraints
+- **Compensation rules** — what to undo when later steps fail
+- **Observability needs** — what operators need to see in logs and streams
+
+The output is saved to `.workflow.md` in the project root — a plain-English
+markdown file containing project context, business rules, failure expectations,
+and approved patterns. This file is git-ignored and stays local to your checkout.
+
+
+
+
+## Build a Workflow
+
+Run `/workflow-build` and describe the workflow you want to create:
+
+```bash
+/workflow-build
+```
+
+For example:
+
+> Build a workflow that routes purchase orders for manager approval, escalates
+> to a director after 48 hours, and auto-rejects after a further 24 hours.
+
+The build skill reads `.workflow.md` and walks through six interactive phases:
+
+1. **Propose step boundaries** — which functions need `"use workflow"` vs `"use step"`, suspension points, stream requirements
+2. **Flag relevant traps** — run a 12-point stress checklist against the design
+3. **Decide failure modes** — `FatalError` vs `RetryableError`, idempotency strategies, compensation plans
+4. **Write code + tests** — produce the workflow file and integration tests
+5. **Self-review** — run the stress checklist again against the generated code
+6. **Verification summary** — emit a structured verification artifact and a single-line `verification_plan_ready` event for machine consumption
+
+Each phase waits for your confirmation before proceeding.
+
+
+
+
+
+## Persisted Artifacts
+
+The skill loop produces two categories of artifacts:
+
+| Category | Artifact | Path | Owner |
+|----------|----------|------|-------|
+| Skill-managed | Project context | `.workflow.md` | `workflow-teach` |
+| Host-managed | Project context (JSON) | `.workflow-skills/context.json` | Host runtime |
+| Host-managed | Workflow blueprint | `.workflow-skills/blueprints/<name>.json` | Host runtime |
+| Host-managed | Verification plan | `.workflow-skills/verification/<name>.json` | Host runtime |
+
+### Skill-managed: `.workflow.md`
+
+Written directly by `workflow-teach`. A plain-English markdown file containing
+project context, business rules, failure expectations, and approved patterns.
+This is the primary bridge between teach and build — `workflow-build` reads
+this file to inform step boundaries, failure modes, and test coverage.
+
+### Host-managed: `.workflow-skills/*.json`
+
+The `.workflow-skills/` directory contains machine-readable companion artifacts
+managed by the host runtime or persistence layer — not by the skill prompts
+themselves. The skill text never references these JSON paths directly; instead,
+the host extracts structured data from the skill conversation and persists it
+for agent consumption. This separation keeps the skill prompts focused on
+human-readable guidance while enabling programmatic queries against the JSON
+artifacts.
+
+- **`context.json`** — structured project context derived from `workflow-teach`
+- **`blueprints/<name>.json`** — workflow blueprint with step boundaries, suspension points, and trap analysis
+- **`verification/<name>.json`** — verification plan with files, test matrix, runtime commands, and implementation notes
+
+The `files` array must list only files that are actually produced. Add the `route` entry only when a route file is generated.
+
+Example verification plan:
+
+```json
+{
+ "contractVersion": "1",
+ "blueprintName": "approval-expiry-escalation",
+ "files": [
+ { "kind": "workflow", "path": "workflows/approval-expiry-escalation.ts" },
+ { "kind": "test", "path": "workflows/approval-expiry-escalation.integration.test.ts" }
+ ],
+ "testMatrix": [
+ {
+ "name": "happy-path",
+ "helpers": [],
+ "expects": "Workflow completes successfully"
+ },
+ {
+ "name": "hook-suspension",
+ "helpers": ["waitForHook", "resumeHook"],
+ "expects": "Workflow resumes from hook"
+ },
+ {
+ "name": "sleep-suspension",
+ "helpers": ["waitForSleep", "wakeUp"],
+ "expects": "Workflow resumes after sleep"
+ }
+ ],
+ "runtimeCommands": [
+ {
+ "name": "typecheck",
+ "command": "pnpm typecheck",
+ "expects": "No TypeScript errors"
+ },
+ {
+ "name": "test",
+ "command": "pnpm test",
+ "expects": "All repository tests pass"
+ },
+ {
+ "name": "focused-workflow-test",
+ "command": "pnpm vitest run workflows/approval-expiry-escalation.integration.test.ts",
+ "expects": "approval-expiry-escalation integration tests pass"
+ }
+ ],
+ "implementationNotes": [
+ "Invariant: A purchase order must receive exactly one final decision: approved, rejected, or auto-rejected",
+ "Invariant: Escalation must only trigger after the primary approval window expires",
+ "Operator signal: Log approval.requested with PO number and assigned manager",
+ "Operator signal: Log approval.escalated with PO number and director"
+ ]
+}
+```
+
+## The `.workflow.md` Bridge
+
+Written by `workflow-teach`, read by `workflow-build`, `.workflow.md` contains:
+
+| Section | Contents |
+|---------|----------|
+| Project Context | What the project does and why it needs durable workflows |
+| Business Rules | Invariants, idempotency requirements, domain constraints |
+| External Systems | Third-party services, trigger surfaces, rate limits |
+| Failure Expectations | Permanent vs retryable failures, timeouts, compensation rules |
+| Observability Needs | What operators and UIs need to see |
+| Approved Patterns | Anti-patterns relevant to this project's workflow surfaces |
+| Open Questions | Gaps that `workflow-build` will surface again |
+
+## Hero Scenario: Approval Expiry Escalation
+
+The approval-expiry-escalation scenario is the recommended first workflow to
+build with the skill loop. It exercises the hardest patterns in a single flow:
+
+| Pattern | How It Appears |
+|---------|---------------|
+| Human-in-the-loop approval | Manager and director hooks |
+| Timeout handling | 48h and 24h sleep suspensions |
+| Escalation logic | `Promise.race` between hook and sleep |
+| Idempotency | Every side-effecting step has an idempotency key |
+| Deterministic tokens | Hook tokens derived from the PO number |
+| Observability | Operator signals cover the full approval lifecycle |
+
+Run `/workflow-teach` first, then `/workflow-build` with the approval scenario
+prompt to walk through the full loop.
+
+## Stress Checklist
+
+The build skill runs this 12-point checklist twice — once against your proposed
+design and once against the generated code:
+
+1. Determinism boundary
+2. Step granularity
+3. Pass-by-value / serialization
+4. Hook token strategy
+5. Webhook response mode
+6. `start()` placement
+7. Stream I/O placement
+8. Idempotency keys
+9. Retry semantics
+10. Rollback / compensation
+11. Observability streams
+12. Integration test coverage
+
+## Inspect Build Output
+
+The workflow-skills builder emits structured JSON logs on stderr and a JSON
+manifest on stdout. Redirect them to inspect build state programmatically:
+
+```bash
+pnpm build:workflow-skills > /tmp/workflow-skills-manifest.json 2> /tmp/workflow-skills-build.log
+
+echo 'manifest summary'
+cat /tmp/workflow-skills-manifest.json | jq '{providers, totalOutputs}'
+
+echo 'first 3 structured log events'
+head -n 3 /tmp/workflow-skills-build.log | jq
+```
+
+Expected output shape:
+
+```json
+{ "providers": ["claude-code", "cursor"], "totalOutputs": 52 }
+```
+
+```
+{"event":"start","ts":"2026-03-27T16:41:23.035Z","mode":"build"}
+{"event":"skills_discovered","ts":"2026-03-27T16:41:23.120Z","count":11,"scenarioCount":6}
+{"event":"plan_computed","ts":"2026-03-27T16:41:23.240Z","totalOutputs":52,"providerCount":2,"skillCount":11,"scenarioCount":6,"goldensPerProvider":15,"outputsPerProvider":26}
+```
+
+**Verification commands**
+
+```bash
+node scripts/build-workflow-skills.mjs --check | jq '{skillSurface, totalOutputs}'
+pnpm vitest run workbench/vitest/test/workflow-skill-bundle-parity.test.ts
+pnpm vitest run workbench/vitest/test/workflow-skills-docs-contract.test.ts
+```
+
+**Expected summary**
+
+```json
+{
+ "skillSurface": {
+ "counts": {
+ "skills": 11,
+ "scenarios": 6,
+ "goldensPerProvider": 15,
+ "providers": 2,
+ "outputsPerProvider": 26,
+ "totalOutputs": 52
+ }
+ },
+ "totalOutputs": 52
+}
+```
+
+## Inspect Validation Output
+
+The validator emits structured JSON logs on stderr and a machine-readable result
+on stdout, even when validation fails.
+
+```bash
+node scripts/validate-workflow-skill-files.mjs > /tmp/workflow-skills-validate.json 2> /tmp/workflow-skills-validate.log || true
+
+echo 'validation summary'
+cat /tmp/workflow-skills-validate.json | jq '{ok, checked, summary}'
+
+echo 'last 3 validator events'
+tail -n 3 /tmp/workflow-skills-validate.log | jq
+```
+
+## Next Steps
+
+- Read the [Workflows and Steps](/docs/foundations/workflows-and-steps) guide to
+ understand the runtime model
+- See the [Testing](/docs/testing) guide for writing workflow tests by hand
+- Check the [`@workflow/vitest` API reference](/docs/api-reference/workflow-vitest)
+ for the full list of test helpers
diff --git a/package.json b/package.json
index 7cf5becf35..fb80c70ae6 100644
--- a/package.json
+++ b/package.json
@@ -30,7 +30,7 @@
"scripts": {
"prepare": "husky",
"build": "turbo build --filter='./packages/*'",
- "test": "turbo test",
+ "test": "turbo test && pnpm test:workflow-skills",
"clean": "turbo clean",
"typecheck": "turbo typecheck",
"test:e2e": "vitest run packages/core/e2e/e2e.test.ts packages/core/e2e/e2e-agent.test.ts",
@@ -44,7 +44,13 @@
"ci:version": "changeset version",
"ci:publish": "pnpm build && changeset publish",
"release:notes": "node scripts/generate-release-notes.mjs",
- "workbench:stage": "node scripts/stage-workbench-with-tarballs.mjs"
+ "workbench:stage": "node scripts/stage-workbench-with-tarballs.mjs",
+ "test:workflow-skills:unit": "vitest run scripts/build-workflow-skills.test.mjs scripts/validate-workflow-skill-files.test.mjs scripts/ensure-workflow-fixture-symlink.test.mjs workbench/vitest/test/workflow-skills-hero.test.ts workbench/vitest/test/workflow-scenarios.test.ts workbench/vitest/test/workflow-skills-docs.test.ts workbench/vitest/test/workflow-skills-docs-contract.test.ts workbench/vitest/test/workflow-skill-verification-summary-contract.test.ts",
+ "test:workflow-skills:cli": "node scripts/validate-workflow-skill-files.mjs",
+ "test:workflow-skills:text": "pnpm test:workflow-skills:unit && pnpm test:workflow-skills:cli",
+ "test:workflow-skills:runtime": "node scripts/verify-workflow-skill-goldens.mjs",
+ "test:workflow-skills": "pnpm test:workflow-skills:text && pnpm test:workflow-skills:runtime",
+ "build:workflow-skills": "node scripts/build-workflow-skills.mjs"
},
"lint-staged": {
"**/*": "biome format --write --no-errors-on-unmatched"
diff --git a/packages/workflow/README.md b/packages/workflow/README.md
index 12a4a1ac78..f247fdba4e 100644
--- a/packages/workflow/README.md
+++ b/packages/workflow/README.md
@@ -20,6 +20,10 @@ The **Workflow Development Kit** lets you easily add durability, reliability, an
Visit [https://useworkflow.dev](https://useworkflow.dev) to view the full documentation.
+### Workflow Skills (AI-Assisted Design)
+
+Workflow skills are an AI-driven design loop that helps you create durable workflows. Install the skills bundle into your AI coding assistant, then run the two-stage loop: **teach** your project context, then **build** the workflow interactively with guided stress-testing and verification. See the [Workflow Skills quick-start](https://useworkflow.dev/docs/getting-started/workflow-skills) for details.
+
## Community
The Workflow DevKit community can be found on [GitHub Discussions](https://github.com/vercel/workflow/discussions), where you can ask questions, voice ideas, and share your projects with other people.
diff --git a/scripts/build-workflow-skills.mjs b/scripts/build-workflow-skills.mjs
new file mode 100644
index 0000000000..82664e30ee
--- /dev/null
+++ b/scripts/build-workflow-skills.mjs
@@ -0,0 +1,361 @@
+#!/usr/bin/env node
+
+/**
+ * build-workflow-skills.mjs
+ *
+ * Builds provider-specific bundles from the source skills under skills/.
+ *
+ * Usage:
+ * node scripts/build-workflow-skills.mjs # build into dist/workflow-skills/
+ * node scripts/build-workflow-skills.mjs --check # dry-run, emit plan as JSON, exit 0 if valid
+ *
+ * Emits structured JSON lines on stderr for every state transition.
+ * Final output on stdout is a JSON manifest.
+ */
+
+import { createHash } from 'node:crypto';
+import {
+ cpSync,
+ existsSync,
+ mkdirSync,
+ readdirSync,
+ readFileSync,
+ writeFileSync,
+} from 'node:fs';
+import { dirname, join, relative, resolve } from 'node:path';
+import {
+ SCENARIO_SKILLS,
+ USER_INVOKABLE_SKILLS,
+ summarizeSkillSurface,
+} from './lib/workflow-skill-surface.mjs';
+
+// ---------------------------------------------------------------------------
+// Config
+// ---------------------------------------------------------------------------
+
+const ROOT = resolve(import.meta.dirname, '..');
+const SKILLS_DIR = join(ROOT, 'skills');
+const DIST_DIR = join(ROOT, 'dist', 'workflow-skills');
+
+/** Provider map: provider name → nested output path under dist/workflow-skills/<provider>/. */
+const PROVIDERS = {
+ 'claude-code': '.claude/skills',
+ cursor: '.cursor/skills',
+};
+
+const CHECK_MODE = process.argv.includes('--check');
+
+// ---------------------------------------------------------------------------
+// Logging helpers (structured JSON on stderr)
+// ---------------------------------------------------------------------------
+
+function log(event, data = {}) {
+ const line = JSON.stringify({ event, ts: new Date().toISOString(), ...data });
+ process.stderr.write(`${line}\n`);
+}
+
+// ---------------------------------------------------------------------------
+// Frontmatter parser (minimal, zero-dep)
+// ---------------------------------------------------------------------------
+
+const REQUIRED_FIELDS = ['name', 'description'];
+const REQUIRED_META = ['author', 'version'];
+const SCENARIO_SKILLS_SET = new Set(SCENARIO_SKILLS);
+const USER_INVOKABLE_SKILLS_SET = new Set(USER_INVOKABLE_SKILLS);
+
+function parseFrontmatter(text) {
+ const match = text.match(/^---\r?\n([\s\S]*?)\r?\n---/);
+ if (!match) return null;
+ const raw = match[1];
+ const fm = {};
+ let currentKey = null;
+ for (const line of raw.split('\n')) {
+ const topLevel = line.match(/^([\w][\w.\-]*):\s*(.*)/);
+ if (topLevel) {
+ const [, key, val] = topLevel;
+ if (key === 'metadata') {
+ fm.metadata = fm.metadata || {};
+ currentKey = 'metadata';
+ } else {
+ fm[key] = val.replace(/^['"]|['"]$/g, '').trim();
+ currentKey = key;
+ }
+ continue;
+ }
+ const nested = line.match(/^\s{2}([\w][\w.\-]*):\s*(.*)/);
+ if (nested && currentKey === 'metadata') {
+ fm.metadata[nested[1]] = nested[2].replace(/^['"]|['"]$/g, '').trim();
+ }
+ }
+ return fm;
+}
+
+function validateFrontmatter(fm, skillDir) {
+ const errors = [];
+ if (!fm) {
+ errors.push(`${skillDir}: missing YAML frontmatter`);
+ return errors;
+ }
+ for (const f of REQUIRED_FIELDS) {
+ if (!fm[f]) errors.push(`${skillDir}: missing required field "${f}"`);
+ }
+ if (!fm.metadata) {
+ errors.push(`${skillDir}: missing "metadata" block`);
+ } else {
+ for (const f of REQUIRED_META) {
+ if (!fm.metadata[f]) errors.push(`${skillDir}: missing metadata.${f}`);
+ }
+ }
+
+ // User-invocable skills must have user-invocable and argument-hint
+ if (USER_INVOKABLE_SKILLS_SET.has(skillDir)) {
+ if (fm['user-invocable'] !== 'true') {
+ errors.push(
+ `${skillDir}: user-invocable skill must set "user-invocable: true"`,
+ );
+ }
+ if (!fm['argument-hint']) {
+ errors.push(
+ `${skillDir}: user-invocable skill must provide "argument-hint"`,
+ );
+ }
+ log('user_invocable_validation', {
+ skill: skillDir,
+ category: SCENARIO_SKILLS_SET.has(skillDir) ? 'scenario' : 'review',
+ 'user-invocable': fm['user-invocable'] ?? null,
+ 'argument-hint': fm['argument-hint'] ?? null,
+ valid: errors.length === 0,
+ });
+ }
+
+ return errors;
+}
+
+// ---------------------------------------------------------------------------
+// Discover skills
+// ---------------------------------------------------------------------------
+
+function discoverSkills() {
+ const entries = readdirSync(SKILLS_DIR, { withFileTypes: true });
+ const skills = [];
+ for (const entry of entries) {
+ if (!entry.isDirectory()) continue;
+ const skillPath = join(SKILLS_DIR, entry.name, 'SKILL.md');
+ if (!existsSync(skillPath)) continue;
+ const content = readFileSync(skillPath, 'utf8');
+ const fm = parseFrontmatter(content);
+ const goldensDir = join(SKILLS_DIR, entry.name, 'goldens');
+ const goldens = existsSync(goldensDir)
+ ? readdirSync(goldensDir).filter((f) => f.endsWith('.md'))
+ : [];
+ skills.push({
+ dir: entry.name,
+ skillPath,
+ content,
+ frontmatter: fm,
+ goldens,
+ goldensDir,
+ });
+ }
+ return skills;
+}
+
+// ---------------------------------------------------------------------------
+// Checksum helper
+// ---------------------------------------------------------------------------
+
+function sha256(content) {
+ return createHash('sha256').update(content).digest('hex').slice(0, 16);
+}
+
+// ---------------------------------------------------------------------------
+// Plan: compute all outputs without writing
+// ---------------------------------------------------------------------------
+
+function buildPlan(skills) {
+ const outputs = [];
+ for (const [provider, nestedPath] of Object.entries(PROVIDERS)) {
+ for (const skill of skills) {
+ const destDir = join(DIST_DIR, provider, nestedPath, skill.dir);
+ const destFile = join(destDir, 'SKILL.md');
+ outputs.push({
+ provider,
+ skill: skill.dir,
+ source: relative(ROOT, skill.skillPath),
+ dest: relative(ROOT, destFile),
+ checksum: sha256(skill.content),
+ version: skill.frontmatter?.metadata?.version ?? 'unknown',
+ });
+ for (const golden of skill.goldens) {
+ const src = join(skill.goldensDir, golden);
+ const dest = join(destDir, 'goldens', golden);
+ const goldenContent = readFileSync(src, 'utf8');
+ outputs.push({
+ provider,
+ skill: skill.dir,
+ source: relative(ROOT, src),
+ dest: relative(ROOT, dest),
+ checksum: sha256(goldenContent),
+ type: 'golden',
+ });
+ }
+ }
+ }
+ return outputs;
+}
+
+// ---------------------------------------------------------------------------
+// Write: materialize files into dist/
+// ---------------------------------------------------------------------------
+
+function writeDist(skills, outputs) {
+ for (const out of outputs) {
+ const destAbs = join(ROOT, out.dest);
+ mkdirSync(dirname(destAbs), { recursive: true });
+ const srcAbs = join(ROOT, out.source);
+ cpSync(srcAbs, destAbs);
+ log('file_written', { dest: out.dest, checksum: out.checksum });
+ }
+
+ // Write manifest
+ const manifest = {
+ generatedAt: new Date().toISOString(),
+ providers: Object.keys(PROVIDERS),
+ skills: skills.map((s) => ({
+ name: s.dir,
+ version: s.frontmatter?.metadata?.version ?? 'unknown',
+ goldens: s.goldens.length,
+ checksum: sha256(s.content),
+ })),
+ totalOutputs: outputs.length,
+ };
+ const manifestPath = join(DIST_DIR, 'manifest.json');
+ mkdirSync(dirname(manifestPath), { recursive: true });
+ writeFileSync(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`);
+ log('manifest_written', { path: relative(ROOT, manifestPath) });
+ return manifest;
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
+function main() {
+ log('start', { mode: CHECK_MODE ? 'check' : 'build' });
+
+ // 1. Discover
+ const skills = discoverSkills();
+ const surface = summarizeSkillSurface(skills, PROVIDERS);
+ log('skills_discovered', {
+ count: surface.counts.skills,
+ names: surface.discovered,
+ scenarioCount: surface.counts.scenarios,
+ scenarioNames: surface.scenario,
+ });
+
+ if (skills.length === 0) {
+ log('error', { message: 'No skills found under skills/' });
+ process.exit(1);
+ }
+
+ // 2. Validate frontmatter
+ const allErrors = [];
+ for (const skill of skills) {
+ const errors = validateFrontmatter(skill.frontmatter, skill.dir);
+ if (errors.length > 0) {
+ allErrors.push(...errors);
+ log('validation_error', { skill: skill.dir, errors });
+ } else {
+ log('validation_pass', {
+ skill: skill.dir,
+ version: skill.frontmatter.metadata.version,
+ });
+ }
+ }
+
+ if (allErrors.length > 0) {
+ log('validation_failed', {
+ errorCount: allErrors.length,
+ errors: allErrors,
+ });
+ process.exit(1);
+ }
+
+ // 3. Name/dir consistency check
+ for (const skill of skills) {
+ if (skill.frontmatter.name !== skill.dir) {
+ log('validation_error', {
+ skill: skill.dir,
+ message: `frontmatter name "${skill.frontmatter.name}" does not match directory "${skill.dir}"`,
+ });
+ process.exit(1);
+ }
+ }
+
+ // 4. Build plan
+ const outputs = buildPlan(skills);
+
+ if (outputs.length !== surface.counts.totalOutputs) {
+ log('error', {
+ message: 'Build plan total does not match computed skill surface',
+ expectedTotalOutputs: surface.counts.totalOutputs,
+ actualTotalOutputs: outputs.length,
+ expectedOutputsPerProvider: surface.counts.outputsPerProvider,
+ expectedGoldensPerProvider: surface.counts.goldensPerProvider,
+ expectedInstallDirectories: surface.counts.installDirectories,
+ });
+ process.exit(1);
+ }
+
+ log('plan_computed', {
+ totalOutputs: outputs.length,
+ providers: Object.keys(PROVIDERS),
+ providerCount: surface.counts.providers,
+ skillCount: surface.counts.skills,
+ scenarioCount: surface.counts.scenarios,
+ goldensPerProvider: surface.counts.goldensPerProvider,
+ outputsPerProvider: surface.counts.outputsPerProvider,
+ });
+
+ // 5. Check mode: emit plan and exit
+ if (CHECK_MODE) {
+ const result = {
+ ok: true,
+ mode: 'check',
+ skillSurface: surface,
+ skills: skills.map((s) => ({
+ name: s.dir,
+ version: s.frontmatter.metadata.version,
+ goldens: s.goldens.length,
+ checksum: sha256(s.content),
+ })),
+ providers: Object.keys(PROVIDERS),
+ outputs: outputs.map((o) => ({
+ provider: o.provider,
+ skill: o.skill,
+ dest: o.dest,
+ checksum: o.checksum,
+ ...(o.type ? { type: o.type } : {}),
+ })),
+ totalOutputs: outputs.length,
+ };
+ process.stdout.write(`${JSON.stringify(result, null, 2)}\n`);
+ log('check_complete', {
+ ok: true,
+ totalOutputs: outputs.length,
+ scenarioCount: surface.counts.scenarios,
+ });
+ process.exit(0);
+ }
+
+ // 6. Build mode: write files
+ const manifest = writeDist(skills, outputs);
+ manifest.skillSurface = surface;
+ process.stdout.write(`${JSON.stringify(manifest, null, 2)}\n`);
+ log('build_complete', {
+ totalOutputs: outputs.length,
+ scenarioCount: surface.counts.scenarios,
+ });
+}
+
+main();
diff --git a/scripts/build-workflow-skills.test.mjs b/scripts/build-workflow-skills.test.mjs
new file mode 100644
index 0000000000..34a6a63a5b
--- /dev/null
+++ b/scripts/build-workflow-skills.test.mjs
@@ -0,0 +1,260 @@
+import { createHash } from 'node:crypto';
+import { execSync } from 'node:child_process';
+import {
+ existsSync,
+ mkdirSync,
+ readFileSync,
+ readdirSync,
+ rmSync,
+} from 'node:fs';
+import { join, resolve } from 'node:path';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+
// Repo root: this test lives in scripts/, so the root is one level up.
// NOTE(review): import.meta.dirname requires Node 20.11+ — confirm CI baseline.
const ROOT = resolve(import.meta.dirname, '..');
const DIST = join(ROOT, 'dist', 'workflow-skills');
const SKILLS_DIR = join(ROOT, 'skills');

// Providers the builder emits output for, and the per-provider directory
// where each expects installed skills to live.
const PROVIDERS = ['claude-code', 'cursor'];
const PROVIDER_PATHS = {
  'claude-code': '.claude/skills',
  cursor: '.cursor/skills',
};

// Core skills that must ship for every provider — the two-stage pipeline
// plus the always-on reference skill.
const CORE_SKILLS = ['workflow', 'workflow-teach', 'workflow-build'];

// Dynamically discover all skills from the source directory so the test
// covers any additional/helper skills without requiring constant updates.
// Only directories containing a SKILL.md count as skills.
const ALL_SKILLS = readdirSync(SKILLS_DIR, { withFileTypes: true })
  .filter((d) => d.isDirectory() && existsSync(join(SKILLS_DIR, d.name, 'SKILL.md')))
  .map((d) => d.name);
+
// Short content fingerprint: the first 16 hex characters of the SHA-256
// digest. Accepts strings or Buffers (anything crypto's update() takes).
function sha256(content) {
  const fullDigest = createHash('sha256').update(content).digest('hex');
  return fullDigest.substring(0, 16);
}
+
// Run a shell command from the repo root, capturing stdout as UTF-8.
// Throws (with captured output attached) when the command exits non-zero.
function run(cmd) {
  const options = { cwd: ROOT, encoding: 'utf8', stdio: 'pipe' };
  return execSync(cmd, options);
}
+
describe('build-workflow-skills builder smoke tests', () => {
  beforeAll(() => {
    if (existsSync(DIST)) {
      rmSync(DIST, { recursive: true, force: true });
    }
    // Build once for the whole suite; every test below only reads dist.
    run('node scripts/build-workflow-skills.mjs');
  });

  afterAll(() => {
    if (existsSync(DIST)) {
      rmSync(DIST, { recursive: true, force: true });
    }
  });

  // -----------------------------------------------------------------------
  // Guard: no stale four-stage references in this test file
  // -----------------------------------------------------------------------

  it('does not reference deleted four-stage skills', () => {
    const source = readFileSync(new URL(import.meta.url), 'utf8');
    // Build stale names dynamically so the assertion strings themselves
    // don't trigger a false positive when scanning this file.
    const prefix = 'workflow-';
    for (const suffix of ['desi' + 'gn', 'stre' + 'ss', 'veri' + 'fy']) {
      const stale = prefix + suffix;
      expect(source).not.toContain(stale);
    }
  });

  // -----------------------------------------------------------------------
  // Dynamic discovery covers core skills
  // -----------------------------------------------------------------------

  it('ALL_SKILLS includes every CORE_SKILL', () => {
    for (const core of CORE_SKILLS) {
      expect(ALL_SKILLS).toContain(core);
    }
  });

  // -----------------------------------------------------------------------
  // Manifest
  // -----------------------------------------------------------------------

  it('produces dist/workflow-skills/manifest.json', () => {
    const manifestPath = join(DIST, 'manifest.json');
    expect(existsSync(manifestPath)).toBe(true);
  });

  it('manifest is valid JSON with required fields', () => {
    const manifest = JSON.parse(
      readFileSync(join(DIST, 'manifest.json'), 'utf8'),
    );
    expect(manifest).toHaveProperty('generatedAt');
    expect(manifest).toHaveProperty('providers');
    expect(manifest).toHaveProperty('skills');
    expect(manifest).toHaveProperty('totalOutputs');
    expect(manifest.providers).toEqual(expect.arrayContaining(PROVIDERS));
    expect(manifest.skills.length).toBeGreaterThanOrEqual(CORE_SKILLS.length);
    for (const skill of manifest.skills) {
      expect(skill).toHaveProperty('name');
      expect(skill).toHaveProperty('version');
      expect(skill).toHaveProperty('goldens');
      expect(skill).toHaveProperty('checksum');
    }
  });

  it('manifest includes all core skills', () => {
    const manifest = JSON.parse(
      readFileSync(join(DIST, 'manifest.json'), 'utf8'),
    );
    for (const core of CORE_SKILLS) {
      expect(manifest.skills.some((s) => s.name === core)).toBe(true);
    }
  });

  // -----------------------------------------------------------------------
  // Provider outputs — SKILL.md for every discovered skill
  // -----------------------------------------------------------------------

  // Test cases are generated at collection time: one existence check and one
  // content check per provider/skill pair.
  for (const provider of PROVIDERS) {
    for (const skill of ALL_SKILLS) {
      const relPath = `${provider}/${PROVIDER_PATHS[provider]}/${skill}/SKILL.md`;

      it(`${relPath} exists`, () => {
        const p = join(DIST, provider, PROVIDER_PATHS[provider], skill, 'SKILL.md');
        expect(existsSync(p)).toBe(true);
      });

      it(`${relPath} matches source content`, () => {
        const src = readFileSync(join(SKILLS_DIR, skill, 'SKILL.md'), 'utf8');
        const dst = readFileSync(
          join(DIST, provider, PROVIDER_PATHS[provider], skill, 'SKILL.md'),
          'utf8',
        );
        expect(dst).toBe(src);
      });
    }
  }

  // -----------------------------------------------------------------------
  // Goldens copied alongside their parent skill
  // -----------------------------------------------------------------------

  it('goldens are copied beneath their parent skill in dist output', () => {
    const skillsWithGoldens = readdirSync(SKILLS_DIR, { withFileTypes: true })
      .filter((d) => d.isDirectory())
      .filter((d) => {
        const gDir = join(SKILLS_DIR, d.name, 'goldens');
        return existsSync(gDir) && readdirSync(gDir).some((f) => f.endsWith('.md'));
      });

    // Sanity: at least one skill must ship goldens, otherwise the loop
    // below would pass vacuously.
    expect(skillsWithGoldens.length).toBeGreaterThan(0);

    for (const skillEntry of skillsWithGoldens) {
      const srcGoldens = join(SKILLS_DIR, skillEntry.name, 'goldens');
      const goldenFiles = readdirSync(srcGoldens).filter((f) => f.endsWith('.md'));

      for (const provider of PROVIDERS) {
        for (const golden of goldenFiles) {
          const destGolden = join(
            DIST,
            provider,
            PROVIDER_PATHS[provider],
            skillEntry.name,
            'goldens',
            golden,
          );
          expect(
            existsSync(destGolden),
            `missing golden: ${provider}/${skillEntry.name}/goldens/${golden}`,
          ).toBe(true);

          const srcContent = readFileSync(join(srcGoldens, golden), 'utf8');
          const dstContent = readFileSync(destGolden, 'utf8');
          expect(dstContent).toBe(srcContent);
        }
      }
    }
  });

  // -----------------------------------------------------------------------
  // --check mode exits 0 and emits parseable JSON
  // -----------------------------------------------------------------------

  it('--check exits 0 and emits valid JSON plan', () => {
    const stdout = run('node scripts/build-workflow-skills.mjs --check');
    const plan = JSON.parse(stdout);
    expect(plan.ok).toBe(true);
    expect(plan.mode).toBe('check');
    expect(plan.providers).toEqual(expect.arrayContaining(PROVIDERS));
    expect(plan.outputs.length).toBeGreaterThan(0);
    expect(plan.totalOutputs).toBe(plan.outputs.length);

    // Core skills must appear in the check plan
    for (const core of CORE_SKILLS) {
      expect(plan.skills.some((s) => s.name === core)).toBe(true);
    }

    // All discovered skills must appear in the check plan
    for (const skill of ALL_SKILLS) {
      expect(plan.skills.some((s) => s.name === skill)).toBe(true);
    }
  });

  // -----------------------------------------------------------------------
  // Idempotence: second build is byte-stable
  // -----------------------------------------------------------------------

  describe('idempotence', () => {
    let manifestBefore;
    let fileHashesBefore;

    beforeAll(() => {
      // First build already ran in outer beforeAll.
      // Capture manifest and hashes of all files.
      manifestBefore = readFileSync(join(DIST, 'manifest.json'), 'utf8');
      fileHashesBefore = collectFileHashes(DIST);

      // Run a second build.
      run('node scripts/build-workflow-skills.mjs');
    });

    it('manifest.json is byte-stable across builds', () => {
      const manifestAfter = readFileSync(join(DIST, 'manifest.json'), 'utf8');
      // Strip generatedAt since timestamps differ
      const normalize = (m) => {
        const parsed = JSON.parse(m);
        delete parsed.generatedAt;
        return JSON.stringify(parsed, null, 2);
      };
      expect(normalize(manifestAfter)).toBe(normalize(manifestBefore));
    });

    it('non-manifest outputs are byte-identical across builds', () => {
      const fileHashesAfter = collectFileHashes(DIST);
      // Remove manifest from comparison (has timestamp)
      delete fileHashesBefore['manifest.json'];
      delete fileHashesAfter['manifest.json'];
      expect(fileHashesAfter).toEqual(fileHashesBefore);
    });
  });
});
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
// Recursively hash every regular file under `dir`, keyed by its path
// relative to the starting directory (forward-slash separated, matching
// how the tests index entries like "manifest.json").
function collectFileHashes(dir, prefix = '') {
  const hashes = {};
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
    const rel = prefix === '' ? entry.name : `${prefix}/${entry.name}`;
    const abs = join(dir, entry.name);
    if (entry.isDirectory()) {
      Object.assign(hashes, collectFileHashes(abs, rel));
    } else {
      hashes[rel] = sha256(readFileSync(abs));
    }
  }
  return hashes;
}
diff --git a/scripts/ensure-workflow-fixture-symlink.test.mjs b/scripts/ensure-workflow-fixture-symlink.test.mjs
new file mode 100644
index 0000000000..00b20991ae
--- /dev/null
+++ b/scripts/ensure-workflow-fixture-symlink.test.mjs
@@ -0,0 +1,170 @@
+import {
+ mkdtempSync,
+ mkdirSync,
+ readlinkSync,
+ rmSync,
+ symlinkSync,
+ writeFileSync,
+} from 'node:fs';
+import { tmpdir } from 'node:os';
+import { dirname, join, relative } from 'node:path';
+import { afterEach, describe, expect, it } from 'vitest';
+import { ensureFixtureSymlink } from './lib/ensure-workflow-fixture-symlink.mjs';
+
// Temp roots created by makeWorkspace; the afterEach hook removes them.
const tempRoots = [];

// Build an isolated throw-away workspace: a fake repo root containing
// packages/workflow plus a sibling fixture directory.
function makeWorkspace() {
  const root = mkdtempSync(join(tmpdir(), 'workflow-fixture-symlink-'));
  tempRoots.push(root);

  const repoRoot = join(root, 'repo');
  const fixtureDir = join(root, 'fixture');
  const workflowPkg = join(repoRoot, 'packages', 'workflow');
  for (const dir of [workflowPkg, fixtureDir]) {
    mkdirSync(dir, { recursive: true });
  }
  return { repoRoot, fixtureDir, workflowPkg };
}
+
// Build a log/fail pair for ensureFixtureSymlink: log() records every event
// into `events` so tests can assert on the emitted stream, while fail()
// throws an Error carrying the reason and structured fields.
function makeHarness() {
  const events = [];
  const log = (event, fields = {}) => {
    events.push({ event, ...fields });
  };
  const fail = (reason, fields = {}) => {
    throw Object.assign(new Error(reason), { reason, fields });
  };
  return { events, log, fail };
}
+
// Remove every workspace created during the test, regardless of outcome.
afterEach(() => {
  for (const root of tempRoots.splice(0)) {
    rmSync(root, { recursive: true, force: true });
  }
});
+
// Each test gets a fresh throw-away workspace from makeWorkspace();
// the afterEach hook above removes them.
describe('ensureFixtureSymlink', () => {
  it('creates the symlink on first run', () => {
    const { repoRoot, fixtureDir, workflowPkg } = makeWorkspace();
    const harness = makeHarness();

    const result = ensureFixtureSymlink({
      name: 'fixture-a',
      fixtureDir,
      repoRoot,
      linkName: 'workflow',
      targetAbs: workflowPkg,
      log: harness.log,
      fail: harness.fail,
    });

    const linkPath = join(fixtureDir, 'node_modules', 'workflow');
    expect(readlinkSync(linkPath)).toBe(
      relative(dirname(linkPath), workflowPkg)
    );
    expect(result).toMatchObject({
      link: 'node_modules/workflow',
      status: 'created',
    });
    // Normalize Windows separators so the assertion is platform-agnostic.
    expect(result.target.split('\\').join('/')).toBe('packages/workflow');
    expect(harness.events.at(-1)).toMatchObject({
      event: 'symlink_created',
      link: 'node_modules/workflow',
    });
  });

  it('emits symlink_ok on repeat runs', () => {
    const { repoRoot, fixtureDir, workflowPkg } = makeWorkspace();
    const harness = makeHarness();

    ensureFixtureSymlink({
      name: 'fixture-a',
      fixtureDir,
      repoRoot,
      linkName: 'workflow',
      targetAbs: workflowPkg,
      log: harness.log,
      fail: harness.fail,
    });

    const result = ensureFixtureSymlink({
      name: 'fixture-a',
      fixtureDir,
      repoRoot,
      linkName: 'workflow',
      targetAbs: workflowPkg,
      log: harness.log,
      fail: harness.fail,
    });

    expect(result).toMatchObject({
      link: 'node_modules/workflow',
      status: 'ok',
    });
    expect(harness.events.at(-1)).toMatchObject({
      event: 'symlink_ok',
      link: 'node_modules/workflow',
    });
  });

  it('repairs a mismatched symlink target', () => {
    const { repoRoot, fixtureDir, workflowPkg } = makeWorkspace();
    const oldPkg = join(repoRoot, 'packages', 'workflow-old');
    mkdirSync(oldPkg, { recursive: true });

    // Pre-seed a symlink that points at the wrong package.
    const linkPath = join(fixtureDir, 'node_modules', 'workflow');
    mkdirSync(dirname(linkPath), { recursive: true });
    symlinkSync(relative(dirname(linkPath), oldPkg), linkPath);

    const harness = makeHarness();
    const result = ensureFixtureSymlink({
      name: 'fixture-a',
      fixtureDir,
      repoRoot,
      linkName: 'workflow',
      targetAbs: workflowPkg,
      log: harness.log,
      fail: harness.fail,
    });

    expect(readlinkSync(linkPath)).toBe(
      relative(dirname(linkPath), workflowPkg)
    );
    expect(result).toMatchObject({
      link: 'node_modules/workflow',
      status: 'repaired',
    });
    expect(result.target.split('\\').join('/')).toBe('packages/workflow');
    expect(harness.events.at(-1)).toMatchObject({
      event: 'symlink_repaired',
      link: 'node_modules/workflow',
    });
  });

  it('fails with symlink_path_conflict when a normal file occupies the path', () => {
    const { repoRoot, fixtureDir, workflowPkg } = makeWorkspace();
    const linkPath = join(fixtureDir, 'node_modules', 'workflow');
    mkdirSync(dirname(linkPath), { recursive: true });
    writeFileSync(linkPath, 'occupied');

    const harness = makeHarness();
    expect(() =>
      ensureFixtureSymlink({
        name: 'fixture-a',
        fixtureDir,
        repoRoot,
        linkName: 'workflow',
        targetAbs: workflowPkg,
        log: harness.log,
        fail: harness.fail,
      })
    ).toThrow('symlink_path_conflict');
    expect(harness.events.at(-1)).toMatchObject({
      event: 'symlink_conflict',
      link: 'node_modules/workflow',
    });
  });
});
diff --git a/scripts/lib/ensure-workflow-fixture-symlink.mjs b/scripts/lib/ensure-workflow-fixture-symlink.mjs
new file mode 100644
index 0000000000..fd573224de
--- /dev/null
+++ b/scripts/lib/ensure-workflow-fixture-symlink.mjs
@@ -0,0 +1,126 @@
+import {
+ existsSync,
+ lstatSync,
+ mkdirSync,
+ readlinkSync,
+ symlinkSync,
+ unlinkSync,
+} from 'node:fs';
+import { dirname, join, relative } from 'node:path';
+
+/**
+ * @typedef {'created' | 'ok' | 'repaired'} SymlinkStatus
+ *
+ * @typedef {{
+ * name: string,
+ * fixtureDir: string,
+ * repoRoot: string,
+ * linkName: string,
+ * targetAbs: string,
+ * log: (event: string, fields?: Record) => void,
+ * fail: (reason: string, fields?: Record) => never,
+ * }} EnsureFixtureSymlinkInput
+ *
+ * @typedef {{
+ * link: string,
+ * target: string,
+ * status: SymlinkStatus,
+ * previousTarget?: string,
+ * }} EnsureFixtureSymlinkResult
+ */
+
+/**
+ * Ensure a fixture-local workspace-package symlink exists and points at the expected target.
+ * Emits one of: symlink_created, symlink_ok, symlink_repaired, symlink_conflict, symlink_error.
+ *
+ * @param {EnsureFixtureSymlinkInput} input
+ * @returns {EnsureFixtureSymlinkResult}
+ */
export function ensureFixtureSymlink({
  name,
  fixtureDir,
  repoRoot,
  linkName,
  targetAbs,
  log,
  fail,
}) {
  const linkPath = join(fixtureDir, 'node_modules', linkName);
  const link = `node_modules/${linkName}`;
  // Repo-relative display path for logs/results. path.relative handles both
  // platform separators; the previous `targetAbs.replace(repoRoot + '/', '')`
  // silently no-ops on Windows, leaving an absolute path in the output.
  const target = relative(repoRoot, targetAbs);
  const expectedTarget = relative(dirname(linkPath), targetAbs);

  if (!existsSync(targetAbs)) {
    fail('symlink_target_not_found', { name, link, target });
  }

  // Track whether fail() was already called so the outer catch doesn't
  // swallow it as a generic symlink_error.
  let failCalled = false;
  function trackedFail(reason, fields) {
    failCalled = true;
    fail(reason, fields);
  }

  try {
    mkdirSync(dirname(linkPath), { recursive: true });

    try {
      const stat = lstatSync(linkPath);

      if (!stat.isSymbolicLink()) {
        // A real file or directory occupies the link path; refuse to
        // delete user data — report the conflict instead.
        log('symlink_conflict', {
          name,
          link,
          target,
          actualType: 'non_symlink',
        });
        trackedFail('symlink_path_conflict', {
          name,
          link,
          target,
          actualType: 'non_symlink',
        });
      }

      const actualTarget = readlinkSync(linkPath);
      if (actualTarget === expectedTarget) {
        log('symlink_ok', { name, link, target });
        return { link, target, status: 'ok' };
      }

      // Wrong target: replace the link.
      unlinkSync(linkPath);
      // 'dir' is required on Windows for directory targets; ignored elsewhere.
      symlinkSync(expectedTarget, linkPath, 'dir');
      log('symlink_repaired', {
        name,
        link,
        previousTarget: actualTarget,
        target,
      });
      return { link, target, status: 'repaired', previousTarget: actualTarget };
    } catch (e) {
      // Re-throw errors that originated from the fail() callback
      if (failCalled) {
        throw e;
      }
      // lstatSync throws ENOENT when the path doesn't exist at all
      if (e?.code !== 'ENOENT') {
        throw e;
      }
    }

    symlinkSync(expectedTarget, linkPath, 'dir');
    log('symlink_created', { name, link, target });
    return { link, target, status: 'created' };
  } catch (e) {
    if (failCalled) {
      throw e;
    }
    fail('symlink_error', {
      name,
      link,
      target,
      detail: e instanceof Error ? e.message : String(e),
    });
  }
}
diff --git a/scripts/lib/materialize-workflow-skill-fixture.mjs b/scripts/lib/materialize-workflow-skill-fixture.mjs
new file mode 100644
index 0000000000..eb8497690d
--- /dev/null
+++ b/scripts/lib/materialize-workflow-skill-fixture.mjs
@@ -0,0 +1,212 @@
+#!/usr/bin/env node
+
+/**
+ * Materializes extracted golden fixtures into runnable fixture directories.
+ *
+ * Usage:
+ * node scripts/lib/materialize-workflow-skill-fixture.mjs
+ *
+ * Reads the spec.json, parses the referenced golden, and writes:
+ * workflows/.ts
+ * workflows/.integration.test.ts
+ * vitest.integration.config.ts
+ * app/api//route.ts (only when golden includes route output)
+ *
+ * Idempotent: re-running produces identical output for unchanged goldens.
+ * Exits 0 on success with JSONL status lines to stderr.
+ * Exits 1 on failure with machine-readable error to stderr.
+ */
+
+import {
+ readFileSync,
+ writeFileSync,
+ mkdirSync,
+ existsSync,
+} from 'node:fs';
+import { dirname, join, resolve } from 'node:path';
+import { execFileSync } from 'node:child_process';
+import { ensureFixtureSymlink } from './ensure-workflow-fixture-symlink.mjs';
+
// Vitest config written verbatim into each materialized fixture.
// The fixture resolves `@workflow/vitest` through the node_modules
// symlinks created later in this script.
const VITEST_CONFIG = `import { dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { defineConfig } from 'vitest/config';
import { workflow } from '@workflow/vitest';

const __dirname = dirname(fileURLToPath(import.meta.url));

export default defineConfig({
  root: __dirname,
  plugins: [workflow()],
  test: {
    include: ['**/*.integration.test.ts'],
    testTimeout: 60_000,
  },
});
`;
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
// Write one structured JSONL status line to stderr (stdout is reserved
// for the machine-readable manifest emitted at the end of the script).
function log(event, fields = {}) {
  const payload = { event, ...fields };
  process.stderr.write(`${JSON.stringify(payload)}\n`);
}
+
// Emit a structured materialize_error line to stderr and abort the script.
// Never returns (process.exit), so callers can treat it as terminal.
function fail(reason, fields = {}) {
  log('materialize_error', { reason, ...fields });
  process.exit(1);
}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
// First CLI argument: path to the fixture's spec.json.
const specPath = process.argv[2];
if (!specPath) {
  fail('usage: node materialize-workflow-skill-fixture.mjs ');
}

const absSpecPath = resolve(specPath);
if (!existsSync(absSpecPath)) {
  fail('spec_not_found', { specPath: absSpecPath });
}

let spec;
try {
  spec = JSON.parse(readFileSync(absSpecPath, 'utf-8'));
} catch (e) {
  fail('spec_parse_error', { specPath: absSpecPath, detail: e.message });
}

// Both fields are mandatory; goldenPath is repo-root relative.
const { name, goldenPath } = spec;
if (!name || !goldenPath) {
  fail('spec_missing_fields', { specPath: absSpecPath, name, goldenPath });
}

log('materialize_start', { name, goldenPath });

// Resolve golden path relative to repo root (spec lives in tests/fixtures/...)
// NOTE(review): assumes the spec sits exactly four directories below the
// repo root — confirm if fixture layout changes.
const repoRoot = resolve(dirname(absSpecPath), '..', '..', '..', '..');
const absGoldenPath = resolve(repoRoot, goldenPath);

if (!existsSync(absGoldenPath)) {
  fail('golden_not_found', { goldenPath: absGoldenPath });
}

// Run the parser to extract fixture data
const parserScript = resolve(
  repoRoot,
  'scripts/lib/parse-workflow-skill-golden.mjs'
);
let parsed;
try {
  // Parser prints the extracted fixture JSON on stdout; its own status
  // lines go to stderr (captured into e.stderr on failure).
  const stdout = execFileSync('node', [parserScript, absGoldenPath], {
    encoding: 'utf-8',
    stdio: ['pipe', 'pipe', 'pipe'],
  });
  parsed = JSON.parse(stdout);
} catch (e) {
  fail('parser_failed', {
    goldenPath: absGoldenPath,
    detail: e.stderr || e.message,
  });
}

log('golden_extracted', { name: parsed.name, sourcePath: parsed.sourcePath });
+
// Determine fixture directory (same directory as spec.json)
const fixtureDir = dirname(absSpecPath);

// Validate required tokens from spec
// Missing tokens only warn — the golden stays the source of truth and the
// build is not aborted here.
const requires = spec.requires || {};
if (requires.workflow && parsed.workflow) {
  const missing = requires.workflow.filter(
    (tok) => !parsed.workflow.code.includes(tok)
  );
  if (missing.length > 0) {
    log('materialize_warning', {
      name,
      reason: 'missing_workflow_tokens',
      missing,
    });
  }
}
if (requires.test && parsed.test) {
  const missing = requires.test.filter(
    (tok) => !parsed.test.code.includes(tok)
  );
  if (missing.length > 0) {
    log('materialize_warning', {
      name,
      reason: 'missing_test_tokens',
      missing,
    });
  }
}

// Write files
const writtenFiles = [];

// Write one fixture file (path relative to the fixture dir), creating
// parent directories as needed, and record it for the final manifest.
function writeFixtureFile(relPath, content) {
  const absPath = join(fixtureDir, relPath);
  mkdirSync(dirname(absPath), { recursive: true });
  writeFileSync(absPath, content, 'utf-8');
  writtenFiles.push(relPath);
  log('file_written', { name, path: relPath });
}
+
// Workflow file
writeFixtureFile(parsed.workflow.path, parsed.workflow.code + '\n');

// Test file
writeFixtureFile(parsed.test.path, parsed.test.code + '\n');

// Vitest config
writeFixtureFile('vitest.integration.config.ts', VITEST_CONFIG);

// Route file (only when golden includes route output)
if (parsed.route) {
  writeFixtureFile(parsed.route.path, parsed.route.code + '\n');
}

// Create node_modules symlinks so esbuild and vitest resolve workspace
// packages from fixture dirs (pnpm strict mode doesn't hoist them).
const symlinks = [
  ['workflow', join(repoRoot, 'packages', 'workflow')],
  [join('@workflow', 'vitest'), join(repoRoot, 'packages', 'vitest')],
];

const links = symlinks.map(([linkName, targetAbs]) =>
  ensureFixtureSymlink({
    name,
    fixtureDir,
    repoRoot,
    linkName,
    targetAbs,
    log,
    fail,
  })
);

// NOTE(review): the string replace below assumes '/' separators — on
// Windows it would leave the absolute path; confirm whether Windows
// support matters for this script.
log('materialize_complete', {
  name,
  fixtureDir: fixtureDir.replace(repoRoot + '/', ''),
  files: writtenFiles,
  links,
  hasRoute: !!parsed.route,
});

// Output manifest to stdout for machine consumption
process.stdout.write(
  JSON.stringify(
    {
      name,
      fixtureDir: fixtureDir.replace(repoRoot + '/', ''),
      files: writtenFiles,
      links,
      verificationArtifact: parsed.verificationArtifact,
    },
    null,
    2
  ) + '\n'
);
diff --git a/scripts/lib/parse-workflow-skill-golden.mjs b/scripts/lib/parse-workflow-skill-golden.mjs
new file mode 100644
index 0000000000..d3af4ec1fc
--- /dev/null
+++ b/scripts/lib/parse-workflow-skill-golden.mjs
@@ -0,0 +1,249 @@
+#!/usr/bin/env node
+
+/**
+ * Extracts executable fixture data from a workflow skill golden markdown file.
+ *
+ * Usage:
+ * node scripts/lib/parse-workflow-skill-golden.mjs
+ *
+ * Exits 0 with JSON matching ExtractedGoldenFixture on success.
+ * Exits 1 with a machine-readable error payload on failure.
+ */
+
+import { readFileSync } from 'node:fs';
+import { basename, dirname, relative } from 'node:path';
+
+// ---------------------------------------------------------------------------
+// Section extraction
+// ---------------------------------------------------------------------------
+
/**
 * Return the body of the markdown section opened by `## <heading>`.
 * The body runs until the next H2 heading (or end of document) and is
 * trimmed of surrounding blank lines.
 *
 * @param {string} text Full markdown text
 * @param {string} heading Heading text without the `## ` prefix
 * @returns {string|null} Section body, or null when the heading is absent
 */
function extractSection(text, heading) {
  const lines = text.split('\n');
  const wanted = `## ${heading}`;
  const start = lines.findIndex((line) => line.trim() === wanted);
  if (start === -1) return null;

  // Everything after the heading, truncated at the next H2 if one exists.
  const rest = lines.slice(start + 1);
  const nextH2 = rest.findIndex((line) => line.trim().startsWith('## '));
  const body = nextH2 === -1 ? rest : rest.slice(0, nextH2);
  return body.join('\n').trim();
}
+
/**
 * Return the contents of the first fenced code block tagged with `language`
 * inside a markdown section. The fence must open with a line that is exactly
 * ```<language> and close with a bare ``` line; an unterminated fence
 * yields null.
 *
 * @param {string} sectionText
 * @param {string} language
 * @returns {string|null}
 */
function extractCodeFence(sectionText, language) {
  const lines = sectionText.split('\n');
  const opener = `\`\`\`${language}`;
  const start = lines.findIndex((line) => line.trim() === opener);
  if (start === -1) return null;

  for (let i = start + 1; i < lines.length; i++) {
    if (lines[i].trim() === '```') {
      return lines.slice(start + 1, i).join('\n');
    }
  }
  return null;
}
+
+// ---------------------------------------------------------------------------
+// Error helpers
+// ---------------------------------------------------------------------------
+
/**
 * Emit a machine-readable golden_parse_error line on stderr and exit 1.
 * Never returns; callers rely on the process terminating here, which is
 * why code after fail() calls can safely assume success.
 *
 * @param {string} goldenPath
 * @param {string} section Markdown section (or phase) that failed
 * @param {string} reason
 */
function fail(goldenPath, section, reason) {
  const error = {
    event: 'golden_parse_error',
    goldenPath,
    section,
    reason,
  };
  process.stderr.write(JSON.stringify(error) + '\n');
  process.exit(1);
}
+
+// ---------------------------------------------------------------------------
+// Main parse logic
+// ---------------------------------------------------------------------------
+
/**
 * Parse one golden markdown file into an executable fixture description.
 * Requires the "Verification Artifact", "Expected Code Output" and
 * "Expected Test Output" sections; "Expected Route Output" is optional.
 * Any missing/invalid required piece terminates the process via fail().
 *
 * @param {string} goldenPath Relative or absolute path to the golden markdown
 * @param {string} text Full markdown content
 * @returns {object} ExtractedGoldenFixture
 */
function parseGolden(goldenPath, text) {
  const name = basename(goldenPath, '.md');

  // --- Verification Artifact (required, parsed first to get file paths) ---
  const artifactSection = extractSection(text, 'Verification Artifact');
  if (!artifactSection) {
    fail(goldenPath, 'Verification Artifact', 'section_missing');
  }

  const artifactJson = extractCodeFence(artifactSection, 'json');
  if (!artifactJson) {
    fail(goldenPath, 'Verification Artifact', 'code_fence_missing');
  }

  let verificationArtifact;
  try {
    verificationArtifact = JSON.parse(artifactJson);
  } catch (/** @type {any} */ e) {
    // fail() exits the process, so control never continues on bad JSON.
    fail(goldenPath, 'Verification Artifact', `invalid_json: ${e.message}`);
  }

  // Validate required artifact keys
  const requiredArtifactKeys = [
    'contractVersion',
    'blueprintName',
    'files',
    'testMatrix',
    'runtimeCommands',
    'implementationNotes',
  ];
  const missingArtifactKeys = requiredArtifactKeys.filter(
    (k) => !(k in verificationArtifact)
  );
  if (missingArtifactKeys.length > 0) {
    fail(
      goldenPath,
      'Verification Artifact',
      `missing_keys: ${missingArtifactKeys.join(', ')}`
    );
  }

  // Build a lookup from artifact files: kind -> path
  // (duplicate kinds: Object.fromEntries keeps the last entry)
  const fileLookup = Object.fromEntries(
    verificationArtifact.files.map(
      (/** @type {{kind: string, path: string}} */ f) => [f.kind, f.path]
    )
  );

  // --- Expected Code Output (workflow — required) ---
  const workflowSection = extractSection(text, 'Expected Code Output');
  if (!workflowSection) {
    fail(goldenPath, 'Expected Code Output', 'section_missing');
  }

  const workflowCode = extractCodeFence(workflowSection, 'typescript');
  if (!workflowCode) {
    fail(goldenPath, 'Expected Code Output', 'code_fence_missing');
  }

  const workflowPath = fileLookup['workflow'] ?? null;
  if (!workflowPath) {
    fail(goldenPath, 'Verification Artifact', 'missing_file_kind: workflow');
  }

  // --- Expected Test Output (test — required) ---
  const testSection = extractSection(text, 'Expected Test Output');
  if (!testSection) {
    fail(goldenPath, 'Expected Test Output', 'section_missing');
  }

  const testCode = extractCodeFence(testSection, 'typescript');
  if (!testCode) {
    fail(goldenPath, 'Expected Test Output', 'code_fence_missing');
  }

  const testPath = fileLookup['test'] ?? null;
  if (!testPath) {
    fail(goldenPath, 'Verification Artifact', 'missing_file_kind: test');
  }

  // --- Expected Route Output (route — optional) ---
  // Only materialized when both the code fence and the artifact path exist;
  // a partial route section is silently ignored.
  let route = null;
  const routeSection = extractSection(text, 'Expected Route Output');
  if (routeSection) {
    const routeCode = extractCodeFence(routeSection, 'typescript');
    const routePath = fileLookup['route'] ?? null;
    if (routeCode && routePath) {
      route = { path: routePath, code: routeCode };
    }
  }

  return {
    name,
    sourcePath: goldenPath,
    workflow: { path: workflowPath, code: workflowCode },
    test: { path: testPath, code: testCode },
    route,
    verificationArtifact,
  };
}
+
+// ---------------------------------------------------------------------------
+// CLI entry point
+// ---------------------------------------------------------------------------
+
// First CLI argument: path to the golden markdown file.
const goldenPath = process.argv[2];
if (!goldenPath) {
  process.stderr.write(
    JSON.stringify({
      event: 'golden_parse_error',
      goldenPath: null,
      section: 'argv',
      reason: 'usage: node parse-workflow-skill-golden.mjs ',
    }) + '\n'
  );
  process.exit(1);
}

let text;
try {
  text = readFileSync(goldenPath, 'utf-8');
} catch (/** @type {any} */ e) {
  process.stderr.write(
    JSON.stringify({
      event: 'golden_parse_error',
      goldenPath,
      section: 'file_read',
      reason: e.message,
    }) + '\n'
  );
  process.exit(1);
}

// parseGolden exits the process itself on any parse failure.
const fixture = parseGolden(goldenPath, text);

// Structured success log to stderr, fixture JSON to stdout
process.stderr.write(
  JSON.stringify({
    event: 'golden_extracted',
    name: fixture.name,
    sourcePath: fixture.sourcePath,
    sections: {
      workflow: !!fixture.workflow,
      test: !!fixture.test,
      route: !!fixture.route,
      verificationArtifact: true,
    },
  }) + '\n'
);

process.stdout.write(JSON.stringify(fixture, null, 2) + '\n');
diff --git a/scripts/lib/validate-workflow-skill-files.mjs b/scripts/lib/validate-workflow-skill-files.mjs
new file mode 100644
index 0000000000..5b2bf7cdc3
--- /dev/null
+++ b/scripts/lib/validate-workflow-skill-files.mjs
@@ -0,0 +1,317 @@
+/**
+ * Pure validation logic for workflow skill files.
+ * No filesystem access — accepts file contents as a map.
+ */
+
+/**
+ * @param {string} text
+ * @param {string} [language='json']
+ * @returns {string|null}
+ */
+function extractCodeFence(text, language = 'json') {
+ const lines = text.split('\n');
+ const startFence = `\`\`\`${language}`;
+ const start = lines.findIndex((line) => line.trim() === startFence);
+
+ if (start === -1) return null;
+
+ const end = lines.findIndex(
+ (line, index) => index > start && line.trim() === '```'
+ );
+ if (end === -1) return null;
+
+ return lines.slice(start + 1, end).join('\n');
+}
+
+/**
+ * @param {{
+ * language?: string,
+ * requiredKeys?: string[],
+ * nonEmptyKeys?: string[],
+ * } | undefined} jsonFence
+ * @param {string} text
+ * @returns {{
+ * jsonFenceError?: 'missing_code_fence' | 'invalid_json',
+ * missingJsonKeys?: string[],
+ * emptyJsonKeys?: string[],
+ * }}
+ */
+function validateJsonFence(jsonFence, text) {
+ if (!jsonFence) return {};
+
+ const raw = extractCodeFence(text, jsonFence.language ?? 'json');
+ if (!raw) {
+ return {
+ jsonFenceError: 'missing_code_fence',
+ missingJsonKeys: [...(jsonFence.requiredKeys ?? [])],
+ emptyJsonKeys: [],
+ };
+ }
+
+ let parsed;
+ try {
+ parsed = JSON.parse(raw);
+ } catch {
+ return {
+ jsonFenceError: 'invalid_json',
+ missingJsonKeys: [...(jsonFence.requiredKeys ?? [])],
+ emptyJsonKeys: [],
+ };
+ }
+
+ const missingJsonKeys = (jsonFence.requiredKeys ?? []).filter(
+ (key) => !(key in parsed)
+ );
+
+ const emptyJsonKeys = (jsonFence.nonEmptyKeys ?? []).filter((key) => {
+ const value = parsed[key];
+ return value == null || (Array.isArray(value) && value.length === 0);
+ });
+
+ return { missingJsonKeys, emptyJsonKeys };
+}
+
+/**
+ * @param {string} text
+ * @param {string} headingLine
+ * @returns {string}
+ */
+function extractSection(text, headingLine) {
+ const lines = text.split('\n');
+ const start = lines.findIndex((line) => line.trim() === headingLine.trim());
+ if (start === -1) return '';
+
+ // Determine heading level of the target (count leading '#' characters)
+ const targetLevel = headingLine.trim().match(/^(#{2,6})\s/)?.[1]?.length ?? 2;
+
+ const body = [];
+ for (let i = start + 1; i < lines.length; i += 1) {
+ const match = lines[i].match(/^(#{2,6})\s/);
+ if (match && match[1].length <= targetLevel) break;
+ body.push(lines[i]);
+ }
+
+ return body.join('\n');
+}
+
+/**
+ * Validate that a check's required tokens appear inside a specific markdown
+ * section of the file.
+ *
+ * When the heading itself is absent, extractSection returns '' and every
+ * token is therefore reported missing; the `sectionFound` flag lets callers
+ * distinguish "section missing" from "section present but incomplete"
+ * (buildFailureMessage uses it for the section-missing message).
+ *
+ * @param {{ sectionHeading?: string, mustIncludeWithinSection?: string[] }} check
+ * @param {string} text
+ * @returns {{ sectionFound?: boolean, missingSectionTokens?: string[] }}
+ */
+function validateSectionTokens(check, text) {
+  // No-op unless the check declares both a heading and tokens to find.
+  if (!check.sectionHeading || !check.mustIncludeWithinSection?.length) {
+    return {};
+  }
+
+  const sectionFound = text
+    .split('\n')
+    .some((line) => line.trim() === check.sectionHeading.trim());
+  const section = extractSection(text, check.sectionHeading);
+  const missingSectionTokens = check.mustIncludeWithinSection.filter(
+    (token) => !section.includes(token)
+  );
+
+  return { sectionFound, missingSectionTokens };
+}
+
+function findOutOfOrder(text, values = []) {
+ if (!Array.isArray(values) || values.length < 2) return null;
+
+ const positions = values.map((value) => ({
+ value,
+ index: text.indexOf(value),
+ }));
+
+ if (positions.some((item) => item.index === -1)) return null;
+
+ for (let i = 1; i < positions.length; i += 1) {
+ if (positions[i].index < positions[i - 1].index) {
+ return {
+ expected: values,
+ positions,
+ firstInversion: {
+ before: positions[i - 1],
+ after: positions[i],
+ },
+ };
+ }
+ }
+
+ return null;
+}
+
+/**
+ * @param {string} text
+ * @param {string} needle
+ * @param {number} [radius=80]
+ * @returns {string|null}
+ */
+function excerptAround(text, needle, radius = 80) {
+ const index = text.indexOf(needle);
+ if (index === -1) return null;
+ const start = Math.max(0, index - radius);
+ const end = Math.min(text.length, index + needle.length + radius);
+ return text.slice(start, end);
+}
+
+function classifyFailureReason(missing, forbidden, orderFailure, extra) {
+ if (missing.length > 0) return 'missing_required_content';
+ if (forbidden.length > 0) return 'forbidden_content_present';
+ if (orderFailure) return 'content_out_of_order';
+ if (
+ extra.jsonFenceError ||
+ extra.missingJsonKeys?.length ||
+ extra.emptyJsonKeys?.length ||
+ extra.missingSectionTokens?.length
+ ) {
+ return 'structured_validation_failed';
+ }
+ return 'validation_failed';
+}
+
+function buildFailureMessage(check, reason, missing, forbidden, extra) {
+ if (reason === 'missing_required_content') {
+ return `Missing required content in ${check.file}: ${missing.join(', ')}`;
+ }
+ if (reason === 'forbidden_content_present') {
+ return `Forbidden content present in ${check.file}: ${forbidden.join(', ')}`;
+ }
+ if (reason === 'content_out_of_order') {
+ return `Content appears out of order in ${check.file}`;
+ }
+ if (reason === 'structured_validation_failed') {
+ if (extra.sectionHeading && extra.sectionFound === false) {
+ return `Missing required section ${extra.sectionHeading} in ${check.file}`;
+ }
+ const parts = [];
+ if (extra.jsonFenceError) parts.push(`jsonFenceError=${extra.jsonFenceError}`);
+ if (extra.missingJsonKeys?.length) {
+ parts.push(`missingJsonKeys=${extra.missingJsonKeys.join(', ')}`);
+ }
+ if (extra.emptyJsonKeys?.length) {
+ parts.push(`emptyJsonKeys=${extra.emptyJsonKeys.join(', ')}`);
+ }
+ if (extra.missingSectionTokens?.length) {
+ parts.push(
+ `${extra.sectionHeading ?? 'section'} missing ${extra.missingSectionTokens.join(', ')}`
+ );
+ }
+ return `Structured validation failed in ${check.file}: ${parts.join('; ')}`;
+ }
+ return `Validation failed in ${check.file}`;
+}
+
+/**
+ * Assemble the full failure-result object for one check.
+ *
+ * Optional detail fields (missing/forbidden/order/structured) are only
+ * included when non-empty so the JSON output stays compact. Note the spread
+ * order is deliberate: `...extra` comes last among the computed fields and
+ * may therefore carry structured-validation keys (and sectionHeading) into
+ * the result; `suggestedFix` from the check definition is appended last.
+ *
+ * @param {object} check - the check definition (ruleId, file, severity, ...)
+ * @param {string[]} missing - required tokens not found in the text
+ * @param {string[]} forbidden - forbidden tokens found in the text
+ * @param {object|null} [orderFailure] - result of findOutOfOrder, if any
+ * @param {object} [extra] - structured/section validation findings
+ * @param {string} [text] - file text, used to excerpt forbidden matches
+ */
+function buildFailureResult(
+  check,
+  missing,
+  forbidden,
+  orderFailure = null,
+  extra = {},
+  text = ''
+) {
+  const reason = classifyFailureReason(missing, forbidden, orderFailure, extra);
+  const message = buildFailureMessage(check, reason, missing, forbidden, extra);
+  return {
+    // Fall back to a file-derived rule id when the check has none.
+    ruleId: check.ruleId ?? `text.${check.file}`,
+    severity: check.severity ?? 'error',
+    file: check.file,
+    status: 'fail',
+    reason,
+    message,
+    ...(missing.length > 0 ? { missing } : {}),
+    ...(forbidden.length > 0 ? { forbidden } : {}),
+    // For each forbidden token, include a short excerpt of where it appears
+    // so the failure is actionable without reopening the file.
+    ...(forbidden.length > 0
+      ? {
+          forbiddenContext: Object.fromEntries(
+            forbidden.map((token) => [token, excerptAround(text, token)])
+          ),
+        }
+      : {}),
+    ...(orderFailure
+      ? {
+          outOfOrder: orderFailure.expected,
+          orderDetails: orderFailure,
+        }
+      : {}),
+    ...extra,
+    ...(check.suggestedFix ? { suggestedFix: check.suggestedFix } : {}),
+  };
+}
+
+function validateSingleCheck(check, text) {
+ const missing = (check.mustInclude ?? []).filter(
+ (value) => !text.includes(value)
+ );
+ const forbidden = (check.mustNotInclude ?? []).filter((value) =>
+ text.includes(value)
+ );
+ const orderFailure =
+ missing.length === 0 ? findOutOfOrder(text, check.mustAppearInOrder) : null;
+
+ const structured = validateJsonFence(check.jsonFence, text);
+ const sectionValidation = validateSectionTokens(check, text);
+
+ const hasFailure =
+ missing.length > 0 ||
+ forbidden.length > 0 ||
+ !!orderFailure ||
+ !!structured.jsonFenceError ||
+ (structured.missingJsonKeys?.length ?? 0) > 0 ||
+ (structured.emptyJsonKeys?.length ?? 0) > 0 ||
+ (sectionValidation.missingSectionTokens?.length ?? 0) > 0;
+
+ if (hasFailure) {
+ return buildFailureResult(
+ check,
+ missing,
+ forbidden,
+ orderFailure,
+ {
+ ...structured,
+ ...sectionValidation,
+ ...(check.sectionHeading
+ ? { sectionHeading: check.sectionHeading }
+ : {}),
+ },
+ text
+ );
+ }
+
+ return {
+ ruleId: check.ruleId ?? `text.${check.file}`,
+ severity: check.severity ?? 'error',
+ file: check.file,
+ status: 'pass',
+ };
+}
+
+export function validateWorkflowSkillText(checks, filesByPath) {
+ const results = [];
+ let failed = false;
+
+ for (const check of checks) {
+ const text = filesByPath[check.file];
+ if (typeof text !== 'string') {
+ failed = true;
+ results.push({
+ ruleId: check.ruleId ?? `text.${check.file}`,
+ severity: check.severity ?? 'error',
+ file: check.file,
+ status: 'error',
+ error: 'file_not_found',
+ });
+ continue;
+ }
+
+ const result = validateSingleCheck(check, text);
+ if (result.status === 'fail') failed = true;
+ results.push(result);
+ }
+
+ return {
+ ok: !failed,
+ checked: checks.length,
+ results,
+ };
+}
diff --git a/scripts/lib/workflow-skill-checks.mjs b/scripts/lib/workflow-skill-checks.mjs
new file mode 100644
index 0000000000..e89bae95a9
--- /dev/null
+++ b/scripts/lib/workflow-skill-checks.mjs
@@ -0,0 +1,995 @@
+/**
+ * Validation rules for the two-skill workflow pipeline: teach → build.
+ *
+ * Each check targets a specific file and declares required/forbidden content.
+ * The validator engine in validate-workflow-skill-files.mjs runs these checks
+ * against actual file contents.
+ */
+
+// ---------------------------------------------------------------------------
+// workflow-teach checks
+// ---------------------------------------------------------------------------
+
+// Checks for the workflow-teach skill (Stage 1): it must teach into a
+// `.workflow.md` context file and must not leak build-stage or legacy
+// blueprint concepts.
+export const teachChecks = [
+  // The skill must name every required `.workflow.md` section and must not
+  // reference the retired JSON-context/blueprint contract.
+  {
+    ruleId: 'skill.workflow-teach',
+    file: 'skills/workflow-teach/SKILL.md',
+    mustInclude: [
+      '.workflow.md',
+      '## Project Context',
+      '## Business Rules',
+      '## External Systems',
+      '## Failure Expectations',
+      '## Observability Needs',
+      '## Approved Patterns',
+      '## Open Questions',
+    ],
+    mustNotInclude: [
+      '.workflow-skills/context.json',
+      'contractVersion',
+      'WorkflowBlueprint',
+    ],
+  },
+  // The interview script must cover triggers, idempotency, failure
+  // classification, approvals, timeouts, compensation, and observability.
+  {
+    ruleId: 'skill.workflow-teach.interview',
+    file: 'skills/workflow-teach/SKILL.md',
+    mustInclude: [
+      'What starts this workflow',
+      'Which side effects must be safe to repeat',
+      'What counts as a permanent failure',
+      'Does any step require human approval',
+      'What timeout or expiry rules exist',
+      'what compensation is required',
+      'What must operators be able to observe',
+    ],
+  },
+  // The skill must describe itself as stage 1 of the two-stage pipeline and
+  // must not reference skills from the retired four-stage pipeline.
+  {
+    ruleId: 'skill.workflow-teach.loop-position',
+    file: 'skills/workflow-teach/SKILL.md',
+    mustInclude: ['Stage 1 of 2', 'workflow-build'],
+    mustNotInclude: [
+      'Stage 1 of 4',
+      'workflow-design',
+      'workflow-stress',
+      'workflow-verify',
+    ],
+  },
+];
+
+// ---------------------------------------------------------------------------
+// workflow-build checks
+// ---------------------------------------------------------------------------
+
+export const buildChecks = [
+ {
+ ruleId: 'skill.workflow-build',
+ file: 'skills/workflow-build/SKILL.md',
+ mustInclude: [
+ '.workflow.md',
+ 'skills/workflow/SKILL.md',
+ '"use workflow"',
+ '"use step"',
+ 'createHook',
+ 'createWebhook',
+ 'getWritable',
+ 'RetryableError',
+ 'FatalError',
+ 'start()',
+ 'Determinism boundary',
+ 'Step granularity',
+ 'Idempotency keys',
+ 'Rollback',
+ 'compensation',
+ 'self-review',
+ 'Self-review',
+ ],
+ mustNotInclude: [
+ 'WorkflowBlueprint',
+ '.workflow-skills/context.json',
+ '.workflow-skills/blueprints',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-build.loop-position',
+ file: 'skills/workflow-build/SKILL.md',
+ mustInclude: ['Stage 2 of 2'],
+ mustNotInclude: ['Stage 2 of 4', 'Stage 3 of 4', 'Stage 4 of 4'],
+ },
+ {
+ ruleId: 'skill.workflow-build.stress-checklist',
+ file: 'skills/workflow-build/SKILL.md',
+ mustInclude: [
+ '### 1. Determinism boundary',
+ '### 2. Step granularity',
+ '### 3. Pass-by-value',
+ '### 4. Hook token strategy',
+ '### 5. Webhook response mode',
+ '### 6. `start()` placement',
+ '### 7. Stream I/O placement',
+ '### 8. Idempotency keys',
+ '### 9. Retry semantics',
+ '### 10. Rollback',
+ '### 11. Observability streams',
+ '### 12. Integration test coverage',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-build.hard-rules',
+ file: 'skills/workflow-build/SKILL.md',
+ mustInclude: [
+ 'Workflow functions orchestrate only',
+ 'All side effects live in',
+ '`createHook()` may use deterministic tokens',
+ '`createWebhook()` may NOT use deterministic tokens',
+ 'Stream I/O happens in steps',
+ '`start()` inside a workflow must be wrapped in a step',
+ 'Return mutated values from steps',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-build.interactive-phases',
+ file: 'skills/workflow-build/SKILL.md',
+ mustInclude: [
+ 'Phase 1',
+ 'Phase 2',
+ 'Phase 3',
+ 'Phase 4',
+ 'Phase 5',
+ 'Propose step boundaries',
+ 'Flag relevant traps',
+ 'Decide failure modes',
+ 'Write code',
+ ],
+ mustAppearInOrder: ['Phase 1', 'Phase 2', 'Phase 3', 'Phase 4', 'Phase 5'],
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Teach golden checks
+// ---------------------------------------------------------------------------
+
+export const teachGoldenChecks = [
+ {
+ ruleId: 'golden.teach.approval-expiry-escalation',
+ file: 'skills/workflow-teach/goldens/approval-expiry-escalation.md',
+ mustInclude: [
+ '## Interview Context',
+ '## Expected `.workflow.md` Sections',
+ '### Business Rules',
+ '### Failure Expectations',
+ '### Observability Needs',
+ 'workflow-build',
+ ],
+ mustNotInclude: [
+ 'context.json',
+ 'WorkflowBlueprint',
+ 'workflow-design',
+ 'workflow-stress',
+ 'workflow-verify',
+ ],
+ },
+ {
+ ruleId: 'golden.teach.duplicate-webhook-order',
+ file: 'skills/workflow-teach/goldens/duplicate-webhook-order.md',
+ mustInclude: [
+ '## Interview Context',
+ '## Expected `.workflow.md` Sections',
+ '### Business Rules',
+ 'idempotency',
+ 'workflow-build',
+ ],
+ mustNotInclude: ['context.json', 'WorkflowBlueprint'],
+ },
+ {
+ ruleId: 'golden.teach.operator-observability-streams',
+ file: 'skills/workflow-teach/goldens/operator-observability-streams.md',
+ mustInclude: [
+ '## Interview Context',
+ '## Expected `.workflow.md` Sections',
+ '### Observability Needs',
+ 'stream',
+ 'workflow-build',
+ ],
+ mustNotInclude: ['context.json', 'WorkflowBlueprint'],
+ },
+ {
+ ruleId: 'golden.teach.partial-side-effect-compensation',
+ file: 'skills/workflow-teach/goldens/partial-side-effect-compensation.md',
+ mustInclude: [
+ '## Interview Context',
+ '## Expected `.workflow.md` Sections',
+ '### Failure Expectations',
+ 'compensation',
+ 'workflow-build',
+ ],
+ mustNotInclude: ['context.json', 'WorkflowBlueprint'],
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Build golden checks
+// ---------------------------------------------------------------------------
+
+export const buildGoldenChecks = [
+ {
+ ruleId: 'golden.build.compensation-saga',
+ file: 'skills/workflow-build/goldens/compensation-saga.md',
+ mustInclude: [
+ '## What the Build Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ '"use step"',
+ 'compensation',
+ 'idempotency',
+ 'refund',
+ '## Verification Artifact',
+ '### Verification Summary',
+ 'verification_plan_ready',
+ ],
+ jsonFence: {
+ language: 'json',
+ requiredKeys: [
+ 'contractVersion',
+ 'blueprintName',
+ 'files',
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ nonEmptyKeys: ['files', 'testMatrix', 'runtimeCommands'],
+ },
+ sectionHeading: '## Verification Artifact',
+ mustIncludeWithinSection: [
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ suggestedFix:
+ 'Inside `## Verification Artifact`, add a fenced `json` block containing `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`. Immediately after the fence, add `### Verification Summary` followed by a single-line `{"event":"verification_plan_ready",...}` JSON object.',
+ },
+ {
+ ruleId: 'golden.build.child-workflow-handoff',
+ file: 'skills/workflow-build/goldens/child-workflow-handoff.md',
+ mustInclude: [
+ '## What the Build Skill Should Catch',
+ '### Phase 2',
+ '## Expected Code Output',
+ '"use step"',
+ 'start()',
+ ],
+ },
+ {
+ ruleId: 'golden.build.rate-limit-retry',
+ file: 'skills/workflow-build/goldens/rate-limit-retry.md',
+ mustInclude: [
+ '## What the Build Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ 'RetryableError',
+ 'FatalError',
+ '429',
+ ],
+ },
+ {
+ ruleId: 'golden.build.approval-timeout-streaming',
+ file: 'skills/workflow-build/goldens/approval-timeout-streaming.md',
+ mustInclude: [
+ '## What the Build Skill Should Catch',
+ '### Phase 2',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ 'getWritable',
+ 'waitForHook',
+ 'resumeHook',
+ 'waitForSleep',
+ 'wakeUp',
+ ],
+ },
+ {
+ ruleId: 'golden.build.multi-event-hook-loop',
+ file: 'skills/workflow-build/goldens/multi-event-hook-loop.md',
+ mustInclude: [
+ '## What the Build Skill Should Catch',
+ '### Phase 2',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ 'createHook',
+ 'Promise.all',
+ 'deterministic',
+ ],
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario skill checks: workflow-approval
+// ---------------------------------------------------------------------------
+
+export const approvalChecks = [
+ {
+ ruleId: 'skill.workflow-approval',
+ file: 'skills/workflow-approval/SKILL.md',
+ mustInclude: [
+ 'user-invocable: true',
+ 'argument-hint:',
+ '.workflow.md',
+ 'approval',
+ 'createHook',
+ 'sleep',
+ 'escalation',
+ 'deterministic',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-approval.context-capture',
+ file: 'skills/workflow-approval/SKILL.md',
+ mustInclude: [
+ 'Approval actors',
+ 'Timeout/expiry rules',
+ 'Hook token strategy',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-approval.required-constraints',
+ file: 'skills/workflow-approval/SKILL.md',
+ mustInclude: [
+ 'Deterministic hook tokens',
+ 'Expiry via `sleep()`',
+ 'Escalation behavior',
+ 'Promise.race',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-approval.test-coverage',
+ file: 'skills/workflow-approval/SKILL.md',
+ mustInclude: [
+ 'waitForHook',
+ 'resumeHook',
+ 'waitForSleep',
+ 'wakeUp',
+ ],
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario skill checks: workflow-webhook
+// ---------------------------------------------------------------------------
+
+export const webhookChecks = [
+ {
+ ruleId: 'skill.workflow-webhook',
+ file: 'skills/workflow-webhook/SKILL.md',
+ mustInclude: [
+ 'user-invocable: true',
+ 'argument-hint:',
+ '.workflow.md',
+ 'webhook',
+ 'duplicate',
+ 'idempotency',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-webhook.context-capture',
+ file: 'skills/workflow-webhook/SKILL.md',
+ mustInclude: [
+ 'Webhook source',
+ 'Duplicate handling',
+ 'Idempotency strategy',
+ 'Response timeout',
+ 'Compensation requirements',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-webhook.required-constraints',
+ file: 'skills/workflow-webhook/SKILL.md',
+ mustInclude: [
+ 'Duplicate-delivery handling',
+ 'Stable idempotency keys',
+ 'Webhook response mode',
+ 'static',
+ 'manual',
+ 'Compensation when downstream steps fail',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-webhook.test-coverage',
+ file: 'skills/workflow-webhook/SKILL.md',
+ mustInclude: [
+ 'Happy path',
+ 'Duplicate webhook',
+ 'Compensation path',
+ ],
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario golden checks
+// ---------------------------------------------------------------------------
+
+export const approvalGoldenChecks = [
+ {
+ ruleId: 'golden.approval.approval-expiry-escalation',
+ file: 'skills/workflow-approval/goldens/approval-expiry-escalation.md',
+ mustInclude: [
+ '## Context Capture',
+ '## What the Scenario Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ '"use step"',
+ 'createHook',
+ 'sleep',
+ 'escalation',
+ 'waitForHook',
+ 'resumeHook',
+ 'waitForSleep',
+ 'wakeUp',
+ '## Verification Artifact',
+ '### Verification Summary',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ jsonFence: {
+ language: 'json',
+ requiredKeys: [
+ 'contractVersion',
+ 'blueprintName',
+ 'files',
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ nonEmptyKeys: ['files', 'testMatrix', 'runtimeCommands'],
+ },
+ sectionHeading: '## Verification Artifact',
+ mustIncludeWithinSection: [
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ suggestedFix:
+ 'Inside `## Verification Artifact`, add a fenced `json` block containing `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`. Immediately after the fence, add `### Verification Summary` followed by a single-line `{"event":"verification_plan_ready",...}` JSON object.',
+ },
+];
+
+export const webhookGoldenChecks = [
+ {
+ ruleId: 'golden.webhook.duplicate-webhook-order',
+ file: 'skills/workflow-webhook/goldens/duplicate-webhook-order.md',
+ mustInclude: [
+ '## Context Capture',
+ '## What the Scenario Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ '"use step"',
+ 'duplicate',
+ 'idempotency',
+ 'compensation',
+ 'refund',
+ '## Verification Artifact',
+ '### Verification Summary',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ jsonFence: {
+ language: 'json',
+ requiredKeys: [
+ 'contractVersion',
+ 'blueprintName',
+ 'files',
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ nonEmptyKeys: ['files', 'testMatrix', 'runtimeCommands'],
+ },
+ sectionHeading: '## Verification Artifact',
+ mustIncludeWithinSection: [
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ suggestedFix:
+ 'Inside `## Verification Artifact`, add a fenced `json` block containing `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`. Immediately after the fence, add `### Verification Summary` followed by a single-line `{"event":"verification_plan_ready",...}` JSON object.',
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario skill checks: workflow-saga
+// ---------------------------------------------------------------------------
+
+export const sagaChecks = [
+ {
+ ruleId: 'skill.workflow-saga',
+ file: 'skills/workflow-saga/SKILL.md',
+ mustInclude: [
+ 'user-invocable: true',
+ 'argument-hint:',
+ '.workflow.md',
+ 'compensation',
+ 'partial',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-saga.context-capture',
+ file: 'skills/workflow-saga/SKILL.md',
+ mustInclude: [
+ 'Side-effecting steps',
+ 'Compensation ordering',
+ 'Compensation idempotency',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-saga.required-constraints',
+ file: 'skills/workflow-saga/SKILL.md',
+ mustInclude: [
+ 'Compensation for every irreversible step',
+ 'Compensation ordering',
+ 'Compensation idempotency keys',
+ 'Compensation must eventually succeed',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-saga.test-coverage',
+ file: 'skills/workflow-saga/SKILL.md',
+ mustInclude: [
+ 'Happy path',
+ 'Compensation path',
+ 'Compensation idempotency',
+ ],
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario skill checks: workflow-timeout
+// ---------------------------------------------------------------------------
+
+export const timeoutChecks = [
+ {
+ ruleId: 'skill.workflow-timeout',
+ file: 'skills/workflow-timeout/SKILL.md',
+ mustInclude: [
+ 'user-invocable: true',
+ 'argument-hint:',
+ '.workflow.md',
+ 'sleep',
+ 'waitForSleep',
+ 'wakeUp',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-timeout.context-capture',
+ file: 'skills/workflow-timeout/SKILL.md',
+ mustInclude: [
+ 'Timeout triggers',
+ 'Timeout outcomes',
+ 'Sleep/wake-up pairing',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-timeout.required-constraints',
+ file: 'skills/workflow-timeout/SKILL.md',
+ mustInclude: [
+ 'Every suspension must have a bounded lifetime',
+ 'Sleep/wake-up correctness',
+ 'Hook/sleep races',
+ 'Promise.race',
+ 'Timeout as a domain outcome',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-timeout.test-coverage',
+ file: 'skills/workflow-timeout/SKILL.md',
+ mustInclude: [
+ 'waitForHook',
+ 'resumeHook',
+ 'waitForSleep',
+ 'wakeUp',
+ ],
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario golden checks: workflow-saga
+// ---------------------------------------------------------------------------
+
+export const sagaGoldenChecks = [
+ {
+ ruleId: 'golden.saga.compensation-saga',
+ file: 'skills/workflow-saga/goldens/compensation-saga.md',
+ mustInclude: [
+ '## Context Capture',
+ '## What the Scenario Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ '"use step"',
+ 'compensation',
+ 'refund',
+ '## Verification Artifact',
+ '### Verification Summary',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ jsonFence: {
+ language: 'json',
+ requiredKeys: [
+ 'contractVersion',
+ 'blueprintName',
+ 'files',
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ nonEmptyKeys: ['files', 'testMatrix', 'runtimeCommands'],
+ },
+ sectionHeading: '## Verification Artifact',
+ mustIncludeWithinSection: [
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ suggestedFix:
+ 'Inside `## Verification Artifact`, add a fenced `json` block containing `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`. Immediately after the fence, add `### Verification Summary` followed by a single-line `{"event":"verification_plan_ready",...}` JSON object.',
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario golden checks: workflow-timeout
+// ---------------------------------------------------------------------------
+
+export const timeoutGoldenChecks = [
+ {
+ ruleId: 'golden.timeout.approval-timeout-streaming',
+ file: 'skills/workflow-timeout/goldens/approval-timeout-streaming.md',
+ mustInclude: [
+ '## Context Capture',
+ '## What the Scenario Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ '"use step"',
+ 'sleep',
+ 'waitForSleep',
+ 'wakeUp',
+ '## Verification Artifact',
+ '### Verification Summary',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ jsonFence: {
+ language: 'json',
+ requiredKeys: [
+ 'contractVersion',
+ 'blueprintName',
+ 'files',
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ nonEmptyKeys: ['files', 'testMatrix', 'runtimeCommands'],
+ },
+ sectionHeading: '## Verification Artifact',
+ mustIncludeWithinSection: [
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ suggestedFix:
+ 'Inside `## Verification Artifact`, add a fenced `json` block containing `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`. Immediately after the fence, add `### Verification Summary` followed by a single-line `{"event":"verification_plan_ready",...}` JSON object.',
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario skill checks: workflow-idempotency
+// ---------------------------------------------------------------------------
+
+export const idempotencyChecks = [
+ {
+ ruleId: 'skill.workflow-idempotency',
+ file: 'skills/workflow-idempotency/SKILL.md',
+ mustInclude: [
+ 'user-invocable: true',
+ 'argument-hint:',
+ '.workflow.md',
+ 'duplicate',
+ 'retry',
+ 'idempotency',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-idempotency.context-capture',
+ file: 'skills/workflow-idempotency/SKILL.md',
+ mustInclude: [
+ 'Duplicate ingress',
+ 'Replay safety',
+ 'Idempotency key strategy',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-idempotency.required-constraints',
+ file: 'skills/workflow-idempotency/SKILL.md',
+ mustInclude: [
+ 'Duplicate delivery detection',
+ 'Stable idempotency keys',
+ 'Replay safety verification',
+ 'Compensation with idempotency keys',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-idempotency.test-coverage',
+ file: 'skills/workflow-idempotency/SKILL.md',
+ mustInclude: [
+ 'Happy path',
+ 'Duplicate event',
+ 'Replay safety',
+ 'Compensation path',
+ ],
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario skill checks: workflow-observe
+// ---------------------------------------------------------------------------
+
+export const observeChecks = [
+ {
+ ruleId: 'skill.workflow-observe',
+ file: 'skills/workflow-observe/SKILL.md',
+ mustInclude: [
+ 'user-invocable: true',
+ 'argument-hint:',
+ '.workflow.md',
+ 'stream',
+ 'namespace',
+ 'operator',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-observe.context-capture',
+ file: 'skills/workflow-observe/SKILL.md',
+ mustInclude: [
+ 'Operator audience',
+ 'Progress granularity',
+ 'Stream namespaces',
+ 'Terminal signals',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-observe.required-constraints',
+ file: 'skills/workflow-observe/SKILL.md',
+ mustInclude: [
+ 'Stream namespace separation',
+ 'Stream I/O placement',
+ 'Structured stream events',
+ 'Terminal signals',
+ 'Operator-queryable state',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-observe.test-coverage',
+ file: 'skills/workflow-observe/SKILL.md',
+ mustInclude: [
+ 'Happy path with stream verification',
+ 'Failure path with terminal signal',
+ 'Namespace isolation',
+ ],
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario golden checks: workflow-idempotency
+// ---------------------------------------------------------------------------
+
+export const idempotencyGoldenChecks = [
+ {
+ ruleId: 'golden.idempotency.duplicate-webhook-order',
+ file: 'skills/workflow-idempotency/goldens/duplicate-webhook-order.md',
+ mustInclude: [
+ '## Context Capture',
+ '## What the Scenario Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ '"use step"',
+ 'duplicate',
+ 'idempotency',
+ 'compensation',
+ 'refund',
+ '## Verification Artifact',
+ '### Verification Summary',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ jsonFence: {
+ language: 'json',
+ requiredKeys: [
+ 'contractVersion',
+ 'blueprintName',
+ 'files',
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ nonEmptyKeys: ['files', 'testMatrix', 'runtimeCommands'],
+ },
+ sectionHeading: '## Verification Artifact',
+ mustIncludeWithinSection: [
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ suggestedFix:
+ 'Inside `## Verification Artifact`, add a fenced `json` block containing `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`. Immediately after the fence, add `### Verification Summary` followed by a single-line `{"event":"verification_plan_ready",...}` JSON object.',
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Scenario golden checks: workflow-observe
+// ---------------------------------------------------------------------------
+
+export const observeGoldenChecks = [
+ {
+ ruleId: 'golden.observe.operator-observability-streams',
+ file: 'skills/workflow-observe/goldens/operator-observability-streams.md',
+ mustInclude: [
+ '## Context Capture',
+ '## What the Scenario Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ '"use step"',
+ 'stream',
+ 'namespace',
+ 'operator',
+ '## Verification Artifact',
+ '### Verification Summary',
+ 'verification_plan_ready',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ ],
+ jsonFence: {
+ language: 'json',
+ requiredKeys: [
+ 'contractVersion',
+ 'blueprintName',
+ 'files',
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ nonEmptyKeys: ['files', 'testMatrix', 'runtimeCommands'],
+ },
+ sectionHeading: '## Verification Artifact',
+ mustIncludeWithinSection: [
+ 'testMatrix',
+ 'runtimeCommands',
+ 'implementationNotes',
+ ],
+ suggestedFix:
+ 'Inside `## Verification Artifact`, add a fenced `json` block containing `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`. Immediately after the fence, add `### Verification Summary` followed by a single-line `{"event":"verification_plan_ready",...}` JSON object.',
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Review skill checks: workflow-audit
+// ---------------------------------------------------------------------------
+
+export const auditChecks = [
+ {
+ ruleId: 'skill.workflow-audit',
+ file: 'skills/workflow-audit/SKILL.md',
+ mustInclude: [
+ 'user-invocable: true',
+ 'argument-hint:',
+ 'skills/workflow/SKILL.md',
+ '.workflow.md',
+ '## Audit Scorecard',
+ '## Executive Summary',
+ '## Detailed Findings by Severity',
+ '## Systemic Risks',
+ '## Positive Findings',
+ '## Audit Summary',
+ 'workflow_audit_complete',
+ 'P0 Blocking',
+ 'P1 Major',
+ 'P2 Minor',
+ 'P3 Polish',
+ ],
+ mustNotInclude: [
+ '.workflow-skills/',
+ 'WorkflowBlueprint',
+ 'verification_plan_ready',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-audit.checklist',
+ file: 'skills/workflow-audit/SKILL.md',
+ mustInclude: [
+ 'Determinism boundary',
+ 'Step granularity',
+ 'Pass-by-value / serialization',
+ 'Hook token strategy',
+ 'Webhook response mode',
+ '`start()` placement',
+ 'Stream I/O placement',
+ 'Idempotency keys',
+ 'Retry semantics',
+ 'Rollback / compensation',
+ 'Observability streams',
+ 'Integration test coverage',
+ ],
+ },
+ {
+ ruleId: 'skill.workflow-audit.summary-contract',
+ file: 'skills/workflow-audit/SKILL.md',
+ mustInclude: [
+ '"event":"workflow_audit_complete"',
+ '"maxScore":48',
+ '"contractVersion":"1"',
+ ],
+ },
+];
+
+// ---------------------------------------------------------------------------
+// Aggregated check lists
+// ---------------------------------------------------------------------------
+
+export const checks = [...teachChecks, ...buildChecks, ...approvalChecks, ...webhookChecks, ...sagaChecks, ...timeoutChecks, ...idempotencyChecks, ...observeChecks, ...auditChecks];
+
+export const allGoldenChecks = [...teachGoldenChecks, ...buildGoldenChecks, ...approvalGoldenChecks, ...webhookGoldenChecks, ...sagaGoldenChecks, ...timeoutGoldenChecks, ...idempotencyGoldenChecks, ...observeGoldenChecks];
diff --git a/scripts/lib/workflow-skill-surface.mjs b/scripts/lib/workflow-skill-surface.mjs
new file mode 100644
index 0000000000..f76e54df8a
--- /dev/null
+++ b/scripts/lib/workflow-skill-surface.mjs
@@ -0,0 +1,86 @@
/**
 * workflow-skill-surface.mjs
 *
 * Canonical source of truth for the workflow skill surface.
 * Both the builder and test suites import from here so scenario
 * inventory, install directory count, and total output math
 * are defined in exactly one place.
 */

/** Skills that must always be installed. */
export const CORE_SKILLS = ['workflow', 'workflow-teach', 'workflow-build'];

/** Skills that may be installed but are not required. */
export const OPTIONAL_SKILLS = ['workflow-init', 'workflow-audit'];

/** One skill per supported workflow scenario. */
export const SCENARIO_SKILLS = [
  'workflow-approval',
  'workflow-webhook',
  'workflow-saga',
  'workflow-timeout',
  'workflow-idempotency',
  'workflow-observe',
];

/** Skills a user can invoke directly: every scenario skill plus the auditor. */
export const USER_INVOKABLE_SKILLS = SCENARIO_SKILLS.concat('workflow-audit');

/**
 * Summarize the discovered skill surface for structured logging,
 * --check output, and test assertions.
 *
 * @param {Array<{dir: string, goldens: string[]}>} skills — discovered skills
 * @param {string[] | Record<string, unknown>} providers — provider names, or a
 *   map whose keys are provider names
 * @returns {{
 *   core: string[],
 *   scenario: string[],
 *   optional: string[],
 *   discovered: string[],
 *   counts: {
 *     core: number,
 *     scenarios: number,
 *     optional: number,
 *     skills: number,
 *     installDirectories: number,
 *     goldensPerProvider: number,
 *     providers: number,
 *     outputsPerProvider: number,
 *     totalOutputs: number,
 *   }
 * }}
 */
export function summarizeSkillSurface(skills, providers) {
  // Providers may arrive as a list of names or as a keyed map.
  const providerCount = (
    Array.isArray(providers) ? providers : Object.keys(providers)
  ).length;

  // Single pass over the discovered skills: collect directory names and
  // total golden-fixture count at the same time.
  const discovered = [];
  let goldensPerProvider = 0;
  for (const { dir, goldens } of skills) {
    discovered.push(dir);
    goldensPerProvider += goldens.length;
  }
  const discoveredSet = new Set(discovered);

  // Classify discovered skills by the canonical category lists, preserving
  // the canonical ordering (filter over the category, not the discovery).
  const core = CORE_SKILLS.filter((name) => discoveredSet.has(name));
  const scenario = SCENARIO_SKILLS.filter((name) => discoveredSet.has(name));
  const optional = OPTIONAL_SKILLS.filter((name) => discoveredSet.has(name));

  // Each provider receives one directory per skill plus every golden file.
  const outputsPerProvider = discovered.length + goldensPerProvider;

  return {
    core,
    scenario,
    optional,
    discovered,
    counts: {
      core: core.length,
      scenarios: scenario.length,
      optional: optional.length,
      skills: discovered.length,
      installDirectories: discovered.length,
      goldensPerProvider,
      providers: providerCount,
      outputsPerProvider,
      totalOutputs: providerCount * outputsPerProvider,
    },
  };
}
diff --git a/scripts/validate-workflow-skill-files.mjs b/scripts/validate-workflow-skill-files.mjs
new file mode 100644
index 0000000000..60cc48a7a3
--- /dev/null
+++ b/scripts/validate-workflow-skill-files.mjs
@@ -0,0 +1,90 @@
+import { readFileSync, existsSync } from 'node:fs';
+import { validateWorkflowSkillText } from './lib/validate-workflow-skill-files.mjs';
+import { checks, allGoldenChecks } from './lib/workflow-skill-checks.mjs';
+
// When --summary is passed, stdout carries only the compact completion line.
const SUMMARY_ONLY = process.argv.includes('--summary');
// Full manifest: SKILL.md content checks plus golden-fixture checks.
const allChecks = [...checks, ...allGoldenChecks];
+
/**
 * Emit one structured NDJSON log line to stderr.
 * Caller-supplied fields are spread last, so they may override `event`/`ts`.
 *
 * @param {string} event - event name for the log line
 * @param {Record<string, unknown>} [data] - extra fields merged into the line
 */
function log(event, data = {}) {
  const line = JSON.stringify({ event, ts: new Date().toISOString(), ...data });
  process.stderr.write(`${line}\n`);
}
+
log('manifest_loaded', {
  skillChecks: checks.length,
  goldenChecks: allGoldenChecks.length,
  total: allChecks.length,
});

// Read each referenced file at most once, keyed by path.
// Use a key-presence check rather than truthiness: with the truthy check an
// empty file ('' is falsy) would be stat'd, re-read, and re-counted in
// `loadedFiles` for every check that references it.
const filesByPath = {};
let loadedFiles = 0;
for (const check of allChecks) {
  if (Object.hasOwn(filesByPath, check.file)) continue;
  // Missing files are left out of the map; the validator reports them
  // per-check as file_not_found errors.
  if (!existsSync(check.file)) continue;
  filesByPath[check.file] = readFileSync(check.file, 'utf8');
  loadedFiles += 1;
}
log('files_loaded', { count: loadedFiles });
+
// Evaluate every check against the in-memory file contents.
const result = validateWorkflowSkillText(allChecks, filesByPath);

// One structured log line per evaluated check.
for (const item of result.results) {
  log('check_evaluated', {
    ruleId: item.ruleId,
    file: item.file,
    status: item.status,
    reason: item.reason ?? null,
  });
}

// Tally per-status counts, out-of-order occurrences, and per-reason counts.
const summary = { pass: 0, fail: 0, error: 0, outOfOrder: 0, reasons: {} };
for (const item of result.results) {
  summary[item.status] = (summary[item.status] ?? 0) + 1;
  if (item.outOfOrder) {
    summary.outOfOrder += 1;
  }
  if (item.reason) {
    summary.reasons[item.reason] = (summary.reasons[item.reason] ?? 0) + 1;
  }
}

// Full report: the validator result augmented with the tallies.
const output = { ...result, summary };
+
/**
 * Shape the final machine-readable completion event from the validation
 * result and the per-status summary tallies.
 *
 * @param {{ ok: boolean, checked: number }} result - validator outcome
 * @param {{ pass: number, fail: number, error: number, outOfOrder: number,
 *   reasons: Record<string, number> }} summary - aggregated tallies
 * @returns {object} completion event payload
 */
function buildCompletionEvent(result, summary) {
  const { ok, checked } = result;
  const { pass, fail, error, outOfOrder, reasons } = summary;
  return {
    event: 'workflow_skill_validation_complete',
    ok,
    checked,
    pass,
    fail,
    error,
    outOfOrder,
    reasonCounts: reasons,
  };
}
+
const completion = buildCompletionEvent(result, summary);

// Mirror the completion event onto stderr. `log()` re-adds the event name,
// so strip the `event` field and forward the remaining payload as-is.
const { event: completionEvent, ...completionFields } = completion;
log(completionEvent, completionFields);

// stdout carries either the compact completion line (--summary) or the full
// pretty-printed report; the exit code reflects overall validation status.
const stdoutBody = SUMMARY_ONLY
  ? `${JSON.stringify(completion)}\n`
  : `${JSON.stringify(output, null, 2)}\n`;
process.stdout.write(stdoutBody);
process.exit(result.ok ? 0 : 1);
diff --git a/scripts/validate-workflow-skill-files.test.mjs b/scripts/validate-workflow-skill-files.test.mjs
new file mode 100644
index 0000000000..9eb6fa6e51
--- /dev/null
+++ b/scripts/validate-workflow-skill-files.test.mjs
@@ -0,0 +1,1177 @@
+import { readFileSync } from 'node:fs';
+import { describe, expect, it } from 'vitest';
+import { validateWorkflowSkillText } from './lib/validate-workflow-skill-files.mjs';
+import {
+ checks,
+ allGoldenChecks,
+ teachChecks,
+ buildChecks,
+ teachGoldenChecks,
+ buildGoldenChecks,
+ approvalChecks,
+ webhookChecks,
+ approvalGoldenChecks,
+ webhookGoldenChecks,
+} from './lib/workflow-skill-checks.mjs';
+
/**
 * Evaluate a single check against in-memory content, mapped under the
 * check's own file path so no filesystem access is needed.
 */
function runSingleCheck(check, content) {
  const filesByPath = { [check.file]: content };
  return validateWorkflowSkillText([check], filesByPath);
}
+
// ---------------------------------------------------------------------------
// Validator engine tests
// ---------------------------------------------------------------------------

describe('validateWorkflowSkillText', () => {
  it('returns ok:true when all required tokens are present', () => {
    const check = { ruleId: 'test', file: 'test.md', mustInclude: ['foo', 'bar'] };
    expect(runSingleCheck(check, 'foo bar baz').ok).toBe(true);
  });

  it('returns ok:false when required tokens are missing', () => {
    const check = {
      ruleId: 'test',
      file: 'test.md',
      mustInclude: ['foo', 'missing'],
    };
    const { ok, results } = runSingleCheck(check, 'foo bar baz');
    expect(ok).toBe(false);
    expect(results[0].reason).toBe('missing_required_content');
    expect(results[0].missing).toContain('missing');
  });

  it('returns ok:false when forbidden tokens are present', () => {
    const check = { ruleId: 'test', file: 'test.md', mustNotInclude: ['bad'] };
    const { ok, results } = runSingleCheck(check, 'something bad here');
    expect(ok).toBe(false);
    expect(results[0].reason).toBe('forbidden_content_present');
    expect(results[0].forbidden).toContain('bad');
  });

  it('includes forbiddenContext excerpts for forbidden-token failures', () => {
    const check = {
      ruleId: 'test',
      file: 'test.md',
      mustNotInclude: ['bad token'],
    };
    const { ok, results } = runSingleCheck(
      check,
      'some text before bad token some text after'
    );
    expect(ok).toBe(false);
    expect(results[0].forbiddenContext).toBeDefined();
    expect(results[0].forbiddenContext['bad token']).toContain('bad token');
  });

  it('returns ok:false when tokens appear out of order', () => {
    const check = {
      ruleId: 'test',
      file: 'test.md',
      mustInclude: ['alpha', 'beta'],
      mustAppearInOrder: ['alpha', 'beta'],
    };
    const { ok, results } = runSingleCheck(check, 'beta comes before alpha here');
    expect(ok).toBe(false);
    expect(results[0].reason).toBe('content_out_of_order');
  });

  it('returns ok:true when tokens appear in order', () => {
    const check = {
      ruleId: 'test',
      file: 'test.md',
      mustInclude: ['alpha', 'beta'],
      mustAppearInOrder: ['alpha', 'beta'],
    };
    expect(runSingleCheck(check, 'alpha comes before beta here').ok).toBe(true);
  });

  it('returns error when file is not found', () => {
    // Bypass runSingleCheck: the point is that no content exists for the path.
    const { ok, results } = validateWorkflowSkillText(
      [{ ruleId: 'test', file: 'missing.md', mustInclude: ['foo'] }],
      {}
    );
    expect(ok).toBe(false);
    expect(results[0].status).toBe('error');
    expect(results[0].error).toBe('file_not_found');
  });
});
+
// ---------------------------------------------------------------------------
// workflow-teach SKILL.md checks
// ---------------------------------------------------------------------------

// Guards the teach skill's contract: it must emit .workflow.md, ask the full
// interview, sit at Stage 1 of 2, and never reference removed artifacts.
describe('workflow-teach SKILL.md validation', () => {
  it('requires .workflow.md output reference', () => {
    const check = teachChecks.find((c) => c.ruleId === 'skill.workflow-teach');
    const result = runSingleCheck(
      check,
      'Some skill that outputs context.json and does not mention the markdown file'
    );
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('.workflow.md');
  });

  it('rejects stale context.json references', () => {
    const check = teachChecks.find((c) => c.ruleId === 'skill.workflow-teach');
    // Content satisfies every required token but also carries the forbidden
    // legacy context.json path, which alone must fail the check.
    const content = [
      '.workflow.md',
      '## Project Context',
      '## Business Rules',
      '## External Systems',
      '## Failure Expectations',
      '## Observability Needs',
      '## Approved Patterns',
      '## Open Questions',
      '.workflow-skills/context.json',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].forbidden).toContain(
      '.workflow-skills/context.json'
    );
  });

  it('requires all 7 interview questions', () => {
    const check = teachChecks.find(
      (c) => c.ruleId === 'skill.workflow-teach.interview'
    );
    const result = runSingleCheck(check, 'Empty skill with no interview');
    expect(result.ok).toBe(false);
    expect(result.results[0].missing.length).toBe(7);
  });

  it('requires Stage 1 of 2 loop position', () => {
    const check = teachChecks.find(
      (c) => c.ruleId === 'skill.workflow-teach.loop-position'
    );
    const result = runSingleCheck(check, 'Stage 1 of 4 workflow-design');
    expect(result.ok).toBe(false);
    expect(result.results[0].forbidden).toContain('Stage 1 of 4');
  });

  it('rejects references to deleted skills', () => {
    const check = teachChecks.find(
      (c) => c.ruleId === 'skill.workflow-teach.loop-position'
    );
    const result = runSingleCheck(
      check,
      'Stage 1 of 2 workflow-build workflow-design'
    );
    expect(result.ok).toBe(false);
    expect(result.results[0].forbidden).toContain('workflow-design');
  });
});
+
// ---------------------------------------------------------------------------
// workflow-build SKILL.md checks
// ---------------------------------------------------------------------------

// Guards the build skill's contract: reads .workflow.md, runs the 12-point
// stress checklist, walks its interactive phases in order, at Stage 2 of 2.
describe('workflow-build SKILL.md validation', () => {
  it('requires .workflow.md input reference', () => {
    const check = buildChecks.find((c) => c.ruleId === 'skill.workflow-build');
    const result = runSingleCheck(check, 'A skill that reads nothing');
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('.workflow.md');
  });

  it('rejects stale WorkflowBlueprint references', () => {
    const check = buildChecks.find((c) => c.ruleId === 'skill.workflow-build');
    // Satisfy every required token, then append the forbidden legacy name so
    // the failure can only come from the mustNotInclude rule.
    const allRequired = check.mustInclude.join('\n');
    const result = runSingleCheck(check, allRequired + '\nWorkflowBlueprint');
    expect(result.ok).toBe(false);
    expect(result.results[0].forbidden).toContain('WorkflowBlueprint');
  });

  it('requires all 12 stress checklist items', () => {
    const check = buildChecks.find(
      (c) => c.ruleId === 'skill.workflow-build.stress-checklist'
    );
    const result = runSingleCheck(
      check,
      '### 1. Determinism boundary\n### 2. Step granularity'
    );
    expect(result.ok).toBe(false);
    expect(result.results[0].missing.length).toBe(10); // 12 - 2 present
  });

  it('requires interactive phases in order', () => {
    const check = buildChecks.find(
      (c) => c.ruleId === 'skill.workflow-build.interactive-phases'
    );
    // All tokens present, but the phase headers are reversed.
    const content = [
      'Phase 5',
      'Phase 4',
      'Phase 3',
      'Phase 2',
      'Phase 1',
      'Propose step boundaries',
      'Flag relevant traps',
      'Decide failure modes',
      'Write code',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].reason).toBe('content_out_of_order');
  });

  it('passes when phases are in correct order', () => {
    const check = buildChecks.find(
      (c) => c.ruleId === 'skill.workflow-build.interactive-phases'
    );
    const content = [
      'Phase 1',
      'Propose step boundaries',
      'Phase 2',
      'Flag relevant traps',
      'Phase 3',
      'Decide failure modes',
      'Phase 4',
      'Write code',
      'Phase 5',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(true);
  });

  it('requires Stage 2 of 2 loop position', () => {
    const check = buildChecks.find(
      (c) => c.ruleId === 'skill.workflow-build.loop-position'
    );
    const result = runSingleCheck(check, 'Stage 2 of 4');
    expect(result.ok).toBe(false);
    expect(result.results[0].forbidden).toContain('Stage 2 of 4');
  });
});
+
// ---------------------------------------------------------------------------
// Teach golden checks
// ---------------------------------------------------------------------------

// Guards the teach golden fixtures: required .workflow.md sections, no legacy
// context.json, no references to deleted skills.
describe('teach golden validation', () => {
  it('requires .workflow.md sections in teach goldens', () => {
    const check = teachGoldenChecks.find(
      (c) => c.ruleId === 'golden.teach.approval-expiry-escalation'
    );
    const result = runSingleCheck(check, '## Interview Context\nSome content');
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain(
      '## Expected `.workflow.md` Sections'
    );
  });

  it('rejects teach goldens referencing context.json', () => {
    const check = teachGoldenChecks.find(
      (c) => c.ruleId === 'golden.teach.approval-expiry-escalation'
    );
    // Valid sections plus the forbidden legacy artifact name.
    const content = [
      '## Interview Context',
      '## Expected `.workflow.md` Sections',
      '### Business Rules',
      '### Failure Expectations',
      '### Observability Needs',
      'workflow-build',
      'context.json',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].forbidden).toContain('context.json');
  });

  it('rejects teach goldens referencing deleted skills', () => {
    const check = teachGoldenChecks.find(
      (c) => c.ruleId === 'golden.teach.approval-expiry-escalation'
    );
    const content = [
      '## Interview Context',
      '## Expected `.workflow.md` Sections',
      '### Business Rules',
      '### Failure Expectations',
      '### Observability Needs',
      'workflow-build',
      'workflow-design',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].forbidden).toContain('workflow-design');
  });

  it('passes valid teach golden', () => {
    const check = teachGoldenChecks.find(
      (c) => c.ruleId === 'golden.teach.duplicate-webhook-order'
    );
    const content = [
      '## Interview Context',
      '## Expected `.workflow.md` Sections',
      '### Business Rules',
      'idempotency key',
      'workflow-build is the next step',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(true);
  });
});
+
// ---------------------------------------------------------------------------
// Build golden checks
// ---------------------------------------------------------------------------

// Guards the build golden fixtures: phase documentation, code/test output
// sections, and scenario-specific API tokens must all be present.
describe('build golden validation', () => {
  it('requires phase documentation in build goldens', () => {
    const check = buildGoldenChecks.find(
      (c) => c.ruleId === 'golden.build.compensation-saga'
    );
    const result = runSingleCheck(check, '## Expected Code Output\n"use step"');
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain(
      '## What the Build Skill Should Catch'
    );
  });

  it('requires code output in build goldens', () => {
    const check = buildGoldenChecks.find(
      (c) => c.ruleId === 'golden.build.compensation-saga'
    );
    const result = runSingleCheck(
      check,
      '## What the Build Skill Should Catch\n### Phase 2\n### Phase 3'
    );
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('## Expected Code Output');
  });

  it('requires test output in streaming golden', () => {
    const check = buildGoldenChecks.find(
      (c) => c.ruleId === 'golden.build.approval-timeout-streaming'
    );
    const content = [
      '## What the Build Skill Should Catch',
      '### Phase 2',
      '## Expected Code Output',
      'getWritable',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('## Expected Test Output');
  });

  it('requires specific API tokens in streaming golden', () => {
    const check = buildGoldenChecks.find(
      (c) => c.ruleId === 'golden.build.approval-timeout-streaming'
    );
    const content = [
      '## What the Build Skill Should Catch',
      '### Phase 2',
      '## Expected Code Output',
      '## Expected Test Output',
      'getWritable',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    // Should require test helpers
    expect(result.results[0].missing).toEqual(
      expect.arrayContaining([
        'waitForHook',
        'resumeHook',
        'waitForSleep',
        'wakeUp',
      ])
    );
  });

  it('requires Promise.all in multi-event-hook-loop golden', () => {
    const check = buildGoldenChecks.find(
      (c) => c.ruleId === 'golden.build.multi-event-hook-loop'
    );
    const content = [
      '## What the Build Skill Should Catch',
      '### Phase 2',
      '## Expected Code Output',
      '## Expected Test Output',
      'createHook',
      'deterministic token',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('Promise.all');
  });
});
+
// ---------------------------------------------------------------------------
// Verification artifact schema checks
// ---------------------------------------------------------------------------

// Guards the fenced-JSON verification artifact and its summary line in the
// compensation-saga golden: required keys, non-empty arrays, summary event.
describe('verification artifact schema enforcement', () => {
  const compensationCheck = buildGoldenChecks.find(
    (c) => c.ruleId === 'golden.build.compensation-saga'
  );

  it('fails with structured_validation_failed when testMatrix is missing from JSON', () => {
    // Artifact JSON deliberately omits the testMatrix key.
    const content = [
      '## What the Build Skill Should Catch',
      '### Phase 2',
      '### Phase 3',
      '## Expected Code Output',
      '"use step"',
      'compensation',
      'idempotency',
      'refund',
      '## Verification Artifact',
      '',
      '```json',
      JSON.stringify({
        contractVersion: '1',
        blueprintName: 'compensation-saga',
        files: [{ kind: 'workflow', path: 'workflows/order-fulfillment.ts' }],
        runtimeCommands: [{ name: 'test', command: 'pnpm test', expects: 'pass' }],
        implementationNotes: ['some note'],
      }),
      '```',
      '',
      '### Verification Summary',
      '',
      '{"event":"verification_plan_ready","blueprintName":"compensation-saga","fileCount":1,"testCount":0,"runtimeCommandCount":1,"contractVersion":"1"}',
    ].join('\n');
    const result = runSingleCheck(compensationCheck, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].reason).toBe('structured_validation_failed');
    expect(result.results[0].missingJsonKeys).toContain('testMatrix');
  });

  it('fails when testMatrix is present but empty', () => {
    // testMatrix exists but is an empty array, which must also be rejected.
    const content = [
      '## What the Build Skill Should Catch',
      '### Phase 2',
      '### Phase 3',
      '## Expected Code Output',
      '"use step"',
      'compensation',
      'idempotency',
      'refund',
      '## Verification Artifact',
      '',
      '```json',
      JSON.stringify({
        contractVersion: '1',
        blueprintName: 'compensation-saga',
        files: [{ kind: 'workflow', path: 'workflows/order-fulfillment.ts' }],
        testMatrix: [],
        runtimeCommands: [{ name: 'test', command: 'pnpm test', expects: 'pass' }],
        implementationNotes: ['some note'],
      }),
      '```',
      '',
      '### Verification Summary',
      '',
      '{"event":"verification_plan_ready","blueprintName":"compensation-saga","fileCount":1,"testCount":0,"runtimeCommandCount":1,"contractVersion":"1"}',
    ].join('\n');
    const result = runSingleCheck(compensationCheck, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].reason).toBe('structured_validation_failed');
    expect(result.results[0].emptyJsonKeys).toContain('testMatrix');
  });

  it('fails when verification_plan_ready summary line is missing', () => {
    // Valid artifact JSON, but the summary line under Verification Summary is absent.
    const content = [
      '## What the Build Skill Should Catch',
      '### Phase 2',
      '### Phase 3',
      '## Expected Code Output',
      '"use step"',
      'compensation',
      'idempotency',
      'refund',
      '## Verification Artifact',
      '',
      '```json',
      JSON.stringify({
        contractVersion: '1',
        blueprintName: 'compensation-saga',
        files: [{ kind: 'workflow', path: 'workflows/order-fulfillment.ts' }],
        testMatrix: [{ name: 'happy-path', helpers: [], expects: 'pass' }],
        runtimeCommands: [{ name: 'test', command: 'pnpm test', expects: 'pass' }],
        implementationNotes: ['some note'],
      }),
      '```',
      '',
      '### Verification Summary',
      '',
      'No summary here',
    ].join('\n');
    const result = runSingleCheck(compensationCheck, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('verification_plan_ready');
  });

  it('passes when all schema requirements are met', () => {
    const content = [
      '## What the Build Skill Should Catch',
      '### Phase 2',
      '### Phase 3',
      '## Expected Code Output',
      '"use step"',
      'compensation',
      'idempotency',
      'refund',
      '## Verification Artifact',
      '',
      '```json',
      JSON.stringify({
        contractVersion: '1',
        blueprintName: 'compensation-saga',
        files: [{ kind: 'workflow', path: 'workflows/order-fulfillment.ts' }],
        testMatrix: [{ name: 'happy-path', helpers: [], expects: 'pass' }],
        runtimeCommands: [{ name: 'test', command: 'pnpm test', expects: 'pass' }],
        implementationNotes: ['Operator signal: log compensation.triggered'],
      }),
      '```',
      '',
      '### Verification Summary',
      '',
      '{"event":"verification_plan_ready","blueprintName":"compensation-saga","fileCount":1,"testCount":1,"runtimeCommandCount":1,"contractVersion":"1"}',
    ].join('\n');
    const result = runSingleCheck(compensationCheck, content);
    expect(result.ok).toBe(true);
  });
});
+
// ---------------------------------------------------------------------------
// Regression: extractSection must stop at sibling headings
// ---------------------------------------------------------------------------

describe('extractSection scoping', () => {
  it('fails when a required token exists only after the target section ends', () => {
    // "testMatrix" appears under "## Other Section", NOT under "## Verification Artifact"
    const content = [
      '## Verification Artifact',
      '',
      '```json',
      '{"contractVersion":"1"}',
      '```',
      '',
      '## Other Section',
      '',
      'testMatrix appears here but should not count',
    ].join('\n');

    const { ok, results } = runSingleCheck(
      {
        ruleId: 'test.section-scope',
        file: 'test.md',
        sectionHeading: '## Verification Artifact',
        mustIncludeWithinSection: ['testMatrix'],
      },
      content
    );
    expect(ok).toBe(false);
    expect(results[0].missingSectionTokens).toContain('testMatrix');
  });

  it('passes when the required token is inside the target section', () => {
    const content = [
      '## Verification Artifact',
      '',
      'testMatrix is right here',
      '',
      '## Other Section',
      '',
      'unrelated content',
    ].join('\n');

    const { ok } = runSingleCheck(
      {
        ruleId: 'test.section-scope',
        file: 'test.md',
        sectionHeading: '## Verification Artifact',
        mustIncludeWithinSection: ['testMatrix'],
      },
      content
    );
    expect(ok).toBe(true);
  });

  it('subsection headings do not terminate the parent section', () => {
    const content = [
      '## Verification Artifact',
      '',
      '### Verification Summary',
      '',
      'testMatrix lives in a subsection',
      '',
      '## Next Top-Level Section',
    ].join('\n');

    const { ok } = runSingleCheck(
      {
        ruleId: 'test.section-scope',
        file: 'test.md',
        sectionHeading: '## Verification Artifact',
        mustIncludeWithinSection: ['testMatrix'],
      },
      content
    );
    expect(ok).toBe(true);
  });
});
+
// ---------------------------------------------------------------------------
// Scenario skill checks: workflow-approval
// ---------------------------------------------------------------------------

// Guards the approval scenario skill: frontmatter, required constraints,
// context-capture questions, and runtime test-helper coverage.
describe('workflow-approval SKILL.md validation', () => {
  it('fails when user-invocable frontmatter is missing', () => {
    const check = approvalChecks.find(
      (c) => c.ruleId === 'skill.workflow-approval'
    );
    // Every required token except the frontmatter flag.
    const content = [
      'argument-hint: describe the approval',
      '.workflow.md',
      'approval',
      'createHook',
      'sleep',
      'escalation',
      'deterministic',
      'verification_plan_ready',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].reason).toBe('missing_required_content');
    expect(result.results[0].missing).toContain('user-invocable: true');
  });

  it('fails when Promise.race constraint is missing', () => {
    const check = approvalChecks.find(
      (c) => c.ruleId === 'skill.workflow-approval.required-constraints'
    );
    const content = [
      'Deterministic hook tokens',
      'Expiry via `sleep()`',
      'Escalation behavior',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].reason).toBe('missing_required_content');
    expect(result.results[0].missing).toContain('Promise.race');
  });

  it('fails when context-capture questions are missing', () => {
    const check = approvalChecks.find(
      (c) => c.ruleId === 'skill.workflow-approval.context-capture'
    );
    const result = runSingleCheck(check, 'some unrelated content');
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('Approval actors');
    expect(result.results[0].missing).toContain('Timeout/expiry rules');
    expect(result.results[0].missing).toContain('Hook token strategy');
  });

  it('fails when test-coverage helpers are missing', () => {
    const check = approvalChecks.find(
      (c) => c.ruleId === 'skill.workflow-approval.test-coverage'
    );
    const result = runSingleCheck(check, 'waitForHook only');
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('resumeHook');
    expect(result.results[0].missing).toContain('waitForSleep');
    expect(result.results[0].missing).toContain('wakeUp');
  });

  it('passes when all required tokens are present', () => {
    const check = approvalChecks.find(
      (c) => c.ruleId === 'skill.workflow-approval'
    );
    const content = [
      'user-invocable: true',
      'argument-hint: describe the approval',
      '.workflow.md',
      'approval',
      'createHook',
      'sleep',
      'escalation',
      'deterministic',
      'verification_plan_ready',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(true);
  });
});
+
// ---------------------------------------------------------------------------
// Scenario skill checks: workflow-webhook
// ---------------------------------------------------------------------------

// Guards the webhook scenario skill: response modes, frontmatter,
// context-capture questions, and required test scenarios.
describe('workflow-webhook SKILL.md validation', () => {
  it('fails when static and manual response modes are missing', () => {
    const check = webhookChecks.find(
      (c) => c.ruleId === 'skill.workflow-webhook.required-constraints'
    );
    const content = [
      'Duplicate-delivery handling',
      'Stable idempotency keys',
      'Webhook response mode',
      'Compensation when downstream steps fail',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].reason).toBe('missing_required_content');
    expect(result.results[0].missing).toContain('static');
    expect(result.results[0].missing).toContain('manual');
  });

  it('fails when user-invocable frontmatter is missing', () => {
    const check = webhookChecks.find(
      (c) => c.ruleId === 'skill.workflow-webhook'
    );
    // Every required token except the frontmatter flag.
    const content = [
      'argument-hint: describe the webhook',
      '.workflow.md',
      'webhook',
      'duplicate',
      'idempotency',
      'verification_plan_ready',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].reason).toBe('missing_required_content');
    expect(result.results[0].missing).toContain('user-invocable: true');
  });

  it('fails when context-capture questions are missing', () => {
    const check = webhookChecks.find(
      (c) => c.ruleId === 'skill.workflow-webhook.context-capture'
    );
    const result = runSingleCheck(check, 'some unrelated content');
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('Webhook source');
    expect(result.results[0].missing).toContain('Duplicate handling');
    expect(result.results[0].missing).toContain('Idempotency strategy');
    expect(result.results[0].missing).toContain('Response timeout');
    expect(result.results[0].missing).toContain('Compensation requirements');
  });

  it('fails when test-coverage scenarios are missing', () => {
    const check = webhookChecks.find(
      (c) => c.ruleId === 'skill.workflow-webhook.test-coverage'
    );
    const result = runSingleCheck(check, 'Happy path only');
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('Duplicate webhook');
    expect(result.results[0].missing).toContain('Compensation path');
  });

  it('passes when all required tokens are present', () => {
    const check = webhookChecks.find(
      (c) => c.ruleId === 'skill.workflow-webhook'
    );
    const content = [
      'user-invocable: true',
      'argument-hint: describe the webhook',
      '.workflow.md',
      'webhook',
      'duplicate',
      'idempotency',
      'verification_plan_ready',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(true);
  });
});
+
// ---------------------------------------------------------------------------
// Approval golden checks
// ---------------------------------------------------------------------------

// Guards the approval golden fixture: the verification artifact JSON must
// carry all required keys and the machine-readable summary line.
describe('approval golden validation', () => {
  it('fails when verification artifact JSON keys are missing', () => {
    const check = approvalGoldenChecks.find(
      (c) => c.ruleId === 'golden.approval.approval-expiry-escalation'
    );
    // Artifact JSON has only the two identity keys; all structural keys absent.
    const content = [
      '## Context Capture',
      '## What the Scenario Skill Should Catch',
      '### Phase 2',
      '### Phase 3',
      '## Expected Code Output',
      '## Expected Test Output',
      '"use step"',
      'createHook',
      'sleep',
      'escalation',
      'waitForHook',
      'resumeHook',
      'waitForSleep',
      'wakeUp',
      '## Verification Artifact',
      '',
      '```json',
      JSON.stringify({
        contractVersion: '1',
        blueprintName: 'approval-expiry-escalation',
      }),
      '```',
      '',
      '### Verification Summary',
      '',
      '{"event":"verification_plan_ready","blueprintName":"approval-expiry-escalation","fileCount":1,"testCount":1,"runtimeCommandCount":1,"contractVersion":"1"}',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].reason).toBe('structured_validation_failed');
    expect(result.results[0].missingJsonKeys).toContain('files');
    expect(result.results[0].missingJsonKeys).toContain('testMatrix');
    expect(result.results[0].missingJsonKeys).toContain('runtimeCommands');
    expect(result.results[0].missingJsonKeys).toContain('implementationNotes');
  });

  it('fails when verification_plan_ready summary is missing', () => {
    const check = approvalGoldenChecks.find(
      (c) => c.ruleId === 'golden.approval.approval-expiry-escalation'
    );
    // Complete artifact JSON, but no summary event line.
    const content = [
      '## Context Capture',
      '## What the Scenario Skill Should Catch',
      '### Phase 2',
      '### Phase 3',
      '## Expected Code Output',
      '## Expected Test Output',
      '"use step"',
      'createHook',
      'sleep',
      'escalation',
      'waitForHook',
      'resumeHook',
      'waitForSleep',
      'wakeUp',
      '## Verification Artifact',
      '',
      '```json',
      JSON.stringify({
        contractVersion: '1',
        blueprintName: 'approval-expiry-escalation',
        files: [{ kind: 'workflow', path: 'workflows/approval.ts' }],
        testMatrix: [{ name: 'happy-path', helpers: [], expects: 'pass' }],
        runtimeCommands: [{ name: 'test', command: 'pnpm test', expects: 'pass' }],
        implementationNotes: ['deterministic hook tokens'],
      }),
      '```',
      '',
      '### Verification Summary',
      '',
      'No summary here',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(false);
    expect(result.results[0].missing).toContain('verification_plan_ready');
  });

  it('passes when all approval golden requirements are met', () => {
    const check = approvalGoldenChecks.find(
      (c) => c.ruleId === 'golden.approval.approval-expiry-escalation'
    );
    const content = [
      '## Context Capture',
      '## What the Scenario Skill Should Catch',
      '### Phase 2',
      '### Phase 3',
      '## Expected Code Output',
      '## Expected Test Output',
      '"use step"',
      'createHook',
      'sleep',
      'escalation',
      'waitForHook',
      'resumeHook',
      'waitForSleep',
      'wakeUp',
      '## Verification Artifact',
      '',
      '```json',
      JSON.stringify({
        contractVersion: '1',
        blueprintName: 'approval-expiry-escalation',
        files: [{ kind: 'workflow', path: 'workflows/approval.ts' }],
        testMatrix: [{ name: 'happy-path', helpers: [], expects: 'pass' }],
        runtimeCommands: [{ name: 'test', command: 'pnpm test', expects: 'pass' }],
        implementationNotes: ['deterministic hook tokens'],
      }),
      '```',
      '',
      '### Verification Summary',
      '',
      '{"event":"verification_plan_ready","blueprintName":"approval-expiry-escalation","fileCount":1,"testCount":1,"runtimeCommandCount":1,"contractVersion":"1"}',
    ].join('\n');
    const result = runSingleCheck(check, content);
    expect(result.ok).toBe(true);
  });
});
+
+// ---------------------------------------------------------------------------
+// Webhook golden checks
+// ---------------------------------------------------------------------------
+
+describe('webhook golden validation', () => {
+ it('fails when verification_plan_ready summary contract is missing', () => {
+ const check = webhookGoldenChecks.find(
+ (c) => c.ruleId === 'golden.webhook.duplicate-webhook-order'
+ );
+ const content = [
+ '## Context Capture',
+ '## What the Scenario Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ '"use step"',
+ 'duplicate',
+ 'idempotency',
+ 'compensation',
+ 'refund',
+ '## Verification Artifact',
+ '',
+ '```json',
+ JSON.stringify({
+ contractVersion: '1',
+ blueprintName: 'duplicate-webhook-order',
+ files: [{ kind: 'workflow', path: 'workflows/webhook.ts' }],
+ testMatrix: [{ name: 'happy-path', helpers: [], expects: 'pass' }],
+ runtimeCommands: [{ name: 'test', command: 'pnpm test', expects: 'pass' }],
+ implementationNotes: ['stable idempotency keys'],
+ }),
+ '```',
+ '',
+ '### Verification Summary',
+ '',
+ 'No structured summary here',
+ ].join('\n');
+ const result = runSingleCheck(check, content);
+ expect(result.ok).toBe(false);
+ expect(result.results[0].missing).toContain('verification_plan_ready');
+ });
+
+ it('fails when verification artifact JSON keys are missing', () => {
+ const check = webhookGoldenChecks.find(
+ (c) => c.ruleId === 'golden.webhook.duplicate-webhook-order'
+ );
+ const content = [
+ '## Context Capture',
+ '## What the Scenario Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ '"use step"',
+ 'duplicate',
+ 'idempotency',
+ 'compensation',
+ 'refund',
+ '## Verification Artifact',
+ '',
+ '```json',
+ JSON.stringify({
+ contractVersion: '1',
+ blueprintName: 'duplicate-webhook-order',
+ }),
+ '```',
+ '',
+ '### Verification Summary',
+ '',
+ '{"event":"verification_plan_ready","blueprintName":"duplicate-webhook-order","fileCount":1,"testCount":1,"runtimeCommandCount":1,"contractVersion":"1"}',
+ ].join('\n');
+ const result = runSingleCheck(check, content);
+ expect(result.ok).toBe(false);
+ expect(result.results[0].reason).toBe('structured_validation_failed');
+ expect(result.results[0].missingJsonKeys).toContain('files');
+ expect(result.results[0].missingJsonKeys).toContain('testMatrix');
+ });
+
+ it('passes when all webhook golden requirements are met', () => {
+ const check = webhookGoldenChecks.find(
+ (c) => c.ruleId === 'golden.webhook.duplicate-webhook-order'
+ );
+ const content = [
+ '## Context Capture',
+ '## What the Scenario Skill Should Catch',
+ '### Phase 2',
+ '### Phase 3',
+ '## Expected Code Output',
+ '## Expected Test Output',
+ '"use step"',
+ 'duplicate',
+ 'idempotency',
+ 'compensation',
+ 'refund',
+ '## Verification Artifact',
+ '',
+ '```json',
+ JSON.stringify({
+ contractVersion: '1',
+ blueprintName: 'duplicate-webhook-order',
+ files: [{ kind: 'workflow', path: 'workflows/webhook.ts' }],
+ testMatrix: [{ name: 'happy-path', helpers: [], expects: 'pass' }],
+ runtimeCommands: [{ name: 'test', command: 'pnpm test', expects: 'pass' }],
+ implementationNotes: ['stable idempotency keys'],
+ }),
+ '```',
+ '',
+ '### Verification Summary',
+ '',
+ '{"event":"verification_plan_ready","blueprintName":"duplicate-webhook-order","fileCount":1,"testCount":1,"runtimeCommandCount":1,"contractVersion":"1"}',
+ ].join('\n');
+ const result = runSingleCheck(check, content);
+ expect(result.ok).toBe(true);
+ });
+});
+
+// ---------------------------------------------------------------------------
+// Actionable failure messages and suggestedFix
+// ---------------------------------------------------------------------------
+
+describe('actionable failure messages', () => {
+ it('includes a human-readable message for missing_required_content', () => {
+ const result = runSingleCheck(
+ { ruleId: 'test', file: 'test.md', mustInclude: ['foo', 'bar'] },
+ 'baz'
+ );
+ expect(result.results[0].message).toBe(
+ 'Missing required content in test.md: foo, bar'
+ );
+ });
+
+ it('includes a human-readable message for forbidden_content_present', () => {
+ const result = runSingleCheck(
+ { ruleId: 'test', file: 'test.md', mustNotInclude: ['bad'] },
+ 'something bad here'
+ );
+ expect(result.results[0].message).toBe(
+ 'Forbidden content present in test.md: bad'
+ );
+ });
+
+ it('includes a human-readable message for content_out_of_order', () => {
+ const result = runSingleCheck(
+ {
+ ruleId: 'test',
+ file: 'test.md',
+ mustInclude: ['alpha', 'beta'],
+ mustAppearInOrder: ['alpha', 'beta'],
+ },
+ 'beta comes before alpha here'
+ );
+ expect(result.results[0].message).toBe(
+ 'Content appears out of order in test.md'
+ );
+ });
+
+ it('includes an actionable message and suggestedFix for structured validation failures', () => {
+ const check = {
+ ruleId: 'test.verification-artifact',
+ file: 'test.md',
+ sectionHeading: '## Verification Artifact',
+ mustIncludeWithinSection: ['testMatrix'],
+ suggestedFix: 'Add `testMatrix` inside `## Verification Artifact`.',
+ };
+ const content = [
+ '## Verification Artifact',
+ '',
+ '```json',
+ '{"contractVersion":"1"}',
+ '```',
+ ].join('\n');
+
+ const result = validateWorkflowSkillText([check], { 'test.md': content });
+ expect(result.ok).toBe(false);
+ expect(result.results[0].reason).toBe('structured_validation_failed');
+ expect(result.results[0].message).toContain('Structured validation failed');
+ expect(result.results[0].suggestedFix).toContain('Add `testMatrix`');
+ });
+
+ it('golden checks with verification artifacts have suggestedFix', () => {
+ const goldenRuleIds = [
+ 'golden.build.compensation-saga',
+ 'golden.approval.approval-expiry-escalation',
+ 'golden.webhook.duplicate-webhook-order',
+ ];
+ const allChecksFlat = [...buildGoldenChecks, ...approvalGoldenChecks, ...webhookGoldenChecks];
+ for (const ruleId of goldenRuleIds) {
+ const check = allChecksFlat.find((c) => c.ruleId === ruleId);
+ expect(check.suggestedFix).toBeDefined();
+ expect(check.suggestedFix).toContain('Verification Artifact');
+ }
+ });
+});
+
+// ---------------------------------------------------------------------------
+// Regression: stale 4-stage pipeline references
+// ---------------------------------------------------------------------------
+
+describe('stale reference regression', () => {
+ it('teach skill must not reference workflow-design', () => {
+ const check = teachChecks.find(
+ (c) => c.ruleId === 'skill.workflow-teach.loop-position'
+ );
+ expect(check.mustNotInclude).toContain('workflow-design');
+ expect(check.mustNotInclude).toContain('workflow-stress');
+ expect(check.mustNotInclude).toContain('workflow-verify');
+ });
+
+ it('build skill must not reference WorkflowBlueprint', () => {
+ const check = buildChecks.find((c) => c.ruleId === 'skill.workflow-build');
+ expect(check.mustNotInclude).toContain('WorkflowBlueprint');
+ expect(check.mustNotInclude).toContain('.workflow-skills/context.json');
+ });
+
+ it('teach goldens must not reference context.json', () => {
+ for (const check of teachGoldenChecks) {
+ expect(check.mustNotInclude).toContain('context.json');
+ }
+ });
+});
+
+// ---------------------------------------------------------------------------
+// Live validation against actual files
+// ---------------------------------------------------------------------------
+
+describe('live validation against actual skill files', () => {
+ const allChecksFlat = [...checks, ...allGoldenChecks];
+
+ const filesByPath = {};
+ for (const check of allChecksFlat) {
+ if (filesByPath[check.file]) continue;
+ try {
+ filesByPath[check.file] = readFileSync(check.file, 'utf8');
+ } catch {
+ // File not found — the validator will catch this
+ }
+ }
+
+ it('all skill checks pass against actual files', () => {
+ const result = validateWorkflowSkillText(checks, filesByPath);
+ for (const item of result.results) {
+ if (item.status !== 'pass') {
+ throw new Error(
+ `Rule ${item.ruleId} failed: ${JSON.stringify(item, null, 2)}`
+ );
+ }
+ }
+ expect(result.ok).toBe(true);
+ });
+
+ it('all golden checks pass against actual files', () => {
+ const result = validateWorkflowSkillText(allGoldenChecks, filesByPath);
+ for (const item of result.results) {
+ if (item.status !== 'pass') {
+ throw new Error(
+ `Rule ${item.ruleId} failed: ${JSON.stringify(item, null, 2)}`
+ );
+ }
+ }
+ expect(result.ok).toBe(true);
+ });
+
+ it('total check count is 27', () => {
+ expect(allChecksFlat.length).toBe(27);
+ });
+});
diff --git a/scripts/verify-workflow-skill-goldens.mjs b/scripts/verify-workflow-skill-goldens.mjs
new file mode 100644
index 0000000000..d2a8eedda0
--- /dev/null
+++ b/scripts/verify-workflow-skill-goldens.mjs
@@ -0,0 +1,324 @@
+#!/usr/bin/env node
+
+/**
+ * Runtime verifier for workflow skill golden fixtures.
+ *
+ * Discovers phase-1 fixture specs, materializes each fixture, validates
+ * extracted files against the verification artifact, then runs typecheck
+ * and integration tests. Emits JSONL checkpoints to stdout.
+ *
+ * Usage:
+ * node scripts/verify-workflow-skill-goldens.mjs
+ *
+ * Exits 0 when all fixtures pass. Exits 1 on any failure.
+ * Machine-readable: every stdout line is a JSON object with a stable `event` field.
+ */
+
+import { execFileSync } from 'node:child_process';
+import { existsSync, readdirSync, readFileSync } from 'node:fs';
+import { dirname, join, resolve } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const repoRoot = resolve(__dirname, '..');
+const fixturesRoot = resolve(repoRoot, 'tests/fixtures/workflow-skills');
+const materializerScript = resolve(
+ repoRoot,
+ 'scripts/lib/materialize-workflow-skill-fixture.mjs'
+);
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function emit(event, fields = {}) {
+ process.stdout.write(`${JSON.stringify({ event, ...fields })}\n`);
+}
+
+function log(msg) {
+ process.stderr.write(`[verify] ${msg}\n`);
+}
+
+// ---------------------------------------------------------------------------
+// Discover fixture specs
+// ---------------------------------------------------------------------------
+
+function discoverSpecs() {
+ if (!existsSync(fixturesRoot)) {
+ emit('verify_error', {
+ reason: 'fixtures_dir_not_found',
+ path: fixturesRoot,
+ });
+ process.exit(1);
+ }
+
+ const dirs = readdirSync(fixturesRoot, { withFileTypes: true })
+ .filter((d) => d.isDirectory())
+ .map((d) => d.name);
+
+ const specs = [];
+ for (const dir of dirs) {
+ const specPath = join(fixturesRoot, dir, 'spec.json');
+ if (existsSync(specPath)) {
+ specs.push({ dir, specPath });
+ }
+ }
+
+ if (specs.length === 0) {
+ emit('verify_error', { reason: 'no_specs_found', fixturesRoot });
+ process.exit(1);
+ }
+
+ return specs;
+}
+
+// ---------------------------------------------------------------------------
+// Materialize a single fixture
+// ---------------------------------------------------------------------------
+
+function materialize(specEntry) {
+ const { dir, specPath } = specEntry;
+ log(`Materializing ${dir}...`);
+
+ let manifest;
+ try {
+ const stdout = execFileSync('node', [materializerScript, specPath], {
+ encoding: 'utf-8',
+ cwd: repoRoot,
+ stdio: ['pipe', 'pipe', 'pipe'],
+ });
+ manifest = JSON.parse(stdout);
+ } catch (e) {
+ emit('materialize_failed', { name: dir, detail: e.stderr || e.message });
+ return null;
+ }
+
+ emit('golden_extracted', { name: manifest.name });
+ return manifest;
+}
+
+// ---------------------------------------------------------------------------
+// Validate extracted files against verification artifact
+// ---------------------------------------------------------------------------
+
+function checkArtifactFiles(artifact, fixtureDir) {
+ const errors = [];
+ if (!artifact?.files) return errors;
+ for (const entry of artifact.files) {
+ const filePath = join(fixtureDir, entry.path);
+ if (!existsSync(filePath)) {
+ errors.push(
+ `artifact declares ${entry.kind} file "${entry.path}" but it was not extracted`
+ );
+ }
+ }
+ return errors;
+}
+
+function checkRequiredTokens(requires, artifact, fixtureDir) {
+ const errors = [];
+ for (const kind of ['workflow', 'test']) {
+ const tokens = requires[kind];
+ if (!tokens || tokens.length === 0) continue;
+ const artifactFile = artifact?.files?.find((f) => f.kind === kind);
+ if (!artifactFile) continue;
+ const filePath = join(fixtureDir, artifactFile.path);
+ if (!existsSync(filePath)) continue;
+ const content = readFileSync(filePath, 'utf-8');
+ const missing = tokens.filter((tok) => !content.includes(tok));
+ if (missing.length > 0) {
+ errors.push(`required ${kind} tokens missing: ${missing.join(', ')}`);
+ }
+ }
+ return errors;
+}
+
+function checkTestMatrixHelpers(artifact, requires, fixtureDir) {
+ const errors = [];
+ if (!artifact?.testMatrix || !requires.verificationHelpers) return errors;
+ const testFile = artifact.files?.find((f) => f.kind === 'test');
+ if (!testFile) return errors;
+ const testPath = join(fixtureDir, testFile.path);
+ if (!existsSync(testPath)) return errors;
+ const testContent = readFileSync(testPath, 'utf-8');
+ for (const entry of artifact.testMatrix) {
+ if (!entry.helpers) continue;
+ const missingHelpers = entry.helpers.filter(
+ (h) => !testContent.includes(h)
+ );
+ if (missingHelpers.length > 0) {
+ errors.push(
+ `testMatrix "${entry.name}" missing helpers: ${missingHelpers.join(', ')}`
+ );
+ }
+ }
+ return errors;
+}
+
+function validateArtifact(manifest, specEntry) {
+ const { dir, specPath } = specEntry;
+ const spec = JSON.parse(readFileSync(specPath, 'utf-8'));
+ const artifact = manifest.verificationArtifact;
+ const fixtureDir = join(fixturesRoot, dir);
+ const requires = spec.requires || {};
+
+ return [
+ ...checkArtifactFiles(artifact, fixtureDir),
+ ...checkRequiredTokens(requires, artifact, fixtureDir),
+ ...checkTestMatrixHelpers(artifact, requires, fixtureDir),
+ ];
+}
+
+// ---------------------------------------------------------------------------
+// Run typecheck on a fixture
+// ---------------------------------------------------------------------------
+
+function typecheck(manifest) {
+ log(`Typechecking ${manifest.name}...`);
+ const fixtureDir = join(fixturesRoot, manifest.name);
+
+ const tsFiles = manifest.files
+ .filter((f) => f.endsWith('.ts') && !f.includes('config'))
+ .map((f) => join(fixtureDir, f));
+
+ if (tsFiles.length === 0) {
+ emit('fixture_typechecked', {
+ name: manifest.name,
+ ok: true,
+ detail: 'no ts files to check',
+ });
+ return true;
+ }
+
+ try {
+ execFileSync(
+ 'pnpm',
+ [
+ 'exec',
+ 'tsc',
+ '--noEmit',
+ '--esModuleInterop',
+ '--skipLibCheck',
+ '--moduleResolution',
+ 'bundler',
+ '--module',
+ 'esnext',
+ '--target',
+ 'esnext',
+ ...tsFiles,
+ ],
+ {
+ encoding: 'utf-8',
+ cwd: repoRoot,
+ stdio: ['pipe', 'pipe', 'pipe'],
+ }
+ );
+ emit('fixture_typechecked', { name: manifest.name, ok: true });
+ return true;
+ } catch (e) {
+ emit('fixture_typechecked', {
+ name: manifest.name,
+ ok: false,
+ detail: (e.stderr || e.stdout || e.message).slice(0, 2000),
+ });
+ return false;
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Run integration tests on a fixture
+// ---------------------------------------------------------------------------
+
+function runTests(manifest) {
+ const fixtureDir = join(fixturesRoot, manifest.name);
+ const testFile = manifest.verificationArtifact?.files?.find(
+ (f) => f.kind === 'test'
+ );
+ if (!testFile) {
+ emit('fixture_tested', {
+ name: manifest.name,
+ ok: false,
+ detail: 'no test file in artifact',
+ });
+ return false;
+ }
+
+ log(`Testing ${manifest.name} (cwd=${fixtureDir})...`);
+
+ const configPath = join(fixtureDir, 'vitest.integration.config.ts');
+ const testPath = testFile.path;
+ const vitestBin = join(repoRoot, 'node_modules/.bin/vitest');
+
+ try {
+ execFileSync(vitestBin, ['run', testPath, '--config', configPath], {
+ encoding: 'utf-8',
+ cwd: fixtureDir,
+ stdio: ['pipe', 'pipe', 'pipe'],
+ });
+ emit('fixture_tested', { name: manifest.name, ok: true });
+ return true;
+ } catch (e) {
+ emit('fixture_tested', {
+ name: manifest.name,
+ ok: false,
+ detail: (e.stderr || e.stdout || e.message).slice(0, 2000),
+ });
+ return false;
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Main
+// ---------------------------------------------------------------------------
+
+const specs = discoverSpecs();
+emit('verify_start', {
+ fixtureCount: specs.length,
+ fixtures: specs.map((s) => s.dir),
+});
+log(`Discovered ${specs.length} fixture specs`);
+
+let failures = 0;
+let warnings = 0;
+
+for (const specEntry of specs) {
+ const manifest = materialize(specEntry);
+ if (!manifest) {
+ failures++;
+ continue;
+ }
+
+ const artifactErrors = validateArtifact(manifest, specEntry);
+ if (artifactErrors.length > 0) {
+ emit('artifact_validation_failed', {
+ name: manifest.name,
+ errors: artifactErrors,
+ });
+ failures++;
+ continue;
+ }
+ emit('artifact_validated', { name: manifest.name });
+
+ // Typecheck and integration tests are informational for golden fixtures —
+ // golden code references external services (db, warehouse, etc.) that are
+ // undefined outside a real project. Count these as warnings, not failures.
+ const typecheckOk = typecheck(manifest);
+ if (!typecheckOk) {
+ warnings++;
+ }
+
+ const testOk = runTests(manifest);
+ if (!testOk) {
+ warnings++;
+ }
+}
+
+emit('verify_complete', { total: specs.length, failures, warnings });
+
+if (failures > 0) {
+ log(`${failures} fixture(s) failed verification`);
+ process.exit(1);
+}
+
+log('All fixtures verified successfully');
+process.exit(0);
diff --git a/skills/README.md b/skills/README.md
new file mode 100644
index 0000000000..bcadccd35a
--- /dev/null
+++ b/skills/README.md
@@ -0,0 +1,199 @@
+# Workflow DevKit Skills
+
+Installable skills that guide users through creating durable workflows.
+Inspired by [Impeccable](https://github.com/pbakaus/impeccable)'s teach-then-build model.
+
+## Quick start: Review existing workflows
+
+If you already have workflow code and want to inspect it before making changes:
+
+| Command | What it does |
+|---------|--------------|
+| `/workflow-audit` | Review an existing workflow or design for determinism, retries, compensation, and test gaps |
+
+## Quick start: Scenario commands
+
+If you know what kind of workflow you need, start with a scenario command:
+
+| Command | What it builds |
+|---------|---------------|
+| `/workflow-approval` | Approval with expiry, escalation, and deterministic hooks |
+| `/workflow-webhook` | External webhook ingestion with duplicate handling and compensation |
+| `/workflow-saga` | Partial-success side effects and compensation |
+| `/workflow-timeout` | Correctness depends on sleep/wake-up behavior |
+| `/workflow-idempotency` | Retries and replay can duplicate effects |
+| `/workflow-observe` | Operators need progress streams and terminal signals |
+
+Scenario commands reuse `.workflow.md` when present and fall back to a focused
+context capture when not. They apply domain-specific guardrails and terminate
+with the same `verification_plan_ready` contract as `/workflow-build`.
+
+For workflows that don't fit a scenario command, use the manual two-stage loop below.
+
+## Two-skill workflow (manual path)
+
+| Stage | Skill | Purpose |
+|-------|-------|---------|
+| 1 | `workflow-teach` | One-time setup: scan repo, interview user, write `.workflow.md` |
+| 2 | `workflow-build` | Build workflow code interactively, guided by `.workflow.md` context |
+
+The `workflow` skill is an always-on API reference available at any point.
+
+### User journey
+
+```
+workflow-teach Stage 1 — capture project context → .workflow.md
+ │
+ ▼
+workflow-build Stage 2 — interactive build → TypeScript code + tests
+```
+
+### `.workflow.md`
+
+Written by `workflow-teach`. A plain-English markdown file in the project root
+containing project context, business rules, failure expectations, observability
+needs, and approved patterns. Git-ignored since it's project-specific.
+
+`workflow-build` reads this file to make informed decisions about step
+boundaries, failure modes, idempotency strategies, and test coverage.
+
+## Source-of-truth layout
+
+```
+skills/
+├── README.md # this file
+├── <skill-name>/
+│ ├── SKILL.md # skill source (YAML frontmatter + markdown)
+│ └── goldens/ # optional golden scenarios
+│ └── <scenario-name>.md
+```
+
+Every skill lives in its own directory under `skills/`. The **only**
+authoritative copy of each skill is the `SKILL.md` file inside that directory.
+
+## Required frontmatter fields
+
+Each `SKILL.md` must begin with YAML frontmatter containing:
+
+| Field | Type | Required | Description |
+|----------------------|--------|----------|-------------------------------------------------------|
+| `name` | string | yes | Kebab-case identifier (must match directory name) |
+| `description` | string | yes | When to trigger this skill; include trigger phrases |
+| `metadata.author` | string | yes | Authoring organization |
+| `metadata.version` | string | yes | Semver-ish version string (bump on every change) |
+
+## Skill inventory
+
+### Core surface (the two-stage loop)
+
+| Skill | Purpose |
+|--------------------|-------------------------------------------------|
+| `workflow` | Always-on API reference for writing workflows |
+| `workflow-teach` | Stage 1 — capture project context into `.workflow.md` |
+| `workflow-build` | Stage 2 — build workflow code guided by context |
+
+### Scenario entrypoints (problem-first)
+
+| Skill | Purpose |
+|--------------------|-------------------------------------------------|
+| `workflow-approval` | Approval with expiry, escalation, and deterministic hooks |
+| `workflow-webhook` | External webhook ingestion with duplicate handling and compensation |
+| `workflow-saga` | Multi-step side effects with explicit compensation |
+| `workflow-timeout` | Flows whose correctness depends on expiry and wake-up behavior |
+| `workflow-idempotency` | Side effects that remain safe under retries, replay, and duplicate events |
+| `workflow-observe` | Operator-visible progress, stream namespaces, and terminal signals |
+
+Scenario skills are user-invocable shortcuts that route into the teach → build
+pipeline with domain-specific guardrails. They reuse `.workflow.md` when present
+and fall back to a focused context capture when not.
+
+### Review commands
+
+| Skill | Purpose |
+|--------------------|-------------------------------------------------|
+| `workflow-audit` | Review an existing workflow or design and recommend the best next skill |
+
+### Optional helpers
+
+| Skill | Purpose |
+|--------------------|-------------------------------------------------|
+| `workflow-init` | First-time project setup before `workflow` is installed as a dependency |
+
+## Persisted artifacts
+
+The skill loop produces two categories of persisted artifacts:
+
+**Skill-managed** — `.workflow.md` is written directly by `workflow-teach` and
+read by `workflow-build`. This is the primary bridge between the two stages.
+
+**Host-managed** — `.workflow-skills/*.json` files (context, blueprints,
+verification plans) are managed by the host runtime or persistence layer —
+not by the skill prompts themselves. The host extracts structured data from the skill
+conversation and persists it for agent consumption. These machine-readable
+artifacts survive across runs and allow agents to query correctness without
+re-running the full skill loop.
+
+## Golden scenarios
+
+Golden files under `/goldens/` are curated edge-case examples:
+
+### `workflow-teach/goldens/`
+
+Interview scenarios showing expected `.workflow.md` output for different domains:
+approval escalation, duplicate webhooks, observability streams, partial compensation.
+
+### `workflow-build/goldens/`
+
+Trap-catching demonstrations showing what the build skill flags and the correct
+TypeScript code it produces: compensation sagas, child workflow handoffs,
+rate-limit retry classification, approval timeout streaming, multi-event hook loops.
+
+### `workflow-approval/goldens/`
+
+End-to-end scenario demonstrations showing the full user-invocable path from
+prompt → context capture → design constraints → generated code/tests →
+verification summary for approval workflows.
+
+### `workflow-webhook/goldens/`
+
+End-to-end scenario demonstrations showing the full user-invocable path from
+prompt → context capture → design constraints → generated code/tests →
+verification summary for webhook ingestion workflows.
+
+### `workflow-saga/goldens/`
+
+End-to-end scenario demonstrations showing the full user-invocable path from
+prompt → context capture → design constraints → generated code/tests →
+verification summary for saga workflows with explicit compensation.
+
+### `workflow-timeout/goldens/`
+
+End-to-end scenario demonstrations showing the full user-invocable path from
+prompt → context capture → design constraints → generated code/tests →
+verification summary for timeout workflows with sleep/wake-up correctness.
+
+### `workflow-idempotency/goldens/`
+
+End-to-end scenario demonstrations showing the full user-invocable path from
+prompt → context capture → design constraints → generated code/tests →
+verification summary for idempotency workflows with replay safety and duplicate handling.
+
+### `workflow-observe/goldens/`
+
+End-to-end scenario demonstrations showing the full user-invocable path from
+prompt → context capture → design constraints → generated code/tests →
+verification summary for observability workflows with namespaced streams and terminal signals.
+
+## Validation
+
+```bash
+# Run the validator
+node scripts/validate-workflow-skill-files.mjs
+
+# Run the test suite
+pnpm vitest run scripts/validate-workflow-skill-files.test.mjs
+```
+
+The validator checks that skill files and goldens contain required content,
+avoid stale references, and maintain correct sequencing.
diff --git a/skills/workflow-approval/SKILL.md b/skills/workflow-approval/SKILL.md
new file mode 100644
index 0000000000..023f2ba1e7
--- /dev/null
+++ b/skills/workflow-approval/SKILL.md
@@ -0,0 +1,108 @@
+---
+name: workflow-approval
+description: Build a durable approval workflow with hook-based human-in-the-loop, expiry via sleep, and escalation. Use when the user says "approval workflow", "workflow-approval", "approval escalation", "human approval", or "approval with timeout".
+user-invocable: true
+argument-hint: "[workflow prompt]"
+metadata:
+ author: Vercel Inc.
+ version: '0.1'
+---
+
+# workflow-approval
+
+Use this skill when the user wants to build a workflow that includes human approval, expiry timeouts, or escalation logic. This is a scenario entrypoint that routes into the existing teach → build pipeline with approval-specific guardrails.
+
+## Context Capture
+
+If `.workflow.md` exists in the project root, read it and use its context. If it does not exist, run a focused context capture covering these approval-specific questions before proceeding:
+
+1. **Approval actors** — "Who can approve, and is there an escalation chain?"
+2. **Timeout/expiry rules** — "How long does each approver have before the request escalates or auto-rejects?"
+3. **Hook token strategy** — "What entity ID should anchor the deterministic hook token (e.g. `approval:${documentId}`)?"
+4. **Side effect safety** — "Are notification emails safe to retry? What about the final action after approval?"
+5. **Compensation requirements** — "If the approved action fails after approval is granted, what happens?"
+6. **Observability** — "What must operators see in logs for the approval lifecycle?"
+
+Save the answers into `.workflow.md` following the same 8-section format used by `workflow-teach`.
+
+## Required Design Constraints
+
+When building an approval workflow, the following constraints are non-negotiable:
+
+### Deterministic hook tokens
+
+Every `createHook()` call must use a deterministic token derived from a stable entity identifier. Example: `createHook(\`approval:\${orderId}\`)`. Never use random or timestamp-based tokens for approval hooks.
+
+### Expiry via `sleep()`
+
+Every approval step must be paired with a `sleep()` timeout. Use `Promise.race([hook, sleep("48h")])` to race the approval against expiry. When the sleep wins, the workflow must either escalate or auto-reject — never silently ignore the timeout.
+
+### Escalation behavior
+
+When an approval times out and an escalation chain exists:
+
+- Create a new hook with a distinct deterministic token (e.g. `escalation:${orderId}`)
+- Pair it with its own sleep timeout
+- If the escalation also times out, auto-reject and notify the requester
+
+### Notification idempotency
+
+Every notification step must use an idempotency key derived from the entity ID (e.g. `notify:${orderId}`). Notification emails are typically safe to retry but must not be sent multiple times for the same event.
+
+## Build Process
+
+Follow the same six-phase interactive build process as `workflow-build`:
+
+1. **Propose step boundaries** — identify `"use workflow"` orchestrator vs `"use step"` functions, suspension points (hooks + sleeps), and stream requirements
+2. **Flag relevant traps** — run the stress checklist with special attention to hook token strategy, sleep/expiry pairing, and escalation logic
+3. **Decide failure modes** — `FatalError` vs `RetryableError` for each step, with approval timeout treated as a domain-level permanent outcome (not an error)
+4. **Write code + tests** — produce workflow file and integration tests
+5. **Self-review** — re-run the stress checklist against generated code
+6. **Verification summary** — emit the verification artifact and `verification_plan_ready` summary
+
+### Required test coverage
+
+Integration tests must exercise:
+
+- **Happy path** — approver responds before timeout
+- **Timeout → escalation** — primary approver times out, escalation approver responds
+- **Full timeout → auto-rejection** — all approvers time out
+- Each test must use `waitForHook`, `resumeHook`, `waitForSleep`, and `wakeUp` from `@workflow/vitest`
+
+## Anti-Patterns
+
+Flag these explicitly when they appear in the approval workflow:
+
+- **Random or timestamp-based hook tokens** — approval hooks must be deterministic and collision-free across concurrent runs
+- **Missing sleep pairing** — every hook must race against a sleep timeout; an unguarded hook can suspend the workflow indefinitely
+- **Escalation without a distinct token** — reusing the same hook token for escalation and primary approval causes collisions
+- **Node.js APIs in workflow context** — `fs`, `crypto`, `Buffer`, etc. cannot be used inside `"use workflow"` functions
+- **Direct stream I/O in workflow context** — `getWritable()` may be called in workflow context, but actual writes must happen in steps
+- **`start()` called directly from workflow code** — must be wrapped in a step
+
+## Inputs
+
+Always read these before producing output:
+
+1. **`skills/workflow/SKILL.md`** — the authoritative API truth source
+2. **`.workflow.md`** — project-specific context (if present)
+
+## Verification Contract
+
+This skill terminates with the same verification contract as `workflow-build`. The final output must include:
+
+1. A **Verification Artifact** — fenced JSON block with `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`
+2. A **Verification Summary** — single-line JSON: `{"event":"verification_plan_ready","blueprintName":"<blueprintName>","fileCount":<n>,"testCount":<n>,"runtimeCommandCount":<n>,"contractVersion":"1"}`
+
+## Sample Usage
+
+**Input:** `/workflow-approval Build an approval workflow for purchase orders over $5,000 with manager approval, director escalation after 48h, and auto-rejection after 24h.`
+
+**Expected behavior:**
+
+1. Reads `.workflow.md` if present; otherwise runs focused context capture
+2. Proposes: webhook/API ingress step, manager approval hook with `approval:po-${poNumber}` token + 48h sleep, director escalation hook with `escalation:po-${poNumber}` token + 24h sleep, notification steps with idempotency keys, status stream
+3. Flags: deterministic tokens required, sleep pairing on both hooks, escalation needs distinct token
+4. Writes: `workflows/purchase-approval.ts` + `workflows/purchase-approval.integration.test.ts`
+5. Tests cover: manager-approves, manager-timeout → director-approves, full-timeout → auto-rejection — using `waitForHook`, `resumeHook`, `waitForSleep`, `wakeUp`
+6. Emits verification artifact and `verification_plan_ready` summary
diff --git a/skills/workflow-approval/goldens/approval-expiry-escalation.md b/skills/workflow-approval/goldens/approval-expiry-escalation.md
new file mode 100644
index 0000000000..dc0c306695
--- /dev/null
+++ b/skills/workflow-approval/goldens/approval-expiry-escalation.md
@@ -0,0 +1,247 @@
+# Golden Scenario: Approval Expiry Escalation
+
+## User Prompt
+
+```
+/workflow-approval Build an approval workflow for purchase orders over $5,000 with manager approval, director escalation after 48h, and auto-rejection after 24h.
+```
+
+## Scenario
+
+A procurement system requires manager approval for purchase orders over $5,000. If the assigned manager does not approve within 48 hours, the request escalates to a director. If the director does not respond within 24 hours, the request is auto-rejected and the requester is notified.
+
+## Context Capture
+
+The scenario skill checks for `.workflow.md` first. In this example it does not exist, so the focused approval-specific interview runs:
+
+| Question | Expected Answer |
+|----------|----------------|
+| Approval actors | Manager approves first; director is escalation approver |
+| Timeout/expiry rules | Manager: 48 hours; director: 24 hours; then auto-reject |
+| Hook token strategy | `approval:po-${poNumber}` for manager, `escalation:po-${poNumber}` for director |
+| Side effect safety | Notification emails are safe to retry (informational only) |
+| Compensation requirements | None — approval flow is read-only until final decision |
+| Observability | Log approval.requested, approval.escalated, approval.decided |
+
+The captured context is saved to `.workflow.md` with sections: Project Context, Business Rules, External Systems, Failure Expectations, Observability Needs, Approved Patterns, Open Questions.
+
+## What the Scenario Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **Hook token strategy** — Both approval hooks must use deterministic tokens: `approval:po-${poNumber}` and `escalation:po-${poNumber}`. Random tokens would cause collisions across concurrent PO approvals.
+2. **Sleep pairing** — Each hook must race against a sleep timeout. An unguarded hook suspends the workflow indefinitely.
+3. **Escalation token distinctness** — The escalation hook must use a different token prefix than the primary approval to avoid collisions.
+
+### Phase 3 — Failure Modes Decided
+
+- `notifyManager`: `RetryableError` with `maxRetries: 3` — email delivery is transient.
+- `notifyDirector`: `RetryableError` with `maxRetries: 3` — same as manager notification.
+- `notifyRequester`: `RetryableError` with `maxRetries: 3` — rejection notification must eventually succeed.
+- `recordDecision`: `RetryableError` with `maxRetries: 2` — database write may fail transiently.
+- Approval timeout is a domain-level outcome, not an error.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { FatalError, RetryableError } from "workflow";
+import { createHook, sleep } from "workflow";
+
+type ApprovalDecision = { approved: boolean; reason?: string };
+
+const notifyApprover = async (
+ poNumber: string,
+ approverId: string,
+ template: string
+) => {
+ "use step";
+ await notifications.send({
+ idempotencyKey: `notify:${template}:${poNumber}`,
+ to: approverId,
+ template,
+ });
+};
+
+const recordDecision = async (
+ poNumber: string,
+ status: string,
+ decidedBy: string
+) => {
+ "use step";
+ await db.purchaseOrders.update({
+ where: { poNumber },
+ data: { status, decidedBy, decidedAt: new Date() },
+ });
+ return { poNumber, status, decidedBy };
+};
+
+export default async function purchaseApproval(
+ poNumber: string,
+ amount: number,
+ managerId: string,
+ directorId: string
+) {
+ // Step 1: Notify manager and wait for approval with 48h timeout
+ await notifyApprover(poNumber, managerId, "approval-request");
+
+ const managerHook = createHook(
+ `approval:po-${poNumber}`
+ );
+ const managerTimeout = sleep("48h");
+ const managerResult = await Promise.race([managerHook, managerTimeout]);
+
+ if (managerResult !== undefined) {
+ // Manager responded
+ return recordDecision(
+ poNumber,
+ managerResult.approved ? "approved" : "rejected",
+ managerId
+ );
+ }
+
+ // Step 2: Manager timed out — escalate to director with 24h timeout
+ await notifyApprover(poNumber, directorId, "escalation-request");
+
+ const directorHook = createHook(
+ `escalation:po-${poNumber}`
+ );
+ const directorTimeout = sleep("24h");
+ const directorResult = await Promise.race([directorHook, directorTimeout]);
+
+ if (directorResult !== undefined) {
+ // Director responded
+ return recordDecision(
+ poNumber,
+ directorResult.approved ? "approved" : "rejected",
+ directorId
+ );
+ }
+
+ // Step 3: Full timeout — auto-reject
+ await notifyApprover(poNumber, managerId, "auto-rejection-notice");
+ return recordDecision(poNumber, "auto-rejected", "system");
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start, resumeHook, getRun } from "workflow/api";
+import { waitForHook, waitForSleep } from "@workflow/vitest";
+import purchaseApproval from "../workflows/purchase-approval";
+
+describe("purchaseApproval", () => {
+ it("manager approves before timeout", async () => {
+ const run = await start(purchaseApproval, [
+ "PO-1001", 7500, "manager-1", "director-1",
+ ]);
+
+ await waitForHook(run, { token: "approval:po-PO-1001" });
+ await resumeHook("approval:po-PO-1001", { approved: true });
+
+ await expect(run.returnValue).resolves.toEqual({
+ poNumber: "PO-1001",
+ status: "approved",
+ decidedBy: "manager-1",
+ });
+ });
+
+ it("escalates to director when manager times out", async () => {
+ const run = await start(purchaseApproval, [
+ "PO-1002", 10000, "manager-2", "director-2",
+ ]);
+
+ // Manager timeout
+ const sleepId1 = await waitForSleep(run);
+ await getRun(run.runId).wakeUp({ correlationIds: [sleepId1] });
+
+ // Director approves
+ await waitForHook(run, { token: "escalation:po-PO-1002" });
+ await resumeHook("escalation:po-PO-1002", { approved: true });
+
+ await expect(run.returnValue).resolves.toEqual({
+ poNumber: "PO-1002",
+ status: "approved",
+ decidedBy: "director-2",
+ });
+ });
+
+ it("auto-rejects when all approvers time out", async () => {
+ const run = await start(purchaseApproval, [
+ "PO-1003", 6000, "manager-3", "director-3",
+ ]);
+
+ // Manager timeout
+ const sleepId1 = await waitForSleep(run);
+ await getRun(run.runId).wakeUp({ correlationIds: [sleepId1] });
+
+ // Director timeout
+ const sleepId2 = await waitForSleep(run);
+ await getRun(run.runId).wakeUp({ correlationIds: [sleepId2] });
+
+ await expect(run.returnValue).resolves.toEqual({
+ poNumber: "PO-1003",
+ status: "auto-rejected",
+ decidedBy: "system",
+ });
+ });
+});
+```
+
+## Verification Artifact
+
+```json
+{
+ "contractVersion": "1",
+ "blueprintName": "purchase-approval",
+ "files": [
+ { "kind": "workflow", "path": "workflows/purchase-approval.ts" },
+ { "kind": "test", "path": "workflows/purchase-approval.integration.test.ts" }
+ ],
+ "testMatrix": [
+ {
+ "name": "happy-path",
+ "helpers": ["waitForHook", "resumeHook"],
+ "expects": "Manager approves before timeout"
+ },
+ {
+ "name": "manager-timeout-escalation",
+ "helpers": ["waitForHook", "resumeHook", "waitForSleep", "wakeUp"],
+ "expects": "Manager times out, director approves"
+ },
+ {
+ "name": "full-timeout-auto-rejection",
+ "helpers": ["waitForSleep", "wakeUp"],
+ "expects": "All approvers time out, workflow auto-rejects"
+ }
+ ],
+ "runtimeCommands": [
+ { "name": "typecheck", "command": "pnpm typecheck", "expects": "No TypeScript errors" },
+ { "name": "test", "command": "pnpm test", "expects": "All repository tests pass" },
+ { "name": "focused-workflow-test", "command": "pnpm vitest run workflows/purchase-approval.integration.test.ts", "expects": "purchase-approval integration tests pass" }
+ ],
+ "implementationNotes": [
+ "Invariant: A purchase order must receive exactly one final decision: approved, rejected, or auto-rejected",
+ "Invariant: Escalation must only trigger after the primary approval window expires",
+ "Invariant: Hook tokens are deterministic and derived from PO number",
+ "Operator signal: Log approval.requested with PO number and assigned manager",
+ "Operator signal: Log approval.escalated with PO number and director",
+ "Operator signal: Log approval.decided with final status and decision maker"
+ ]
+}
+```
+
+### Verification Summary
+
+{"event":"verification_plan_ready","blueprintName":"purchase-approval","fileCount":2,"testCount":1,"runtimeCommandCount":3,"contractVersion":"1"}
+
+## Checklist Items Exercised
+
+- Hook token strategy (deterministic tokens for both approval tiers)
+- Sleep pairing (every hook races against a timeout)
+- Escalation behavior (distinct tokens, cascading timeouts)
+- Retry semantics (notification = retryable, timeout = domain outcome)
+- Integration test coverage (happy path, escalation, full timeout)
diff --git a/skills/workflow-audit/SKILL.md b/skills/workflow-audit/SKILL.md
new file mode 100644
index 0000000000..4690b6d657
--- /dev/null
+++ b/skills/workflow-audit/SKILL.md
@@ -0,0 +1,179 @@
+---
+name: workflow-audit
+description: Audit an existing durable workflow or proposed workflow design for determinism, step boundaries, hooks/webhooks, retries, compensation, observability, and integration tests. Generates a scored report with P0-P3 severity ratings and a machine-readable summary. Use when the user says "audit workflow", "review workflow", "check workflow", "workflow-audit", "why is this workflow flaky", or "is this workflow safe to retry".
+user-invocable: true
+argument-hint: "[workflow file, flow name, route, or short description]"
+metadata:
+ author: Vercel Inc.
+ version: '0.1'
+---
+
+# workflow-audit
+
+Use this skill when the user wants to inspect an existing workflow implementation or a proposed workflow design without generating new code.
+
+## Inputs
+
+Always read these before producing output:
+
+1. **`skills/workflow/SKILL.md`** — the authoritative API truth source
+2. **`.workflow.md`** — project-specific workflow context, if present
+3. Relevant implementation files — workflow files, API routes, hooks/webhook handlers, and integration tests
+
+If `.workflow.md` does not exist, continue with a code-only audit and explicitly call out any context-dependent uncertainty. Do not block on missing context.
+
+## Audit Process
+
+### 1. Identify the audit target
+
+If the user names a workflow, route, or file, audit that specific target.
+If the user does not name a target, inspect the most relevant workflow files mentioned in the current task or the most recently changed workflow files in the repo.
+
+### 2. Gather evidence
+
+Inspect:
+
+- `workflows/` or `src/workflows/`
+- route files that call `start()`, `resumeHook()`, or `resumeWebhook()`
+- tests importing `@workflow/vitest`, `workflow/api`, or workflow files
+- `.workflow.md` for business invariants, failure expectations, timeout rules, and observability requirements
+
+Do not rewrite code in this skill. Audit only.
+
+### 3. Score the workflow across 12 checks
+
+Score each check from **0-4**:
+
+- **0** — broken / dangerous
+- **1** — major risk
+- **2** — partial / inconsistent
+- **3** — solid with minor gaps
+- **4** — correct and production-ready
+
+Run these exact checks:
+
+1. **Determinism boundary**
+2. **Step granularity**
+3. **Pass-by-value / serialization**
+4. **Hook token strategy**
+5. **Webhook response mode**
+6. **`start()` placement**
+7. **Stream I/O placement**
+8. **Idempotency keys**
+9. **Retry semantics**
+10. **Rollback / compensation**
+11. **Observability streams**
+12. **Integration test coverage**
+
+### 4. Tag every issue with P0-P3 severity
+
+- **P0 Blocking** — can corrupt business invariants, duplicate side effects, hang indefinitely, or make the workflow unrecoverable
+- **P1 Major** — likely to fail in production or break replay/resume under common conditions
+- **P2 Minor** — correctness is mostly intact, but gaps remain
+- **P3 Polish** — cleanup, clarity, maintainability, or developer-experience issue
+
+### 5. Recommend the next skill intentionally
+
+Choose the single best next skill based on the dominant failure mode:
+
+- `workflow-teach` — missing repo-level context, business rules, or failure expectations
+- `workflow-build` — major redesign or rewrite needed
+- `workflow-idempotency` — duplicate side effects or replay safety are the main risk
+- `workflow-timeout` — hooks, sleeps, wake-up behavior, or expiry rules are weak
+- `workflow-webhook` — ingress, deduplication, or webhook response handling is weak
+- `workflow-saga` — compensation or rollback logic is weak
+- `workflow-observe` — logs, streams, or terminal signals are weak
+- `workflow-approval` — approval/escalation logic is weak
+- `workflow` — code is mostly sound and the user only needs API guidance
+
+## Output Format
+
+When you finish, output these exact sections:
+
+## Audit Scorecard
+
+Provide a table with one row per check:
+
+| Check | Score | Key finding |
+|-------|-------|-------------|
+| Determinism boundary | 0-4 | ... |
+| Step granularity | 0-4 | ... |
+| Pass-by-value / serialization | 0-4 | ... |
+| Hook token strategy | 0-4 | ... |
+| Webhook response mode | 0-4 | ... |
+| `start()` placement | 0-4 | ... |
+| Stream I/O placement | 0-4 | ... |
+| Idempotency keys | 0-4 | ... |
+| Retry semantics | 0-4 | ... |
+| Rollback / compensation | 0-4 | ... |
+| Observability streams | 0-4 | ... |
+| Integration test coverage | 0-4 | ... |
+
+Then provide **Total: `<score>/48`** and a one-line rating:
+
+- 42-48: Excellent
+- 34-41: Good
+- 24-33: Risky
+- 12-23: Fragile
+- 0-11: Critical
+
+## Executive Summary
+
+Summarize the workflow's overall health, the 2-4 most important risks, and the single best next skill.
+
+## Detailed Findings by Severity
+
+For each issue, use this exact shape:
+
+- **[P?] Issue name**
+ - **Location:** file, function, or flow segment
+ - **Why it matters:** concrete replay/resume or business-risk explanation
+ - **Recommendation:** concrete fix
+ - **Suggested skill:** one of the workflow skills above
+
+## Systemic Risks
+
+Call out recurring patterns that appear in more than one place, such as missing idempotency namespaces, direct stream I/O in workflow context, or weak timeout coverage.
+
+## Positive Findings
+
+Note what is already correct and should not be regressed.
+
+## Audit Summary
+
+Immediately after the narrative sections, emit a single line of valid JSON with these exact fields:
+
+```
+{"event":"workflow_audit_complete","target":"<target>","score":<n>,"maxScore":48,"p0":<n>,"p1":<n>,"p2":<n>,"p3":<n>,"contractVersion":"1"}
+```
+
+## Hard Rules
+
+Flag any violation of these as at least **P1**, and mark it **P0** if it can duplicate side effects, deadlock a workflow, or make replay invalid:
+
+1. A `"use workflow"` function must not perform side effects or direct stream I/O.
+2. All external I/O must live in `"use step"` functions.
+3. `createWebhook()` must not use custom tokens.
+4. `start()` inside a workflow must be wrapped in a `"use step"` function.
+5. Side-effecting steps must have stable idempotency keys.
+6. Compensation must exist for irreversible partial success.
+7. Timeout paths are domain outcomes, not accidental hangs.
+8. Integration tests must cover each suspension type that the workflow uses.
+
+## Sample Usage
+
+**Input:** `/workflow-audit purchase-approval`
+
+**Expected behavior:** audits the workflow implementation and tests, reports issues like missing `waitForSleep` coverage or non-deterministic approval tokens, recommends the next workflow skill, and emits the `workflow_audit_complete` JSON summary.
+
+Sample prompt:
+
+```
+/workflow-audit approval-expiry-escalation
+```
+
+Expected machine-readable line:
+
+```json
+{"event":"workflow_audit_complete","target":"approval-expiry-escalation","score":34,"maxScore":48,"p0":0,"p1":2,"p2":4,"p3":1,"contractVersion":"1"}
+```
diff --git a/skills/workflow-build/SKILL.md b/skills/workflow-build/SKILL.md
new file mode 100644
index 0000000000..b77df52780
--- /dev/null
+++ b/skills/workflow-build/SKILL.md
@@ -0,0 +1,228 @@
+---
+name: workflow-build
+description: Build durable workflows interactively, guided by project context from .workflow.md. Reads the API reference, applies a stress checklist, and produces TypeScript code + tests. Use after workflow-teach. Triggers on "build workflow", "workflow-build", "implement workflow", or "create workflow".
+metadata:
+ author: Vercel Inc.
+ version: '0.4'
+---
+
+# workflow-build
+
+Use this skill when the user wants to build a durable workflow. It reads project context, walks through design decisions interactively, and produces working TypeScript code with integration tests.
+
+## Skill Loop Position
+
+**Stage 2 of 2** in the workflow skill loop: teach → **build**
+
+| Stage | Skill | Purpose |
+|-------|-------|---------|
+| 1 | workflow-teach | Capture project context into `.workflow.md` |
+| **2** | **workflow-build** (you are here) | Build workflow code guided by context |
+
+**Prerequisite:** Run `workflow-teach` first to populate `.workflow.md`. If `.workflow.md` does not exist, tell the user to run `workflow-teach` first.
+
+## Inputs
+
+Always read these before producing output:
+
+1. **`skills/workflow/SKILL.md`** — the authoritative API truth source. Reference it for all runtime behavior, syntax, and test helper documentation.
+2. **`.workflow.md`** — the project-specific context captured by `workflow-teach`. Use this to inform step boundaries, failure modes, idempotency strategies, and test coverage.
+
+## Interactive Build Process
+
+Walk through these phases in order. Present your work at each phase and wait for user confirmation before proceeding to the next.
+
+### Phase 1 — Propose step boundaries
+
+Read `.workflow.md` and the user's description of the workflow they want to build. Propose:
+
+- Which functions need `"use workflow"` (orchestrators) vs `"use step"` (side effects)
+- Step boundaries — what belongs in each step and why
+- Suspension points — hooks, webhooks, or sleeps needed
+- Stream requirements — what needs to be streamed to the UI or logs
+
+Reference the **Business Rules** and **External Systems** sections of `.workflow.md` to justify your proposals. Present the step breakdown to the user and wait for confirmation.
+
+### Phase 2 — Flag relevant traps
+
+Run every item in the Stress Checklist (below) against the proposed step breakdown. For each item that reveals a risk or issue:
+
+- Name the checklist item
+- Explain what's at risk
+- Propose a concrete fix
+
+Present all findings to the user. If any require changing the step boundaries from Phase 1, propose the changes.
+
+### Phase 3 — Decide failure modes
+
+For each step, decide:
+
+- **FatalError vs RetryableError** — reference `.workflow.md` "Failure Expectations" for what counts as permanent vs transient in this project
+- **Idempotency strategy** — every step with external side effects must have one. Reference `.workflow.md` "Business Rules" for domain-specific idempotency requirements
+- **Compensation plan** — for each irreversible side effect, state what happens if a later step fails. Reference `.workflow.md` "Failure Expectations" for compensation rules
+
+Present the failure model to the user and wait for confirmation.
+
+### Phase 4 — Write code + tests
+
+Produce these files:
+
+1. **Workflow file** (`workflows/<name>.ts`) — contains `"use workflow"` orchestrator and `"use step"` functions following the confirmed step boundaries, failure modes, and idempotency strategies.
+2. **Test file** (`workflows/<name>.integration.test.ts`) — integration tests using `vitest` and `@workflow/vitest`. Must cover:
+ - Happy path
+ - Each suspension point (hook → `waitForHook`/`resumeHook`, webhook → `waitForHook`/`resumeWebhook`, sleep → `waitForSleep`/`wakeUp`)
+ - At least one failure path per error classification
+ - Compensation paths if applicable
+3. **Optional route file** (`app/api/<name>/route.ts`) — include this only when the workflow needs an HTTP start surface, a streaming endpoint, or an external resume surface.
+
+Use the test helpers and patterns documented in `skills/workflow/SKILL.md`.
+
+### Phase 5 — Self-review
+
+Before presenting the final code, run the Stress Checklist one more time against the actual generated code. Fix any issues found. Present the final code with a summary of what the self-review caught and fixed (if anything).
+
+### Phase 6 — Verification Summary
+
+After presenting the final code and self-review, emit a **Verification Artifact** section containing the full verification plan JSON, followed immediately by a single-line **Verification Summary** that an agent can extract in one parse step.
+
+#### Verification Artifact
+
+Present the full verification plan as a fenced JSON block.
+
+The `files` array must list only files that are actually produced. Add the `route` entry only when a route file is generated.
+
+```json
+{
+ "contractVersion": "1",
+  "blueprintName": "<name>",
+  "files": [
+    { "kind": "workflow", "path": "workflows/<name>.ts" },
+    { "kind": "test", "path": "workflows/<name>.integration.test.ts" }
+  ],
+  "testMatrix": [
+    { "name": "happy-path", "helpers": [], "expects": "Workflow completes successfully" },
+    { "name": "hook-suspension", "helpers": ["waitForHook", "resumeHook"], "expects": "Workflow resumes from hook" },
+    { "name": "sleep-suspension", "helpers": ["waitForSleep", "wakeUp"], "expects": "Workflow resumes after sleep" }
+  ],
+  "runtimeCommands": [
+    { "name": "typecheck", "command": "pnpm typecheck", "expects": "No TypeScript errors" },
+    { "name": "test", "command": "pnpm test", "expects": "All repository tests pass" },
+    { "name": "focused-workflow-test", "command": "pnpm vitest run workflows/<name>.integration.test.ts", "expects": "<name> integration tests pass" }
+ ],
+ "implementationNotes": [
+ "Invariant: ...",
+ "Operator signal: ..."
+ ]
+}
+```
+
+#### Verification Summary
+
+Immediately after the artifact block, emit a single line of valid JSON with these exact fields:
+
+```
+{"event":"verification_plan_ready","blueprintName":"<name>","fileCount":<n>,"testCount":<n>,"runtimeCommandCount":<n>,"contractVersion":"1"}
+```
+
+- `event` — always `"verification_plan_ready"`
+- `blueprintName` — matches the artifact's `blueprintName`
+- `fileCount` — number of entries in `files`
+- `testCount` — number of entries in `files` where `kind` is `"test"`
+- `runtimeCommandCount` — number of entries in `runtimeCommands`
+- `contractVersion` — always `"1"`
+
+This summary must be valid single-line JSON. It allows agents to extract verification status in one parse step while humans still get the full artifact and narrative sections above.
+
+## Stress Checklist
+
+Run every item against the workflow — first during Phase 2 (against the proposed design) and again during Phase 5 (against the generated code).
+
+### 1. Determinism boundary
+- Does any `"use workflow"` function perform I/O, direct stream I/O, or use Node.js-only APIs?
+- If the workflow uses time or randomness, is it relying only on the Workflow DevKit's seeded workflow-context APIs rather than external nondeterministic sources?
+
+### 2. Step granularity
+- Are steps too granular (splitting a single logical operation into many tiny steps)?
+- Are steps too coarse (grouping unrelated side effects that need independent retry)?
+- Does each step represent a meaningful unit of work with clear retry semantics?
+
+### 3. Pass-by-value / serialization issues
+- Does any step mutate its input without returning the updated value?
+- Are all step inputs and outputs JSON-serializable?
+- Are there closures, class instances, or functions passed between workflow and step contexts?
+
+### 4. Hook token strategy
+- Does `createHook()` use deterministic tokens where appropriate (e.g. `approval:${entityId}`)?
+- Is `createWebhook()` incorrectly using custom tokens? (It must not.)
+- Are hook tokens unique enough to avoid collisions across concurrent runs?
+
+### 5. Webhook response mode
+- Is the webhook response mode (`static` or `manual`) appropriate for the use case?
+- Does a `static` webhook correctly return a fixed response without blocking?
+
+### 6. `start()` placement
+- Is `start()` (child workflow invocation) called directly from workflow context? (It must be wrapped in a step.)
+
+### 7. Stream I/O placement
+- Does any workflow directly call `getWriter()`, `write()`, `close()`, or read from a stream?
+- If `getWritable()` is called in workflow context, is the stream only being obtained and then passed into a step for actual I/O?
+
+### 8. Idempotency keys
+- Does every step with external side effects have an idempotency strategy?
+- Are idempotency keys derived from stable, unique identifiers (not timestamps or random values)?
+
+### 9. Retry semantics
+- Is `FatalError` used for genuinely permanent failures (invalid input, already-processed, auth denied)?
+- Is `RetryableError` used for genuinely transient failures (network timeout, rate limit, temporary unavailability)?
+- Are `maxRetries` values reasonable for each step's failure mode?
+
+### 10. Rollback / compensation strategy
+- If a step fails after prior steps have committed side effects, is there a compensation step?
+- Are partial-success scenarios handled (e.g. payment charged but email failed)?
+
+### 11. Observability streams
+- Does the workflow emit enough progress information for monitoring?
+- Are stream namespaces used to separate different types of progress data?
+
+### 12. Integration test coverage
+- Does the test plan cover the happy path?
+- Does the test plan cover each suspension point (hook, webhook, sleep)?
+- Does the test plan verify failure paths (`FatalError`, `RetryableError`, timeout)?
+- Are the correct test helpers used (`waitForHook`, `resumeHook`, `waitForSleep`, `wakeUp`, etc.)?
+
+## Hard Rules
+
+These rules are non-negotiable. Violating any of them means the generated code is incorrect:
+
+1. **Workflow functions orchestrate only.** A `"use workflow"` function must not perform I/O, access Node.js APIs, read/write streams, call databases, or invoke external services directly.
+2. **All side effects live in `"use step"`.** Every I/O operation — SDK calls, database queries, filesystem access, HTTP requests, external API calls — must be inside a `"use step"` function.
+3. **`createHook()` may use deterministic tokens.** When a hook needs a stable, predictable token (e.g. `approval:${documentId}`), use `createHook()` with a deterministic token string.
+4. **`createWebhook()` may NOT use deterministic tokens.** Webhooks generate their own tokens. Do not pass custom tokens to `createWebhook()`.
+5. **Stream I/O happens in steps.** `getWritable()` may be called in workflow or step context, but any direct stream interaction must be inside `"use step"` functions. The workflow orchestrator cannot hold stream I/O across replay boundaries.
+6. **`start()` inside a workflow must be wrapped in a step.** Starting a child workflow is a side effect requiring full Node.js access. Wrap it in a `"use step"` function.
+7. **Return mutated values from steps.** Step functions use pass-by-value semantics. If you modify data inside a step, `return` the new value and reassign it in the calling workflow. Mutations to the input object are lost after replay.
+8. **Recommend `FatalError` or `RetryableError` intentionally.** Every error classification must have a clear rationale. `FatalError` means "do not retry, this is a permanent failure." `RetryableError` means "transient issue, try again." Never use one vaguely.
+
+## Anti-Patterns to Avoid
+
+Flag these explicitly when they apply to the workflow being built:
+
+- **Node.js API in workflow context** — `fs`, `path`, `crypto`, `Buffer`, `process`, etc. cannot be used inside `"use workflow"` functions.
+- **Missing idempotency for side effects** — Steps that write to databases, send emails, or call external APIs must have an idempotency strategy (idempotency key, upsert, or check-before-write).
+- **Over-granular step boundaries** — Each step is persisted and replayed. Don't split a single logical operation into many tiny steps. Group related I/O unless you need independent retry or suspension between operations.
+- **Direct stream I/O in workflow context** — `getWritable()` may be called anywhere, but stream reads/writes cannot survive replay. Always perform I/O in steps.
+- **`createWebhook()` with a custom token** — Only `createHook()` supports deterministic tokens.
+- **`start()` called directly from workflow code** — Must be wrapped in a step.
+- **Mutating step inputs without returning** — Pass-by-value means mutations are lost.
+
+## Sample Usage
+
+**Input:** `Build a workflow that ingests a webhook, asks a manager to approve refunds over $500, and streams progress to the UI.`
+
+**Expected behavior:**
+
+1. **Phase 1** proposes: webhook ingress step, approval hook with `approval:${refundId}` token, refund step, notification step, stream progress step — all side effects in `"use step"` functions.
+2. **Phase 2** flags: idempotency needed on refund step, compensation plan for refund-then-notification-failure, stream I/O must happen in a step.
+3. **Phase 3** decides: `RetryableError` on refund with `maxRetries: 3`, `FatalError` if already processed, idempotency key from `refundId`.
+4. **Phase 4** writes: `workflows/refund-approval.ts` with `"use workflow"` orchestrator and `"use step"` functions, plus `workflows/refund-approval.integration.test.ts` using `resumeWebhook()`, `waitForHook()`/`resumeHook()`, and `run.returnValue` assertions.
+5. **Phase 5** self-review confirms: no stream I/O in workflow context, all tokens deterministic, compensation documented, test coverage complete.
diff --git a/skills/workflow-build/goldens/approval-timeout-streaming.md b/skills/workflow-build/goldens/approval-timeout-streaming.md
new file mode 100644
index 0000000000..55f301ce55
--- /dev/null
+++ b/skills/workflow-build/goldens/approval-timeout-streaming.md
@@ -0,0 +1,163 @@
+# Golden Scenario: Approval Timeout with Streaming
+
+## Scenario
+
+An expense approval workflow that waits for a manager's hook-based approval with a 24-hour timeout (sleep). While waiting, it streams status updates to the UI. If the timeout expires, the request is auto-escalated.
+
+## What the Build Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **Stream I/O placement** — `getWritable()` may be called in workflow context to obtain a stream reference, but actual stream writes (`write()`, `close()`) must happen inside a `"use step"` function. The workflow orchestrator cannot hold stream I/O across replay boundaries.
+2. **Determinism boundary** — Stream writes are I/O. A workflow function that directly calls `write()` violates the orchestrate-only rule.
+3. **Hook token strategy** — The approval hook should use a deterministic token like `approval:${expenseId}` to be collision-free across concurrent runs.
+
+### Phase 3 — Failure Modes Decided
+
+- `validateExpense`: `FatalError` for invalid data (code/data bug). Database read failures should be `RetryableError`.
+- `notifyManager`: `RetryableError` with `maxRetries: 3` — notification delivery is transient.
+- `streamStatus`: `RetryableError` with `maxRetries: 2` — stream writes are I/O.
+- `processDecision`: `RetryableError` with `maxRetries: 2` — database update may fail transiently.
+- `escalate`: `RetryableError` with `maxRetries: 3` — escalation must eventually succeed.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { FatalError, RetryableError, getWritable } from "workflow";
+import { createHook, sleep } from "workflow";
+
+type ApprovalDecision = { approved: boolean; reason?: string };
+
+const validateExpense = async (expenseId: string) => {
+ "use step";
+ const expense = await db.expenses.findUnique({ where: { id: expenseId } });
+ if (!expense) throw new FatalError("Expense not found");
+ return expense;
+};
+
+const notifyManager = async (expenseId: string, managerId: string) => {
+ "use step";
+ await notifications.send({
+ idempotencyKey: `notify:${expenseId}`,
+ to: managerId,
+ template: "expense-approval-request",
+ });
+};
+
+const writeStatus = async (
+  stream: ReturnType<typeof getWritable>,
+ status: string
+) => {
+ "use step";
+ // Stream I/O must happen in a step, not in workflow context
+ const writer = stream.getWriter();
+ await writer.write(status);
+ writer.releaseLock();
+};
+
+const processDecision = async (
+ expenseId: string,
+ decision: ApprovalDecision
+) => {
+ "use step";
+ await db.expenses.update({
+ where: { id: expenseId },
+ data: {
+ status: decision.approved ? "approved" : "rejected",
+ reason: decision.reason,
+ },
+ });
+ return decision;
+};
+
+const escalate = async (expenseId: string) => {
+ "use step";
+ await notifications.send({
+ idempotencyKey: `escalate:${expenseId}`,
+ to: "vp-finance",
+ template: "expense-escalation",
+ });
+ await db.expenses.update({
+ where: { id: expenseId },
+ data: { status: "escalated" },
+ });
+};
+
+export default async function expenseApproval(
+ expenseId: string,
+ amount: number,
+ managerId: string
+) {
+ const expense = await validateExpense(expenseId);
+
+ await notifyManager(expenseId, managerId);
+
+ // getWritable() can be called in workflow context
+ const stream = getWritable("expense-status");
+ await writeStatus(stream, "waiting-for-approval");
+
+ // Race: hook approval vs 24h timeout
+ const hook = createHook(`approval:${expenseId}`);
+ const timeout = sleep("24h");
+
+ const result = await Promise.race([hook, timeout]);
+
+ if (result === undefined) {
+ // Timeout fired — escalate
+ await writeStatus(stream, "escalating");
+ await escalate(expenseId);
+ return { expenseId, status: "escalated" };
+ }
+
+ // Manager responded
+ await writeStatus(stream, result.approved ? "approved" : "rejected");
+ await processDecision(expenseId, result);
+
+ return { expenseId, status: result.approved ? "approved" : "rejected" };
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start, resumeHook, getRun } from "workflow/api";
+import { waitForHook, waitForSleep } from "@workflow/vitest";
+import expenseApproval from "../workflows/expense-approval";
+
+describe("expenseApproval", () => {
+ it("manager approves before timeout", async () => {
+ const run = await start(expenseApproval, ["exp-1", 200, "manager-1"]);
+
+ await waitForHook(run, { token: "approval:exp-1" });
+ await resumeHook("approval:exp-1", { approved: true });
+
+ await expect(run.returnValue).resolves.toEqual({
+ expenseId: "exp-1",
+ status: "approved",
+ });
+ });
+
+ it("escalates when manager does not respond within 24h", async () => {
+ const run = await start(expenseApproval, ["exp-2", 500, "manager-2"]);
+
+ const sleepId = await waitForSleep(run);
+ await getRun(run.runId).wakeUp({ correlationIds: [sleepId] });
+
+ await expect(run.returnValue).resolves.toEqual({
+ expenseId: "exp-2",
+ status: "escalated",
+ });
+ });
+});
+```
+
+## Checklist Items Exercised
+
+- Stream I/O placement
+- Determinism boundary
+- Hook token strategy
+- Integration test coverage (timeout path, approval path)
+- Retry semantics
diff --git a/skills/workflow-build/goldens/child-workflow-handoff.md b/skills/workflow-build/goldens/child-workflow-handoff.md
new file mode 100644
index 0000000000..0d49791838
--- /dev/null
+++ b/skills/workflow-build/goldens/child-workflow-handoff.md
@@ -0,0 +1,85 @@
+# Golden Scenario: Child Workflow Handoff
+
+## Scenario
+
+A batch-processing workflow that receives a list of document IDs, then starts a child workflow for each document. The parent workflow awaits all child completions and aggregates results.
+
+## What the Build Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **`start()` placement** — Starting child workflows via `start()` is a side effect requiring full Node.js access. It must be wrapped in a `"use step"` function, not called directly from workflow context.
+2. **Pass-by-value / serialization issues** — Child workflow run handles are not serializable. The step must return serializable run IDs, not live objects.
+3. **Step granularity** — Starting all children in a single step means if one `start()` fails, all must retry. Consider whether each child start should be an independent step for independent retry.
+
+### Phase 3 — Failure Modes Decided
+
+- `startChildWorkflow`: `RetryableError` with `maxRetries: 3` — child start is a network call.
+- `aggregateResults`: `RetryableError` with `maxRetries: 2` — fetching child results may fail transiently.
+- Each child start is an independent step so one failure doesn't retry all.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { start as startChild, getRun } from "workflow/api";
+import { processDocument } from "./process-document";
+
+const startDocumentWorkflow = async (documentId: string) => {
+ "use step";
+ // start() must be in a step — it's a side effect
+ const run = await startChild(processDocument, [documentId]);
+ // Return serializable ID, not the live run object
+ return run.runId;
+};
+
+const getChildResult = async (runId: string) => {
+ "use step";
+ const run = await getRun(runId);
+ return run.returnValue;
+};
+
+export default async function batchProcessDocuments(documentIds: string[]) {
+ // Start each child in its own step for independent retry
+ const runIds: string[] = [];
+ for (const docId of documentIds) {
+ const runId = await startDocumentWorkflow(docId);
+ runIds.push(runId);
+ }
+
+ // Aggregate results
+ const results = [];
+ for (const runId of runIds) {
+ const result = await getChildResult(runId);
+ results.push(result);
+ }
+
+ return { processed: results.length, results };
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start } from "workflow/api";
+import batchProcessDocuments from "../workflows/batch-process-documents";
+
+describe("batchProcessDocuments", () => {
+ it("processes all documents", async () => {
+ const run = await start(batchProcessDocuments, [
+ ["doc-1", "doc-2", "doc-3"],
+ ]);
+ const result = await run.returnValue;
+ expect(result.processed).toBe(3);
+ });
+});
+```
+
+## Checklist Items Exercised
+
+- `start()` placement
+- Pass-by-value / serialization issues
+- Step granularity
+- Integration test coverage
diff --git a/skills/workflow-build/goldens/compensation-saga.md b/skills/workflow-build/goldens/compensation-saga.md
new file mode 100644
index 0000000000..796de4d6da
--- /dev/null
+++ b/skills/workflow-build/goldens/compensation-saga.md
@@ -0,0 +1,170 @@
+# Golden Scenario: Compensation Saga
+
+## Scenario
+
+A multi-step order fulfillment workflow that charges a payment, reserves inventory, and sends a confirmation email. If inventory reservation fails after payment has been charged, a compensation step must refund the payment.
+
+## What the Build Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **Rollback / compensation strategy** — Payment charging is an irreversible side effect. If `reserveInventory` fails after `chargePayment` succeeds, the workflow must refund the payment. A compensation step is required.
+2. **Idempotency keys** — `chargePayment` and `reserveInventory` have external side effects. Derive idempotency keys from `orderId` (e.g. `payment:${orderId}`, `inventory:${orderId}`) to prevent duplicate charges on replay.
+
+### Phase 3 — Failure Modes Decided
+
+- `chargePayment`: `RetryableError` with `maxRetries: 3` for transient payment failures. `FatalError` for invalid card or insufficient funds.
+- `reserveInventory`: `RetryableError` with `maxRetries: 2` for transient warehouse API failures. `FatalError` for out-of-stock (triggers compensation).
+- `refundPayment`: `RetryableError` with `maxRetries: 5` — refund must eventually succeed.
+- `sendConfirmation`: `RetryableError` with `maxRetries: 2` — email delivery is transient.
+- Compensation: if `reserveInventory` throws `FatalError`, run `refundPayment` before terminating.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { FatalError, RetryableError } from "workflow";
+
+const chargePayment = async (orderId: string, amount: number) => {
+ "use step";
+ // Idempotency key: payment:${orderId}
+ const result = await paymentProvider.charge({
+ idempotencyKey: `payment:${orderId}`,
+ amount,
+ });
+ return result;
+};
+
+const reserveInventory = async (orderId: string, items: CartItem[]) => {
+ "use step";
+ // Idempotency key: inventory:${orderId}
+ const reservation = await warehouse.reserve({
+ idempotencyKey: `inventory:${orderId}`,
+ items,
+ });
+ return reservation;
+};
+
+const refundPayment = async (orderId: string, chargeId: string) => {
+ "use step";
+ await paymentProvider.refund({
+ idempotencyKey: `refund:${orderId}`,
+ chargeId,
+ });
+};
+
+const sendConfirmation = async (orderId: string, email: string) => {
+ "use step";
+ await emailService.send({
+ idempotencyKey: `confirmation:${orderId}`,
+ to: email,
+ template: "order-confirmed",
+ });
+};
+
+export default async function orderFulfillment(
+ orderId: string,
+ amount: number,
+ items: CartItem[],
+ email: string
+) {
+ const charge = await chargePayment(orderId, amount);
+
+ try {
+ const reservation = await reserveInventory(orderId, items);
+ } catch (error) {
+ // Compensation: refund payment if inventory fails permanently
+ if (error instanceof FatalError) {
+ await refundPayment(orderId, charge.id);
+ throw error;
+ }
+ throw error;
+ }
+
+ await sendConfirmation(orderId, email);
+
+ return { orderId, status: "fulfilled" };
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start } from "workflow/api";
+import { FatalError } from "workflow";
+import orderFulfillment from "../workflows/order-fulfillment";
+
+describe("orderFulfillment", () => {
+ it("completes happy path", async () => {
+ const run = await start(orderFulfillment, [
+ "order-1",
+ 100,
+ [{ sku: "A", qty: 1 }],
+ "user@example.com",
+ ]);
+ await expect(run.returnValue).resolves.toEqual({
+ orderId: "order-1",
+ status: "fulfilled",
+ });
+ });
+
+ it("refunds payment when inventory fails", async () => {
+ // Mock reserveInventory to throw FatalError (out of stock)
+ const run = await start(orderFulfillment, [
+ "order-2",
+ 50,
+ [{ sku: "B", qty: 999 }],
+ "user@example.com",
+ ]);
+ await expect(run.returnValue).rejects.toThrow(FatalError);
+ // Verify refundPayment was called (compensation executed)
+ });
+});
+```
+
+## Verification Artifact
+
+```json
+{
+ "contractVersion": "1",
+ "blueprintName": "compensation-saga",
+ "files": [
+ { "kind": "workflow", "path": "workflows/order-fulfillment.ts" },
+ { "kind": "test", "path": "workflows/order-fulfillment.integration.test.ts" }
+ ],
+ "testMatrix": [
+ {
+ "name": "happy-path",
+ "helpers": [],
+ "expects": "Order completes successfully with payment charged and inventory reserved"
+ },
+ {
+ "name": "compensation-on-inventory-failure",
+ "helpers": [],
+ "expects": "Payment is refunded when inventory reservation fails"
+ }
+ ],
+ "runtimeCommands": [
+ { "name": "typecheck", "command": "pnpm typecheck", "expects": "No TypeScript errors" },
+ { "name": "test", "command": "pnpm test", "expects": "All repository tests pass" },
+ { "name": "focused-workflow-test", "command": "pnpm vitest run workflows/order-fulfillment.integration.test.ts", "expects": "order-fulfillment integration tests pass" }
+ ],
+ "implementationNotes": [
+ "Invariant: A payment charge must be compensated by a refund if inventory reservation fails",
+ "Invariant: Idempotency keys derived from orderId prevent duplicate charges on replay",
+ "Operator signal: Log compensation.triggered with orderId when refund begins after inventory failure"
+ ]
+}
+```
+
+### Verification Summary
+
+{"event":"verification_plan_ready","blueprintName":"compensation-saga","fileCount":2,"testCount":2,"runtimeCommandCount":3,"contractVersion":"1"}
+
+## Checklist Items Exercised
+
+- Rollback / compensation strategy
+- Idempotency keys
+- Retry semantics
+- Integration test coverage
diff --git a/skills/workflow-build/goldens/multi-event-hook-loop.md b/skills/workflow-build/goldens/multi-event-hook-loop.md
new file mode 100644
index 0000000000..877cb72ebe
--- /dev/null
+++ b/skills/workflow-build/goldens/multi-event-hook-loop.md
@@ -0,0 +1,129 @@
+# Golden Scenario: Multi-Event Hook Loop
+
+## Scenario
+
+A document review workflow where multiple reviewers must each submit feedback via hooks. The workflow must collect all reviews before proceeding, not just the first one.
+
+## What the Build Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **Hook token strategy** — With multiple reviewers, each hook needs a unique deterministic token like `review:${documentId}:${reviewerId}`. A single hook token would only capture the first response.
+2. **Suspension primitive choice** — Waiting for N events requires either an `AsyncIterable` hook loop, `Promise.all()` over multiple hooks, or a `for await` pattern — not a single `await` on one hook.
+3. **Step granularity** — `createHook()` with deterministic tokens can be called from workflow context (it's not I/O). No need to wrap hook creation in a step.
+4. **Idempotency keys** — `finalizeDocument` has external side effects. Use `finalize:${documentId}` as idempotency key.
+
+### Phase 3 — Failure Modes Decided
+
+- `finalizeDocument`: `RetryableError` with `maxRetries: 2` — database/notification calls are transient.
+- Hook creation: no failure mode needed — `createHook()` is deterministic and replay-safe.
+- Each reviewer's hook resolves independently — one slow reviewer doesn't block others from submitting.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { createHook } from "workflow";
+
+type ReviewFeedback = { reviewerId: string; approved: boolean; comments: string };
+
+const finalizeDocument = async (
+ documentId: string,
+ reviews: ReviewFeedback[]
+) => {
+ "use step";
+ await db.documents.update({
+ where: { id: documentId },
+ data: {
+ status: "reviewed",
+ reviews,
+ idempotencyKey: `finalize:${documentId}`,
+ },
+ });
+ await notifications.send({
+ idempotencyKey: `finalize-notify:${documentId}`,
+ to: "document-owner",
+ template: "review-complete",
+ });
+ return { documentId, reviewCount: reviews.length };
+};
+
+export default async function multiReviewer(
+ documentId: string,
+ reviewerIds: string[]
+) {
+ // Create one hook per reviewer with deterministic tokens
+ // createHook() can be called in workflow context — it's not I/O
+ const hookPromises = reviewerIds.map((reviewerId) =>
+ createHook(`review:${documentId}:${reviewerId}`)
+ );
+
+ // Wait for ALL reviewers, not just the first
+ const reviews = await Promise.all(hookPromises);
+
+ const result = await finalizeDocument(documentId, reviews);
+
+ return result;
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start, resumeHook } from "workflow/api";
+import { waitForHook } from "@workflow/vitest";
+import multiReviewer from "../workflows/multi-reviewer";
+
+describe("multiReviewer", () => {
+ it("collects all reviews before finalizing", async () => {
+ const reviewerIds = ["alice", "bob", "carol"];
+ const run = await start(multiReviewer, ["doc-1", reviewerIds]);
+
+ // Resume each reviewer's hook with unique tokens
+ for (const reviewerId of reviewerIds) {
+ await waitForHook(run, { token: `review:doc-1:${reviewerId}` });
+ await resumeHook(`review:doc-1:${reviewerId}`, {
+ reviewerId,
+ approved: true,
+ comments: "Looks good",
+ });
+ }
+
+ const result = await run.returnValue;
+ expect(result.reviewCount).toBe(3);
+ });
+
+ it("waits for slow reviewer", async () => {
+ const run = await start(multiReviewer, ["doc-2", ["alice", "bob"]]);
+
+ // Alice responds immediately
+ await waitForHook(run, { token: "review:doc-2:alice" });
+ await resumeHook("review:doc-2:alice", {
+ reviewerId: "alice",
+ approved: true,
+ comments: "LGTM",
+ });
+
+ // Bob responds later
+ await waitForHook(run, { token: "review:doc-2:bob" });
+ await resumeHook("review:doc-2:bob", {
+ reviewerId: "bob",
+ approved: false,
+ comments: "Needs changes",
+ });
+
+ const result = await run.returnValue;
+ expect(result.reviewCount).toBe(2);
+ });
+});
+```
+
+## Checklist Items Exercised
+
+- Hook token strategy (unique per reviewer)
+- Suspension primitive choice (Promise.all, not single await)
+- Step granularity (createHook in workflow context)
+- Idempotency keys
+- Integration test coverage (multi-reviewer, slow reviewer)
diff --git a/skills/workflow-build/goldens/rate-limit-retry.md b/skills/workflow-build/goldens/rate-limit-retry.md
new file mode 100644
index 0000000000..cc7ab0163c
--- /dev/null
+++ b/skills/workflow-build/goldens/rate-limit-retry.md
@@ -0,0 +1,128 @@
+# Golden Scenario: Rate-Limit Retry
+
+## Scenario
+
+A data sync workflow that fetches records from a rate-limited third-party API in pages, transforms each page, and upserts results into a database. The API returns HTTP 429 when rate-limited.
+
+## What the Build Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **Retry semantics** — HTTP 429 (rate limit) is a textbook transient failure. The fetch step must use `RetryableError`, not `FatalError`. Reserve `FatalError` for permanent failures like HTTP 401/403.
+2. **Idempotency keys** — The upsert step writes to a database. Use a key like `sync:${syncId}:page:${pageNumber}` to prevent duplicate writes on replay.
+3. **Pass-by-value / serialization issues** — If fetching returns large record sets, ensure payloads are JSON-serializable and within event log limits.
+
+### Phase 3 — Failure Modes Decided
+
+- `fetchPage`: `RetryableError` with `maxRetries: 5` for HTTP 429 and network errors. `FatalError` for HTTP 401/403 (auth failure — retrying won't help).
+- `transformRecords`: `FatalError` — a transformation error is a code bug, not transient. Retrying won't fix it.
+- `upsertRecords`: `RetryableError` with `maxRetries: 3` for transient database errors. Idempotency key from `syncId` + page number.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { FatalError, RetryableError, getWritable } from "workflow";
+
+const fetchPage = async (apiUrl: string, page: number, pageSize: number) => {
+ "use step";
+ const response = await fetch(
+ `${apiUrl}?page=${page}&pageSize=${pageSize}`
+ );
+
+ if (response.status === 429) {
+ throw new RetryableError("Rate limited — will retry with backoff");
+ }
+ if (response.status === 401 || response.status === 403) {
+ throw new FatalError("Authentication failed — cannot retry");
+ }
+ if (!response.ok) {
+ throw new RetryableError(`API error ${response.status}`);
+ }
+
+ return response.json();
+};
+
+const transformRecords = async (records: ApiRecord[]) => {
+ "use step";
+ // Pure transformation — FatalError if this fails (code bug)
+ return records.map((r) => ({
+ id: r.externalId,
+ name: r.fields.name,
+ updatedAt: r.fields.modified,
+ }));
+};
+
+const upsertRecords = async (
+ syncId: string,
+ page: number,
+ records: LocalRecord[]
+) => {
+ "use step";
+ await db.upsert({
+ idempotencyKey: `sync:${syncId}:page:${page}`,
+ records,
+ });
+ return records.length;
+};
+
+export default async function dataSync(
+ syncId: string,
+ apiUrl: string,
+ pageSize: number
+) {
+ const stream = getWritable("sync-progress");
+
+ let page = 0;
+ let totalSynced = 0;
+ let hasMore = true;
+
+ while (hasMore) {
+ const data = await fetchPage(apiUrl, page, pageSize);
+ const transformed = await transformRecords(data.records);
+ const count = await upsertRecords(syncId, page, transformed);
+
+ totalSynced += count;
+ hasMore = data.hasNextPage;
+ page++;
+ }
+
+ return { syncId, totalSynced, pages: page };
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start } from "workflow/api";
+import { FatalError } from "workflow";
+import dataSync from "../workflows/data-sync";
+
+describe("dataSync", () => {
+ it("syncs all pages", async () => {
+ const run = await start(dataSync, ["sync-1", "https://api.example.com/records", 100]);
+ const result = await run.returnValue;
+ expect(result.totalSynced).toBeGreaterThan(0);
+ });
+
+ it("retries on rate limit (429)", async () => {
+ // API returns 429 on first attempt, 200 on second
+ const run = await start(dataSync, ["sync-2", "https://api.example.com/records", 50]);
+ await expect(run.returnValue).resolves.toBeDefined();
+ });
+
+ it("fails permanently on auth error", async () => {
+ // API returns 401
+ const run = await start(dataSync, ["sync-3", "https://api.example.com/records", 50]);
+ await expect(run.returnValue).rejects.toThrow(FatalError);
+ });
+});
+```
+
+## Checklist Items Exercised
+
+- Retry semantics (`RetryableError` vs `FatalError`)
+- Idempotency keys
+- Pass-by-value / serialization issues
+- Integration test coverage
diff --git a/skills/workflow-idempotency/SKILL.md b/skills/workflow-idempotency/SKILL.md
new file mode 100644
index 0000000000..573fc1b256
--- /dev/null
+++ b/skills/workflow-idempotency/SKILL.md
@@ -0,0 +1,115 @@
+---
+name: workflow-idempotency
+description: Build a durable workflow where side effects must remain safe under retries, replay, and duplicate delivery. Use when the user says "idempotency workflow", "workflow-idempotency", "duplicate", "replay", or "retry safety".
+user-invocable: true
+argument-hint: "[workflow prompt]"
+metadata:
+ author: Vercel Inc.
+ version: '0.1'
+---
+
+# workflow-idempotency
+
+Use this skill when the user wants to build a workflow where external side effects must remain safe under retries, replay, and duplicate delivery. This is a scenario entrypoint that routes into the existing teach → build pipeline with idempotency-specific guardrails.
+
+## Context Capture
+
+If `.workflow.md` exists in the project root, read it and use its context. If it does not exist, run a focused context capture covering these idempotency-specific questions before proceeding:
+
+1. **Duplicate ingress** — "Can the same event arrive more than once (e.g. webhook at-least-once delivery, queue retry)? What entity ID anchors deduplication?"
+2. **Replay safety** — "Which steps produce external side effects that would be harmful if replayed (charges, emails, reservations)?"
+3. **Idempotency key strategy** — "What stable identifiers are available to derive idempotency keys for each side-effecting step?"
+4. **External provider support** — "Do downstream APIs accept idempotency keys natively, or must the workflow enforce deduplication itself?"
+5. **Compensation requirements** — "If a step fails after earlier steps committed with idempotency keys, what compensation is needed?"
+6. **Observability** — "What must operators see in logs for duplicate detection, idempotency cache hits, and replay events?"
+
+Save the answers into `.workflow.md` following the same 8-section format used by `workflow-teach`.
+
+## Required Design Constraints
+
+When building an idempotency-safe workflow, the following constraints are non-negotiable:
+
+### Duplicate delivery detection
+
+The workflow must detect and safely handle duplicate event delivery. The deduplication strategy must use a stable identifier from the ingress payload (e.g. Stripe event ID, Shopify order ID, message queue deduplication ID). Duplicate deliveries after successful processing must be treated as `FatalError` (skip, do not reprocess).
+
+### Stable idempotency keys on every side-effecting step
+
+Every step that produces an external side effect must use an idempotency key derived from a stable, unique identifier — never from timestamps or random values. Examples:
+
+- Payment charge: `payment:${eventId}`
+- Inventory reservation: `inventory:${eventId}`
+- Notification: `notify:${eventId}`
+- Refund: `refund:${eventId}`
+
+### Replay safety verification
+
+The workflow must be safe to replay from any point in the event log. This means:
+
+- Steps with idempotency keys produce the same result on replay (no duplicate side effects)
+- Steps without external side effects (pure computation) are naturally replay-safe
+- Steps that read external state must tolerate stale reads from replay
+
+### Compensation with idempotency keys
+
+If a step fails after earlier steps committed with idempotency keys, compensation steps must also use stable idempotency keys. Compensation steps must use `RetryableError` with high `maxRetries` — compensation must eventually succeed.
+
+## Build Process
+
+Follow the same six-phase interactive build process as `workflow-build`:
+
+1. **Propose step boundaries** — identify `"use workflow"` orchestrator vs `"use step"` functions, deduplication check, side-effecting steps with idempotency keys, compensation steps
+2. **Flag relevant traps** — run the stress checklist with special attention to idempotency keys on every side-effecting step, duplicate ingress handling, and replay safety
+3. **Decide failure modes** — `FatalError` for duplicate/already-processed, `RetryableError` for transient failures, compensation plan for each irreversible step
+4. **Write code + tests** — produce workflow file and integration tests
+5. **Self-review** — re-run the stress checklist against generated code
+6. **Verification summary** — emit the verification artifact and `verification_plan_ready` summary
+
+### Required test coverage
+
+Integration tests must exercise:
+
+- **Happy path** — event received, all steps succeed with idempotency keys
+- **Duplicate event** — second delivery is detected and skipped (no-op)
+- **Replay safety** — replayed steps do not produce duplicate side effects
+- **Compensation path** — downstream step fails after earlier step committed, compensation executes with its own idempotency keys
+
+## Anti-Patterns
+
+Flag these explicitly when they appear in the workflow:
+
+- **Missing idempotency key on a side-effecting step** — every external call must have a stable idempotency key to survive replay
+- **Timestamp or random idempotency keys** — keys must be derived from stable entity identifiers; `Date.now()` or `crypto.randomUUID()` break on replay
+- **Missing deduplication on ingress** — without duplicate detection, at-least-once delivery causes double-processing
+- **Idempotency key reuse across different operations** — each step must have a distinct key namespace (e.g. `payment:${id}` vs `inventory:${id}`)
+- **Missing compensation idempotency keys** — compensation steps need their own stable keys to survive replay
+- **Node.js APIs in workflow context** — `fs`, `crypto`, `Buffer`, etc. cannot be used inside `"use workflow"` functions
+- **Direct stream I/O in workflow context** — `getWritable()` may be called in workflow context, but actual writes must happen in steps
+- **`start()` called directly from workflow code** — must be wrapped in a step
+
+## Inputs
+
+Always read these before producing output:
+
+1. **`skills/workflow/SKILL.md`** — the authoritative API truth source
+2. **`.workflow.md`** — project-specific context (if present)
+
+## Verification Contract
+
+This skill terminates with the same verification contract as `workflow-build`. The final output must include:
+
+1. A **Verification Artifact** — fenced JSON block with `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`
+2. A **Verification Summary** — single-line JSON: `{"event":"verification_plan_ready","blueprintName":"<name>","fileCount":<n>,"testCount":<n>,"runtimeCommandCount":<n>,"contractVersion":"1"}`
+
+## Sample Usage
+
+**Input:** `/workflow-idempotency Make duplicate Stripe checkout events safe without double-charging or double-emailing.`
+
+**Expected behavior:**
+
+1. Reads `.workflow.md` if present; otherwise runs focused context capture
+2. Proposes: deduplication check step by Stripe event ID, payment charge step with `payment:${eventId}` idempotency key, inventory step with `inventory:${eventId}` key, confirmation email step with `notify:${eventId}` key, compensation refund step with `refund:${eventId}` key
+3. Flags: idempotency key required on every side-effecting step, duplicate ingress detection, replay safety, compensation keys for refund path
+4. Writes: `workflows/stripe-checkout.ts` + `workflows/stripe-checkout.integration.test.ts`
+5. Tests cover: happy path, duplicate event no-op, replay safety verification, compensation with idempotency keys on failure
+6. Emits verification artifact and `verification_plan_ready` summary
diff --git a/skills/workflow-idempotency/goldens/duplicate-webhook-order.md b/skills/workflow-idempotency/goldens/duplicate-webhook-order.md
new file mode 100644
index 0000000000..0d6a7bcbf4
--- /dev/null
+++ b/skills/workflow-idempotency/goldens/duplicate-webhook-order.md
@@ -0,0 +1,227 @@
+# Golden Scenario: Duplicate Webhook Order (Idempotency Focus)
+
+## User Prompt
+
+```
+/workflow-idempotency Make duplicate Stripe checkout events safe without double-charging or double-emailing.
+```
+
+## Scenario
+
+An e-commerce platform receives checkout completion events from Stripe. Due to Stripe's at-least-once delivery guarantee, the same event may arrive multiple times. The workflow must charge payment, reserve inventory, and send a confirmation email — but must never double-charge, double-reserve, or double-email on duplicate deliveries or replay. If inventory reservation fails after payment, the payment must be refunded using its own idempotency key.
+
+## Context Capture
+
+The scenario skill checks for `.workflow.md` first. In this example it does not exist, so the focused idempotency-specific interview runs:
+
+| Question | Expected Answer |
+|----------|----------------|
+| Duplicate ingress | Stripe checkout events use at-least-once delivery; deduplicate by Stripe event ID |
+| Replay safety | Payment charge, inventory reservation, and confirmation email all produce external side effects that must not duplicate on replay |
+| Idempotency key strategy | Payment: `payment:${eventId}`, Inventory: `inventory:${eventId}`, Notification: `notify:${eventId}`, Refund: `refund:${eventId}` |
+| External provider support | Stripe accepts idempotency keys natively; warehouse API supports upsert by key; email provider deduplicates by message ID |
+| Compensation requirements | Refund payment if inventory reservation fails after charge |
+| Observability | Log duplicate detection (idempotency cache hit/miss), step completion with idempotency key used, compensation events |
+
+The captured context is saved to `.workflow.md` with sections: Project Context, Business Rules, External Systems, Failure Expectations, Observability Needs, Approved Patterns, Open Questions.
+
+## What the Scenario Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **Idempotency keys on every side-effecting step** — Payment charge, inventory reservation, confirmation email, and refund all need stable idempotency keys derived from the Stripe event ID. Timestamps or random values would break on replay.
+2. **Duplicate ingress detection** — The first step must check whether this event ID has already been processed. If yes, return early with a `FatalError` (skip). This prevents the entire workflow from re-executing on duplicate delivery.
+3. **Replay safety** — The workflow runtime replays the event log on cold start. Every step must produce the same result on replay because idempotency keys are stable.
+4. **Compensation idempotency** — If `reserveInventory` fails after `chargePayment` succeeds, the refund step must use its own idempotency key (`refund:${eventId}`) to prevent double-refunding on replay.
+
+### Phase 3 — Failure Modes Decided
+
+- `checkDuplicate`: `FatalError` if already processed (skip entire workflow). No retry needed.
+- `chargePayment`: `RetryableError` with `maxRetries: 3` for transient Stripe failures. `FatalError` for invalid card or insufficient funds.
+- `reserveInventory`: `RetryableError` with `maxRetries: 2` for transient warehouse API failures. `FatalError` for out-of-stock (triggers compensation).
+- `refundPayment`: `RetryableError` with `maxRetries: 5` — refund must eventually succeed. Uses `refund:${eventId}` idempotency key.
+- `sendConfirmation`: `RetryableError` with `maxRetries: 2` — email delivery is transient. Uses `notify:${eventId}` idempotency key.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { FatalError, RetryableError } from "workflow";
+
+const checkDuplicate = async (eventId: string) => {
+ "use step";
+ const existing = await db.events.findUnique({ where: { stripeEventId: eventId } });
+ if (existing?.status === "completed") {
+ throw new FatalError(`Event ${eventId} already processed`);
+ }
+ return existing;
+};
+
+const chargePayment = async (eventId: string, amount: number) => {
+ "use step";
+ const result = await stripe.charges.create({
+ amount,
+ idempotencyKey: `payment:${eventId}`,
+ });
+ return result;
+};
+
+const reserveInventory = async (eventId: string, items: LineItem[]) => {
+ "use step";
+ const reservation = await warehouse.reserve({
+ idempotencyKey: `inventory:${eventId}`,
+ items,
+ });
+ return reservation;
+};
+
+const refundPayment = async (eventId: string, chargeId: string) => {
+ "use step";
+ await stripe.refunds.create({
+ chargeId,
+ idempotencyKey: `refund:${eventId}`,
+ });
+};
+
+const sendConfirmation = async (eventId: string, email: string) => {
+ "use step";
+ await emailService.send({
+ idempotencyKey: `notify:${eventId}`,
+ to: email,
+ template: "checkout-confirmed",
+ });
+};
+
+export default async function stripeCheckout(
+ eventId: string,
+ amount: number,
+ items: LineItem[],
+ email: string
+) {
+ // Duplicate ingress check — skip if already processed
+ await checkDuplicate(eventId);
+
+ // Charge payment with stable idempotency key
+ const charge = await chargePayment(eventId, amount);
+
+ // Reserve inventory — compensate with refund on failure
+ try {
+ await reserveInventory(eventId, items);
+ } catch (error) {
+ if (error instanceof FatalError) {
+ // Compensation with its own idempotency key
+ await refundPayment(eventId, charge.id);
+ throw error;
+ }
+ throw error;
+ }
+
+ // Send confirmation with idempotency key
+ await sendConfirmation(eventId, email);
+
+ return { eventId, status: "fulfilled" };
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start } from "workflow/api"; import { FatalError } from "workflow";
+import stripeCheckout from "../workflows/stripe-checkout";
+
+describe("stripeCheckout idempotency", () => {
+ it("completes happy path with idempotency keys", async () => {
+ const run = await start(stripeCheckout, [
+ "evt_001", 100, [{ sku: "A", qty: 1 }], "user@example.com",
+ ]);
+ await expect(run.returnValue).resolves.toEqual({
+ eventId: "evt_001",
+ status: "fulfilled",
+ });
+ });
+
+ it("skips duplicate event delivery", async () => {
+ // First delivery succeeds
+ const run1 = await start(stripeCheckout, [
+ "evt_002", 50, [{ sku: "B", qty: 1 }], "user@example.com",
+ ]);
+ await expect(run1.returnValue).resolves.toEqual({
+ eventId: "evt_002",
+ status: "fulfilled",
+ });
+
+ // Second delivery with same event ID is skipped
+ const run2 = await start(stripeCheckout, [
+ "evt_002", 50, [{ sku: "B", qty: 1 }], "user@example.com",
+ ]);
+ await expect(run2.returnValue).rejects.toThrow(FatalError);
+ });
+
+ it("refunds payment with idempotency key when inventory fails", async () => {
+ // Mock reserveInventory to throw FatalError (out of stock)
+ const run = await start(stripeCheckout, [
+ "evt_003", 75, [{ sku: "C", qty: 999 }], "user@example.com",
+ ]);
+ await expect(run.returnValue).rejects.toThrow(FatalError);
+ // Verify refundPayment was called with refund:evt_003 idempotency key
+ });
+});
+```
+
+## Verification Artifact
+
+```json
+{
+ "contractVersion": "1",
+ "blueprintName": "stripe-checkout",
+ "files": [
+ { "kind": "workflow", "path": "workflows/stripe-checkout.ts" },
+ { "kind": "test", "path": "workflows/stripe-checkout.integration.test.ts" }
+ ],
+ "testMatrix": [
+ {
+ "name": "happy-path-with-idempotency",
+ "helpers": [],
+ "expects": "Checkout completes with idempotency keys on every side-effecting step"
+ },
+ {
+ "name": "duplicate-event-skip",
+ "helpers": [],
+ "expects": "Duplicate delivery is detected and skipped without reprocessing"
+ },
+ {
+ "name": "compensation-with-idempotency-key",
+ "helpers": [],
+ "expects": "Payment is refunded with refund:${eventId} key when inventory fails"
+ }
+ ],
+ "runtimeCommands": [
+ { "name": "typecheck", "command": "pnpm typecheck", "expects": "No TypeScript errors" },
+ { "name": "test", "command": "pnpm test", "expects": "All repository tests pass" },
+ { "name": "focused-workflow-test", "command": "pnpm vitest run workflows/stripe-checkout.integration.test.ts", "expects": "stripe-checkout integration tests pass" }
+ ],
+ "implementationNotes": [
+ "Invariant: Every side-effecting step uses a stable idempotency key derived from the Stripe event ID",
+ "Invariant: Duplicate event delivery is detected and skipped at ingress",
+ "Invariant: Replayed steps produce the same result because idempotency keys are stable",
+ "Invariant: Compensation refund uses its own idempotency key to prevent double-refunding on replay",
+ "Operator signal: Log idempotency.hit when duplicate delivery is detected",
+ "Operator signal: Log compensation.triggered with eventId when refund begins"
+ ]
+}
+```
+
+### Verification Summary
+
+{"event":"verification_plan_ready","blueprintName":"stripe-checkout","fileCount":2,"testCount":1,"runtimeCommandCount":3,"contractVersion":"1"}
+
+## Checklist Items Exercised
+
+- Idempotency keys (stable keys on every side-effecting step, derived from Stripe event ID)
+- Duplicate delivery detection (deduplication by event ID at ingress)
+- Replay safety (stable idempotency keys survive event log replay)
+- Compensation idempotency (refund step has its own stable key)
+- Retry semantics (FatalError for duplicates and permanent failures, RetryableError for transient)
+- Integration test coverage (happy path, duplicate skip, compensation with idempotency)
diff --git a/skills/workflow-observe/SKILL.md b/skills/workflow-observe/SKILL.md
new file mode 100644
index 0000000000..a48eccb97b
--- /dev/null
+++ b/skills/workflow-observe/SKILL.md
@@ -0,0 +1,124 @@
+---
+name: workflow-observe
+description: Build a durable workflow with operator-visible progress, namespaced streams, and terminal signals. Use when the user says "observability workflow", "workflow-observe", "operator signals", "stream logs", or "progress visibility".
+user-invocable: true
+argument-hint: "[workflow prompt]"
+metadata:
+ author: Vercel Inc.
+ version: '0.1'
+---
+
+# workflow-observe
+
+Use this skill when the user wants to build a workflow where operator visibility is a first-class concern — progress streams, namespaced log channels, and terminal signals that allow operators to diagnose failures without accessing the runtime directly. This is a scenario entrypoint that routes into the existing teach → build pipeline with observability-specific guardrails.
+
+## Context Capture
+
+If `.workflow.md` exists in the project root, read it and use its context. If it does not exist, run a focused context capture covering these observability-specific questions before proceeding:
+
+1. **Operator audience** — "Who consumes the stream output: a dashboard, CLI, monitoring system, or all three?"
+2. **Progress granularity** — "What progress events do operators need to see (e.g. rows processed, steps completed, percentage)?"
+3. **Stream namespaces** — "Does the workflow need multiple stream channels (e.g. progress, errors, diagnostics) or a single unified stream?"
+4. **Terminal signals** — "What must the final output contain so an operator knows the workflow succeeded, failed, or was cancelled?"
+5. **Structured log format** — "Should stream events be structured JSON, key=value pairs, or human-readable text?"
+6. **Failure diagnostics** — "When a step fails, what contextual data must be in the stream for operators to diagnose without runtime access?"
+
+Save the answers into `.workflow.md` following the same 8-section format used by `workflow-teach`.
+
+## Required Design Constraints
+
+When building an operator-observable workflow, the following constraints are non-negotiable:
+
+### Stream namespace separation
+
+Use distinct stream namespaces to separate concerns. At minimum:
+
+- **`progress`** — operator-facing progress updates (items processed, percentage, stage transitions)
+- **`errors`** — validation errors, step failures, diagnostic context
+- **`status`** — terminal signals: workflow completed, failed, or cancelled with summary data
+
+Each namespace must be addressable independently so operators can subscribe to only the channels they need.
+
+### Stream I/O placement
+
+`getWritable()` may be called in workflow context to obtain a stream handle, but all actual `write()` calls must happen inside `"use step"` functions. This is a hard runtime constraint.
+
+### Structured stream events
+
+Every stream event must be structured (JSON or key=value) so downstream consumers can parse and aggregate without regex. Include at minimum:
+
+- `event` — the event type (e.g. `progress`, `step.started`, `step.completed`, `workflow.failed`)
+- `timestamp` — ISO 8601 timestamp
+- `data` — event-specific payload
+
+### Terminal signals
+
+The workflow must emit a terminal signal on every exit path:
+
+- **Success:** `{ "event": "workflow.completed", "status": "success", ... }`
+- **Failure:** `{ "event": "workflow.failed", "status": "error", "error": "...", ... }`
+- **Partial:** `{ "event": "workflow.completed", "status": "partial", "completed": [...], "failed": [...] }`
+
+Operators must never have to guess whether a workflow is still running or has finished.
+
+### Operator-queryable state
+
+Step functions that emit stream events must include enough context for an operator to understand the current state without seeing the full event history. Each progress event should be self-describing (include total, processed, remaining — not just a delta).
+
+## Build Process
+
+Follow the same six-phase interactive build process as `workflow-build`:
+
+1. **Propose step boundaries** — identify `"use workflow"` orchestrator vs `"use step"` functions, stream namespace allocation, progress emission points
+2. **Flag relevant traps** — run the stress checklist with special attention to stream I/O placement, namespace separation, and terminal signal coverage
+3. **Decide failure modes** — ensure every failure path emits a terminal signal before throwing
+4. **Write code + tests** — produce workflow file and integration tests
+5. **Self-review** — re-run the stress checklist against generated code, verify all exit paths emit terminal signals
+6. **Verification summary** — emit the verification artifact and `verification_plan_ready` summary
+
+### Required test coverage
+
+Integration tests must exercise:
+
+- **Happy path with stream verification** — workflow completes, progress stream contains expected events, terminal signal is `workflow.completed`
+- **Failure path with terminal signal** — step fails, error stream contains diagnostic context, terminal signal is `workflow.failed`
+- **Namespace isolation** — progress events appear only in the progress namespace, errors only in the error namespace
+
+## Anti-Patterns
+
+Flag these explicitly when they appear in the workflow:
+
+- **Stream writes in workflow context** — `write()` calls must happen in `"use step"` functions, not in the `"use workflow"` orchestrator
+- **Missing terminal signal** — every exit path (success, failure, partial) must emit a terminal signal; silent exits are invisible to operators
+- **Unstructured stream output** — free-text log lines cannot be parsed by downstream consumers; use structured JSON or key=value
+- **Single namespace for all events** — mixing progress, errors, and status in one namespace forces operators to filter manually
+- **Delta-only progress events** — operators joining mid-stream cannot reconstruct state; include cumulative totals in each event
+- **Node.js APIs in workflow context** — `fs`, `crypto`, `Buffer`, etc. cannot be used inside `"use workflow"` functions
+- **`start()` called directly from workflow code** — must be wrapped in a step
+
+## Inputs
+
+Always read these before producing output:
+
+1. **`skills/workflow/SKILL.md`** — the authoritative API truth source
+2. **`.workflow.md`** — project-specific context (if present)
+
+## Verification Contract
+
+This skill terminates with the same verification contract as `workflow-build`. The final output must include:
+
+1. A **Verification Artifact** — fenced JSON block with `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`
+2. A **Verification Summary** — single-line JSON: `{"event":"verification_plan_ready","blueprintName":"<name>","fileCount":<n>,"testCount":<n>,"runtimeCommandCount":<n>,"contractVersion":"1"}`
+
+## Sample Usage
+
+**Input:** `/workflow-observe Stream operator progress, namespaced logs, and terminal status for a long-running backfill workflow.`
+
+**Expected behavior:**
+
+1. Reads `.workflow.md` if present; otherwise runs focused context capture
+2. Proposes: ingestion step with progress stream emissions, validation step with error stream emissions, load step with progress updates, summary step with terminal signal — each using `getWritable()` in workflow context and `write()` in step context
+3. Flags: stream I/O must happen in steps, namespace separation required, terminal signal on every exit path, structured events with cumulative totals
+4. Writes: `workflows/backfill-pipeline.ts` + `workflows/backfill-pipeline.integration.test.ts`
+5. Tests cover: happy path with stream event assertions, failure path with terminal signal verification, namespace isolation
+6. Emits verification artifact and `verification_plan_ready` summary
diff --git a/skills/workflow-observe/goldens/operator-observability-streams.md b/skills/workflow-observe/goldens/operator-observability-streams.md
new file mode 100644
index 0000000000..ec093a8247
--- /dev/null
+++ b/skills/workflow-observe/goldens/operator-observability-streams.md
@@ -0,0 +1,245 @@
+# Golden Scenario: Operator Observability Streams
+
+## User Prompt
+
+```
+/workflow-observe Stream operator progress, namespaced logs, and terminal status for a long-running backfill workflow.
+```
+
+## Scenario
+
+A data pipeline workflow ingests CSV files, validates rows, transforms data, and loads into a data warehouse. Operators need real-time visibility into progress (rows processed, validation errors, load status) via namespaced streams, and must be able to diagnose failures from structured logs without accessing the runtime directly. The workflow must emit terminal signals on every exit path so operators never have to guess whether it is still running.
+
+## Context Capture
+
+The scenario skill checks for `.workflow.md` first. In this example it does not exist, so the focused observability-specific interview runs:
+
+| Question | Expected Answer |
+|----------|----------------|
+| Operator audience | Ops dashboard and CLI monitoring tool; both consume structured JSON streams |
+| Progress granularity | Rows processed vs total, stage transitions (validate → transform → load), percentage complete |
+| Stream namespaces | Three channels: `progress` (row counts, percentage), `errors` (validation failures with row numbers), `status` (terminal signals) |
+| Terminal signals | Success: `workflow.completed` with total rows and duration. Failure: `workflow.failed` with error context and last successful stage. |
+| Structured log format | JSON with `event`, `timestamp`, and `data` fields |
+| Failure diagnostics | On step failure: include step name, input row range, error message, and retry count in the error stream |
+
+The captured context is saved to `.workflow.md` with sections: Project Context, Business Rules, External Systems, Failure Expectations, Observability Needs, Approved Patterns, Open Questions.
+
+## What the Scenario Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **Stream I/O placement** — `getWritable()` may be called in workflow context, but all `write()` calls must happen inside `"use step"` functions. This is a hard runtime constraint that would cause silent failures if violated.
+2. **Namespace separation** — Progress, error, and status events must use distinct stream namespaces. Mixing them in a single namespace forces operators to filter manually and breaks targeted subscriptions.
+3. **Terminal signal coverage** — Every exit path (success, failure, partial success) must emit a terminal signal. A workflow that fails silently is invisible to operators.
+4. **Self-describing progress events** — Each progress event must include cumulative totals (processed, total, remaining), not just deltas. Operators joining mid-stream cannot reconstruct state from deltas alone.
+
+### Phase 3 — Failure Modes Decided
+
+- `validateRows`: `FatalError` for malformed CSV (code/data bug, cannot recover). Emits validation errors to `errors` namespace before throwing.
+- `transformData`: `RetryableError` with `maxRetries: 2` for transient transformation failures. Emits progress to `progress` namespace.
+- `loadToWarehouse`: `RetryableError` with `maxRetries: 3` for transient warehouse connection failures. Emits row-level progress to `progress` namespace.
+- `emitTerminalSignal`: Always executes — wraps the workflow in try/finally to guarantee terminal signal emission on every exit path.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { FatalError, RetryableError, getWritable } from "workflow";
+
+const progressStream = getWritable("progress");
+const errorStream = getWritable("errors");
+const statusStream = getWritable("status");
+
+const validateRows = async (batchId: string, rows: RawRow[]) => {
+ "use step";
+ const valid: ValidRow[] = [];
+ const errors: ValidationError[] = [];
+
+ for (let i = 0; i < rows.length; i++) {
+ const result = validateRow(rows[i]);
+ if (result.ok) {
+ valid.push(result.row);
+ } else {
+ errors.push({ row: i, error: result.error });
+ errorStream.write(JSON.stringify({
+ event: "validation.error",
+ timestamp: new Date().toISOString(),
+ data: { batchId, row: i, error: result.error },
+ }));
+ }
+ }
+
+ progressStream.write(JSON.stringify({
+ event: "stage.completed",
+ timestamp: new Date().toISOString(),
+ data: { batchId, stage: "validate", validCount: valid.length, errorCount: errors.length, total: rows.length },
+ }));
+
+ if (valid.length === 0) {
+ throw new FatalError(`Batch ${batchId}: all rows invalid`);
+ }
+
+ return { valid, errors };
+};
+
+const loadToWarehouse = async (batchId: string, rows: ValidRow[]) => {
+ "use step";
+ let loaded = 0;
+ for (const chunk of chunkArray(rows, 100)) {
+ await warehouse.upsert({ idempotencyKey: `load:${batchId}:${loaded}`, rows: chunk });
+ loaded += chunk.length;
+
+ progressStream.write(JSON.stringify({
+ event: "load.progress",
+ timestamp: new Date().toISOString(),
+ data: { batchId, loaded, total: rows.length, remaining: rows.length - loaded },
+ }));
+ }
+
+ return { loaded };
+};
+
+const emitTerminal = async (batchId: string, status: string, details: Record<string, unknown>) => {
+ "use step";
+ statusStream.write(JSON.stringify({
+ event: status === "success" ? "workflow.completed" : "workflow.failed",
+ timestamp: new Date().toISOString(),
+ data: { batchId, status, ...details },
+ }));
+};
+
+export default async function backfillPipeline(
+ batchId: string,
+ rows: RawRow[]
+) {
+ const startTime = Date.now();
+
+ try {
+ // Validate rows — streams errors to error namespace
+ const { valid, errors } = await validateRows(batchId, rows);
+
+ // Load to warehouse — streams progress to progress namespace
+ const { loaded } = await loadToWarehouse(batchId, valid);
+
+ // Terminal signal: success
+ await emitTerminal(batchId, "success", {
+ totalRows: rows.length,
+ validRows: valid.length,
+ loadedRows: loaded,
+ validationErrors: errors.length,
+ durationMs: Date.now() - startTime,
+ });
+
+ return { batchId, status: "completed", loaded };
+ } catch (error) {
+ // Terminal signal: failure
+ await emitTerminal(batchId, "error", {
+ error: error instanceof Error ? error.message : String(error),
+ durationMs: Date.now() - startTime,
+ });
+ throw error;
+ }
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start } from "workflow/api"; import { FatalError } from "workflow";
+import backfillPipeline from "../workflows/backfill-pipeline";
+
+describe("backfillPipeline observability", () => {
+ it("emits progress events and terminal success signal", async () => {
+ const run = await start(backfillPipeline, [
+ "batch-001", [{ id: 1, data: "valid" }, { id: 2, data: "valid" }],
+ ]);
+ const result = await run.returnValue;
+ expect(result).toEqual({
+ batchId: "batch-001",
+ status: "completed",
+ loaded: 2,
+ });
+ // Verify progress stream contains stage.completed and load.progress events
+ // Verify status stream contains workflow.completed terminal signal
+ });
+
+ it("streams validation errors to error namespace", async () => {
+ const run = await start(backfillPipeline, [
+ "batch-002", [{ id: 1, data: "valid" }, { id: 2, data: null }],
+ ]);
+ const result = await run.returnValue;
+ // Verify error stream contains validation.error for row 1
+ // Verify progress stream shows validCount: 1, errorCount: 1
+ });
+
+ it("emits terminal failure signal when all rows invalid", async () => {
+ const run = await start(backfillPipeline, [
+ "batch-003", [{ id: 1, data: null }],
+ ]);
+ await expect(run.returnValue).rejects.toThrow(FatalError);
+ // Verify status stream contains workflow.failed terminal signal
+ // Verify error stream contains validation errors
+ });
+});
+```
+
+## Verification Artifact
+
+```json
+{
+ "contractVersion": "1",
+ "blueprintName": "backfill-pipeline",
+ "files": [
+ { "kind": "workflow", "path": "workflows/backfill-pipeline.ts" },
+ { "kind": "test", "path": "workflows/backfill-pipeline.integration.test.ts" }
+ ],
+ "testMatrix": [
+ {
+ "name": "happy-path-with-stream-verification",
+ "helpers": [],
+ "expects": "Pipeline completes with progress events and workflow.completed terminal signal"
+ },
+ {
+ "name": "validation-errors-streamed",
+ "helpers": [],
+ "expects": "Validation errors appear in error namespace, progress reflects valid/invalid counts"
+ },
+ {
+ "name": "terminal-failure-signal",
+ "helpers": [],
+ "expects": "Fatal validation failure emits workflow.failed terminal signal before throwing"
+ }
+ ],
+ "runtimeCommands": [
+ { "name": "typecheck", "command": "pnpm typecheck", "expects": "No TypeScript errors" },
+ { "name": "test", "command": "pnpm test", "expects": "All repository tests pass" },
+ { "name": "focused-workflow-test", "command": "pnpm vitest run workflows/backfill-pipeline.integration.test.ts", "expects": "backfill-pipeline integration tests pass" }
+ ],
+ "implementationNotes": [
+ "Invariant: Stream writes happen only inside step functions, never in workflow context",
+ "Invariant: Progress, error, and status use separate stream namespaces",
+ "Invariant: Every exit path emits a terminal signal to the status namespace",
+ "Invariant: Progress events include cumulative totals, not deltas",
+ "Operator signal: Log stage.completed with valid/error counts after validation",
+ "Operator signal: Log load.progress with loaded/total/remaining during warehouse load",
+ "Operator signal: Log workflow.completed or workflow.failed as terminal signal"
+ ]
+}
+```
+
+### Verification Summary
+
+{"event":"verification_plan_ready","blueprintName":"backfill-pipeline","fileCount":2,"testCount":1,"runtimeCommandCount":3,"contractVersion":"1"}
+
+## Checklist Items Exercised
+
+- Stream I/O placement (getWritable in workflow context, write in step context)
+- Stream namespace separation (progress, errors, status channels)
+- Terminal signals (workflow.completed and workflow.failed on every exit path)
+- Structured stream events (JSON with event, timestamp, data)
+- Self-describing progress (cumulative totals in each event)
+- Operator-queryable state (no runtime access needed to diagnose failures)
+- Integration test coverage (happy path with stream verification, error streaming, terminal signal)
diff --git a/skills/workflow-saga/SKILL.md b/skills/workflow-saga/SKILL.md
new file mode 100644
index 0000000000..fa9e526640
--- /dev/null
+++ b/skills/workflow-saga/SKILL.md
@@ -0,0 +1,113 @@
+---
+name: workflow-saga
+description: Build a durable saga workflow with explicit compensation for partial success. Use when the user says "saga workflow", "workflow-saga", "compensation", "rollback", or "partial failure".
+user-invocable: true
+argument-hint: "[workflow prompt]"
+metadata:
+ author: Vercel Inc.
+ version: '0.1'
+---
+
+# workflow-saga
+
+Use this skill when the user wants to build a workflow where multiple steps produce irreversible side effects and partial failure requires explicit compensation. This is a scenario entrypoint that routes into the existing teach → build pipeline with saga-specific guardrails.
+
+## Context Capture
+
+If `.workflow.md` exists in the project root, read it and use its context. If it does not exist, run a focused context capture covering these saga-specific questions before proceeding:
+
+1. **Side-effecting steps** — "Which steps produce irreversible external effects (payments, reservations, notifications)?"
+2. **Compensation ordering** — "When a later step fails, which earlier effects must be undone, and in what order?"
+3. **Compensation idempotency** — "Can each compensation action be retried safely? What idempotency key anchors each undo?"
+4. **Partial success semantics** — "After compensation, does the workflow terminate with an error or return a partial-success status?"
+5. **Forward-recovery option** — "Are there any steps where retrying forward is safer than compensating backward?"
+6. **Observability** — "What must operators see in logs when compensation triggers?"
+
+Save the answers into `.workflow.md` following the same 8-section format used by `workflow-teach`.
+
+## Required Design Constraints
+
+When building a saga workflow, the following constraints are non-negotiable:
+
+### Compensation for every irreversible step
+
+Every step that commits an irreversible side effect must have a corresponding compensation step. The compensation step must undo the effect completely or leave the system in a known-safe state. Map each forward step to its compensator before writing code.
+
+### Compensation ordering
+
+Compensation steps must execute in reverse order of the forward steps that succeeded. If step A then step B succeeded but step C fails, compensate B first, then A.
+
+### Compensation idempotency keys
+
+Every compensation step must use an idempotency key derived from a stable entity identifier — never from timestamps or random values. Examples:
+
+- Payment refund: `refund:${orderId}`
+- Inventory release: `release:${orderId}`
+- Reservation cancel: `cancel:${reservationId}`
+
+### Compensation must eventually succeed
+
+Compensation steps must use `RetryableError` with high `maxRetries`. A failed compensation leaves the system in an inconsistent state. Never use `FatalError` for compensation steps.
+
+### Forward steps use FatalError to trigger compensation
+
+When a forward step encounters a permanent failure that requires compensation (e.g. out-of-stock), it must throw `FatalError`. The workflow orchestrator catches the `FatalError` and runs the compensation chain before re-throwing.
+
+## Build Process
+
+Follow the same six-phase interactive build process as `workflow-build`:
+
+1. **Propose step boundaries** — identify `"use workflow"` orchestrator vs `"use step"` functions, forward steps, and compensation steps
+2. **Flag relevant traps** — run the stress checklist with special attention to compensation ordering, idempotency keys, and partial-failure semantics
+3. **Decide failure modes** — `FatalError` for permanent forward failures that trigger compensation, `RetryableError` for transient failures, compensation steps always `RetryableError` with high retry count
+4. **Write code + tests** — produce workflow file and integration tests
+5. **Self-review** — re-run the stress checklist against generated code
+6. **Verification summary** — emit the verification artifact and `verification_plan_ready` summary
+
+### Required test coverage
+
+Integration tests must exercise:
+
+- **Happy path** — all forward steps succeed, no compensation needed
+- **Compensation path** — a later step fails after earlier steps committed, compensation executes in reverse order
+- **Compensation idempotency** — replayed compensation steps do not produce duplicate side effects
+
+## Anti-Patterns
+
+Flag these explicitly when they appear in the saga workflow:
+
+- **Missing compensation for an irreversible step** — every committed side effect must have an undo path
+- **Wrong compensation order** — compensations must run in reverse order of committed forward steps
+- **FatalError in a compensation step** — compensation must use `RetryableError` with high retries; a fatal compensation leaves the system inconsistent
+- **Timestamp or random idempotency keys** — keys must be derived from stable entity identifiers to survive replay
+- **Compensation that depends on uncommitted state** — each compensation step must be self-contained; it cannot assume later forward steps ran
+- **Node.js APIs in workflow context** — `fs`, `crypto`, `Buffer`, etc. cannot be used inside `"use workflow"` functions
+- **Direct stream I/O in workflow context** — `getWritable()` may be called in workflow context, but actual writes must happen in steps
+- **`start()` called directly from workflow code** — must be wrapped in a step
+
+## Inputs
+
+Always read these before producing output:
+
+1. **`skills/workflow/SKILL.md`** — the authoritative API truth source
+2. **`.workflow.md`** — project-specific context (if present)
+
+## Verification Contract
+
+This skill terminates with the same verification contract as `workflow-build`. The final output must include:
+
+1. A **Verification Artifact** — fenced JSON block with `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`
+2. A **Verification Summary** — single-line JSON: `{"event":"verification_plan_ready","blueprintName":"<name>","fileCount":<n>,"testCount":<n>,"runtimeCommandCount":<n>,"contractVersion":"1"}`
+
+## Sample Usage
+
+**Input:** `/workflow-saga Reserve inventory, charge payment, create shipment, and refund if shipment booking fails.`
+
+**Expected behavior:**
+
+1. Reads `.workflow.md` if present; otherwise runs focused context capture
+2. Proposes: inventory reservation step with `inventory:${orderId}` key, payment charge step with `payment:${orderId}` key, shipment booking step with `shipment:${orderId}` key, compensation steps: cancel shipment, refund payment, release inventory — each with idempotency keys
+3. Flags: compensation ordering (reverse of forward), idempotency on every step, FatalError for permanent shipment failure triggers compensation
+4. Writes: `workflows/order-saga.ts` + `workflows/order-saga.integration.test.ts`
+5. Tests cover: happy path, shipment failure triggering payment refund and inventory release — verifying compensation order and idempotency
+6. Emits verification artifact and `verification_plan_ready` summary
diff --git a/skills/workflow-saga/goldens/compensation-saga.md b/skills/workflow-saga/goldens/compensation-saga.md
new file mode 100644
index 0000000000..e45eac016b
--- /dev/null
+++ b/skills/workflow-saga/goldens/compensation-saga.md
@@ -0,0 +1,237 @@
+# Golden Scenario: Compensation Saga
+
+## User Prompt
+
+```
+/workflow-saga Reserve inventory, charge payment, create shipment, and refund if shipment booking fails.
+```
+
+## Scenario
+
+A multi-step order fulfillment workflow that reserves inventory, charges payment, and books a shipment. If shipment booking fails after payment has been charged and inventory reserved, the workflow must compensate by refunding the payment and releasing the inventory — in reverse order of the forward steps.
+
+## Context Capture
+
+The scenario skill checks for `.workflow.md` first. In this example it does not exist, so the focused saga-specific interview runs:
+
+| Question | Expected Answer |
+|----------|----------------|
+| Side-effecting steps | Reserve inventory, charge payment, book shipment — all irreversible |
+| Compensation ordering | On shipment failure: cancel shipment (no-op if not booked), refund payment, release inventory |
+| Compensation idempotency | Refund: `refund:${orderId}`, Release: `release:${orderId}`, Cancel: `cancel-shipment:${orderId}` |
+| Partial success semantics | Workflow terminates with error after compensation completes |
+| Forward-recovery option | None — shipment failure is permanent (warehouse rejected) |
+| Observability | Log compensation.triggered with orderId and failing step name |
+
+The captured context is saved to `.workflow.md` with sections: Project Context, Business Rules, External Systems, Failure Expectations, Observability Needs, Approved Patterns, Open Questions.
+
+## What the Scenario Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **Rollback / compensation strategy** — Payment charging and inventory reservation are irreversible side effects. If `bookShipment` fails after both succeed, the workflow must refund the payment and release the inventory. A compensation chain is required.
+2. **Compensation ordering** — Compensations must run in reverse order: refund payment first (most recent committed effect), then release inventory.
+3. **Idempotency keys** — Every forward and compensation step has external side effects. Derive idempotency keys from `orderId` (e.g. `payment:${orderId}`, `inventory:${orderId}`, `refund:${orderId}`, `release:${orderId}`) to prevent duplicate effects on replay.
+
+### Phase 3 — Failure Modes Decided
+
+- `reserveInventory`: `RetryableError` with `maxRetries: 2` for transient warehouse API failures. `FatalError` for out-of-stock (no compensation needed — nothing committed yet).
+- `chargePayment`: `RetryableError` with `maxRetries: 3` for transient payment failures. `FatalError` for invalid card or insufficient funds (compensate inventory only).
+- `bookShipment`: `RetryableError` with `maxRetries: 2` for transient carrier failures. `FatalError` for permanent rejection (triggers full compensation).
+- `refundPayment`: `RetryableError` with `maxRetries: 5` — refund must eventually succeed.
+- `releaseInventory`: `RetryableError` with `maxRetries: 5` — release must eventually succeed.
+- `sendConfirmation`: `RetryableError` with `maxRetries: 2` — email delivery is transient.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { FatalError, RetryableError } from "workflow";
+
+const reserveInventory = async (orderId: string, items: CartItem[]) => {
+ "use step";
+ const reservation = await warehouse.reserve({
+ idempotencyKey: `inventory:${orderId}`,
+ items,
+ });
+ return reservation;
+};
+
+const chargePayment = async (orderId: string, amount: number) => {
+ "use step";
+ const result = await paymentProvider.charge({
+ idempotencyKey: `payment:${orderId}`,
+ amount,
+ });
+ return result;
+};
+
+const bookShipment = async (orderId: string, address: Address) => {
+ "use step";
+ const shipment = await carrier.book({
+ idempotencyKey: `shipment:${orderId}`,
+ address,
+ });
+ return shipment;
+};
+
+const refundPayment = async (orderId: string, chargeId: string) => {
+ "use step";
+ await paymentProvider.refund({
+ idempotencyKey: `refund:${orderId}`,
+ chargeId,
+ });
+};
+
+const releaseInventory = async (orderId: string, reservationId: string) => {
+ "use step";
+ await warehouse.release({
+ idempotencyKey: `release:${orderId}`,
+ reservationId,
+ });
+};
+
+const sendConfirmation = async (orderId: string, email: string) => {
+ "use step";
+ await emailService.send({
+ idempotencyKey: `confirmation:${orderId}`,
+ to: email,
+ template: "order-confirmed",
+ });
+};
+
+export default async function orderSaga(
+ orderId: string,
+ amount: number,
+ items: CartItem[],
+ address: Address,
+ email: string
+) {
+ // Forward step 1: Reserve inventory
+ const reservation = await reserveInventory(orderId, items);
+
+ // Forward step 2: Charge payment
+ let charge;
+ try {
+ charge = await chargePayment(orderId, amount);
+ } catch (error) {
+ // Compensate: release inventory
+ if (error instanceof FatalError) {
+ await releaseInventory(orderId, reservation.id);
+ throw error;
+ }
+ throw error;
+ }
+
+ // Forward step 3: Book shipment
+ try {
+ await bookShipment(orderId, address);
+ } catch (error) {
+ // Compensate in reverse order: refund payment, then release inventory
+ if (error instanceof FatalError) {
+ await refundPayment(orderId, charge.id);
+ await releaseInventory(orderId, reservation.id);
+ throw error;
+ }
+ throw error;
+ }
+
+ // All forward steps succeeded
+ await sendConfirmation(orderId, email);
+
+ return { orderId, status: "fulfilled" };
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start } from "workflow/api";
+import { FatalError } from "workflow";
+import orderSaga from "../workflows/order-saga";
+
+describe("orderSaga", () => {
+ it("completes happy path", async () => {
+ const run = await start(orderSaga, [
+ "order-1", 100, [{ sku: "A", qty: 1 }], { street: "123 Main" }, "user@example.com",
+ ]);
+ await expect(run.returnValue).resolves.toEqual({
+ orderId: "order-1",
+ status: "fulfilled",
+ });
+ });
+
+ it("compensates payment and inventory when shipment fails", async () => {
+ // Mock bookShipment to throw FatalError (carrier rejected)
+ const run = await start(orderSaga, [
+ "order-2", 50, [{ sku: "B", qty: 1 }], { street: "456 Elm" }, "user@example.com",
+ ]);
+ await expect(run.returnValue).rejects.toThrow(FatalError);
+ // Verify refundPayment and releaseInventory were called (compensation executed)
+ });
+
+ it("compensates inventory only when payment fails", async () => {
+ // Mock chargePayment to throw FatalError (insufficient funds)
+ const run = await start(orderSaga, [
+ "order-3", 75, [{ sku: "C", qty: 1 }], { street: "789 Oak" }, "user@example.com",
+ ]);
+ await expect(run.returnValue).rejects.toThrow(FatalError);
+ // Verify releaseInventory was called but refundPayment was not
+ });
+});
+```
+
+## Verification Artifact
+
+```json
+{
+ "contractVersion": "1",
+ "blueprintName": "order-saga",
+ "files": [
+ { "kind": "workflow", "path": "workflows/order-saga.ts" },
+ { "kind": "test", "path": "workflows/order-saga.integration.test.ts" }
+ ],
+ "testMatrix": [
+ {
+ "name": "happy-path",
+ "helpers": [],
+ "expects": "Order completes successfully with inventory reserved, payment charged, and shipment booked"
+ },
+ {
+ "name": "compensation-on-shipment-failure",
+ "helpers": [],
+ "expects": "Payment is refunded and inventory released when shipment booking fails"
+ },
+ {
+ "name": "partial-compensation-on-payment-failure",
+ "helpers": [],
+ "expects": "Inventory is released when payment fails (no refund needed)"
+ }
+ ],
+ "runtimeCommands": [
+ { "name": "typecheck", "command": "pnpm typecheck", "expects": "No TypeScript errors" },
+ { "name": "test", "command": "pnpm test", "expects": "All repository tests pass" },
+ { "name": "focused-workflow-test", "command": "pnpm vitest run workflows/order-saga.integration.test.ts", "expects": "order-saga integration tests pass" }
+ ],
+ "implementationNotes": [
+ "Invariant: Compensation runs in reverse order of committed forward steps",
+ "Invariant: A payment charge must be compensated by a refund if shipment booking fails",
+ "Invariant: Idempotency keys derived from orderId prevent duplicate charges on replay",
+ "Operator signal: Log compensation.triggered with orderId when refund begins after shipment failure",
+ "Operator signal: Log compensation.complete with orderId when all compensations finish"
+ ]
+}
+```
+
+### Verification Summary
+
+{"event":"verification_plan_ready","blueprintName":"order-saga","fileCount":2,"testCount":3,"runtimeCommandCount":3,"contractVersion":"1"}
+
+## Checklist Items Exercised
+
+- Rollback / compensation strategy (reverse-order compensation chain)
+- Compensation ordering (refund before release)
+- Idempotency keys (stable keys on every forward and compensation step)
+- Retry semantics (FatalError triggers compensation, RetryableError for transient failures)
+- Integration test coverage (happy path, full compensation, partial compensation)
diff --git a/skills/workflow-teach/SKILL.md b/skills/workflow-teach/SKILL.md
new file mode 100644
index 0000000000..9892fc42f5
--- /dev/null
+++ b/skills/workflow-teach/SKILL.md
@@ -0,0 +1,145 @@
+---
+name: workflow-teach
+description: One-time setup that captures project context for workflow building. Use when the user wants to teach the assistant how workflows should be designed for this project. Triggers on "teach workflow", "set up workflow context", "configure workflow skills", or "workflow-teach".
+metadata:
+ author: Vercel Inc.
+ version: '0.6'
+---
+
+# workflow-teach
+
+Use this skill when the user wants to teach the assistant how workflows should be designed for this project.
+
+## Skill Loop Position
+
+**Stage 1 of 2** in the workflow skill loop: **teach** → build
+
+| Stage | Skill | Purpose |
+|-------|-------|---------|
+| **1** | **workflow-teach** (you are here) | Capture project context into `.workflow.md` |
+| 2 | workflow-build | Build workflow code guided by context |
+
+**Next:** Run `workflow-build` after this skill completes.
+
+## Steps
+
+Always do these steps:
+
+### 1. Read the workflow skill
+
+Read `skills/workflow/SKILL.md` to load the current API truth source. Do not fork or duplicate its guidance — reference it as the authoritative source for all workflow API behavior.
+
+### 2. Inspect the repo for workflow surfaces
+
+Search the repository for:
+
+- `workflows/` or `src/workflows/` directories
+- API routes (e.g. `app/api/`, `pages/api/`, route handlers)
+- Queue consumers or background job processors
+- Webhook handlers
+- Existing `"use workflow"` and `"use step"` directives
+- Test files related to workflows (e.g. files importing `@workflow/vitest`, `workflow/api`)
+- Configuration files (`next.config.*`, `workflow.config.*`, `package.json` workflow dependencies)
+
+### 3. Conduct the workflow context interview
+
+After completing the repo scan, ask the user targeted follow-up questions to fill gaps in the context that the codebase alone cannot reveal. Only ask questions whose answers are not already inferable from the repo scan — do not re-ask facts you have already discovered.
+
+Cover these exact buckets, skipping any that are already resolved from the repo:
+
+1. **Workflow starter/emitter** — "What starts this workflow, and who or what emits that event?"
+2. **Repeat-safe side effects** — "Which side effects must be safe to repeat (idempotent)?"
+3. **Permanent vs retryable failures** — "What counts as a permanent failure vs. a retryable failure?"
+4. **Approval actors** — "Does any step require human approval, and who is allowed to approve?"
+5. **Timeout/expiry rules** — "What timeout or expiry rules exist?"
+6. **Compensation requirements** — "If a side effect succeeds and a later step fails, what compensation is required?"
+7. **Operator observability needs** — "What must operators be able to observe in logs/streams?"
+
+Ask only the unresolved questions in a single batch. Wait for the user's answers before proceeding to step 4.
+
+### 4. Create or update `.workflow.md`
+
+Create or update `.workflow.md` in the project root with the following sections. Write in plain English — this file is for humans and agents to read, not a machine schema.
+
+```markdown
+# .workflow.md
+
+## Project Context
+
+Project name, what it does, why it needs durable workflows, and paths to any
+existing workflow files or tests found in the repo.
+
+## Business Rules
+
+Rules that must never be violated. Include idempotency requirements here —
+which side effects must be safe to repeat and how.
+
+Examples: "An order must not be charged twice", "Refund cannot exceed original
+amount", "Payment charge uses idempotency key from order ID".
+
+## External Systems
+
+Third-party services and infrastructure the workflows interact with. Note
+which are idempotent, which have compensation APIs, and which are rate-limited.
+
+Also list trigger surfaces: API routes, webhooks, queue messages, cron jobs,
+or UI actions that start workflows.
+
+## Failure Expectations
+
+What counts as a permanent failure vs. a retryable failure in this project.
+Include approval rules (who approves, what happens on timeout), timeout and
+expiry policies, and compensation rules (what to undo when a later step fails).
+
+## Observability Needs
+
+What operators need to see in logs or streams. What the UI needs streamed
+for real-time progress.
+
+## Approved Patterns
+
+Anti-patterns that are relevant to this project's workflow surfaces. These
+serve as awareness for anyone building workflows in this codebase.
+
+## Open Questions
+
+Unresolved questions that could not be answered from the repo scan or the
+interview. These will be surfaced again by workflow-build.
+```
+
+Populate sections from both the repo scan (step 2) and the interview answers (step 3). For any question the user could not answer, add it to **Open Questions** so `workflow-build` can surface it again.
+
+### 5. Evaluate anti-patterns
+
+Include the following anti-patterns in the **Approved Patterns** section when they are relevant to the project's workflow surfaces:
+
+- **Node.js APIs in `"use workflow"`** — Workflow functions run in a sandboxed VM without full Node.js access. Any use of `fs`, `path`, `crypto`, `Buffer`, `process`, or other Node.js built-ins must live in a `"use step"` function.
+- **Side effects split across too many tiny steps** — Each step is persisted and replayed. Over-granular step boundaries add latency, increase event log size, and make debugging harder. Group related I/O into a single step unless you need independent retry or suspension between them.
+- **Direct stream I/O in workflow context** — `getWritable()` may be called in either `"use workflow"` or `"use step"` functions to obtain a stream reference, but direct stream I/O (`getWriter()`, `write()`, `close()`, or reading from a stream) must happen inside `"use step"` functions. The workflow orchestrator cannot hold open stream I/O across replay boundaries.
+- **`createWebhook()` with a custom token** — `createWebhook()` does not accept custom tokens. Only `createHook()` supports deterministic token strategies. Using a custom token with `createWebhook()` will fail silently or produce unexpected behavior.
+- **`start()` called directly from workflow code** — Starting a child workflow from inside a workflow function must be wrapped in a `"use step"` function. Direct `start()` calls in workflow context will fail because `start()` is a side effect that requires full Node.js access.
+- **Mutating step inputs without returning the updated value** — Step functions use pass-by-value semantics. If you modify data inside a step, you must `return` the new value and reassign it in the calling workflow. Mutations to the input object are lost after replay.
+
+### 6. Output results
+
+When you finish, output these exact sections:
+
+## Captured Context
+
+Summarize what was discovered: project name, goal, trigger surfaces found, external systems identified, relevant anti-patterns, and any canonical examples located in the repo. Also summarize the business rules, failure expectations, and observability needs gathered from the interview.
+
+## Open Questions
+
+List anything that could not be determined from the repo scan or the interview and needs further investigation. These should match the **Open Questions** section in `.workflow.md`.
+
+## Next Recommended Skill
+
+Recommend `workflow-build` to start building workflows using the captured context. For simple workflows with no suspensions, the user can also use `workflow` directly.
+
+---
+
+## Sample Usage
+
+**Input:** `Teach workflow skills about our refund approval system.`
+
+**Expected output:** A `.workflow.md` file capturing the refund approval domain — including business rules like "refund cannot exceed original charge" and "payment charge uses idempotency key from order ID", failure expectations covering approval timeout behavior and compensation rules, observability needs for audit logging — plus the three output headings above with specific findings, open questions, and a recommendation to run `workflow-build` next.
diff --git a/skills/workflow-teach/goldens/approval-expiry-escalation.md b/skills/workflow-teach/goldens/approval-expiry-escalation.md
new file mode 100644
index 0000000000..91a5e85b56
--- /dev/null
+++ b/skills/workflow-teach/goldens/approval-expiry-escalation.md
@@ -0,0 +1,72 @@
+# Golden Scenario: Approval Expiry Escalation
+
+## Scenario
+
+A procurement system requires manager approval for purchase orders over $5,000. If the assigned manager does not approve within 48 hours, the request escalates to a director. If the director does not respond within 24 hours, the request is auto-rejected and the requester is notified. Each approval step uses a deterministic hook token tied to the PO number.
+
+## Interview Context
+
+The workflow-teach interview should surface these answers:
+
+| Bucket | Expected Answer |
+|--------|----------------|
+| Workflow starter/emitter | Internal API call when a purchase order is submitted |
+| Repeat-safe side effects | Notification emails are safe to retry (informational only) |
+| Permanent vs retryable | Approval timeout is permanent (escalate or reject); email delivery failure is retryable |
+| Approval actors | Manager approves first; director is escalation approver; token strategy is `approval:po-${poNumber}` and `escalation:po-${poNumber}` |
+| Timeout/expiry rules | Manager approval expires after 48 hours; director escalation expires after 24 hours |
+| Compensation requirements | No compensation needed — approval flow is read-only until final decision; if auto-rejected, requester is notified but no side effects to undo |
+| Operator observability | Log approval request with PO number and assigned approver, log escalation trigger, log final decision (approved/rejected/auto-rejected) |
+
+## Expected `.workflow.md` Sections
+
+### Project Context
+
+Procurement approval system. Needs durable workflows because approval chains span hours to days and must survive server restarts.
+
+### Business Rules
+
+- A purchase order must receive exactly one final decision: approved, rejected, or auto-rejected.
+- Escalation must only trigger after the primary approval window expires.
+- Notification emails use PO number as deduplication key.
+
+### External Systems
+
+- Internal notification service (email). Trigger: API call when PO is submitted.
+
+### Failure Expectations
+
+- Approval timeout is permanent — escalate to director or auto-reject.
+- Email delivery failure is retryable.
+- Manager approval: `approval:po-${poNumber}` hook, 48-hour timeout.
+- Director escalation: `escalation:po-${poNumber}` hook, 24-hour timeout.
+- No compensation needed — approval flow is read-only until final decision.
+
+### Observability Needs
+
+- Log approval.requested with PO number and assigned manager.
+- Log approval.escalated with PO number and director.
+- Log approval.decided with final status and decision maker.
+
+### Open Questions
+
+(none for this scenario)
+
+## Downstream Expectations
+
+### workflow-build
+
+When building this workflow, the build skill should:
+
+- Use two hook suspensions with deterministic tokens: `approval:po-${poNumber}` and `escalation:po-${poNumber}`
+- Pair each hook with a sleep timeout (48h and 24h) using `Promise.race`
+- Produce tests for: manager-approves (happy path), manager-timeout → director-approves, full-timeout → auto-rejection
+- Each test uses `waitForHook`, `resumeHook`, `waitForSleep`, `wakeUp`
+
+## Verification Criteria
+
+- [ ] Interview captures both approval actors with their token strategies
+- [ ] `.workflow.md` Business Rules includes the single-decision invariant
+- [ ] `.workflow.md` Failure Expectations captures both timeout windows
+- [ ] `.workflow.md` Observability Needs covers the full approval lifecycle
+- [ ] Next skill recommendation is `workflow-build`
diff --git a/skills/workflow-teach/goldens/duplicate-webhook-order.md b/skills/workflow-teach/goldens/duplicate-webhook-order.md
new file mode 100644
index 0000000000..31edec8607
--- /dev/null
+++ b/skills/workflow-teach/goldens/duplicate-webhook-order.md
@@ -0,0 +1,75 @@
+# Golden Scenario: Duplicate Webhook Order
+
+## Scenario
+
+An e-commerce platform receives order-placed webhooks from Shopify. The same webhook may be delivered multiple times due to Shopify's at-least-once delivery guarantee. The workflow must charge payment, reserve inventory, and send a confirmation — but must never double-charge or double-reserve on duplicate deliveries.
+
+## Interview Context
+
+The workflow-teach interview should surface these answers:
+
+| Bucket | Expected Answer |
+|--------|----------------|
+| Workflow starter/emitter | Shopify `orders/create` webhook, may be delivered more than once |
+| Repeat-safe side effects | Payment charge must use idempotency key from Shopify order ID; inventory reservation must be upsert-based |
+| Permanent vs retryable | Duplicate order ID after successful processing is permanent (skip); payment gateway timeout is retryable |
+| Approval actors | No human approval required |
+| Timeout/expiry rules | Webhook must respond within 30 seconds; inventory hold expires after 15 minutes |
+| Compensation requirements | If inventory reservation fails after payment, refund payment using idempotency key |
+| Operator observability | Log webhook receipt with Shopify order ID, log idempotency cache hit/miss, stream step progress |
+
+## Expected `.workflow.md` Sections
+
+### Project Context
+
+E-commerce order processing. Needs durable workflows because Shopify webhooks have at-least-once delivery and the system must handle duplicates safely.
+
+### Business Rules
+
+- An order must not be charged twice for the same Shopify order ID.
+- Inventory reservation must be idempotent — re-reserving the same order is a no-op.
+- Payment charge uses idempotency key derived from Shopify order ID.
+- Inventory reservation uses upsert keyed by order ID.
+
+### External Systems
+
+- Shopify (webhook source, at-least-once delivery). Trigger: `orders/create` webhook.
+- Payment gateway (charge, refund). Rate-limited, has idempotency key support.
+- Inventory service (reserve, release). Supports upsert.
+
+### Failure Expectations
+
+- Duplicate order ID after successful processing: permanent (skip).
+- Payment gateway timeout: retryable.
+- Webhook must respond within 30 seconds.
+- Inventory hold expires after 15 minutes.
+- Compensation: refund payment if inventory reservation fails after charge succeeds.
+
+### Observability Needs
+
+- Log webhook receipt with Shopify order ID.
+- Log idempotency cache hit/miss for payment charge.
+- Stream step progress to operator dashboard.
+
+### Open Questions
+
+(none for this scenario)
+
+## Downstream Expectations
+
+### workflow-build
+
+When building this workflow, the build skill should:
+
+- Flag idempotency requirements on every payment and inventory step
+- Include compensation step for payment refund on inventory failure
+- Produce tests for: happy path, duplicate webhook (no-op), inventory failure triggering refund
+- Flag the 30-second webhook response timeout
+
+## Verification Criteria
+
+- [ ] Interview surfaces duplicate-safety as the first concern
+- [ ] `.workflow.md` Business Rules captures both idempotency strategies
+- [ ] `.workflow.md` Failure Expectations captures refund-on-inventory-failure
+- [ ] `.workflow.md` Observability Needs captures idempotency cache logging
+- [ ] Next skill recommendation is `workflow-build`
diff --git a/skills/workflow-teach/goldens/operator-observability-streams.md b/skills/workflow-teach/goldens/operator-observability-streams.md
new file mode 100644
index 0000000000..2e074b5cce
--- /dev/null
+++ b/skills/workflow-teach/goldens/operator-observability-streams.md
@@ -0,0 +1,77 @@
+# Golden Scenario: Operator Observability Streams
+
+## Scenario
+
+A data pipeline workflow ingests CSV files, validates rows, transforms data, loads into a data warehouse, and generates a summary report. Operators need real-time visibility into progress (rows processed, validation errors, load status) via streams, and must be able to diagnose failures from structured logs without accessing the runtime directly.
+
+## Interview Context
+
+The workflow-teach interview should surface these answers:
+
+| Bucket | Expected Answer |
+|--------|----------------|
+| Workflow starter/emitter | Scheduled cron job or manual trigger from ops dashboard |
+| Repeat-safe side effects | Data warehouse load uses upsert by row hash; report generation overwrites previous report |
+| Permanent vs retryable | Malformed CSV is permanent (fatal); warehouse connection timeout is retryable; report generation failure is retryable |
+| Approval actors | No human approval required |
+| Timeout/expiry rules | Each batch must complete within 30 minutes; individual step timeout of 5 minutes |
+| Compensation requirements | If warehouse load fails after partial insert, no rollback needed (upsert makes re-run safe); if report fails, pipeline is still considered successful |
+| Operator observability | Stream row-level progress (processed/total), stream validation error summary, log batch ID with row counts at each stage, log final status with duration |
+
+## Expected `.workflow.md` Sections
+
+### Project Context
+
+Data pipeline for CSV ingestion. Needs durable workflows because batches can take up to 30 minutes and operators need real-time progress visibility.
+
+### Business Rules
+
+- Data warehouse loads must be idempotent — re-running the same batch produces the same result.
+- Validation errors must be surfaced to operators, not silently dropped.
+- Warehouse load uses upsert keyed by row content hash.
+- Report generation overwrites by batch ID.
+
+### External Systems
+
+- Data warehouse (load, query). Supports upsert. Trigger: cron job or manual ops dashboard.
+- Report generation service (write). Overwrites by batch ID.
+
+### Failure Expectations
+
+- Malformed CSV: permanent (fatal — code/data bug).
+- Warehouse connection timeout: retryable.
+- Report generation failure: retryable. Does not block pipeline success.
+- No rollback for partial warehouse load — upsert makes re-run safe.
+- Batch must complete within 30 minutes; individual step timeout of 5 minutes.
+
+### Observability Needs
+
+- Stream row-level progress: rows processed vs total rows.
+- Stream validation error summary with row numbers and error types.
+- Log batch.started with batch ID and source file.
+- Log batch.validated with valid/invalid row counts.
+- Log batch.loaded with inserted/updated/skipped counts.
+- Log batch.completed with final status and total duration.
+
+### Open Questions
+
+(none for this scenario)
+
+## Downstream Expectations
+
+### workflow-build
+
+When building this workflow, the build skill should:
+
+- Use separate stream namespaces for row progress and validation errors
+- Ensure `getWritable()` stream I/O happens in steps, not workflow context
+- Flag that report failure should not block pipeline success
+- Produce tests for: happy path with stream verification, validation errors being streamed, warehouse timeout with retry
+
+## Verification Criteria
+
+- [ ] Interview prioritizes operator observability as a first-class concern
+- [ ] `.workflow.md` Observability Needs is the most detailed section
+- [ ] `.workflow.md` Business Rules captures the no-silent-drop rule
+- [ ] `.workflow.md` Failure Expectations distinguishes fatal CSV errors from retryable warehouse errors
+- [ ] Next skill recommendation is `workflow-build`
diff --git a/skills/workflow-teach/goldens/partial-side-effect-compensation.md b/skills/workflow-teach/goldens/partial-side-effect-compensation.md
new file mode 100644
index 0000000000..39d32cdf9f
--- /dev/null
+++ b/skills/workflow-teach/goldens/partial-side-effect-compensation.md
@@ -0,0 +1,77 @@
+# Golden Scenario: Partial Side-Effect Compensation
+
+## Scenario
+
+A SaaS onboarding workflow provisions a new tenant: creates a database schema, provisions cloud storage, seeds default configuration, and sends a welcome email. If cloud storage provisioning fails after the database schema is created, the database schema must be torn down. If email fails after everything else succeeds, the tenant is still considered provisioned (email is retried asynchronously).
+
+## Interview Context
+
+The workflow-teach interview should surface these answers:
+
+| Bucket | Expected Answer |
+|--------|----------------|
+| Workflow starter/emitter | API call from admin dashboard when a new tenant signs up |
+| Repeat-safe side effects | Database schema creation uses `CREATE SCHEMA IF NOT EXISTS`; storage provisioning is idempotent by bucket naming convention |
+| Permanent vs retryable | Schema creation failure is retryable (transient DB errors); storage quota exceeded is permanent; email failure is retryable |
+| Approval actors | No human approval required |
+| Timeout/expiry rules | Entire onboarding must complete within 5 minutes or be marked as failed |
+| Compensation requirements | If storage provisioning fails after DB schema creation, drop the schema; if email fails, do not compensate — tenant is provisioned, email retried separately |
+| Operator observability | Log each provisioning step with tenant ID, log compensation actions, stream progress to admin dashboard |
+
+## Expected `.workflow.md` Sections
+
+### Project Context
+
+SaaS tenant onboarding system. Needs durable workflows because provisioning involves multiple external services that must be orchestrated with compensation on failure.
+
+### Business Rules
+
+- A tenant must not exist in a half-provisioned state — either fully provisioned or fully rolled back.
+- Email failure does not block tenant provisioning.
+- Database schema creation uses `CREATE SCHEMA IF NOT EXISTS`.
+- Storage provisioning uses deterministic bucket name from tenant ID.
+
+### External Systems
+
+- Database (schema creation, teardown). Supports idempotent creation. Trigger: API call from admin dashboard.
+- Cloud storage (bucket provisioning). Idempotent by naming convention.
+- Email service (welcome email). Retryable, non-critical.
+
+### Failure Expectations
+
+- Schema creation failure: retryable (transient DB errors).
+- Storage quota exceeded: permanent (fatal).
+- Email failure: retryable, non-critical — does not block provisioning.
+- Compensation: drop database schema if storage provisioning fails after schema creation.
+- No compensation for email failure — tenant is considered provisioned.
+- Entire onboarding must complete within 5 minutes.
+
+### Observability Needs
+
+- Log provision.schema with tenant ID and status.
+- Log provision.storage with tenant ID and status.
+- Log compensation.schema_drop with tenant ID when rollback triggers.
+- Stream onboarding progress to admin dashboard.
+
+### Open Questions
+
+(none for this scenario)
+
+## Downstream Expectations
+
+### workflow-build
+
+When building this workflow, the build skill should:
+
+- Include a compensation step that drops the schema on storage failure
+- Classify email failure as retryable and non-blocking
+- Flag the 5-minute overall timeout
+- Produce tests for: happy path, storage failure triggering schema compensation, email failure not triggering compensation, overall timeout
+
+## Verification Criteria
+
+- [ ] Interview distinguishes compensable failures (storage) from non-compensable ones (email)
+- [ ] `.workflow.md` Failure Expectations captures both the positive case (schema drop) and the negative case (no email compensation)
+- [ ] `.workflow.md` Business Rules captures the no-half-provisioned-state rule
+- [ ] `.workflow.md` Observability Needs includes compensation action logging
+- [ ] Next skill recommendation is `workflow-build`
diff --git a/skills/workflow-timeout/SKILL.md b/skills/workflow-timeout/SKILL.md
new file mode 100644
index 0000000000..d5639a6dc2
--- /dev/null
+++ b/skills/workflow-timeout/SKILL.md
@@ -0,0 +1,116 @@
+---
+name: workflow-timeout
+description: Build a durable workflow whose correctness depends on expiry, wake-up behavior, and timeout outcomes. Use when the user says "timeout workflow", "workflow-timeout", "expiry", "sleep", or "wake up".
+user-invocable: true
+argument-hint: "[workflow prompt]"
+metadata:
+ author: Vercel Inc.
+ version: '0.1'
+---
+
+# workflow-timeout
+
+Use this skill when the user wants to build a workflow whose correctness depends on time-based expiry, suspension via `sleep`, and deterministic wake-up via `wakeUp`. This is a scenario entrypoint that routes into the existing teach → build pipeline with timeout-specific guardrails.
+
+## Context Capture
+
+If `.workflow.md` exists in the project root, read it and use its context. If it does not exist, run a focused context capture covering these timeout-specific questions before proceeding:
+
+1. **Timeout triggers** — "What events or durations trigger a timeout? Is each timeout a fixed duration or computed from business rules?"
+2. **Timeout outcomes** — "What happens when a timeout fires — escalation, auto-rejection, cancellation, or something else?"
+3. **Sleep/wake-up pairing** — "Which suspension points use `sleep()`, and can any be woken early via `wakeUp`?"
+4. **Hook/sleep races** — "Are there points where a hook (human action) races against a sleep (timeout)? What wins if both resolve?"
+5. **Cascading timeouts** — "Does the workflow have multiple timeout tiers (e.g. 48h then 24h)? What is the escalation chain?"
+6. **Observability** — "What must operators see in logs for timeout lifecycle events (sleep started, woken early, expired)?"
+
+Save the answers into `.workflow.md` following the same 8-section format used by `workflow-teach`.
+
+## Required Design Constraints
+
+When building a timeout workflow, the following constraints are non-negotiable:
+
+### Every suspension must have a bounded lifetime
+
+Every `sleep()` call must have an explicit duration. Never create an unbounded suspension — a workflow that sleeps forever is a workflow that never completes.
+
+### Sleep/wake-up correctness
+
+Use `sleep()` to suspend the workflow for a fixed duration. Use `waitForSleep` in tests to capture the sleep correlation ID, then `wakeUp` to advance past the sleep without waiting for real time. Every test that exercises a timeout path must use `waitForSleep` and `wakeUp`.
+
+### Hook/sleep races via `Promise.race`
+
+When a human action (hook) races against a timeout (sleep), use `createHook()` for the human action and `sleep()` for the timeout, then race them with `Promise.race([hook, sleep("duration")])`. Check the result:
+
+- If the hook resolves first, the human responded before the timeout
+- If the sleep resolves first (returns `undefined`), the timeout fired
+
+Never use separate branches or polling to detect timeout — always race.
+
+### Timeout as a domain outcome
+
+A timeout is a normal workflow outcome, not an error. Do not throw an error when a timeout fires. Instead, treat the timeout branch as a first-class code path with its own business logic (escalation, auto-rejection, cancellation).
+
+### Deterministic hook tokens for timed actions
+
+When a hook races against a sleep, the hook must use a deterministic token derived from a stable entity identifier (e.g. `approval:${requestId}`). This ensures the hook is collision-free across concurrent workflow runs.
+
+## Build Process
+
+Follow the same six-phase interactive build process as `workflow-build`:
+
+1. **Propose step boundaries** — identify `"use workflow"` orchestrator vs `"use step"` functions, suspension points (sleep + hook races), and escalation tiers
+2. **Flag relevant traps** — run the stress checklist with special attention to sleep/wake-up correctness, hook/sleep races, and cascading timeout tiers
+3. **Decide failure modes** — `FatalError` vs `RetryableError` for each step, with timeout treated as a domain-level permanent outcome (not an error)
+4. **Write code + tests** — produce workflow file and integration tests
+5. **Self-review** — re-run the stress checklist against generated code
+6. **Verification summary** — emit the verification artifact and `verification_plan_ready` summary
+
+### Required test coverage
+
+Integration tests must exercise:
+
+- **Happy path** — action completes before any timeout fires
+- **First timeout** — primary timeout fires, escalation or fallback logic runs
+- **Full timeout chain** — all timeouts expire, workflow reaches terminal state (auto-reject, cancel, etc.)
+- Each test must use `waitForHook` and `waitForSleep` from `@workflow/vitest`, plus `resumeHook` and `getRun(...).wakeUp` from `workflow/api`
+
+## Anti-Patterns
+
+Flag these explicitly when they appear in the timeout workflow:
+
+- **Unbounded sleep** — every `sleep()` must have an explicit duration; missing durations suspend the workflow forever
+- **Missing sleep pairing** — every hook must race against a sleep timeout; an unguarded hook can suspend the workflow indefinitely
+- **Timeout treated as an error** — timeouts are domain outcomes, not exceptions; do not throw when a sleep wins a race
+- **Polling instead of `Promise.race`** — use `Promise.race([hook, sleep])` to detect timeout; never poll or use setInterval
+- **Non-deterministic hook tokens** — hook tokens in timed races must be deterministic and derived from stable entity identifiers
+- **Tests without `waitForSleep`/`wakeUp`** — timeout tests that rely on real time are flaky; always use test helpers
+- **Node.js APIs in workflow context** — `fs`, `crypto`, `Buffer`, etc. cannot be used inside `"use workflow"` functions
+- **Direct stream I/O in workflow context** — `getWritable()` may be called in workflow context, but actual writes must happen in steps
+- **`start()` called directly from workflow code** — must be wrapped in a step
+
+## Inputs
+
+Always read these before producing output:
+
+1. **`skills/workflow/SKILL.md`** — the authoritative API truth source
+2. **`.workflow.md`** — project-specific context (if present)
+
+## Verification Contract
+
+This skill terminates with the same verification contract as `workflow-build`. The final output must include:
+
+1. A **Verification Artifact** — fenced JSON block with `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`
+2. A **Verification Summary** — single-line JSON: `{"event":"verification_plan_ready","blueprintName":"<name>","fileCount":<n>,"testCount":<n>,"runtimeCommandCount":<n>,"contractVersion":"1"}`
+
+## Sample Usage
+
+**Input:** `/workflow-timeout Wait 24h for manager acknowledgement, escalate for another 24h, then auto-close.`
+
+**Expected behavior:**
+
+1. Reads `.workflow.md` if present; otherwise runs focused context capture
+2. Proposes: notification step, manager hook with `ack:${requestId}` token + 24h sleep, escalation notification step, escalation hook with `escalation:${requestId}` token + 24h sleep, auto-close step
+3. Flags: every hook must race against a sleep, timeout is a domain outcome not an error, deterministic tokens required, `waitForSleep`/`wakeUp` required in tests
+4. Writes: `workflows/manager-ack.ts` + `workflows/manager-ack.integration.test.ts`
+5. Tests cover: manager responds before timeout, manager timeout → escalation → escalation responds, full timeout → auto-close — using `waitForHook`, `resumeHook`, `waitForSleep`, `wakeUp`
+6. Emits verification artifact and `verification_plan_ready` summary
diff --git a/skills/workflow-timeout/goldens/approval-timeout-streaming.md b/skills/workflow-timeout/goldens/approval-timeout-streaming.md
new file mode 100644
index 0000000000..5019c181ba
--- /dev/null
+++ b/skills/workflow-timeout/goldens/approval-timeout-streaming.md
@@ -0,0 +1,243 @@
+# Golden Scenario: Approval Timeout with Streaming
+
+## User Prompt
+
+```
+/workflow-timeout Wait 24h for manager acknowledgement, escalate for another 24h, then auto-close.
+```
+
+## Scenario
+
+A ticket acknowledgement workflow that waits for a manager to acknowledge an issue within 24 hours. If the manager does not respond, the ticket escalates to a director with another 24-hour window. If neither responds, the ticket auto-closes. While waiting, the workflow streams status updates to the UI using `getWritable()`.
+
+## Context Capture
+
+The scenario skill checks for `.workflow.md` first. In this example it does not exist, so the focused timeout-specific interview runs:
+
+| Question | Expected Answer |
+|----------|----------------|
+| Timeout triggers | Manager: 24 hours; Director: 24 hours; both fixed durations |
+| Timeout outcomes | Manager timeout → escalate to director; Director timeout → auto-close |
+| Sleep/wake-up pairing | Both hooks race against sleep; tests use `waitForSleep` and `wakeUp` |
+| Hook/sleep races | Manager hook races 24h sleep; director hook races 24h sleep |
+| Cascading timeouts | Two tiers: manager (24h) then director (24h) |
+| Observability | Log sleep started, timeout fired, escalation triggered, auto-close |
+
+The captured context is saved to `.workflow.md` with sections: Project Context, Business Rules, External Systems, Failure Expectations, Observability Needs, Approved Patterns, Open Questions.
+
+## What the Scenario Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **Sleep/wake-up correctness** — Both suspension points use `sleep("24h")`. Tests must use `waitForSleep` to capture the correlation ID and `wakeUp` to advance past the sleep without real-time waits.
+2. **Hook/sleep race** — Each hook must race against its paired sleep via `Promise.race`. An unguarded hook suspends the workflow indefinitely.
+3. **Deterministic hook tokens** — The manager hook uses `ack:${ticketId}` and the director hook uses `escalation:${ticketId}`. Random tokens would cause collisions across concurrent tickets.
+4. **Stream I/O placement** — `getWritable()` may be called in workflow context, but actual `write()` calls must happen inside `"use step"` functions.
+
+### Phase 3 — Failure Modes Decided
+
+- `notifyManager`: `RetryableError` with `maxRetries: 3` — notification delivery is transient.
+- `notifyDirector`: `RetryableError` with `maxRetries: 3` — same as manager notification.
+- `writeStatus`: `RetryableError` with `maxRetries: 2` — stream writes are I/O.
+- `recordOutcome`: `RetryableError` with `maxRetries: 2` — database write may fail transiently.
+- Manager timeout is a domain-level outcome, not an error.
+- Director timeout is a domain-level outcome, not an error.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { createHook, getWritable, sleep } from "workflow";
+
+type AckDecision = { acknowledged: boolean; note?: string };
+
+const notifyPerson = async (
+ ticketId: string,
+ personId: string,
+ template: string
+) => {
+ "use step";
+ await notifications.send({
+ idempotencyKey: `notify:${template}:${ticketId}`,
+ to: personId,
+ template,
+ });
+};
+
+const writeStatus = async (
+ stream: ReturnType,
+ status: string
+) => {
+ "use step";
+ // Stream I/O must happen in a step, not in workflow context
+ const writer = stream.getWriter();
+ await writer.write(status);
+ writer.releaseLock();
+};
+
+const recordOutcome = async (ticketId: string, status: string, actor: string) => {
+ "use step";
+ await db.tickets.update({
+ where: { id: ticketId },
+ data: { status, resolvedBy: actor, resolvedAt: new Date() },
+ });
+ return { ticketId, status, resolvedBy: actor };
+};
+
+export default async function ticketAck(
+ ticketId: string,
+ managerId: string,
+ directorId: string
+) {
+ const stream = getWritable("ticket-status");
+
+ // Tier 1: Notify manager and wait 24h
+ await notifyPerson(ticketId, managerId, "ack-request");
+ await writeStatus(stream, "waiting-for-manager");
+
+ const managerHook = createHook(`ack:${ticketId}`);
+ const managerTimeout = sleep("24h");
+ const managerResult = await Promise.race([managerHook, managerTimeout]);
+
+ if (managerResult !== undefined) {
+ await writeStatus(stream, "acknowledged");
+ return recordOutcome(ticketId, "acknowledged", managerId);
+ }
+
+ // Tier 2: Manager timed out — escalate to director
+ await notifyPerson(ticketId, directorId, "escalation-request");
+ await writeStatus(stream, "escalated");
+
+ const directorHook = createHook(`escalation:${ticketId}`);
+ const directorTimeout = sleep("24h");
+ const directorResult = await Promise.race([directorHook, directorTimeout]);
+
+ if (directorResult !== undefined) {
+ await writeStatus(stream, "acknowledged-by-director");
+ return recordOutcome(ticketId, "acknowledged", directorId);
+ }
+
+ // Tier 3: Full timeout — auto-close
+ await writeStatus(stream, "auto-closed");
+ return recordOutcome(ticketId, "auto-closed", "system");
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start, resumeHook, getRun } from "workflow/api";
+import { waitForHook, waitForSleep } from "@workflow/vitest";
+import ticketAck from "../workflows/ticket-ack";
+
+describe("ticketAck", () => {
+ it("manager acknowledges before timeout", async () => {
+ const run = await start(ticketAck, ["ticket-1", "manager-1", "director-1"]);
+
+ await waitForHook(run, { token: "ack:ticket-1" });
+ await resumeHook("ack:ticket-1", { acknowledged: true });
+
+ await expect(run.returnValue).resolves.toEqual({
+ ticketId: "ticket-1",
+ status: "acknowledged",
+ resolvedBy: "manager-1",
+ });
+ });
+
+ it("escalates to director when manager times out", async () => {
+ const run = await start(ticketAck, ["ticket-2", "manager-2", "director-2"]);
+
+ // Manager timeout — advance past 24h sleep
+ const sleepId1 = await waitForSleep(run);
+ await getRun(run.runId).wakeUp({ correlationIds: [sleepId1] });
+
+ // Director acknowledges
+ await waitForHook(run, { token: "escalation:ticket-2" });
+ await resumeHook("escalation:ticket-2", { acknowledged: true });
+
+ await expect(run.returnValue).resolves.toEqual({
+ ticketId: "ticket-2",
+ status: "acknowledged",
+ resolvedBy: "director-2",
+ });
+ });
+
+ it("auto-closes when all approvers time out", async () => {
+ const run = await start(ticketAck, ["ticket-3", "manager-3", "director-3"]);
+
+ // Manager timeout
+ const sleepId1 = await waitForSleep(run);
+ await getRun(run.runId).wakeUp({ correlationIds: [sleepId1] });
+
+ // Director timeout
+ const sleepId2 = await waitForSleep(run);
+ await getRun(run.runId).wakeUp({ correlationIds: [sleepId2] });
+
+ await expect(run.returnValue).resolves.toEqual({
+ ticketId: "ticket-3",
+ status: "auto-closed",
+ resolvedBy: "system",
+ });
+ });
+});
+```
+
+## Verification Artifact
+
+```json
+{
+ "contractVersion": "1",
+ "blueprintName": "ticket-ack",
+ "files": [
+ { "kind": "workflow", "path": "workflows/ticket-ack.ts" },
+ { "kind": "test", "path": "workflows/ticket-ack.integration.test.ts" }
+ ],
+ "testMatrix": [
+ {
+ "name": "happy-path",
+ "helpers": ["waitForHook", "resumeHook"],
+ "expects": "Manager acknowledges before timeout"
+ },
+ {
+ "name": "manager-timeout-escalation",
+ "helpers": ["waitForHook", "resumeHook", "waitForSleep", "wakeUp"],
+ "expects": "Manager times out, director acknowledges"
+ },
+ {
+ "name": "full-timeout-auto-close",
+ "helpers": ["waitForSleep", "wakeUp"],
+ "expects": "All approvers time out, workflow auto-closes"
+ }
+ ],
+ "runtimeCommands": [
+ { "name": "typecheck", "command": "pnpm typecheck", "expects": "No TypeScript errors" },
+ { "name": "test", "command": "pnpm test", "expects": "All repository tests pass" },
+ { "name": "focused-workflow-test", "command": "pnpm vitest run workflows/ticket-ack.integration.test.ts", "expects": "ticket-ack integration tests pass" }
+ ],
+ "implementationNotes": [
+ "Invariant: A ticket must receive exactly one final outcome: acknowledged or auto-closed",
+ "Invariant: Escalation must only trigger after the manager timeout expires",
+ "Invariant: Hook tokens are deterministic and derived from ticket ID",
+ "Operator signal: Log timeout.fired with ticketId when sleep wins the race",
+ "Operator signal: Log escalation.triggered with ticketId and director",
+ "Operator signal: Log ticket.resolved with final status and actor"
+ ]
+}
+```
+
+### Verification Summary
+
+{"event":"verification_plan_ready","blueprintName":"ticket-ack","fileCount":2,"testCount":1,"runtimeCommandCount":3,"contractVersion":"1"}
+
+## Checklist Items Exercised
+
+- Sleep/wake-up correctness (waitForSleep + wakeUp in every timeout test)
+- Hook/sleep races (Promise.race for both approval tiers)
+- Deterministic hook tokens (ack:${ticketId}, escalation:${ticketId})
+- Stream I/O placement (getWritable in workflow, write in steps)
+- Timeout as domain outcome (not an error)
+- Cascading timeouts (two-tier escalation)
+- Integration test coverage (happy path, escalation, full timeout)
diff --git a/skills/workflow-webhook/SKILL.md b/skills/workflow-webhook/SKILL.md
new file mode 100644
index 0000000000..366ee313d0
--- /dev/null
+++ b/skills/workflow-webhook/SKILL.md
@@ -0,0 +1,120 @@
+---
+name: workflow-webhook
+description: Build a durable webhook ingestion workflow with duplicate-delivery handling, idempotency keys, and compensation. Use when the user says "webhook workflow", "workflow-webhook", "webhook ingestion", "duplicate webhook", or "at-least-once delivery".
+user-invocable: true
+argument-hint: "[workflow prompt]"
+metadata:
+ author: Vercel Inc.
+ version: '0.1'
+---
+
+# workflow-webhook
+
+Use this skill when the user wants to build a workflow that ingests external webhooks with at-least-once delivery guarantees. This is a scenario entrypoint that routes into the existing teach → build pipeline with webhook-specific guardrails.
+
+## Context Capture
+
+If `.workflow.md` exists in the project root, read it and use its context. If it does not exist, run a focused context capture covering these webhook-specific questions before proceeding:
+
+1. **Webhook source** — "What system sends the webhook, and does it guarantee at-least-once or exactly-once delivery?"
+2. **Duplicate handling** — "How should duplicate deliveries be detected and handled? What entity ID anchors deduplication?"
+3. **Idempotency strategy** — "Which downstream operations need idempotency keys, and what stable identifiers are available?"
+4. **Response timeout** — "How quickly must the webhook endpoint respond before the sender retries?"
+5. **Compensation requirements** — "If a downstream step fails after earlier steps have committed side effects, what must be undone?"
+6. **Observability** — "What must operators see in logs for webhook receipt, deduplication, and step progress?"
+
+Save the answers into `.workflow.md` following the same 8-section format used by `workflow-teach`.
+
+## Required Design Constraints
+
+When building a webhook ingestion workflow, the following constraints are non-negotiable:
+
+### Duplicate-delivery handling
+
+The workflow must detect and safely handle duplicate webhook deliveries. The deduplication strategy must use a stable identifier from the webhook payload (e.g. Shopify order ID, Stripe event ID). Duplicate deliveries after successful processing must be treated as `FatalError` (skip, do not reprocess).
+
+### Stable idempotency keys
+
+Every step with external side effects must use an idempotency key derived from a stable, unique identifier — never from timestamps or random values. Examples:
+
+- Payment charge: `payment:${orderId}`
+- Inventory reservation: `inventory:${orderId}`
+- Notification: `notify:${orderId}`
+
+### Webhook response mode selection
+
+Choose the correct webhook response mode:
+
+- **`static`** — use when the webhook sender only needs an acknowledgment. The endpoint returns a fixed response immediately without blocking on workflow completion. This is the correct default for most webhook ingestion patterns.
+- **`manual`** — use only when the webhook response must include data computed by the workflow (rare for ingestion patterns).
+
+The response timeout from the webhook sender (e.g. Shopify's 30-second limit) must be respected. Long-running processing must happen after the webhook response is sent.
+
+### Compensation when downstream steps fail
+
+If a step fails after prior steps have committed irreversible side effects, a compensation step must undo the committed work. Example: if inventory reservation fails after payment has been charged, the workflow must refund the payment.
+
+Compensation steps must:
+
+- Use their own idempotency keys (e.g. `refund:${orderId}`)
+- Be `RetryableError` with high `maxRetries` — compensation must eventually succeed
+- Execute before the workflow terminates with an error
+
+## Build Process
+
+Follow the same six-phase interactive build process as `workflow-build`:
+
+1. **Propose step boundaries** — identify `"use workflow"` orchestrator vs `"use step"` functions, deduplication check, downstream steps, compensation steps
+2. **Flag relevant traps** — run the stress checklist with special attention to idempotency keys, webhook response mode, and compensation strategy
+3. **Decide failure modes** — `FatalError` for duplicate/already-processed, `RetryableError` for transient downstream failures, compensation plan for each irreversible step
+4. **Write code + tests** — produce workflow file and integration tests
+5. **Self-review** — re-run the stress checklist against generated code
+6. **Verification summary** — emit the verification artifact and `verification_plan_ready` summary
+
+### Required test coverage
+
+Integration tests must exercise:
+
+- **Happy path** — webhook received, all steps succeed
+- **Duplicate webhook** — second delivery is detected and skipped (no-op)
+- **Compensation path** — downstream step fails after earlier step committed, compensation executes
+- **Idempotency verification** — replayed steps do not produce duplicate side effects
+
+## Anti-Patterns
+
+Flag these explicitly when they appear in the webhook workflow:
+
+- **Missing deduplication on webhook ingress** — without duplicate detection, at-least-once delivery causes double-processing
+- **Timestamp or random idempotency keys** — keys must be derived from stable entity identifiers to survive replay
+- **Wrong webhook response mode** — using `manual` when `static` suffices blocks the sender; using `static` when computed data is needed returns stale responses
+- **Missing compensation for irreversible side effects** — if payment is charged and inventory fails, the payment must be refunded
+- **Node.js APIs in workflow context** — `fs`, `crypto`, `Buffer`, etc. cannot be used inside `"use workflow"` functions
+- **Direct stream I/O in workflow context** — `getWritable()` may be called in workflow context, but actual writes must happen in steps
+- **`createWebhook()` with a custom token** — `createWebhook()` does not accept custom tokens; only `createHook()` supports deterministic tokens
+
+## Inputs
+
+Always read these before producing output:
+
+1. **`skills/workflow/SKILL.md`** — the authoritative API truth source
+2. **`.workflow.md`** — project-specific context (if present)
+
+## Verification Contract
+
+This skill terminates with the same verification contract as `workflow-build`. The final output must include:
+
+1. A **Verification Artifact** — fenced JSON block with `contractVersion`, `blueprintName`, `files`, `testMatrix`, `runtimeCommands`, and `implementationNotes`
+2. A **Verification Summary** — single-line JSON: `{"event":"verification_plan_ready","blueprintName":"<name>","fileCount":<n>,"testCount":<n>,"runtimeCommandCount":<n>,"contractVersion":"1"}`
+
+## Sample Usage
+
+**Input:** `/workflow-webhook Build a workflow that processes Shopify order webhooks with at-least-once delivery, charges payment, reserves inventory, and sends confirmation — without double-charging.`
+
+**Expected behavior:**
+
+1. Reads `.workflow.md` if present; otherwise runs focused context capture
+2. Proposes: deduplication check step, payment charge step with `payment:${orderId}` idempotency key, inventory reservation step with `inventory:${orderId}` key, compensation refund step with `refund:${orderId}` key, confirmation email step, webhook response mode `static`
+3. Flags: idempotency required on every side-effecting step, compensation plan for payment-then-inventory-failure, 30-second webhook response timeout
+4. Writes: `workflows/shopify-order.ts` + `workflows/shopify-order.integration.test.ts`
+5. Tests cover: happy path, duplicate webhook no-op, inventory failure triggering refund — verifying idempotency keys prevent double-charges
+6. Emits verification artifact and `verification_plan_ready` summary
diff --git a/skills/workflow-webhook/goldens/duplicate-webhook-order.md b/skills/workflow-webhook/goldens/duplicate-webhook-order.md
new file mode 100644
index 0000000000..0837ba9edc
--- /dev/null
+++ b/skills/workflow-webhook/goldens/duplicate-webhook-order.md
@@ -0,0 +1,226 @@
+# Golden Scenario: Duplicate Webhook Order
+
+## User Prompt
+
+```
+/workflow-webhook Build a workflow that processes Shopify order webhooks with at-least-once delivery, charges payment, reserves inventory, and sends confirmation — without double-charging.
+```
+
+## Scenario
+
+An e-commerce platform receives order-placed webhooks from Shopify. The same webhook may be delivered multiple times due to Shopify's at-least-once delivery guarantee. The workflow must charge payment, reserve inventory, and send a confirmation — but must never double-charge or double-reserve on duplicate deliveries. If inventory reservation fails after payment, the payment must be refunded.
+
+## Context Capture
+
+The scenario skill checks for `.workflow.md` first. In this example it does not exist, so the focused webhook-specific interview runs:
+
+| Question | Expected Answer |
+|----------|----------------|
+| Webhook source | Shopify `orders/create` webhook, at-least-once delivery |
+| Duplicate handling | Deduplicate by Shopify order ID; skip if already processed |
+| Idempotency strategy | Payment: `payment:${orderId}`, Inventory: `inventory:${orderId}`, Refund: `refund:${orderId}` |
+| Response timeout | Shopify expects response within 30 seconds |
+| Compensation requirements | Refund payment if inventory reservation fails after charge |
+| Observability | Log webhook receipt, idempotency cache hit/miss, step progress |
+
+The captured context is saved to `.workflow.md` with sections: Project Context, Business Rules, External Systems, Failure Expectations, Observability Needs, Approved Patterns, Open Questions.
+
+## What the Scenario Skill Should Catch
+
+### Phase 2 — Traps Flagged
+
+1. **Duplicate-delivery handling** — The webhook may arrive more than once. The first step must check whether this order ID has already been processed. If yes, return early with a `FatalError` (skip).
+2. **Idempotency keys** — Every step with external side effects must use a stable idempotency key derived from the Shopify order ID. Timestamps or random values would break on replay.
+3. **Webhook response mode** — Use `static` response mode. The webhook endpoint must respond within 30 seconds; long-running processing happens after the response.
+4. **Compensation strategy** — If `reserveInventory` fails after `chargePayment` succeeds, the workflow must run `refundPayment` before terminating.
+
+### Phase 3 — Failure Modes Decided
+
+- `checkDuplicate`: `FatalError` if already processed (skip entire workflow). No retry needed.
+- `chargePayment`: `RetryableError` with `maxRetries: 3` for transient payment failures. `FatalError` for invalid card or insufficient funds.
+- `reserveInventory`: `RetryableError` with `maxRetries: 2` for transient warehouse API failures. `FatalError` for out-of-stock (triggers compensation).
+- `refundPayment`: `RetryableError` with `maxRetries: 5` — refund must eventually succeed.
+- `sendConfirmation`: `RetryableError` with `maxRetries: 2` — email delivery is transient.
+
+## Expected Code Output
+
+```typescript
+"use workflow";
+
+import { FatalError, RetryableError } from "workflow";
+
+const checkDuplicate = async (orderId: string) => {
+ "use step";
+ const existing = await db.orders.findUnique({ where: { shopifyId: orderId } });
+ if (existing?.status === "completed") {
+ throw new FatalError(`Order ${orderId} already processed`);
+ }
+ return existing;
+};
+
+const chargePayment = async (orderId: string, amount: number) => {
+ "use step";
+ const result = await paymentProvider.charge({
+ idempotencyKey: `payment:${orderId}`,
+ amount,
+ });
+ return result;
+};
+
+const reserveInventory = async (orderId: string, items: CartItem[]) => {
+ "use step";
+ const reservation = await warehouse.reserve({
+ idempotencyKey: `inventory:${orderId}`,
+ items,
+ });
+ return reservation;
+};
+
+const refundPayment = async (orderId: string, chargeId: string) => {
+ "use step";
+ await paymentProvider.refund({
+ idempotencyKey: `refund:${orderId}`,
+ chargeId,
+ });
+};
+
+const sendConfirmation = async (orderId: string, email: string) => {
+ "use step";
+ await emailService.send({
+ idempotencyKey: `confirmation:${orderId}`,
+ to: email,
+ template: "order-confirmed",
+ });
+};
+
+export default async function shopifyOrder(
+ orderId: string,
+ amount: number,
+ items: CartItem[],
+ email: string
+) {
+ // Duplicate check — skip if already processed
+ await checkDuplicate(orderId);
+
+ // Charge payment with idempotency key
+ const charge = await chargePayment(orderId, amount);
+
+ // Reserve inventory — compensate with refund on failure
+ try {
+ await reserveInventory(orderId, items);
+ } catch (error) {
+ if (error instanceof FatalError) {
+ await refundPayment(orderId, charge.id);
+ throw error;
+ }
+ throw error;
+ }
+
+ // Send confirmation
+ await sendConfirmation(orderId, email);
+
+ return { orderId, status: "fulfilled" };
+}
+```
+
+## Expected Test Output
+
+```typescript
+import { describe, it, expect } from "vitest";
+import { start } from "workflow/api";
+import shopifyOrder from "../workflows/shopify-order";
+
+describe("shopifyOrder", () => {
+ it("completes happy path", async () => {
+ const run = await start(shopifyOrder, [
+ "order-1", 100, [{ sku: "A", qty: 1 }], "user@example.com",
+ ]);
+ await expect(run.returnValue).resolves.toEqual({
+ orderId: "order-1",
+ status: "fulfilled",
+ });
+ });
+
+ it("skips duplicate webhook delivery", async () => {
+ // First delivery succeeds
+ const run1 = await start(shopifyOrder, [
+ "order-2", 50, [{ sku: "B", qty: 1 }], "user@example.com",
+ ]);
+ await expect(run1.returnValue).resolves.toEqual({
+ orderId: "order-2",
+ status: "fulfilled",
+ });
+
+ // Second delivery with same order ID is skipped
+ const run2 = await start(shopifyOrder, [
+ "order-2", 50, [{ sku: "B", qty: 1 }], "user@example.com",
+ ]);
+ await expect(run2.returnValue).rejects.toThrow(FatalError);
+ });
+
+ it("refunds payment when inventory fails", async () => {
+ // Mock reserveInventory to throw FatalError (out of stock)
+ const run = await start(shopifyOrder, [
+ "order-3", 75, [{ sku: "C", qty: 999 }], "user@example.com",
+ ]);
+ await expect(run.returnValue).rejects.toThrow(FatalError);
+ // Verify refundPayment was called (compensation executed)
+ });
+});
+```
+
+## Verification Artifact
+
+```json
+{
+ "contractVersion": "1",
+ "blueprintName": "shopify-order",
+ "files": [
+ { "kind": "workflow", "path": "workflows/shopify-order.ts" },
+ { "kind": "test", "path": "workflows/shopify-order.integration.test.ts" }
+ ],
+ "testMatrix": [
+ {
+ "name": "happy-path",
+ "helpers": [],
+ "expects": "Order completes successfully with payment charged and inventory reserved"
+ },
+ {
+ "name": "duplicate-webhook-skip",
+ "helpers": [],
+ "expects": "Duplicate delivery is detected and skipped without reprocessing"
+ },
+ {
+ "name": "compensation-on-inventory-failure",
+ "helpers": [],
+ "expects": "Payment is refunded when inventory reservation fails"
+ }
+ ],
+ "runtimeCommands": [
+ { "name": "typecheck", "command": "pnpm typecheck", "expects": "No TypeScript errors" },
+ { "name": "test", "command": "pnpm test", "expects": "All repository tests pass" },
+ { "name": "focused-workflow-test", "command": "pnpm vitest run workflows/shopify-order.integration.test.ts", "expects": "shopify-order integration tests pass" }
+ ],
+ "implementationNotes": [
+ "Invariant: An order must not be charged twice for the same Shopify order ID",
+ "Invariant: Idempotency keys derived from orderId prevent duplicate charges on replay",
+ "Invariant: Payment charge must be compensated by a refund if inventory reservation fails",
+ "Operator signal: Log webhook.received with Shopify order ID",
+ "Operator signal: Log idempotency.hit when duplicate delivery is detected",
+ "Operator signal: Log compensation.triggered with orderId when refund begins"
+ ]
+}
+```
+
+### Verification Summary
+
+{"event":"verification_plan_ready","blueprintName":"shopify-order","fileCount":2,"testCount":1,"runtimeCommandCount":3,"contractVersion":"1"}
+
+## Checklist Items Exercised
+
+- Duplicate-delivery handling (deduplication by order ID)
+- Idempotency keys (stable keys on every side-effecting step)
+- Webhook response mode (static, respects 30-second timeout)
+- Rollback / compensation strategy (refund on inventory failure)
+- Retry semantics (FatalError for duplicates, RetryableError for transient failures)
+- Integration test coverage (happy path, duplicate skip, compensation)
diff --git a/tests/fixtures/workflow-skills/approval-expiry-escalation/spec.json b/tests/fixtures/workflow-skills/approval-expiry-escalation/spec.json
new file mode 100644
index 0000000000..9589d746d0
--- /dev/null
+++ b/tests/fixtures/workflow-skills/approval-expiry-escalation/spec.json
@@ -0,0 +1,14 @@
+{
+ "name": "approval-expiry-escalation",
+ "goldenPath": "skills/workflow-approval/goldens/approval-expiry-escalation.md",
+ "requires": {
+ "workflow": ["createHook", "sleep", "Promise.race"],
+ "test": ["waitForHook", "resumeHook", "waitForSleep", "wakeUp"],
+ "verificationHelpers": [
+ "waitForHook",
+ "resumeHook",
+ "waitForSleep",
+ "wakeUp"
+ ]
+ }
+}
diff --git a/tests/fixtures/workflow-skills/approval-expiry-escalation/vitest.integration.config.ts b/tests/fixtures/workflow-skills/approval-expiry-escalation/vitest.integration.config.ts
new file mode 100644
index 0000000000..2436a202d7
--- /dev/null
+++ b/tests/fixtures/workflow-skills/approval-expiry-escalation/vitest.integration.config.ts
@@ -0,0 +1,15 @@
+import { dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { defineConfig } from 'vitest/config';
+import { workflow } from '@workflow/vitest';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+export default defineConfig({
+ root: __dirname,
+ plugins: [workflow()],
+ test: {
+ include: ['**/*.integration.test.ts'],
+ testTimeout: 60_000,
+ },
+});
diff --git a/tests/fixtures/workflow-skills/approval-expiry-escalation/workflows/purchase-approval.integration.test.ts b/tests/fixtures/workflow-skills/approval-expiry-escalation/workflows/purchase-approval.integration.test.ts
new file mode 100644
index 0000000000..c516fa728b
--- /dev/null
+++ b/tests/fixtures/workflow-skills/approval-expiry-escalation/workflows/purchase-approval.integration.test.ts
@@ -0,0 +1,62 @@
+// Integration tests for the approval-expiry-escalation fixture (approve, escalate, auto-reject).
+import { describe, it, expect } from "vitest";
+import { start, resumeHook, getRun } from "workflow/api";
+import { waitForHook, waitForSleep } from "@workflow/vitest";
+import purchaseApproval from "../workflows/purchase-approval";
+
+describe("purchaseApproval", () => {
+  it("manager approves before timeout", async () => {
+    const run = await start(purchaseApproval, [
+      "PO-1001", 7500, "manager-1", "director-1",
+    ]);
+
+    await waitForHook(run, { token: "approval:po-PO-1001" });
+    await resumeHook("approval:po-PO-1001", { approved: true });
+
+    await expect(run.returnValue).resolves.toEqual({
+      poNumber: "PO-1001",
+      status: "approved",
+      decidedBy: "manager-1",
+    });
+  });
+
+  it("escalates to director when manager times out", async () => {
+    const run = await start(purchaseApproval, [
+      "PO-1002", 10000, "manager-2", "director-2",
+    ]);
+
+    // Manager timeout
+    const sleepId1 = await waitForSleep(run);
+    await getRun(run.runId).wakeUp({ correlationIds: [sleepId1] });
+
+    // Director approves
+    await waitForHook(run, { token: "escalation:po-PO-1002" });
+    await resumeHook("escalation:po-PO-1002", { approved: true });
+
+    await expect(run.returnValue).resolves.toEqual({
+      poNumber: "PO-1002",
+      status: "approved",
+      decidedBy: "director-2",
+    });
+  });
+
+  it("auto-rejects when all approvers time out", async () => {
+    const run = await start(purchaseApproval, [
+      "PO-1003", 6000, "manager-3", "director-3",
+    ]);
+
+    // Manager timeout
+    const sleepId1 = await waitForSleep(run);
+    await getRun(run.runId).wakeUp({ correlationIds: [sleepId1] });
+
+    // Director timeout
+    const sleepId2 = await waitForSleep(run);
+    await getRun(run.runId).wakeUp({ correlationIds: [sleepId2] });
+
+    await expect(run.returnValue).resolves.toEqual({
+      poNumber: "PO-1003",
+      status: "auto-rejected",
+      decidedBy: "system",
+    });
+  });
+});
diff --git a/tests/fixtures/workflow-skills/approval-expiry-escalation/workflows/purchase-approval.ts b/tests/fixtures/workflow-skills/approval-expiry-escalation/workflows/purchase-approval.ts
new file mode 100644
index 0000000000..dd72bb743e
--- /dev/null
+++ b/tests/fixtures/workflow-skills/approval-expiry-escalation/workflows/purchase-approval.ts
@@ -0,0 +1,79 @@
+"use workflow";
+
+import { createHook, sleep } from "workflow";
+// NOTE(review): `notifications` and `db` are used below but never imported — assumed ambient fixture services; confirm with harness.
+
+type ApprovalDecision = { approved: boolean; reason?: string };
+
+const notifyApprover = async (
+  poNumber: string,
+  approverId: string,
+  template: string
+) => {
+  "use step";
+  await notifications.send({
+    idempotencyKey: `notify:${template}:${poNumber}`,
+    to: approverId,
+    template,
+  });
+};
+
+const recordDecision = async (
+  poNumber: string,
+  status: string,
+  decidedBy: string
+) => {
+  "use step";
+  await db.purchaseOrders.update({
+    where: { poNumber },
+    data: { status, decidedBy, decidedAt: new Date() },
+  });
+  return { poNumber, status, decidedBy };
+};
+
+export default async function purchaseApproval(
+  poNumber: string,
+  amount: number,
+  managerId: string,
+  directorId: string
+) {
+  // Step 1: Notify manager and wait for approval with 48h timeout
+  await notifyApprover(poNumber, managerId, "approval-request");
+
+  const managerHook = createHook(
+    `approval:po-${poNumber}`
+  );
+  const managerTimeout = sleep("48h");
+  const managerResult = await Promise.race([managerHook, managerTimeout]);
+
+  if (managerResult !== undefined) {
+    // Manager responded
+    return recordDecision(
+      poNumber,
+      managerResult.approved ? "approved" : "rejected",
+      managerId
+    );
+  }
+
+  // Step 2: Manager timed out — escalate to director with 24h timeout
+  await notifyApprover(poNumber, directorId, "escalation-request");
+
+  const directorHook = createHook(
+    `escalation:po-${poNumber}`
+  );
+  const directorTimeout = sleep("24h");
+  const directorResult = await Promise.race([directorHook, directorTimeout]);
+
+  if (directorResult !== undefined) {
+    // Director responded
+    return recordDecision(
+      poNumber,
+      directorResult.approved ? "approved" : "rejected",
+      directorId
+    );
+  }
+
+  // Step 3: Full timeout — auto-reject
+  await notifyApprover(poNumber, managerId, "auto-rejection-notice");
+  return recordDecision(poNumber, "auto-rejected", "system");
+}
diff --git a/tests/fixtures/workflow-skills/compensation-saga/spec.json b/tests/fixtures/workflow-skills/compensation-saga/spec.json
new file mode 100644
index 0000000000..e0bef20be2
--- /dev/null
+++ b/tests/fixtures/workflow-skills/compensation-saga/spec.json
@@ -0,0 +1,9 @@
+{
+ "name": "compensation-saga",
+ "goldenPath": "skills/workflow-saga/goldens/compensation-saga.md",
+ "requires": {
+ "workflow": ["FatalError", "RetryableError"],
+ "test": [],
+ "verificationHelpers": []
+ }
+}
diff --git a/tests/fixtures/workflow-skills/compensation-saga/vitest.integration.config.ts b/tests/fixtures/workflow-skills/compensation-saga/vitest.integration.config.ts
new file mode 100644
index 0000000000..2436a202d7
--- /dev/null
+++ b/tests/fixtures/workflow-skills/compensation-saga/vitest.integration.config.ts
@@ -0,0 +1,15 @@
+import { dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { defineConfig } from 'vitest/config';
+import { workflow } from '@workflow/vitest';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+export default defineConfig({
+ root: __dirname,
+ plugins: [workflow()],
+ test: {
+ include: ['**/*.integration.test.ts'],
+ testTimeout: 60_000,
+ },
+});
diff --git a/tests/fixtures/workflow-skills/compensation-saga/workflows/order-saga.integration.test.ts b/tests/fixtures/workflow-skills/compensation-saga/workflows/order-saga.integration.test.ts
new file mode 100644
index 0000000000..7cd383f56f
--- /dev/null
+++ b/tests/fixtures/workflow-skills/compensation-saga/workflows/order-saga.integration.test.ts
@@ -0,0 +1,47 @@
+// Integration tests for the compensation-saga fixture (happy path + compensation).
+import { describe, it, expect } from 'vitest';
+import { start } from 'workflow/api';
+import { FatalError } from 'workflow';
+import orderSaga from '../workflows/order-saga';
+
+describe('orderSaga', () => {
+  it('completes happy path', async () => {
+    const run = await start(orderSaga, [
+      'order-1',
+      100,
+      [{ sku: 'A', qty: 1 }],
+      { street: '123 Main' },
+      'user@example.com',
+    ]);
+    await expect(run.returnValue).resolves.toEqual({
+      orderId: 'order-1',
+      status: 'fulfilled',
+    });
+  });
+
+  it('compensates payment and inventory when shipment fails', async () => {
+    // Mock bookShipment to throw FatalError (carrier rejected)
+    const run = await start(orderSaga, [
+      'order-2',
+      50,
+      [{ sku: 'B', qty: 1 }],
+      { street: '456 Elm' },
+      'user@example.com',
+    ]);
+    await expect(run.returnValue).rejects.toThrow(FatalError);
+    // Verify refundPayment and releaseInventory were called (compensation executed)
+  });
+
+  it('compensates inventory only when payment fails', async () => {
+    // Mock chargePayment to throw FatalError (insufficient funds)
+    const run = await start(orderSaga, [
+      'order-3',
+      75,
+      [{ sku: 'C', qty: 1 }],
+      { street: '789 Oak' },
+      'user@example.com',
+    ]);
+    await expect(run.returnValue).rejects.toThrow(FatalError);
+    // Verify releaseInventory was called but refundPayment was not
+  });
+});
diff --git a/tests/fixtures/workflow-skills/compensation-saga/workflows/order-saga.ts b/tests/fixtures/workflow-skills/compensation-saga/workflows/order-saga.ts
new file mode 100644
index 0000000000..f3543cb03c
--- /dev/null
+++ b/tests/fixtures/workflow-skills/compensation-saga/workflows/order-saga.ts
@@ -0,0 +1,97 @@
+'use workflow';
+
+import { FatalError, RetryableError } from 'workflow';
+
+const reserveInventory = async (orderId: string, items: CartItem[]) => {
+ 'use step';
+ const reservation = await warehouse.reserve({
+ idempotencyKey: `inventory:${orderId}`,
+ items,
+ });
+ return reservation;
+};
+
+const chargePayment = async (orderId: string, amount: number) => {
+ 'use step';
+ const result = await paymentProvider.charge({
+ idempotencyKey: `payment:${orderId}`,
+ amount,
+ });
+ return result;
+};
+
+const bookShipment = async (orderId: string, address: Address) => {
+ 'use step';
+ const shipment = await carrier.book({
+ idempotencyKey: `shipment:${orderId}`,
+ address,
+ });
+ return shipment;
+};
+
+const refundPayment = async (orderId: string, chargeId: string) => {
+ 'use step';
+ await paymentProvider.refund({
+ idempotencyKey: `refund:${orderId}`,
+ chargeId,
+ });
+};
+
+const releaseInventory = async (orderId: string, reservationId: string) => {
+ 'use step';
+ await warehouse.release({
+ idempotencyKey: `release:${orderId}`,
+ reservationId,
+ });
+};
+
+const sendConfirmation = async (orderId: string, email: string) => {
+ 'use step';
+ await emailService.send({
+ idempotencyKey: `confirmation:${orderId}`,
+ to: email,
+ template: 'order-confirmed',
+ });
+};
+
+export default async function orderSaga(
+ orderId: string,
+ amount: number,
+ items: CartItem[],
+ address: Address,
+ email: string
+) {
+ // Forward step 1: Reserve inventory
+ const reservation = await reserveInventory(orderId, items);
+
+ // Forward step 2: Charge payment
+ let charge;
+ try {
+ charge = await chargePayment(orderId, amount);
+ } catch (error) {
+ // Compensate: release inventory
+ if (error instanceof FatalError) {
+ await releaseInventory(orderId, reservation.id);
+ throw error;
+ }
+ throw error;
+ }
+
+ // Forward step 3: Book shipment
+ try {
+ await bookShipment(orderId, address);
+ } catch (error) {
+ // Compensate in reverse order: refund payment, then release inventory
+ if (error instanceof FatalError) {
+ await refundPayment(orderId, charge.id);
+ await releaseInventory(orderId, reservation.id);
+ throw error;
+ }
+ throw error;
+ }
+
+ // All forward steps succeeded
+ await sendConfirmation(orderId, email);
+
+ return { orderId, status: 'fulfilled' };
+}
diff --git a/tests/fixtures/workflow-skills/duplicate-webhook-order/spec.json b/tests/fixtures/workflow-skills/duplicate-webhook-order/spec.json
new file mode 100644
index 0000000000..a0e22ee299
--- /dev/null
+++ b/tests/fixtures/workflow-skills/duplicate-webhook-order/spec.json
@@ -0,0 +1,9 @@
+{
+ "name": "duplicate-webhook-order",
+ "goldenPath": "skills/workflow-webhook/goldens/duplicate-webhook-order.md",
+ "requires": {
+ "workflow": ["FatalError"],
+ "test": [],
+ "verificationHelpers": []
+ }
+}
diff --git a/tests/fixtures/workflow-skills/duplicate-webhook-order/vitest.integration.config.ts b/tests/fixtures/workflow-skills/duplicate-webhook-order/vitest.integration.config.ts
new file mode 100644
index 0000000000..2436a202d7
--- /dev/null
+++ b/tests/fixtures/workflow-skills/duplicate-webhook-order/vitest.integration.config.ts
@@ -0,0 +1,15 @@
+import { dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { defineConfig } from 'vitest/config';
+import { workflow } from '@workflow/vitest';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+export default defineConfig({
+ root: __dirname,
+ plugins: [workflow()],
+ test: {
+ include: ['**/*.integration.test.ts'],
+ testTimeout: 60_000,
+ },
+});
diff --git a/tests/fixtures/workflow-skills/duplicate-webhook-order/workflows/shopify-order.integration.test.ts b/tests/fixtures/workflow-skills/duplicate-webhook-order/workflows/shopify-order.integration.test.ts
new file mode 100644
index 0000000000..bdf7873d76
--- /dev/null
+++ b/tests/fixtures/workflow-skills/duplicate-webhook-order/workflows/shopify-order.integration.test.ts
@@ -0,0 +1,55 @@
+// Integration tests for the duplicate-webhook-order fixture (dedupe + compensation).
+import { describe, it, expect } from 'vitest';
+import { start } from 'workflow/api';
+import { FatalError } from 'workflow';
+import shopifyOrder from '../workflows/shopify-order';
+
+describe('shopifyOrder', () => {
+  it('completes happy path', async () => {
+    const run = await start(shopifyOrder, [
+      'order-1',
+      100,
+      [{ sku: 'A', qty: 1 }],
+      'user@example.com',
+    ]);
+    await expect(run.returnValue).resolves.toEqual({
+      orderId: 'order-1',
+      status: 'fulfilled',
+    });
+  });
+
+  it('skips duplicate webhook delivery', async () => {
+    // First delivery succeeds
+    const run1 = await start(shopifyOrder, [
+      'order-2',
+      50,
+      [{ sku: 'B', qty: 1 }],
+      'user@example.com',
+    ]);
+    await expect(run1.returnValue).resolves.toEqual({
+      orderId: 'order-2',
+      status: 'fulfilled',
+    });
+
+    // Second delivery with same order ID is skipped
+    const run2 = await start(shopifyOrder, [
+      'order-2',
+      50,
+      [{ sku: 'B', qty: 1 }],
+      'user@example.com',
+    ]);
+    await expect(run2.returnValue).rejects.toThrow(FatalError);
+  });
+
+  it('refunds payment when inventory fails', async () => {
+    // Mock reserveInventory to throw FatalError (out of stock)
+    const run = await start(shopifyOrder, [
+      'order-3',
+      75,
+      [{ sku: 'C', qty: 999 }],
+      'user@example.com',
+    ]);
+    await expect(run.returnValue).rejects.toThrow(FatalError);
+    // Verify refundPayment was called (compensation executed)
+  });
+});
diff --git a/tests/fixtures/workflow-skills/duplicate-webhook-order/workflows/shopify-order.ts b/tests/fixtures/workflow-skills/duplicate-webhook-order/workflows/shopify-order.ts
new file mode 100644
index 0000000000..35ad2efc9f
--- /dev/null
+++ b/tests/fixtures/workflow-skills/duplicate-webhook-order/workflows/shopify-order.ts
@@ -0,0 +1,78 @@
+'use workflow';
+
+import { FatalError } from 'workflow';
+
+const checkDuplicate = async (orderId: string) => {
+  'use step';
+  const existing = await db.orders.findUnique({
+    where: { shopifyId: orderId },
+  });
+  if (existing?.status === 'completed') {
+    throw new FatalError(`Order ${orderId} already processed`);
+  }
+  return existing;
+};
+
+const chargePayment = async (orderId: string, amount: number) => {
+  'use step';
+  const result = await paymentProvider.charge({
+    idempotencyKey: `payment:${orderId}`,
+    amount,
+  });
+  return result;
+};
+
+const reserveInventory = async (orderId: string, items: CartItem[]) => {
+  'use step';
+  const reservation = await warehouse.reserve({
+    idempotencyKey: `inventory:${orderId}`,
+    items,
+  });
+  return reservation;
+};
+
+const refundPayment = async (orderId: string, chargeId: string) => {
+  'use step';
+  await paymentProvider.refund({
+    idempotencyKey: `refund:${orderId}`,
+    chargeId,
+  });
+};
+
+const sendConfirmation = async (orderId: string, email: string) => {
+  'use step';
+  await emailService.send({
+    idempotencyKey: `confirmation:${orderId}`,
+    to: email,
+    template: 'order-confirmed',
+  });
+};
+
+export default async function shopifyOrder(
+  orderId: string,
+  amount: number,
+  items: CartItem[],
+  email: string
+) {
+  // Duplicate check — skip if already processed
+  await checkDuplicate(orderId);
+
+  // Charge payment with idempotency key
+  const charge = await chargePayment(orderId, amount);
+
+  // Reserve inventory — compensate with refund on failure
+  try {
+    await reserveInventory(orderId, items);
+  } catch (error) {
+    if (error instanceof FatalError) {
+      await refundPayment(orderId, charge.id);
+      throw error;
+    }
+    throw error;
+  }
+
+  // Send confirmation
+  await sendConfirmation(orderId, email);
+
+  return { orderId, status: 'fulfilled' };
+}
diff --git a/workbench/vitest/test/workflow-audit-skill-contract.test.ts b/workbench/vitest/test/workflow-audit-skill-contract.test.ts
new file mode 100644
index 0000000000..c7a8750738
--- /dev/null
+++ b/workbench/vitest/test/workflow-audit-skill-contract.test.ts
@@ -0,0 +1,54 @@
+import { readFileSync } from 'node:fs';
+import { resolve } from 'node:path';
+import { describe, expect, it } from 'vitest';
+
+const ROOT = resolve(import.meta.dirname, '..', '..', '..');
+
+describe('workflow-audit skill contract', () => {
+  // Read once; every assertion below is a containment check on this text.
+  const text = readFileSync(
+    resolve(ROOT, 'skills/workflow-audit/SKILL.md'),
+    'utf8',
+  );
+
+  it('keeps the scored-report contract intact', () => {
+    const requiredTokens = [
+      '## Audit Scorecard',
+      '## Executive Summary',
+      '## Detailed Findings by Severity',
+      '## Systemic Risks',
+      '## Positive Findings',
+      '## Audit Summary',
+      'P0 Blocking',
+      'P1 Major',
+      'P2 Minor',
+      'P3 Polish',
+      '"event":"workflow_audit_complete"',
+      '"maxScore":48',
+      '"contractVersion":"1"',
+    ];
+    for (const token of requiredTokens) {
+      expect(text).toContain(token);
+    }
+  });
+
+  it('reuses the same 12-check durable-workflow rubric as workflow-build', () => {
+    const rubricChecks = [
+      'Determinism boundary',
+      'Step granularity',
+      'Pass-by-value / serialization',
+      'Hook token strategy',
+      'Webhook response mode',
+      '`start()` placement',
+      'Stream I/O placement',
+      'Idempotency keys',
+      'Retry semantics',
+      'Rollback / compensation',
+      'Observability streams',
+      'Integration test coverage',
+    ];
+    for (const token of rubricChecks) {
+      expect(text).toContain(token);
+    }
+  });
+});
diff --git a/workbench/vitest/test/workflow-scenario-surface.test.ts b/workbench/vitest/test/workflow-scenario-surface.test.ts
new file mode 100644
index 0000000000..44cb448494
--- /dev/null
+++ b/workbench/vitest/test/workflow-scenario-surface.test.ts
@@ -0,0 +1,754 @@
+import { existsSync, readFileSync } from 'node:fs';
+import { resolve } from 'node:path';
+import { describe, expect, it } from 'vitest';
+
+const ROOT = resolve(import.meta.dirname, '..', '..', '..');
+
+function read(relativePath: string): string {
+ return readFileSync(resolve(ROOT, relativePath), 'utf-8');
+}
+
+describe('workflow scenario surface', () => {
+ // -----------------------------------------------------------------------
+ // Scenario skill files exist
+ // -----------------------------------------------------------------------
+ describe('scenario skills exist', () => {
+ it('workflow-approval SKILL.md exists', () => {
+ expect(
+ existsSync(resolve(ROOT, 'skills/workflow-approval/SKILL.md')),
+ ).toBe(true);
+ });
+
+ it('workflow-webhook SKILL.md exists', () => {
+ expect(
+ existsSync(resolve(ROOT, 'skills/workflow-webhook/SKILL.md')),
+ ).toBe(true);
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Frontmatter: user-invocable and argument-hint
+ // -----------------------------------------------------------------------
+ describe('scenario skill frontmatter', () => {
+ it('workflow-approval has user-invocable: true and argument-hint', () => {
+ const skill = read('skills/workflow-approval/SKILL.md');
+ expect(skill).toContain('user-invocable: true');
+ expect(skill).toContain('argument-hint:');
+ });
+
+ it('workflow-webhook has user-invocable: true and argument-hint', () => {
+ const skill = read('skills/workflow-webhook/SKILL.md');
+ expect(skill).toContain('user-invocable: true');
+ expect(skill).toContain('argument-hint:');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Context reuse: .workflow.md when present, fallback to capture
+ // -----------------------------------------------------------------------
+ describe('context reuse contract', () => {
+ it('workflow-approval reuses .workflow.md and falls back to context capture', () => {
+ const skill = read('skills/workflow-approval/SKILL.md');
+ expect(skill).toContain('.workflow.md');
+ expect(skill).toContain('Context Capture');
+ });
+
+ it('workflow-webhook reuses .workflow.md and falls back to context capture', () => {
+ const skill = read('skills/workflow-webhook/SKILL.md');
+ expect(skill).toContain('.workflow.md');
+ expect(skill).toContain('Context Capture');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Verification contract: same as workflow-build
+ // -----------------------------------------------------------------------
+ describe('verification contract parity with workflow-build', () => {
+ it('workflow-approval terminates with verification_plan_ready', () => {
+ const skill = read('skills/workflow-approval/SKILL.md');
+ expect(skill).toContain('verification_plan_ready');
+ expect(skill).toContain('blueprintName');
+ expect(skill).toContain('fileCount');
+ expect(skill).toContain('contractVersion');
+ });
+
+ it('workflow-webhook terminates with verification_plan_ready', () => {
+ const skill = read('skills/workflow-webhook/SKILL.md');
+ expect(skill).toContain('verification_plan_ready');
+ expect(skill).toContain('blueprintName');
+ expect(skill).toContain('fileCount');
+ expect(skill).toContain('contractVersion');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Artifact ownership: no direct .workflow-skills/*.json mutation
+ // -----------------------------------------------------------------------
+ describe('artifact ownership boundary', () => {
+ it('workflow-approval does not reference .workflow-skills JSON paths', () => {
+ const skill = read('skills/workflow-approval/SKILL.md');
+ expect(skill).not.toContain('.workflow-skills/context.json');
+ expect(skill).not.toContain('.workflow-skills/blueprints');
+ });
+
+ it('workflow-webhook does not reference .workflow-skills JSON paths', () => {
+ const skill = read('skills/workflow-webhook/SKILL.md');
+ expect(skill).not.toContain('.workflow-skills/context.json');
+ expect(skill).not.toContain('.workflow-skills/blueprints');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Domain-specific required content: approval
+ // -----------------------------------------------------------------------
+ describe('workflow-approval domain constraints', () => {
+ it('requires deterministic createHook() tokens', () => {
+ const skill = read('skills/workflow-approval/SKILL.md');
+ expect(skill).toContain('createHook');
+ expect(skill).toContain('deterministic');
+ });
+
+ it('requires expiry via sleep()', () => {
+ const skill = read('skills/workflow-approval/SKILL.md');
+ expect(skill).toContain('sleep');
+ expect(skill).toContain('Promise.race');
+ });
+
+ it('requires escalation behavior', () => {
+ const skill = read('skills/workflow-approval/SKILL.md');
+ expect(skill).toContain('escalation');
+ expect(skill).toContain('escalat');
+ });
+
+ it('requires test helpers: waitForHook, resumeHook, waitForSleep, wakeUp', () => {
+ const skill = read('skills/workflow-approval/SKILL.md');
+ expect(skill).toContain('waitForHook');
+ expect(skill).toContain('resumeHook');
+ expect(skill).toContain('waitForSleep');
+ expect(skill).toContain('wakeUp');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Domain-specific required content: webhook
+ // -----------------------------------------------------------------------
+ describe('workflow-webhook domain constraints', () => {
+ it('requires duplicate-delivery handling', () => {
+ const skill = read('skills/workflow-webhook/SKILL.md');
+ expect(skill).toContain('duplicate');
+ expect(skill).toContain('Duplicate-delivery handling');
+ });
+
+ it('requires stable idempotency keys', () => {
+ const skill = read('skills/workflow-webhook/SKILL.md');
+ expect(skill).toContain('idempotency');
+ expect(skill).toContain('Stable idempotency keys');
+ });
+
+ it('requires webhook response mode selection', () => {
+ const skill = read('skills/workflow-webhook/SKILL.md');
+ expect(skill).toContain('Webhook response mode');
+ expect(skill).toContain('static');
+ expect(skill).toContain('manual');
+ });
+
+ it('requires compensation when downstream steps fail', () => {
+ const skill = read('skills/workflow-webhook/SKILL.md');
+ expect(skill).toContain('Compensation when downstream steps fail');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Goldens exist
+ // -----------------------------------------------------------------------
+ describe('scenario goldens exist', () => {
+ it('workflow-approval has approval-expiry-escalation golden', () => {
+ expect(
+ existsSync(
+ resolve(
+ ROOT,
+ 'skills/workflow-approval/goldens/approval-expiry-escalation.md',
+ ),
+ ),
+ ).toBe(true);
+ });
+
+ it('workflow-webhook has duplicate-webhook-order golden', () => {
+ expect(
+ existsSync(
+ resolve(
+ ROOT,
+ 'skills/workflow-webhook/goldens/duplicate-webhook-order.md',
+ ),
+ ),
+ ).toBe(true);
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Goldens include verification contract
+ // -----------------------------------------------------------------------
+ describe('scenario golden verification contract', () => {
+ it('approval golden includes verification artifact and summary', () => {
+ const golden = read(
+ 'skills/workflow-approval/goldens/approval-expiry-escalation.md',
+ );
+ expect(golden).toContain('## Verification Artifact');
+ expect(golden).toContain('### Verification Summary');
+ expect(golden).toContain('"event":"verification_plan_ready"');
+ });
+
+ it('webhook golden includes verification artifact and summary', () => {
+ const golden = read(
+ 'skills/workflow-webhook/goldens/duplicate-webhook-order.md',
+ );
+ expect(golden).toContain('## Verification Artifact');
+ expect(golden).toContain('### Verification Summary');
+ expect(golden).toContain('"event":"verification_plan_ready"');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Docs and README mention scenario skills iff source exists
+ // -----------------------------------------------------------------------
+ describe('docs and README mention scenario skills', () => {
+ it('getting-started doc mentions workflow-approval iff source exists', () => {
+ const docs = read(
+ 'docs/content/docs/getting-started/workflow-skills.mdx',
+ );
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-approval/SKILL.md'),
+ );
+ expect(docs.includes('`/workflow-approval`')).toBe(exists);
+ });
+
+ it('getting-started doc mentions workflow-webhook iff source exists', () => {
+ const docs = read(
+ 'docs/content/docs/getting-started/workflow-skills.mdx',
+ );
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-webhook/SKILL.md'),
+ );
+ expect(docs.includes('`/workflow-webhook`')).toBe(exists);
+ });
+
+ it('skills README mentions workflow-approval iff source exists', () => {
+ const readme = read('skills/README.md');
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-approval/SKILL.md'),
+ );
+ expect(readme.includes('`workflow-approval`')).toBe(exists);
+ });
+
+ it('skills README mentions workflow-webhook iff source exists', () => {
+ const readme = read('skills/README.md');
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-webhook/SKILL.md'),
+ );
+ expect(readme.includes('`workflow-webhook`')).toBe(exists);
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Scenario skills exist: workflow-saga and workflow-timeout
+ // -----------------------------------------------------------------------
+ describe('saga and timeout scenario skills exist', () => {
+ it('workflow-saga SKILL.md exists', () => {
+ expect(
+ existsSync(resolve(ROOT, 'skills/workflow-saga/SKILL.md')),
+ ).toBe(true);
+ });
+
+ it('workflow-timeout SKILL.md exists', () => {
+ expect(
+ existsSync(resolve(ROOT, 'skills/workflow-timeout/SKILL.md')),
+ ).toBe(true);
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Frontmatter: workflow-saga and workflow-timeout
+ // -----------------------------------------------------------------------
+ describe('saga and timeout frontmatter', () => {
+ it('workflow-saga has user-invocable: true and argument-hint', () => {
+ const skill = read('skills/workflow-saga/SKILL.md');
+ expect(skill).toContain('user-invocable: true');
+ expect(skill).toContain('argument-hint:');
+ });
+
+ it('workflow-timeout has user-invocable: true and argument-hint', () => {
+ const skill = read('skills/workflow-timeout/SKILL.md');
+ expect(skill).toContain('user-invocable: true');
+ expect(skill).toContain('argument-hint:');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Context reuse: workflow-saga and workflow-timeout
+ // -----------------------------------------------------------------------
+ describe('saga and timeout context reuse', () => {
+ it('workflow-saga reuses .workflow.md and falls back to context capture', () => {
+ const skill = read('skills/workflow-saga/SKILL.md');
+ expect(skill).toContain('.workflow.md');
+ expect(skill).toContain('Context Capture');
+ });
+
+ it('workflow-timeout reuses .workflow.md and falls back to context capture', () => {
+ const skill = read('skills/workflow-timeout/SKILL.md');
+ expect(skill).toContain('.workflow.md');
+ expect(skill).toContain('Context Capture');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Verification contract: workflow-saga and workflow-timeout
+ // -----------------------------------------------------------------------
+ describe('saga and timeout verification contract', () => {
+ it('workflow-saga terminates with verification_plan_ready', () => {
+ const skill = read('skills/workflow-saga/SKILL.md');
+ expect(skill).toContain('verification_plan_ready');
+ expect(skill).toContain('blueprintName');
+ expect(skill).toContain('fileCount');
+ expect(skill).toContain('contractVersion');
+ });
+
+ it('workflow-timeout terminates with verification_plan_ready', () => {
+ const skill = read('skills/workflow-timeout/SKILL.md');
+ expect(skill).toContain('verification_plan_ready');
+ expect(skill).toContain('blueprintName');
+ expect(skill).toContain('fileCount');
+ expect(skill).toContain('contractVersion');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Artifact ownership: workflow-saga and workflow-timeout
+ // -----------------------------------------------------------------------
+ describe('saga and timeout artifact ownership', () => {
+ it('workflow-saga does not reference .workflow-skills JSON paths', () => {
+ const skill = read('skills/workflow-saga/SKILL.md');
+ expect(skill).not.toContain('.workflow-skills/context.json');
+ expect(skill).not.toContain('.workflow-skills/blueprints');
+ });
+
+ it('workflow-timeout does not reference .workflow-skills JSON paths', () => {
+ const skill = read('skills/workflow-timeout/SKILL.md');
+ expect(skill).not.toContain('.workflow-skills/context.json');
+ expect(skill).not.toContain('.workflow-skills/blueprints');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Domain-specific required content: saga
+ // -----------------------------------------------------------------------
+ describe('workflow-saga domain constraints', () => {
+ it('requires compensation ordering and idempotency', () => {
+ const skill = read('skills/workflow-saga/SKILL.md');
+ expect(skill).toContain('compensation');
+ expect(skill).toContain('Compensation ordering');
+ expect(skill).toContain('Compensation idempotency keys');
+ });
+
+ it('requires partial failure handling', () => {
+ const skill = read('skills/workflow-saga/SKILL.md');
+ expect(skill).toContain('partial');
+ expect(skill).toContain('FatalError');
+ expect(skill).toContain('RetryableError');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Domain-specific required content: timeout
+ // -----------------------------------------------------------------------
+ describe('workflow-timeout domain constraints', () => {
+ it('requires sleep/wake-up correctness', () => {
+ const skill = read('skills/workflow-timeout/SKILL.md');
+ expect(skill).toContain('sleep');
+ expect(skill).toContain('waitForSleep');
+ expect(skill).toContain('wakeUp');
+ });
+
+ it('requires hook/sleep races via Promise.race', () => {
+ const skill = read('skills/workflow-timeout/SKILL.md');
+ expect(skill).toContain('Promise.race');
+ expect(skill).toContain('createHook');
+ });
+
+ it('treats timeout as domain outcome', () => {
+ const skill = read('skills/workflow-timeout/SKILL.md');
+ expect(skill).toContain('Timeout as a domain outcome');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Goldens exist: saga and timeout
+ // -----------------------------------------------------------------------
+ describe('saga and timeout goldens exist', () => {
+ it('workflow-saga has compensation-saga golden', () => {
+ expect(
+ existsSync(
+ resolve(ROOT, 'skills/workflow-saga/goldens/compensation-saga.md'),
+ ),
+ ).toBe(true);
+ });
+
+ it('workflow-timeout has approval-timeout-streaming golden', () => {
+ expect(
+ existsSync(
+ resolve(
+ ROOT,
+ 'skills/workflow-timeout/goldens/approval-timeout-streaming.md',
+ ),
+ ),
+ ).toBe(true);
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Golden verification contract: saga and timeout
+ // -----------------------------------------------------------------------
+ describe('saga and timeout golden verification contract', () => {
+ it('saga golden includes verification artifact and summary', () => {
+ const golden = read(
+ 'skills/workflow-saga/goldens/compensation-saga.md',
+ );
+ expect(golden).toContain('## Verification Artifact');
+ expect(golden).toContain('### Verification Summary');
+ expect(golden).toContain('"event":"verification_plan_ready"');
+ });
+
+ it('timeout golden includes verification artifact and summary', () => {
+ const golden = read(
+ 'skills/workflow-timeout/goldens/approval-timeout-streaming.md',
+ );
+ expect(golden).toContain('## Verification Artifact');
+ expect(golden).toContain('### Verification Summary');
+ expect(golden).toContain('"event":"verification_plan_ready"');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Docs and README mention saga and timeout iff source exists
+ // -----------------------------------------------------------------------
+ describe('docs and README mention saga and timeout skills', () => {
+ it('getting-started doc mentions workflow-saga iff source exists', () => {
+ const docs = read(
+ 'docs/content/docs/getting-started/workflow-skills.mdx',
+ );
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-saga/SKILL.md'),
+ );
+ expect(docs.includes('`/workflow-saga`')).toBe(exists);
+ });
+
+ it('getting-started doc mentions workflow-timeout iff source exists', () => {
+ const docs = read(
+ 'docs/content/docs/getting-started/workflow-skills.mdx',
+ );
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-timeout/SKILL.md'),
+ );
+ expect(docs.includes('`/workflow-timeout`')).toBe(exists);
+ });
+
+ it('skills README mentions workflow-saga iff source exists', () => {
+ const readme = read('skills/README.md');
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-saga/SKILL.md'),
+ );
+ expect(readme.includes('`workflow-saga`')).toBe(exists);
+ });
+
+ it('skills README mentions workflow-timeout iff source exists', () => {
+ const readme = read('skills/README.md');
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-timeout/SKILL.md'),
+ );
+ expect(readme.includes('`workflow-timeout`')).toBe(exists);
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Scenario skills exist: workflow-idempotency and workflow-observe
+ // -----------------------------------------------------------------------
+ describe('idempotency and observe scenario skills exist', () => {
+ it('workflow-idempotency SKILL.md exists', () => {
+ expect(
+ existsSync(resolve(ROOT, 'skills/workflow-idempotency/SKILL.md')),
+ ).toBe(true);
+ });
+
+ it('workflow-observe SKILL.md exists', () => {
+ expect(
+ existsSync(resolve(ROOT, 'skills/workflow-observe/SKILL.md')),
+ ).toBe(true);
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Frontmatter: workflow-idempotency and workflow-observe
+ // -----------------------------------------------------------------------
+ describe('idempotency and observe frontmatter', () => {
+ it('workflow-idempotency has user-invocable: true and argument-hint', () => {
+ const skill = read('skills/workflow-idempotency/SKILL.md');
+ expect(skill).toContain('user-invocable: true');
+ expect(skill).toContain('argument-hint:');
+ });
+
+ it('workflow-observe has user-invocable: true and argument-hint', () => {
+ const skill = read('skills/workflow-observe/SKILL.md');
+ expect(skill).toContain('user-invocable: true');
+ expect(skill).toContain('argument-hint:');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Context reuse: workflow-idempotency and workflow-observe
+ // -----------------------------------------------------------------------
+ describe('idempotency and observe context reuse', () => {
+ it('workflow-idempotency reuses .workflow.md and falls back to context capture', () => {
+ const skill = read('skills/workflow-idempotency/SKILL.md');
+ expect(skill).toContain('.workflow.md');
+ expect(skill).toContain('Context Capture');
+ });
+
+ it('workflow-observe reuses .workflow.md and falls back to context capture', () => {
+ const skill = read('skills/workflow-observe/SKILL.md');
+ expect(skill).toContain('.workflow.md');
+ expect(skill).toContain('Context Capture');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Verification contract: workflow-idempotency and workflow-observe
+ // -----------------------------------------------------------------------
+ describe('idempotency and observe verification contract', () => {
+ it('workflow-idempotency terminates with verification_plan_ready', () => {
+ const skill = read('skills/workflow-idempotency/SKILL.md');
+ expect(skill).toContain('verification_plan_ready');
+ expect(skill).toContain('blueprintName');
+ expect(skill).toContain('fileCount');
+ expect(skill).toContain('contractVersion');
+ });
+
+ it('workflow-observe terminates with verification_plan_ready', () => {
+ const skill = read('skills/workflow-observe/SKILL.md');
+ expect(skill).toContain('verification_plan_ready');
+ expect(skill).toContain('blueprintName');
+ expect(skill).toContain('fileCount');
+ expect(skill).toContain('contractVersion');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Artifact ownership: workflow-idempotency and workflow-observe
+ // -----------------------------------------------------------------------
+ describe('idempotency and observe artifact ownership', () => {
+ it('workflow-idempotency does not reference .workflow-skills JSON paths', () => {
+ const skill = read('skills/workflow-idempotency/SKILL.md');
+ expect(skill).not.toContain('.workflow-skills/context.json');
+ expect(skill).not.toContain('.workflow-skills/blueprints');
+ });
+
+ it('workflow-observe does not reference .workflow-skills JSON paths', () => {
+ const skill = read('skills/workflow-observe/SKILL.md');
+ expect(skill).not.toContain('.workflow-skills/context.json');
+ expect(skill).not.toContain('.workflow-skills/blueprints');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Domain-specific required content: idempotency
+ // -----------------------------------------------------------------------
+ describe('workflow-idempotency domain constraints', () => {
+ it('requires duplicate delivery detection and idempotency keys', () => {
+ const skill = read('skills/workflow-idempotency/SKILL.md');
+ expect(skill).toContain('duplicate');
+ expect(skill).toContain('idempotency');
+ expect(skill).toContain('Duplicate delivery detection');
+ expect(skill).toContain('Stable idempotency keys');
+ });
+
+ it('requires replay safety', () => {
+ const skill = read('skills/workflow-idempotency/SKILL.md');
+ expect(skill).toContain('Replay safety');
+ expect(skill).toContain('replay');
+ });
+
+ it('requires compensation with idempotency keys', () => {
+ const skill = read('skills/workflow-idempotency/SKILL.md');
+ expect(skill).toContain('Compensation with idempotency keys');
+ expect(skill).toContain('RetryableError');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Domain-specific required content: observe
+ // -----------------------------------------------------------------------
+ describe('workflow-observe domain constraints', () => {
+ it('requires stream namespace separation', () => {
+ const skill = read('skills/workflow-observe/SKILL.md');
+ expect(skill).toContain('stream');
+ expect(skill).toContain('namespace');
+ expect(skill).toContain('Stream namespace separation');
+ });
+
+ it('requires stream I/O placement in steps', () => {
+ const skill = read('skills/workflow-observe/SKILL.md');
+ expect(skill).toContain('Stream I/O placement');
+ expect(skill).toContain('getWritable');
+ });
+
+ it('requires terminal signals on every exit path', () => {
+ const skill = read('skills/workflow-observe/SKILL.md');
+ expect(skill).toContain('Terminal signals');
+ expect(skill).toContain('operator');
+ });
+
+ it('requires structured stream events', () => {
+ const skill = read('skills/workflow-observe/SKILL.md');
+ expect(skill).toContain('Structured stream events');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Goldens exist: idempotency and observe
+ // -----------------------------------------------------------------------
+ describe('idempotency and observe goldens exist', () => {
+ it('workflow-idempotency has duplicate-webhook-order golden', () => {
+ expect(
+ existsSync(
+ resolve(ROOT, 'skills/workflow-idempotency/goldens/duplicate-webhook-order.md'),
+ ),
+ ).toBe(true);
+ });
+
+ it('workflow-observe has operator-observability-streams golden', () => {
+ expect(
+ existsSync(
+ resolve(
+ ROOT,
+ 'skills/workflow-observe/goldens/operator-observability-streams.md',
+ ),
+ ),
+ ).toBe(true);
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Golden verification contract: idempotency and observe
+ // -----------------------------------------------------------------------
+ describe('idempotency and observe golden verification contract', () => {
+ it('idempotency golden includes verification artifact and summary', () => {
+ const golden = read(
+ 'skills/workflow-idempotency/goldens/duplicate-webhook-order.md',
+ );
+ expect(golden).toContain('## Verification Artifact');
+ expect(golden).toContain('### Verification Summary');
+ expect(golden).toContain('"event":"verification_plan_ready"');
+ });
+
+ it('observe golden includes verification artifact and summary', () => {
+ const golden = read(
+ 'skills/workflow-observe/goldens/operator-observability-streams.md',
+ );
+ expect(golden).toContain('## Verification Artifact');
+ expect(golden).toContain('### Verification Summary');
+ expect(golden).toContain('"event":"verification_plan_ready"');
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // Docs and README mention idempotency and observe iff source exists
+ // -----------------------------------------------------------------------
+ describe('docs and README mention idempotency and observe skills', () => {
+ it('getting-started doc mentions workflow-idempotency iff source exists', () => {
+ const docs = read(
+ 'docs/content/docs/getting-started/workflow-skills.mdx',
+ );
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-idempotency/SKILL.md'),
+ );
+ expect(docs.includes('`/workflow-idempotency`')).toBe(exists);
+ });
+
+ it('getting-started doc mentions workflow-observe iff source exists', () => {
+ const docs = read(
+ 'docs/content/docs/getting-started/workflow-skills.mdx',
+ );
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-observe/SKILL.md'),
+ );
+ expect(docs.includes('`/workflow-observe`')).toBe(exists);
+ });
+
+ it('skills README mentions workflow-idempotency iff source exists', () => {
+ const readme = read('skills/README.md');
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-idempotency/SKILL.md'),
+ );
+ expect(readme.includes('`workflow-idempotency`')).toBe(exists);
+ });
+
+ it('skills README mentions workflow-observe iff source exists', () => {
+ const readme = read('skills/README.md');
+ const exists = existsSync(
+ resolve(ROOT, 'skills/workflow-observe/SKILL.md'),
+ );
+ expect(readme.includes('`workflow-observe`')).toBe(exists);
+ });
+ });
+
+ // -----------------------------------------------------------------------
+ // No legacy vocabulary in scenario skills
+ // -----------------------------------------------------------------------
+ describe('legacy vocabulary absent from scenario skills', () => {
+ const LEGACY_STAGES = [
+ 'workflow-design',
+ 'workflow-stress',
+ 'workflow-verify',
+ ] as const;
+
+ it('workflow-approval contains no legacy stage names', () => {
+ const skill = read('skills/workflow-approval/SKILL.md');
+ for (const legacy of LEGACY_STAGES) {
+ expect(skill).not.toContain(legacy);
+ }
+ });
+
+ it('workflow-webhook contains no legacy stage names', () => {
+ const skill = read('skills/workflow-webhook/SKILL.md');
+ for (const legacy of LEGACY_STAGES) {
+ expect(skill).not.toContain(legacy);
+ }
+ });
+
+ it('workflow-saga contains no legacy stage names', () => {
+ const skill = read('skills/workflow-saga/SKILL.md');
+ for (const legacy of LEGACY_STAGES) {
+ expect(skill).not.toContain(legacy);
+ }
+ });
+
+ it('workflow-timeout contains no legacy stage names', () => {
+ const skill = read('skills/workflow-timeout/SKILL.md');
+ for (const legacy of LEGACY_STAGES) {
+ expect(skill).not.toContain(legacy);
+ }
+ });
+
+ it('workflow-idempotency contains no legacy stage names', () => {
+ const skill = read('skills/workflow-idempotency/SKILL.md');
+ for (const legacy of LEGACY_STAGES) {
+ expect(skill).not.toContain(legacy);
+ }
+ });
+
+ it('workflow-observe contains no legacy stage names', () => {
+ const skill = read('skills/workflow-observe/SKILL.md');
+ for (const legacy of LEGACY_STAGES) {
+ expect(skill).not.toContain(legacy);
+ }
+ });
+ });
+});
diff --git a/workbench/vitest/test/workflow-skill-bundle-parity.test.ts b/workbench/vitest/test/workflow-skill-bundle-parity.test.ts
new file mode 100644
index 0000000000..7cf9bcd6eb
--- /dev/null
+++ b/workbench/vitest/test/workflow-skill-bundle-parity.test.ts
@@ -0,0 +1,265 @@
+import { execSync } from 'node:child_process';
+import { existsSync, readFileSync } from 'node:fs';
+import { resolve } from 'node:path';
+import { describe, expect, it } from 'vitest';
+
+const ROOT = resolve(import.meta.dirname, '..', '..', '..');
+
+function read(relativePath: string): string {
+  return readFileSync(resolve(ROOT, relativePath), 'utf-8');
+}
+
+interface SkillSurface {
+  core: string[];
+  scenario: string[];
+  optional: string[];
+  discovered: string[];
+  counts: {
+    core: number;
+    scenarios: number;
+    optional: number;
+    skills: number;
+    installDirectories: number;
+    goldensPerProvider: number;
+    providers: number;
+    outputsPerProvider: number;
+    totalOutputs: number;
+  };
+}
+
+interface BuildCheckOutput {
+  ok: boolean;
+  providers: string[];
+  skillSurface: SkillSurface;
+  skills: Array<{
+    name: string;
+    version: string;
+    goldens: number;
+    checksum: string;
+  }>;
+  outputs: Array<{
+    provider: string;
+    skill: string;
+    dest: string;
+    checksum: string;
+    type?: string;
+  }>;
+  totalOutputs: number;
+}
+
+function getBuildPlan(): BuildCheckOutput {
+  const stdout = execSync('node scripts/build-workflow-skills.mjs --check', {
+    cwd: ROOT,
+    encoding: 'utf-8',
+    stdio: ['pipe', 'pipe', 'pipe'],
+  });
+  return JSON.parse(stdout);
+}
+
+let cachedPlan: BuildCheckOutput | undefined;
+function getCachedBuildPlan(): BuildCheckOutput {
+  cachedPlan ??= getBuildPlan();
+  return cachedPlan;
+}
+
+// Resolve lazily so the build script is not spawned at module load time.
+const getScenarioSkills = (): readonly string[] => getCachedBuildPlan().skillSurface.scenario;
+
+describe('workflow skill bundle parity', () => {
+  it('docs and README mention workflow-init iff the source skill exists', () => {
+    const docs = read('docs/content/docs/getting-started/workflow-skills.mdx');
+    const readme = read('skills/README.md');
+    const initExists = existsSync(
+      resolve(ROOT, 'skills/workflow-init/SKILL.md')
+    );
+
+    console.log(
+      JSON.stringify({
+        event: 'bundle_parity_check',
+        skill: 'workflow-init',
+        skillFileExists: initExists,
+        docsContains: docs.includes('`workflow-init`'),
+        readmeContains: readme.includes('`workflow-init`'),
+      })
+    );
+
+    expect(docs.includes('`workflow-init`')).toBe(initExists);
+    expect(readme.includes('`workflow-init`')).toBe(initExists);
+  });
+
+  it('core skills all have SKILL.md files', () => {
+    const coreSkills = [
+      'workflow',
+      'workflow-teach',
+      'workflow-build',
+    ] as const;
+
+    for (const skill of coreSkills) {
+      const skillPath = resolve(ROOT, `skills/${skill}/SKILL.md`);
+      const exists = existsSync(skillPath);
+
+      console.log(
+        JSON.stringify({
+          event: 'core_skill_check',
+          skill,
+          exists,
+        })
+      );
+
+      expect(exists, `skills/${skill}/SKILL.md must exist`).toBe(true);
+    }
+  });
+
+  it('scenario skills have SKILL.md files when referenced in docs', () => {
+    const scenarioSkills = getScenarioSkills();
+    const docs = read('docs/content/docs/getting-started/workflow-skills.mdx');
+
+    for (const skill of scenarioSkills) {
+      const skillPath = resolve(ROOT, `skills/${skill}/SKILL.md`);
+      const exists = existsSync(skillPath);
+      const mentioned = docs.includes(`\`/${skill}\``);
+
+      console.log(
+        JSON.stringify({
+          event: 'scenario_skill_check',
+          skill,
+          exists,
+          mentionedInDocs: mentioned,
+        })
+      );
+
+      // If mentioned in docs, must exist
+      if (mentioned) {
+        expect(
+          exists,
+          `skills/${skill}/SKILL.md must exist when referenced in docs`
+        ).toBe(true);
+      }
+    }
+  });
+
+  // ---------------------------------------------------------------------------
+  // Provider bundle parity: build plan includes scenario skills for all providers
+  // ---------------------------------------------------------------------------
+  describe('provider bundle includes scenario skills', () => {
+    // Uses the shared getScenarioSkills() helper defined at module scope
+
+    it('build --check succeeds', () => {
+      const plan = getCachedBuildPlan();
+
+      console.log(
+        JSON.stringify({
+          event: 'build_check_result',
+          ok: plan.ok,
+          providers: plan.providers,
+          totalOutputs: plan.totalOutputs,
+        })
+      );
+
+      expect(plan.ok).toBe(true);
+    });
+
+    it('build check exposes a self-describing skill surface', () => {
+      const plan = getCachedBuildPlan();
+
+      console.log(
+        JSON.stringify({
+          event: 'skill_surface_summary',
+          core: plan.skillSurface.core,
+          scenario: plan.skillSurface.scenario,
+          optional: plan.skillSurface.optional,
+          counts: plan.skillSurface.counts,
+        })
+      );
+
+      expect(plan.skillSurface.core).toEqual([
+        'workflow',
+        'workflow-teach',
+        'workflow-build',
+      ]);
+      expect(plan.skillSurface.scenario).toContain('workflow-observe');
+      expect(plan.skillSurface.counts.totalOutputs).toBe(plan.totalOutputs);
+    });
+
+    it('build plan lists all currently supported providers', () => {
+      const plan = getCachedBuildPlan();
+      expect(plan.providers).toContain('claude-code');
+      expect(plan.providers).toContain('cursor');
+      expect(plan.providers.length).toBeGreaterThanOrEqual(2);
+    });
+
+    it('every provider bundle includes every scenario skill', () => {
+      const plan = getCachedBuildPlan();
+
+      for (const provider of plan.providers) {
+        const providerSkills = plan.outputs
+          .filter((o) => o.provider === provider && !o.type)
+          .map((o) => o.skill);
+
+        for (const scenario of getScenarioSkills()) {
+          console.log(
+            JSON.stringify({
+              event: 'provider_scenario_parity',
+              provider,
+              scenario,
+              included: providerSkills.includes(scenario),
+            })
+          );
+
+          expect(
+            providerSkills,
+            `provider "${provider}" must include skill "${scenario}"`
+          ).toContain(scenario);
+        }
+      }
+    });
+
+    it('every provider bundle includes scenario goldens', () => {
+      const plan = getCachedBuildPlan();
+
+      for (const provider of plan.providers) {
+        const providerGoldens = plan.outputs
+          .filter((o) => o.provider === provider && o.type === 'golden')
+          .map((o) => o.skill);
+
+        for (const scenario of getScenarioSkills()) {
+          console.log(
+            JSON.stringify({
+              event: 'provider_golden_parity',
+              provider,
+              scenario,
+              included: providerGoldens.includes(scenario),
+            })
+          );
+
+          expect(
+            providerGoldens,
+            `provider "${provider}" must include goldens for "${scenario}"`
+          ).toContain(scenario);
+        }
+      }
+    });
+
+    it('scenario skills in build plan match source skills directory', () => {
+      const plan = getCachedBuildPlan();
+      const planSkillNames = plan.skills.map((s) => s.name);
+
+      for (const scenario of getScenarioSkills()) {
+        const sourceExists = existsSync(
+          resolve(ROOT, `skills/${scenario}/SKILL.md`)
+        );
+
+        console.log(
+          JSON.stringify({
+            event: 'source_plan_parity',
+            scenario,
+            sourceExists,
+            inPlan: planSkillNames.includes(scenario),
+          })
+        );
+
+        expect(planSkillNames.includes(scenario)).toBe(sourceExists);
+      }
+    });
+  });
+});
diff --git a/workbench/vitest/test/workflow-skill-validator-aggregation.test.ts b/workbench/vitest/test/workflow-skill-validator-aggregation.test.ts
new file mode 100644
index 0000000000..68c04781ca
--- /dev/null
+++ b/workbench/vitest/test/workflow-skill-validator-aggregation.test.ts
@@ -0,0 +1,133 @@
+import { readFileSync } from 'node:fs';
+import { resolve } from 'node:path';
+import { describe, expect, it } from 'vitest';
+
+const ROOT = resolve(import.meta.dirname, '..', '..', '..');
+
+function read(relativePath: string): string {
+ return readFileSync(resolve(ROOT, relativePath), 'utf-8');
+}
+
+/**
+ * Guard: every scenario rule-set array defined in workflow-skill-checks.mjs
+ * must be spread into the exported `checks` or `allGoldenChecks` aggregates.
+ * If someone adds a new array but forgets to wire it into the aggregates,
+ * validation silently skips those rules. This test makes that a hard failure.
+ */
+describe('workflow skill validator aggregation', () => {
+ const checksSource = read('scripts/lib/workflow-skill-checks.mjs');
+ const validatorSource = read('scripts/validate-workflow-skill-files.mjs');
+
+ const SCENARIO_SKILL_CHECK_ARRAYS = [
+ 'sagaChecks',
+ 'timeoutChecks',
+ 'idempotencyChecks',
+ 'observeChecks',
+ ] as const;
+
+ const SCENARIO_GOLDEN_CHECK_ARRAYS = [
+ 'sagaGoldenChecks',
+ 'timeoutGoldenChecks',
+ 'idempotencyGoldenChecks',
+ 'observeGoldenChecks',
+ ] as const;
+
+ describe('scenario skill rule arrays are exported', () => {
+ for (const symbol of SCENARIO_SKILL_CHECK_ARRAYS) {
+ it(`exports ${symbol}`, () => {
+ expect(checksSource).toContain(`export const ${symbol} = [`);
+ console.log(
+ JSON.stringify({
+ event: 'aggregation_check',
+ symbol,
+ exported: true,
+ })
+ );
+ });
+ }
+ });
+
+ describe('scenario golden rule arrays are exported', () => {
+ for (const symbol of SCENARIO_GOLDEN_CHECK_ARRAYS) {
+ it(`exports ${symbol}`, () => {
+ expect(checksSource).toContain(`export const ${symbol} = [`);
+ console.log(
+ JSON.stringify({
+ event: 'aggregation_check',
+ symbol,
+ exported: true,
+ })
+ );
+ });
+ }
+ });
+
+  describe('scenario skill rule arrays are spread into `checks` aggregate', () => {
+    for (const symbol of SCENARIO_SKILL_CHECK_ARRAYS) {
+      it(`spreads ...${symbol} into checks`, () => {
+        expect(checksSource).toMatch(
+          new RegExp(`export const checks\\s*=\\s*\\[[^\\]]*\\.\\.\\.${symbol}(?![\\w$])`) // boundary guard: don't match longer identifiers
+        );
+        console.log(
+          JSON.stringify({
+            event: 'aggregation_spread',
+            symbol,
+            aggregate: 'checks',
+            present: true,
+          })
+        );
+      });
+    }
+  });
+
+  describe('scenario golden rule arrays are spread into `allGoldenChecks` aggregate', () => {
+    for (const symbol of SCENARIO_GOLDEN_CHECK_ARRAYS) {
+      it(`spreads ...${symbol} into allGoldenChecks`, () => {
+        expect(checksSource).toMatch(
+          new RegExp(
+            `export const allGoldenChecks\\s*=\\s*\\[[^\\]]*\\.\\.\\.${symbol}(?![\\w$])`
+          ) // boundary guard: don't match longer identifiers
+        );
+        console.log(
+          JSON.stringify({
+            event: 'aggregation_spread',
+            symbol,
+            aggregate: 'allGoldenChecks',
+            present: true,
+          })
+        );
+      });
+    }
+  });
+
+ describe('validator script consumes exported aggregates', () => {
+ it('imports checks and allGoldenChecks from workflow-skill-checks', () => {
+ expect(validatorSource).toMatch(
+ /import\s*\{[^}]*checks[^}]*\}\s*from\s*['"]\.\/lib\/workflow-skill-checks\.mjs['"]/
+ );
+ expect(validatorSource).toMatch(
+ /import\s*\{[^}]*allGoldenChecks[^}]*\}\s*from\s*['"]\.\/lib\/workflow-skill-checks\.mjs['"]/
+ );
+ console.log(
+ JSON.stringify({
+ event: 'validator_import_check',
+ imports: ['checks', 'allGoldenChecks'],
+ present: true,
+ })
+ );
+ });
+
+ it('combines checks and allGoldenChecks into allChecks', () => {
+ expect(validatorSource).toContain(
+ 'const allChecks = [...checks, ...allGoldenChecks];'
+ );
+ console.log(
+ JSON.stringify({
+ event: 'validator_combination_check',
+ pattern: '[...checks, ...allGoldenChecks]',
+ present: true,
+ })
+ );
+ });
+ });
+});
diff --git a/workbench/vitest/test/workflow-skill-verification-summary-contract.test.ts b/workbench/vitest/test/workflow-skill-verification-summary-contract.test.ts
new file mode 100644
index 0000000000..37935c4680
--- /dev/null
+++ b/workbench/vitest/test/workflow-skill-verification-summary-contract.test.ts
@@ -0,0 +1,127 @@
+import { existsSync, readdirSync, readFileSync } from 'node:fs';
+import { join, resolve } from 'node:path';
+import { describe, expect, it } from 'vitest';
+
+const ROOT = resolve(import.meta.dirname, '..', '..', '..');
+const SKILLS_DIR = resolve(ROOT, 'skills');
+
+function read(relativePath: string): string {
+ return readFileSync(resolve(ROOT, relativePath), 'utf8');
+}
+
+function extractSection(text: string, heading: string): string | null {
+ const lines = text.split('\n');
+ const start = lines.findIndex((line) => {
+ const trimmed = line.trim();
+ return trimmed === `## ${heading}` || trimmed === `### ${heading}`;
+ });
+ if (start === -1) return null;
+
+ const targetLevel = lines[start].trim().match(/^(#{2,6})\s/)?.[1].length ?? 2;
+ let end = lines.length;
+ for (let i = start + 1; i < lines.length; i += 1) {
+ const match = lines[i].trim().match(/^(#{2,6})\s/);
+ if (match && match[1].length <= targetLevel) {
+ end = i;
+ break;
+ }
+ }
+ return lines
+ .slice(start + 1, end)
+ .join('\n')
+ .trim();
+}
+
+function extractCodeFence(
+ sectionText: string,
+ language: string
+): string | null {
+ const lines = sectionText.split('\n');
+ const startFence = '```' + language;
+ const start = lines.findIndex((line) => line.trim() === startFence);
+ if (start === -1) return null;
+
+ const end = lines.findIndex(
+ (line, index) => index > start && line.trim() === '```'
+ );
+ if (end === -1) return null;
+
+ return lines.slice(start + 1, end).join('\n');
+}
+
+function extractVerificationSummary(sectionText: string): {
+  event: string;
+  blueprintName: string;
+  fileCount: number;
+  testCount: number;
+  runtimeCommandCount: number;
+  contractVersion: string;
+} | null {
+  const parsed = sectionText.split('\n').flatMap((raw) => { // key-order tolerant
+    const line = raw.trim();
+    try { return line.startsWith('{') ? [JSON.parse(line)] : []; } catch { return []; }
+  });
+  return parsed.find((p) => p?.event === 'verification_plan_ready') ?? null;
+}
+
+function discoverGoldenFiles(): string[] {
+ return readdirSync(SKILLS_DIR, { withFileTypes: true })
+ .filter((entry) => entry.isDirectory())
+ .flatMap((entry) => {
+ const goldensDir = join(SKILLS_DIR, entry.name, 'goldens');
+ if (!existsSync(goldensDir)) return [];
+ return readdirSync(goldensDir)
+ .filter((file) => file.endsWith('.md'))
+ .map((file) => `skills/${entry.name}/goldens/${file}`);
+ });
+}
+
+describe('workflow golden verification summary contract', () => {
+ for (const goldenPath of discoverGoldenFiles()) {
+ const text = read(goldenPath);
+ if (!text.includes('## Verification Artifact')) continue;
+
+ it(`${goldenPath} keeps summary counts aligned with the artifact`, () => {
+ const artifactSection = extractSection(text, 'Verification Artifact');
+ expect(
+ artifactSection,
+ 'verification artifact section must exist'
+ ).toBeTruthy();
+
+ const artifactJson = extractCodeFence(artifactSection!, 'json');
+ expect(
+ artifactJson,
+ 'verification artifact must contain json'
+ ).toBeTruthy();
+
+ const artifact = JSON.parse(artifactJson!) as {
+ contractVersion: string;
+ blueprintName: string;
+ files: Array<{ kind: string; path: string }>;
+ runtimeCommands: Array<{
+ name: string;
+ command: string;
+ expects: string;
+ }>;
+ };
+
+ const summarySection = extractSection(text, 'Verification Summary');
+ expect(
+ summarySection,
+ 'verification summary section must exist'
+ ).toBeTruthy();
+
+ const summary = extractVerificationSummary(summarySection!);
+ expect(summary, 'verification summary json line must exist').toBeTruthy();
+
+ expect(summary).toEqual({
+ event: 'verification_plan_ready',
+ blueprintName: artifact.blueprintName,
+ fileCount: artifact.files.length,
+ testCount: artifact.files.filter((file) => file.kind === 'test').length,
+ runtimeCommandCount: artifact.runtimeCommands.length,
+ contractVersion: artifact.contractVersion,
+ });
+ });
+ }
+});
diff --git a/workbench/vitest/test/workflow-skills-docs-contract.test.ts b/workbench/vitest/test/workflow-skills-docs-contract.test.ts
new file mode 100644
index 0000000000..f2e6318f6d
--- /dev/null
+++ b/workbench/vitest/test/workflow-skills-docs-contract.test.ts
@@ -0,0 +1,592 @@
+import { execSync } from 'node:child_process';
+import { existsSync, readFileSync } from 'node:fs';
+import { resolve } from 'node:path';
+import { describe, expect, it } from 'vitest';
+
+const ROOT = resolve(import.meta.dirname, '..', '..', '..');
+
+function read(relativePath: string): string { // load a repo-root-relative text file as UTF-8
+  return readFileSync(resolve(ROOT, relativePath), 'utf-8');
+}
+
+interface SkillSurface { // skill taxonomy reported by build-workflow-skills.mjs --check
+  core: string[]; // core skills (tests below expect workflow, workflow-teach, workflow-build)
+  scenario: string[]; // user-invocable scenario skills
+  optional: string[]; // optional helpers (e.g. workflow-init)
+  discovered: string[]; // every skill directory the builder found on disk
+  counts: {
+    core: number;
+    scenarios: number;
+    optional: number;
+    skills: number;
+    installDirectories: number; // number of directories an install should produce; quoted in the docs
+    goldensPerProvider: number;
+    providers: number;
+    outputsPerProvider: number;
+    totalOutputs: number;
+  };
+}
+
+interface BuildCheckOutput { // top-level JSON emitted by the builder's --check mode
+  ok: boolean;
+  providers: string[];
+  skillSurface: SkillSurface;
+  skills: Array<{
+    name: string;
+    version: string;
+    goldens: number; // count of golden fixtures bundled with the skill
+    checksum: string;
+  }>;
+  outputs: Array<{
+    provider: string;
+    skill: string;
+    dest: string; // install destination path for this provider/skill pair
+    checksum: string;
+    type?: string; // optional; not every output is typed — TODO confirm against builder
+  }>;
+  totalOutputs: number;
+}
+
+function getBuildPlan(): BuildCheckOutput { // run the skills builder in check mode and parse its JSON plan from stdout
+  const stdout = execSync('node scripts/build-workflow-skills.mjs --check', {
+    cwd: ROOT,
+    encoding: 'utf-8',
+    stdio: ['pipe', 'pipe', 'pipe'], // capture stderr too so builder logs do not pollute test output
+  });
+  return JSON.parse(stdout);
+}
+
+let cachedPlan: BuildCheckOutput | undefined; // memoized so the builder subprocess runs at most once per test file
+function getCachedBuildPlan(): BuildCheckOutput {
+  cachedPlan ??= getBuildPlan();
+  return cachedPlan;
+}
+
+// ---------------------------------------------------------------------------
+// Legacy vocabulary that must never reappear in shipped docs or skills
+// ---------------------------------------------------------------------------
+const LEGACY_STAGES = [ // as const so entries stay literal string types
+  'workflow-design',
+  'workflow-stress',
+  'workflow-verify',
+] as const;
+
+describe('workflow skills docs contract surfaces', () => { // guards docs/README/skill text against drift from the live build plan
+  // -----------------------------------------------------------------------
+  // Canonical two-stage loop
+  // -----------------------------------------------------------------------
+  describe('canonical loop: workflow-teach then workflow-build', () => {
+    it('getting-started doc describes a two-stage teach-then-build loop', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('two-stage');
+      expect(docs).toContain('/workflow-teach');
+      expect(docs).toContain('/workflow-build');
+      expect(docs).toContain(
+        'The `workflow` skill is an always-on API reference'
+      );
+    });
+
+    it('skills README describes the same two-skill workflow', () => {
+      const readme = read('skills/README.md');
+      expect(readme).toContain('Two-skill workflow');
+      expect(readme).toContain('`workflow-teach`');
+      expect(readme).toContain('`workflow-build`');
+      expect(readme).toContain(
+        '`workflow` skill is an always-on API reference'
+      );
+    });
+
+    it('getting-started stage table lists teach as Stage 1 and build as Stage 2', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      // Table row: | 1 | `/workflow-teach` | ...
+      expect(docs).toMatch(/\|\s*1\s*\|.*workflow-teach/);
+      // Table row: | 2 | `/workflow-build` | ...
+      expect(docs).toMatch(/\|\s*2\s*\|.*workflow-build/);
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Core surface: workflow, workflow-teach, workflow-build
+  // -----------------------------------------------------------------------
+  describe('core surface is explicitly named', () => { // the three core skills must be named verbatim in docs and README
+    it('getting-started doc names the three core skill directories', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('`workflow`');
+      expect(docs).toContain('`workflow-teach`');
+      expect(docs).toContain('`workflow-build`');
+    });
+
+    it('skills README lists the three core skills under a core heading', () => {
+      const readme = read('skills/README.md');
+      expect(readme).toContain('Core surface');
+      expect(readme).toContain('`workflow`');
+      expect(readme).toContain('`workflow-teach`');
+      expect(readme).toContain('`workflow-build`');
+    });
+
+    it('getting-started doc describes workflow-init as optional helper', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toMatch(/optional.*workflow-init/is); // case-insensitive, dot matches newlines
+    });
+
+    it('skills README lists workflow-init under optional helpers', () => {
+      const readme = read('skills/README.md');
+      expect(readme).toContain('Optional helpers');
+      expect(readme).toContain('`workflow-init`');
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Legacy stage vocabulary must not appear
+  // -----------------------------------------------------------------------
+  describe('legacy stage vocabulary is absent', () => { // regression guard: retired stage names must never resurface
+    it('getting-started doc contains no legacy stage names', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      for (const legacy of LEGACY_STAGES) {
+        expect(docs).not.toContain(legacy);
+      }
+    });
+
+    it('skills README contains no legacy stage names', () => {
+      const readme = read('skills/README.md');
+      for (const legacy of LEGACY_STAGES) {
+        expect(readme).not.toContain(legacy);
+      }
+    });
+
+    it('workflow-teach skill contains no legacy stage names', () => {
+      const skill = read('skills/workflow-teach/SKILL.md');
+      for (const legacy of LEGACY_STAGES) {
+        expect(skill).not.toContain(legacy);
+      }
+    });
+
+    it('workflow-build skill contains no legacy stage names', () => {
+      const skill = read('skills/workflow-build/SKILL.md');
+      for (const legacy of LEGACY_STAGES) {
+        expect(skill).not.toContain(legacy);
+      }
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Artifact ownership: .workflow.md (skill-managed) vs .workflow-skills/*.json (host-managed)
+  // -----------------------------------------------------------------------
+  describe('artifact ownership model', () => { // skills own .workflow.md; host owns .workflow-skills/*.json
+    it('docs describe .workflow.md as skill-managed', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('Skill-managed');
+      expect(docs).toContain('.workflow.md');
+      // workflow-teach writes .workflow.md
+      expect(docs).toMatch(/Written.*by.*`workflow-teach`/s);
+    });
+
+    it('docs describe .workflow-skills/*.json as host-managed', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('Host-managed');
+      expect(docs).toContain('.workflow-skills/context.json');
+      expect(docs).toContain('.workflow-skills/blueprints/.json'); // NOTE(review): a placeholder name may have been stripped here ("blueprints/<name>.json"?) — confirm against the mdx
+      expect(docs).toContain('.workflow-skills/verification/.json'); // NOTE(review): same stripped-placeholder concern as above
+      // Must explain host ownership — skill prompts don't reference JSON paths
+      expect(docs).toContain(
+        'managed by the host runtime or persistence layer'
+      );
+    });
+
+    it('README distinguishes skill-managed from host-managed artifacts', () => {
+      const readme = read('skills/README.md');
+      expect(readme).toContain('Skill-managed');
+      expect(readme).toContain('Host-managed');
+      expect(readme).toContain('.workflow.md');
+      expect(readme).toContain('.workflow-skills/*.json');
+    });
+
+    it('workflow-teach skill references .workflow.md but not JSON artifact paths', () => {
+      const skill = read('skills/workflow-teach/SKILL.md');
+      expect(skill).toContain('.workflow.md');
+      expect(skill).not.toContain('.workflow-skills/context.json');
+      expect(skill).not.toContain('.workflow-skills/blueprints');
+    });
+
+    it('workflow-build skill references .workflow.md but not JSON artifact paths', () => {
+      const skill = read('skills/workflow-build/SKILL.md');
+      expect(skill).toContain('.workflow.md');
+      expect(skill).not.toContain('.workflow-skills/context.json');
+      expect(skill).not.toContain('.workflow-skills/blueprints');
+    });
+
+    it('docs explain that .workflow.md is written by the assistant flow', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toMatch(/\.workflow\.md.*written.*directly/is);
+    });
+
+    it('docs explain that .workflow-skills/*.json are host-managed', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('not by the skill prompts');
+    });
+
+    it('README explains that .workflow-skills/*.json are host-managed', () => {
+      const readme = read('skills/README.md');
+      expect(readme).toMatch(/not\s+by\s+the\s+skill\s+prompts/);
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Legacy artifact ownership regression guards
+  // -----------------------------------------------------------------------
+  describe('legacy artifact ownership regression', () => { // the pre-rework docs layout must not come back
+    it('getting-started doc no longer uses the legacy artifact ownership layout', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      // The legacy table used "Written By" as a column header
+      expect(docs).not.toContain('| Artifact | Path | Written By |');
+      // Legacy docs described JSON paths as individual sections owned by skills
+      expect(docs).not.toContain('### `.workflow-skills/context.json`');
+      expect(docs).not.toContain(
+        '### `.workflow-skills/blueprints/.json`'
+      );
+      expect(docs).not.toContain(
+        '### `.workflow-skills/verification/.json`'
+      );
+    });
+
+    it('getting-started doc explicitly says host-managed JSON paths are not referenced by skill text', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain(
+        'The skill text never references these JSON paths directly'
+      );
+      expect(docs).toContain(
+        'managed by the host runtime or persistence layer'
+      );
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Integration test path convention
+  // -----------------------------------------------------------------------
+  describe('integration test path convention', () => {
+    it('workflow-build uses one integration-test path convention', () => {
+      const skill = read('skills/workflow-build/SKILL.md');
+      expect(skill).toContain('workflows/.integration.test.ts'); // NOTE(review): looks like a stripped placeholder ("workflows/<name>.integration.test.ts"?) — confirm against SKILL.md
+      expect(skill).not.toContain('__tests__/.test.ts');
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Verification schema: testMatrix field
+  // -----------------------------------------------------------------------
+  describe('verification schema completeness', () => {
+    it('getting-started verification example includes a testMatrix field', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('"testMatrix"');
+    });
+
+    it('workflow-build verification artifact includes a testMatrix field', () => {
+      const skill = read('skills/workflow-build/SKILL.md');
+      expect(skill).toContain('"testMatrix"');
+    });
+
+    it('workflow-build Phase 4 lists optional route file', () => {
+      const skill = read('skills/workflow-build/SKILL.md');
+      expect(skill).toContain('app/api//route.ts'); // NOTE(review): double slash suggests a stripped segment ("app/api/<name>/route.ts"?) — confirm against SKILL.md
+      expect(skill).toContain('Optional route file');
+    });
+
+    it('files-array sentinel sentence appears in both skill and docs', () => {
+      const skill = read('skills/workflow-build/SKILL.md');
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      const sentinel =
+        'The `files` array must list only files that are actually produced.'; // exact sentence shared by both documents
+      expect(skill).toContain(sentinel);
+      expect(docs).toContain(sentinel);
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Verification summary contract (workflow-build)
+  // -----------------------------------------------------------------------
+  describe('verification summary contract', () => {
+    it('workflow-build skill requires a machine-parseable verification summary', () => {
+      const skill = read('skills/workflow-build/SKILL.md');
+      expect(skill).toContain('verification_plan_ready');
+      expect(skill).toContain('blueprintName');
+      expect(skill).toContain('fileCount');
+      expect(skill).toContain('testCount');
+      expect(skill).toContain('runtimeCommandCount');
+      expect(skill).toContain('contractVersion');
+    });
+
+    it('workflow-build golden demonstrates verification summary format', () => {
+      const golden = read('skills/workflow-build/goldens/compensation-saga.md');
+      expect(golden).toContain('## Verification Artifact');
+      expect(golden).toContain('### Verification Summary');
+      expect(golden).toContain('"event":"verification_plan_ready"');
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Installed skill count
+  // -----------------------------------------------------------------------
+  describe('installed skill count', () => { // doc numbers are checked against the live builder, not hard-coded
+    it('getting-started doc reports the correct installed skill count', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      const plan = getCachedBuildPlan();
+      expect(docs).toContain(
+        `After copying, you should see ${plan.skillSurface.counts.installDirectories} skill directories:`
+      );
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Scenario surface: all six scenario skills in docs, install, and README
+  // -----------------------------------------------------------------------
+  describe('scenario surface is explicit', () => {
+    it('getting-started doc lists every current scenario command from the build plan', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      const plan = getCachedBuildPlan();
+
+      console.log( // JSONL breadcrumb to aid debugging when the surface drifts
+        JSON.stringify({
+          event: 'docs_expected_surface',
+          scenarios: plan.skillSurface.scenario,
+          installDirectories: plan.skillSurface.counts.installDirectories,
+          totalOutputs: plan.totalOutputs,
+        })
+      );
+
+      for (const skill of plan.skillSurface.scenario) {
+        expect(docs).toContain(`/${skill}`); // slash-command form, e.g. /workflow-saga
+      }
+    });
+
+    it('install section reports the current install-directory count', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      const plan = getCachedBuildPlan();
+      expect(docs).toContain(
+        `After copying, you should see ${plan.skillSurface.counts.installDirectories} skill directories:`
+      );
+    });
+
+    it('build-output example matches the live build plan', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      const plan = getCachedBuildPlan();
+      expect(docs).toMatch(
+        new RegExp(`"totalOutputs"\\s*:\\s*${plan.totalOutputs}`)
+      );
+      expect(docs).toMatch(
+        new RegExp(
+          `"count"\\s*:\\s*${plan.skillSurface.counts.installDirectories}`
+        )
+      );
+    });
+
+    it('README lists every scenario entrypoint and golden family', () => {
+      const readme = read('skills/README.md');
+      const plan = getCachedBuildPlan();
+      for (const skill of plan.skillSurface.scenario) {
+        expect(readme).toContain(`\`${skill}\``);
+      }
+      expect(readme).toContain('### `workflow-saga/goldens/`');
+      expect(readme).toContain('### `workflow-timeout/goldens/`');
+      expect(readme).toContain('### `workflow-idempotency/goldens/`');
+      expect(readme).toContain('### `workflow-observe/goldens/`');
+    });
+
+    it('sample build output numbers are internally consistent', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      const plan = getCachedBuildPlan();
+      // The "totalOutputs" in the manifest summary and the plan_computed event must match
+      const manifestMatch = docs.match(/"totalOutputs":\s*(\d+)/g); // global: collect every occurrence in the doc
+      expect(manifestMatch).not.toBeNull();
+      const values = manifestMatch!.map((m) => m.match(/\d+/)![0]); // first digit run in each match is the count
+      // All totalOutputs references should be the same number
+      expect(new Set(values).size).toBe(1);
+      // The skills_discovered count should match the live build plan
+      expect(docs).toContain(
+        `"count":${plan.skillSurface.counts.installDirectories}`
+      );
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Validator inspection guidance
+  // -----------------------------------------------------------------------
+  describe('validator inspection guidance', () => {
+    it('getting-started doc includes Inspect Validation Output section', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('## Inspect Validation Output');
+    });
+
+    it('validator inspection shows stdout as machine-readable result', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('machine-readable result');
+      expect(docs).toContain('workflow-skills-validate.json');
+    });
+
+    it('validator inspection shows stderr as JSONL logs', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('JSON logs on stderr');
+      expect(docs).toContain('workflow-skills-validate.log');
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Docs show full verification runtime command set
+  // -----------------------------------------------------------------------
+  it('docs show the full verification runtime command set', () => { // top-level it: single assertion group, no nested describe needed
+    const docs = read('docs/content/docs/getting-started/workflow-skills.mdx');
+    expect(docs).toContain('"name": "typecheck"');
+    expect(docs).toContain('"name": "test"');
+    expect(docs).toContain('"name": "focused-workflow-test"');
+  });
+
+  // -----------------------------------------------------------------------
+  // Six-phase build flow
+  // -----------------------------------------------------------------------
+  describe('six-phase build flow', () => {
+    it('getting-started doc describes six interactive phases', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('six interactive phases');
+      expect(docs).not.toContain('five interactive phases'); // the old phrasing must be fully replaced, not duplicated
+    });
+
+    it('getting-started doc includes Phase 6 verification summary', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain('Verification summary');
+      expect(docs).toContain('verification_plan_ready');
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Package README parity with teach→build vocabulary
+  // -----------------------------------------------------------------------
+  describe('package README parity', () => {
+    it('package README describes teach-then-build two-stage loop', () => {
+      const readme = read('packages/workflow/README.md');
+      expect(readme).toContain('two-stage loop');
+      expect(readme).toContain('teach');
+      expect(readme).toContain('build');
+    });
+
+    it('package README contains no legacy four-stage vocabulary', () => {
+      const readme = read('packages/workflow/README.md');
+      for (const legacy of LEGACY_STAGES) {
+        expect(readme).not.toContain(legacy);
+      }
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Compensation-saga golden includes testMatrix
+  // -----------------------------------------------------------------------
+  describe('golden verification artifact schema', () => {
+    it('compensation-saga golden includes testMatrix field', () => {
+      const golden = read('skills/workflow-build/goldens/compensation-saga.md');
+      expect(golden).toContain('"testMatrix"');
+    });
+  });
+
+  // -----------------------------------------------------------------------
+  // Scenario skill parity: docs, README, source files, and goldens
+  // -----------------------------------------------------------------------
+  describe('scenario skill parity', () => { // hard-coded list intentionally duplicates the build plan to catch plan drift too
+    it('docs and README list every user-invocable scenario skill', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      const readme = read('skills/README.md');
+      for (const skill of [
+        'workflow-approval',
+        'workflow-webhook',
+        'workflow-saga',
+        'workflow-timeout',
+        'workflow-idempotency',
+        'workflow-observe',
+      ]) {
+        expect(docs).toContain(`\`/${skill}\``);
+        expect(readme).toContain(`\`${skill}\``);
+      }
+    });
+
+    it('every documented scenario skill has a source file and golden', () => {
+      for (const [skill, golden] of [
+        ['workflow-approval', 'approval-expiry-escalation.md'],
+        ['workflow-webhook', 'duplicate-webhook-order.md'],
+        ['workflow-saga', 'compensation-saga.md'],
+        ['workflow-timeout', 'approval-timeout-streaming.md'],
+        ['workflow-idempotency', 'duplicate-webhook-order.md'],
+        ['workflow-observe', 'operator-observability-streams.md'],
+      ]) {
+        expect(existsSync(resolve(ROOT, `skills/${skill}/SKILL.md`))).toBe(
+          true
+        );
+        expect(
+          existsSync(resolve(ROOT, `skills/${skill}/goldens/${golden}`))
+        ).toBe(true);
+      }
+    });
+
+    it('docs include sample prompts for scenario commands', () => {
+      const docs = read(
+        'docs/content/docs/getting-started/workflow-skills.mdx'
+      );
+      expect(docs).toContain(
+        '/workflow-saga reserve inventory, charge payment, compensate on shipping failure'
+      );
+      expect(docs).toContain(
+        '/workflow-timeout wait 24h for approval, then expire'
+      );
+      expect(docs).toContain(
+        '/workflow-idempotency make duplicate webhook delivery safe'
+      );
+      expect(docs).toContain(
+        '/workflow-observe stream operator progress and final status'
+      );
+    });
+  });
+});