diff --git a/.impeccable.md b/.impeccable.md
new file mode 100644
index 0000000000..509c9c081e
--- /dev/null
+++ b/.impeccable.md
@@ -0,0 +1,33 @@
+## Design Context
+
+### Users
+Broad audience spanning Vercel customers adding durable workflows, general JS/TS developers evaluating workflow solutions, enterprise teams building production systems, and AI agent builders who need suspend/resume semantics. They arrive with a task in mind — integrating workflows into an existing app or understanding a specific pattern — and want to get back to coding quickly.
+
+### Brand Personality
+**Technical, reliable, clean.** The site should project engineering rigor and visual clarity — a Vercel-grade quality bar. Confidence comes from precision, not decoration.
+
+### Emotional Goal
+**Confidence & clarity.** A visitor should immediately feel "I can trust this and understand it quickly." The interface should reduce cognitive load, not add to it.
+
+### Aesthetic Direction
+- **Visual tone**: Minimal, precise, high-contrast. Geist typography carries the hierarchy.
+- **References**: Vercel docs — the quality bar and feel to match.
+- **Anti-references**: No playful/gamified elements (mascots, excessive gradients, startup energy). No dense enterprise wiki walls of text.
+- **Theme**: Light and dark mode, pure white/black backgrounds, OKLch color system for perceptual uniformity. The primary blue (`oklch(57.61% 0.2508 258.23)`) is the only chromatic accent outside data visualization.
+
+### Design Principles
+
+1. **Content-first** — Every element earns its place by serving the reader. Remove decoration that doesn't clarify.
+2. **Scannable hierarchy** — Use Geist font weights, spacing, and muted foregrounds so developers can skim to the answer. Dense text means something is missing structure, not that it needs illustration.
+3. **System consistency** — Use shadcn/ui (New York) components and CSS variable tokens everywhere. Custom one-offs signal a gap in the system, not a design opportunity.
+4. **Accessible by default** — Follow WCAG AA, respect `prefers-reduced-motion`, lean on Radix primitives for keyboard/screen-reader support. The defaults should be correct.
+5. **Dark mode parity** — Both themes are first-class. Design in both, not one then the other. OKLch tokens and transparent borders keep contrast ratios stable across themes.
+
+### Technical Stack (Design-Relevant)
+- **Fonts**: Geist Sans + Geist Mono (variable weight, loaded as `--font-sans` / `--font-mono`)
+- **Colors**: OKLch CSS custom properties, switched via `.dark` class (next-themes)
+- **Components**: shadcn/ui New York style, CVA variants, Radix primitives
+- **Layout**: Tailwind CSS v4, container queries for responsive components
+- **Docs framework**: Fumadocs with shadcn theme preset
+- **Animation**: Motion (Framer), used sparingly — entrance transitions only
+- **Icons**: Lucide React
diff --git a/docs/app/[lang]/cookbook/[[...slug]]/page.tsx b/docs/app/[lang]/cookbook/[[...slug]]/page.tsx
new file mode 100644
index 0000000000..bd7b7267c2
--- /dev/null
+++ b/docs/app/[lang]/cookbook/[[...slug]]/page.tsx
@@ -0,0 +1,152 @@
+import { Step, Steps } from 'fumadocs-ui/components/steps';
+import { Tab, Tabs } from 'fumadocs-ui/components/tabs';
+import { createRelativeLink } from 'fumadocs-ui/mdx';
+import type { Metadata } from 'next';
+import dynamic from 'next/dynamic';
+import { notFound } from 'next/navigation';
+import type { ComponentProps } from 'react';
+import {
+ rewriteCookbookUrl,
+ rewriteCookbookUrlsInText,
+} from '@/lib/geistdocs/cookbook-source';
+import { AskAI } from '@/components/geistdocs/ask-ai';
+import { CopyPage } from '@/components/geistdocs/copy-page';
+import {
+ DocsBody,
+ DocsDescription,
+ DocsPage,
+ DocsTitle,
+} from '@/components/geistdocs/docs-page';
+import { EditSource } from '@/components/geistdocs/edit-source';
+import { Feedback } from '@/components/geistdocs/feedback';
+import { getMDXComponents } from '@/components/geistdocs/mdx-components';
+import { OpenInChat } from '@/components/geistdocs/open-in-chat';
+import { ScrollTop } from '@/components/geistdocs/scroll-top';
+import { Badge } from '@/components/ui/badge';
+import { Separator } from '@/components/ui/separator';
+import { getLLMText, getPageImage, source } from '@/lib/geistdocs/source';
+
+const LazyCookbookExplorer = dynamic(
+ () =>
+ import('@/components/geistdocs/cookbook-explorer').then(
+ (mod) => mod.CookbookExplorer
+ ),
+ {
+ loading: () => (
+
+ Answer a few questions to find the right pattern from{' '}
+ {recipeCount} recipes, or switch to Browse all{' '}
+ if you already know roughly what you want.
+
+ {resultRecipes.length} recipe
+ {resultRecipes.length !== 1 ? 's' : ''} match your path.
+
+
+ {resultRecipes.map((recipe) => (
+
+ ))}
+
+
+
+
+
+ )}
+
+
+ );
+}
diff --git a/docs/content/docs/cookbook/advanced/custom-serialization.mdx b/docs/content/docs/cookbook/advanced/custom-serialization.mdx
new file mode 100644
index 0000000000..b39e619b44
--- /dev/null
+++ b/docs/content/docs/cookbook/advanced/custom-serialization.mdx
@@ -0,0 +1,255 @@
+---
+title: Custom Serialization
+description: Make class instances serializable across workflow boundaries using the WORKFLOW_SERIALIZE and WORKFLOW_DESERIALIZE symbol protocol.
+type: guide
+summary: Implement the serde symbol protocol on classes so instances survive serialization when passed between workflow and step functions, and register them in the global class registry.
+---
+
+
+This is an advanced guide. It dives into workflow internals and is not required reading to use workflow.
+
+
+## The Problem
+
+Workflow functions run inside a sandboxed VM. Every value that crosses a function boundary — step arguments, step return values, workflow inputs — must be serializable. Plain objects, strings, and numbers work automatically, but **class instances** lose their prototype chain and methods during serialization.
+
+```typescript lineNumbers
+class StorageClient {
+ constructor(private region: string) {}
+
+ async upload(key: string, body: Uint8Array) {
+ // ... uses this.region internally
+ }
+}
+
+export async function processFile(client: StorageClient) {
+ "use workflow";
+
+ // client is no longer a StorageClient here — it's a plain object
+ // client.upload() throws: "client.upload is not a function"
+ await uploadStep(client, "output.json", data);
+}
+```
+
+The [step-as-factory pattern](/docs/cookbook/advanced/serializable-steps) solves this by deferring object construction into steps. But sometimes you need the object itself to cross boundaries — for example, when a class instance is passed as a workflow input, returned from a step, or stored in workflow state. That's where custom serialization comes in.
+
+## The WORKFLOW_SERIALIZE / WORKFLOW_DESERIALIZE Protocol
+
+The `@workflow/serde` package exports two symbols that act as a serialization protocol. When the workflow runtime encounters a class instance with these symbols, it knows how to convert it to plain data and back.
+
+{/* @skip-typecheck - @workflow/serde is not mapped in the type-checker */}
+```typescript lineNumbers
+import { WORKFLOW_SERIALIZE, WORKFLOW_DESERIALIZE } from "@workflow/serde";
+
+class Point {
+ constructor(public x: number, public y: number) {}
+
+ distanceTo(other: Point): number {
+ return Math.sqrt((this.x - other.x) ** 2 + (this.y - other.y) ** 2);
+ }
+
+ static [WORKFLOW_SERIALIZE](instance: Point) {
+ return { x: instance.x, y: instance.y };
+ }
+
+ static [WORKFLOW_DESERIALIZE](data: { x: number; y: number }) {
+ return new Point(data.x, data.y);
+ }
+}
+```
+
+Both methods must be **static**. `WORKFLOW_SERIALIZE` receives an instance and returns plain serializable data. `WORKFLOW_DESERIALIZE` receives that same data and reconstructs a new instance.
+
+
+Both serde methods run inside the workflow VM. They must not use Node.js APIs, non-deterministic operations, or network calls. Keep them focused on extracting and reconstructing data.
+
+
+## Automatic Class Registration
+
+For the runtime to deserialize a class, the class must be registered in a global registry with a stable `classId`. The SWC compiler plugin handles this automatically — when it detects a class with both `WORKFLOW_SERIALIZE` and `WORKFLOW_DESERIALIZE` static methods, it generates registration code at build time.
+
+This means you only need to implement the two symbol methods. The compiler assigns a deterministic `classId` based on the file path and class name, and registers it in the global `Symbol.for("workflow-class-registry")` registry.
+
+
+No manual registration is required for classes defined in your workflow files. The SWC plugin detects the serde symbols and generates the registration automatically at build time.
+
+
+### Manual Registration for Library Authors
+
+If you're a library author whose classes are defined **outside** the workflow build pipeline (e.g., in a published npm package), the SWC plugin won't process your code. In that case, you need to register classes manually using the same global registry the runtime uses:
+
+```typescript lineNumbers
+const WORKFLOW_CLASS_REGISTRY = Symbol.for("workflow-class-registry");
+
+function registerSerializableClass(classId: string, cls: Function) {
+ const g = globalThis as any;
+  let registry = g[WORKFLOW_CLASS_REGISTRY] as Map<string, Function> | undefined;
+ if (!registry) {
+ registry = new Map();
+ g[WORKFLOW_CLASS_REGISTRY] = registry;
+ }
+ registry.set(classId, cls);
+ Object.defineProperty(cls, "classId", {
+ value: classId,
+ writable: false,
+ enumerable: false,
+ configurable: false,
+ });
+}
+```
+
+Then call it after your class definition:
+
+{/* @skip-typecheck - references variables from prior code block */}
+```typescript lineNumbers
+registerSerializableClass("WorkflowStorageClient", WorkflowStorageClient);
+```
+
+The `classId` is a string identifier stored alongside the serialized data. When the runtime encounters serialized data tagged with that ID, it looks up the registry to find the class and calls `WORKFLOW_DESERIALIZE`.
+
+## Full Example: A Workflow-Safe Storage Client
+
+Here's a complete example of a storage client class that survives serialization across workflow boundaries. This pattern is useful when you need an object with methods to be passed as a workflow input or returned from a step.
+
+```typescript lineNumbers
+import { WORKFLOW_SERIALIZE, WORKFLOW_DESERIALIZE } from "@workflow/serde";
+
+interface StorageClientOptions {
+ region: string;
+ bucket: string;
+ accessKeyId?: string;
+ secretAccessKey?: string;
+}
+
+export class WorkflowStorageClient {
+ private readonly region: string;
+ private readonly bucket: string;
+ private readonly accessKeyId?: string;
+ private readonly secretAccessKey?: string;
+
+ constructor(options: StorageClientOptions) {
+ this.region = options.region;
+ this.bucket = options.bucket;
+ this.accessKeyId = options.accessKeyId;
+ this.secretAccessKey = options.secretAccessKey;
+ }
+
+ async upload(key: string, body: Uint8Array) {
+ "use step";
+ const { S3Client, PutObjectCommand } = await import("@aws-sdk/client-s3");
+ const client = new S3Client({
+ region: this.region,
+ credentials: this.accessKeyId
+ ? { accessKeyId: this.accessKeyId, secretAccessKey: this.secretAccessKey! }
+ : undefined,
+ });
+ await client.send(
+ new PutObjectCommand({ Bucket: this.bucket, Key: key, Body: body })
+ );
+ }
+
+  async getSignedUrl(key: string): Promise<string> {
+ "use step";
+ const { S3Client, GetObjectCommand } = await import("@aws-sdk/client-s3");
+ const { getSignedUrl } = await import("@aws-sdk/s3-request-presigner");
+ const client = new S3Client({ region: this.region });
+ return getSignedUrl(client, new GetObjectCommand({ Bucket: this.bucket, Key: key }));
+ }
+
+ // --- Serde protocol ---
+
+ static [WORKFLOW_SERIALIZE](instance: WorkflowStorageClient): StorageClientOptions {
+ return {
+ region: instance.region,
+ bucket: instance.bucket,
+ accessKeyId: instance.accessKeyId,
+ secretAccessKey: instance.secretAccessKey,
+ };
+ }
+
+ static [WORKFLOW_DESERIALIZE](
+ this: typeof WorkflowStorageClient,
+ data: StorageClientOptions
+ ): WorkflowStorageClient {
+ return new this(data);
+ }
+}
+```
+
+Now this client can be passed into a workflow and used directly:
+
+```typescript lineNumbers
+import { WorkflowStorageClient } from "./storage-client";
+
+export async function processUpload(
+ client: WorkflowStorageClient,
+ data: Uint8Array
+) {
+ "use workflow";
+
+ // client is a real WorkflowStorageClient with working methods
+ await client.upload("output/result.json", data);
+ const url = await client.getSignedUrl("output/result.json");
+ return { url };
+}
+```
+
+## When to Use Custom Serde vs Step-as-Factory
+
+Both patterns solve the same root problem — non-serializable objects can't cross workflow boundaries — but they work differently and suit different situations.
+
+### Step-as-Factory
+
+The [step-as-factory pattern](/docs/cookbook/advanced/serializable-steps) passes a **factory function** instead of an object. The real object is constructed inside a step at execution time.
+
+```typescript lineNumbers
+// Factory: returns a step function, not an object
+export function createS3Client(region: string) {
+ return async () => {
+ "use step";
+ const { S3Client } = await import("@aws-sdk/client-s3");
+ return new S3Client({ region });
+ };
+}
+```
+
+**Best when:**
+- The object has no serializable state (e.g., AI SDK model providers that are pure configuration)
+- You don't need to pass the object back out of a step
+- The object is only used inside a single step
+
+### Custom Serde
+
+Custom serde makes the **object itself** serializable. It can be passed as a workflow input, stored in workflow state, returned from steps, and used across multiple steps.
+
+```typescript lineNumbers
+// Serde: the object survives serialization
+class WorkflowStorageClient {
+ static [WORKFLOW_SERIALIZE](instance) { /* ... */ }
+ static [WORKFLOW_DESERIALIZE](data) { /* ... */ }
+}
+```
+
+**Best when:**
+- The object has meaningful state that must survive serialization (credentials, configuration, accumulated data)
+- The object is passed as a workflow input by the caller
+- Multiple steps need the same object instance
+- You're a library author shipping classes that workflow users will pass around
+
+### Decision Guide
+
+| Scenario | Recommended pattern |
+|---|---|
+| AI SDK model provider (`openai("gpt-4o")`) | Step-as-factory |
+| Database/HTTP client with no config state | Step-as-factory |
+| Storage client with region + credentials | Custom serde |
+| Domain object passed as workflow input | Custom serde |
+| Object returned from one step, used in another | Custom serde |
+| Library class that users instantiate and pass to `start()` | Custom serde |
+
+## Key APIs
+
+- [`WORKFLOW_SERIALIZE`](/docs/api-reference/workflow-serde/workflow-serialize) — symbol for the static serialization method
+- [`WORKFLOW_DESERIALIZE`](/docs/api-reference/workflow-serde/workflow-deserialize) — symbol for the static deserialization method
+- [`"use step"`](/docs/api-reference/workflow/use-step) — marks a function for extraction and serialization
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) — declares the orchestrator function
diff --git a/docs/content/docs/cookbook/advanced/durable-objects.mdx b/docs/content/docs/cookbook/advanced/durable-objects.mdx
new file mode 100644
index 0000000000..b3140ac56f
--- /dev/null
+++ b/docs/content/docs/cookbook/advanced/durable-objects.mdx
@@ -0,0 +1,150 @@
+---
+title: Durable Objects
+description: Model long-lived stateful entities as workflows that persist state across requests.
+type: guide
+summary: Build a durable counter or session object whose state survives restarts by using a workflow's event log as the persistence layer.
+---
+
+
+This is an advanced guide. It dives into workflow internals and is not required reading to use workflow.
+
+
+## The Idea
+
+A workflow's event log already records every step result and replays them to reconstruct state. This is the same property that makes an "object" durable — its fields survive cold starts, crashes, and redeployments. Instead of using a workflow to model a *process*, you can use one to model an *entity* with methods.
+
+Each "method call" is a hook that the object's workflow loop awaits. External callers resume the hook with a payload describing the operation. The workflow applies the operation, updates its internal state, and waits for the next call.
+
+## Pattern: Durable Counter
+
+A counter that persists its value without a database. Each increment/decrement is recorded in the event log.
+
+```typescript lineNumbers
+import { defineHook, getWorkflowMetadata } from "workflow";
+import { z } from "zod";
+
+const counterAction = defineHook({
+ schema: z.object({
+ type: z.enum(["increment", "decrement", "get"]),
+ amount: z.number().default(1),
+ }),
+});
+
+export async function durableCounter() {
+ "use workflow";
+
+ let count = 0;
+ const { workflowRunId } = getWorkflowMetadata();
+
+ while (true) {
+ const hook = counterAction.create({ token: `counter:${workflowRunId}` });
+ const action = await hook;
+
+ switch (action.type) {
+ case "increment":
+ count += action.amount;
+ await recordState(count);
+ break;
+ case "decrement":
+ count -= action.amount;
+ await recordState(count);
+ break;
+ case "get":
+ await emitValue(count);
+ break;
+ }
+ }
+}
+
+async function recordState(count: number) {
+ "use step";
+ // Step records the state transition in the event log.
+ // On replay, the step result restores `count` without re-executing.
+ return count;
+}
+
+async function emitValue(count: number) {
+ "use step";
+ return { count };
+}
+```
+
+### Calling the Object
+
+From an API route, resume the hook to "invoke a method" on the durable object:
+
+```typescript lineNumbers
+import { resumeHook } from "workflow/api";
+
+export async function POST(request: Request) {
+ "use step";
+
+ const { runId, type, amount } = await request.json();
+ await resumeHook(`counter:${runId}`, { type, amount });
+ return Response.json({ ok: true });
+}
+```
+
+## Pattern: Durable Session
+
+A chat session where conversation history is the durable state. Each user message is a hook event; the workflow accumulates messages and generates responses.
+
+```typescript lineNumbers
+import { defineHook, getWritable, getWorkflowMetadata } from "workflow";
+import { DurableAgent } from "@workflow/ai/agent";
+import { anthropic } from "@workflow/ai/providers/anthropic";
+import { z } from "zod";
+import type { UIMessageChunk, ModelMessage } from "ai";
+
+const messageHook = defineHook({
+ schema: z.object({
+ role: z.literal("user"),
+ content: z.string(),
+ }),
+});
+
+export async function durableSession() {
+ "use workflow";
+
+  const writable = getWritable<UIMessageChunk>();
+ const { workflowRunId: runId } = getWorkflowMetadata();
+ const messages: ModelMessage[] = [];
+
+ const agent = new DurableAgent({
+ model: anthropic("claude-sonnet-4-20250514"),
+ instructions: "You are a helpful assistant.",
+ });
+
+ while (true) {
+ const hook = messageHook.create({ token: `session:${runId}` });
+ const userMessage = await hook;
+
+ messages.push({
+ role: userMessage.role,
+ content: userMessage.content,
+ });
+
+ await agent.stream({ messages, writable });
+ }
+}
+```
+
+## When to Use This
+
+- **Entity-per-workflow**: Each user, document, or device gets its own workflow run. The run ID is the entity ID.
+- **No external database needed**: State lives in the event log. Reads replay from the log; writes append to it.
+- **Automatic consistency**: Only one execution runs at a time per workflow run, so there are no race conditions on the entity's state.
+
+## Trade-offs
+
+- **Read latency**: Accessing current state requires replaying the event log (or caching the last known state in a step result).
+- **Not a replacement for databases**: If you need to query across entities (e.g., "all counters above 100"), you still need a database. Durable objects are for single-entity state.
+- **Log growth**: Long-lived objects accumulate large event logs. Consider periodic "snapshot" steps that checkpoint the full state.
+
+## Key APIs
+
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) — declares the orchestrator function
+- [`"use step"`](/docs/api-reference/workflow/use-step) — marks functions for durable execution
+- [`defineHook`](/docs/api-reference/workflow/define-hook) — type-safe hook for receiving external method calls
+- [`getWorkflowMetadata`](/docs/api-reference/workflow/get-workflow-metadata) — access the run ID for deterministic hook tokens
+- [`resumeHook`](/docs/api-reference/workflow-api/resume-hook) — invoke a method on the durable object from an API route
diff --git a/docs/content/docs/cookbook/advanced/isomorphic-packages.mdx b/docs/content/docs/cookbook/advanced/isomorphic-packages.mdx
new file mode 100644
index 0000000000..830c2a02da
--- /dev/null
+++ b/docs/content/docs/cookbook/advanced/isomorphic-packages.mdx
@@ -0,0 +1,145 @@
+---
+title: Isomorphic Packages
+description: Publish reusable workflow packages that work both inside and outside the workflow runtime.
+type: guide
+summary: Use try/catch around getWorkflowMetadata, dynamic imports, and optional peer dependencies to build libraries that run in workflows and in plain Node.js.
+---
+
+
+This is an advanced guide. It dives into workflow internals and is not required reading to use workflow.
+
+
+## The Challenge
+
+If you're a library author publishing a package that integrates with workflow, your code needs to handle two environments:
+
+1. **Inside a workflow run** — `getWorkflowMetadata()` works, `"use step"` directives are transformed, and the full workflow runtime is available.
+2. **Outside a workflow** — your package is imported in a regular Node.js process, a test suite, or a project that doesn't use workflow at all.
+
+A hard dependency on `workflow` will crash at import time for users who don't have it installed.
+
+## Pattern 1: Feature-Detect with `getWorkflowMetadata`
+
+Use a try/catch to detect whether you're running inside a workflow. This lets you add durable behavior when available and fall back to standard execution otherwise.
+
+```typescript lineNumbers
+import { getWorkflowMetadata } from "workflow";
+
+export async function processPayment(amount: number, currency: string) {
+ "use workflow";
+
+ let runId: string | undefined;
+ try {
+ const metadata = getWorkflowMetadata();
+ runId = metadata.workflowRunId;
+ } catch {
+ // Not running inside a workflow — proceed without durability
+ runId = undefined;
+ }
+
+ if (runId) {
+ // Inside a workflow: use the run ID as an idempotency key
+ return await chargeWithIdempotency(amount, currency, runId);
+ } else {
+ // Outside a workflow: standard charge
+ return await chargeStandard(amount, currency);
+ }
+}
+
+async function chargeWithIdempotency(amount: number, currency: string, idempotencyKey: string) {
+ "use step";
+ // Stripe charge with idempotency key from workflow run ID
+ return { charged: true, amount, currency, idempotencyKey };
+}
+
+async function chargeStandard(amount: number, currency: string) {
+ "use step";
+ return { charged: true, amount, currency };
+}
+```
+
+## Pattern 2: Dynamic Imports
+
+Avoid importing `workflow` at the top level. Use dynamic `import()` so the module is only loaded when actually needed.
+
+```typescript lineNumbers
+export async function createDurableTask(name: string, payload: unknown) {
+ "use workflow";
+
+  let sleep: ((duration: string) => Promise<void>) | undefined;
+
+ try {
+ const wf = await import("workflow");
+ sleep = wf.sleep;
+ } catch {
+ // workflow not installed — use setTimeout fallback
+ sleep = undefined;
+ }
+
+ await executeTask(name, payload);
+
+ if (sleep) {
+ // Inside workflow: durable sleep that survives restarts
+ await sleep("5m");
+ } else {
+ // Outside workflow: plain timer (not durable)
+ await new Promise((resolve) => setTimeout(resolve, 5 * 60 * 1000));
+ }
+
+ await sendNotification(name);
+}
+
+async function executeTask(name: string, payload: unknown) {
+ "use step";
+ return { executed: true, name, payload };
+}
+
+async function sendNotification(name: string) {
+ "use step";
+ return { notified: true, name };
+}
+```
+
+## Pattern 3: Optional Peer Dependencies
+
+In your `package.json`, declare `workflow` as an optional peer dependency. This signals to package managers that your library *can* use workflow but doesn't require it.
+
+```json
+{
+ "name": "@acme/payments",
+ "peerDependencies": {
+ "workflow": ">=1.0.0"
+ },
+ "peerDependenciesMeta": {
+ "workflow": {
+ "optional": true
+ }
+ }
+}
+```
+
+Then guard all workflow imports with dynamic `import()` and try/catch as shown above.
+
+## Real-World Examples
+
+### Mux AI
+
+The Mux team published a reusable workflow package for video processing. Their library detects the workflow runtime and falls back to standard async processing when workflow isn't available.
+
+### World ID
+
+World ID's identity verification library uses `getWorkflowMetadata()` to attach run IDs to their human-in-the-loop verification hooks, but the same library works in non-workflow environments for simple verification flows.
+
+## Guidelines for Library Authors
+
+1. **Never hard-import `workflow` at the top level** if your package should work without it.
+2. **Use `getWorkflowMetadata()` in a try/catch** as the canonical runtime detection pattern.
+3. **Mark `workflow` as an optional peer dependency** in `package.json`.
+4. **Test both paths**: run your test suite with and without the workflow runtime to catch import errors.
+5. **Document the dual behavior**: make it clear in your README which features require workflow and which work standalone.
+
+## Key APIs
+
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) — declares the orchestrator function
+- [`"use step"`](/docs/api-reference/workflow/use-step) — marks functions for durable execution
+- [`getWorkflowMetadata`](/docs/api-reference/workflow/get-workflow-metadata) — runtime detection and run ID access
diff --git a/docs/content/docs/cookbook/advanced/meta.json b/docs/content/docs/cookbook/advanced/meta.json
new file mode 100644
index 0000000000..6b6fb644ff
--- /dev/null
+++ b/docs/content/docs/cookbook/advanced/meta.json
@@ -0,0 +1,11 @@
+{
+ "title": "Advanced",
+ "pages": [
+ "serializable-steps",
+ "durable-objects",
+ "isomorphic-packages",
+ "custom-serialization",
+ "secure-credentials",
+ "publishing-libraries"
+ ]
+}
diff --git a/docs/content/docs/cookbook/advanced/publishing-libraries.mdx b/docs/content/docs/cookbook/advanced/publishing-libraries.mdx
new file mode 100644
index 0000000000..c31a3f67d2
--- /dev/null
+++ b/docs/content/docs/cookbook/advanced/publishing-libraries.mdx
@@ -0,0 +1,298 @@
+---
+title: Publishing Libraries
+description: Structure and publish npm packages that export workflow functions for consumers to use with Workflow DevKit.
+type: guide
+summary: Learn how to build, export, and test npm packages that ship workflow and step functions — including package.json exports, re-exporting for stable workflow IDs, keeping step I/O clean, and integration testing.
+---
+
+
+This is an advanced guide for library authors who want to publish reusable workflow functions as npm packages. It assumes familiarity with `"use workflow"`, `"use step"`, and the workflow execution model.
+
+
+## Package Structure
+
+A workflow library follows a standard TypeScript package layout with a dedicated `workflows/` directory. Each workflow file exports one or more workflow functions that consumers can import and pass to `start()`.
+
+```
+my-media-lib/
+├── src/
+│ ├── index.ts # Package entry point
+│ ├── types.ts # Shared types
+│ ├── workflows/
+│ │ ├── index.ts # Re-exports all workflows
+│ │ ├── transcode.ts # Workflow: transcode a video
+│ │ └── generate-thumbnails.ts
+│ └── lib/
+│ └── api-client.ts # Internal helpers (NOT steps)
+├── test-server/
+│ └── workflows.ts # Re-export for integration tests
+├── tsup.config.ts
+├── package.json
+└── tsconfig.json
+```
+
+### Entry Points and Exports
+
+Use the `exports` field in `package.json` to expose separate entry points for the main API and the raw workflow functions:
+
+```json
+{
+ "name": "@acme/media",
+ "type": "module",
+ "exports": {
+ ".": {
+ "types": { "import": "./dist/index.d.ts" },
+ "import": "./dist/index.js"
+ },
+ "./workflows": {
+ "types": { "import": "./dist/workflows/index.d.ts" },
+ "import": "./dist/workflows/index.js"
+ }
+ },
+ "files": ["dist"]
+}
+```
+
+The main entry point (`@acme/media`) exports types, utilities, and convenience wrappers. The `./workflows` entry point (`@acme/media/workflows`) exports the raw workflow functions that consumers need for the build system.
+
+### Source Files
+
+The package entry re-exports workflows alongside any utilities:
+
+```typescript lineNumbers
+// src/index.ts
+export * from "./types";
+export * as workflows from "./workflows";
+```
+
+The workflows barrel file re-exports each workflow:
+
+```typescript lineNumbers
+// src/workflows/index.ts
+export * from "./transcode";
+export * from "./generate-thumbnails";
+```
+
+### Build Configuration
+
+Use a bundler like `tsup` with separate entry points for each export. Mark `workflow` as external so it's resolved from the consumer's project:
+
+```typescript lineNumbers
+// tsup.config.ts
+import { defineConfig } from "tsup";
+
+export default defineConfig({
+ entry: [
+ "src/index.ts",
+ "src/workflows/index.ts",
+ ],
+ format: ["esm"],
+ dts: true,
+ sourcemap: true,
+ clean: true,
+ external: ["workflow"],
+});
+```
+
+## Re-Exporting for Workflow ID Stability
+
+Workflow DevKit's compiler assigns each workflow function a stable ID based on its position in the source file that the build system processes. When a consumer imports a pre-built workflow from an npm package, the compiler never sees the original source — it only sees the compiled output. This means workflow IDs won't match between the library's development environment and the consumer's app.
+
+The fix is a **re-export file**. The consumer creates a file in their `workflows/` directory that re-exports the library's workflows. The build system then processes this file and assigns stable IDs.
+
+### Consumer Setup
+
+```typescript lineNumbers
+// workflows/media.ts (in the consumer's project)
+// Re-export library workflows so the build system assigns stable IDs
+export * from "@acme/media/workflows";
+```
+
+This one-line file is all that's needed. The workflow compiler transforms this file, discovers the workflow and step functions from the library, and assigns IDs that are stable across deployments.
+
+### Why This Is Necessary
+
+Without re-exporting, the workflow runtime cannot match a running workflow to its function definition. When a workflow run is replayed after a cold start, the runtime looks up functions by their compiler-assigned IDs. If the IDs don't exist (because the compiler never processed the library's source), replay fails.
+
+The re-export pattern ensures:
+
+1. **Stable IDs** — the compiler assigns IDs based on the consumer's source tree
+2. **Replay safety** — IDs persist across deployments and cold starts
+3. **Version upgrades** — re-exported IDs remain stable as long as the consumer's file doesn't change
+
+## Keeping Step I/O Clean
+
+When you publish a workflow library, every step function's inputs and outputs are recorded in the event log. This has two implications:
+
+### 1. Everything Must Be Serializable
+
+Step inputs and outputs must be JSON-serializable. Do not pass or return:
+
+- Class instances (unless they implement custom serialization)
+- Functions or closures
+- `Map`, `Set`, `WeakRef`, or other non-JSON types
+- Circular references
+
+If your library works with complex objects, pass serializable configuration into steps and reconstruct the objects inside the step body.
+
+{/* @skip-typecheck - good/bad comparison with duplicate function names */}
+```typescript lineNumbers
+// Good: pass serializable config, construct inside the step
+async function callExternalApi(endpoint: string, params: Record<string, unknown>) {
+ "use step";
+ const client = createApiClient(process.env.API_KEY!);
+ return await client.request(endpoint, params);
+}
+
+// Bad: pass a pre-constructed client object
+async function callExternalApi(client: ApiClient, params: Record<string, unknown>) {
+ "use step";
+ // ApiClient is not serializable — this will fail on replay
+ return await client.request(params);
+}
+```
+
+See [Serializable Steps](/docs/cookbook/advanced/serializable-steps) for the step-as-factory pattern.
+
+### 2. Secrets Must Not Appear in Step I/O
+
+Step inputs and outputs are persisted in the event log and may be visible in observability tools. **Never pass secrets as step arguments or return them from steps.**
+
+{/* @skip-typecheck - good/bad comparison with duplicate function names */}
+```typescript lineNumbers
+// Bad: API key appears in the event log
+async function fetchData(apiKey: string, query: string) {
+ "use step";
+ const client = createClient(apiKey);
+ return await client.fetch(query);
+}
+
+// Good: resolve credentials inside the step from environment
+async function fetchData(query: string) {
+ "use step";
+ const client = createClient(process.env.API_KEY!);
+ return await client.fetch(query);
+}
+```
+
+Similarly, helper functions that create API clients using credentials should **not** be marked as steps. If a function's return value would contain sensitive data, keep it as a plain function called inside a step body:
+
+{/* @skip-typecheck - references undefined ServiceClient */}
+```typescript lineNumbers
+// This is NOT a step — intentionally, to avoid credentials in step I/O
+function createAuthenticatedClient(credentials: { token: string }) {
+ return new ServiceClient({ auth: credentials.token });
+}
+
+async function processItem(itemId: string) {
+ "use step";
+ // Resolve credentials and create client inside the step
+ const client = createAuthenticatedClient({
+ token: process.env.SERVICE_TOKEN!,
+ });
+ return await client.process(itemId);
+}
+```
+
+## Testing Workflow Libraries
+
+Library authors need integration tests that exercise workflows through the full Workflow DevKit runtime — not just unit tests of individual functions.
+
+### Test Server Pattern
+
+Create a minimal test server that re-exports your library's workflows, just like a consumer would:
+
+```typescript lineNumbers
+// test-server/workflows.ts
+export * from "@acme/media/workflows";
+```
+
+This test server acts as a stand-in consumer app. Point your test runner at it to exercise the full workflow lifecycle: start, replay, and completion.
+
+### Vitest Configuration
+
+Use a dedicated Vitest config for integration tests that run against the Workflow DevKit runtime:
+
+```typescript lineNumbers
+// vitest.workflowdevkit.config.ts
+import { defineConfig } from "vitest/config";
+
+export default defineConfig({
+ test: {
+ include: ["tests/integration/**/*.workflowdevkit.test.ts"],
+ testTimeout: 120_000, // Workflows may take time to complete
+ setupFiles: ["./tests/setup.ts"],
+ },
+});
+```
+
+Run these tests separately from your unit tests:
+
+```bash
+# Unit tests (fast, no workflow runtime)
+pnpm vitest run tests/unit
+
+# Integration tests (requires workflow runtime)
+pnpm vitest run --config vitest.workflowdevkit.config.ts
+```
+
+### What to Test
+
+- **Happy path**: workflow starts, all steps execute, and the final result is correct
+- **Serialization round-trip**: inputs and outputs survive the event log
+- **Replay**: kill and restart a workflow mid-execution to verify deterministic replay
+- **Error handling**: verify that step failures produce the expected errors
+
+## Working With and Without Workflow Installed
+
+If your library should work both as a standalone package and inside Workflow DevKit, declare `workflow` as an optional peer dependency:
+
+```json
+{
+ "peerDependencies": {
+ "workflow": ">=4.0.0"
+ },
+ "peerDependenciesMeta": {
+ "workflow": {
+ "optional": true
+ }
+ }
+}
+```
+
+Use dynamic imports and runtime detection so your library gracefully degrades when workflow is not installed:
+
+```typescript lineNumbers
+async function isWorkflowRuntime(): Promise<boolean> {
+ try {
+ const wf = await import("workflow");
+ if (typeof wf.getWorkflowMetadata !== "function") return false;
+ wf.getWorkflowMetadata();
+ return true;
+ } catch {
+ return false;
+ }
+}
+```
+
+See [Isomorphic Packages](/docs/cookbook/advanced/isomorphic-packages) for the full pattern including feature detection, dynamic imports, and dual-path execution.
+
+## Checklist
+
+Before publishing a workflow library:
+
+- [ ] `workflow` is listed as an **optional** peer dependency
+- [ ] Separate `./workflows` export in `package.json` for the raw workflow functions
+- [ ] `workflow` is marked as **external** in your bundler config
+- [ ] Documentation tells consumers to re-export from `@your-lib/workflows`
+- [ ] No secrets in step inputs or outputs — credentials are resolved at runtime inside steps
+- [ ] All step I/O is JSON-serializable
+- [ ] Integration tests use a test server with re-exported workflows
+- [ ] Both with-workflow and without-workflow code paths are tested
+
+## Key APIs
+
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) — declares the orchestrator function
+- [`"use step"`](/docs/api-reference/workflow/use-step) — marks functions for durable execution
+- [`start`](/docs/api-reference/workflow/start) — starts a workflow run
+- [`getWorkflowMetadata`](/docs/api-reference/workflow/get-workflow-metadata) — runtime detection and run ID access
diff --git a/docs/content/docs/cookbook/advanced/secure-credentials.mdx b/docs/content/docs/cookbook/advanced/secure-credentials.mdx
new file mode 100644
index 0000000000..68756af43a
--- /dev/null
+++ b/docs/content/docs/cookbook/advanced/secure-credentials.mdx
@@ -0,0 +1,353 @@
+---
+title: Secure Credential Handling
+description: Protect API keys and secrets from appearing in the workflow event log using encryption, credential providers, and careful step design.
+type: guide
+summary: Encrypt credentials before start(), resolve secrets at runtime via a credentials provider, and avoid leaking secrets through step I/O.
+---
+
+
+This is an advanced guide. It covers security patterns for workflows that handle sensitive credentials. It is not required reading to use workflow, but is strongly recommended for production multi-tenant applications.
+
+
+## Why Credentials Need Special Treatment
+
+Workflow DevKit persists every step's input and output to an event log for replay and observability. If you pass an API key as a step argument or return it from a step, **the plaintext secret is stored in the event log**.
+
+Three complementary patterns keep secrets out of the log:
+
+1. **Encrypt credentials before `start()`** so the event log only stores ciphertext.
+2. **Use a module-level credentials provider** so steps resolve secrets at runtime instead of receiving them as arguments.
+3. **Keep credential-resolving helpers out of steps** so their return values are never serialized.
+
+---
+
+## Encrypting Credentials Before `start()`
+
+When a caller triggers a workflow, any arguments passed to `start()` are serialized into the event log. If those arguments contain API keys, the keys are stored in plaintext. AES-256-GCM encryption solves this: encrypt on the caller side, decrypt inside a step.
+
+### The Encryption Utility
+
+
+{/* @skip-typecheck - uses unmapped @noble/ciphers and helper functions defined elsewhere */}
+```typescript lineNumbers
+// lib/workflow-crypto.ts
+import { gcm } from "@noble/ciphers/aes.js";
+
+const IV_LENGTH = 12;
+const TAG_LENGTH = 16;
+
+export interface EncryptedPayload {
+ v: 1;
+ alg: "aes-256-gcm";
+ kid?: string; // optional key ID for rotation
+ iv: string;
+ tag: string;
+ ciphertext: string;
+}
+
+export async function encryptForWorkflow<T>(
+ value: T,
+ key: Uint8Array | string,
+ keyId?: string,
+): Promise<EncryptedPayload> {
+ const keyBytes = normalizeKey(key); // validate 32-byte key
+ const iv = new Uint8Array(IV_LENGTH);
+ crypto.getRandomValues(iv);
+
+ const plaintext = new TextEncoder().encode(JSON.stringify(value));
+ const encrypted = gcm(keyBytes, iv).encrypt(plaintext);
+
+ // GCM appends the auth tag to the ciphertext
+ const tag = encrypted.slice(encrypted.length - TAG_LENGTH);
+ const ciphertext = encrypted.slice(0, encrypted.length - TAG_LENGTH);
+
+ return {
+ v: 1,
+ alg: "aes-256-gcm",
+ ...(keyId !== undefined && { kid: keyId }),
+ iv: bytesToBase64(iv),
+ tag: bytesToBase64(tag),
+ ciphertext: bytesToBase64(ciphertext),
+ };
+}
+
+export async function decryptFromWorkflow<T>(
+ payload: EncryptedPayload,
+ key: Uint8Array | string,
+): Promise<T> {
+ const keyBytes = normalizeKey(key);
+ const iv = base64ToBytes(payload.iv);
+ const tag = base64ToBytes(payload.tag);
+ const ciphertext = base64ToBytes(payload.ciphertext);
+
+ // Recombine ciphertext + tag for GCM decryption
+ const combined = new Uint8Array(ciphertext.length + tag.length);
+ combined.set(ciphertext);
+ combined.set(tag, ciphertext.length);
+
+ const plaintext = gcm(keyBytes, iv).decrypt(combined);
+ return JSON.parse(new TextDecoder().decode(plaintext)) as T;
+}
+
+function normalizeKey(key: Uint8Array | string): Uint8Array {
+ const bytes = typeof key === "string" ? base64ToBytes(key) : key;
+ if (bytes.length !== 32) {
+ throw new Error(`Expected 32-byte key, got ${bytes.length}`);
+ }
+ return bytes;
+}
+```
+
+### Encrypting on the Caller Side
+
+
+{/* @skip-typecheck - start is from workflow/api, references local modules */}
+```typescript lineNumbers
+// app/api/start-workflow/route.ts
+import { start } from "workflow/api";
+import { encryptForWorkflow } from "@/lib/workflow-crypto";
+import { processDocument } from "@/workflows/process-document";
+
+export async function POST(request: Request) {
+ const { documentId } = await request.json();
+
+ // Encrypt credentials before they enter the event log
+ const encrypted = await encryptForWorkflow(
+ {
+ apiKey: process.env.THIRD_PARTY_API_KEY!,
+ serviceToken: process.env.SERVICE_TOKEN!,
+ },
+ process.env.WORKFLOW_SECRET_KEY!,
+ );
+
+ const run = await start(processDocument, [documentId, encrypted]);
+ return Response.json({ runId: run.id });
+}
+```
+
+### Decrypting Inside a Step
+
+```typescript lineNumbers
+// workflows/process-document.ts
+import { decryptFromWorkflow } from "@/lib/workflow-crypto";
+import type { EncryptedPayload } from "@/lib/workflow-crypto";
+
+export async function processDocument(
+ documentId: string,
+ credentials: EncryptedPayload,
+) {
+ "use workflow";
+
+ const result = await fetchDocument(documentId, credentials);
+ return result;
+}
+
+async function fetchDocument(
+ documentId: string,
+ credentials: EncryptedPayload,
+) {
+ "use step";
+
+ // Decrypt inside the step — the decrypted values never leave this function
+ const { apiKey } = await decryptFromWorkflow<{ apiKey: string }>(
+ credentials,
+ process.env.WORKFLOW_SECRET_KEY!,
+ );
+
+ const response = await fetch(`https://api.example.com/docs/${documentId}`, {
+ headers: { Authorization: `Bearer ${apiKey}` },
+ });
+
+ // Only the document data is returned (and logged), not the API key
+ return response.json();
+}
+```
+
+The event log stores the encrypted blob as the step input and the document data as the step output. The plaintext API key exists only in memory during step execution.
+
+### Key Rotation
+
+The optional `kid` (key ID) field supports key rotation. Include a `kid` when encrypting to identify which key was used. On the decryption side, read `payload.kid` to look up the correct key:
+
+{/* @skip-typecheck - references variables from prior code blocks */}
+```typescript lineNumbers
+const encrypted = await encryptForWorkflow(
+ credentials,
+ currentKey,
+ "key-2025-03", // key identifier
+);
+
+// On the decryption side
+const key = getKeyById(payload.kid); // look up the right key
+const decrypted = await decryptFromWorkflow(payload, key);
+```
+
+---
+
+## Module-Level Credentials Provider
+
+Encryption works well when credentials originate from the caller. But sometimes the deployment environment itself holds the secrets (e.g., environment variables or a secrets manager), and you want steps to resolve them at runtime without receiving them as arguments.
+
+A **credentials provider** is a factory function registered at module scope. Steps call it at runtime to get the credentials they need.
+
+### Registering a Provider
+
+```typescript lineNumbers
+// lib/credentials-provider.ts
+
+type CredentialsProvider = () =>
+  | Promise<Record<string, string> | undefined>
+  | Record<string, string>
+  | undefined;
+
+let credentialsProvider: CredentialsProvider | undefined;
+
+export function setCredentialsProvider(provider?: CredentialsProvider): void {
+ credentialsProvider = provider;
+}
+
+export async function resolveCredentials(
+  input?: Record<string, string>,
+): Promise<Record<string, string>> {
+ // 1. Start with provider credentials as the base
+ const fromProvider = credentialsProvider
+ ? (await credentialsProvider()) ?? {}
+ : {};
+
+ // 2. Merge direct input (overrides provider)
+ return { ...fromProvider, ...input };
+}
+```
+
+### Setting the Provider at App Startup
+
+```typescript lineNumbers
+// app/instrumentation.ts (Next.js) or server entry point
+import { setCredentialsProvider } from "@/lib/credentials-provider";
+
+// Register once at module scope — runs before any workflow step
+setCredentialsProvider(() => ({
+ apiKey: process.env.THIRD_PARTY_API_KEY!,
+ serviceToken: process.env.SERVICE_TOKEN!,
+}));
+```
+
+### Using the Provider Inside Steps
+
+```typescript lineNumbers
+// workflows/analyze.ts
+import { resolveCredentials } from "@/lib/credentials-provider";
+
+export async function analyzeData(datasetId: string) {
+ "use workflow";
+
+ const summary = await runAnalysis(datasetId);
+ return summary;
+}
+
+async function runAnalysis(datasetId: string) {
+ "use step";
+
+ // Resolve credentials at runtime — no secrets in the step's arguments
+ const { apiKey } = await resolveCredentials();
+
+ const response = await fetch(`https://api.example.com/analyze/${datasetId}`, {
+ headers: { Authorization: `Bearer ${apiKey}` },
+ });
+
+ return response.json();
+}
+```
+
+### Resolution Order
+
+When both encryption and a provider are in use, a typical resolution order is:
+
+1. **Credentials provider** (module-level factory)
+2. **Decrypted credentials** (from encrypted workflow arguments)
+3. **Environment variables** (direct `process.env` fallback)
+
+Later sources override earlier ones. This lets a library provide sensible defaults while allowing callers to override per-workflow.
+
+---
+
+## Why Some Functions MUST NOT Be Steps
+
+This is the most subtle pattern. Consider a helper function that creates an API client with credentials:
+
+{/* @skip-typecheck - references resolveCredentials from prior code block */}
+```typescript lineNumbers
+// lib/client-factory.ts
+
+/**
+ * Resolves client configuration for a workflow.
+ * This function is NOT a workflow step to avoid exposing
+ * credentials in step I/O.
+ */
+export async function createClient(
+  credentials?: Record<string, string>,
+) {
+ const { apiKey, serviceToken } = await resolveCredentials(credentials);
+
+ return {
+ apiKey,
+ serviceToken,
+ baseUrl: "https://api.example.com",
+ };
+}
+```
+
+If `createClient` were marked with `"use step"`, its **return value** — which contains the plaintext `apiKey` and `serviceToken` — would be serialized into the event log for observability. This is a credential leak.
+
+The rule: **functions that return or handle credentials should NOT be steps.** Instead, call them from *inside* a step:
+
+```typescript lineNumbers
+// workflows/process.ts
+import { createClient } from "@/lib/client-factory";
+
+async function uploadResult(data: Record<string, unknown>) {
+ "use step";
+
+ // createClient runs inside this step — its return value
+ // stays in memory and is never serialized to the event log
+ const client = await createClient();
+
+ const response = await fetch(`${client.baseUrl}/upload`, {
+ method: "POST",
+ headers: {
+ Authorization: `Bearer ${client.apiKey}`,
+ "X-Service-Token": client.serviceToken,
+ },
+ body: JSON.stringify(data),
+ });
+
+ // Only the upload result is returned (and logged)
+ return response.json();
+}
+
+export async function processAndUpload(inputData: Record<string, unknown>) {
+ "use workflow";
+
+ const result = await uploadResult(inputData);
+ return result;
+}
+```
+
+### The Key Insight
+
+The event log records:
+
+- **Step inputs**: the arguments passed to the step function
+- **Step outputs**: the return value of the step function
+
+Anything that happens *inside* the step but is not an input or output is invisible to the log. By resolving credentials inside the step and only returning non-sensitive results, you keep secrets out of the event log entirely.
+
+### What to Watch For
+
+| Pattern | Safe? | Why |
+|---------|-------|-----|
+| Step receives API key as argument | No | Input is logged |
+| Step returns an object containing a token | No | Output is logged |
+| Step calls `resolveCredentials()` internally | Yes | Credentials stay in memory |
+| Helper that returns credentials is called inside a step | Yes | Return value is not the step's return value |
+| Helper that returns credentials is marked `"use step"` | No | Step output is logged |
diff --git a/docs/content/docs/cookbook/advanced/serializable-steps.mdx b/docs/content/docs/cookbook/advanced/serializable-steps.mdx
new file mode 100644
index 0000000000..e64ce38b4f
--- /dev/null
+++ b/docs/content/docs/cookbook/advanced/serializable-steps.mdx
@@ -0,0 +1,149 @@
+---
+title: Serializable Steps
+description: Wrap non-serializable objects (like AI model providers) inside step functions so they can cross the workflow boundary.
+type: guide
+summary: Return a callback from a step to defer provider initialization, making non-serializable AI SDK models work inside durable workflows.
+---
+
+
+This is an advanced guide. It dives into workflow internals and is not required reading to use workflow.
+
+
+## The Problem
+
+Workflow functions run inside a sandboxed VM where every value that crosses a function boundary must be serializable (JSON-safe). AI SDK model providers — `openai("gpt-4o")`, `anthropic("claude-sonnet-4-20250514")`, etc. — return complex objects with methods, closures, and internal state. Passing one directly into a step causes a serialization error.
+
+```typescript lineNumbers
+import { openai } from "@ai-sdk/openai";
+import { DurableAgent } from "@workflow/ai/agent";
+import { getWritable } from "workflow";
+import type { UIMessageChunk } from "ai";
+
+export async function brokenAgent(prompt: string) {
+ "use workflow";
+
+ const writable = getWritable();
+ const agent = new DurableAgent({
+ // This fails — the model object is not serializable
+ model: openai("gpt-4o"),
+ });
+
+ await agent.stream({ messages: [{ role: "user", content: prompt }], writable });
+}
+```
+
+## The Solution: Step-as-Factory
+
+Instead of passing the model object, pass a **callback function** that returns the model. Marking that callback with `"use step"` tells the compiler to serialize the *function reference* (which is just a string identifier) rather than its return value. The provider is only instantiated at execution time, inside the step's full Node.js runtime.
+
+```typescript lineNumbers
+import { openai as openaiProvider } from "@ai-sdk/openai";
+
+// Returns a step function, not a model object
+export function openai(...args: Parameters<typeof openaiProvider>) {
+ return async () => {
+ "use step";
+ return openaiProvider(...args);
+ };
+}
+```
+
+The `DurableAgent` receives a function (`() => Promise<LanguageModel>`) instead of a model object. When the agent needs to call the LLM, it invokes the factory inside a step where the real provider can be constructed with full Node.js access.
+
+## How `@workflow/ai` Uses This
+
+The `@workflow/ai` package ships pre-wrapped providers for all major AI SDK backends. Each one follows the same pattern:
+
+```typescript lineNumbers
+// packages/ai/src/providers/anthropic.ts
+import { anthropic as anthropicProvider } from "@ai-sdk/anthropic";
+
+export function anthropic(...args: Parameters<typeof anthropicProvider>) {
+ return async () => {
+ "use step";
+ return anthropicProvider(...args);
+ };
+}
+```
+
+This means you import from `@workflow/ai` instead of `@ai-sdk/*` directly:
+
+```typescript lineNumbers
+import { anthropic } from "@workflow/ai/providers/anthropic";
+import { DurableAgent } from "@workflow/ai/agent";
+import { getWritable } from "workflow";
+import type { UIMessageChunk } from "ai";
+
+export async function chatAgent(prompt: string) {
+ "use workflow";
+
+ const writable = getWritable();
+ const agent = new DurableAgent({
+ model: anthropic("claude-sonnet-4-20250514"),
+ });
+
+ await agent.stream({ messages: [{ role: "user", content: prompt }], writable });
+}
+```
+
+## Writing Your Own Serializable Wrapper
+
+Apply the same pattern to any non-serializable dependency. The key rule: **the outer function captures serializable arguments, and the inner `"use step"` function constructs the real object at runtime**.
+
+```typescript lineNumbers
+import type { S3Client as S3ClientType } from "@aws-sdk/client-s3";
+
+// The arguments (region, bucket) are plain strings — serializable
+export function createS3Client(region: string) {
+  return async (): Promise<S3ClientType> => {
+ "use step";
+ const { S3Client } = await import("@aws-sdk/client-s3");
+ return new S3Client({ region });
+ };
+}
+
+// Usage in a workflow
+export async function processUpload(region: string, key: string) {
+ "use workflow";
+
+ const getClient = createS3Client(region);
+ // getClient is a serializable step reference, not an S3Client
+ await uploadFile(getClient, key);
+}
+
+async function uploadFile(
+  getClient: () => Promise<S3ClientType>,
+ key: string
+) {
+ "use step";
+ const client = await getClient();
+ // Now you have a real S3Client with full Node.js access
+ await client.send(/* ... */);
+}
+```
+
+## Why This Works
+
+1. **Compiler transformation**: `"use step"` tells the SWC plugin to extract the function into a separate bundle. The workflow VM only sees a serializable reference (function ID + captured arguments).
+2. **Closure tracking**: The compiler tracks which variables the step function closes over. Only serializable values (strings, numbers, plain objects) can be captured.
+3. **Deferred construction**: The actual provider/client is only constructed when the step executes in the Node.js runtime — never in the sandboxed workflow VM.
+
+## Bundle optimization with dynamic imports
+
+Step functions run in full Node.js, so they can use `await import()` to load heavy dependencies on demand. This keeps the workflow bundle light — the sandboxed workflow VM never needs to parse or load these libraries.
+
+```typescript
+async function processWithHeavyLib(data: string) {
+ "use step";
+ const { parse } = await import("heavy-parser-lib");
+ return parse(data);
+}
+```
+
+This is especially useful for large SDKs (AWS, Google Cloud, parser libraries) that would bloat the workflow bundle unnecessarily. The `createS3Client` example [above](#writing-your-own-serializable-wrapper) already uses this pattern with `await import("@aws-sdk/client-s3")`.
+
+## Key APIs
+
+- [`"use step"`](/docs/api-reference/workflow/use-step) — marks a function for extraction and serialization
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) — declares the orchestrator function
+- [`DurableAgent`](/docs/api-reference/workflow-ai/durable-agent) — accepts a model factory for durable AI agent streaming
diff --git a/docs/content/docs/cookbook/agent-patterns/durable-agent.mdx b/docs/content/docs/cookbook/agent-patterns/durable-agent.mdx
new file mode 100644
index 0000000000..93bc9b2e82
--- /dev/null
+++ b/docs/content/docs/cookbook/agent-patterns/durable-agent.mdx
@@ -0,0 +1,191 @@
+---
+title: Durable Agent
+description: Replace a stateless AI agent with a durable one that survives crashes, retries tool calls, and streams output.
+type: guide
+summary: Convert an AI SDK Agent into a DurableAgent backed by a workflow, with tools as retryable steps.
+---
+
+Use this pattern to make any AI SDK agent durable. The agent becomes a workflow, tools become steps, and the framework handles retries, streaming, and state persistence automatically.
+
+## Pattern
+
+Replace `Agent` with `DurableAgent`, wrap the function in `"use workflow"`, mark each tool with `"use step"`, and stream output through `getWritable()`.
+
+### Simplified
+
+```typescript lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { getWritable } from "workflow";
+import { z } from "zod";
+import type { ModelMessage, UIMessageChunk } from "ai";
+
+declare function searchFlights(args: { from: string; to: string; date: string }): Promise<{ flights: { id: string; price: number }[] }>; // @setup
+declare function bookFlight(args: { flightId: string; passenger: string }): Promise<{ confirmationId: string }>; // @setup
+
+export async function flightAgent(messages: ModelMessage[]) {
+ "use workflow";
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-haiku-4.5",
+ instructions: "You are a helpful flight booking assistant.",
+ tools: {
+ searchFlights: {
+ description: "Search for available flights",
+ inputSchema: z.object({
+ from: z.string(),
+ to: z.string(),
+ date: z.string(),
+ }),
+ execute: searchFlights,
+ },
+ bookFlight: {
+ description: "Book a specific flight",
+ inputSchema: z.object({
+ flightId: z.string(),
+ passenger: z.string(),
+ }),
+ execute: bookFlight,
+ },
+ },
+ });
+
+ await agent.stream({
+ messages,
+ writable: getWritable(),
+ });
+}
+```
+
+### Full Implementation
+
+```typescript lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { getWritable } from "workflow";
+import { z } from "zod";
+import type { ModelMessage, UIMessageChunk } from "ai";
+
+// Step: Search flights with full Node.js access and automatic retries
+async function searchFlights({
+ from,
+ to,
+ date,
+}: {
+ from: string;
+ to: string;
+ date: string;
+}) {
+ "use step";
+
+ const response = await fetch(
+ `https://api.example.com/flights?from=${from}&to=${to}&date=${date}`
+ );
+ if (!response.ok) throw new Error(`Search failed: ${response.status}`);
+ return response.json();
+}
+
+// Step: Book a flight — retries on transient failures
+async function bookFlight({
+ flightId,
+ passenger,
+}: {
+ flightId: string;
+ passenger: string;
+}) {
+ "use step";
+
+ const response = await fetch("https://api.example.com/bookings", {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({ flightId, passenger }),
+ });
+ if (!response.ok) throw new Error(`Booking failed: ${response.status}`);
+ return response.json();
+}
+
+// Step: Check flight status
+async function checkStatus({ flightId }: { flightId: string }) {
+ "use step";
+
+ const response = await fetch(
+ `https://api.example.com/flights/${flightId}/status`
+ );
+ return response.json();
+}
+
+export async function flightAgent(messages: ModelMessage[]) {
+ "use workflow";
+
+ const writable = getWritable();
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-haiku-4.5",
+ instructions: "You are a helpful flight booking assistant.",
+ tools: {
+ searchFlights: {
+ description: "Search for available flights between two airports",
+ inputSchema: z.object({
+ from: z.string().describe("Departure airport code"),
+ to: z.string().describe("Arrival airport code"),
+ date: z.string().describe("Travel date (YYYY-MM-DD)"),
+ }),
+ execute: searchFlights,
+ },
+ bookFlight: {
+ description: "Book a specific flight for a passenger",
+ inputSchema: z.object({
+ flightId: z.string().describe("Flight ID from search results"),
+ passenger: z.string().describe("Passenger full name"),
+ }),
+ execute: bookFlight,
+ },
+ checkStatus: {
+ description: "Check the current status of a flight",
+ inputSchema: z.object({
+ flightId: z.string().describe("Flight ID to check"),
+ }),
+ execute: checkStatus,
+ },
+ },
+ });
+
+ const result = await agent.stream({
+ messages,
+ writable,
+ maxSteps: 10,
+ });
+
+ return { messages: result.messages };
+}
+```
+
+### API Route
+
+```typescript lineNumbers
+import { createUIMessageStreamResponse } from "ai";
+import { start } from "workflow/api";
+import { flightAgent } from "@/workflows/flight-agent";
+import type { UIMessage } from "ai";
+import { convertToModelMessages } from "ai";
+
+export async function POST(req: Request) {
+ const { messages }: { messages: UIMessage[] } = await req.json();
+ const modelMessages = await convertToModelMessages(messages);
+
+ const run = await start(flightAgent, [modelMessages]);
+
+ return createUIMessageStreamResponse({
+ stream: run.readable,
+ headers: {
+ "x-workflow-run-id": run.runId,
+ },
+ });
+}
+```
+
+## Key APIs
+
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) — declares the orchestrator function
+- [`"use step"`](/docs/api-reference/workflow/use-step) — declares step functions with retries and full Node.js access
+- [`DurableAgent`](/docs/api-reference/workflow-ai/durable-agent) — durable wrapper around AI SDK's Agent
+- [`getWritable()`](/docs/api-reference/workflow/get-writable) — streams agent output to the client
+- [`start()`](/docs/api-reference/workflow-api/start) — starts a workflow run from an API route
diff --git a/docs/content/docs/cookbook/agent-patterns/human-in-the-loop.mdx b/docs/content/docs/cookbook/agent-patterns/human-in-the-loop.mdx
new file mode 100644
index 0000000000..90d131b5ad
--- /dev/null
+++ b/docs/content/docs/cookbook/agent-patterns/human-in-the-loop.mdx
@@ -0,0 +1,278 @@
+---
+title: Human-in-the-Loop
+description: Pause an AI agent to wait for human approval, then resume based on the decision.
+type: guide
+summary: Use defineHook with the tool call ID to suspend an agent for human approval, with an optional timeout.
+---
+
+Use this pattern when an AI agent needs human confirmation before performing a consequential action like booking, purchasing, or publishing. The workflow suspends without consuming resources until the human responds.
+
+## Pattern
+
+Create a typed hook using `defineHook()`. When the agent calls the approval tool, the tool creates a hook instance using the tool call ID as the token, then awaits it. The UI renders approval controls, and an API route resumes the hook with the decision.
+
+### Simplified
+
+```typescript lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { defineHook, sleep, getWritable } from "workflow";
+import { z } from "zod";
+import type { ModelMessage, UIMessageChunk } from "ai";
+
+export const bookingApprovalHook = defineHook({
+ schema: z.object({
+ approved: z.boolean(),
+ comment: z.string().optional(),
+ }),
+});
+
+declare function confirmBooking(args: { flightId: string; passenger: string }): Promise<{ confirmationId: string }>; // @setup
+
+// This tool runs at the workflow level (no "use step") because hooks are workflow primitives
+async function requestBookingApproval(
+ { flightId, passenger, price }: { flightId: string; passenger: string; price: number },
+ { toolCallId }: { toolCallId: string }
+) {
+ const hook = bookingApprovalHook.create({ token: toolCallId });
+
+ const result = await Promise.race([
+ hook.then((payload) => ({ type: "decision" as const, ...payload })),
+ sleep("24h").then(() => ({ type: "timeout" as const, approved: false })),
+ ]);
+
+ if (result.type === "timeout") return "Booking request expired after 24 hours.";
+ if (!result.approved) return `Booking rejected: ${result.comment || "No reason given"}`;
+
+ const booking = await confirmBooking({ flightId, passenger });
+ return `Booked! Confirmation: ${booking.confirmationId}`;
+}
+
+export async function bookingAgent(messages: ModelMessage[]) {
+ "use workflow";
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-haiku-4.5",
+ instructions: "You help book flights. Always request approval before booking.",
+ tools: {
+ requestBookingApproval: {
+ description: "Request human approval before booking a flight",
+ inputSchema: z.object({
+ flightId: z.string(),
+ passenger: z.string(),
+ price: z.number(),
+ }),
+ execute: requestBookingApproval,
+ },
+ },
+ });
+
+ await agent.stream({
+ messages,
+ writable: getWritable(),
+ });
+}
+```
+
+### Full Implementation
+
+```typescript lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { defineHook, sleep, getWritable } from "workflow";
+import { z } from "zod";
+import type { ModelMessage, UIMessageChunk } from "ai";
+
+// Define the approval hook with schema validation
+export const bookingApprovalHook = defineHook({
+ schema: z.object({
+ approved: z.boolean(),
+ comment: z.string().optional(),
+ }),
+});
+
+// Step: Search for flights (full Node.js access, automatic retries)
+async function searchFlights({
+ from,
+ to,
+ date,
+}: {
+ from: string;
+ to: string;
+ date: string;
+}) {
+ "use step";
+
+ // Your real flight search API call here
+ await new Promise((resolve) => setTimeout(resolve, 500));
+ return {
+ flights: [
+ { id: "FL-100", airline: "Example Air", price: 299, from, to, date },
+ { id: "FL-200", airline: "Demo Airlines", price: 349, from, to, date },
+ ],
+ };
+}
+
+// Step: Confirm the booking after approval
+async function confirmBooking({
+ flightId,
+ passenger,
+}: {
+ flightId: string;
+ passenger: string;
+}) {
+ "use step";
+
+ await new Promise((resolve) => setTimeout(resolve, 500));
+ return { confirmationId: `CONF-${flightId}-${Date.now().toString(36)}` };
+}
+
+// Workflow-level tool: hooks must be created in workflow context, not inside steps
+async function requestBookingApproval(
+ {
+ flightId,
+ passenger,
+ price,
+ }: { flightId: string; passenger: string; price: number },
+ { toolCallId }: { toolCallId: string }
+) {
+ // No "use step" — hooks are workflow-level primitives
+
+ const hook = bookingApprovalHook.create({ token: toolCallId });
+
+ // Race: human approval vs. 24-hour timeout
+ const result = await Promise.race([
+ hook.then((payload) => ({ type: "decision" as const, ...payload })),
+ sleep("24h").then(() => ({ type: "timeout" as const, approved: false })),
+ ]);
+
+ if (result.type === "timeout") {
+ return "Booking request expired after 24 hours.";
+ }
+
+ if (!result.approved) {
+ return `Booking rejected: ${result.comment || "No reason given"}`;
+ }
+
+ // Approved — proceed with booking
+ const booking = await confirmBooking({ flightId, passenger });
+ return `Flight ${flightId} booked for ${passenger}. Confirmation: ${booking.confirmationId}`;
+}
+
+export async function bookingAgent(messages: ModelMessage[]) {
+ "use workflow";
+
+ const writable = getWritable();
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-haiku-4.5",
+ instructions:
+ "You are a flight booking assistant. Search for flights, then request approval before booking.",
+ tools: {
+ searchFlights: {
+ description: "Search for available flights",
+ inputSchema: z.object({
+ from: z.string().describe("Departure airport code"),
+ to: z.string().describe("Arrival airport code"),
+ date: z.string().describe("Travel date (YYYY-MM-DD)"),
+ }),
+ execute: searchFlights,
+ },
+ requestBookingApproval: {
+ description: "Request human approval before booking a flight",
+ inputSchema: z.object({
+ flightId: z.string().describe("Flight ID to book"),
+ passenger: z.string().describe("Passenger name"),
+ price: z.number().describe("Total price"),
+ }),
+ execute: requestBookingApproval,
+ },
+ },
+ });
+
+ await agent.stream({ messages, writable });
+}
+```
+
+### API Route for Approvals
+
+```typescript lineNumbers
+import { bookingApprovalHook } from "@/workflows/booking-agent";
+
+export async function POST(request: Request) {
+ const { toolCallId, approved, comment } = await request.json();
+
+ // Schema validation happens automatically via defineHook
+ await bookingApprovalHook.resume(toolCallId, { approved, comment });
+
+ return Response.json({ success: true });
+}
+```
+
+### Approval Component
+
+```tsx lineNumbers
+"use client";
+
+import { useState } from "react";
+
+export function BookingApproval({
+ toolCallId,
+ input,
+ output,
+}: {
+ toolCallId: string;
+ input?: { flightId: string; passenger: string; price: number };
+ output?: string;
+}) {
+ const [comment, setComment] = useState("");
+ const [isSubmitting, setIsSubmitting] = useState(false);
+
+  if (output) {
+    return <p>{output}</p>;
+  }
+
+  const submit = async (approved: boolean) => {
+    setIsSubmitting(true);
+    await fetch("/api/approvals", {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ toolCallId, approved, comment }),
+    });
+  };
+
+  return (
+    <div>
+      <p>
+        Book flight {input?.flightId} for {input?.passenger} at ${input?.price}?
+      </p>
+      <input
+        value={comment}
+        onChange={(e) => setComment(e.target.value)}
+        placeholder="Optional comment"
+      />
+      <button disabled={isSubmitting} onClick={() => submit(true)}>
+        Approve
+      </button>
+      <button disabled={isSubmitting} onClick={() => submit(false)}>
+        Reject
+      </button>
+    </div>
+  );
+}
+```
+
+## Key APIs
+
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) — declares the orchestrator function
+- [`"use step"`](/docs/api-reference/workflow/use-step) — declares step functions with retries
+- [`defineHook()`](/docs/api-reference/workflow/define-hook) — type-safe hook with schema validation
+- [`sleep()`](/docs/api-reference/workflow/sleep) — durable timeout for approval expiry
+- [`DurableAgent`](/docs/api-reference/workflow-ai/durable-agent) — durable agent with tool definitions
diff --git a/docs/content/docs/cookbook/agent-patterns/meta.json b/docs/content/docs/cookbook/agent-patterns/meta.json
new file mode 100644
index 0000000000..edf26cce5b
--- /dev/null
+++ b/docs/content/docs/cookbook/agent-patterns/meta.json
@@ -0,0 +1,10 @@
+{
+ "title": "Agent Patterns",
+ "pages": [
+ "durable-agent",
+ "tool-streaming",
+ "human-in-the-loop",
+ "tool-orchestration",
+ "stop-workflow"
+ ]
+}
diff --git a/docs/content/docs/cookbook/agent-patterns/stop-workflow.mdx b/docs/content/docs/cookbook/agent-patterns/stop-workflow.mdx
new file mode 100644
index 0000000000..04fe436f16
--- /dev/null
+++ b/docs/content/docs/cookbook/agent-patterns/stop-workflow.mdx
@@ -0,0 +1,216 @@
+---
+title: Stop Workflow
+description: Gracefully cancel a running agent workflow using a hook signal.
+type: guide
+summary: Use a hook as a stop signal to break out of an agent loop and close the stream cleanly.
+---
+
+Use this pattern when you need to gracefully stop a running agent from the outside — for example, a "Stop" button in a chat UI or an admin cancellation endpoint. The workflow listens for a stop signal via a hook while the agent runs, and breaks out of the loop when the signal arrives.
+
+## Pattern
+
+Create a hook with a known token (the run ID). Listen for a stop signal in a non-blocking `.then()`. Check the flag before each agent turn and break if signaled. Close the stream cleanly on exit.
+
+### Simplified
+
+{/* @skip-typecheck - prepareStep stop pattern is conceptual */}
+```typescript lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { defineHook, getWritable, getWorkflowMetadata } from "workflow";
+import { z } from "zod";
+import type { ModelMessage, UIMessageChunk } from "ai";
+
+export const stopHook = defineHook({
+ schema: z.object({ reason: z.string().optional() }),
+});
+
+declare function processQuery(args: { query: string }): Promise<string>; // @setup
+
+export async function stoppableAgent(messages: ModelMessage[]) {
+ "use workflow";
+
+ const { workflowRunId } = getWorkflowMetadata();
+ let stopRequested = false;
+
+ const hook = stopHook.create({ token: `stop:${workflowRunId}` });
+ hook.then(() => { stopRequested = true; });
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-haiku-4.5",
+ tools: {
+ processQuery: {
+ description: "Process a query",
+ inputSchema: z.object({ query: z.string() }),
+ execute: processQuery,
+ },
+ },
+ });
+
+ const result = await agent.stream({
+ messages,
+ writable: getWritable(),
+ prepareStep: () => {
+ if (stopRequested) return { stop: true };
+ return {};
+ },
+ });
+
+ return { messages: result.messages, stopped: stopRequested };
+}
+```
+
+### Full Implementation
+
+{/* @skip-typecheck - prepareStep stop pattern is conceptual */}
+```typescript lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { defineHook, getWritable, getWorkflowMetadata } from "workflow";
+import { z } from "zod";
+import type { ModelMessage, UIMessageChunk } from "ai";
+
+// Hook to signal the workflow to stop
+export const stopHook = defineHook({
+ schema: z.object({
+ reason: z.string().optional(),
+ }),
+});
+
+// Step: Search the web
+async function searchWeb({ query }: { query: string }) {
+ "use step";
+
+ await new Promise((resolve) => setTimeout(resolve, 1000));
+ return { results: [`Result for "${query}"`] };
+}
+
+// Step: Analyze data
+async function analyzeData({ data }: { data: string }) {
+ "use step";
+
+ await new Promise((resolve) => setTimeout(resolve, 800));
+ return { analysis: `Analysis of: ${data}` };
+}
+
+// Step: Write the final close marker to the stream
+async function closeStream() {
+ "use step";
+
+ const writable = getWritable();
+ const writer = writable.getWriter();
+ try {
+ await writer.write({ type: "finish" } as UIMessageChunk);
+ } finally {
+ writer.releaseLock();
+ }
+ await writable.close();
+}
+
+export async function stoppableAgent(messages: ModelMessage[]) {
+ "use workflow";
+
+ const { workflowRunId } = getWorkflowMetadata();
+ const writable = getWritable();
+
+ // Listen for stop signal using a non-blocking hook
+ let stopRequested = false;
+ let stopReason: string | undefined;
+
+ const hook = stopHook.create({ token: `stop:${workflowRunId}` });
+ hook.then(({ reason }) => {
+ stopRequested = true;
+ stopReason = reason;
+ });
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-haiku-4.5",
+ instructions: "You are a research assistant. Search and analyze data as needed.",
+ tools: {
+ searchWeb: {
+ description: "Search the web for information",
+ inputSchema: z.object({ query: z.string() }),
+ execute: searchWeb,
+ },
+ analyzeData: {
+ description: "Analyze a piece of data",
+ inputSchema: z.object({ data: z.string() }),
+ execute: analyzeData,
+ },
+ },
+ maxSteps: 20,
+ });
+
+ const result = await agent.stream({
+ messages,
+ writable,
+ preventClose: true,
+ prepareStep: ({ stepNumber }) => {
+ // Check stop flag before each agent step
+ if (stopRequested) {
+ return { stop: true };
+ }
+ return {};
+ },
+ });
+
+ // Clean up: close the stream
+ await closeStream();
+
+ return {
+ messages: result.messages,
+ stopped: stopRequested,
+ stopReason,
+ stepsCompleted: result.steps.length,
+ };
+}
+```
+
+### API Route to Trigger Stop
+
+```typescript lineNumbers
+import { stopHook } from "@/workflows/stoppable-agent";
+
+export async function POST(
+ request: Request,
+ { params }: { params: Promise<{ runId: string }> }
+) {
+ const { runId } = await params;
+ const { reason } = await request.json();
+
+ await stopHook.resume(`stop:${runId}`, {
+ reason: reason || "User requested stop",
+ });
+
+ return Response.json({ success: true });
+}
+```
+
+### Client Stop Button
+
+```tsx lineNumbers
+"use client";
+
+export function StopButton({ runId }: { runId: string }) {
+ const handleStop = async () => {
+ await fetch(`/api/chat/${runId}/stop`, {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({ reason: "User clicked stop" }),
+ });
+ };
+
+  return (
+    <button type="button" onClick={handleStop}>
+      Stop
+    </button>
+  );
+}
+```
+
+## Key APIs
+
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) — declares the orchestrator function
+- [`"use step"`](/docs/api-reference/workflow/use-step) — declares step functions with retries
+- [`defineHook()`](/docs/api-reference/workflow/define-hook) — type-safe hook for the stop signal
+- [`getWorkflowMetadata()`](/docs/api-reference/workflow/get-workflow-metadata) — access the run ID for deterministic hook tokens
+- [`getWritable()`](/docs/api-reference/workflow/get-writable) — stream output and close cleanly on stop
+- [`DurableAgent`](/docs/api-reference/workflow-ai/durable-agent) — `prepareStep` callback to check stop flag before each step
diff --git a/docs/content/docs/cookbook/agent-patterns/tool-orchestration.mdx b/docs/content/docs/cookbook/agent-patterns/tool-orchestration.mdx
new file mode 100644
index 0000000000..9c38975b90
--- /dev/null
+++ b/docs/content/docs/cookbook/agent-patterns/tool-orchestration.mdx
@@ -0,0 +1,255 @@
+---
+title: Tool Orchestration
+description: Choose between step-level and workflow-level tools, or combine both for complex tool implementations.
+type: guide
+summary: Implement tools as steps for retries and I/O, at the workflow level for sleep and hooks, or combine both.
+---
+
+Use this pattern to understand when to implement a tool as a step, at the workflow level, or as a combination. The choice depends on whether the tool needs Node.js I/O (step), workflow primitives like `sleep()` and hooks (workflow level), or both.
+
+## Pattern
+
+Tools marked with `"use step"` get automatic retries and full Node.js access but cannot use `sleep()` or hooks. Tools without `"use step"` run in the workflow context and can use workflow primitives but cannot perform side effects directly. Combine both by having a workflow-level tool call into steps for I/O.
+
+### Step-Level vs Workflow-Level
+
+| Capability | Step (`"use step"`) | Workflow Level |
+|------------|---------------------|----------------|
+| `getWritable()` | Yes | No |
+| Automatic retries | Yes | No |
+| Side effects (fetch, DB) | Yes | No |
+| `sleep()` | No | Yes |
+| `createHook()` / `createWebhook()` | No | Yes |
+
+### Simplified
+
+```typescript lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { sleep, getWritable } from "workflow";
+import { z } from "zod";
+import type { UIMessageChunk } from "ai";
+
+// Step-level tool: I/O with retries
+async function fetchWeather({ city }: { city: string }) {
+ "use step";
+ const res = await fetch(`https://api.weather.com?city=${city}`);
+ return res.json();
+}
+
+// Workflow-level tool: uses sleep()
+async function scheduleReminder({ delayMs }: { delayMs: number }) {
+ // No "use step" — sleep() requires workflow context
+ await sleep(delayMs);
+ return { message: `Reminder fired after ${delayMs}ms` };
+}
+
+// Combined: workflow-level orchestration calling into steps
+async function fetchWithDelay({ url, delayMs }: { url: string; delayMs: number }) {
+ const result = await doFetch(url); // Step handles I/O
+ await sleep(delayMs); // Workflow handles sleep
+ return result;
+}
+
+async function doFetch(url: string) {
+ "use step";
+ const res = await fetch(url);
+ return res.json();
+}
+
+export async function assistantAgent(userMessage: string) {
+ "use workflow";
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-haiku-4.5",
+ tools: {
+ fetchWeather: {
+ description: "Get weather for a city",
+ inputSchema: z.object({ city: z.string() }),
+ execute: fetchWeather,
+ },
+ scheduleReminder: {
+ description: "Set a reminder after a delay",
+ inputSchema: z.object({ delayMs: z.number() }),
+ execute: scheduleReminder,
+ },
+ fetchWithDelay: {
+ description: "Fetch a URL then wait before returning",
+ inputSchema: z.object({ url: z.string(), delayMs: z.number() }),
+ execute: fetchWithDelay,
+ },
+ },
+ });
+
+ await agent.stream({
+ messages: [{ role: "user", content: userMessage }],
+ writable: getWritable(),
+ });
+}
+```
+
+### Full Implementation
+
+```typescript lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { sleep, createWebhook, getWritable } from "workflow";
+import { z } from "zod";
+import type { UIMessageChunk } from "ai";
+
+// --- Step-level tools: I/O with retries ---
+
+async function searchDatabase({ query }: { query: string }) {
+ "use step";
+
+ const response = await fetch(`https://api.example.com/search?q=${query}`);
+ if (!response.ok) throw new Error(`Search failed: ${response.status}`);
+ return response.json();
+}
+
+async function sendNotification({
+ userId,
+ message,
+}: {
+ userId: string;
+ message: string;
+}) {
+ "use step";
+
+ await fetch("https://api.example.com/notifications", {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({ userId, message }),
+ });
+ return { sent: true };
+}
+
+// --- Workflow-level tool: uses sleep ---
+
+async function waitThenCheck({
+ delayMs,
+ endpoint,
+}: {
+ delayMs: number;
+ endpoint: string;
+}) {
+ // No "use step" — workflow context needed for sleep()
+ await sleep(delayMs);
+ // Delegate I/O to a step
+ return pollEndpoint(endpoint);
+}
+
+async function pollEndpoint(endpoint: string) {
+ "use step";
+ const res = await fetch(endpoint);
+ return res.json();
+}
+
+// --- Workflow-level tool: uses webhook ---
+
+async function waitForCallback({ description }: { description: string }) {
+ // No "use step" — webhooks are workflow primitives
+ const webhook = createWebhook<{ status: string }>();
+ // Log the URL so external systems can call it
+ console.log(`Waiting for callback at: ${webhook.url}`);
+
+ const result = await Promise.race([
+ webhook.then((req) => req.json()),
+ sleep("1h").then(() => ({ status: "timeout" })),
+ ]);
+
+ return result;
+}
+
+// --- Combined tool: step I/O + workflow sleep + step I/O ---
+
+async function retryWithCooldown({
+ url,
+ maxAttempts,
+}: {
+ url: string;
+ maxAttempts: number;
+}) {
+ for (let i = 0; i < maxAttempts; i++) {
+ const result = await attemptFetch(url);
+ if (result.success) return result;
+ if (i < maxAttempts - 1) {
+ await sleep(`${(i + 1) * 5}s`); // Increasing cooldown between attempts
+ }
+ }
+ return { success: false, error: "All attempts failed" };
+}
+
+async function attemptFetch(url: string) {
+ "use step";
+ try {
+ const res = await fetch(url);
+ if (!res.ok) return { success: false, status: res.status };
+ return { success: true, data: await res.json() };
+ } catch {
+ return { success: false, error: "Network error" };
+ }
+}
+
+export async function orchestrationAgent(userMessage: string) {
+ "use workflow";
+
+ const writable = getWritable();
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-haiku-4.5",
+ instructions:
+ "You are an assistant with access to search, notifications, polling, callbacks, and retry tools.",
+ tools: {
+ searchDatabase: {
+ description: "Search the database",
+ inputSchema: z.object({ query: z.string() }),
+ execute: searchDatabase,
+ },
+ sendNotification: {
+ description: "Send a notification to a user",
+ inputSchema: z.object({
+ userId: z.string(),
+ message: z.string(),
+ }),
+ execute: sendNotification,
+ },
+ waitThenCheck: {
+ description: "Wait for a duration then check an endpoint",
+ inputSchema: z.object({
+ delayMs: z.number().describe("Milliseconds to wait"),
+ endpoint: z.string().describe("URL to check after waiting"),
+ }),
+ execute: waitThenCheck,
+ },
+ waitForCallback: {
+ description: "Create a webhook and wait for an external system to call it",
+ inputSchema: z.object({
+ description: z.string().describe("What the callback is for"),
+ }),
+ execute: waitForCallback,
+ },
+ retryWithCooldown: {
+ description: "Fetch a URL with retries and increasing cooldown between attempts",
+ inputSchema: z.object({
+ url: z.string(),
+ maxAttempts: z.number().default(3),
+ }),
+ execute: retryWithCooldown,
+ },
+ },
+ });
+
+ await agent.stream({
+ messages: [{ role: "user", content: userMessage }],
+ writable,
+ });
+}
+```
+
+## Key APIs
+
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) — declares the orchestrator function
+- [`"use step"`](/docs/api-reference/workflow/use-step) — declares step functions with retries and Node.js access
+- [`sleep()`](/docs/api-reference/workflow/sleep) — durable pause (only in workflow context)
+- [`createWebhook()`](/docs/api-reference/workflow/create-webhook) — wait for external HTTP callbacks (only in workflow context)
+- [`getWritable()`](/docs/api-reference/workflow/get-writable) — stream data from steps
+- [`DurableAgent`](/docs/api-reference/workflow-ai/durable-agent) — agent with mixed step/workflow-level tools
diff --git a/docs/content/docs/cookbook/agent-patterns/tool-streaming.mdx b/docs/content/docs/cookbook/agent-patterns/tool-streaming.mdx
new file mode 100644
index 0000000000..c6c7e752ed
--- /dev/null
+++ b/docs/content/docs/cookbook/agent-patterns/tool-streaming.mdx
@@ -0,0 +1,181 @@
+---
+title: Tool Streaming
+description: Stream real-time progress updates from tools to the UI while they execute.
+type: guide
+summary: Emit custom data parts from step functions to show incremental results during long-running tool calls.
+---
+
+Use this pattern when tools take a long time to execute and you want to show progress updates, intermediate results, or status messages in the UI while the tool is still running.
+
+## Pattern
+
+Inside a step function, call `getWritable()` to write custom data parts to the same stream the agent uses. These appear as typed data parts in the client's message parts array.
+
+### Simplified
+
+```typescript lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { getWritable } from "workflow";
+import { z } from "zod";
+import type { UIMessageChunk } from "ai";
+
+declare function performSearch(query: string): Promise<{ id: string; title: string }[]>; // @setup
+declare function searchWithProgress(args: { query: string }): Promise<{ message: string; items: { title: string; score: number }[] }>; // @setup
+
+export async function searchAgent(userMessage: string) {
+ "use workflow";
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-haiku-4.5",
+ tools: {
+ search: {
+ description: "Search for items",
+ inputSchema: z.object({ query: z.string() }),
+ execute: searchWithProgress,
+ },
+ },
+ });
+
+ await agent.stream({
+ messages: [{ role: "user", content: userMessage }],
+ writable: getWritable(),
+ });
+}
+```
+
+### Full Implementation
+
+```typescript lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { getWritable } from "workflow";
+import { z } from "zod";
+import type { UIMessageChunk } from "ai";
+
+// Custom data part type for the client to render
+interface FoundItemDataPart {
+ type: "data-found-item";
+ id: string;
+ data: {
+ title: string;
+ score: number;
+ };
+}
+
+// Step: Search with streaming progress updates
+async function searchWithProgress(
+ { query }: { query: string },
+ { toolCallId }: { toolCallId: string }
+) {
+ "use step";
+
+ const writable = getWritable();
+ const writer = writable.getWriter();
+
+ try {
+ // Simulate finding items one at a time
+ const items = [
+ { title: "Result A", score: 95 },
+ { title: "Result B", score: 87 },
+ { title: "Result C", score: 72 },
+ ];
+
+ for (const item of items) {
+ // Simulate search latency
+ await new Promise((resolve) => setTimeout(resolve, 800));
+
+ // Stream each result to the UI as it's found
+ await writer.write({
+ type: "data-found-item",
+ id: `${toolCallId}-${item.title}`,
+ data: item,
+ } as UIMessageChunk);
+ }
+
+ return {
+ message: `Found ${items.length} results for "${query}"`,
+ items,
+ };
+ } finally {
+ writer.releaseLock();
+ }
+}
+
+// Step: Fetch details for a specific item
+async function getItemDetails({ itemId }: { itemId: string }) {
+ "use step";
+
+ const writable = getWritable();
+ const writer = writable.getWriter();
+
+ try {
+ // Emit a transient progress message
+ await writer.write({
+ type: "data-progress",
+ data: { message: `Loading details for ${itemId}...` },
+ transient: true,
+ } as UIMessageChunk);
+
+ await new Promise((resolve) => setTimeout(resolve, 1000));
+
+ return { itemId, description: "Detailed information", available: true };
+ } finally {
+ writer.releaseLock();
+ }
+}
+
+export async function searchAgent(userMessage: string) {
+ "use workflow";
+
+ const writable = getWritable();
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-haiku-4.5",
+ instructions: "You help users search for items. Use the search tool first, then get details if asked.",
+ tools: {
+ search: {
+ description: "Search for items matching a query",
+ inputSchema: z.object({
+ query: z.string().describe("Search query"),
+ }),
+ execute: searchWithProgress,
+ },
+ getDetails: {
+ description: "Get detailed information about a specific item",
+ inputSchema: z.object({
+ itemId: z.string().describe("Item ID from search results"),
+ }),
+ execute: getItemDetails,
+ },
+ },
+ });
+
+ await agent.stream({
+ messages: [{ role: "user", content: userMessage }],
+ writable,
+ });
+}
+```
+
+### Client Rendering
+
+```tsx lineNumbers
+// In your chat component's message rendering:
+{message.parts.map((part, i) => {
+ if (part.type === "data-found-item") {
+ const item = part.data as { title: string; score: number };
+    return (
+      <div key={i}>
+        <p>{item.title}</p>
+        <p>Score: {item.score}</p>
+      </div>
+    );
+ }
+ // ... other part types
+})}
+```
+
+## Key APIs
+
+- [`"use step"`](/docs/api-reference/workflow/use-step) — step functions can write to the stream
+- [`getWritable()`](/docs/api-reference/workflow/get-writable) — access the run's output stream from inside a step
+- [`DurableAgent`](/docs/api-reference/workflow-ai/durable-agent) — agent streams LLM output to the same writable
diff --git a/docs/content/docs/cookbook/common-patterns/batching.mdx b/docs/content/docs/cookbook/common-patterns/batching.mdx
new file mode 100644
index 0000000000..c60ba5b5d4
--- /dev/null
+++ b/docs/content/docs/cookbook/common-patterns/batching.mdx
@@ -0,0 +1,179 @@
+---
+title: Batching & Parallel Processing
+description: Process large collections in parallel batches with failure isolation between groups.
+type: guide
+summary: Split items into fixed-size batches, process each batch concurrently with Promise.allSettled, and pace batches with sleep to avoid overloading downstream services.
+---
+
+Use batching when you need to process a large list of items in parallel while controlling concurrency. Items are split into fixed-size batches, each batch runs concurrently, and failures in one batch don't affect others.
+
+## When to use this
+
+- Processing hundreds or thousands of items (orders, images, records)
+- Calling rate-limited APIs where you need to control concurrency
+- Any fan-out where you want failure isolation between groups
+
+## Pattern
+
+The workflow splits items into chunks and processes each chunk with `Promise.allSettled()`. A `sleep()` between chunks prevents overloading downstream services.
+
+```typescript
+import { sleep } from "workflow";
+
+declare function processItem(item: string): Promise<{ item: string; ok: boolean }>; // @setup
+
+export async function processBatch(items: string[], batchSize: number = 5) {
+ "use workflow";
+
+ const results = [];
+
+ for (let i = 0; i < items.length; i += batchSize) {
+ const batch = items.slice(i, i + batchSize);
+
+ // Run batch in parallel -- failures are isolated
+ const outcomes = await Promise.allSettled(
+ batch.map((item) => processItem(item))
+ );
+
+ for (let j = 0; j < outcomes.length; j++) {
+ const outcome = outcomes[j];
+ results.push(
+ outcome.status === "fulfilled"
+ ? outcome.value
+ : { item: batch[j], ok: false, error: String(outcome.reason) }
+ );
+ }
+
+ // Pace between batches to avoid overload
+ if (i + batchSize < items.length) {
+ await sleep("1s");
+ }
+ }
+
+ const succeeded = results.filter((r) => r.ok).length;
+ return { total: results.length, succeeded, failed: results.length - succeeded };
+}
+```
+
+### Step function
+
+Each item is processed in its own step, giving it full Node.js access and automatic retries.
+
+```typescript
+async function processItem(item: string): Promise<{ item: string; ok: boolean }> {
+ "use step";
+ const res = await fetch(`https://api.example.com/process`, {
+ method: "POST",
+ body: JSON.stringify({ item }),
+ });
+ if (!res.ok) throw new Error(`Failed to process ${item}`);
+ return { item, ok: true };
+}
+```
+
+## Variations
+
+### Scatter-gather
+
+When you need results from multiple independent sources before continuing, fan out in parallel and collect all results:
+
+```typescript
+export async function scatterGather(query: string) {
+ "use workflow";
+
+ const [web, database, cache] = await Promise.allSettled([
+ searchWeb(query),
+ searchDatabase(query),
+ searchCache(query),
+ ]);
+
+ return {
+ web: web.status === "fulfilled" ? web.value : null,
+ database: database.status === "fulfilled" ? database.value : null,
+ cache: cache.status === "fulfilled" ? cache.value : null,
+ };
+}
+
+async function searchWeb(query: string): Promise<string[]> {
+ "use step";
+ // Full Node.js access -- call external APIs
+ const res = await fetch(`https://search.example.com?q=${query}`);
+ return res.json();
+}
+
+async function searchDatabase(query: string): Promise<string[]> {
+ "use step";
+ // Query your database
+ return [`db-result-for-${query}`];
+}
+
+async function searchCache(query: string): Promise<string[]> {
+ "use step";
+ return [`cached-result-for-${query}`];
+}
+```
+
+## In-step concurrency control
+
+When you need to process many items against a rate-limited API but want the entire operation to be a single atomic step, batch the work inside the step itself. This keeps the event log clean (one step instead of hundreds) while still controlling concurrency.
+
+```typescript
+async function processConcurrently<T>(
+  items: string[],
+  processor: (item: string) => Promise<T>,
+  maxConcurrent: number = 5,
+): Promise<T[]> {
+ "use step";
+ const results: T[] = [];
+
+ for (let i = 0; i < items.length; i += maxConcurrent) {
+ const batch = items.slice(i, i + maxConcurrent);
+ const batchResults = await Promise.all(batch.map(processor));
+ results.push(...batchResults);
+ }
+
+ return results;
+}
+```
+
+Usage in a workflow:
+
+```typescript
+declare function processConcurrently<T>(items: string[], processor: (item: string) => Promise<T>, maxConcurrent?: number): Promise<T[]>; // @setup
+
+export async function moderateImages(imageUrls: string[]) {
+ "use workflow";
+
+ const results = await processConcurrently(
+ imageUrls,
+ async (url) => {
+ const res = await fetch("https://api.example.com/moderate", {
+ method: "POST",
+ body: JSON.stringify({ url }),
+ });
+ return res.json();
+ },
+ 3, // max 3 concurrent API calls
+ );
+
+ return { total: results.length, results };
+}
+```
+
+**When to use in-step batching vs workflow-level batching:**
+- **Workflow-level** (the pattern above): Each item is its own step with independent retries and failure isolation. Use when items are independent and individual failures should be retried.
+- **In-step**: All items are processed in one step. Use when the items are tightly coupled (e.g., moderating all thumbnails for a single video) or when you want to minimize step overhead for large item counts.
+
+## Tips
+
+- **Use `Promise.allSettled` over `Promise.all`** when you want to continue even if some items fail. `Promise.all` rejects on the first failure; `allSettled` waits for everything and tells you what failed.
+- **Tune batch size to your downstream API limits.** If the API allows 10 concurrent requests, use `batchSize: 10`.
+- **Add pacing with `sleep()`** between batches to respect rate limits. The sleep is durable -- it survives cold starts.
+- **Each `processItem` call is an independent step.** If one fails, it retries up to 3 times without affecting other items in the batch.
+
+## Key APIs
+
+- [`"use workflow"`](/docs/foundations/workflows-and-steps) -- marks the orchestrator function
+- [`"use step"`](/docs/foundations/workflows-and-steps) -- marks functions that run with full Node.js access
+- [`sleep()`](/docs/api-reference/workflow/sleep) -- pacing delay between batches
+- [`Promise.allSettled()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/allSettled) -- runs items in parallel, isolating failures
diff --git a/docs/content/docs/cookbook/common-patterns/child-workflows.mdx b/docs/content/docs/cookbook/common-patterns/child-workflows.mdx
new file mode 100644
index 0000000000..7f87e6d856
--- /dev/null
+++ b/docs/content/docs/cookbook/common-patterns/child-workflows.mdx
@@ -0,0 +1,372 @@
+---
+title: Child Workflows
+description: Spawn child workflows from a parent and poll their progress for batch processing, report generation, and other multi-workflow orchestration scenarios.
+type: guide
+summary: Orchestrate independent child workflows from a parent workflow using start(), sleep(), and getRun() to fan out work with isolated failure boundaries.
+---
+
+Use child workflows when a single workflow needs to orchestrate many independent units of work. Each child runs as its own workflow with a separate event log, retry boundary, and failure scope -- if one child fails, it doesn't take down the parent or siblings.
+
+## When to use child workflows
+
+Child workflows are the right choice when:
+
+- **Work units are independent.** Each child can run without knowing about the others (e.g., processing individual documents, generating separate reports).
+- **You need isolated failure boundaries.** A failing child should not abort unrelated work. The parent decides how to handle failures.
+- **You want massive fan-out.** Spawning 50 or 500 children is practical because each runs on its own infrastructure.
+- **You need per-item observability.** Each child workflow has its own run ID, status, and event log for monitoring.
+
+For simpler cases where steps share a single event log, use [direct await composition](/docs/foundations/common-patterns#direct-await-flattening) instead.
+
+## Basic pattern: spawn and poll
+
+The core pattern has three parts:
+
+1. A **step** that calls `start()` to spawn a child workflow and returns the run ID
+2. A **polling loop** in the parent workflow that checks child status with `getRun()`
+3. A **step** that retrieves the child's return value once it completes
+
+```typescript
+import { sleep } from "workflow";
+import { getRun, start } from "workflow/api";
+
+declare function pollUntilComplete(runIds: string[]): Promise<void>; // @setup
+declare function collectResults(runIds: string[]): Promise<Array<{ documentId: string; summary: string }>>; // @setup
+
+// Child workflow -- processes a single document
+export async function processDocument(documentId: string) {
+ "use workflow";
+
+ const content = await fetchDocument(documentId);
+ const analysis = await analyzeContent(content);
+ const summary = await generateSummary(analysis);
+
+ return { documentId, summary };
+}
+
+async function fetchDocument(documentId: string): Promise<string> {
+ "use step";
+ const res = await fetch(`https://docs.example.com/api/${documentId}`);
+ return res.text();
+}
+
+async function analyzeContent(content: string): Promise<string> {
+ "use step";
+ // Call analysis API
+ return `analysis of ${content.length} chars`;
+}
+
+async function generateSummary(analysis: string): Promise<string> {
+ "use step";
+ // Generate summary from analysis
+ return `Summary: ${analysis}`;
+}
+
+// Parent workflow -- orchestrates document processing
+export async function processDocumentBatch(documentIds: string[]) {
+ "use workflow";
+
+ // Spawn a child workflow for each document
+ const runIds = await spawnChildren(documentIds);
+
+ // Poll until all children complete
+ await pollUntilComplete(runIds);
+
+ // Collect results
+ const results = await collectResults(runIds);
+
+ return { processed: results.length, results };
+}
+
+async function spawnChildren(
+ documentIds: string[]
+): Promise<string[]> {
+ "use step"; // [!code highlight]
+
+ const runIds: string[] = [];
+ for (const docId of documentIds) {
+ const run = await start(processDocument, [docId]); // [!code highlight]
+ runIds.push(run.runId);
+ }
+ return runIds;
+}
+```
+
+### Polling loop
+
+The parent workflow polls child statuses in a loop, sleeping between checks. This is durable -- if the parent replays, the sleep and status checks replay from the event log.
+
+```typescript
+import { sleep } from "workflow";
+import { getRun } from "workflow/api";
+
+const POLL_INTERVAL = "30s";
+const MAX_POLL_ITERATIONS = 120; // 60 minutes at 30s intervals
+
+async function pollUntilComplete(runIds: string[]): Promise<void> {
+ let iteration = 0;
+
+ while (iteration < MAX_POLL_ITERATIONS) {
+ const status = await checkStatuses(runIds); // [!code highlight]
+
+ if (status.running === 0) {
+ if (status.failed > 0) {
+ throw new Error(
+ `${status.failed} of ${runIds.length} children failed`
+ );
+ }
+ return; // All completed successfully
+ }
+
+ iteration += 1;
+ await sleep(POLL_INTERVAL); // [!code highlight]
+ }
+
+ throw new Error("Timed out waiting for children to complete");
+}
+
+async function checkStatuses(
+ runIds: string[]
+): Promise<{ running: number; completed: number; failed: number }> {
+ "use step"; // [!code highlight]
+
+ let running = 0;
+ let completed = 0;
+ let failed = 0;
+
+ for (const runId of runIds) {
+ const run = getRun(runId); // [!code highlight]
+ const status = await run.status; // [!code highlight]
+
+ if (status === "completed") completed += 1;
+ else if (status === "failed" || status === "cancelled") failed += 1;
+ else running += 1; // queued, starting, running
+ }
+
+ return { running, completed, failed };
+}
+
+async function collectResults(
+ runIds: string[]
+): Promise<Array<{ documentId: string; summary: string }>> {
+ "use step";
+
+ const results = [];
+ for (const runId of runIds) {
+ const run = getRun(runId);
+ const value = await run.returnValue;
+ results.push(value as { documentId: string; summary: string });
+ }
+ return results;
+}
+```
+
+## Fan-out pattern: chunked spawning
+
+When spawning hundreds of children, batch the `start()` calls to avoid overwhelming the system. Use multiple spawn steps, each launching a chunk of children.
+
+```typescript
+import { start } from "workflow/api";
+
+declare function pollUntilComplete(runIds: string[]): Promise<void>; // @setup
+
+const CHUNK_SIZE = 10;
+
+export async function largeReportBatch(reportConfigs: Array<{ id: string; query: string }>) {
+ "use workflow";
+
+ // Spawn children in chunks
+ const allRunIds: string[] = [];
+ for (let i = 0; i < reportConfigs.length; i += CHUNK_SIZE) {
+ const chunk = reportConfigs.slice(i, i + CHUNK_SIZE);
+ const runIds = await spawnReportChunk(chunk); // [!code highlight]
+ allRunIds.push(...runIds);
+ }
+
+ // Poll until all complete
+ await pollUntilComplete(allRunIds);
+
+ const results = await collectReportResults(allRunIds);
+ return { total: results.length, results };
+}
+
+async function spawnReportChunk(
+ configs: Array<{ id: string; query: string }>
+): Promise<string[]> {
+ "use step";
+
+ const runIds: string[] = [];
+ for (const config of configs) {
+ const run = await start(generateReport, [config.id, config.query]);
+ runIds.push(run.runId);
+ }
+ return runIds;
+}
+
+async function generateReport(reportId: string, query: string) {
+ "use workflow";
+
+ const data = await queryDatabase(reportId, query);
+ const formatted = await formatReport(reportId, data);
+ return { reportId, formatted };
+}
+
+declare function queryDatabase(reportId: string, query: string): Promise<string>; // @setup
+declare function formatReport(reportId: string, data: string): Promise<string>; // @setup
+
+declare function collectReportResults(
+  runIds: string[]
+): Promise<Array<{ reportId: string; formatted: string }>>; // @setup
+```
+
+## Error handling
+
+### Tolerating partial failures
+
+Not every batch requires 100% success. Use `allowFailures` logic to let the parent continue when some children fail, while still surfacing the failures.
+
+```typescript
+import { sleep } from "workflow";
+import { getRun } from "workflow/api";
+
+const POLL_INTERVAL = "30s";
+const MAX_POLL_ITERATIONS = 120;
+
+async function pollWithPartialFailures(
+ runIds: string[],
+ maxFailureRate: number
+): Promise<{ completed: string[]; failed: string[] }> {
+ let iteration = 0;
+ const completedIds: string[] = [];
+ const failedIds: string[] = [];
+
+ while (iteration < MAX_POLL_ITERATIONS) {
+ const status = await checkDetailedStatuses(runIds);
+
+ completedIds.length = 0;
+ failedIds.length = 0;
+
+ for (const entry of status) {
+ if (entry.status === "completed") completedIds.push(entry.runId);
+ else if (entry.status === "failed" || entry.status === "cancelled")
+ failedIds.push(entry.runId);
+ }
+
+ const active = runIds.length - completedIds.length - failedIds.length;
+
+ // Check if failure rate exceeds threshold
+ const failureRate = failedIds.length / Math.max(1, runIds.length); // [!code highlight]
+ if (failureRate > maxFailureRate) { // [!code highlight]
+ throw new Error( // [!code highlight]
+ `Failure rate ${(failureRate * 100).toFixed(1)}% exceeds ` + // [!code highlight]
+ `threshold of ${(maxFailureRate * 100).toFixed(1)}%` // [!code highlight]
+ ); // [!code highlight]
+ } // [!code highlight]
+
+ if (active === 0) {
+ return { completed: completedIds, failed: failedIds };
+ }
+
+ iteration += 1;
+ await sleep(POLL_INTERVAL);
+ }
+
+ throw new Error("Timed out waiting for children");
+}
+
+async function checkDetailedStatuses(
+ runIds: string[]
+): Promise<Array<{ runId: string; status: string }>> {
+ "use step";
+
+ const statuses = [];
+ for (const runId of runIds) {
+ const run = getRun(runId);
+ const status = await run.status;
+ statuses.push({ runId, status });
+ }
+ return statuses;
+}
+```
+
+### Retrying failed children
+
+When a child fails, the parent can spawn a replacement and continue polling. Track restart counts to prevent infinite retry loops.
+
+```typescript
+import { sleep } from "workflow";
+
+declare function checkDetailedStatuses(runIds: string[]): Promise<Array<{ runId: string; status: string }>>; // @setup
+
+const POLL_INTERVAL = "30s";
+const MAX_POLL_ITERATIONS = 120;
+
+async function pollWithRetries(
+ initialRunIds: string[],
+ maxRestartsPerChild: number,
+  spawnReplacement: (index: number) => Promise<string>
+): Promise<void> {
+  const activeRuns = new Map<number, string>();
+  const restartCounts = new Map<number, number>();
+
+ initialRunIds.forEach((runId, index) => activeRuns.set(index, runId));
+
+ let iteration = 0;
+
+ while (iteration < MAX_POLL_ITERATIONS) {
+ const statuses = await checkDetailedStatuses(
+ Array.from(activeRuns.values())
+ );
+ const statusByRunId = new Map(
+ statuses.map((s) => [s.runId, s.status])
+ );
+
+ for (const [index, runId] of activeRuns.entries()) {
+ const status = statusByRunId.get(runId) ?? "running";
+
+ if (status === "completed") {
+ activeRuns.delete(index);
+ continue;
+ }
+
+ if (status === "failed" || status === "cancelled") {
+ const restarts = (restartCounts.get(index) ?? 0) + 1; // [!code highlight]
+ restartCounts.set(index, restarts); // [!code highlight]
+
+ if (restarts > maxRestartsPerChild) { // [!code highlight]
+ throw new Error( // [!code highlight]
+ `Child ${index} exceeded restart limit (${maxRestartsPerChild})` // [!code highlight]
+ ); // [!code highlight]
+ } // [!code highlight]
+
+ const newRunId = await spawnReplacement(index); // [!code highlight]
+ activeRuns.set(index, newRunId); // [!code highlight]
+ }
+ }
+
+ if (activeRuns.size === 0) return;
+
+ iteration += 1;
+ await sleep(POLL_INTERVAL);
+ }
+
+ throw new Error("Timed out waiting for children");
+}
+```
+
+## Tips
+
+- **`start()` must be called from a step**, not directly from a workflow function. Wrap it in a `"use step"` function.
+- **`getRun()` must also be called from a step.** The polling loop lives in the workflow, but the actual status check is a step.
+- **Set a max iteration count on polling loops** to prevent runaway workflows. Calculate the count from your expected max duration and poll interval.
+- **Use chunked spawning for large batches.** Spawning 500 children in a single step can time out. Break it into chunks of 10-50.
+- **Each child has its own retry semantics.** Steps inside child workflows retry independently. The parent only sees the child's final status.
+- **Use `deploymentId: "latest"`** if children should run on the most recent deployment. See the [`start()` API reference](/docs/api-reference/workflow-api/start#using-deploymentid-latest) for compatibility considerations.
+
+## Key APIs
+
+- [`start()`](/docs/api-reference/workflow-api/start) -- spawn a new workflow run and get its run ID
+- [`getRun()`](/docs/api-reference/workflow-api/get-run) -- retrieve a workflow run's status and return value
+- [`sleep()`](/docs/api-reference/workflow/sleep) -- durably pause between polling iterations
+- [`"use workflow"`](/docs/foundations/workflows-and-steps) -- marks the orchestrator function
+- [`"use step"`](/docs/foundations/workflows-and-steps) -- marks functions with full Node.js access
diff --git a/docs/content/docs/cookbook/common-patterns/content-router.mdx b/docs/content/docs/cookbook/common-patterns/content-router.mdx
new file mode 100644
index 0000000000..c7997f35c9
--- /dev/null
+++ b/docs/content/docs/cookbook/common-patterns/content-router.mdx
@@ -0,0 +1,207 @@
+---
+title: Conditional Routing
+description: Inspect a payload and route it to different step handlers based on its content.
+type: guide
+summary: Classify incoming messages and branch to specialized handlers using standard if/else logic in the workflow function.
+---
+
+Use conditional routing when incoming messages need different processing paths depending on their content. A support ticket about billing goes to the billing handler; a bug report goes to engineering. The workflow inspects the payload and branches with standard JavaScript control flow.
+
+## When to use this
+
+- Support ticket routing by category
+- Order processing with different flows per product type
+- Event handling where different event types need different logic
+- Any message-driven system where the handler depends on the content
+
+## Pattern: Content-based router
+
+The workflow classifies the input, then branches with `if`/`else` to call the appropriate step:
+
+```typescript
+declare function classifyTicket(ticketId: string, subject: string): Promise<{ ticketType: string }>; // @setup
+declare function handleBilling(ticketId: string): Promise<void>; // @setup
+declare function handleTechnical(ticketId: string): Promise<void>; // @setup
+declare function handleAccount(ticketId: string): Promise<void>; // @setup
+declare function handleFeedback(ticketId: string): Promise<void>; // @setup
+
+export async function routeTicket(ticketId: string, subject: string) {
+ "use workflow";
+
+ const { ticketType } = await classifyTicket(ticketId, subject);
+
+ if (ticketType === "billing") {
+ await handleBilling(ticketId);
+ } else if (ticketType === "technical") {
+ await handleTechnical(ticketId);
+ } else if (ticketType === "account") {
+ await handleAccount(ticketId);
+ } else {
+ await handleFeedback(ticketId);
+ }
+
+ return { ticketId, routedTo: ticketType };
+}
+```
+
+### Step functions
+
+Each handler is a separate `"use step"` function. The classification step can use an LLM, keyword matching, or any logic you need:
+
+```typescript
+async function classifyTicket(
+ ticketId: string,
+ subject: string
+): Promise<{ ticketType: string }> {
+ "use step";
+
+ // Example: simple keyword classification
+ // In production, this could call an LLM or ML model
+ const lower = subject.toLowerCase();
+ if (lower.includes("invoice") || lower.includes("charge") || lower.includes("refund")) {
+ return { ticketType: "billing" };
+ }
+ if (lower.includes("error") || lower.includes("bug") || lower.includes("crash")) {
+ return { ticketType: "technical" };
+ }
+ if (lower.includes("password") || lower.includes("login") || lower.includes("access")) {
+ return { ticketType: "account" };
+ }
+ return { ticketType: "feedback" };
+}
+
+async function handleBilling(ticketId: string): Promise<void> {
+  "use step";
+  // Look up billing records, process refund, etc.
+}
+
+async function handleTechnical(ticketId: string): Promise<void> {
+  "use step";
+  // Create bug report, notify engineering, etc.
+}
+
+async function handleAccount(ticketId: string): Promise<void> {
+  "use step";
+  // Reset password, update permissions, etc.
+}
+
+async function handleFeedback(ticketId: string): Promise<void> {
+  "use step";
+  // Log feedback, notify product team, etc.
+}
+```
+
+## Pattern: Enrichment before routing
+
+When downstream handlers need more context than the raw input provides, enrich the message in parallel before routing:
+
+```typescript
+export async function enrichAndRoute(email: string) {
+ "use workflow";
+
+ // Step 1: Look up base data
+ const contact = await lookupContact(email);
+
+ // Step 2: Enrich from multiple sources in parallel
+ const [crm, social] = await Promise.allSettled([
+ fetchCrmData(contact),
+ fetchSocialData(contact),
+ ]);
+
+ const enriched = {
+ ...contact,
+ crm: crm.status === "fulfilled" ? crm.value : null,
+ social: social.status === "fulfilled" ? social.value : null,
+ };
+
+ // Step 3: Route based on enriched data
+ if (enriched.crm?.segment === "enterprise") {
+ await routeToEnterpriseSales(enriched);
+ } else {
+ await routeToSelfServe(enriched);
+ }
+
+ return { email, segment: enriched.crm?.segment ?? "self-serve" };
+}
+
+async function lookupContact(email: string): Promise<{ email: string; domain: string }> {
+ "use step";
+ return { email, domain: email.split("@")[1] ?? "unknown" };
+}
+
+async function fetchCrmData(contact: { email: string }): Promise<{ segment: string }> {
+ "use step";
+ const res = await fetch(`https://crm.example.com/lookup?email=${contact.email}`);
+ return res.json();
+}
+
+async function fetchSocialData(contact: { email: string }): Promise<{ followers: number }> {
+ "use step";
+ const res = await fetch(`https://social.example.com/lookup?email=${contact.email}`);
+ return res.json();
+}
+
+async function routeToEnterpriseSales(enriched: unknown): Promise<void> {
+  "use step";
+  // Assign to enterprise sales team
+}
+
+async function routeToSelfServe(enriched: unknown): Promise<void> {
+  "use step";
+  // Add to self-serve onboarding flow
+}
+```
+
+## Pattern: Multiple event sources
+
+When a workflow must wait for signals from different systems before proceeding, create one hook per source and use `Promise.all` with a deadline:
+
+```typescript
+import { defineHook, sleep } from "workflow";
+
+export const orderSignal = defineHook<{ ok: true }>();
+
+const SIGNALS = ["payment", "inventory", "fraud"] as const;
+
+export async function waitForAllSignals(orderId: string) {
+ "use workflow";
+
+ const hooks = SIGNALS.map((kind) =>
+ orderSignal.create({ token: `${kind}:${orderId}` })
+ );
+
+ const outcome = await Promise.race([
+ Promise.all(hooks).then(() => ({ type: "ready" as const })),
+ sleep("5m").then(() => ({ type: "timeout" as const })),
+ ]);
+
+ if (outcome.type === "timeout") {
+ return { orderId, status: "timeout" };
+ }
+
+ await shipOrder(orderId);
+ return { orderId, status: "shipped" };
+}
+
+async function shipOrder(orderId: string): Promise<void> {
+ "use step";
+ await fetch(`https://shipping.example.com/ship`, {
+ method: "POST",
+ body: JSON.stringify({ orderId }),
+ });
+}
+```
+
+## Tips
+
+- **Workflow functions use standard JavaScript.** `if`/`else`, `switch`, ternaries -- any branching logic works. No special routing DSL needed.
+- **Each handler is an independent step.** This means each gets its own retries, its own error handling, and its own entry in the event log.
+- **Combine with enrichment** when downstream handlers need data from multiple sources. Fan out enrichment with `Promise.allSettled`, then route on the merged result.
+- **Use `defineHook` for event gateways** when the routing decision depends on external signals arriving asynchronously.
+
+## Key APIs
+
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) -- marks the orchestrator function
+- [`"use step"`](/docs/api-reference/workflow/use-step) -- marks each handler as a durable step
+- [`defineHook()`](/docs/api-reference/workflow/define-hook) -- creates hooks for event gateway patterns
+- [`sleep()`](/docs/api-reference/workflow/sleep) -- durable deadline for event gateways
diff --git a/docs/content/docs/cookbook/common-patterns/fan-out.mdx b/docs/content/docs/cookbook/common-patterns/fan-out.mdx
new file mode 100644
index 0000000000..59457725a0
--- /dev/null
+++ b/docs/content/docs/cookbook/common-patterns/fan-out.mdx
@@ -0,0 +1,208 @@
+---
+title: Fan-Out & Parallel Delivery
+description: Send a message to multiple channels or recipients in parallel with independent failure handling.
+type: guide
+summary: Fan out an incident alert to Slack, email, SMS, and PagerDuty simultaneously using Promise.allSettled, so a failure in one channel does not block the others.
+---
+
+Use fan-out when one event needs to trigger multiple independent actions in parallel. Each action runs as its own step, so failures are isolated -- a Slack outage doesn't prevent the email from sending.
+
+## When to use this
+
+- Incident alerting across multiple channels (Slack, email, SMS, PagerDuty)
+- Notifying a list of recipients determined at runtime
+- Any "broadcast" where each delivery is independent
+
+## Pattern: Static fan-out
+
+Define one step per channel and launch them all with `Promise.allSettled()`:
+
+```typescript
+declare function sendSlackAlert(incidentId: string, message: string): Promise<void>; // @setup
+declare function sendEmailAlert(incidentId: string, message: string): Promise<void>; // @setup
+declare function sendSmsAlert(incidentId: string, message: string): Promise<void>; // @setup
+declare function sendPagerDutyAlert(incidentId: string, message: string): Promise<void>; // @setup
+
+export async function incidentFanOut(incidentId: string, message: string) {
+ "use workflow";
+
+ const settled = await Promise.allSettled([
+ sendSlackAlert(incidentId, message),
+ sendEmailAlert(incidentId, message),
+ sendSmsAlert(incidentId, message),
+ sendPagerDutyAlert(incidentId, message),
+ ]);
+
+ const ok = settled.filter((r) => r.status === "fulfilled").length;
+ return { incidentId, delivered: ok, failed: settled.length - ok };
+}
+```
+
+### Step functions
+
+Each channel is a separate `"use step"` function. Steps have full Node.js access and retry automatically on transient failures.
+
+```typescript
+async function sendSlackAlert(incidentId: string, message: string) {
+ "use step";
+ await fetch("https://hooks.slack.com/services/T.../B.../xxx", {
+ method: "POST",
+ body: JSON.stringify({ text: `[${incidentId}] ${message}` }),
+ });
+ return { channel: "slack" };
+}
+
+async function sendEmailAlert(incidentId: string, message: string) {
+ "use step";
+ await fetch("https://api.sendgrid.com/v3/mail/send", {
+ method: "POST",
+ headers: { Authorization: `Bearer ${process.env.SENDGRID_KEY}` },
+ body: JSON.stringify({
+ to: [{ email: "oncall@example.com" }],
+ subject: `Incident ${incidentId}`,
+ content: [{ type: "text/plain", value: message }],
+ }),
+ });
+ return { channel: "email" };
+}
+
+async function sendSmsAlert(incidentId: string, message: string) {
+ "use step";
+ // Call Twilio or similar SMS provider
+ return { channel: "sms" };
+}
+
+async function sendPagerDutyAlert(incidentId: string, message: string) {
+ "use step";
+ // Call PagerDuty Events API
+ return { channel: "pagerduty" };
+}
+```
+
+## Pattern: Dynamic recipient list
+
+When recipients are determined at runtime (e.g., severity-based routing), build the list dynamically:
+
+```typescript
+type Severity = "info" | "warning" | "critical";
+
+const RULES = [
+ { channel: "slack", match: () => true },
+ { channel: "email", match: (s: Severity) => s === "warning" || s === "critical" },
+ { channel: "pagerduty", match: (s: Severity) => s === "critical" },
+];
+
+export async function alertByRecipientList(
+ alertId: string,
+ message: string,
+ severity: Severity
+) {
+ "use workflow";
+
+ const matched = RULES.filter((r) => r.match(severity)).map((r) => r.channel);
+
+ const settled = await Promise.allSettled(
+ matched.map((channel) => deliverToChannel(channel, alertId, message))
+ );
+
+ const delivered = settled.filter((r) => r.status === "fulfilled").length;
+ return { alertId, severity, matched, delivered, failed: matched.length - delivered };
+}
+
+async function deliverToChannel(
+ channel: string,
+ alertId: string,
+ message: string
+): Promise<void> {
+ "use step";
+ // Route to the appropriate API based on channel name
+ await fetch(`https://notifications.example.com/${channel}`, {
+ method: "POST",
+ body: JSON.stringify({ alertId, message }),
+ });
+}
+```
+
+## Pattern: Publish-subscribe
+
+When subscribers are managed in a registry and filtered by topic:
+
+```typescript
+type Subscriber = { id: string; name: string; topics: string[] };
+
+export async function publishEvent(topic: string, payload: string) {
+ "use workflow";
+
+ const subscribers = await loadSubscribers();
+ const matched = subscribers.filter((sub) => sub.topics.includes(topic));
+
+ await Promise.allSettled(
+ matched.map((sub) => deliverToSubscriber(sub.id, topic, payload))
+ );
+
+ return { topic, delivered: matched.length, total: subscribers.length };
+}
+
+async function loadSubscribers(): Promise<Subscriber[]> {
+ "use step";
+ // Load from database or configuration service
+ return [
+ { id: "sub-1", name: "Order Service", topics: ["orders", "inventory"] },
+ { id: "sub-2", name: "Email Notifier", topics: ["orders", "shipping"] },
+ { id: "sub-3", name: "Analytics", topics: ["orders", "inventory", "shipping"] },
+ ];
+}
+
+async function deliverToSubscriber(
+ subscriberId: string,
+ topic: string,
+ payload: string
+): Promise<void> {
+ "use step";
+ await fetch(`https://subscribers.example.com/${subscriberId}/deliver`, {
+ method: "POST",
+ body: JSON.stringify({ topic, payload }),
+ });
+}
+```
+
+## Deferred await (background steps)
+
+You don't have to await a step immediately. Start a step, do other work, and collect the result later. This is different from `Promise.all` -- you interleave sequential and parallel work instead of waiting for everything at once.
+
+```typescript
+declare function generateReport(data: Record<string, unknown>): Promise<string>; // @setup
+declare function sendNotification(userId: string, message: string): Promise<void>; // @setup
+declare function updateDashboard(userId: string): Promise<void>; // @setup
+
+export async function onboardUser(userId: string, data: Record<string, unknown>) {
+ "use workflow";
+
+ // Start report generation in the background
+ const reportPromise = generateReport(data);
+
+ // Do other work while the report generates
+ await sendNotification(userId, "Processing started");
+ await updateDashboard(userId);
+
+ // Now await the report when we actually need it
+ const report = await reportPromise;
+ return { userId, report };
+}
+```
+
+The workflow runtime tracks the background step like any other. If the workflow replays, the already-completed step returns its cached result instantly.
+
+## Tips
+
+- **Use `Promise.allSettled` over `Promise.all`.** `allSettled` lets you know which channels failed without aborting the others.
+- **Each delivery is an independent step.** Transient failures (e.g., Slack 503) trigger automatic retries without affecting other channels.
+- **Use `FatalError` for permanent failures** (e.g., PagerDuty not configured) to stop retries on that channel while letting others continue.
+- **Dynamic recipient lists** decouple routing from delivery -- adding a new channel is a configuration change, not a code change.
+
+## Key APIs
+
+- [`"use workflow"`](/docs/foundations/workflows-and-steps) -- marks the orchestrator function
+- [`"use step"`](/docs/foundations/workflows-and-steps) -- marks functions that run with full Node.js access
+- [`Promise.allSettled()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/allSettled) -- fans out to all targets, isolating failures
+- [`FatalError`](/docs/api-reference/workflow/fatal-error) -- prevents automatic retry for permanent failures
diff --git a/docs/content/docs/cookbook/common-patterns/idempotency.mdx b/docs/content/docs/cookbook/common-patterns/idempotency.mdx
new file mode 100644
index 0000000000..4983177780
--- /dev/null
+++ b/docs/content/docs/cookbook/common-patterns/idempotency.mdx
@@ -0,0 +1,134 @@
+---
+title: Idempotency
+description: Ensure external side effects happen exactly once, even when steps are retried or workflows are replayed.
+type: guide
+summary: Use step IDs as idempotency keys for external APIs like Stripe so that retries and replays don't create duplicate charges.
+---
+
+Workflow steps can be retried (on failure) and replayed (on cold start). If a step calls an external API that isn't idempotent, retries could create duplicate charges, send duplicate emails, or double-process records. Use idempotency keys to make these operations safe.
+
+## When to use this
+
+- Charging a payment (Stripe, PayPal)
+- Sending transactional emails or SMS
+- Creating records in external systems where duplicates are harmful
+- Any step that has side effects in systems you don't control
+
+## Pattern: Step ID as idempotency key
+
+Every step has a unique, deterministic `stepId` available via `getStepMetadata()`. Pass this as the idempotency key to external APIs:
+
+```typescript
+import { getStepMetadata } from "workflow";
+
+declare function createCharge(customerId: string, amount: number): Promise<{ id: string }>; // @setup
+declare function sendReceipt(customerId: string, chargeId: string): Promise<void>; // @setup
+
+export async function chargeCustomer(customerId: string, amount: number) {
+ "use workflow";
+
+ const charge = await createCharge(customerId, amount);
+ await sendReceipt(customerId, charge.id);
+
+ return { customerId, chargeId: charge.id, status: "completed" };
+}
+```
+
+### Step function with idempotency key
+
+```typescript
+import { getStepMetadata } from "workflow";
+
+async function createCharge(
+ customerId: string,
+ amount: number
+): Promise<{ id: string }> {
+ "use step";
+
+ const { stepId } = getStepMetadata();
+
+ // Stripe uses the idempotency key to deduplicate requests.
+ // If this step is retried, Stripe returns the same charge.
+ const charge = await fetch("https://api.stripe.com/v1/charges", {
+ method: "POST",
+ headers: {
+ Authorization: `Bearer ${process.env.STRIPE_SECRET_KEY}`,
+ "Idempotency-Key": stepId,
+ },
+ body: new URLSearchParams({
+ amount: String(amount),
+ currency: "usd",
+ customer: customerId,
+ }),
+ });
+
+ if (!charge.ok) {
+ const error = await charge.json();
+ throw new Error(`Charge failed: ${error.message}`);
+ }
+
+ return charge.json();
+}
+
+async function sendReceipt(customerId: string, chargeId: string): Promise<void> {
+ "use step";
+
+ const { stepId } = getStepMetadata();
+
+ await fetch("https://api.example.com/receipts", {
+ method: "POST",
+ headers: { "Idempotency-Key": stepId },
+ body: JSON.stringify({ customerId, chargeId }),
+ });
+}
+```
+
+## Pattern: Workflow-level deduplication
+
+Use the workflow `runId` as a natural deduplication key. Start workflows with a deterministic ID so re-triggering the same event doesn't create a second run:
+
+{/* @skip-typecheck: start() doesn't support id option yet -- aspirational API */}
+
+```typescript
+import { start } from "workflow/api";
+
+// POST /api/webhooks/stripe
+export async function POST(request: Request) {
+ const event = await request.json();
+
+ // Use the Stripe event ID as the workflow run ID.
+ // If this webhook is delivered twice, the second start()
+ // returns the existing run instead of creating a new one.
+ const run = await start({
+ id: `stripe-${event.id}`,
+ workflow: processStripeEvent,
+ input: event,
+ });
+
+ return Response.json({ runId: run.id });
+}
+```
+
+## Race condition caveats
+
+Workflow does not currently provide distributed locking or true exactly-once delivery across concurrent runs. If two workflow runs could process the same entity concurrently:
+
+- **Rely on the external API's idempotency** (like Stripe's `Idempotency-Key`) rather than checking a local flag.
+- **Don't use check-then-act patterns** like "read a flag, then write if not set" -- another run could read the same flag between your read and write.
+- **Use deterministic workflow IDs** to prevent duplicate runs from the same trigger event.
+
+If your external API doesn't support idempotency keys natively, consider adding a deduplication layer (e.g., a database unique constraint on the operation ID).
+
+## Tips
+
+- **`stepId` is deterministic.** It's the same value across retries and replays of the same step, making it a reliable idempotency key.
+- **Always provide idempotency keys for non-idempotent external calls.** Even if you think a step won't be retried, cold-start replay will re-execute it.
+- **Handle 409/conflict as success.** If an external API returns "already processed," treat that as a successful result, not an error.
+- **Make your own APIs idempotent** where possible. Accept an idempotency key and return the cached result on duplicate requests.
+
+## Key APIs
+
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) -- declares the orchestrator function
+- [`"use step"`](/docs/api-reference/workflow/use-step) -- declares step functions with full Node.js access
+- [`getStepMetadata()`](/docs/api-reference/step/get-step-metadata) -- provides the deterministic `stepId` for idempotency keys
+- [`start()`](/docs/api-reference/workflow-api/start) -- starts a workflow with an optional deterministic ID
diff --git a/docs/content/docs/cookbook/common-patterns/meta.json b/docs/content/docs/cookbook/common-patterns/meta.json
new file mode 100644
index 0000000000..4da9959546
--- /dev/null
+++ b/docs/content/docs/cookbook/common-patterns/meta.json
@@ -0,0 +1,15 @@
+{
+ "title": "Common Patterns",
+ "defaultOpen": true,
+ "pages": [
+ "saga",
+ "batching",
+ "rate-limiting",
+ "fan-out",
+ "scheduling",
+ "idempotency",
+ "webhooks",
+ "content-router",
+ "child-workflows"
+ ]
+}
diff --git a/docs/content/docs/cookbook/common-patterns/rate-limiting.mdx b/docs/content/docs/cookbook/common-patterns/rate-limiting.mdx
new file mode 100644
index 0000000000..bff77fd8da
--- /dev/null
+++ b/docs/content/docs/cookbook/common-patterns/rate-limiting.mdx
@@ -0,0 +1,228 @@
+---
+title: Rate Limiting & Retries
+description: Handle 429 responses and transient failures with RetryableError and exponential backoff.
+type: guide
+summary: When an external API returns 429, throw RetryableError with the Retry-After value so the workflow runtime automatically reschedules the step after the specified delay.
+---
+
+Use this pattern when calling external APIs that enforce rate limits. Instead of writing manual retry loops, throw `RetryableError` with a `retryAfter` value and let the workflow runtime handle rescheduling.
+
+## When to use this
+
+- Calling APIs that return 429 (Too Many Requests) with `Retry-After` headers
+- Any step that hits transient failures and needs backoff
+- Syncing data with third-party services (Stripe, CRMs, scrapers)
+
+## Pattern: RetryableError with Retry-After
+
+A step function calls an external API. On 429, it reads the `Retry-After` header and throws `RetryableError`. The runtime reschedules the step automatically.
+
+```typescript
+import { RetryableError } from "workflow";
+
+declare function fetchFromCrm(contactId: string): Promise<unknown>; // @setup
+declare function upsertToWarehouse(contactId: string, contact: unknown): Promise<void>; // @setup
+
+export async function syncContact(contactId: string) {
+ "use workflow";
+
+ const contact = await fetchFromCrm(contactId);
+ await upsertToWarehouse(contactId, contact);
+
+ return { contactId, status: "synced" };
+}
+```
+
+### Step function with rate limit handling
+
+```typescript
+import { RetryableError } from "workflow";
+
+async function fetchFromCrm(contactId: string) {
+ "use step";
+
+ const res = await fetch(`https://crm.example.com/contacts/${contactId}`);
+
+ if (res.status === 429) {
+ const retryAfter = res.headers.get("Retry-After");
+ throw new RetryableError("Rate limited by CRM", {
+ retryAfter: retryAfter ? parseInt(retryAfter) * 1000 : "1m",
+ });
+ }
+
+ if (!res.ok) throw new Error(`CRM returned ${res.status}`);
+ return res.json();
+}
+
+async function upsertToWarehouse(contactId: string, contact: unknown) {
+ "use step";
+ await fetch(`https://warehouse.example.com/contacts/${contactId}`, {
+ method: "PUT",
+ body: JSON.stringify(contact),
+ });
+}
+```
+
+## Pattern: Exponential backoff
+
+Use `getStepMetadata()` to access the current attempt number and calculate increasing delays:
+
+```typescript
+import { RetryableError, getStepMetadata } from "workflow";
+
+async function callFlakeyApi(endpoint: string) {
+ "use step";
+
+ const { attempt } = getStepMetadata();
+ const res = await fetch(endpoint);
+
+ if (res.status === 429 || res.status >= 500) {
+ throw new RetryableError(`Request failed (${res.status})`, {
+ retryAfter: 2 ** attempt * 1000, // 2s, 4s, 8s...
+ });
+ }
+
+ return res.json();
+}
+```
+
+## Pattern: Circuit breaker with sleep
+
+When a dependency is completely down, stop hitting it for a cooldown period using `sleep()`, then probe with a single test request:
+
+```typescript
+import { sleep } from "workflow";
+
+export async function circuitBreaker(maxRequests: number = 10) {
+ "use workflow";
+
+ let state: "closed" | "open" | "half-open" = "closed";
+ let consecutiveFailures = 0;
+ const FAILURE_THRESHOLD = 3;
+
+ for (let i = 1; i <= maxRequests; i++) {
+ if (state === "open") {
+ await sleep("30s"); // Durable cooldown
+ state = "half-open";
+ }
+
+ const success = await callService(i);
+
+ if (success) {
+ consecutiveFailures = 0;
+ if (state === "half-open") state = "closed";
+ } else {
+ consecutiveFailures++;
+ if (consecutiveFailures >= FAILURE_THRESHOLD) {
+ state = "open";
+ consecutiveFailures = 0;
+ }
+ }
+ }
+
+ return { status: state === "closed" ? "recovered" : "failed" };
+}
+
+async function callService(requestNum: number): Promise<boolean> {
+ "use step";
+ try {
+ const res = await fetch("https://payment-gateway.example.com/charge");
+ return res.ok;
+ } catch {
+ return false;
+ }
+}
+```
+
+## Pattern: Custom max retries
+
+Override the default retry count (3) for steps that need more or fewer attempts:
+
+```typescript
+async function fetchWithRetries(url: string) {
+ "use step";
+ const res = await fetch(url);
+ if (!res.ok) throw new Error(`Failed: ${res.status}`);
+ return res.json();
+}
+
+// Allow up to 10 retry attempts
+fetchWithRetries.maxRetries = 10;
+```
+
+## Application-level retry
+
+Sometimes you need retry logic at the workflow level -- wrapping a step call with your own backoff instead of relying on the framework's built-in `RetryableError`. This is useful when you want full control over retry conditions, delays, and error filtering.
+
+```typescript
+interface RetryOptions {
+ maxRetries?: number;
+ baseDelay?: number;
+ maxDelay?: number;
+ shouldRetry?: (error: Error, attempt: number) => boolean;
+}
+
+async function withRetry<T>(
+ fn: () => Promise<T>,
+ options: RetryOptions = {},
+): Promise<T> {
+ const { maxRetries = 3, baseDelay = 2000, maxDelay = 10000, shouldRetry } = options;
+ let lastError: Error | undefined;
+
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
+ try {
+ return await fn();
+ } catch (error) {
+ lastError = error instanceof Error ? error : new Error(String(error));
+ const isLastAttempt = attempt === maxRetries;
+ if (isLastAttempt || (shouldRetry && !shouldRetry(lastError, attempt + 1))) {
+ throw lastError;
+ }
+ // Exponential backoff with jitter
+ const delay = Math.min(baseDelay * 2 ** attempt * (0.5 + Math.random() * 0.5), maxDelay);
+ await new Promise(resolve => setTimeout(resolve, delay));
+ }
+ }
+
+ throw lastError;
+}
+```
+
+Use it in a workflow to wrap step calls:
+
+```typescript
+declare function withRetry<T>(fn: () => Promise<T>, options?: { maxRetries?: number; shouldRetry?: (error: Error) => boolean }): Promise<T>; // @setup
+declare function downloadFile(url: string): Promise<unknown>; // @setup
+
+export async function downloadWithRetry(url: string) {
+ "use workflow";
+
+ const result = await withRetry(() => downloadFile(url), {
+ maxRetries: 5,
+ shouldRetry: (error) => error.message.includes("Timeout"),
+ });
+
+ return result;
+}
+```
+
+**When to use this vs `RetryableError`/`FatalError`:**
+- **`RetryableError`** runs inside a step -- the framework reschedules the step after the delay. Use it for transient HTTP errors (429, 503) where the runtime should handle backoff.
+- **Application-level retry** wraps the step call from the workflow. Use it when you need custom retry conditions, want to retry across different steps, or when you're building a library and prefer not to depend on workflow-specific error classes.
+
+## Tips
+
+- **`RetryableError` is for transient failures.** Use it when the request might succeed on a later attempt (429, 503, network timeout).
+- **`FatalError` is for permanent failures.** Use it when retrying won't help (404, 401, invalid input). This skips all remaining retries.
+- **The `retryAfter` option accepts** a millisecond number, a duration string (`"1m"`, `"30s"`), or a `Date` object.
+- **Steps retry up to 3 times by default.** Set `fn.maxRetries = N` to change this per step function.
+- **Don't write manual sleep-retry loops.** The runtime handles scheduling natively with `RetryableError` -- it's more efficient and survives cold starts.
+
+## Key APIs
+
+- [`"use workflow"`](/docs/foundations/workflows-and-steps) -- marks the orchestrator function
+- [`"use step"`](/docs/foundations/workflows-and-steps) -- marks functions that run with full Node.js access
+- [`RetryableError`](/docs/api-reference/workflow/retryable-error) -- signals the runtime to retry after a delay
+- [`FatalError`](/docs/api-reference/workflow/fatal-error) -- signals a permanent failure, skipping retries
+- [`getStepMetadata()`](/docs/api-reference/step/get-step-metadata) -- provides the current attempt number and step ID
+- [`sleep()`](/docs/api-reference/workflow/sleep) -- durable pause for circuit breaker cooldowns
diff --git a/docs/content/docs/cookbook/common-patterns/saga.mdx b/docs/content/docs/cookbook/common-patterns/saga.mdx
new file mode 100644
index 0000000000..7a8f9e78d2
--- /dev/null
+++ b/docs/content/docs/cookbook/common-patterns/saga.mdx
@@ -0,0 +1,154 @@
+---
+title: Transactions & Rollbacks (Saga)
+description: Coordinate multi-step transactions with automatic rollback when a step fails.
+type: guide
+summary: Run a sequence of steps where each registers a compensation. If any step throws a FatalError, compensations execute in reverse order to restore consistency.
+---
+
+Use the saga pattern when a business transaction spans multiple services and you need automatic rollback if any step fails. Each forward step registers a compensation, and on failure the workflow unwinds them in reverse order.
+
+## When to use this
+
+- Multi-service transactions (reserve inventory, charge payment, provision access)
+- Any sequence where partial completion leaves the system in an inconsistent state
+- Operations that need "all or nothing" semantics across external APIs
+
+## Pattern
+
+Each step returns a result and pushes a compensation handler onto a stack. If a later step throws a `FatalError`, the workflow catches it and executes compensations in LIFO order.
+
+```typescript
+import { FatalError } from "workflow";
+
+declare function reserveSeats(accountId: string, seats: number): Promise<string>; // @setup
+declare function releaseSeats(accountId: string, reservationId: string): Promise<void>; // @setup
+declare function captureInvoice(accountId: string, seats: number): Promise<string>; // @setup
+declare function refundInvoice(accountId: string, invoiceId: string): Promise<void>; // @setup
+declare function provisionSeats(accountId: string, seats: number): Promise<string>; // @setup
+declare function deprovisionSeats(accountId: string, entitlementId: string): Promise<void>; // @setup
+declare function sendConfirmation(accountId: string, invoiceId: string, entitlementId: string): Promise<void>; // @setup
+
+export async function subscriptionUpgradeSaga(accountId: string, seats: number) {
+ "use workflow";
+
+ const compensations: Array<() => Promise<void>> = [];
+
+ try {
+ // Step 1: Reserve seats
+ const reservationId = await reserveSeats(accountId, seats);
+ compensations.push(() => releaseSeats(accountId, reservationId));
+
+ // Step 2: Capture payment
+ const invoiceId = await captureInvoice(accountId, seats);
+ compensations.push(() => refundInvoice(accountId, invoiceId));
+
+ // Step 3: Provision access
+ const entitlementId = await provisionSeats(accountId, seats);
+ compensations.push(() => deprovisionSeats(accountId, entitlementId));
+
+ // Step 4: Notify
+ await sendConfirmation(accountId, invoiceId, entitlementId);
+ return { status: "completed" };
+ } catch (error) {
+ if (!(error instanceof FatalError)) throw error;
+
+ // Unwind compensations in reverse order
+ while (compensations.length > 0) {
+ await compensations.pop()!();
+ }
+
+ return { status: "rolled_back" };
+ }
+}
+```
+
+### Step functions
+
+Each step is a `"use step"` function with full Node.js access. Forward steps do the work; compensation steps undo it.
+
+```typescript
+import { FatalError } from "workflow";
+
+async function reserveSeats(accountId: string, seats: number): Promise<string> {
+ "use step";
+ const res = await fetch(`https://api.example.com/seats/reserve`, {
+ method: "POST",
+ body: JSON.stringify({ accountId, seats }),
+ });
+ if (!res.ok) throw new FatalError("Seat reservation failed");
+ const { reservationId } = await res.json();
+ return reservationId;
+}
+
+async function releaseSeats(accountId: string, reservationId: string): Promise<void> {
+ "use step";
+ // Compensations should be idempotent — safe to call twice
+ await fetch(`https://api.example.com/seats/release`, {
+ method: "POST",
+ body: JSON.stringify({ accountId, reservationId }),
+ });
+}
+
+async function captureInvoice(accountId: string, seats: number): Promise<string> {
+ "use step";
+ const res = await fetch(`https://api.example.com/invoices`, {
+ method: "POST",
+ body: JSON.stringify({ accountId, seats }),
+ });
+ if (!res.ok) throw new FatalError("Invoice capture failed");
+ const { invoiceId } = await res.json();
+ return invoiceId;
+}
+
+async function refundInvoice(accountId: string, invoiceId: string): Promise<void> {
+ "use step";
+ await fetch(`https://api.example.com/invoices/${invoiceId}/refund`, {
+ method: "POST",
+ body: JSON.stringify({ accountId }),
+ });
+}
+
+async function provisionSeats(accountId: string, seats: number): Promise<string> {
+ "use step";
+ const res = await fetch(`https://api.example.com/entitlements`, {
+ method: "POST",
+ body: JSON.stringify({ accountId, seats }),
+ });
+ if (!res.ok) throw new FatalError("Provisioning failed");
+ const { entitlementId } = await res.json();
+ return entitlementId;
+}
+
+async function deprovisionSeats(accountId: string, entitlementId: string): Promise<void> {
+ "use step";
+ await fetch(`https://api.example.com/entitlements/${entitlementId}`, {
+ method: "DELETE",
+ body: JSON.stringify({ accountId }),
+ });
+}
+
+async function sendConfirmation(
+ accountId: string,
+ invoiceId: string,
+ entitlementId: string
+): Promise<void> {
+ "use step";
+ await fetch(`https://api.example.com/notifications`, {
+ method: "POST",
+ body: JSON.stringify({ accountId, invoiceId, entitlementId, template: "upgrade-complete" }),
+ });
+}
+```
+
+## Tips
+
+- **Use `FatalError` for permanent failures.** Regular errors trigger automatic retries (up to 3 by default). Throw `FatalError` when retrying won't help (e.g., insufficient funds, invalid input).
+- **Make compensations idempotent.** If a compensation step is retried, it should produce the same result. Check whether the resource was already released before releasing it again.
+- **Compensation steps are also `"use step"` functions.** This makes them durable — if the workflow restarts mid-rollback, it resumes where it left off.
+- **Capture values in closures carefully.** Use block-scoped variables or copy values before pushing compensations to avoid referencing stale state.
+
+## Key APIs
+
+- [`"use workflow"`](/docs/api-reference/workflow/use-workflow) -- declares the orchestrator function
+- [`"use step"`](/docs/api-reference/workflow/use-step) -- declares step functions with full Node.js access
+- [`FatalError`](/docs/api-reference/workflow/fatal-error) -- non-retryable error that triggers compensation
diff --git a/docs/content/docs/cookbook/common-patterns/scheduling.mdx b/docs/content/docs/cookbook/common-patterns/scheduling.mdx
new file mode 100644
index 0000000000..c8713c31b7
--- /dev/null
+++ b/docs/content/docs/cookbook/common-patterns/scheduling.mdx
@@ -0,0 +1,249 @@
+---
+title: Sleep, Scheduling & Timed Workflows
+description: Use durable sleep to schedule actions minutes, hours, days, or weeks into the future.
+type: guide
+summary: Schedule future actions with durable sleep that survives cold starts, and race sleeps against hooks to let external events wake the workflow early.
+---
+
+Workflow's `sleep()` is durable -- it survives cold starts, restarts, and deployments. This makes it the foundation for scheduled actions, drip campaigns, reminders, and any pattern that needs to wait for real-world time to pass.
+
+## When to use this
+
+- Sending emails on a schedule (drip campaigns, reminders, digests)
+- Waiting for a deadline before taking action
+- Any pattern where "do X, wait N hours, then do Y" needs to be reliable
+
+## Pattern: Drip campaign
+
+Send emails at scheduled intervals using `sleep()` between steps. The workflow runs for days or weeks, sleeping between each email.
+
+```typescript
+import { sleep } from "workflow";
+
+export async function onboardingDrip(email: string) {
+ "use workflow";
+
+ await sendEmail(email, "welcome");
+
+ await sleep("1d");
+ await sendEmail(email, "getting-started-tips");
+
+ await sleep("2d");
+ await sendEmail(email, "feature-highlights");
+
+ await sleep("4d");
+ await sendEmail(email, "follow-up");
+
+ return { email, status: "completed", totalDays: 7 };
+}
+
+async function sendEmail(email: string, template: string): Promise<void> {
+ "use step";
+ await fetch("https://api.sendgrid.com/v3/mail/send", {
+ method: "POST",
+ headers: { Authorization: `Bearer ${process.env.SENDGRID_KEY}` },
+ body: JSON.stringify({
+ to: [{ email }],
+ template_id: template,
+ }),
+ });
+}
+```
+
+## Pattern: Interruptible reminder (sleep vs hook)
+
+Race a `sleep()` against a `defineHook` so external events can cancel, snooze, or send early:
+
+```typescript
+import { defineHook, sleep } from "workflow";
+
+type ReminderAction =
+ | { type: "cancel" }
+ | { type: "send_now" }
+ | { type: "snooze"; seconds: number };
+
+export const reminderActionHook = defineHook<ReminderAction>();
+
+export async function scheduleReminder(userId: string, delayMs: number) {
+ "use workflow";
+
+ let sendAt = new Date(Date.now() + delayMs);
+ const action = reminderActionHook.create({ token: `reminder:${userId}` });
+
+ const outcome = await Promise.race([
+ sleep(sendAt).then(() => ({ kind: "time" as const })),
+ action.then((payload) => ({ kind: "action" as const, payload })),
+ ]);
+
+ if (outcome.kind === "action") {
+ if (outcome.payload.type === "cancel") {
+ return { userId, status: "cancelled" };
+ }
+ if (outcome.payload.type === "snooze") {
+ sendAt = new Date(Date.now() + outcome.payload.seconds * 1000);
+ await sleep(sendAt);
+ }
+ // "send_now" falls through to send immediately
+ }
+
+ await sendReminderEmail(userId);
+ return { userId, status: "sent" };
+}
+
+async function sendReminderEmail(userId: string): Promise<void> {
+ "use step";
+ await fetch("https://api.example.com/reminders/send", {
+ method: "POST",
+ body: JSON.stringify({ userId }),
+ });
+}
+```
+
+To wake the reminder early from an API route:
+
+```typescript
+import { resumeHook } from "workflow/api";
+
+// POST /api/reminder/cancel
+export async function POST(request: Request) {
+ const { userId } = await request.json();
+ await resumeHook(`reminder:${userId}`, { type: "cancel" });
+ return Response.json({ ok: true });
+}
+```
+
+## Pattern: Timed collection window (digest)
+
+Open a collection window using `sleep()` and accumulate events from a hook until the window closes:
+
+```typescript
+import { sleep, defineHook } from "workflow";
+
+type EventPayload = { type: string; message: string };
+
+export const digestEvent = defineHook<EventPayload>();
+
+export async function collectAndSendDigest(
+ digestId: string,
+ userId: string,
+ windowMs: number = 3_600_000
+) {
+ "use workflow";
+
+ const hook = digestEvent.create({ token: `digest:${digestId}` });
+ const windowClosed = sleep(`${windowMs}ms`).then(() => ({
+ kind: "window_closed" as const,
+ }));
+ const events: EventPayload[] = [];
+
+ while (true) {
+ const outcome = await Promise.race([
+ hook.then((payload) => ({ kind: "event" as const, payload })),
+ windowClosed,
+ ]);
+
+ if (outcome.kind === "window_closed") break;
+ events.push(outcome.payload);
+ }
+
+ if (events.length > 0) {
+ await sendDigestEmail(userId, events);
+ }
+
+ return { digestId, status: events.length > 0 ? "sent" : "empty", eventCount: events.length };
+}
+
+async function sendDigestEmail(userId: string, events: EventPayload[]): Promise<void> {
+ "use step";
+ await fetch("https://api.example.com/digest/send", {
+ method: "POST",
+ body: JSON.stringify({ userId, events }),
+ });
+}
+```
+
+## Pattern: Timeout
+
+Add a timeout to any operation by racing it against `sleep()`:
+
+```typescript
+import { sleep, FatalError } from "workflow";
+
+export async function processWithTimeout(jobId: string) {
+ "use workflow";
+
+ const result = await Promise.race([
+ processData(jobId),
+ sleep("30s").then(() => "timeout" as const),
+ ]);
+
+ if (result === "timeout") {
+ return { jobId, status: "timed_out" };
+ }
+
+ return { jobId, status: "completed", result };
+}
+
+async function processData(jobId: string): Promise<string> {
+ "use step";
+ // Long-running computation
+ return `result-for-${jobId}`;
+}
+```
+
+## Polling external services
+
+When you need to poll an external service until a job completes, define your own `sleep` as a step function and use it in a polling loop. Each iteration becomes a separate step in the event log, making the entire loop durable.
+
+```typescript
+async function sleep(ms: number): Promise<void> {
+ "use step";
+ await new Promise(resolve => setTimeout(resolve, ms));
+}
+
+export async function waitForTranscription(jobId: string) {
+ "use workflow";
+
+ let status = "processing";
+ let attempts = 0;
+ const maxAttempts = 36; // ~3 minutes at 5s intervals
+
+ while (status === "processing" && attempts < maxAttempts) {
+ await sleep(5000);
+ attempts++;
+ const result = await checkJobStatus(jobId);
+ status = result.status;
+ }
+
+ if (status !== "completed") {
+ return { jobId, status: "timed_out", attempts };
+ }
+
+ return { jobId, status: "completed", attempts };
+}
+
+async function checkJobStatus(jobId: string): Promise<{ status: string }> {
+ "use step";
+ const res = await fetch(`https://api.example.com/jobs/${jobId}`);
+ return res.json();
+}
+```
+
+**When to use this vs `sleep()` from `workflow`:**
+- Use `sleep()` from `workflow` for fixed, known delays (drip campaigns, reminders, cooldowns).
+- Use a custom sleep-as-step for polling loops where you need to check a condition between sleeps. The custom step version also works in libraries that don't want to import from the `workflow` module directly.
+
+## Tips
+
+- **`sleep()` accepts** duration strings (`"1d"`, `"2h"`, `"30s"`), milliseconds, or `Date` objects for sleeping until a specific time.
+- **Durable means durable.** A `sleep("7d")` workflow costs nothing while sleeping -- no compute, no memory. It resumes precisely when the timer fires.
+- **Race `sleep` against `defineHook`** for interruptible waits. This is the standard pattern for reminders, approvals with deadlines, and timed collection windows.
+- **Use `sleep()` in workflow context only.** Step functions cannot call `sleep()` directly. If a step needs a delay, use a standard `setTimeout` or return control to the workflow.
+
+## Key APIs
+
+- [`"use workflow"`](/docs/foundations/workflows-and-steps) -- marks the orchestrator function
+- [`"use step"`](/docs/foundations/workflows-and-steps) -- marks functions that run with full Node.js access
+- [`sleep()`](/docs/api-reference/workflow/sleep) -- durable wait (survives restarts, zero compute cost while sleeping)
+- [`defineHook`](/docs/api-reference/workflow/define-hook) -- creates a hook that external systems can trigger
+- [`Promise.race()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Promise/race) -- races sleep against hooks for interruptible waits
diff --git a/docs/content/docs/cookbook/common-patterns/webhooks.mdx b/docs/content/docs/cookbook/common-patterns/webhooks.mdx
new file mode 100644
index 0000000000..aec179d946
--- /dev/null
+++ b/docs/content/docs/cookbook/common-patterns/webhooks.mdx
@@ -0,0 +1,185 @@
+---
+title: Webhooks & External Callbacks
+description: Receive HTTP callbacks from external services, process them durably, and respond inline.
+type: guide
+summary: Create webhook endpoints that your workflow can await, process incoming requests in steps, and respond to the caller — all within durable workflow context.
+---
+
+Use webhooks when external services push events to your application via HTTP callbacks. The workflow creates a webhook URL, suspends with zero compute cost, and resumes when a request arrives.
+
+## When to use this
+
+- Accepting callbacks from payment processors (Stripe, PayPal)
+- Waiting for third-party verification or processing results
+- Any integration where an external system calls you back asynchronously
+
+## Pattern: Processing webhook events
+
+Create a webhook with manual response control, then iterate over incoming requests:
+
+```typescript
+import { createWebhook, type RequestWithResponse } from "workflow";
+
+declare function processEvent(request: RequestWithResponse): Promise<{ type: string }>; // @setup
+
+export async function paymentWebhook(orderId: string) {
+ "use workflow";
+
+ const webhook = createWebhook({ respondWith: "manual" });
+ // webhook.url is the URL to give to the external service
+
+ const ledger: { type: string }[] = [];
+
+ for await (const request of webhook) {
+ const entry = await processEvent(request);
+ ledger.push(entry);
+
+ // Break when we've received a terminal event
+ if (entry.type === "payment.succeeded" || entry.type === "refund.created") {
+ break;
+ }
+ }
+
+ return { orderId, webhookUrl: webhook.url, ledger, status: "settled" };
+}
+```
+
+### Step function for processing
+
+Each webhook request is processed in its own step, giving you full Node.js access for validation, database writes, and responding to the caller:
+
+```typescript
+import { type RequestWithResponse } from "workflow";
+
+async function processEvent(
+ request: RequestWithResponse
+): Promise<{ type: string }> {
+ "use step";
+
+ const body = await request.json().catch(() => ({}));
+ const type = body?.type ?? "unknown";
+
+ // Validate, process, and respond inline
+ if (type === "payment.succeeded") {
+ // Record the payment in your database
+ await request.respondWith(Response.json({ ack: true, action: "captured" }));
+ } else if (type === "payment.failed") {
+ await request.respondWith(Response.json({ ack: true, action: "flagged" }));
+ } else {
+ await request.respondWith(Response.json({ ack: true, action: "ignored" }));
+ }
+
+ return { type };
+}
+```
+
+## Pattern: Async request-reply with timeout
+
+Submit a request to an external service, pass it your webhook URL, then race the callback against a deadline:
+
+```typescript
+import { createWebhook, sleep, FatalError, type RequestWithResponse } from "workflow";
+
+export async function asyncVerification(documentId: string) {
+ "use workflow";
+
+ const webhook = createWebhook({ respondWith: "manual" });
+
+ // Submit to vendor, passing our webhook URL for the callback
+ await submitToVendor(documentId, webhook.url);
+
+ // Race: wait for callback OR timeout after 30 seconds
+ const result = await Promise.race([
+ (async () => {
+ for await (const request of webhook) {
+ const body = await processCallback(request);
+ return body;
+ }
+ throw new FatalError("Webhook closed without callback");
+ })(),
+ sleep("30s").then(() => ({ status: "timed_out" as const })),
+ ]);
+
+ return { documentId, ...result };
+}
+
+async function submitToVendor(documentId: string, callbackUrl: string): Promise<void> {
+ "use step";
+ await fetch("https://vendor.example.com/verify", {
+ method: "POST",
+ body: JSON.stringify({ documentId, callbackUrl }),
+ });
+}
+
+async function processCallback(
+ request: RequestWithResponse
+): Promise<{ status: string; details: string }> {
+ "use step";
+ const body = await request.json();
+ await request.respondWith(Response.json({ ack: true }));
+ return {
+ status: body.approved ? "verified" : "rejected",
+ details: body.details ?? body.reason ?? "",
+ };
+}
+```
+
+## Pattern: Large payload by reference
+
+When payloads are too large to serialize into the event log, pass a lightweight reference (a "claim check") instead. Use a hook to signal when the data is ready:
+
+```typescript
+import { defineHook } from "workflow";
+
+export const blobReady = defineHook<{ blobToken: string }>();
+
+export async function importLargeFile(importId: string) {
+ "use workflow";
+
+ // Suspend until the external system signals the blob is uploaded
+ const { blobToken } = await blobReady.create({ token: `upload:${importId}` });
+
+ // Process by reference -- the full payload never enters the event log
+ await processBlob(blobToken);
+
+ return { importId, blobToken, status: "indexed" };
+}
+
+async function processBlob(blobToken: string): Promise<void> {
+ "use step";
+ // Fetch the blob using the token, process it
+ const res = await fetch(`https://storage.example.com/blobs/${blobToken}`);
+ const data = await res.arrayBuffer();
+ // Index, transform, or store the data
+}
+```
+
+Resume from an API route when the upload completes:
+
+```typescript
+import { resumeHook } from "workflow/api";
+
+// POST /api/upload-complete
+export async function POST(request: Request) {
+ const { importId, blobToken } = await request.json();
+ await resumeHook(`upload:${importId}`, { blobToken });
+ return Response.json({ ok: true });
+}
+```
+
+## Tips
+
+- **`respondWith: "manual"`** gives you control over the HTTP response from inside a step. Use this when you need to validate the request before responding.
+- **`for await` on a webhook** lets you process multiple events from the same URL. Use `break` to stop listening after a terminal event.
+- **Webhooks auto-generate URLs** at `/.well-known/workflow/v1/webhook/:token`. Pass this URL to external services.
+- **Race webhooks against `sleep()`** for deadlines. If the callback doesn't arrive in time, the workflow can take a fallback action.
+- **For large payloads**, use a hook + reference token instead of passing the data through the workflow. The event log serializes all step inputs/outputs, so large payloads hurt performance.
+
+## Key APIs
+
+- [`"use workflow"`](/docs/foundations/workflows-and-steps) -- marks the orchestrator function
+- [`"use step"`](/docs/foundations/workflows-and-steps) -- marks functions with full Node.js access
+- [`createWebhook()`](/docs/api-reference/workflow/create-webhook) -- creates an HTTP endpoint the workflow can await
+- [`defineHook()`](/docs/api-reference/workflow/define-hook) -- creates a typed hook for signal-based patterns
+- [`sleep()`](/docs/api-reference/workflow/sleep) -- durable timer for deadlines
+- [`FatalError`](/docs/api-reference/workflow/fatal-error) -- prevents retry on permanent failures
diff --git a/docs/content/docs/cookbook/index.mdx b/docs/content/docs/cookbook/index.mdx
new file mode 100644
index 0000000000..cfb9c3bfb1
--- /dev/null
+++ b/docs/content/docs/cookbook/index.mdx
@@ -0,0 +1,15 @@
+---
+title: Cookbook
+description: Best-practice workflow patterns with copy-paste code examples. Common patterns, agent patterns, integrations, and advanced techniques.
+type: overview
+---
+
+
+**For AI agents:** point your agent at this page or install the [Vercel skill](https://vercel.com/docs/workflow) to get workflow best practices directly in your coding assistant.
+
+
+The cookbook is a curated collection of workflow patterns — like [vercel/react-best-practices](https://github.com/vercel/react-best-practices), but for durable workflows. Each recipe gives you a clean, copy-paste workflow shape for a real use case.
+
+Use **Guide me** to answer a few questions and find the right pattern, or press `/` / `Ctrl/⌘+K` to search directly.
+
+
diff --git a/docs/content/docs/cookbook/integrations/ai-sdk.mdx b/docs/content/docs/cookbook/integrations/ai-sdk.mdx
new file mode 100644
index 0000000000..d144124632
--- /dev/null
+++ b/docs/content/docs/cookbook/integrations/ai-sdk.mdx
@@ -0,0 +1,191 @@
+---
+title: AI SDK
+description: Use AI SDK model providers, tool calling, and streaming inside durable workflows.
+type: guide
+summary: Turn any AI SDK model call into a retryable, observable workflow step with built-in streaming.
+related:
+ - /docs/ai
+ - /docs/ai/defining-tools
+ - /docs/ai/resumable-streams
+ - /docs/api-reference/workflow-ai/durable-agent
+---
+
+Workflow DevKit integrates with [AI SDK](https://ai-sdk.dev) through the `@workflow/ai` package. This turns your LLM calls and tool executions into durable, retryable steps with built-in streaming and observability.
+
+## What It Enables
+
+- **Durable LLM calls** -- Model invocations become steps that survive crashes and cold starts
+- **Any model provider** -- Use OpenAI, Anthropic, Google, Bedrock, or any AI SDK-compatible provider through [Vercel Gateway](https://vercel.com/docs/gateway) or direct provider configuration
+- **Tool durability** -- Tool executions become steps with automatic retries and event logging
+- **Resumable streaming** -- Clients reconnect mid-stream without losing data
+
+## When to Use
+
+Use this integration when your application calls an LLM and needs:
+
+- Reliability for long-running agent loops (multi-step tool calling)
+- Automatic retry on transient model API failures
+- Stream resumption after disconnects
+- Observability into each model call and tool execution
+
+## DurableAgent with Model Providers
+
+The `DurableAgent` wraps AI SDK's streaming interface. Pass any model string supported by [Vercel Gateway](https://vercel.com/docs/gateway) or a provider-specific model ID.
+
+```typescript title="workflows/research.ts" lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { convertToModelMessages, type UIMessage, type UIMessageChunk } from "ai";
+import { getWritable } from "workflow";
+import z from "zod/v4";
+
+async function searchWeb(input: { query: string }): Promise<{ results: string[] }> {
+ "use step";
+ const response = await fetch(
+ `https://api.example.com/search?q=${encodeURIComponent(input.query)}`
+ );
+ const data = await response.json();
+ return { results: data.items.map((item: { title: string }) => item.title) };
+}
+
+async function summarize(input: { text: string }): Promise<{ summary: string }> {
+ "use step";
+ // Each step is individually retried on failure
+ const response = await fetch("https://api.example.com/summarize", {
+ method: "POST",
+ body: JSON.stringify({ text: input.text }),
+ });
+ const data = await response.json();
+ return { summary: data.summary };
+}
+
+export async function researchAgent(messages: UIMessage[]) {
+ "use workflow";
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-sonnet-4-20250514",
+ instructions: "You are a research assistant. Search the web and summarize findings.",
+ tools: {
+ searchWeb: {
+ description: "Search the web for information",
+ inputSchema: z.object({
+ query: z.string().describe("The search query"),
+ }),
+ execute: searchWeb,
+ },
+ summarize: {
+ description: "Summarize a block of text",
+ inputSchema: z.object({
+ text: z.string().describe("The text to summarize"),
+ }),
+ execute: summarize,
+ },
+ },
+ });
+
+ const result = await agent.stream({
+ messages: await convertToModelMessages(messages),
+ writable: getWritable(),
+ });
+
+ return { messages: result.messages };
+}
+```
+
+### Using Different Providers
+
+Switch between providers by changing the model string. No other code changes required.
+
+{/* @skip-typecheck - illustrative snippets with intentional redeclarations */}
+```typescript
+// Vercel Gateway (recommended) -- works with all providers
+const agent = new DurableAgent({ model: "anthropic/claude-sonnet-4-20250514" });
+
+// OpenAI
+const agent = new DurableAgent({ model: "openai/gpt-4o" });
+
+// Google
+const agent = new DurableAgent({ model: "google/gemini-2.5-pro" });
+
+// Amazon Bedrock
+const agent = new DurableAgent({ model: "bedrock/claude-haiku-4-5-20251001-v1" });
+```
+
+### Provider-Specific Options
+
+Pass provider options for features like reasoning or extended thinking.
+
+```typescript
+const agent = new DurableAgent({
+ model: "anthropic/claude-sonnet-4-20250514",
+ providerOptions: {
+ anthropic: { thinking: { type: "enabled", budgetTokens: 10000 } },
+ },
+ // ...tools and instructions
+});
+```
+
+## Tool Functions as Steps
+
+Every tool `execute` function marked with `"use step"` becomes a durable step. This means:
+
+- **Automatic retries** -- If a tool fails (network error, API timeout), the framework retries it
+- **Event logging** -- Inputs and outputs are recorded for observability and replay
+- **Idempotency** -- On replay after a crash, completed steps return their cached result
+
+```typescript
+async function bookFlight(input: {
+ origin: string;
+ destination: string;
+ date: string;
+}): Promise<{ confirmationId: string }> {
+ "use step";
+ // This call is retried on transient failures and its result is persisted
+ const response = await fetch("https://api.airline.com/book", {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify(input),
+ });
+ if (!response.ok) throw new Error(`Booking failed: ${response.status}`);
+ return response.json();
+}
+```
+
+## Resumable Streaming
+
+Use `WorkflowChatTransport` on the client to automatically reconnect to a workflow's stream if the connection drops.
+
+```typescript title="app/api/chat/route.ts" lineNumbers
+import { start } from "workflow/api";
+import { researchAgent } from "@/workflows/research";
+
+export async function POST(request: Request) {
+ const { messages } = await request.json();
+ return start(researchAgent, [messages]);
+}
+```
+
+```typescript title="components/chat.tsx" lineNumbers
+"use client";
+
+import { useChat } from "@ai-sdk/react";
+import { WorkflowChatTransport } from "@workflow/ai";
+
+export function Chat() {
+ const chat = useChat({
+ transport: new WorkflowChatTransport({
+ api: "/api/chat",
+ }),
+ });
+
+ // Standard useChat usage -- reconnection is handled automatically
+  return (
+    <div>
+      {chat.messages.map((m) => (
+        <div key={m.id}>{m.content}</div>
+      ))}
+    </div>
+  );
+}
+```
+
+See [Resumable Streams](/docs/ai/resumable-streams) for advanced options like `startIndex` and `prepareReconnectToStreamRequest`.
diff --git a/docs/content/docs/cookbook/integrations/chat-sdk.mdx b/docs/content/docs/cookbook/integrations/chat-sdk.mdx
new file mode 100644
index 0000000000..6369c58d8e
--- /dev/null
+++ b/docs/content/docs/cookbook/integrations/chat-sdk.mdx
@@ -0,0 +1,191 @@
+---
+title: Chat SDK
+description: Build durable chat sessions by combining workflow persistence with AI SDK's chat primitives.
+type: guide
+summary: Use workflow hooks and streaming to create chat sessions that survive disconnects and server restarts.
+related:
+ - /docs/ai/chat-session-modeling
+ - /docs/ai/resumable-streams
+ - /docs/ai/message-queueing
+ - /docs/api-reference/workflow-ai/durable-agent
+ - /docs/api-reference/workflow/define-hook
+---
+
+AI SDK provides chat primitives (`useChat`, message types, streaming utilities) for building chat interfaces. Workflow DevKit makes those chat sessions durable -- surviving disconnects, cold starts, and server restarts -- by persisting every message and LLM response as workflow events.
+
+## What It Enables
+
+- **Durable chat history** -- Messages and responses are persisted in the workflow event log, not just client state
+- **Resumable sessions** -- Users reconnect and pick up where they left off, even after server restarts
+- **Multi-turn conversations** -- A single workflow manages an entire chat session with hook-based message injection
+- **Server-side message queueing** -- Inject follow-up messages while the agent is still processing
+
+## When to Use
+
+Use this pattern when your chat application needs:
+
+- Persistence beyond the browser session
+- Recovery from server failures mid-conversation
+- Long-running agent sessions (minutes to hours)
+- Server-driven message injection (system messages, external events)
+
+## Single-Turn: Stateless Sessions
+
+Each user message starts a new workflow run. The client owns the message history and sends the full array with each request. This is the simplest pattern.
+
+```typescript title="workflows/chat.ts" lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { convertToModelMessages, type UIMessage, type UIMessageChunk } from "ai";
+import { getWritable } from "workflow";
+
+export async function chat(messages: UIMessage[]) {
+ "use workflow";
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-sonnet-4-20250514",
+ instructions: "You are a helpful assistant.",
+ tools: { /* your tools here */ },
+ });
+
+ const result = await agent.stream({
+ messages: await convertToModelMessages(messages),
+ writable: getWritable(),
+ });
+
+ return { messages: result.messages };
+}
+```
+
+```typescript title="app/api/chat/route.ts" lineNumbers
+import { start } from "workflow/api";
+import { chat } from "@/workflows/chat";
+
+export async function POST(request: Request) {
+ const { messages } = await request.json();
+ return start(chat, [messages]);
+}
+```
+
+The client uses `WorkflowChatTransport` for automatic stream resumption.
+
+```typescript title="components/chat.tsx" lineNumbers
+"use client";
+
+import { useChat } from "@ai-sdk/react";
+import { WorkflowChatTransport } from "@workflow/ai";
+
+export function Chat() {
+ const chat = useChat({
+ transport: new WorkflowChatTransport({ api: "/api/chat" }),
+ });
+
+  return (
+    <div>
+      {chat.messages.map((m) => (
+        <div key={m.id}>{m.content}</div>
+      ))}
+      {/* message input omitted for brevity */}
+    </div>
+  );
+}
+```
+
+## Multi-Turn: Durable Sessions
+
+A single workflow manages the entire conversation. The workflow loops, waiting for new messages via a hook. This gives you server-side ownership of the full chat history.
+
+```typescript title="workflows/durable-chat.ts" lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import {
+ convertToModelMessages,
+ type UIMessage,
+ type UIMessageChunk,
+} from "ai";
+import { defineHook, getWritable, getWorkflowMetadata } from "workflow";
+import { z } from "zod";
+
+const chatMessageHook = defineHook({
+ schema: z.object({
+ messages: z.array(z.any()),
+ }),
+});
+
+export async function durableChat(initialMessages: UIMessage[]) {
+ "use workflow";
+
+ const { workflowRunId } = getWorkflowMetadata();
+ let allMessages = await convertToModelMessages(initialMessages);
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-sonnet-4-20250514",
+ instructions: "You are a helpful assistant.",
+ tools: { /* your tools here */ },
+ });
+
+ // First turn
+ const firstResult = await agent.stream({
+ messages: allMessages,
+ writable: getWritable(),
+ preventClose: true,
+ });
+ allMessages = firstResult.messages;
+
+ // Subsequent turns -- wait for new messages via hook
+ while (true) {
+ const hook = chatMessageHook.create({ token: workflowRunId });
+ const { messages: newMessages } = await hook;
+
+ allMessages = [
+ ...allMessages,
+ ...await convertToModelMessages(newMessages),
+ ];
+
+ const result = await agent.stream({
+ messages: allMessages,
+ writable: getWritable(),
+ preventClose: true,
+ });
+ allMessages = result.messages;
+ }
+}
+```
+
+### Multi-Turn API Routes
+
+You need two routes: one to start the session, another to send follow-up messages.
+
+```typescript title="app/api/chat/route.ts" lineNumbers
+import { start } from "workflow/api";
+import { durableChat } from "@/workflows/durable-chat";
+
+export async function POST(request: Request) {
+ const { messages } = await request.json();
+ return start(durableChat, [messages]);
+}
+```
+
+```typescript title="app/api/chat/follow-up/route.ts" lineNumbers
+import { resumeHook } from "workflow/api";
+
+export async function POST(request: Request) {
+ const { runId, messages } = await request.json();
+ await resumeHook(runId, { messages });
+ return new Response("OK");
+}
+```
+
+## Choosing a Pattern
+
+| | Single-Turn | Multi-Turn |
+|---|---|---|
+| **State ownership** | Client | Server (workflow event log) |
+| **Message injection** | Not needed | Via hooks |
+| **Complexity** | Low | Medium |
+| **Session duration** | Per-request | Minutes to hours |
+| **Crash recovery** | Client resends full history | Workflow replays from event log |
+
+Start with single-turn. Move to multi-turn when you need server-owned state, message injection from external sources, or sessions that outlive the browser tab.
+
+See [Chat Session Modeling](/docs/ai/chat-session-modeling) for the full guide including multiplayer patterns and message queueing.
diff --git a/docs/content/docs/cookbook/integrations/meta.json b/docs/content/docs/cookbook/integrations/meta.json
new file mode 100644
index 0000000000..aebbb4e5f1
--- /dev/null
+++ b/docs/content/docs/cookbook/integrations/meta.json
@@ -0,0 +1,4 @@
+{
+ "title": "Integrations",
+ "pages": ["ai-sdk", "sandbox", "chat-sdk"]
+}
diff --git a/docs/content/docs/cookbook/integrations/sandbox.mdx b/docs/content/docs/cookbook/integrations/sandbox.mdx
new file mode 100644
index 0000000000..2c923bb1e3
--- /dev/null
+++ b/docs/content/docs/cookbook/integrations/sandbox.mdx
@@ -0,0 +1,190 @@
+---
+title: Sandbox
+description: Orchestrate Vercel Sandbox lifecycle -- creation, code execution, snapshotting -- inside durable workflows.
+type: guide
+summary: Use workflow steps to provision sandboxes, run code, and manage sandbox lifecycle with automatic cleanup on failure.
+related:
+ - /docs/ai/defining-tools
+ - /docs/foundations/errors-and-retries
+ - /docs/api-reference/workflow-ai/durable-agent
+---
+
+[Vercel Sandbox](https://vercel.com/docs/sandbox) provides isolated code execution environments. Workflow DevKit lets you orchestrate the full sandbox lifecycle -- creation, execution, snapshotting, and teardown -- as durable steps with automatic retry and cleanup.
+
+## What It Enables
+
+- **Durable sandbox sessions** -- Sandbox provisioning and teardown survive cold starts
+- **Automatic cleanup** -- Saga-style compensation ensures sandboxes are destroyed on failure
+- **Multi-step code execution** -- Run a sequence of commands in the same sandbox with each step logged
+- **Agent-driven sandboxes** -- Give your DurableAgent a tool that spins up sandboxes on demand
+
+## When to Use
+
+Use this integration when your workflow needs to:
+
+- Execute user-provided or AI-generated code safely
+- Run multi-step build/test pipelines in isolated environments
+- Provision temporary environments for interactive sessions
+- Snapshot sandbox state between steps for reproducibility
+
+## Sandbox Lifecycle as Steps
+
+Each phase of sandbox management becomes a step. This ensures that if a failure occurs partway through, the workflow can retry or compensate.
+
+```typescript title="workflows/sandbox-pipeline.ts" lineNumbers
+import { FatalError } from "workflow";
+
+// TODO: Replace with actual @vercel/sandbox imports when available in your project.
+// These type declarations illustrate the expected API shape.
+declare function createSandbox(options: { template: string }): Promise<{ id: string; url: string }>;
+declare function execInSandbox(sandboxId: string, command: string): Promise<{ stdout: string; stderr: string; exitCode: number }>;
+declare function destroySandbox(sandboxId: string): Promise<void>;
+declare function snapshotSandbox(sandboxId: string): Promise<{ snapshotId: string }>;
+
+async function provisionSandbox(input: {
+ template: string;
+}): Promise<{ sandboxId: string; url: string }> {
+ "use step";
+ const sandbox = await createSandbox({ template: input.template });
+ return { sandboxId: sandbox.id, url: sandbox.url };
+}
+
+async function runCommand(input: {
+ sandboxId: string;
+ command: string;
+}): Promise<{ stdout: string; exitCode: number }> {
+ "use step";
+ const result = await execInSandbox(input.sandboxId, input.command);
+ if (result.exitCode !== 0) {
+ throw new FatalError(`Command failed: ${result.stderr}`);
+ }
+ return { stdout: result.stdout, exitCode: result.exitCode };
+}
+
+async function teardownSandbox(input: { sandboxId: string }): Promise<void> {
+ "use step";
+ await destroySandbox(input.sandboxId);
+}
+
+async function saveSandboxSnapshot(input: {
+ sandboxId: string;
+}): Promise<{ snapshotId: string }> {
+ "use step";
+ return snapshotSandbox(input.sandboxId);
+}
+
+export async function sandboxPipeline(input: {
+ template: string;
+ commands: string[];
+}) {
+ "use workflow";
+
+ const { sandboxId, url } = await provisionSandbox({
+ template: input.template,
+ });
+
+ try {
+ const results = [];
+ for (const command of input.commands) {
+ const result = await runCommand({ sandboxId, command });
+ results.push(result);
+ }
+
+ const { snapshotId } = await saveSandboxSnapshot({ sandboxId });
+
+ return { status: "completed", url, snapshotId, results };
+ } catch (error) {
+ // Always clean up the sandbox, even on failure
+ await teardownSandbox({ sandboxId });
+ throw error;
+ }
+}
+```
+
+## Sandbox as an Agent Tool
+
+Give a DurableAgent the ability to create and use sandboxes. The agent decides when to spin up a sandbox, what code to run, and when to tear it down.
+
+```typescript title="workflows/code-agent.ts" lineNumbers
+import { DurableAgent } from "@workflow/ai/agent";
+import { convertToModelMessages, type UIMessage, type UIMessageChunk } from "ai";
+import { getWritable } from "workflow";
+import z from "zod/v4";
+
+// TODO: Replace with actual @vercel/sandbox imports when available.
+declare function createSandbox(options: { template: string }): Promise<{ id: string; url: string }>;
+declare function execInSandbox(sandboxId: string, command: string): Promise<{ stdout: string; stderr: string; exitCode: number }>;
+declare function destroySandbox(sandboxId: string): Promise<void>;
+
+async function createCodeSandbox(input: {
+ template: string;
+}): Promise<{ sandboxId: string; url: string }> {
+ "use step";
+ const sandbox = await createSandbox({ template: input.template });
+ return { sandboxId: sandbox.id, url: sandbox.url };
+}
+
+async function executeCode(input: {
+ sandboxId: string;
+ code: string;
+}): Promise<{ output: string; exitCode: number }> {
+ "use step";
+ const result = await execInSandbox(input.sandboxId, input.code);
+ return { output: result.stdout || result.stderr, exitCode: result.exitCode };
+}
+
+async function cleanupSandbox(input: { sandboxId: string }): Promise<{ cleaned: boolean }> {
+ "use step";
+ await destroySandbox(input.sandboxId);
+ return { cleaned: true };
+}
+
+export async function codeAgent(messages: UIMessage[]) {
+ "use workflow";
+
+ const agent = new DurableAgent({
+ model: "anthropic/claude-sonnet-4-20250514",
+ instructions:
+ "You are a coding assistant. You can create sandboxes to run code. " +
+ "Always create a sandbox first, then execute code in it. " +
+ "Clean up the sandbox when you are done.",
+ tools: {
+ createCodeSandbox: {
+ description: "Create an isolated sandbox environment for running code",
+ inputSchema: z.object({
+ template: z.string().describe("The sandbox template (e.g., 'node', 'python')"),
+ }),
+ execute: createCodeSandbox,
+ },
+ executeCode: {
+ description: "Execute code in an existing sandbox",
+ inputSchema: z.object({
+ sandboxId: z.string().describe("The sandbox ID from createCodeSandbox"),
+ code: z.string().describe("The code or command to execute"),
+ }),
+ execute: executeCode,
+ },
+ cleanupSandbox: {
+ description: "Destroy a sandbox when finished",
+ inputSchema: z.object({
+ sandboxId: z.string().describe("The sandbox ID to destroy"),
+ }),
+ execute: cleanupSandbox,
+ },
+ },
+ });
+
+ const result = await agent.stream({
+ messages: await convertToModelMessages(messages),
+ writable: getWritable(),
+ });
+
+ return { messages: result.messages };
+}
+```
+
+## Saga Pattern for Cleanup
+
+Combine sandbox orchestration with the [saga pattern](/docs/cookbook/common-patterns/saga) to ensure sandboxes are always cleaned up, even when a step in the middle of your pipeline fails.
+
+The example above uses a try/catch around the command execution loop. For more complex pipelines with multiple resources (sandbox + database + external API), push compensation functions onto a stack as shown in the [saga recipe](/docs/cookbook/common-patterns/saga).
diff --git a/docs/content/docs/cookbook/meta.json b/docs/content/docs/cookbook/meta.json
new file mode 100644
index 0000000000..ef260fb046
--- /dev/null
+++ b/docs/content/docs/cookbook/meta.json
@@ -0,0 +1,5 @@
+{
+ "title": "Cookbook",
+ "defaultOpen": true,
+ "pages": ["common-patterns", "agent-patterns", "integrations", "advanced"]
+}
diff --git a/docs/geistdocs.tsx b/docs/geistdocs.tsx
index 834c7debbd..6b9ac3de2f 100644
--- a/docs/geistdocs.tsx
+++ b/docs/geistdocs.tsx
@@ -27,6 +27,10 @@ export const nav = [
label: 'Docs',
href: '/docs',
},
+ {
+ label: 'Cookbook',
+ href: '/cookbook',
+ },
{
label: 'Worlds',
href: '/worlds',
diff --git a/docs/lib/cookbook-tree.ts b/docs/lib/cookbook-tree.ts
new file mode 100644
index 0000000000..75ca03be85
--- /dev/null
+++ b/docs/lib/cookbook-tree.ts
@@ -0,0 +1,462 @@
+export type Branch = {
+ label: string;
+ icon: string;
+ slugs?: string[];
+ next?: TreeNode;
+};
+
+export type TreeNode = {
+ id: string;
+ question: string;
+ branches: Branch[];
+};
+
+export type Recipe = {
+ slug: string;
+ title: string;
+ description: string;
+ whenToUse: string;
+ category: string;
+};
+
+export type RecipeCategory =
+ | 'common-patterns'
+ | 'agent-patterns'
+ | 'integrations'
+ | 'advanced';
+
+export const categoryOrder: RecipeCategory[] = [
+ 'common-patterns',
+ 'agent-patterns',
+ 'integrations',
+ 'advanced',
+];
+
+export const categoryLabels: Record<RecipeCategory, string> = {
+ 'common-patterns': 'Common Patterns',
+ 'agent-patterns': 'Agent Patterns',
+ integrations: 'Integrations',
+ advanced: 'Advanced',
+};
+
+/** Map from slug → category folder for URL construction */
+export const slugToCategory: Record<string, RecipeCategory> = {
+ // Common Patterns
+ saga: 'common-patterns',
+ batching: 'common-patterns',
+ 'rate-limiting': 'common-patterns',
+ 'fan-out': 'common-patterns',
+ scheduling: 'common-patterns',
+ idempotency: 'common-patterns',
+ webhooks: 'common-patterns',
+ 'content-router': 'common-patterns',
+ 'child-workflows': 'common-patterns',
+
+ // Agent Patterns
+ 'durable-agent': 'agent-patterns',
+ 'tool-streaming': 'agent-patterns',
+ 'human-in-the-loop': 'agent-patterns',
+ 'tool-orchestration': 'agent-patterns',
+ 'stop-workflow': 'agent-patterns',
+
+ // Integrations
+ 'ai-sdk': 'integrations',
+ sandbox: 'integrations',
+ 'chat-sdk': 'integrations',
+
+ // Advanced
+ 'serializable-steps': 'advanced',
+ 'durable-objects': 'advanced',
+ 'isomorphic-packages': 'advanced',
+ 'secure-credentials': 'advanced',
+ 'custom-serialization': 'advanced',
+ 'publishing-libraries': 'advanced',
+};
+
+/** All recipe metadata, keyed by slug */
+export const recipes: Record<string, Recipe> = {
+ // Common Patterns
+ saga: {
+ slug: 'saga',
+ title: 'Transactions & Rollbacks (Saga)',
+ description:
+ 'Coordinate multi-step transactions with automatic rollback when a step fails.',
+ whenToUse:
+ 'Run a sequence of steps where each registers a compensation. If any step throws a FatalError, compensations execute in reverse order.',
+ category: 'common-patterns',
+ },
+ batching: {
+ slug: 'batching',
+ title: 'Batching & Parallel Processing',
+ description:
+ 'Process large collections in parallel batches with failure isolation between groups.',
+ whenToUse:
+ 'Split items into fixed-size batches, process each batch concurrently with Promise.allSettled, and pace batches with sleep.',
+ category: 'common-patterns',
+ },
+ 'rate-limiting': {
+ slug: 'rate-limiting',
+ title: 'Rate Limiting & Retries',
+ description:
+ 'Handle 429 responses and transient failures with RetryableError and exponential backoff.',
+ whenToUse:
+ 'When an external API returns 429, throw RetryableError with the Retry-After value so the runtime reschedules the step.',
+ category: 'common-patterns',
+ },
+ 'fan-out': {
+ slug: 'fan-out',
+ title: 'Fan-Out & Parallel Delivery',
+ description:
+ 'Send a message to multiple channels or recipients in parallel with independent failure handling.',
+ whenToUse:
+ 'Fan out an alert to Slack, email, SMS, and PagerDuty simultaneously so a failure in one channel does not block the others.',
+ category: 'common-patterns',
+ },
+ scheduling: {
+ slug: 'scheduling',
+ title: 'Sleep, Scheduling & Timed Workflows',
+ description:
+ 'Use durable sleep to schedule actions minutes, hours, days, or weeks into the future.',
+ whenToUse:
+ 'Schedule future actions with durable sleep that survives cold starts, and race sleeps against hooks for early wake.',
+ category: 'common-patterns',
+ },
+ idempotency: {
+ slug: 'idempotency',
+ title: 'Idempotency',
+ description:
+ 'Ensure external side effects happen exactly once, even when steps are retried or workflows are replayed.',
+ whenToUse:
+ 'Use step IDs as idempotency keys for external APIs like Stripe so retries and replays do not create duplicates.',
+ category: 'common-patterns',
+ },
+ webhooks: {
+ slug: 'webhooks',
+ title: 'Webhooks & External Callbacks',
+ description:
+ 'Receive HTTP callbacks from external services, process them durably, and respond inline.',
+ whenToUse:
+ 'Create webhook endpoints that your workflow can await, process incoming requests in steps, and respond to the caller.',
+ category: 'common-patterns',
+ },
+ 'content-router': {
+ slug: 'content-router',
+ title: 'Conditional Routing',
+ description:
+ 'Inspect a payload and route it to different step handlers based on its content.',
+ whenToUse:
+ 'Classify incoming messages and branch to specialized handlers using standard if/else logic in the workflow function.',
+ category: 'common-patterns',
+ },
+
+ 'child-workflows': {
+ slug: 'child-workflows',
+ title: 'Child Workflows',
+ description:
+ 'Spawn and orchestrate child workflows from a parent, polling for completion and handling partial failures.',
+ whenToUse:
+ 'Fan out work to independent child workflows via start(), poll with getRun() and sleep(), and collect results.',
+ category: 'common-patterns',
+ },
+
+ // Agent Patterns
+ 'durable-agent': {
+ slug: 'durable-agent',
+ title: 'Durable Agent',
+ description:
+ 'Replace a stateless AI agent with a durable one that survives crashes, retries tool calls, and streams output.',
+ whenToUse:
+ 'Convert an AI SDK Agent into a DurableAgent backed by a workflow, with tools as retryable steps.',
+ category: 'agent-patterns',
+ },
+ 'tool-streaming': {
+ slug: 'tool-streaming',
+ title: 'Tool Streaming',
+ description:
+ 'Stream real-time progress updates from tools to the UI while they execute.',
+ whenToUse:
+ 'Emit custom data parts from step functions to show incremental results during long-running tool calls.',
+ category: 'agent-patterns',
+ },
+ 'human-in-the-loop': {
+ slug: 'human-in-the-loop',
+ title: 'Human-in-the-Loop',
+ description:
+ 'Pause an AI agent to wait for human approval, then resume based on the decision.',
+ whenToUse:
+ 'Use defineHook with the tool call ID to suspend an agent for human approval, with an optional timeout.',
+ category: 'agent-patterns',
+ },
+ 'tool-orchestration': {
+ slug: 'tool-orchestration',
+ title: 'Tool Orchestration',
+ description:
+ 'Choose between step-level and workflow-level tools, or combine both for complex tool implementations.',
+ whenToUse:
+ 'Implement tools as steps for retries and I/O, at the workflow level for sleep and hooks, or combine both.',
+ category: 'agent-patterns',
+ },
+ 'stop-workflow': {
+ slug: 'stop-workflow',
+ title: 'Stop Workflow',
+ description:
+ 'Gracefully cancel a running agent workflow using a hook signal.',
+ whenToUse:
+ 'Use a hook as a stop signal to break out of an agent loop and close the stream cleanly.',
+ category: 'agent-patterns',
+ },
+
+ // Integrations
+ 'ai-sdk': {
+ slug: 'ai-sdk',
+ title: 'AI SDK',
+ description:
+ 'Use AI SDK model providers, tool calling, and streaming inside durable workflows.',
+ whenToUse:
+ 'Turn any AI SDK model call into a retryable, observable workflow step with built-in streaming.',
+ category: 'integrations',
+ },
+ sandbox: {
+ slug: 'sandbox',
+ title: 'Sandbox',
+ description:
+ 'Orchestrate Vercel Sandbox lifecycle \u2014 creation, code execution, snapshotting \u2014 inside durable workflows.',
+ whenToUse:
+ 'Use workflow steps to provision sandboxes, run code, and manage sandbox lifecycle with automatic cleanup on failure.',
+ category: 'integrations',
+ },
+ 'chat-sdk': {
+ slug: 'chat-sdk',
+ title: 'Chat SDK',
+ description:
+ 'Build durable chat sessions by combining workflow persistence with AI SDK chat primitives.',
+ whenToUse:
+ 'Use workflow hooks and streaming to create chat sessions that survive disconnects and server restarts.',
+ category: 'integrations',
+ },
+
+ // Advanced
+ 'serializable-steps': {
+ slug: 'serializable-steps',
+ title: 'Serializable Steps',
+ description:
+ 'Wrap non-serializable objects (like AI model providers) inside step functions so they can cross the workflow boundary.',
+ whenToUse:
+ 'Return a callback from a step to defer provider initialization, making non-serializable AI SDK models work inside durable workflows.',
+ category: 'advanced',
+ },
+ 'durable-objects': {
+ slug: 'durable-objects',
+ title: 'Durable Objects',
+ description:
+ 'Model long-lived stateful entities as workflows that persist state across requests.',
+ whenToUse:
+ 'Build a durable counter or session object whose state survives restarts by using the event log as the persistence layer.',
+ category: 'advanced',
+ },
+ 'isomorphic-packages': {
+ slug: 'isomorphic-packages',
+ title: 'Isomorphic Packages',
+ description:
+ 'Publish reusable workflow packages that work both inside and outside the workflow runtime.',
+ whenToUse:
+ 'Use try/catch around getWorkflowMetadata, dynamic imports, and optional peer dependencies for dual-environment libraries.',
+ category: 'advanced',
+ },
+ 'secure-credentials': {
+ slug: 'secure-credentials',
+ title: 'Secure Credentials',
+ description:
+ 'Encrypt secrets before passing them through workflows so they never appear in the event log.',
+ whenToUse:
+ 'Encrypt credentials before start(), resolve them inside steps via a provider, and avoid making secret-returning functions into steps.',
+ category: 'advanced',
+ },
+ 'custom-serialization': {
+ slug: 'custom-serialization',
+ title: 'Custom Serialization',
+ description:
+ 'Make custom classes survive workflow serialization using the WORKFLOW_SERIALIZE/WORKFLOW_DESERIALIZE protocol.',
+ whenToUse:
+ 'Implement static serde symbols on a class so instances can cross the workflow/step boundary intact.',
+ category: 'advanced',
+ },
+ 'publishing-libraries': {
+ slug: 'publishing-libraries',
+ title: 'Publishing Libraries',
+ description:
+ 'Ship an npm package that exports reusable workflow functions with stable IDs and clean step I/O.',
+ whenToUse:
+ 'Structure, test, and publish a library that consumers can import and start() in their own workflow apps.',
+ category: 'advanced',
+ },
+};
+
+/** Build a cookbook recipe href */
+export function getRecipeHref(lang: string, slug: string): string {
+ return `/${lang}/cookbook/${slugToCategory[slug]}/${slug}`;
+}
+
+/** Get recipes for a category, in definition order */
+export function getRecipesByCategory(category: RecipeCategory): Recipe[] {
+ return Object.values(recipes).filter((r) => r.category === category);
+}
+
+/** Recursively collect all slugs reachable from a branch */
+export function collectSlugs(branch: Branch): string[] {
+ const slugs = branch.slugs ?? [];
+ if (branch.next) {
+ return [...slugs, ...branch.next.branches.flatMap(collectSlugs)];
+ }
+ return slugs;
+}
+
+/** The decision tree */
+export const tree: TreeNode = {
+ id: 'root',
+ question: 'I want to\u2026',
+ branches: [
+ {
+ label: 'Process payments & orders',
+ icon: '$',
+ next: {
+ id: 'payments',
+ question: 'What happens if a step fails?',
+ branches: [
+ {
+ label: 'Roll back everything automatically',
+ icon: '\u21a9',
+ slugs: ['saga'],
+ },
+ {
+ label: 'Make sure nothing is duplicated',
+ icon: '\u2713',
+ slugs: ['idempotency'],
+ },
+ {
+ label: 'Route to the right handler',
+ icon: '\u25c8',
+ slugs: ['content-router'],
+ },
+ ],
+ },
+ },
+ {
+ label: 'Build a durable AI agent',
+ icon: '\u2605',
+ next: {
+ id: 'agent',
+ question: 'What does the agent need?',
+ branches: [
+ {
+ label: 'Basic durable agent setup',
+ icon: '\u25b8',
+ slugs: ['durable-agent'],
+ },
+ {
+ label: 'Stream progress from tools',
+ icon: '\u2192',
+ slugs: ['tool-streaming'],
+ },
+ {
+ label: 'Wait for human approval',
+ icon: '\u270b',
+ slugs: ['human-in-the-loop'],
+ },
+ {
+ label: 'Complex tool patterns',
+ icon: '\u2699',
+ slugs: ['tool-orchestration', 'stop-workflow'],
+ },
+ ],
+ },
+ },
+ {
+ label: 'Handle flaky APIs',
+ icon: '\u21bb',
+ next: {
+ id: 'flaky',
+ question: "What's going wrong?",
+ branches: [
+ {
+ label: 'Rate limited (429s)',
+ icon: '\u2298',
+ slugs: ['rate-limiting'],
+ },
+ {
+ label: 'Need parallel processing with isolation',
+ icon: '\u25a4',
+ slugs: ['batching'],
+ },
+ {
+ label: 'Orchestrate many child workflows',
+ icon: '\u2b50',
+ slugs: ['child-workflows'],
+ },
+ ],
+ },
+ },
+ {
+ label: 'Send notifications & alerts',
+ icon: '\u2192',
+ next: {
+ id: 'notify',
+ question: 'How should they be sent?',
+ branches: [
+ {
+ label: 'All at once, in parallel',
+ icon: '\u2ad8',
+ slugs: ['fan-out'],
+ },
+ {
+ label: 'Spread out over days or weeks',
+ icon: '\u25f4',
+ slugs: ['scheduling'],
+ },
+ ],
+ },
+ },
+ {
+ label: 'Wait for a webhook or callback',
+ icon: '\u2193',
+ slugs: ['webhooks'],
+ },
+ {
+ label: 'Integrate with Vercel products',
+ icon: '\u25b2',
+ next: {
+ id: 'integrate',
+ question: 'Which product?',
+ branches: [
+ {
+ label: 'AI SDK',
+ icon: '\u2605',
+ slugs: ['ai-sdk'],
+ },
+ {
+ label: 'Chat SDK',
+ icon: '\u2328',
+ slugs: ['chat-sdk'],
+ },
+ {
+ label: 'Sandbox',
+ icon: '\u2610',
+ slugs: ['sandbox'],
+ },
+ ],
+ },
+ },
+ {
+ label: 'Advanced internals',
+ icon: '\u2699',
+ slugs: [
+ 'serializable-steps',
+ 'durable-objects',
+ 'isomorphic-packages',
+ 'secure-credentials',
+ 'custom-serialization',
+ 'publishing-libraries',
+ ],
+ },
+ ],
+};
diff --git a/docs/lib/geistdocs/cookbook-source.ts b/docs/lib/geistdocs/cookbook-source.ts
new file mode 100644
index 0000000000..60a0e3358b
--- /dev/null
+++ b/docs/lib/geistdocs/cookbook-source.ts
@@ -0,0 +1,90 @@
+import type { Node, Root } from 'fumadocs-core/page-tree';
+import {
+ categoryLabels,
+ categoryOrder,
+ recipes,
+ type RecipeCategory,
+} from '../cookbook-tree';
+import { source } from './source';
+
+const COOKBOOK_DOCS_PREFIX_RE = /\/docs\/cookbook(?=\/|$)/g;
+
+type FolderNode = Extract<Node, { type: 'folder' }>;
+type PageNode = Extract<Node, { type: 'page' }>;
+
+export function rewriteCookbookUrl(url: string): string {
+ return url.replace(COOKBOOK_DOCS_PREFIX_RE, '/cookbook');
+}
+
+export function rewriteCookbookUrlsInText(text: string): string {
+ return text.replace(COOKBOOK_DOCS_PREFIX_RE, '/cookbook');
+}
+
+function isCookbookFolder(node: Node): boolean {
+ return (
+ node.type === 'folder' &&
+ (node.index?.url?.startsWith('/docs/cookbook') ?? false)
+ );
+}
+
+/**
+ * Return the docs page tree with cookbook nodes removed.
+ * Used by the docs layout so the sidebar never shows cookbook entries.
+ */
+export function getDocsTreeWithoutCookbook(lang: string): Root {
+ const fullTree = source.pageTree[lang];
+ return {
+ ...fullTree,
+ children: fullTree.children.filter((node) => !isCookbookFolder(node)),
+ };
+}
+
+function createOverviewPage(): PageNode {
+ return {
+ type: 'page',
+ $id: 'cookbook__overview',
+ name: 'Overview',
+ url: '/cookbook',
+ } as PageNode;
+}
+
+function createRecipePage(category: RecipeCategory, slug: string): PageNode {
+ const recipe = recipes[slug];
+ return {
+ type: 'page',
+ $id: `cookbook__${slug}`,
+ name: recipe.title,
+ url: `/cookbook/${category}/${slug}`,
+ } as PageNode;
+}
+
+function createCategoryFolder(category: RecipeCategory): FolderNode {
+ const categoryRecipes = Object.values(recipes).filter(
+ (recipe) => recipe.category === category
+ );
+ return {
+ type: 'folder',
+ $id: `cookbook__${category}`,
+ name: categoryLabels[category],
+ children: categoryRecipes.map((recipe) =>
+ createRecipePage(category as RecipeCategory, recipe.slug)
+ ),
+ } as FolderNode;
+}
+
+/**
+ * Build a standalone cookbook sidebar tree from cookbook-tree metadata.
+ * No longer depends on locating a cookbook node inside the docs page tree.
+ */
+export function getCookbookTree(lang: string): Root {
+ const fullTree = source.pageTree[lang];
+
+ return {
+ ...fullTree,
+ name: 'Cookbook',
+ children: [
+ createOverviewPage(),
+ ...categoryOrder.map((category) => createCategoryFolder(category)),
+ ],
+ };
+}
diff --git a/docs/next.config.ts b/docs/next.config.ts
index 24eb802ab1..8333357460 100644
--- a/docs/next.config.ts
+++ b/docs/next.config.ts
@@ -18,6 +18,9 @@ const config: NextConfig = {
},
async rewrites() {
+ const markdownAcceptHeader =
+ '(?=.*(?:text/plain|text/markdown))(?!.*text/html.*(?:text/plain|text/markdown)).*';
+
return {
beforeFiles: [
{
@@ -31,10 +34,29 @@ const config: NextConfig = {
{
type: 'header',
key: 'Accept',
- // Have text/markdown or text/plain but before any text/html
- // Note, that Claude Code currently requests text/plain
- value:
- '(?=.*(?:text/plain|text/markdown))(?!.*text/html.*(?:text/plain|text/markdown)).*',
+ value: markdownAcceptHeader,
+ },
+ ],
+ },
+ {
+ source: '/cookbook',
+ destination: '/llms.mdx/cookbook',
+ has: [
+ {
+ type: 'header',
+ key: 'Accept',
+ value: markdownAcceptHeader,
+ },
+ ],
+ },
+ {
+ source: '/cookbook/:path*',
+ destination: '/llms.mdx/cookbook/:path*',
+ has: [
+ {
+ type: 'header',
+ key: 'Accept',
+ value: markdownAcceptHeader,
},
],
},
@@ -49,6 +71,26 @@ const config: NextConfig = {
destination: '/docs/getting-started',
permanent: true,
},
+ {
+ source: '/docs/cookbook',
+ destination: '/cookbook',
+ permanent: true,
+ },
+ {
+ source: '/docs/cookbook/:path*',
+ destination: '/cookbook/:path*',
+ permanent: true,
+ },
+ {
+ source: '/cookbooks',
+ destination: '/cookbook',
+ permanent: true,
+ },
+ {
+ source: '/cookbooks/:path*',
+ destination: '/cookbook/:path*',
+ permanent: true,
+ },
{
source: '/err/:slug',
destination: '/docs/errors/:slug',