diff --git a/apps/docs/docs.json b/apps/docs/docs.json
index 22b3e9ff..2bbfdc0a 100644
--- a/apps/docs/docs.json
+++ b/apps/docs/docs.json
@@ -185,6 +185,17 @@
],
"tab": "Developer Platform"
},
+ {
+ "icon": "book-open",
+ "anchors": [
+ {
+ "anchor": "API Reference",
+ "icon": "unplug",
+ "openapi": "https://api.supermemory.ai/v3/openapi"
+ }
+ ],
+ "tab": "API Reference"
+ },
{
"icon": "plug",
"anchors": [
@@ -234,15 +245,35 @@
"tab": "SDKs"
},
{
- "icon": "book-open",
+ "icon": "flask-conical",
"anchors": [
{
- "anchor": "API Reference",
- "icon": "unplug",
- "openapi": "https://api.supermemory.ai/v3/openapi"
+ "anchor": "MemoryBench",
+ "icon": "flask-conical",
+ "pages": [
+ "memorybench/overview",
+ "memorybench/github",
+ {
+ "group": "Getting Started",
+ "pages": ["memorybench/installation", "memorybench/quickstart"]
+ },
+ {
+ "group": "Development",
+ "pages": [
+ "memorybench/architecture",
+ "memorybench/extend-provider",
+ "memorybench/extend-benchmark",
+ "memorybench/contributing"
+ ]
+ },
+ {
+ "group": "Reference",
+ "pages": ["memorybench/cli", "memorybench/integrations", "memorybench/supported-models"]
+ }
+ ]
}
],
- "tab": "API Reference"
+ "tab": "MemoryBench"
},
{
"icon": "chef-hat",
@@ -269,7 +300,6 @@
],
"tab": "Cookbook"
},
-
{
"icon": "list-ordered",
"anchors": [
diff --git a/apps/docs/memorybench/architecture.mdx b/apps/docs/memorybench/architecture.mdx
new file mode 100644
index 00000000..5d087817
--- /dev/null
+++ b/apps/docs/memorybench/architecture.mdx
@@ -0,0 +1,99 @@
+---
+title: "Architecture"
+description: "Understanding MemoryBench's design and implementation"
+sidebarTitle: "Architecture"
+---
+
+## System Overview
+
+```mermaid
+flowchart TB
+ B["Benchmarks<br/>(LoCoMo, LongMemEval..)"]
+ P["Providers<br/>(Supermemory, Mem0, Zep)"]
+ J["Judges<br/>(GPT-4o, Claude..)"]
+
+ B --> O[Orchestrator]
+ P --> O
+ J --> O
+
+ O --> Pipeline
+
+ subgraph Pipeline[" "]
+ direction LR
+ I[Ingest] --> IX[Indexing] --> S[Search] --> A[Answer] --> E[Evaluate]
+ end
+
+ style B fill:#E0F2FE,stroke:#0369A1,color:#0C4A6E
+ style P fill:#E0F2FE,stroke:#0369A1,color:#0C4A6E
+ style J fill:#E0F2FE,stroke:#0369A1,color:#0C4A6E
+ style O fill:#0369A1,stroke:#0369A1,color:#fff
+ style I fill:#F1F5F9,stroke:#64748B,color:#334155
+ style IX fill:#F1F5F9,stroke:#64748B,color:#334155
+ style S fill:#F1F5F9,stroke:#64748B,color:#334155
+ style A fill:#F1F5F9,stroke:#64748B,color:#334155
+ style E fill:#F1F5F9,stroke:#64748B,color:#334155
+```
+
+## Core Components
+
+| Component | Role |
+|-----------|------|
+| **Benchmarks** | Load test data and provide questions with ground truth answers |
+| **Providers** | Memory services being evaluated (handle ingestion and search) |
+| **Judges** | LLM-based evaluators that score answers against ground truth |
+
+See [Integrations](/memorybench/integrations) for all supported benchmarks and providers, and [Supported Models](/memorybench/supported-models) for the available judge and answering models.
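+
+For judges there is no extension guide yet; as a mental model (an illustrative sketch, not the actual code in `src/judges/`), a judge boils down to something like:
+
+```typescript
+// Illustrative only: see src/judges/ (openai.ts, anthropic.ts, google.ts)
+// for the real implementations.
+interface JudgeVerdict {
+  correct: boolean
+  reasoning?: string
+}
+
+interface Judge {
+  name: string // e.g. "gpt-4o"
+  evaluate(
+    question: string,
+    groundTruth: string,
+    hypothesis: string,
+  ): Promise<JudgeVerdict>
+}
+```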
+
+## Pipeline
+
+```mermaid
+flowchart LR
+ A[Ingest] --> B[Index] --> C[Search] --> D[Answer] --> E[Evaluate] --> F[Report]
+
+ style A fill:#E0F2FE,stroke:#0369A1,color:#0C4A6E
+ style B fill:#E0F2FE,stroke:#0369A1,color:#0C4A6E
+ style C fill:#E0F2FE,stroke:#0369A1,color:#0C4A6E
+ style D fill:#E0F2FE,stroke:#0369A1,color:#0C4A6E
+ style E fill:#E0F2FE,stroke:#0369A1,color:#0C4A6E
+ style F fill:#DCFCE7,stroke:#16A34A,color:#166534
+```
+
+| Phase | What Happens |
+|-------|--------------|
+| **Ingest** | Load benchmark sessions → Push to provider |
+| **Index** | Wait for provider indexing |
+| **Search** | Query provider → Retrieve context |
+| **Answer** | Build prompt → Generate answer via LLM |
+| **Evaluate** | Compare to ground truth → Score via judge |
+| **Report** | Aggregate scores → Output accuracy + latency |
+
+Each phase checkpoints independently, so a failed run resumes from the last successful point.
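+
+To make this concrete, here is a minimal sketch of phase-level checkpointing, with illustrative names rather than MemoryBench's actual orchestrator code:
+
+```typescript
+import { mkdir, readFile, writeFile } from "node:fs/promises"
+
+// Phase names mirror the table above.
+type PhaseName = "ingest" | "index" | "search" | "answer" | "evaluate" | "report"
+const PHASES: PhaseName[] = ["ingest", "index", "search", "answer", "evaluate", "report"]
+
+interface Checkpoint {
+  completedPhases: PhaseName[]
+}
+
+async function loadCheckpoint(runId: string): Promise<Checkpoint> {
+  try {
+    return JSON.parse(await readFile(`data/runs/${runId}/checkpoint.json`, "utf8"))
+  } catch {
+    return { completedPhases: [] } // no checkpoint yet: fresh run
+  }
+}
+
+async function runPipeline(runId: string, phases: Record<PhaseName, () => Promise<void>>) {
+  await mkdir(`data/runs/${runId}`, { recursive: true })
+  const checkpoint = await loadCheckpoint(runId)
+  for (const name of PHASES) {
+    if (checkpoint.completedPhases.includes(name)) continue // resume: skip finished phases
+    await phases[name]()
+    checkpoint.completedPhases.push(name)
+    await writeFile(`data/runs/${runId}/checkpoint.json`, JSON.stringify(checkpoint, null, 2))
+  }
+}
+```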
+
+## Checkpointing
+
+Runs persist to `data/runs/{runId}/`:
+
+```
+data/runs/my-run/
+├── checkpoint.json # Run state and progress
+├── results/ # Search results per question
+└── report.json # Final report
+```
+
+Re-running with the same run ID resumes from the checkpoint. Use `--force` to restart from scratch.
+
+## File Structure
+
+```
+src/
+├── cli/commands/            # run, compare, test, serve, status...
+├── orchestrator/phases/     # ingest, search, answer, evaluate, report
+├── benchmarks/
+│   └── <benchmark>/index.ts # e.g. locomo/, longmemeval/, convomem/
+├── providers/
+│   └── <provider>/
+│       ├── index.ts         # Provider implementation
+│       └── prompts.ts       # Custom prompts (optional)
+├── judges/                  # openai.ts, anthropic.ts, google.ts
+└── types/                   # provider.ts, benchmark.ts, unified.ts
+```
diff --git a/apps/docs/memorybench/cli.mdx b/apps/docs/memorybench/cli.mdx
new file mode 100644
index 00000000..3ab5c503
--- /dev/null
+++ b/apps/docs/memorybench/cli.mdx
@@ -0,0 +1,117 @@
+---
+title: "CLI Reference"
+description: "Command-line interface for running MemoryBench evaluations"
+sidebarTitle: "CLI"
+---
+
+## Commands
+
+### run
+
+Execute the full benchmark pipeline.
+
+```bash
+bun run src/index.ts run -p <provider> -b <benchmark> -j <judge> -r <run-id>
+```
+
+| Option | Description |
+|--------|-------------|
+| `-p, --provider` | Memory provider (`supermemory`, `mem0`, `zep`) |
+| `-b, --benchmark` | Benchmark (`locomo`, `longmemeval`, `convomem`) |
+| `-j, --judge` | Judge model (default: `gpt-4o`) |
+| `-r, --run-id` | Run identifier (auto-generated if omitted) |
+| `-m, --answering-model` | Model for answer generation (default: `gpt-4o`) |
+| `-l, --limit` | Limit number of questions |
+| `-s, --sample` | Sample N questions per category |
+| `--sample-type` | Sampling strategy: `consecutive` (default), `random` |
+| `--force` | Clear checkpoint and restart |
+
+See [Supported Models](/memorybench/supported-models) for all available judge and answering models.
+
+---
+
+### compare
+
+Run benchmark across multiple providers in parallel.
+
+```bash
+bun run src/index.ts compare -p supermemory,mem0,zep -b locomo -j gpt-4o
+```
+
+---
+
+### test
+
+Evaluate a single question for debugging.
+
+```bash
+bun run src/index.ts test -r <run-id> -q <question-id>
+```
+
+---
+
+### status
+
+Check progress of a run.
+
+```bash
+bun run src/index.ts status -r <run-id>
+```
+
+---
+
+### show-failures
+
+Debug failed questions with full context.
+
+```bash
+bun run src/index.ts show-failures -r <run-id>
+```
+
+---
+
+### list-questions
+
+Browse benchmark questions.
+
+```bash
+bun run src/index.ts list-questions -b <benchmark>
+```
+
+---
+
+### Random Sampling
+
+Sample N questions per category with optional randomization.
+
+```bash
+bun run src/index.ts run -p supermemory -b longmemeval -s 3 --sample-type random
+```
+
+---
+
+### serve
+
+Start the web UI.
+
+```bash
+bun run src/index.ts serve
+```
+
+Opens at [http://localhost:3000](http://localhost:3000).
+
+---
+
+### help
+
+Get help on providers, models, or benchmarks.
+
+```bash
+bun run src/index.ts help providers
+bun run src/index.ts help models
+bun run src/index.ts help benchmarks
+```
+
+## Checkpointing
+
+Runs are saved to `data/runs/{runId}/` and automatically resume from the last successful phase. Use `--force` to restart.
diff --git a/apps/docs/memorybench/contributing.mdx b/apps/docs/memorybench/contributing.mdx
new file mode 100644
index 00000000..2f8e45e2
--- /dev/null
+++ b/apps/docs/memorybench/contributing.mdx
@@ -0,0 +1,89 @@
+---
+title: "Contributing"
+description: "Guidelines for contributing to MemoryBench"
+sidebarTitle: "Contributing"
+---
+
+## Getting Started
+
+1. Fork the repository
+2. Clone your fork:
+ ```bash
+ git clone https://github.com/YOUR_USERNAME/memorybench
+ cd memorybench
+ bun install
+ ```
+3. Create a branch:
+ ```bash
+ git checkout -b feature/your-feature
+ ```
+
+## Development Workflow
+
+### Running Tests
+
+```bash
+bun test
+```
+
+### Running the CLI
+
+```bash
+bun run src/index.ts
+```
+
+### Running the Web UI
+
+```bash
+cd ui
+bun run dev
+```
+
+## Code Structure
+
+| Directory | Purpose |
+|-----------|---------|
+| `src/cli/` | CLI commands |
+| `src/orchestrator/` | Pipeline execution |
+| `src/benchmarks/` | Benchmark adapters |
+| `src/providers/` | Provider integrations |
+| `src/judges/` | LLM judge implementations |
+| `src/types/` | TypeScript interfaces |
+| `ui/` | Next.js web interface |
+
+## Contribution Types
+
+### Adding a Provider
+
+See [Extending MemoryBench](/memorybench/extend-provider) for the full guide.
+
+1. Create `src/providers/yourprovider/index.ts`
+2. Implement the `Provider` interface
+3. Register in `src/providers/index.ts`
+4. Add config in `src/utils/config.ts`
+5. Submit PR with tests
+
+### Adding a Benchmark
+
+1. Create `src/benchmarks/yourbenchmark/index.ts`
+2. Implement the `Benchmark` interface
+3. Register in `src/benchmarks/index.ts`
+4. Document question types
+5. Submit PR with sample data
+
+### Bug Fixes
+
+1. Create an issue describing the bug
+2. Reference the issue in your PR
+3. Include test cases that reproduce the bug
+
+## Pull Request Guidelines
+
+- Keep PRs focused on a single change
+- Update documentation if needed
+- Ensure all tests pass
+- Follow existing code style
+
+## Questions?
+
+Open an issue on [GitHub](https://github.com/supermemoryai/memorybench/issues).
diff --git a/apps/docs/memorybench/extend-benchmark.mdx b/apps/docs/memorybench/extend-benchmark.mdx
new file mode 100644
index 00000000..c66cf4d9
--- /dev/null
+++ b/apps/docs/memorybench/extend-benchmark.mdx
@@ -0,0 +1,75 @@
+---
+title: "Extend Benchmark"
+description: "Add a custom benchmark dataset to MemoryBench"
+sidebarTitle: "Extend Benchmark"
+---
+
+## Benchmark Interface
+
+```typescript
+interface Benchmark {
+  name: string
+  load(config?: BenchmarkConfig): Promise<void>
+  getQuestions(filter?: QuestionFilter): UnifiedQuestion[]
+  getHaystackSessions(questionId: string): UnifiedSession[]
+  getGroundTruth(questionId: string): string
+  getQuestionTypes(): QuestionTypeRegistry
+}
+```
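+
+The unified types above are defined in `src/types/` (see `unified.ts` in the [Architecture](/memorybench/architecture) file structure). As a rough sketch of the shapes a benchmark adapter produces (the field names here are assumptions, so check the real definitions):
+
+```typescript
+// Rough sketch: consult src/types/unified.ts for the actual definitions.
+interface UnifiedMessage {
+  role: "user" | "assistant"
+  content: string
+  timestamp?: string
+}
+
+interface UnifiedSession {
+  sessionId: string
+  messages: UnifiedMessage[]
+  metadata?: Record<string, unknown>
+}
+
+interface UnifiedQuestion {
+  questionId: string
+  question: string
+  groundTruth: string
+  category?: string // maps to the ids from getQuestionTypes()
+}
+```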
+
+---
+
+## Adding a Custom Benchmark
+
+### 1. Create the Benchmark
+
+```typescript
+// src/benchmarks/mybenchmark/index.ts
+import type { Benchmark, QuestionFilter, UnifiedQuestion, UnifiedSession } from "../../types"
+
+export class MyBenchmark implements Benchmark {
+  name = "mybenchmark"
+  private questions: UnifiedQuestion[] = []
+  private sessions: Map<string, UnifiedSession[]> = new Map()
+
+  async load() {
+    const data = await this.loadDataset()
+    this.processData(data)
+  }
+
+  // Dataset-specific helpers: fetch or read the raw data, then populate
+  // `questions` and `sessions`. Stub bodies shown for illustration.
+  private async loadDataset(): Promise<unknown> { return [] }
+  private processData(_data: unknown) {}
+
+  getQuestions(filter?: QuestionFilter) {
+    let result = [...this.questions]
+    if (filter?.limit) result = result.slice(0, filter.limit)
+    return result
+  }
+
+  getHaystackSessions(questionId: string) {
+    return this.sessions.get(questionId) || []
+  }
+
+  getGroundTruth(questionId: string) {
+    return this.questions.find(q => q.questionId === questionId)?.groundTruth || ""
+  }
+
+  getQuestionTypes() {
+    return {
+      "type1": { id: "type1", description: "Type 1 questions" },
+      "type2": { id: "type2", description: "Type 2 questions" },
+    }
+  }
+}
+```
+
+### 2. Register the Benchmark
+
+```typescript
+// src/benchmarks/index.ts
+import { MyBenchmark } from "./mybenchmark"
+
+export const benchmarks = {
+  locomo: LoComoBenchmark,
+  longmemeval: LongMemEvalBenchmark,
+  convomem: ConvoMemBenchmark,
+  mybenchmark: MyBenchmark, // Add here
+}
+```
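+
+Once registered, the CLI should pick the new benchmark up by name, e.g. `bun run src/index.ts run -p supermemory -b mybenchmark -j gpt-4o`, assuming the `run` command resolves benchmarks from this registry.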
diff --git a/apps/docs/memorybench/extend-provider.mdx b/apps/docs/memorybench/extend-provider.mdx
new file mode 100644
index 00000000..f3fec92e
--- /dev/null
+++ b/apps/docs/memorybench/extend-provider.mdx
@@ -0,0 +1,118 @@
+---
+title: "Extend Provider"
+description: "Add a custom memory provider to MemoryBench"
+sidebarTitle: "Extend Provider"
+---
+
+## Provider Interface
+
+```typescript
+interface Provider {
+  name: string
+  prompts?: ProviderPrompts
+  initialize(config: ProviderConfig): Promise<void>
+  ingest(sessions: UnifiedSession[], options: IngestOptions): Promise<IngestResult>
+  awaitIndexing(result: IngestResult, containerTag: string): Promise<void>
+  search(query: string, options: SearchOptions): Promise<SearchResult[]>
+  clear(containerTag: string): Promise<void>
+}
+```
+
+---
+
+## Adding a Custom Provider
+
+### 1. Create the Provider
+
+```typescript
+// src/providers/myprovider/index.ts
+import type { IngestOptions, IngestResult, Provider, ProviderConfig, SearchOptions, UnifiedSession } from "../../types"
+
+export class MyProvider implements Provider {
+  name = "myprovider"
+  // MyClient is a placeholder for your service's SDK client
+  private client: MyClient | null = null
+
+  async initialize(config: ProviderConfig) {
+    this.client = new MyClient({ apiKey: config.apiKey })
+  }
+
+  async ingest(sessions: UnifiedSession[], options: IngestOptions) {
+    const documentIds: string[] = []
+    for (const session of sessions) {
+      const response = await this.client.add({
+        content: JSON.stringify(session.messages),
+        metadata: session.metadata,
+      })
+      documentIds.push(response.id)
+    }
+    return { documentIds }
+  }
+
+  async awaitIndexing(result: IngestResult) {
+    // Poll until indexing is complete
+  }
+
+  async search(query: string, options: SearchOptions) {
+    return await this.client.search({ q: query, limit: 10 })
+  }
+
+  async clear(containerTag: string) {
+    await this.client.delete(containerTag)
+  }
+}
+```
+
+### 2. Register the Provider
+
+```typescript
+// src/providers/index.ts
+import { MyProvider } from "./myprovider"
+
+export const providers = {
+  supermemory: SupermemoryProvider,
+  mem0: Mem0Provider,
+  zep: ZepProvider,
+  myprovider: MyProvider, // Add here
+}
+```
+
+### 3. Add Configuration
+
+```typescript
+// src/utils/config.ts
+case "myprovider":
+  return {
+    apiKey: process.env.MYPROVIDER_API_KEY!,
+  }
+```
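+
+With the provider registered and `MYPROVIDER_API_KEY` set in `.env.local`, a run such as `bun run src/index.ts run -p myprovider -b locomo -j gpt-4o` should exercise the new integration end to end.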
+
+---
+
+## Custom Prompts
+
+Providers can define custom answer and judge prompts for better results.
+
+```typescript
+// src/providers/myprovider/prompts.ts
+import type { ProviderPrompts } from "../../types"
+
+export const MY_PROMPTS: ProviderPrompts = {
+  answerPrompt: (question, context, questionDate) => {
+    return `Based on context:\n${context}\n\nAnswer: ${question}`
+  },
+
+  judgePrompt: (question, groundTruth, hypothesis) => ({
+    default: "Compare answer to ground truth...",
+    temporal: "Allow off-by-one for dates...",
+    adversarial: "Check if model correctly abstained...",
+  }),
+}
+```
+
+Then reference in your provider:
+
+```typescript
+export class MyProvider implements Provider {
+  name = "myprovider"
+  prompts = MY_PROMPTS // Custom prompts
+  // ...
+}
+```
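+
+The judge prompt keys above mirror benchmark question categories (`temporal`, `adversarial`, and so on); presumably the judge falls back to `default` when no category-specific prompt is defined.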
diff --git a/apps/docs/memorybench/github.mdx b/apps/docs/memorybench/github.mdx
new file mode 100644
index 00000000..34468d21
--- /dev/null
+++ b/apps/docs/memorybench/github.mdx
@@ -0,0 +1,5 @@
+---
+title: "MemoryBench on GitHub"
+url: "https://github.com/supermemoryai/memorybench"
+icon: github
+---
diff --git a/apps/docs/memorybench/installation.mdx b/apps/docs/memorybench/installation.mdx
new file mode 100644
index 00000000..ef21cac8
--- /dev/null
+++ b/apps/docs/memorybench/installation.mdx
@@ -0,0 +1,60 @@
+---
+title: "Installation"
+description: "Get MemoryBench up and running in your environment"
+sidebarTitle: "Installation"
+---
+
+## Prerequisites
+
+- [Bun](https://bun.sh) runtime installed
+- API keys for providers and LLM judges you want to use
+
+## Install MemoryBench
+
+```bash
+git clone https://github.com/supermemoryai/memorybench
+cd memorybench
+bun install
+```
+
+## Configure API Keys
+
+Create a `.env.local` file in the root directory:
+
+```bash
+# Memory Providers (add keys for providers you want to test)
+SUPERMEMORY_API_KEY=your_key
+MEM0_API_KEY=your_key
+ZEP_API_KEY=your_key
+
+# LLM Judges (at least one required)
+OPENAI_API_KEY=your_key
+ANTHROPIC_API_KEY=your_key
+GOOGLE_API_KEY=your_key
+```
+
+<Note>
+You only need API keys for the providers and judges you plan to use. For example, to benchmark Supermemory with GPT-4o as the judge, you only need `SUPERMEMORY_API_KEY` and `OPENAI_API_KEY`.
+</Note>
+
+## Verify Installation
+
+```bash
+bun run src/index.ts help
+```
+
+You should see the list of available commands.
+
+## Start the Web Interface
+
+```bash
+bun run src/index.ts serve
+```
+
+Opens at [http://localhost:3000](http://localhost:3000).
+
+## Next Steps
+
+- [CLI Reference](/memorybench/cli) - Play around with MemoryBench
+- [Architecture](/memorybench/architecture) - Understand how MemoryBench works
+- [Extend MemoryBench](/memorybench/extend-provider) - Add custom providers, benchmarks, and prompts
diff --git a/apps/docs/memorybench/integrations.mdx b/apps/docs/memorybench/integrations.mdx
new file mode 100644
index 00000000..e4532d50
--- /dev/null
+++ b/apps/docs/memorybench/integrations.mdx
@@ -0,0 +1,39 @@
+---
+title: "Integrations"
+description: "Supported benchmarks and providers in MemoryBench"
+sidebarTitle: "Integrations"
+---
+
+## Benchmarks
+
+| Benchmark | Description | Source | Categories |
+|-----------|-------------|--------|------------|
+| LoCoMo | Long-context memory benchmark testing fact recall across extended conversations | [snap-research/locomo](https://github.com/snap-research/locomo) | `single-hop`, `multi-hop`, `temporal`, `world-knowledge`, `adversarial` |
+| LongMemEval | Long-term memory evaluation across multiple sessions with knowledge updates | [xiaowu0162/longmemeval](https://huggingface.co/datasets/xiaowu0162/longmemeval-cleaned) | `single-session-user`, `single-session-assistant`, `multi-session`, `temporal-reasoning`, `knowledge-update` |
+| ConvoMem | Conversational memory focused on personalization and preference learning | [Salesforce/ConvoMem](https://huggingface.co/datasets/Salesforce/ConvoMem) | `user_evidence`, `assistant_facts_evidence`, `preference_evidence`, `changing_evidence`, `abstention_evidence` |
+
+<Note>
+We're actively adding support for more benchmarks. [Contribute your own](/memorybench/extend-benchmark) or [create a feature request](https://github.com/supermemoryai/memorybench/issues).
+</Note>
+
+---
+
+## Providers
+
+<CardGroup cols={3}>
+  <Card title="Supermemory">
+    Chunk-based semantic search
+  </Card>
+  <Card title="Mem0">
+    LLM-powered memory extraction
+  </Card>
+  <Card title="Zep">
+    Knowledge graph construction
+  </Card>
+</CardGroup>
+
+<Note>
+We're actively adding support for more providers. [Contribute your own](/memorybench/extend-provider) or [create a feature request](https://github.com/supermemoryai/memorybench/issues).
+</Note>
diff --git a/apps/docs/memorybench/overview.mdx b/apps/docs/memorybench/overview.mdx
new file mode 100644
index 00000000..14e01466
--- /dev/null
+++ b/apps/docs/memorybench/overview.mdx
@@ -0,0 +1,53 @@
+---
+title: "MemoryBench"
+description: "Open-source framework for standardized, reproducible benchmarks of memory layer providers"
+sidebarTitle: "Overview"
+icon: "flask-conical"
+---
+
+Our goal is to make memory evaluation more rigorous, accessible, and aligned with industry standards. Design and run evaluations tailored to your specific needs, or run industry-standard benchmarks on any memory provider. With MemoryBench, you can build trust in a provider through transparent, reproducible, and domain-relevant evaluations.
+
+<CardGroup cols={2}>
+  <Card title="Web UI">
+```bash
+bun run src/index.ts serve
+```
+  </Card>
+  <Card title="Installation" href="/memorybench/installation">
+    Get MemoryBench up and running in your environment
+  </Card>
+  <Card title="CLI Reference" href="/memorybench/cli">
+    Command-line interface for running evaluations
+  </Card>
+  <Card title="Architecture" href="/memorybench/architecture">
+    Understanding MemoryBench's design and implementation
+  </Card>
+</CardGroup>
+
+
+## Works with any memory provider
+
+<CardGroup cols={3}>
+  <Card title="Supermemory">
+    Cloud-based memory layer
+  </Card>
+  <Card title="Zep">
+    Graph-based memory
+  </Card>
+  <Card title="Mem0">
+    Long-term memory for AI
+  </Card>
+</CardGroup>
+
+<Note>
+We're actively adding support for more providers. [Contribute your own](/memorybench/extend-provider) or [create a feature request](https://github.com/supermemoryai/memorybench/issues).
+</Note>
+
+
+## Contribute
+
+<Card title="GitHub Issues" icon="github" href="https://github.com/supermemoryai/memorybench/issues">
+  Found a bug or have a feature request? Let us know.
+</Card>
diff --git a/apps/docs/memorybench/quickstart.mdx b/apps/docs/memorybench/quickstart.mdx
new file mode 100644
index 00000000..e52094a9
--- /dev/null
+++ b/apps/docs/memorybench/quickstart.mdx
@@ -0,0 +1,61 @@
+---
+title: "Quick Start"
+description: "Run your first benchmark evaluation in 3 steps"
+sidebarTitle: "Quick Start"
+---
+
+## 1. Run Your First Benchmark
+
+```bash
+bun run src/index.ts run -p supermemory -b longmemeval -j gpt-4o -r my-first-run
+```
+
+## 2. View Results
+
+### Option A: Web UI
+
+```bash
+bun run src/index.ts serve
+```
+
+Open [http://localhost:3000](http://localhost:3000) to see results visually.
+
+### Option B: CLI
+
+```bash
+# Check run status
+bun run src/index.ts status -r my-first-run
+
+# View failed questions for debugging
+bun run src/index.ts show-failures -r my-first-run
+```
+
+## 3. Compare Providers
+
+Run the same benchmark across multiple providers:
+
+```bash
+bun run src/index.ts compare -p supermemory,mem0,zep -b locomo -j gpt-4o
+```
+
+Results are saved to `data/runs/{runId}/report.json`.
+
+## Sample Output
+
+```json
+{
+ "accuracy": 0.72,
+ "accuracyByType": {
+ "single-hop": 0.85,
+ "multi-hop": 0.65,
+ "temporal": 0.70,
+ "adversarial": 0.68
+ },
+ "avgLatency": 1250,
+ "totalQuestions": 50
+}
+```
+
+## What's Next
+
+Head to [CLI Reference](/memorybench/cli) to play around with all the commands, or check out [Architecture](/memorybench/architecture) to understand how MemoryBench works under the hood.
diff --git a/apps/docs/memorybench/supported-models.mdx b/apps/docs/memorybench/supported-models.mdx
new file mode 100644
index 00000000..fd374c64
--- /dev/null
+++ b/apps/docs/memorybench/supported-models.mdx
@@ -0,0 +1,49 @@
+---
+title: "Supported Models"
+description: "Available models for judges and answer generation"
+sidebarTitle: "Supported Models"
+---
+
+Models available for evaluation judges and answer generation in MemoryBench.
+
+## OpenAI
+
+| Model Name | Slug |
+|------------|------|
+| GPT-4o | `gpt-4o` |
+| GPT-4o Mini | `gpt-4o-mini` |
+| GPT-4.1 | `gpt-4.1` |
+| GPT-4.1 Mini | `gpt-4.1-mini` |
+| GPT-4.1 Nano | `gpt-4.1-nano` |
+| GPT-5 | `gpt-5` |
+| GPT-5 Mini | `gpt-5-mini` |
+| o1 | `o1` |
+| o1 Pro | `o1-pro` |
+| o3 | `o3` |
+| o3 Mini | `o3-mini` |
+| o3 Pro | `o3-pro` |
+| o4 Mini | `o4-mini` |
+
+## Anthropic
+
+| Model Name | Slug |
+|------------|------|
+| Claude Opus 4.5 | `opus-4.5` |
+| Claude Sonnet 4.5 | `sonnet-4.5` |
+| Claude Haiku 4.5 | `haiku-4.5` |
+| Claude Opus 4.1 | `opus-4.1` |
+| Claude Sonnet 4 | `sonnet-4` |
+
+## Google
+
+| Model Name | Slug |
+|------------|------|
+| Gemini 2.5 Pro | `gemini-2.5-pro` |
+| Gemini 2.5 Flash | `gemini-2.5-flash` |
+| Gemini 2.5 Flash Lite | `gemini-2.5-flash-lite` |
+| Gemini 2.0 Flash | `gemini-2.0-flash` |
+| Gemini 3 Pro Preview | `gemini-3-pro-preview` |
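+
+Pass these slugs to the CLI's `-j/--judge` and `-m/--answering-model` flags, for example `bun run src/index.ts run -p supermemory -b locomo -j sonnet-4.5 -m gemini-2.5-flash`.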
+
+<Note>
+Make sure you have the corresponding API key set in your `.env.local` for the model you want to use.
+</Note>