sveltejs
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 38 additions & 57 deletions b/‎README.md‎
Lines changed: 38 additions & 57 deletions
diff --git a/‎bun.lock‎
Lines changed: 7 additions & 0 deletions b/‎bun.lock‎
Lines changed: 7 additions & 0 deletions
@@ -40,3 +40,5 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
 results/*
 !results/.gitkeep
 !results/*.json
+.vercel
+.env*.local
@@ -1,6 +1,6 @@
 # ai-sdk-bench
 
-AI SDK benchmarking tool that tests AI agents with MCP (Model Context Protocol) integration. Automatically discovers and runs all tests in the `tests/` directory, verifying LLM-generated Svelte components against test suites.
+AI SDK benchmarking tool that tests AI agents with MCP (Model Context Protocol) integration using the Vercel AI Gateway. Automatically discovers and runs all tests in the `tests/` directory, verifying LLM-generated Svelte components against test suites.
 
 ## Installation
 
@@ -12,84 +12,60 @@ bun install
 
 ## Setup
 
-To set up `.env`:
+Configure your API keys in `.env`:
 
 ```bash
 cp .env.example .env
 ```
 
-Then configure your API keys and model in `.env`:
+Then add the necessary API key using `vercel env pull`.
 
-```bash
-# Required: Choose your model
-MODEL=anthropic/claude-sonnet-4
-ANTHROPIC_API_KEY=your_key_here
-
-# Optional: Enable MCP integration (leave empty to disable)
-MCP_SERVER_URL=https://mcp.svelte.dev/mcp
-```
-
-### Environment Variables
-
-**Required:**
-
-- `MODEL`: The AI model to use (e.g., `anthropic/claude-sonnet-4`, `openai/gpt-5`, `openrouter/anthropic/claude-sonnet-4`, `lmstudio/model-name`)
-- Corresponding API key (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, or `OPENROUTER_API_KEY`)
-  - Note: No API key required for `lmstudio/*` models (runs locally)
-
-**Optional:**
-
-- `MCP_SERVER_URL`: MCP server URL (leave empty to disable MCP integration)
-
-### Supported Providers
-
-**Cloud Providers:**
+### Required API Keys
 
-- `anthropic/*` - Direct Anthropic API (requires `ANTHROPIC_API_KEY`)
-- `openai/*` - Direct OpenAI API (requires `OPENAI_API_KEY`)
-- `openrouter/*` - OpenRouter unified API (requires `OPENROUTER_API_KEY`)
+You'll need at least one API key for the providers you want to test:
 
-**Local Providers:**
+- `VERCEL_OIDC_TOKEN`: The OIDC token for the Vercel AI Gateway
 
-- `lmstudio/*` - LM Studio local server (requires LM Studio running on `http://localhost:1234`)
+## Usage
 
-Example configurations:
+To run the benchmark:
 
 ```bash
-# Anthropic
-MODEL=anthropic/claude-sonnet-4
-ANTHROPIC_API_KEY=sk-ant-...
+bun run index.ts
+```
 
-# OpenAI
-MODEL=openai/gpt-5
-OPENAI_API_KEY=sk-...
+### Interactive CLI
 
-# OpenRouter
-MODEL=openrouter/anthropic/claude-sonnet-4
-OPENROUTER_API_KEY=sk-or-...
+The benchmark features an interactive CLI that will prompt you for configuration:
 
-# LM Studio (local)
-MODEL=lmstudio/llama-3-8b
-# No API key needed - make sure LM Studio is running!
-```
+1. **Model Selection**: Choose one or more models from the Vercel AI Gateway
+   - Select from available models in your configured providers
+   - Optionally add custom model IDs
+   - Can test multiple models in a single run
 
-## Usage
+2. **MCP Integration**: Choose your MCP configuration
+   - **No MCP Integration**: Run without external tools
+   - **MCP over HTTP**: Use HTTP-based MCP server (default: `https://mcp.svelte.dev/mcp`)
+   - **MCP over StdIO**: Use local MCP server via command (default: `npx -y @sveltejs/mcp`)
+   - Option to provide custom MCP server URL or command
 
-To run the benchmark (automatically discovers and runs all tests):
+3. **TestComponent Tool**: Enable/disable the testing tool for models
+   - Allows models to run tests during component development
+   - Enabled by default
 
-```bash
-bun run index.ts
-```
+### Benchmark Workflow
 
-The benchmark will:
+After configuration, the benchmark will:
 
 1. Discover all tests in `tests/` directory
-2. For each test:
+2. For each selected model and test:
    - Run the AI agent with the test's prompt
    - Extract the generated Svelte component
    - Verify the component against the test suite
 3. Generate a combined report with all results
 
+### Results and Reports
+
 Results are saved to the `results/` directory with timestamped filenames:
 
 - `results/result-2024-12-07-14-30-45.json` - Full execution trace with all test results
@@ -148,12 +124,17 @@ This copies each `Reference.svelte` to `Component.svelte` temporarily and runs t
 
 ## MCP Integration
 
-The tool supports optional integration with MCP (Model Context Protocol) servers:
+The tool supports optional integration with MCP (Model Context Protocol) servers through the interactive CLI. When running the benchmark, you'll be prompted to choose:
 
-- **Enabled**: Set `MCP_SERVER_URL` to a valid MCP server URL
-- **Disabled**: Leave `MCP_SERVER_URL` empty or unset
+- **No MCP Integration**: Run without external tools
+- **MCP over HTTP**: Connect to an HTTP-based MCP server
+  - Default: `https://mcp.svelte.dev/mcp`
+  - Option to provide a custom URL
+- **MCP over StdIO**: Connect to a local MCP server via command
+  - Default: `npx -y @sveltejs/mcp`
+  - Option to provide a custom command
 
-MCP status is documented in both the JSON metadata and displayed as a badge in the HTML report.
+MCP status, transport type, and server configuration are both recorded in the JSON metadata and displayed as a badge in the HTML report.
 
 ## Exit Codes
 
 
@@ -9,6 +9,7 @@
         "@ai-sdk/mcp": "0.0.11",
         "@ai-sdk/openai": "^2.0.77",
         "@ai-sdk/openai-compatible": "^1.0.28",
+        "@clack/prompts": "^0.11.0",
         "@openrouter/ai-sdk-provider": "^1.4.1",
         "@testing-library/svelte": "^5.2.9",
         "@testing-library/user-event": "^14.6.1",
@@ -54,6 +55,10 @@
 
     "@babel/runtime": ["@babel/runtime@7.28.4", "", {}, "sha512-Q/N6JNWvIvPnLDvjlE1OUBLPQHH6l3CltCEsHIujp45zQUSSh8K+gHnaEX45yAT1nyngnINhvWtzN+Nb9D8RAQ=="],
 
+    "@clack/core": ["@clack/core@0.5.0", "", { "dependencies": { "picocolors": "^1.0.0", "sisteransi": "^1.0.5" } }, "sha512-p3y0FIOwaYRUPRcMO7+dlmLh8PSRcrjuTndsiA0WAFbWES0mLZlrjVoBRZ9DzkPFJZG6KGkJmoEAY0ZcVWTkow=="],
+
+    "@clack/prompts": ["@clack/prompts@0.11.0", "", { "dependencies": { "@clack/core": "0.5.0", "picocolors": "^1.0.0", "sisteransi": "^1.0.5" } }, "sha512-pMN5FcrEw9hUkZA4f+zLlzivQSeQf5dRGJjSUbvVYDLvpKCdQx5OaknvKzgbtXOizhP+SJJJjqEbOe55uKKfAw=="],
+
     "@csstools/color-helpers": ["@csstools/color-helpers@5.1.0", "", {}, "sha512-S11EXWJyy0Mz5SYvRmY8nJYTFFd1LCNV+7cXyAgQtOOuzb4EsgfqDufL+9esx72/eLhsRdGZwaldu/h+E4t4BA=="],
 
     "@csstools/css-calc": ["@csstools/css-calc@2.1.4", "", { "peerDependencies": { "@csstools/css-parser-algorithms": "^3.0.5", "@csstools/css-tokenizer": "^3.0.4" } }, "sha512-3N8oaj+0juUw/1H3YwmDDJXCgTB1gKU6Hc/bB502u9zR0q2vd786XJH9QfrKIEgFlZmhZiq6epXl4rHqhzsIgQ=="],
@@ -446,6 +451,8 @@
 
     "siginfo": ["siginfo@2.0.0", "", {}, "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g=="],
 
+    "sisteransi": ["sisteransi@1.0.5", "", {}, "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg=="],
+
     "slash": ["slash@2.0.0", "", {}, "sha512-ZYKh3Wh2z1PpEXWr0MpSBZ0V6mZHAQfYevttO11c51CaWjGTaadiKZ+wVt1PbMlDV5qhMFslpZCemhwOK7C89A=="],
 
     "source-map-js": ["source-map-js@1.2.1", "", {}, "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA=="],