diff --git a/packages/views/runtimes/pricing.generated.ts b/packages/views/runtimes/pricing.generated.ts new file mode 100644 index 0000000000..4dc9896446 --- /dev/null +++ b/packages/views/runtimes/pricing.generated.ts @@ -0,0 +1,208 @@ +// AUTO-GENERATED — do not edit by hand. +// +// Source: https://models.dev/api.json (MIT, community-maintained, +// the same dataset OpenCode uses internally). +// Snapshot: 2026-04-29 +// Providers: anthropic, openai, google, moonshotai, opencode, opencode-go +// +// Regenerate with: node scripts/generate-pricing.mjs + +// Cost values are USD per million tokens, matching the raw +// `cost` shape on models.dev. We emit a complete numeric shape +// and default missing provider fields to 0 for type safety. +export interface ModelCost { + input: number; + output: number; + cacheRead: number; + cacheWrite: number; +} + +// Keys are `<provider>/<model>` to match what OpenCode and other +// multi-provider runtimes report on the wire. +export const PRICING: Readonly<Record<string, ModelCost>> = { + "anthropic/claude-3-5-haiku-20241022": { input: 0.8, output: 4, cacheRead: 0.08, cacheWrite: 1 }, + "anthropic/claude-3-5-haiku-latest": { input: 0.8, output: 4, cacheRead: 0.08, cacheWrite: 1 }, + "anthropic/claude-3-5-sonnet-20240620": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "anthropic/claude-3-5-sonnet-20241022": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "anthropic/claude-3-7-sonnet-20250219": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "anthropic/claude-3-haiku-20240307": { input: 0.25, output: 1.25, cacheRead: 0.03, cacheWrite: 0.3 }, + "anthropic/claude-3-opus-20240229": { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 }, + "anthropic/claude-3-sonnet-20240229": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 0.3 }, + "anthropic/claude-haiku-4-5": { input: 1, output: 5, cacheRead: 0.1, cacheWrite: 1.25 }, + "anthropic/claude-haiku-4-5-20251001": { input: 1, output: 5, cacheRead: 0.1, cacheWrite: 1.25 }, + 
"anthropic/claude-opus-4-0": { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 }, + "anthropic/claude-opus-4-1": { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 }, + "anthropic/claude-opus-4-1-20250805": { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 }, + "anthropic/claude-opus-4-20250514": { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 }, + "anthropic/claude-opus-4-5": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + "anthropic/claude-opus-4-5-20251101": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + "anthropic/claude-opus-4-6": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + "anthropic/claude-opus-4-7": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + "anthropic/claude-sonnet-4-0": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "anthropic/claude-sonnet-4-20250514": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "anthropic/claude-sonnet-4-5": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "anthropic/claude-sonnet-4-5-20250929": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "anthropic/claude-sonnet-4-6": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "google/gemini-1.5-flash": { input: 0.075, output: 0.3, cacheRead: 0.01875, cacheWrite: 0 }, + "google/gemini-1.5-flash-8b": { input: 0.0375, output: 0.15, cacheRead: 0.01, cacheWrite: 0 }, + "google/gemini-1.5-pro": { input: 1.25, output: 5, cacheRead: 0.3125, cacheWrite: 0 }, + "google/gemini-2.0-flash": { input: 0.1, output: 0.4, cacheRead: 0.025, cacheWrite: 0 }, + "google/gemini-2.0-flash-lite": { input: 0.075, output: 0.3, cacheRead: 0, cacheWrite: 0 }, + "google/gemini-2.5-flash": { input: 0.3, output: 2.5, cacheRead: 0.03, cacheWrite: 0 }, + "google/gemini-2.5-flash-image": { input: 0.3, output: 30, cacheRead: 0.075, cacheWrite: 0 }, + "google/gemini-2.5-flash-image-preview": { input: 0.3, output: 30, cacheRead: 0.075, cacheWrite: 0 }, + 
"google/gemini-2.5-flash-lite": { input: 0.1, output: 0.4, cacheRead: 0.025, cacheWrite: 0 }, + "google/gemini-2.5-flash-lite-preview-06-17": { input: 0.1, output: 0.4, cacheRead: 0.025, cacheWrite: 0 }, + "google/gemini-2.5-flash-lite-preview-09-2025": { input: 0.1, output: 0.4, cacheRead: 0.025, cacheWrite: 0 }, + "google/gemini-2.5-flash-preview-04-17": { input: 0.15, output: 0.6, cacheRead: 0.0375, cacheWrite: 0 }, + "google/gemini-2.5-flash-preview-05-20": { input: 0.15, output: 0.6, cacheRead: 0.0375, cacheWrite: 0 }, + "google/gemini-2.5-flash-preview-09-2025": { input: 0.3, output: 2.5, cacheRead: 0.075, cacheWrite: 0 }, + "google/gemini-2.5-flash-preview-tts": { input: 0.5, output: 10, cacheRead: 0, cacheWrite: 0 }, + "google/gemini-2.5-pro": { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + "google/gemini-2.5-pro-preview-05-06": { input: 1.25, output: 10, cacheRead: 0.31, cacheWrite: 0 }, + "google/gemini-2.5-pro-preview-06-05": { input: 1.25, output: 10, cacheRead: 0.31, cacheWrite: 0 }, + "google/gemini-2.5-pro-preview-tts": { input: 1, output: 20, cacheRead: 0, cacheWrite: 0 }, + "google/gemini-3-flash-preview": { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0 }, + "google/gemini-3-pro-preview": { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + "google/gemini-3.1-flash-image-preview": { input: 0.25, output: 60, cacheRead: 0, cacheWrite: 0 }, + "google/gemini-3.1-flash-lite-preview": { input: 0.25, output: 1.5, cacheRead: 0.025, cacheWrite: 1 }, + "google/gemini-3.1-pro-preview": { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + "google/gemini-3.1-pro-preview-customtools": { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + "google/gemini-embedding-001": { input: 0.15, output: 0, cacheRead: 0, cacheWrite: 0 }, + "google/gemini-flash-latest": { input: 0.3, output: 2.5, cacheRead: 0.075, cacheWrite: 0 }, + "google/gemini-flash-lite-latest": { input: 0.1, output: 0.4, cacheRead: 0.025, cacheWrite: 0 }, + 
"google/gemini-live-2.5-flash": { input: 0.5, output: 2, cacheRead: 0, cacheWrite: 0 }, + "google/gemini-live-2.5-flash-preview-native-audio": { input: 0.5, output: 2, cacheRead: 0, cacheWrite: 0 }, + "google/gemma-3-12b-it": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "google/gemma-3-27b-it": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "google/gemma-3-4b-it": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "google/gemma-3n-e2b-it": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "google/gemma-3n-e4b-it": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "moonshotai/kimi-k2-0711-preview": { input: 0.6, output: 2.5, cacheRead: 0.15, cacheWrite: 0 }, + "moonshotai/kimi-k2-0905-preview": { input: 0.6, output: 2.5, cacheRead: 0.15, cacheWrite: 0 }, + "moonshotai/kimi-k2-thinking": { input: 0.6, output: 2.5, cacheRead: 0.15, cacheWrite: 0 }, + "moonshotai/kimi-k2-thinking-turbo": { input: 1.15, output: 8, cacheRead: 0.15, cacheWrite: 0 }, + "moonshotai/kimi-k2-turbo-preview": { input: 2.4, output: 10, cacheRead: 0.6, cacheWrite: 0 }, + "moonshotai/kimi-k2.5": { input: 0.6, output: 3, cacheRead: 0.1, cacheWrite: 0 }, + "moonshotai/kimi-k2.6": { input: 0.95, output: 4, cacheRead: 0.16, cacheWrite: 0 }, + "openai/gpt-3.5-turbo": { input: 0.5, output: 1.5, cacheRead: 1.25, cacheWrite: 0 }, + "openai/gpt-4": { input: 30, output: 60, cacheRead: 0, cacheWrite: 0 }, + "openai/gpt-4-turbo": { input: 10, output: 30, cacheRead: 0, cacheWrite: 0 }, + "openai/gpt-4.1": { input: 2, output: 8, cacheRead: 0.5, cacheWrite: 0 }, + "openai/gpt-4.1-mini": { input: 0.4, output: 1.6, cacheRead: 0.1, cacheWrite: 0 }, + "openai/gpt-4.1-nano": { input: 0.1, output: 0.4, cacheRead: 0.03, cacheWrite: 0 }, + "openai/gpt-4o": { input: 2.5, output: 10, cacheRead: 1.25, cacheWrite: 0 }, + "openai/gpt-4o-2024-05-13": { input: 5, output: 15, cacheRead: 0, cacheWrite: 0 }, + "openai/gpt-4o-2024-08-06": { input: 2.5, output: 10, cacheRead: 1.25, cacheWrite: 0 
}, + "openai/gpt-4o-2024-11-20": { input: 2.5, output: 10, cacheRead: 1.25, cacheWrite: 0 }, + "openai/gpt-4o-mini": { input: 0.15, output: 0.6, cacheRead: 0.08, cacheWrite: 0 }, + "openai/gpt-5": { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + "openai/gpt-5-chat-latest": { input: 1.25, output: 10, cacheRead: 0, cacheWrite: 0 }, + "openai/gpt-5-codex": { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + "openai/gpt-5-mini": { input: 0.25, output: 2, cacheRead: 0.025, cacheWrite: 0 }, + "openai/gpt-5-nano": { input: 0.05, output: 0.4, cacheRead: 0.005, cacheWrite: 0 }, + "openai/gpt-5-pro": { input: 15, output: 120, cacheRead: 0, cacheWrite: 0 }, + "openai/gpt-5.1": { input: 1.25, output: 10, cacheRead: 0.13, cacheWrite: 0 }, + "openai/gpt-5.1-chat-latest": { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + "openai/gpt-5.1-codex": { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + "openai/gpt-5.1-codex-max": { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + "openai/gpt-5.1-codex-mini": { input: 0.25, output: 2, cacheRead: 0.025, cacheWrite: 0 }, + "openai/gpt-5.2": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + "openai/gpt-5.2-chat-latest": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + "openai/gpt-5.2-codex": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + "openai/gpt-5.2-pro": { input: 21, output: 168, cacheRead: 0, cacheWrite: 0 }, + "openai/gpt-5.3-chat-latest": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + "openai/gpt-5.3-codex": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + "openai/gpt-5.3-codex-spark": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + "openai/gpt-5.4": { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 }, + "openai/gpt-5.4-mini": { input: 0.75, output: 4.5, cacheRead: 0.075, cacheWrite: 0 }, + "openai/gpt-5.4-nano": { input: 0.2, output: 1.25, cacheRead: 0.02, cacheWrite: 
0 }, + "openai/gpt-5.4-pro": { input: 30, output: 180, cacheRead: 0, cacheWrite: 0 }, + "openai/gpt-5.5": { input: 5, output: 30, cacheRead: 0.5, cacheWrite: 0 }, + "openai/gpt-5.5-pro": { input: 30, output: 180, cacheRead: 0, cacheWrite: 0 }, + "openai/o1": { input: 15, output: 60, cacheRead: 7.5, cacheWrite: 0 }, + "openai/o1-mini": { input: 1.1, output: 4.4, cacheRead: 0.55, cacheWrite: 0 }, + "openai/o1-preview": { input: 15, output: 60, cacheRead: 7.5, cacheWrite: 0 }, + "openai/o1-pro": { input: 150, output: 600, cacheRead: 0, cacheWrite: 0 }, + "openai/o3": { input: 2, output: 8, cacheRead: 0.5, cacheWrite: 0 }, + "openai/o3-deep-research": { input: 10, output: 40, cacheRead: 2.5, cacheWrite: 0 }, + "openai/o3-mini": { input: 1.1, output: 4.4, cacheRead: 0.55, cacheWrite: 0 }, + "openai/o3-pro": { input: 20, output: 80, cacheRead: 0, cacheWrite: 0 }, + "openai/o4-mini": { input: 1.1, output: 4.4, cacheRead: 0.28, cacheWrite: 0 }, + "openai/o4-mini-deep-research": { input: 2, output: 8, cacheRead: 0.5, cacheWrite: 0 }, + "openai/text-embedding-3-large": { input: 0.13, output: 0, cacheRead: 0, cacheWrite: 0 }, + "openai/text-embedding-3-small": { input: 0.02, output: 0, cacheRead: 0, cacheWrite: 0 }, + "openai/text-embedding-ada-002": { input: 0.1, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode-go/deepseek-v4-flash": { input: 0.14, output: 0.28, cacheRead: 0.0028, cacheWrite: 0 }, + "opencode-go/deepseek-v4-pro": { input: 1.74, output: 3.48, cacheRead: 0.0145, cacheWrite: 0 }, + "opencode-go/glm-5": { input: 1, output: 3.2, cacheRead: 0.2, cacheWrite: 0 }, + "opencode-go/glm-5.1": { input: 1.4, output: 4.4, cacheRead: 0.26, cacheWrite: 0 }, + "opencode-go/kimi-k2.5": { input: 0.6, output: 3, cacheRead: 0.1, cacheWrite: 0 }, + "opencode-go/kimi-k2.6": { input: 0.32, output: 1.34, cacheRead: 0.054, cacheWrite: 0 }, + "opencode-go/mimo-v2-omni": { input: 0.4, output: 2, cacheRead: 0.08, cacheWrite: 0 }, + "opencode-go/mimo-v2-pro": { input: 1, output: 3, 
cacheRead: 0.2, cacheWrite: 0 }, + "opencode-go/mimo-v2.5": { input: 0.4, output: 2, cacheRead: 0.08, cacheWrite: 0 }, + "opencode-go/mimo-v2.5-pro": { input: 1, output: 3, cacheRead: 0.2, cacheWrite: 0 }, + "opencode-go/minimax-m2.5": { input: 0.3, output: 1.2, cacheRead: 0.03, cacheWrite: 0 }, + "opencode-go/minimax-m2.7": { input: 0.3, output: 1.2, cacheRead: 0.06, cacheWrite: 0 }, + "opencode-go/qwen3.5-plus": { input: 0.2, output: 1.2, cacheRead: 0.02, cacheWrite: 0.25 }, + "opencode-go/qwen3.6-plus": { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0.625 }, + "opencode/big-pickle": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/claude-3-5-haiku": { input: 0.8, output: 4, cacheRead: 0.08, cacheWrite: 1 }, + "opencode/claude-haiku-4-5": { input: 1, output: 5, cacheRead: 0.1, cacheWrite: 1.25 }, + "opencode/claude-opus-4-1": { input: 15, output: 75, cacheRead: 1.5, cacheWrite: 18.75 }, + "opencode/claude-opus-4-5": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + "opencode/claude-opus-4-6": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + "opencode/claude-opus-4-7": { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, + "opencode/claude-sonnet-4": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "opencode/claude-sonnet-4-5": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "opencode/claude-sonnet-4-6": { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, + "opencode/gemini-3-flash": { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0 }, + "opencode/gemini-3-pro": { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + "opencode/gemini-3.1-pro": { input: 2, output: 12, cacheRead: 0.2, cacheWrite: 0 }, + "opencode/glm-4.6": { input: 0.6, output: 2.2, cacheRead: 0.1, cacheWrite: 0 }, + "opencode/glm-4.7": { input: 0.6, output: 2.2, cacheRead: 0.1, cacheWrite: 0 }, + "opencode/glm-4.7-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/glm-5": { 
input: 1, output: 3.2, cacheRead: 0.2, cacheWrite: 0 }, + "opencode/glm-5-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/glm-5.1": { input: 1.4, output: 4.4, cacheRead: 0.26, cacheWrite: 0 }, + "opencode/gpt-5": { input: 1.07, output: 8.5, cacheRead: 0.107, cacheWrite: 0 }, + "opencode/gpt-5-codex": { input: 1.07, output: 8.5, cacheRead: 0.107, cacheWrite: 0 }, + "opencode/gpt-5-nano": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/gpt-5.1": { input: 1.07, output: 8.5, cacheRead: 0.107, cacheWrite: 0 }, + "opencode/gpt-5.1-codex": { input: 1.07, output: 8.5, cacheRead: 0.107, cacheWrite: 0 }, + "opencode/gpt-5.1-codex-max": { input: 1.25, output: 10, cacheRead: 0.125, cacheWrite: 0 }, + "opencode/gpt-5.1-codex-mini": { input: 0.25, output: 2, cacheRead: 0.025, cacheWrite: 0 }, + "opencode/gpt-5.2": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + "opencode/gpt-5.2-codex": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + "opencode/gpt-5.3-codex": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + "opencode/gpt-5.3-codex-spark": { input: 1.75, output: 14, cacheRead: 0.175, cacheWrite: 0 }, + "opencode/gpt-5.4": { input: 2.5, output: 15, cacheRead: 0.25, cacheWrite: 0 }, + "opencode/gpt-5.4-mini": { input: 0.75, output: 4.5, cacheRead: 0.075, cacheWrite: 0 }, + "opencode/gpt-5.4-nano": { input: 0.2, output: 1.25, cacheRead: 0.02, cacheWrite: 0 }, + "opencode/gpt-5.4-pro": { input: 30, output: 180, cacheRead: 30, cacheWrite: 0 }, + "opencode/gpt-5.5": { input: 5, output: 30, cacheRead: 0.5, cacheWrite: 0 }, + "opencode/gpt-5.5-pro": { input: 30, output: 180, cacheRead: 30, cacheWrite: 0 }, + "opencode/grok-code": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/hy3-preview-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/kimi-k2": { input: 0.4, output: 2.5, cacheRead: 0.4, cacheWrite: 0 }, + "opencode/kimi-k2-thinking": { input: 0.4, output: 
2.5, cacheRead: 0.4, cacheWrite: 0 }, + "opencode/kimi-k2.5": { input: 0.6, output: 3, cacheRead: 0.08, cacheWrite: 0 }, + "opencode/kimi-k2.5-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/kimi-k2.6": { input: 0.95, output: 4, cacheRead: 0.16, cacheWrite: 0 }, + "opencode/ling-2.6-flash-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/mimo-v2-flash-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/mimo-v2-omni-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/mimo-v2-pro-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/minimax-m2.1": { input: 0.3, output: 1.2, cacheRead: 0.1, cacheWrite: 0 }, + "opencode/minimax-m2.1-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/minimax-m2.5": { input: 0.3, output: 1.2, cacheRead: 0.06, cacheWrite: 0 }, + "opencode/minimax-m2.5-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/minimax-m2.7": { input: 0.3, output: 1.2, cacheRead: 0.06, cacheWrite: 0 }, + "opencode/nemotron-3-super-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/qwen3-coder": { input: 0.45, output: 1.8, cacheRead: 0, cacheWrite: 0 }, + "opencode/qwen3.5-plus": { input: 0.2, output: 1.2, cacheRead: 0.02, cacheWrite: 0.25 }, + "opencode/qwen3.6-plus": { input: 0.5, output: 3, cacheRead: 0.05, cacheWrite: 0.625 }, + "opencode/qwen3.6-plus-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + "opencode/trinity-large-preview-free": { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, +}; diff --git a/packages/views/runtimes/utils.test.ts b/packages/views/runtimes/utils.test.ts new file mode 100644 index 0000000000..1cf330d35b --- /dev/null +++ b/packages/views/runtimes/utils.test.ts @@ -0,0 +1,243 @@ +import { describe, it, expect } from "vitest"; +import type { RuntimeUsage } from "@multica/core/types"; +import { + collectUnmappedModels, + estimateCost, + 
estimateCostBreakdown, + isModelPriced, +} from "./utils"; + +// Build a one-million-token usage row so estimateCost output equals the +// per-MTok rate directly — makes pricing assertions readable. +function usage(overrides: Partial<RuntimeUsage>): RuntimeUsage { + return { + runtime_id: "rt-1", + date: "2026-04-01", + provider: "", + model: "", + input_tokens: 0, + output_tokens: 0, + cache_read_tokens: 0, + cache_write_tokens: 0, + ...overrides, + }; +} + +const ONE_M = 1_000_000; + +describe("resolvePricing / isModelPriced", () => { + it("matches full provider/model keys exactly", () => { + expect(isModelPriced("anthropic/claude-sonnet-4-5")).toBe(true); + }); + + it("matches models with trailing date suffix via startsWith on date-stripped name", () => { + expect(isModelPriced("anthropic/claude-sonnet-4-5-20250929")).toBe(true); + }); + + it("matches a date-suffixed `provider/model-YYYYMMDD` form", () => { + expect(isModelPriced("anthropic/claude-sonnet-4-5-20250929")).toBe(true); + expect(isModelPriced("openai/gpt-4o-2024-08-06")).toBe(true); + }); + + it("matches popular OpenAI models reported by OpenCode", () => { + for (const m of [ + "openai/gpt-4o", + "openai/gpt-4o-mini", + "openai/gpt-4.1", + "openai/o1", + "openai/o3-mini", + "openai/o4-mini", + ]) { + expect(isModelPriced(m), m).toBe(true); + } + }); + + it("matches popular Google Gemini models reported by OpenCode", () => { + for (const m of [ + "google/gemini-2.5-pro", + "google/gemini-2.5-flash", + "google/gemini-2.0-flash", + ]) { + expect(isModelPriced(m), m).toBe(true); + } + }); + + it("returns false for xAI and DeepSeek models (not in pricing table)", () => { + for (const m of [ + "xai/grok-4", + "xai/grok-3-mini", + "deepseek/deepseek-chat", + "deepseek/deepseek-reasoner", + ]) { + expect(isModelPriced(m), m).toBe(false); + } + }); + + it("returns undefined / false for genuinely unknown provider/model", () => { + expect(isModelPriced("madeup/totally-not-a-real-model")).toBe(false); + 
expect(isModelPriced("openai/this-is-not-a-real-model-xyzzy")).toBe(false); + }); + + it("treats empty strings as unknown", () => { + expect(isModelPriced("")).toBe(false); + }); + + it("matches bare model names without provider prefix", () => { + for (const m of [ + "gpt-4o", + "gpt-4o-mini", + "claude-sonnet-4-5", + "claude-opus-4-1", + "gemini-2.5-pro", + ]) { + expect(isModelPriced(m), m).toBe(true); + } + }); + + it("matches bare models with date suffix", () => { + expect(isModelPriced("claude-opus-4-1-20260105")).toBe(true); + expect(isModelPriced("gpt-4o-2024-08-06")).toBe(true); + }); + + it("matches provider-prefixed models with date suffix", () => { + expect(isModelPriced("anthropic/claude-opus-4-1-20260105")).toBe(true); + expect(isModelPriced("openai/gpt-4o-2024-08-06")).toBe(true); + }); +}); + +describe("estimateCost — OpenCode provider/model parity", () => { + // The "Anthropic-via-OpenCode" parity case from the acceptance criteria: + // routing the same model through OpenCode must not change billing. + it("returns accurate cost for anthropic/claude-sonnet-4-5", () => { + const tokens = { + input_tokens: 1_000_000, + output_tokens: 500_000, + cache_read_tokens: 200_000, + cache_write_tokens: 50_000, + }; + const cost = estimateCost(usage({ model: "anthropic/claude-sonnet-4-5", ...tokens })); + expect(cost).toBeGreaterThan(0); + }); + + it("returns a non-zero, accurate cost for openai/gpt-4o", () => { + // OpenAI gpt-4o published rates (per 1M tokens): + // input $2.50 + // output $10.00 + // 1M input + 1M output should be exactly $12.50 — assert tightly so any + // accidental decimal-point shift in MODEL_PRICING fails this test. 
+ const cost = estimateCost( + usage({ + model: "openai/gpt-4o", + input_tokens: ONE_M, + output_tokens: ONE_M, + }), + ); + expect(cost).toBeCloseTo(12.5, 6); + }); + + it("returns a non-zero, accurate cost for google/gemini-2.5-pro", () => { + // Google Gemini 2.5 Pro (≤200K-context tier, the OpenCode default): + // input $1.25 + // output $10.00 + const cost = estimateCost( + usage({ + model: "google/gemini-2.5-pro", + input_tokens: ONE_M, + output_tokens: ONE_M, + }), + ); + expect(cost).toBeCloseTo(11.25, 6); + }); + + it("deepseek/deepseek-chat is not priced (not in pricing table)", () => { + const cost = estimateCost( + usage({ + model: "deepseek/deepseek-chat", + input_tokens: ONE_M, + output_tokens: ONE_M, + }), + ); + expect(cost).toBe(0); + }); + + it("matches Anthropic's `claude-3-5-haiku-latest` id format", () => { + // Anthropic's actual model ids use `claude-3-5-haiku-…`, not + // `claude-haiku-3-5-…`. Earlier the table keyed off the latter and + // returned $0 for OpenCode's `anthropic/claude-3-5-haiku-latest`. + const cost = estimateCost( + usage({ + model: "anthropic/claude-3-5-haiku-latest", + input_tokens: ONE_M, + output_tokens: ONE_M, + }), + ); + expect(cost).toBeCloseTo(0.8 + 4, 6); + }); + + it("resolves the gpt-4o family to each entry's specific price", () => { + // Three sibling entries in the snapshot share the `gpt-4o` prefix but + // carry different prices. The resolver must hit the *exact* bare key + // for each one rather than greedily falling through a startsWith + // match — otherwise `gpt-4o-2024-05-13` would resolve to `gpt-4o`'s + // price (alphabetically first) instead of its own. 
+ const bareGpt4o = estimateCost( + usage({ + model: "gpt-4o", + input_tokens: ONE_M, + output_tokens: ONE_M, + }), + ); + expect(bareGpt4o).toBeCloseTo(2.5 + 10, 6); // openai/gpt-4o → $2.50 / $10 + + const gpt4oMini = estimateCost( + usage({ + model: "gpt-4o-mini", + input_tokens: ONE_M, + output_tokens: ONE_M, + }), + ); + expect(gpt4oMini).toBeCloseTo(0.15 + 0.6, 6); // openai/gpt-4o-mini → $0.15 / $0.60 + + const gpt4oDated = estimateCost( + usage({ + model: "gpt-4o-2024-05-13", + input_tokens: ONE_M, + output_tokens: ONE_M, + }), + ); + expect(gpt4oDated).toBeCloseTo(5 + 15, 6); // openai/gpt-4o-2024-05-13 → $5 / $15 + }); + + it("breakdown sums match the total cost", () => { + const u = usage({ + model: "openai/gpt-4o-mini", + input_tokens: 750_000, + output_tokens: 250_000, + cache_read_tokens: 100_000, + cache_write_tokens: 50_000, + }); + const total = estimateCost(u); + const b = estimateCostBreakdown(u); + expect(b.input + b.output + b.cacheRead + b.cacheWrite).toBeCloseTo(total, 10); + expect(total).toBeGreaterThan(0); + }); +}); + +describe("collectUnmappedModels", () => { + it("returns an empty list for supported providers (excludes xAI and DeepSeek)", () => { + const rows = [ + usage({ model: "anthropic/claude-sonnet-4-5", input_tokens: 1 }), + usage({ model: "openai/gpt-4o", input_tokens: 1 }), + usage({ model: "google/gemini-2.5-pro", input_tokens: 1 }), + ]; + expect(collectUnmappedModels(rows)).toEqual([]); + }); + + it("still flags genuinely unknown models", () => { + const rows = [ + usage({ model: "anthropic/claude-sonnet-4-5", input_tokens: 1 }), + usage({ model: "noprovider/madeup-model-xyzzy", input_tokens: 1 }), + ]; + expect(collectUnmappedModels(rows)).toEqual(["noprovider/madeup-model-xyzzy"]); + }); +}); diff --git a/packages/views/runtimes/utils.ts b/packages/views/runtimes/utils.ts index 6175ed080c..a885fff2b8 100644 --- a/packages/views/runtimes/utils.ts +++ b/packages/views/runtimes/utils.ts @@ -4,6 +4,8 @@ import type { 
RuntimeUsageByHour, } from "@multica/core/types"; +import { PRICING, type ModelCost } from "./pricing.generated"; + // --------------------------------------------------------------------------- // Formatting helpers // --------------------------------------------------------------------------- @@ -113,58 +115,46 @@ export function formatTokens(n: number): string { // --------------------------------------------------------------------------- // Cost estimation // --------------------------------------------------------------------------- +// Strip a leading `provider/` segment, e.g. `openai/gpt-4o` → `gpt-4o`. +function stripProviderPrefix(model: string): string { + const slash = model.indexOf("/"); + return slash > 0 ? model.slice(slash + 1) : model; +} -// Pricing per million tokens (USD). Sourced from -// https://platform.claude.com/docs/en/about-claude/pricing — keep in sync -// when Anthropic releases new models or adjusts prices. cacheWrite reflects -// the 5-minute cache TTL (1.25× input); the daemon reports -// cache_creation_input_tokens without TTL metadata, so 5m is the safest / -// cheapest assumption (matches the API default). -// -// Iteration order matters: the resolver's startsWith() fallback walks this -// object in insertion order, so MORE SPECIFIC keys (e.g. claude-sonnet-4-5) -// must precede SHORTER prefixes (e.g. claude-sonnet-4) of the same family. 
-const MODEL_PRICING: Record< - string, - { input: number; output: number; cacheRead: number; cacheWrite: number } -> = { - // -- Current generation (4.5+ — Opus dropped from 15/75 to 5/25 here) -- - "claude-haiku-4-5": { input: 1, output: 5, cacheRead: 0.10, cacheWrite: 1.25 }, - "claude-sonnet-4-5": { input: 3, output: 15, cacheRead: 0.30, cacheWrite: 3.75 }, - "claude-sonnet-4-6": { input: 3, output: 15, cacheRead: 0.30, cacheWrite: 3.75 }, - "claude-opus-4-5": { input: 5, output: 25, cacheRead: 0.50, cacheWrite: 6.25 }, - "claude-opus-4-6": { input: 5, output: 25, cacheRead: 0.50, cacheWrite: 6.25 }, - "claude-opus-4-7": { input: 5, output: 25, cacheRead: 0.50, cacheWrite: 6.25 }, - - // -- Pre-4.5 Opus (legacy, still served at original price tier) -- - "claude-opus-4-1": { input: 15, output: 75, cacheRead: 1.50, cacheWrite: 18.75 }, - "claude-opus-4": { input: 15, output: 75, cacheRead: 1.50, cacheWrite: 18.75 }, - - // -- Sonnet 4.0 (deprecated; same price as the 4.x family) -- - "claude-sonnet-4": { input: 3, output: 15, cacheRead: 0.30, cacheWrite: 3.75 }, - - // -- Older Haiku tier (defensive entry for the rare runtime still on it) -- - "claude-haiku-3-5": { input: 0.80, output: 4, cacheRead: 0.08, cacheWrite: 1.00 }, -}; +// Strip a trailing date / `-latest` tag, e.g. `claude-sonnet-4-5-20250929` +// → `claude-sonnet-4-5`. Anthropic, OpenAI and Google all version their +// model snapshots this way; the family is what we price. +function stripDateSuffix(model: string): string { + return model.replace(/-(20\d{2}-?\d{2}-?\d{2}|latest)$/, ""); +} -// Resolve a model string to its pricing tier. Two layers of fallback so the -// daemon-reported model name doesn't have to match the keys exactly: -// 1. Exact match. -// 2. Strip a trailing date / "latest" tag (Claude Code typically reports -// `claude-sonnet-4-5-20250929` — the date is volatile, the family is -// what we price). Try exact match again on the stripped name. -// 3. 
startsWith on either the raw or stripped name. -// Anything that misses all three is genuinely unknown; we return undefined -// so callers can distinguish "$0 spend" from "spent but model not priced". -function resolvePricing(model: string) { +// Resolve a model string to its pricing tier. PRICING is keyed by +// `provider/model`. Walked in two passes so exact bare matches always +// win over prefix matches — otherwise `gpt-4o-2024-05-13` would resolve +// to `openai/gpt-4o`'s price (alphabetically first; `keyBare.startsWith` +// is true) instead of the dated entry's specific price. +// 1. Exact match on the full string (`provider/model`). +// 2. First pass — exact bare match (`keyBare === bare`). Catches a +// bare-keyed input (`gpt-4o`, `gpt-4o-mini`, `gpt-4o-2024-05-13`) +// hitting the corresponding `provider/` entry verbatim. +// 3. Second pass — `keyBare.startsWith(withoutDate)`. Catches a +// date-suffixed input whose exact dated entry isn't tracked +// (`claude-opus-4-1-20260105` falls back to `claude-opus-4-1`). 
+function resolvePricing(model: string): ModelCost | undefined { + if (!model) return undefined; - if (MODEL_PRICING[model]) return MODEL_PRICING[model]; - const stripped = model.replace(/-(20\d{6}|latest)$/, ""); - if (stripped !== model && MODEL_PRICING[stripped]) return MODEL_PRICING[stripped]; + const exact = PRICING[model]; + if (exact) return exact; + + const bare = stripProviderPrefix(model); + const withoutDate = stripDateSuffix(bare); + + for (const [key, p] of Object.entries(PRICING)) { + if (stripProviderPrefix(key) === bare) return p; + } - for (const [key, p] of Object.entries(MODEL_PRICING)) { - if (model.startsWith(key) || stripped.startsWith(key)) return p; + for (const [key, p] of Object.entries(PRICING)) { + if (stripProviderPrefix(key).startsWith(withoutDate)) return p; } return undefined; } diff --git a/scripts/generate-pricing.mjs b/scripts/generate-pricing.mjs new file mode 100755 index 0000000000..1b9d561bab --- /dev/null +++ b/scripts/generate-pricing.mjs @@ -0,0 +1,165 @@ +#!/usr/bin/env node +// Fetch https://models.dev/api.json (the open-source pricing database +// OpenCode uses internally, MIT-licensed), filter it to the LLM +// providers Multica runtimes actually emit, and write a static pricing +// snapshot to packages/views/runtimes/pricing.generated.ts. +// +// Output is keyed by `<provider>/<model>` to match what OpenCode and +// other multi-provider runtimes report on the wire. Only the `cost` +// field of each model entry is preserved. +// +// Usage: +// node scripts/generate-pricing.mjs # live fetch +// MODELS_DEV_PATH=/path/to/api.json node scripts/generate-pricing.mjs +// +// Re-run whenever upstream prices change. The output file is checked in +// so production builds don't depend on `models.dev` being reachable. 
+ +import { readFileSync, writeFileSync, mkdirSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { dirname, resolve } from "node:path"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const REPO_ROOT = resolve(__dirname, ".."); +const OUTPUT_PATH = resolve( + REPO_ROOT, + "packages/views/runtimes/pricing.generated.ts", +); + +// LLM providers (as keyed in models.dev) whose models Multica runtimes +// surface in usage data. Each entry is paired with the Multica runtime +// kinds that route to it, so the rationale stays close to the data. +// +// Add a provider here only if a Multica runtime can actually emit a +// `<provider>/<model>` pair under it — extra providers just bloat the +// snapshot with markup-laden re-hosts (see 302ai, helicone, etc., which +// we deliberately leave out). +// Providers: anthropic, openai, google, moonshotai, opencode, opencode-go +const ALLOWED_PROVIDERS = [ + "anthropic", + "openai", + "google", + "moonshotai", + "opencode", + "opencode-go", +]; + +async function loadModelsDev() { + const path = process.env.MODELS_DEV_PATH; + if (path) return JSON.parse(readFileSync(path, "utf8")); + const res = await fetch("https://models.dev/api.json"); + if (!res.ok) throw new Error(`models.dev fetch ${res.status}`); + return await res.json(); +} + +// Stable JSON-ish field order so generated diffs only show real price +// changes, not key reshuffles. 
// models.dev cost fields we emit, in stable output order.
const COST_FIELD_ORDER = [
  "input",
  "output",
  // "reasoning", // never used in Multica cost computation
  "cache_read",
  "cache_write",
  // "input_audio", // never used in Multica cost computation
  // "output_audio", // never used in Multica cost computation
];

// snake_case (models.dev) -> camelCase (generated ModelCost) key map.
const COST_OUTPUT_KEY_MAP = {
  cache_read: "cacheRead",
  cache_write: "cacheWrite",
  // input_audio: "inputAudio", // never used in Multica cost computation
  // output_audio: "outputAudio", // never used in Multica cost computation
};

// Render one model's cost as a TS object literal.
function serializeCost(cost) {
  // We deliberately emit only the enumerated number fields. models.dev
  // sometimes includes context-tier objects (e.g.
  // `gemini-2.5-pro.cost.context_over_200k`); the Multica usage stream
  // doesn't carry context size, so we always price at the standard
  // tier. If a new flat number field appears upstream, add it to
  // COST_FIELD_ORDER and ModelCost together.
  const entries = [];
  for (const k of COST_FIELD_ORDER) {
    const outputKey = COST_OUTPUT_KEY_MAP[k] || k;
    // Default non-numeric/missing fields to 0 for a complete shape.
    const value = typeof cost[k] === "number" ? cost[k] : 0;
    entries.push(`${outputKey}: ${value}`);
  }
  return `{ ${entries.join(", ")} }`;
}

// Entry point: fetch, filter to ALLOWED_PROVIDERS, and write the
// checked-in snapshot. Any failure exits non-zero.
(async () => {
  const db = await loadModelsDev();

  // Warn (don't fail) if an allowed provider vanished upstream.
  const skippedProviders = ALLOWED_PROVIDERS.filter((p) => !db[p]);
  if (skippedProviders.length) {
    console.warn(
      "Warning: allowed providers missing from models.dev: " +
        skippedProviders.join(", "),
    );
  }

  const rows = [];
  let pricedCount = 0;
  let skippedNoCost = 0;
  for (const provider of ALLOWED_PROVIDERS) {
    const prov = db[provider];
    if (!prov) continue;
    const ids = Object.keys(prov.models || {}).sort();
    for (const id of ids) {
      const cost = prov.models[id]?.cost;
      // Skip models with no usable pricing at all.
      if (!cost || (cost.input == null && cost.output == null)) {
        skippedNoCost++;
        continue;
      }
      rows.push({ key: `${provider}/${id}`, cost });
      pricedCount++;
    }
  }
  // Global sort by key so output order is stable across providers.
  rows.sort((a, b) => a.key.localeCompare(b.key));

  const today = new Date().toISOString().slice(0, 10);
  const lines = [
    "// AUTO-GENERATED — do not edit by hand.",
    "//",
    "// Source: https://models.dev/api.json (MIT, community-maintained,",
    "// the same dataset OpenCode uses internally).",
    `// Snapshot: ${today}`,
    "// Providers: " + ALLOWED_PROVIDERS.join(", "),
    "//",
    "// Regenerate with: node scripts/generate-pricing.mjs",
    "",
    "// Cost values are USD per million tokens, matching the raw",
    "// `cost` shape on models.dev. We emit a complete numeric shape",
    "// and default missing provider fields to 0 for type safety.",
    "export interface ModelCost {",
    "  input: number;",
    "  output: number;",
    // "  reasoning: number;", // never used in Multica cost computation
    "  cacheRead: number;",
    "  cacheWrite: number;",
    // "  inputAudio: number;", // never used in Multica cost computation
    // "  outputAudio: number;", // never used in Multica cost computation
    "}",
    "",
    // FIX: these two emitted strings had lost their angle-bracketed
    // text (`<provider>/<model>` and the `Readonly<Record<...>>`
    // generic), which made the generated file invalid TypeScript
    // (`Readonly> = {`). Restored the intended generic and comment.
    "// Keys are `<provider>/<model>` to match what OpenCode and other",
    "// multi-provider runtimes report on the wire.",
    "export const PRICING: Readonly<Record<string, ModelCost>> = {",
  ];

  for (const r of rows) {
    lines.push(`  ${JSON.stringify(r.key)}: ${serializeCost(r.cost)},`);
  }
  lines.push("};");
  lines.push("");

  mkdirSync(dirname(OUTPUT_PATH), { recursive: true });
  writeFileSync(OUTPUT_PATH, lines.join("\n"));

  console.log(`Wrote ${OUTPUT_PATH}`);
  console.log(`  Providers included: ${ALLOWED_PROVIDERS.length - skippedProviders.length}`);
  console.log(`  Models with pricing: ${pricedCount}`);
  console.log(`  Models skipped (no cost data): ${skippedNoCost}`);
})().catch((err) => {
  console.error(err);
  process.exit(1);
});