diff --git a/crates/openfang-channels/src/line.rs b/crates/openfang-channels/src/line.rs index b20294afc..ab5211f5a 100644 --- a/crates/openfang-channels/src/line.rs +++ b/crates/openfang-channels/src/line.rs @@ -108,7 +108,7 @@ impl LineAdapter { diff |= a ^ b; } if diff != 0 { - let computed = base64::engine::general_purpose::STANDARD.encode(&result); + let computed = base64::engine::general_purpose::STANDARD.encode(result); // Log first/last 4 chars of each signature for debugging without leaking full HMAC let comp_redacted = format!( "{}...{}", diff --git a/crates/openfang-cli/src/launcher.rs b/crates/openfang-cli/src/launcher.rs index 18a8f1236..178d94d07 100644 --- a/crates/openfang-cli/src/launcher.rs +++ b/crates/openfang-cli/src/launcher.rs @@ -20,6 +20,7 @@ const PROVIDER_ENV_VARS: &[(&str, &str)] = &[ ("ANTHROPIC_API_KEY", "Anthropic"), ("OPENAI_API_KEY", "OpenAI"), ("DEEPSEEK_API_KEY", "DeepSeek"), + ("VOLCENGINE_API_KEY", "Volcano Engine"), ("GEMINI_API_KEY", "Gemini"), ("GOOGLE_API_KEY", "Gemini"), ("GROQ_API_KEY", "Groq"), @@ -31,7 +32,7 @@ const PROVIDER_ENV_VARS: &[(&str, &str)] = &[ fn detect_provider() -> Option<(&'static str, &'static str)> { for &(var, name) in PROVIDER_ENV_VARS { - if std::env::var(var).is_ok() { + if std::env::var(var).ok().filter(|v| !v.is_empty()).is_some() { return Some((name, var)); } } diff --git a/crates/openfang-cli/src/main.rs b/crates/openfang-cli/src/main.rs index 104286a8e..15d275ece 100644 --- a/crates/openfang-cli/src/main.rs +++ b/crates/openfang-cli/src/main.rs @@ -1420,6 +1420,18 @@ fn provider_list() -> Vec<(&'static str, &'static str, &'static str, &'static st ("groq", "GROQ_API_KEY", "llama-3.3-70b-versatile", "Groq"), ("gemini", "GEMINI_API_KEY", "gemini-2.5-flash", "Gemini"), ("deepseek", "DEEPSEEK_API_KEY", "deepseek-chat", "DeepSeek"), + ( + "volcengine", + "VOLCENGINE_API_KEY", + "doubao-seed-1-6-251015", + "Volcano Engine", + ), + ( + "volcengine_coding", + "VOLCENGINE_API_KEY", + "ark-code-latest", + 
"Volcano Engine Coding Plan", + ), ( "anthropic", "ANTHROPIC_API_KEY", @@ -4541,6 +4553,7 @@ fn provider_to_env_var(provider: &str) -> String { "perplexity" => "PERPLEXITY_API_KEY".to_string(), "cohere" => "COHERE_API_KEY".to_string(), "xai" => "XAI_API_KEY".to_string(), + "volcengine" | "doubao" | "volcengine_coding" => "VOLCENGINE_API_KEY".to_string(), "brave" => "BRAVE_API_KEY".to_string(), "tavily" => "TAVILY_API_KEY".to_string(), other => format!("{}_API_KEY", other.to_uppercase()), @@ -4592,6 +4605,13 @@ pub(crate) fn test_api_key(provider: &str, env_var: &str) -> bool { .get("https://openrouter.ai/api/v1/models") .bearer_auth(&key) .send(), + "volcengine" | "doubao" => { + let base = openfang_types::model_catalog::VOLCENGINE_BASE_URL.trim_end_matches('/'); + client.get(format!("{base}/models")).bearer_auth(&key).send() + } + // The Ark Coding endpoint (/api/coding/v3) does not expose a standard + // OpenAI-compatible /models list — skip probing and assume the key is valid. + "volcengine_coding" => return true, _ => return true, // unknown provider — skip test }; diff --git a/crates/openfang-cli/src/tui/screens/init_wizard.rs b/crates/openfang-cli/src/tui/screens/init_wizard.rs index 279b1e6cf..e7ca8c456 100644 --- a/crates/openfang-cli/src/tui/screens/init_wizard.rs +++ b/crates/openfang-cli/src/tui/screens/init_wizard.rs @@ -68,6 +68,22 @@ const PROVIDERS: &[ProviderInfo] = &[ needs_key: true, hint: "", }, + ProviderInfo { + name: "volcengine", + display: "Volcano Engine", + env_var: "VOLCENGINE_API_KEY", + default_model: "doubao-seed-1-6-251015", + needs_key: true, + hint: "ByteDance Ark platform; cn-beijing; override base_url for other regions", + }, + ProviderInfo { + name: "volcengine_coding", + display: "Volcano Engine (Coding Plan)", + env_var: "VOLCENGINE_API_KEY", + default_model: "ark-code-latest", + needs_key: true, + hint: "Shares VOLCENGINE_API_KEY with Volcano Engine standard plan. 
Uses Ark Coding endpoint.", + }, ProviderInfo { name: "openrouter", display: "OpenRouter", @@ -143,7 +159,7 @@ const PROVIDERS: &[ProviderInfo] = &[ ProviderInfo { name: "qwen", display: "Qwen (Alibaba)", - env_var: "QWEN_API_KEY", + env_var: "DASHSCOPE_API_KEY", default_model: "qwen-plus", needs_key: true, hint: "", @@ -151,7 +167,7 @@ const PROVIDERS: &[ProviderInfo] = &[ ProviderInfo { name: "huggingface", display: "Hugging Face", - env_var: "HUGGINGFACE_API_KEY", + env_var: "HF_API_KEY", default_model: "meta-llama/Llama-3.3-70B-Instruct", needs_key: true, hint: "", @@ -167,7 +183,7 @@ const PROVIDERS: &[ProviderInfo] = &[ ProviderInfo { name: "replicate", display: "Replicate", - env_var: "REPLICATE_API_KEY", + env_var: "REPLICATE_API_TOKEN", default_model: "meta/meta-llama-3-70b-instruct", needs_key: true, hint: "", diff --git a/crates/openfang-cli/src/tui/screens/welcome.rs b/crates/openfang-cli/src/tui/screens/welcome.rs index 768a51ca4..96f94d115 100644 --- a/crates/openfang-cli/src/tui/screens/welcome.rs +++ b/crates/openfang-cli/src/tui/screens/welcome.rs @@ -32,6 +32,7 @@ const PROVIDER_ENV_VARS: &[(&str, &str)] = &[ ("ANTHROPIC_API_KEY", "Anthropic"), ("OPENAI_API_KEY", "OpenAI"), ("DEEPSEEK_API_KEY", "DeepSeek"), + ("VOLCENGINE_API_KEY", "Volcano Engine"), ("GEMINI_API_KEY", "Gemini"), ("GOOGLE_API_KEY", "Gemini"), ("GROQ_API_KEY", "Groq"), @@ -47,7 +48,7 @@ const PROVIDER_ENV_VARS: &[(&str, &str)] = &[ /// Returns (provider_name, env_var_name) for the first detected key, or None. 
fn detect_provider() -> Option<(&'static str, &'static str)> { for &(var, name) in PROVIDER_ENV_VARS { - if std::env::var(var).is_ok() { + if std::env::var(var).ok().filter(|v| !v.is_empty()).is_some() { return Some((name, var)); } } diff --git a/crates/openfang-cli/src/tui/screens/wizard.rs b/crates/openfang-cli/src/tui/screens/wizard.rs index f15b8f8c8..27b6ae782 100644 --- a/crates/openfang-cli/src/tui/screens/wizard.rs +++ b/crates/openfang-cli/src/tui/screens/wizard.rs @@ -13,6 +13,7 @@ use crate::tui::theme; /// Provider metadata for the setup wizard. struct ProviderInfo { name: &'static str, + display: &'static str, env_var: &'static str, default_model: &'static str, needs_key: bool, @@ -21,108 +22,140 @@ struct ProviderInfo { const PROVIDERS: &[ProviderInfo] = &[ ProviderInfo { name: "groq", + display: "Groq", env_var: "GROQ_API_KEY", default_model: "llama-3.3-70b-versatile", needs_key: true, }, ProviderInfo { name: "anthropic", + display: "Anthropic", env_var: "ANTHROPIC_API_KEY", default_model: "claude-sonnet-4-20250514", needs_key: true, }, ProviderInfo { name: "openai", + display: "OpenAI", env_var: "OPENAI_API_KEY", default_model: "gpt-4o", needs_key: true, }, ProviderInfo { name: "openrouter", + display: "OpenRouter", env_var: "OPENROUTER_API_KEY", default_model: "google/gemini-2.5-flash", needs_key: true, }, ProviderInfo { name: "deepseek", + display: "DeepSeek", env_var: "DEEPSEEK_API_KEY", default_model: "deepseek-chat", needs_key: true, }, ProviderInfo { name: "together", + display: "Together AI", env_var: "TOGETHER_API_KEY", default_model: "meta-llama/Llama-3.3-70B-Instruct-Turbo", needs_key: true, }, ProviderInfo { name: "mistral", + display: "Mistral", env_var: "MISTRAL_API_KEY", default_model: "mistral-large-latest", needs_key: true, }, ProviderInfo { name: "fireworks", + display: "Fireworks AI", env_var: "FIREWORKS_API_KEY", default_model: "accounts/fireworks/models/llama-v3p3-70b-instruct", needs_key: true, }, ProviderInfo { name: "gemini", 
+ display: "Gemini", env_var: "GEMINI_API_KEY", default_model: "gemini-2.5-flash", needs_key: true, }, ProviderInfo { name: "xai", + display: "xAI", env_var: "XAI_API_KEY", default_model: "grok-4-0709", needs_key: true, }, ProviderInfo { name: "qwen", + display: "Qwen", env_var: "DASHSCOPE_API_KEY", default_model: "qwen-plus", needs_key: true, }, + ProviderInfo { + name: "volcengine", + display: "Volcano Engine", + env_var: "VOLCENGINE_API_KEY", + default_model: "doubao-seed-1-6-251015", + needs_key: true, + }, + ProviderInfo { + name: "volcengine_coding", + display: "Volcano Engine (Coding Plan)", + env_var: "VOLCENGINE_API_KEY", + default_model: "ark-code-latest", + needs_key: true, + }, ProviderInfo { name: "perplexity", + display: "Perplexity", env_var: "PERPLEXITY_API_KEY", default_model: "sonar-pro", needs_key: true, }, ProviderInfo { name: "cohere", - env_var: "CO_API_KEY", + display: "Cohere", + env_var: "COHERE_API_KEY", default_model: "command-a", needs_key: true, }, ProviderInfo { name: "cerebras", + display: "Cerebras", env_var: "CEREBRAS_API_KEY", default_model: "llama-3.3-70b", needs_key: true, }, ProviderInfo { name: "sambanova", + display: "SambaNova", env_var: "SAMBANOVA_API_KEY", default_model: "Meta-Llama-3.3-70B-Instruct", needs_key: true, }, ProviderInfo { name: "moonshot", + display: "Moonshot", env_var: "MOONSHOT_API_KEY", default_model: "moonshot-v1-128k", needs_key: true, }, ProviderInfo { name: "zhipu", + display: "Zhipu AI", env_var: "ZHIPU_API_KEY", default_model: "glm-4-plus", needs_key: true, }, ProviderInfo { name: "zhipu_coding", + display: "Zhipu AI (Coding)", env_var: "ZHIPU_API_KEY", default_model: "codegeex-4", needs_key: true, @@ -135,24 +168,28 @@ const PROVIDERS: &[ProviderInfo] = &[ }, ProviderInfo { name: "claude-code", + display: "Claude Code", env_var: "", default_model: "claude-code/sonnet", needs_key: false, }, ProviderInfo { name: "ollama", + display: "Ollama", env_var: "OLLAMA_API_KEY", default_model: "llama3.2", 
needs_key: false, }, ProviderInfo { name: "vllm", + display: "vLLM", env_var: "VLLM_API_KEY", default_model: "local-model", needs_key: false, }, ProviderInfo { name: "lmstudio", + display: "LM Studio", env_var: "LMSTUDIO_API_KEY", default_model: "local-model", needs_key: false, @@ -544,7 +581,7 @@ fn draw_provider(f: &mut Frame, area: Rect, state: &mut WizardState) { format!("requires {}", p.env_var) }; ListItem::new(Line::from(vec![ - Span::raw(format!(" {:<14}", p.name)), + Span::raw(format!(" {:<24}", p.display)), Span::styled(hint, theme::dim_style()), ])) }) diff --git a/crates/openfang-kernel/src/kernel.rs b/crates/openfang-kernel/src/kernel.rs index f449addac..6071a830a 100644 --- a/crates/openfang-kernel/src/kernel.rs +++ b/crates/openfang-kernel/src/kernel.rs @@ -2043,12 +2043,22 @@ impl OpenFangKernel { // Persist usage to database (same as non-streaming path) let model = &manifest.model.model; + // Reconstruct canonical catalog ID (e.g. "alibaba-coding-plan/qwen3.5-plus") + // when the provider prefix was stripped before the agent ran, so that + // catalog.pricing() finds the correct $0 subscription entry. + let catalog_model_id = { + let cat = kernel_clone + .model_catalog + .read() + .unwrap_or_else(|e| e.into_inner()); + Self::resolve_catalog_model_id(&manifest.model.provider, model, &cat) + }; let cost = MeteringEngine::estimate_cost_with_catalog( &kernel_clone .model_catalog .read() .unwrap_or_else(|e| e.into_inner()), - model, + &catalog_model_id, result.total_usage.input_tokens, result.total_usage.output_tokens, ); @@ -2600,9 +2610,16 @@ impl OpenFangKernel { // Record usage in the metering engine (uses catalog pricing as single source of truth) let model = &manifest.model.model; + // The model field may have had its provider prefix stripped (e.g. "qwen3.5-plus" instead + // of "alibaba-coding-plan/qwen3.5-plus"). Reconstruct the canonical catalog ID so that + // catalog.pricing() can find the entry and return the correct $0 subscription rate. 
+ let catalog_model_id = { + let cat = self.model_catalog.read().unwrap_or_else(|e| e.into_inner()); + Self::resolve_catalog_model_id(&manifest.model.provider, model, &cat) + }; let cost = MeteringEngine::estimate_cost_with_catalog( &self.model_catalog.read().unwrap_or_else(|e| e.into_inner()), - model, + &catalog_model_id, result.total_usage.input_tokens, result.total_usage.output_tokens, ); @@ -2634,6 +2651,31 @@ impl OpenFangKernel { Ok(result) } + /// Reconstructs the canonical catalog model ID for any provider that stores models + /// with a `provider/model` prefix format (alibaba-coding-plan, codex, copilot, + /// qwen-code, etc.). Used before metering to ensure catalog lookup succeeds. + /// + /// When a provider prefix was stripped before the agent ran (e.g. "qwen3.5-plus" + /// instead of "alibaba-coding-plan/qwen3.5-plus"), this helper rebuilds the + /// prefixed form so that `catalog.pricing()` can find the correct entry. + /// If the model already contains '/' it is returned as-is. + fn resolve_catalog_model_id( + provider: &str, + model: &str, + cat: &openfang_runtime::model_catalog::ModelCatalog, + ) -> String { + if model.contains('/') { + return model.to_owned(); + } + let provider_hyphen = provider.replace('_', "-"); + let prefixed = format!("{}/{}", provider_hyphen, model); + if cat.pricing(&prefixed).is_some() { + prefixed + } else { + model.to_owned() + } + } + /// Resolve a module path relative to the kernel's home directory. /// /// If the path is absolute, return it as-is. 
Otherwise, resolve relative @@ -3094,9 +3136,13 @@ impl OpenFangKernel { .unwrap_or((0, 0)); let model = &entry.manifest.model.model; + let catalog_model_id = { + let cat = self.model_catalog.read().unwrap_or_else(|e| e.into_inner()); + Self::resolve_catalog_model_id(&entry.manifest.model.provider, model, &cat) + }; let cost = MeteringEngine::estimate_cost_with_catalog( &self.model_catalog.read().unwrap_or_else(|e| e.into_inner()), - model, + &catalog_model_id, input_tokens, output_tokens, ); @@ -6844,4 +6890,47 @@ mod tests { kernel.shutdown(); } + + /// `resolve_catalog_model_id` must work for any provider that stores models with a + /// `provider/model` prefix, not just alibaba-coding-plan. + #[test] + fn test_resolve_catalog_model_id_multi_provider() { + use openfang_runtime::model_catalog::ModelCatalog; + + let cat = ModelCatalog::new(); + + // Case 1: alibaba-coding-plan — model exists in catalog after prefix rebuild. + let result = OpenFangKernel::resolve_catalog_model_id( + "alibaba_coding_plan", + "qwen3.5-plus", + &cat, + ); + assert_eq!( + result, "alibaba-coding-plan/qwen3.5-plus", + "should rebuild prefixed form when catalog entry exists" + ); + + // Case 2: model already contains '/' — returned unchanged regardless of provider. + let already_prefixed = OpenFangKernel::resolve_catalog_model_id( + "alibaba_coding_plan", + "alibaba-coding-plan/qwen3.5-plus", + &cat, + ); + assert_eq!( + already_prefixed, "alibaba-coding-plan/qwen3.5-plus", + "already-prefixed model should pass through unchanged" + ); + + // Case 3: unknown provider (e.g. qwen_code) with no catalog entry — falls back + // to the bare model name so callers can still proceed. 
+ let fallback = OpenFangKernel::resolve_catalog_model_id( + "qwen_code", + "some-unknown-model", + &cat, + ); + assert_eq!( + fallback, "some-unknown-model", + "when prefixed form is not in catalog, bare model name should be returned" + ); + } } diff --git a/crates/openfang-kernel/src/metering.rs b/crates/openfang-kernel/src/metering.rs index e34dff4e9..2ad0b6d13 100644 --- a/crates/openfang-kernel/src/metering.rs +++ b/crates/openfang-kernel/src/metering.rs @@ -374,6 +374,14 @@ fn estimate_cost_rates(model: &str) -> (f64, f64) { if model.contains("llama") || model.contains("mixtral") { return (0.05, 0.10); } + // ── Alibaba Coding Plan (subscription-based: $50/month) ───── + // Must come BEFORE the qwen/glm/kimi/minimax checks below + // because model IDs like "alibaba-coding-plan/qwen3.5-plus" contain "qwen". + // All models in the Coding Plan use flat-rate subscription pricing. + // Per-token costs are $0 — actual cost is the fixed monthly fee. + if model.contains("alibaba-coding-plan") { + return (0.0, 0.0); + } // ── Qwen (Alibaba) ────────────────────────────────────────── if model.contains("qwen-max") { return (4.00, 12.00); @@ -387,6 +395,9 @@ fn estimate_cost_rates(model: &str) -> (f64, f64) { if model.contains("qwen-turbo") { return (0.30, 0.60); } + // ── Qwen (generic / pay-per-token) ────────────────────────── + // Note: "alibaba-coding-plan/*" models are caught by the earlier guard above + // and never reach this branch. 
if model.contains("qwen") { return (0.20, 0.60); } @@ -751,6 +762,70 @@ mod tests { assert!((cost - 0.12).abs() < 0.01); // $0.06 + $0.06 } + #[test] + fn test_estimate_cost_alibaba_coding_plan_subscription() { + // Alibaba Coding Plan is subscription-based ($50/month) — per-token cost is $0 + let qwen35_cost = MeteringEngine::estimate_cost( + "alibaba-coding-plan/qwen3.5-plus", + 1_000_000, + 1_000_000, + ); + assert!((qwen35_cost).abs() < 0.001, "Qwen 3.5 Plus should be $0 (subscription)"); + + let glm5_cost = + MeteringEngine::estimate_cost("alibaba-coding-plan/glm-5", 1_000_000, 1_000_000); + assert!((glm5_cost).abs() < 0.001, "GLM-5 should be $0 (subscription)"); + + let kimi_cost = + MeteringEngine::estimate_cost("alibaba-coding-plan/kimi-k2.5", 1_000_000, 1_000_000); + assert!((kimi_cost).abs() < 0.001, "Kimi K2.5 should be $0 (subscription)"); + + let minimax_cost = MeteringEngine::estimate_cost( + "alibaba-coding-plan/minimax-m2.5", + 1_000_000, + 1_000_000, + ); + assert!((minimax_cost).abs() < 0.001, "MiniMax M2.5 should be $0 (subscription)"); + + let glm47_cost = MeteringEngine::estimate_cost( + "alibaba-coding-plan/glm-4.7", + 1_000_000, + 1_000_000, + ); + assert!((glm47_cost).abs() < 0.001, "GLM-4.7 should be $0 (subscription)"); + + let qwen3_max_cost = MeteringEngine::estimate_cost( + "alibaba-coding-plan/qwen3-max-2026-01-23", + 1_000_000, + 1_000_000, + ); + assert!( + (qwen3_max_cost).abs() < 0.001, + "Qwen3-max-2026-01-23 should be $0 (subscription)" + ); + + let qwen3_coder_plus_cost = MeteringEngine::estimate_cost( + "alibaba-coding-plan/qwen3-coder-plus", + 1_000_000, + 1_000_000, + ); + assert!( + (qwen3_coder_plus_cost).abs() < 0.001, + "Qwen3-coder-plus should be $0 (subscription)" + ); + + let qwen3_coder_next_cost = MeteringEngine::estimate_cost( + "alibaba-coding-plan/qwen3-coder-next", + 1_000_000, + 1_000_000, + ); + assert!( + (qwen3_coder_next_cost).abs() < 0.001, + "Qwen3-coder-next should be $0 (subscription)" + ); + + } + 
#[test] fn test_estimate_cost_with_catalog() { let catalog = openfang_runtime::model_catalog::ModelCatalog::new(); diff --git a/crates/openfang-runtime/src/agent_loop.rs b/crates/openfang-runtime/src/agent_loop.rs index f773def41..065d2af77 100644 --- a/crates/openfang-runtime/src/agent_loop.rs +++ b/crates/openfang-runtime/src/agent_loop.rs @@ -18,6 +18,9 @@ use openfang_memory::session::Session; use openfang_memory::MemorySubstrate; use openfang_skills::registry::SkillRegistry; use openfang_types::agent::{AgentManifest, FallbackModel}; +use openfang_types::model_catalog::{ + DOUBAO_PROVIDER_ID, VOLCENGINE_CODING_PROVIDER_ID, VOLCENGINE_PROVIDER_ID, +}; use openfang_types::error::{OpenFangError, OpenFangResult}; use openfang_types::memory::{Memory, MemoryFilter, MemorySource}; use openfang_types::message::{ @@ -107,15 +110,30 @@ fn append_tool_error_guidance(tool_result_blocks: &mut Vec) { /// Many models are stored as `provider/org/model` (e.g. `openrouter/google/gemini-2.5-flash`) /// but the upstream API expects just `org/model` (e.g. `google/gemini-2.5-flash`). pub fn strip_provider_prefix(model: &str, provider: &str) -> String { - let slash_prefix = format!("{}/", provider); - let colon_prefix = format!("{}:", provider); - if model.starts_with(&slash_prefix) { + let provider_normalized = provider.replace('_', "-"); + let slash_prefix = format!("{}/", provider_normalized); + let colon_prefix = format!("{}:", provider_normalized); + let mut result = if model.starts_with(&slash_prefix) { model[slash_prefix.len()..].to_string() } else if model.starts_with(&colon_prefix) { model[colon_prefix.len()..].to_string() } else { model.to_string() + }; + // Strip "ark/" catalog namespace prefix before sending to Ark API. + // "ark/" is used internally to disambiguate Ark marketplace models from + // native provider models with the same name (e.g. ark/minimax-m2.5 vs + // minimax provider's minimax-m2.5). The Ark API endpoint expects the bare + // model name (e.g. 
"minimax-m2.5"), not the namespaced form. + // Strip ark/ prefix only for Volcano Engine providers (Ark marketplace models) + if (provider == VOLCENGINE_CODING_PROVIDER_ID + || provider == VOLCENGINE_PROVIDER_ID + || provider == DOUBAO_PROVIDER_ID) + && result.starts_with("ark/") + { + result = result["ark/".len()..].to_string(); } + result } /// Default context window size (tokens) for token-based trimming. @@ -2953,6 +2971,30 @@ mod tests { use openfang_types::tool::ToolCall; use std::sync::atomic::{AtomicU32, Ordering}; + #[test] + fn test_strip_provider_prefix_alibaba() { + // alibaba: underscore provider normalizes to hyphen prefix + assert_eq!( + strip_provider_prefix("alibaba-coding-plan/qwen3.5-plus", "alibaba_coding_plan"), + "qwen3.5-plus" + ); + // no prefix passthrough + assert_eq!( + strip_provider_prefix("qwen3.5-plus", "alibaba_coding_plan"), + "qwen3.5-plus" + ); + // regression: kimi_coding model passes through unchanged + assert_eq!( + strip_provider_prefix("kimi-for-coding", "kimi_coding"), + "kimi-for-coding" + ); + // regression: zhipu_coding model passes through unchanged + assert_eq!( + strip_provider_prefix("codegeex-4", "zhipu_coding"), + "codegeex-4" + ); + } + #[test] fn test_max_iterations_constant() { assert_eq!(MAX_ITERATIONS, 50); @@ -3022,6 +3064,42 @@ mod tests { assert_eq!(MAX_HISTORY_MESSAGES, 20); } + #[test] + fn test_strip_ark_catalog_prefix_for_volcengine_coding() { + // ark/ is catalog-only; Ark API expects bare name + assert_eq!( + strip_provider_prefix("ark/doubao-seed-code", "volcengine_coding"), + "doubao-seed-code" + ); + } + + #[test] + fn test_strip_provider_prefix_ark_volcengine() { + // Should strip ark/ for volcengine + assert_eq!( + strip_provider_prefix("ark/doubao-seed-code", "volcengine"), + "doubao-seed-code" + ); + } + + #[test] + fn test_strip_provider_prefix_ark_doubao() { + // Should strip ark/ for doubao provider alias + assert_eq!( + strip_provider_prefix("ark/some-model", "doubao"), + "some-model" + ); + 
} + + #[test] + fn test_strip_provider_prefix_ark_not_stripped_for_other_providers() { + // Must NOT strip ark/ for non-volcengine providers + assert_eq!( + strip_provider_prefix("ark/some-model", "openai"), + "ark/some-model" + ); + } + // --- Integration tests for empty response guards --- fn test_manifest() -> AgentManifest { diff --git a/crates/openfang-runtime/src/drivers/mod.rs b/crates/openfang-runtime/src/drivers/mod.rs index 2df8923d4..13a6185b2 100644 --- a/crates/openfang-runtime/src/drivers/mod.rs +++ b/crates/openfang-runtime/src/drivers/mod.rs @@ -2,7 +2,8 @@ //! //! Contains drivers for Anthropic Claude, Google Gemini, OpenAI-compatible APIs, and more. //! Supports: Anthropic, Gemini, OpenAI, Groq, OpenRouter, DeepSeek, Together, -//! Mistral, Fireworks, Ollama, vLLM, Chutes.ai, and any OpenAI-compatible endpoint. +//! Mistral, Fireworks, Ollama, vLLM, Chutes.ai, volcengine (Doubao / Ark), +//! and any OpenAI-compatible endpoint. pub mod anthropic; pub mod claude_code; @@ -15,14 +16,15 @@ pub mod vertex; use crate::llm_driver::{DriverConfig, LlmDriver, LlmError}; use openfang_types::model_catalog::{ - AI21_BASE_URL, ANTHROPIC_BASE_URL, AZURE_OPENAI_BASE_URL, CEREBRAS_BASE_URL, CHUTES_BASE_URL, - COHERE_BASE_URL, DEEPSEEK_BASE_URL, FIREWORKS_BASE_URL, GEMINI_BASE_URL, GROQ_BASE_URL, - HUGGINGFACE_BASE_URL, KIMI_CODING_BASE_URL, LEMONADE_BASE_URL, LMSTUDIO_BASE_URL, - MINIMAX_BASE_URL, MISTRAL_BASE_URL, MOONSHOT_BASE_URL, NVIDIA_NIM_BASE_URL, OLLAMA_BASE_URL, - OPENAI_BASE_URL, OPENROUTER_BASE_URL, PERPLEXITY_BASE_URL, QIANFAN_BASE_URL, QWEN_BASE_URL, - REPLICATE_BASE_URL, SAMBANOVA_BASE_URL, TOGETHER_BASE_URL, VENICE_BASE_URL, VLLM_BASE_URL, - VOLCENGINE_BASE_URL, VOLCENGINE_CODING_BASE_URL, XAI_BASE_URL, ZAI_BASE_URL, - ZAI_CODING_BASE_URL, ZHIPU_BASE_URL, ZHIPU_CODING_BASE_URL, + AI21_BASE_URL, ALIBABA_CODING_PLAN_BASE_URL, ANTHROPIC_BASE_URL, AZURE_OPENAI_BASE_URL, + CEREBRAS_BASE_URL, CHUTES_BASE_URL, COHERE_BASE_URL, DEEPSEEK_BASE_URL, 
FIREWORKS_BASE_URL, + GEMINI_BASE_URL, GROQ_BASE_URL, HUGGINGFACE_BASE_URL, KIMI_CODING_BASE_URL, + LEMONADE_BASE_URL, LMSTUDIO_BASE_URL, MINIMAX_BASE_URL, MISTRAL_BASE_URL, MOONSHOT_BASE_URL, + NVIDIA_NIM_BASE_URL, OLLAMA_BASE_URL, OPENAI_BASE_URL, OPENROUTER_BASE_URL, + PERPLEXITY_BASE_URL, QIANFAN_BASE_URL, QWEN_BASE_URL, REPLICATE_BASE_URL, SAMBANOVA_BASE_URL, + TOGETHER_BASE_URL, VENICE_BASE_URL, VLLM_BASE_URL, VOLCENGINE_BASE_URL, + VOLCENGINE_CODING_BASE_URL, XAI_BASE_URL, ZAI_BASE_URL, ZAI_CODING_BASE_URL, + ZHIPU_BASE_URL, ZHIPU_CODING_BASE_URL, }; use std::sync::Arc; @@ -197,6 +199,7 @@ fn provider_defaults(provider: &str) -> Option { api_key_env: "QIANFAN_API_KEY", key_required: true, }), + // "doubao" is also a model alias in builtin_aliases; here it acts as a provider alias "volcengine" | "doubao" => Some(ProviderDefaults { base_url: VOLCENGINE_BASE_URL, api_key_env: "VOLCENGINE_API_KEY", @@ -260,6 +263,7 @@ fn provider_defaults(provider: &str) -> Option { /// - `xai` — xAI (Grok) /// - `replicate` — Replicate /// - `chutes` — Chutes.ai (serverless open-source model inference) +/// - `volcengine` — Volcano Engine (Doubao/Ark) /// - Any custom provider with `base_url` set uses OpenAI-compatible format pub fn create_driver(config: &DriverConfig) -> Result, LlmError> { let provider = config.provider.as_str(); @@ -426,6 +430,29 @@ pub fn create_driver(config: &DriverConfig) -> Result, LlmErr return Ok(Arc::new(anthropic::AnthropicDriver::new(api_key, base_url))); } + // Alibaba Coding Plan — OpenAI-compatible endpoint. + // Accept both underscore form ("alibaba_coding_plan") and hyphen form ("alibaba-coding-plan"). + // NOTE: prefix stripping ("alibaba-coding-plan/") is handled upstream by + // `strip_provider_prefix` in agent_loop.rs before the CompletionRequest is built. + // By the time the request reaches this driver, `request.model` is already the bare + // model name (e.g. "qwen3.5-plus"). No wrapper needed here. 
+ if provider == "alibaba_coding_plan" || provider == "alibaba-coding-plan" { + let api_key = config + .api_key + .clone() + .or_else(|| std::env::var("ALIBABA_CODING_PLAN_API_KEY").ok()) + .ok_or_else(|| { + LlmError::MissingApiKey( + "Set ALIBABA_CODING_PLAN_API_KEY environment variable".to_string(), + ) + })?; + let base_url = config + .base_url + .clone() + .unwrap_or_else(|| ALIBABA_CODING_PLAN_BASE_URL.to_string()); + return Ok(Arc::new(openai::OpenAIDriver::new(api_key, base_url))); + } + // All other providers use OpenAI-compatible format if let Some(defaults) = provider_defaults(provider) { let api_key = config @@ -489,7 +516,11 @@ pub fn create_driver(config: &DriverConfig) -> Result, LlmErr "Unknown provider '{}'. Supported: anthropic, gemini, openai, azure, groq, openrouter, \ deepseek, together, mistral, fireworks, ollama, vllm, lmstudio, perplexity, \ cohere, ai21, cerebras, sambanova, huggingface, xai, replicate, github-copilot, \ - chutes, venice, nvidia, codex, claude-code. Or set base_url for a custom OpenAI-compatible endpoint.", + chutes, venice, nvidia, codex, claude-code, qwen-code, kimi_coding, zhipu_coding, \ + zai, zai_coding, qianfan, moonshot, lemonade, \ + alibaba_coding_plan (or alibaba-coding-plan), \ + volcengine (or doubao), volcengine_coding. 
\ + Or set base_url for a custom OpenAI-compatible endpoint.", provider ), }) @@ -507,6 +538,7 @@ pub fn detect_available_provider() -> Option<(&'static str, &'static str, &'stat ("gemini", "gemini-2.5-flash", "GEMINI_API_KEY"), ("groq", "llama-3.3-70b-versatile", "GROQ_API_KEY"), ("deepseek", "deepseek-chat", "DEEPSEEK_API_KEY"), + ("volcengine", "doubao-seed-1-6-251015", "VOLCENGINE_API_KEY"), ( "openrouter", "openrouter/google/gemini-2.5-flash", @@ -530,6 +562,11 @@ pub fn detect_available_provider() -> Option<(&'static str, &'static str, &'stat "PERPLEXITY_API_KEY", ), ("cohere", "command-r-plus", "COHERE_API_KEY"), + ( + "alibaba_coding_plan", + "alibaba-coding-plan/qwen3.5-plus", + "ALIBABA_CODING_PLAN_API_KEY", + ), ]; for &(provider, model, env_var) in PROBE_ORDER { if std::env::var(env_var) @@ -584,6 +621,8 @@ pub fn known_providers() -> &'static [&'static str] { "kimi_coding", "qianfan", "volcengine", + "volcengine_coding", + "doubao", "chutes", "venice", "nvidia", @@ -591,6 +630,7 @@ pub fn known_providers() -> &'static [&'static str] { "claude-code", "qwen-code", "azure", + "alibaba_coding_plan", ] } @@ -689,13 +729,17 @@ mod tests { assert!(providers.contains(&"kimi_coding")); assert!(providers.contains(&"qianfan")); assert!(providers.contains(&"volcengine")); + assert!(providers.contains(&"volcengine_coding")); + assert!(providers.contains(&"doubao")); assert!(providers.contains(&"chutes")); assert!(providers.contains(&"nvidia")); assert!(providers.contains(&"codex")); assert!(providers.contains(&"claude-code")); assert!(providers.contains(&"qwen-code")); assert!(providers.contains(&"azure")); - assert_eq!(providers.len(), 37); + // Alibaba Coding Plan + assert!(providers.contains(&"alibaba_coding_plan")); + assert_eq!(providers.len(), 40); } #[test] @@ -768,6 +812,93 @@ mod tests { assert!(driver.is_err()); } + #[test] + fn test_alibaba_coding_plan_driver_with_direct_key() { + // Test that a directly-supplied API key is accepted. 
+ let config = DriverConfig { + provider: "alibaba_coding_plan".to_string(), + api_key: Some("sk-sp-test-direct-key-67890".to_string()), + base_url: None, + skip_permissions: true, + }; + let driver = create_driver(&config); + assert!( + driver.is_ok(), + "Alibaba Coding Plan with direct API key should succeed" + ); + } + + #[test] + fn test_alibaba_coding_plan_no_key_errors() { + // Alibaba Coding Plan with no API key should return MissingApiKey. + // We supply api_key: None and a unique base_url so the alibaba_coding_plan + // branch is entered deterministically without reading or modifying any env var. + // This is safe to run in parallel with other tests. + let config = DriverConfig { + provider: "alibaba_coding_plan".to_string(), + api_key: None, + base_url: Some("https://test-alibaba-no-key.invalid".to_string()), + skip_permissions: true, + }; + let driver = create_driver(&config); + assert!(driver.is_err(), "Expected error when api_key is None"); + let err = driver.err().unwrap().to_string(); + assert!( + err.contains("ALIBABA_CODING_PLAN_API_KEY"), + "Error should mention ALIBABA_CODING_PLAN_API_KEY: {}", + err + ); + } + + #[test] + fn test_alibaba_coding_plan_custom_base_url() { + // Test custom base URL override + let config = DriverConfig { + provider: "alibaba_coding_plan".to_string(), + api_key: Some("sk-sp-test-key".to_string()), + base_url: Some("https://custom-endpoint.example.com/v1".to_string()), + skip_permissions: true, + }; + let driver = create_driver(&config); + assert!(driver.is_ok()); + } + + #[test] + fn test_doubao_alias_driver_with_direct_key() { + // "doubao" is a provider alias for volcengine — create_driver should route it correctly. 
+ let config = DriverConfig { + provider: "doubao".to_string(), + api_key: Some("sk-test-doubao-direct-key-12345".to_string()), + base_url: None, + skip_permissions: true, + }; + let driver = create_driver(&config); + assert!( + driver.is_ok(), + "doubao provider alias with direct API key should succeed: {:?}", + driver.err() + ); + } + + #[test] + fn test_doubao_alias_driver_no_key_errors() { + // "doubao" with no key and a unique base_url should return MissingApiKey. + let config = DriverConfig { + provider: "doubao".to_string(), + api_key: None, + base_url: Some("https://test-doubao-no-key.invalid".to_string()), + skip_permissions: true, + }; + let driver = create_driver(&config); + assert!(driver.is_err(), "Expected error when doubao api_key is None"); + let err = driver.err().unwrap().to_string(); + assert!( + err.contains("VOLCENGINE_API_KEY"), + "Error should mention VOLCENGINE_API_KEY: {}", + err + ); + } + #[test] fn test_custom_provider_key_no_url_helpful_error() { // Custom provider with key set (via env) but no base_url should give helpful error. 
@@ -888,4 +1019,31 @@ mod tests { "azure-openai alias should create driver successfully" ); } + + #[test] + fn test_provider_defaults_volcengine() { + let d = provider_defaults("volcengine").unwrap(); + assert_eq!(d.base_url, "https://ark.cn-beijing.volces.com/api/v3"); + assert_eq!(d.api_key_env, "VOLCENGINE_API_KEY"); + assert!(d.key_required); + } + + #[test] + fn test_provider_defaults_volcengine_doubao_alias() { + let d = provider_defaults("doubao").unwrap(); + assert_eq!(d.base_url, "https://ark.cn-beijing.volces.com/api/v3"); + assert_eq!(d.api_key_env, "VOLCENGINE_API_KEY"); + assert!(d.key_required); + } + + #[test] + fn test_provider_defaults_volcengine_coding() { + let d = provider_defaults("volcengine_coding").unwrap(); + assert_eq!( + d.base_url, + "https://ark.cn-beijing.volces.com/api/coding/v3" + ); + assert_eq!(d.api_key_env, "VOLCENGINE_API_KEY"); + assert!(d.key_required); + } } diff --git a/crates/openfang-runtime/src/model_catalog.rs b/crates/openfang-runtime/src/model_catalog.rs index 62b81c74e..fff666b9f 100644 --- a/crates/openfang-runtime/src/model_catalog.rs +++ b/crates/openfang-runtime/src/model_catalog.rs @@ -4,15 +4,16 @@ //! with alias resolution, auth status detection, and pricing lookups. 
use openfang_types::model_catalog::{ - AuthStatus, ModelCatalogEntry, ModelTier, ProviderInfo, AI21_BASE_URL, ANTHROPIC_BASE_URL, - AZURE_OPENAI_BASE_URL, BEDROCK_BASE_URL, CEREBRAS_BASE_URL, CHUTES_BASE_URL, COHERE_BASE_URL, - DEEPSEEK_BASE_URL, FIREWORKS_BASE_URL, GEMINI_BASE_URL, GITHUB_COPILOT_BASE_URL, GROQ_BASE_URL, - HUGGINGFACE_BASE_URL, KIMI_CODING_BASE_URL, LEMONADE_BASE_URL, LMSTUDIO_BASE_URL, - MINIMAX_BASE_URL, MISTRAL_BASE_URL, MOONSHOT_BASE_URL, NVIDIA_NIM_BASE_URL, OLLAMA_BASE_URL, - OPENAI_BASE_URL, OPENROUTER_BASE_URL, PERPLEXITY_BASE_URL, QIANFAN_BASE_URL, QWEN_BASE_URL, - REPLICATE_BASE_URL, SAMBANOVA_BASE_URL, TOGETHER_BASE_URL, VENICE_BASE_URL, VLLM_BASE_URL, - VOLCENGINE_BASE_URL, VOLCENGINE_CODING_BASE_URL, XAI_BASE_URL, ZAI_BASE_URL, - ZAI_CODING_BASE_URL, ZHIPU_BASE_URL, ZHIPU_CODING_BASE_URL, + AuthStatus, ModelCatalogEntry, ModelTier, ProviderInfo, AI21_BASE_URL, + ALIBABA_CODING_PLAN_BASE_URL, ANTHROPIC_BASE_URL, AZURE_OPENAI_BASE_URL, BEDROCK_BASE_URL, + CEREBRAS_BASE_URL, CHUTES_BASE_URL, COHERE_BASE_URL, DEEPSEEK_BASE_URL, FIREWORKS_BASE_URL, + GEMINI_BASE_URL, GITHUB_COPILOT_BASE_URL, GROQ_BASE_URL, HUGGINGFACE_BASE_URL, + KIMI_CODING_BASE_URL, LEMONADE_BASE_URL, LMSTUDIO_BASE_URL, MINIMAX_BASE_URL, MISTRAL_BASE_URL, + MOONSHOT_BASE_URL, NVIDIA_NIM_BASE_URL, OLLAMA_BASE_URL, OPENAI_BASE_URL, OPENROUTER_BASE_URL, + PERPLEXITY_BASE_URL, QIANFAN_BASE_URL, QWEN_BASE_URL, REPLICATE_BASE_URL, SAMBANOVA_BASE_URL, + TOGETHER_BASE_URL, VENICE_BASE_URL, VLLM_BASE_URL, VOLCENGINE_BASE_URL, + VOLCENGINE_CODING_BASE_URL, XAI_BASE_URL, ZAI_BASE_URL, ZAI_CODING_BASE_URL, ZHIPU_BASE_URL, + ZHIPU_CODING_BASE_URL, }; use std::collections::HashMap; @@ -81,13 +82,23 @@ impl ModelCatalog { } // Primary: check the provider's declared env var - let has_key = std::env::var(&provider.api_key_env).is_ok(); + let has_key = std::env::var(&provider.api_key_env) + .ok() + .filter(|v| !v.is_empty()) + .is_some(); // Secondary: provider-specific fallback auth let 
has_fallback = match provider.id.as_str() { - "gemini" => std::env::var("GOOGLE_API_KEY").is_ok(), + "gemini" => std::env::var("GOOGLE_API_KEY") + .ok() + .filter(|v| !v.is_empty()) + .is_some(), "codex" => { - std::env::var("OPENAI_API_KEY").is_ok() || read_codex_credential().is_some() + std::env::var("OPENAI_API_KEY") + .ok() + .filter(|v| !v.is_empty()) + .is_some() + || read_codex_credential().is_some() } // claude-code is handled above (before key_required check) _ => false, @@ -765,7 +776,7 @@ fn builtin_providers() -> Vec { auth_status: AuthStatus::Missing, model_count: 0, }, - // ── Chinese providers (5) ──────────────────────────────────── + // ── Chinese providers (6) ──────────────────────────────────── ProviderInfo { id: "qwen".into(), display_name: "Qwen (Alibaba)".into(), @@ -775,6 +786,17 @@ fn builtin_providers() -> Vec { auth_status: AuthStatus::Missing, model_count: 0, }, + // Provider ID uses underscores ("alibaba_coding_plan") while model IDs use hyphens ("alibaba-coding-plan/..."). + // This matches the pattern of zhipu_coding, kimi_coding, etc. Do not normalize — create_driver matches on underscore form. 
+ ProviderInfo { + id: "alibaba_coding_plan".into(), + display_name: "Alibaba Coding Plan (Intl)".into(), + api_key_env: "ALIBABA_CODING_PLAN_API_KEY".into(), + base_url: ALIBABA_CODING_PLAN_BASE_URL.into(), + key_required: true, + auth_status: AuthStatus::Missing, + model_count: 0, + }, ProviderInfo { id: "minimax".into(), display_name: "MiniMax".into(), @@ -850,7 +872,7 @@ fn builtin_providers() -> Vec { // ── Volcano Engine (Doubao) ────────────────────────────────── ProviderInfo { id: "volcengine".into(), - display_name: "Volcano Engine (Doubao)".into(), + display_name: "Volcano Engine".into(), api_key_env: "VOLCENGINE_API_KEY".into(), base_url: VOLCENGINE_BASE_URL.into(), key_required: true, @@ -979,6 +1001,11 @@ fn builtin_aliases() -> HashMap { ("minimax-highspeed", "MiniMax-M2.5-highspeed"), ("minimax-m2.1", "MiniMax-M2.1"), ("codegeex", "codegeex-4"), + // Alibaba Coding Plan aliases + ("alibaba-coding-plan", "alibaba-coding-plan/qwen3.5-plus"), + // "qwen3-coder" (with digit 3) → alibaba-coding-plan coding endpoint + // "qwen-coder" (no digit) → qwen-code/qwen3-coder (direct Qwen Code model) + ("qwen3-coder", "alibaba-coding-plan/qwen3-coder-plus"), // Codex aliases ("codex", "codex/gpt-5.4"), ("codex-5.4", "codex/gpt-5.4"), @@ -3193,6 +3220,138 @@ fn builtin_models() -> Vec { aliases: vec!["abab7".into()], }, // ══════════════════════════════════════════════════════════════ + // Alibaba Coding Plan International (8) + // All pricing set to $0 — actual cost is fixed monthly subscription. + // Provides multi-provider access: Qwen, Zhipu GLM, Moonshot Kimi, MiniMax. 
+ // See: https://www.alibabacloud.com/help/en/model-studio/coding-plan + // ══════════════════════════════════════════════════════════════ + // ── Qwen models (4) ────────────────────────────────────────── + ModelCatalogEntry { + id: "alibaba-coding-plan/qwen3.5-plus".into(), + display_name: "Qwen 3.5 Plus (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Smart, + context_window: 1_000_000, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + // The Coding Plan endpoint (coding-intl.dashscope.aliyuncs.com) is a + // coding-specialized endpoint that does not expose multimodal routes. + // Use the standard Qwen provider if vision is required. + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "alibaba-coding-plan/qwen3-max-2026-01-23".into(), + display_name: "Qwen 3 Max 2026-01-23 (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Frontier, + context_window: 262_144, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "alibaba-coding-plan/qwen3-coder-plus".into(), + display_name: "Qwen 3 Coder Plus (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Smart, + context_window: 1_000_000, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "alibaba-coding-plan/qwen3-coder-next".into(), + display_name: "Qwen 3 Coder Next (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Frontier, + context_window: 262_144, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + 
supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + // ── Zhipu / GLM via Coding Plan (2) ───────────────────────── + // API receives "glm-5" / "glm-4.7" after prefix stripping + ModelCatalogEntry { + id: "alibaba-coding-plan/glm-5".into(), + display_name: "GLM-5 (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Frontier, + context_window: 202_752, + max_output_tokens: 32_768, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "alibaba-coding-plan/glm-4.7".into(), + display_name: "GLM-4.7 (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Smart, + context_window: 202_752, + max_output_tokens: 32_768, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + // ── Moonshot / Kimi via Coding Plan (1) ───────────────────── + // API receives "kimi-k2.5" after prefix stripping + ModelCatalogEntry { + id: "alibaba-coding-plan/kimi-k2.5".into(), + display_name: "Kimi K2.5 (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Smart, + context_window: 262_144, + max_output_tokens: 32_768, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + // The Coding Plan endpoint (coding-intl.dashscope.aliyuncs.com) is a + // coding-specialized endpoint that does not expose multimodal routes. + // Use the standard Kimi/Moonshot provider if vision is required. + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + // ── MiniMax via Coding Plan (1) ────────────────────────────── + // The bare alias "minimax-m2.5" is claimed by the MiniMax provider. 
Users must specify + // the full "alibaba-coding-plan/minimax-m2.5" ID to use this model via the Coding Plan endpoint. + ModelCatalogEntry { + id: "alibaba-coding-plan/minimax-m2.5".into(), + display_name: "MiniMax M2.5 (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Smart, + context_window: 204_800, + max_output_tokens: 32_768, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + // ══════════════════════════════════════════════════════════════ // Zhipu AI / GLM (6) // ══════════════════════════════════════════════════════════════ ModelCatalogEntry { @@ -3462,9 +3621,156 @@ fn builtin_models() -> Vec { supports_streaming: true, aliases: vec![], }, + // ══════════════════════════════════════════════════════════════ + // Volcano Engine Coding Plan (9) + // ══════════════════════════════════════════════════════════════ + ModelCatalogEntry { + id: "ark-code-latest".into(), + display_name: "Ark Code (Latest)".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 131_072, + max_output_tokens: 8_192, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec!["ark-code".into()], + }, + ModelCatalogEntry { + id: "doubao-seed-2.0-code".into(), + display_name: "Doubao Seed 2.0 Code (Coding Plan)".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 262_144, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "doubao-seed-2.0-pro".into(), + display_name: "Doubao Seed 2.0 Pro".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Frontier, + context_window: 262_144, + max_output_tokens: 16_384, + 
input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "doubao-seed-2.0-lite".into(), + display_name: "Doubao Seed 2.0 Lite (Ark Coding)".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Fast, + context_window: 262_144, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "doubao-seed-code-ark".into(), + display_name: "Doubao Seed Code (Ark)".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 262_144, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + // Third-party models available via Ark marketplace. + // The "ark/" prefix is the canonical ID for these Ark marketplace models to avoid + // collisions with native provider entries (minimax, zhipu, moonshot). + // The bare model name is kept as an alias only where no collision exists. 
+ // Pricing not publicly documented for Ark-routed third-party models; set to 0.0 + ModelCatalogEntry { + id: "ark/minimax-m2.5".into(), + display_name: "MiniMax M2.5 (via Ark)".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 200_000, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + // "minimax-m2.5" NOT added as alias — already canonical on the minimax provider entry + aliases: vec![], + }, + ModelCatalogEntry { + id: "ark/glm-4.7".into(), + display_name: "GLM 4.7 (via Ark)".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Balanced, + context_window: 200_000, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + // "glm-4.7" NOT added as alias — already canonical on the zhipu provider entry + aliases: vec![], + }, + ModelCatalogEntry { + id: "ark/deepseek-v3.2".into(), + display_name: "DeepSeek V3.2 (via Ark)".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 131_072, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + // "deepseek-v3.2" kept as alias — no collision with other providers + aliases: vec!["deepseek-v3.2".into()], + }, + ModelCatalogEntry { + id: "ark/kimi-k2.5".into(), + display_name: "Kimi K2.5 (via Ark)".into(), + provider: "volcengine_coding".into(), + tier: ModelTier::Smart, + context_window: 262_144, + max_output_tokens: 16_384, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + // "kimi-k2.5" NOT added as alias — already canonical on the moonshot provider entry + aliases: vec![], + }, + // 
══════════════════════════════════════════════════════════════ // Volcano Engine / Doubao (4) // ══════════════════════════════════════════════════════════════ + // + // NOTE on separators: the volcengine provider uses hyphen-only IDs + // (e.g. "doubao-seed-2-0-lite") because the Ark /api/v3 endpoint uses + // endpoint-access-point names that don't contain dots. The + // volcengine_coding entries above use dot notation + // (e.g. "doubao-seed-2.0-lite") which is the model version string used + // by the /api/coding/v3 endpoint. These are different endpoint paths + // and the IDs must not be unified. ModelCatalogEntry { id: "doubao-seed-1-6-251015".into(), display_name: "Doubao Seed 1.6 Pro".into(), @@ -3477,6 +3783,8 @@ fn builtin_models() -> Vec { supports_tools: true, supports_vision: false, supports_streaming: true, + // "doubao" also maps to the volcengine provider in provider_defaults() — intentional dual alias + // Also matched as a provider alias in provider_defaults() — keep in sync aliases: vec!["doubao".into(), "doubao-pro".into()], }, ModelCatalogEntry { @@ -3507,9 +3815,10 @@ fn builtin_models() -> Vec { supports_streaming: true, aliases: vec!["doubao-mini".into()], }, + // Standard plan (/api/v3) variant; see volcengine_coding for the coding-plan endpoint ModelCatalogEntry { id: "doubao-seed-code".into(), - display_name: "Doubao Seed Code".into(), + display_name: "Doubao Seed Code (Standard Plan)".into(), provider: "volcengine".into(), tier: ModelTier::Smart, context_window: 131_072, @@ -3905,7 +4214,7 @@ mod tests { #[test] fn test_catalog_has_providers() { let catalog = ModelCatalog::new(); - assert_eq!(catalog.list_providers().len(), 41); + assert_eq!(catalog.list_providers().len(), 42); } #[test] @@ -4137,6 +4446,10 @@ mod tests { assert!(catalog.get_provider("moonshot").is_some()); assert!(catalog.get_provider("qianfan").is_some()); assert!(catalog.get_provider("bedrock").is_some()); + // Alibaba Coding Plan provider + 
assert!(catalog.get_provider("alibaba_coding_plan").is_some()); + let provider = catalog.get_provider("alibaba_coding_plan").unwrap(); + assert_eq!(provider.api_key_env, "ALIBABA_CODING_PLAN_API_KEY"); } #[test] @@ -4176,6 +4489,109 @@ mod tests { assert!(abab7.supports_vision); } + #[test] + fn test_alibaba_coding_plan_models() { + let catalog = ModelCatalog::new(); + + // Test all 8 models are present + let models = catalog.models_by_provider("alibaba_coding_plan"); + assert_eq!(models.len(), 8); + + // Test qwen3.5-plus — flagship model with 1M context + // Note: supports_vision is false — the Coding Plan endpoint is coding-specialized + let qwen35 = catalog + .find_model("alibaba-coding-plan/qwen3.5-plus") + .unwrap(); + assert_eq!(qwen35.display_name, "Qwen 3.5 Plus (Alibaba Coding Plan)"); + assert_eq!(qwen35.tier, ModelTier::Smart); + assert_eq!(qwen35.context_window, 1_000_000); + assert_eq!(qwen35.max_output_tokens, 65_536); + assert!(qwen35.supports_tools); + assert!(!qwen35.supports_vision); + assert!(qwen35.supports_streaming); + // Subscription-based pricing + assert!((qwen35.input_cost_per_m).abs() < f64::EPSILON); + assert!((qwen35.output_cost_per_m).abs() < f64::EPSILON); + + // Test qwen3-max-2026-01-23 — frontier model + let qwen3max = catalog + .find_model("alibaba-coding-plan/qwen3-max-2026-01-23") + .unwrap(); + assert_eq!(qwen3max.tier, ModelTier::Frontier); + assert_eq!(qwen3max.context_window, 262_144); + assert!(!qwen3max.supports_vision); + assert!(qwen3max.supports_tools); + + // Test qwen3-coder-plus — coding model with 1M context + let qwen3coder = catalog + .find_model("alibaba-coding-plan/qwen3-coder-plus") + .unwrap(); + assert_eq!(qwen3coder.context_window, 1_000_000); + assert!(!qwen3coder.supports_vision); + assert!(qwen3coder.supports_tools); + + // Test qwen3-coder alias — registered in builtin_aliases() which uses + // or_insert_with (first-wins), so explicit builtin entries take priority + // over any model-level aliases vecs 
that might also claim the same key. + let qwen3coder_alias = catalog.find_model("qwen3-coder").unwrap(); + assert_eq!(qwen3coder_alias.id, "alibaba-coding-plan/qwen3-coder-plus"); + + // Verify qwen3 alias still resolves to qwen provider (not overwritten by alibaba aliases) + let qwen3 = catalog.find_model("qwen3").unwrap(); + assert_eq!(qwen3.provider, "qwen"); + + // Test glm-5 — Zhipu model via Coding Plan + let glm5 = catalog.find_model("alibaba-coding-plan/glm-5").unwrap(); + assert_eq!(glm5.display_name, "GLM-5 (Alibaba Coding Plan)"); + assert_eq!(glm5.tier, ModelTier::Frontier); + assert_eq!(glm5.context_window, 202_752); + assert_eq!(glm5.max_output_tokens, 32_768); + assert!(glm5.supports_tools); + assert!(!glm5.supports_vision); + + // Test glm-4.7 + let glm47 = catalog.find_model("alibaba-coding-plan/glm-4.7").unwrap(); + assert_eq!(glm47.tier, ModelTier::Smart); + assert_eq!(glm47.context_window, 202_752); + + // Test kimi-k2.5 — Moonshot model via Coding Plan (no vision on this endpoint) + let kimi = catalog.find_model("alibaba-coding-plan/kimi-k2.5").unwrap(); + assert_eq!(kimi.display_name, "Kimi K2.5 (Alibaba Coding Plan)"); + assert_eq!(kimi.context_window, 262_144); + assert!(!kimi.supports_vision); + assert!(kimi.supports_tools); + + // Test MiniMax-M2.5 via Coding Plan + let minimax = catalog + .find_model("alibaba-coding-plan/minimax-m2.5") + .unwrap(); + assert_eq!(minimax.display_name, "MiniMax M2.5 (Alibaba Coding Plan)"); + assert_eq!(minimax.context_window, 204_800); + assert_eq!(minimax.max_output_tokens, 32_768); + assert!(minimax.supports_tools); + assert!(!minimax.supports_vision); + } + + #[test] + fn test_alibaba_coding_plan_aliases() { + let catalog = ModelCatalog::new(); + + // Test alibaba-coding-plan alias resolves to qwen3.5-plus + let alias1 = catalog.find_model("alibaba-coding-plan").unwrap(); + assert_eq!(alias1.id, "alibaba-coding-plan/qwen3.5-plus"); + + // Test qwen3-coder alias resolves to qwen3-coder-plus + 
assert_eq!(catalog.find_model("qwen3-coder").unwrap().id, "alibaba-coding-plan/qwen3-coder-plus"); + + // Regression: "qwen-coder" (no digit 3) must remain distinct — it maps to the direct + // Qwen Code model, not the Alibaba Coding Plan endpoint. + assert_eq!(catalog.find_model("qwen-coder").unwrap().id, "qwen-code/qwen3-coder"); + + // Test case-insensitive alias resolution + let alias_lower = catalog.find_model("Alibaba-Coding-Plan").unwrap(); + assert_eq!(alias_lower.id, "alibaba-coding-plan/qwen3.5-plus"); + } + #[test] fn test_bedrock_models() { let catalog = ModelCatalog::new(); @@ -4523,4 +4939,69 @@ mod tests { assert_eq!(found.provider, "custom_provider"); assert_eq!(found.id, "My-Custom-LLM"); } + + #[test] + fn test_ark_alias_resolution() { + let catalog = ModelCatalog::new(); + // ark/ IDs are now canonical — resolve_alias returns the id itself (no alias mapping needed) + // deepseek-v3.2 is still an alias pointing to ark/deepseek-v3.2 + assert_eq!(catalog.resolve_alias("deepseek-v3.2"), Some("ark/deepseek-v3.2")); + // find_model via ark/ canonical ID returns the volcengine_coding entry directly. + let m25 = catalog.find_model("ark/minimax-m2.5").unwrap(); + assert_eq!(m25.id, "ark/minimax-m2.5"); + assert_eq!(m25.provider, "volcengine_coding"); + // glm-4.7 canonical ID now belongs to ark entry; zhipu entry is unaffected. + let glm = catalog.find_model("ark/glm-4.7").unwrap(); + assert_eq!(glm.id, "ark/glm-4.7"); + assert_eq!(glm.provider, "volcengine_coding"); + // deepseek-v3.2 exists only under volcengine_coding; bare alias still resolves. 
+ let ds = catalog.find_model("ark/deepseek-v3.2").unwrap(); + assert_eq!(ds.id, "ark/deepseek-v3.2"); + assert_eq!(ds.provider, "volcengine_coding"); + let ds_alias = catalog.find_model("deepseek-v3.2").unwrap(); + assert_eq!(ds_alias.id, "ark/deepseek-v3.2"); + let kimi = catalog.find_model("ark/kimi-k2.5").unwrap(); + assert_eq!(kimi.id, "ark/kimi-k2.5"); + assert_eq!(kimi.provider, "volcengine_coding"); + // Native provider entries are unaffected by the ark/ rename + let minimax_native = catalog.find_model("minimax-m2.5").unwrap(); + assert_eq!(minimax_native.provider, "minimax"); + let glm_native = catalog.find_model("glm-4.7").unwrap(); + assert_eq!(glm_native.provider, "zhipu"); + let kimi_native = catalog.find_model("kimi-k2.5").unwrap(); + assert_eq!(kimi_native.provider, "moonshot"); + } + + #[test] + fn test_doubao_alias_resolves_to_volcengine_model() { + let catalog = ModelCatalog::new(); + // "doubao" alias should resolve to the model ID + let resolved = catalog.resolve_alias("doubao"); + assert_eq!(resolved, Some("doubao-seed-1-6-251015")); + // The model should belong to the volcengine provider + let model = catalog.find_model("doubao-seed-1-6-251015").unwrap(); + assert_eq!(model.provider, "volcengine"); + } + + #[test] + fn test_doubao_dual_role_consistency() { + use openfang_types::model_catalog::VOLCENGINE_BASE_URL; + + // 1. The canonical VOLCENGINE_BASE_URL must point at volces.com + assert!( + VOLCENGINE_BASE_URL.contains("volces.com"), + "VOLCENGINE_BASE_URL must contain volces.com, got: {}", + VOLCENGINE_BASE_URL + ); + + // 2. 
The model found via the "doubao" alias must belong to the "volcengine" provider
+        let catalog = ModelCatalog::new();
+        let model = catalog
+            .find_model("doubao")
+            .expect("catalog.find_model(\"doubao\") must resolve via alias");
+        assert_eq!(
+            model.provider, "volcengine",
+            "model resolved by 'doubao' must have provider == 'volcengine'"
+        );
+    }
 }
diff --git a/crates/openfang-types/src/model_catalog.rs b/crates/openfang-types/src/model_catalog.rs
index a7d2627ca..a837a3357 100644
--- a/crates/openfang-types/src/model_catalog.rs
+++ b/crates/openfang-types/src/model_catalog.rs
@@ -47,8 +47,19 @@ pub const ZAI_CODING_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4";
 pub const MOONSHOT_BASE_URL: &str = "https://api.moonshot.ai/v1";
 pub const KIMI_CODING_BASE_URL: &str = "https://api.kimi.com/coding";
 pub const QIANFAN_BASE_URL: &str = "https://qianfan.baidubce.com/v2";
+// Hardcoded to cn-beijing region. Operators in other regions can override via
+// `base_url` in the provider config (e.g. `[provider_urls] volcengine = "https://ark.<region>.volces.com/api/v3"`).
 pub const VOLCENGINE_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/v3";
 pub const VOLCENGINE_CODING_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/coding/v3";
+// Alibaba Cloud Coding Plan International — subscription-based
+// See: https://www.alibabacloud.com/help/en/model-studio/coding-plan
+pub const ALIBABA_CODING_PLAN_BASE_URL: &str = "https://coding-intl.dashscope.aliyuncs.com/v1";
+/// Canonical provider ID for Volcano Engine (standard Ark /api/v3 endpoint).
+pub const VOLCENGINE_PROVIDER_ID: &str = "volcengine";
+/// Canonical provider ID for Volcano Engine Coding Plan (Ark /api/coding/v3 endpoint).
+pub const VOLCENGINE_CODING_PROVIDER_ID: &str = "volcengine_coding";
+/// Provider alias for Doubao — routes to the Volcano Engine standard Ark endpoint.
+pub const DOUBAO_PROVIDER_ID: &str = "doubao"; // ── Chutes.ai ──────────────────────────────────────────────────── pub const CHUTES_BASE_URL: &str = "https://llm.chutes.ai/v1"; diff --git a/docs/providers.md b/docs/providers.md index 9b3ccb59f..e6276639a 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -1,6 +1,6 @@ # LLM Providers Guide -OpenFang ships with a comprehensive model catalog covering **3 native LLM drivers**, **20 providers**, **51 builtin models**, and **23 aliases**. Every provider uses one of three battle-tested drivers: the native **Anthropic** driver, the native **Gemini** driver, or the universal **OpenAI-compatible** driver. This guide is the single source of truth for configuring, selecting, and managing LLM providers in OpenFang. +OpenFang ships with a comprehensive model catalog covering **3 native LLM drivers**, **21 providers**, **61 builtin models**, and **25 aliases**. Every provider uses one of three battle-tested drivers: the native **Anthropic** driver, the native **Gemini** driver, or the universal **OpenAI-compatible** driver. This guide is the single source of truth for configuring, selecting, and managing LLM providers in OpenFang. --- @@ -549,9 +549,47 @@ For Gemini specifically, either `GEMINI_API_KEY` or `GOOGLE_API_KEY` will work. --- +### 21. 
Alibaba Coding Plan (Intl) + +| | | +|---|---| +| **Display Name** | Alibaba Coding Plan (Intl) | +| **Driver** | OpenAI-compatible | +| **Env Var** | `ALIBABA_CODING_PLAN_API_KEY` | +| **Base URL** | `https://coding-intl.dashscope.aliyuncs.com/v1` | +| **Key Required** | Yes | +| **Free Tier** | No (subscription-based) | +| **Auth** | `Authorization: Bearer` header | +| **Models** | 8 | + +**Available Models:** +- `alibaba-coding-plan/qwen3.5-plus` (Smart) — 1M context +- `alibaba-coding-plan/qwen3-max-2026-01-23` (Frontier) — 262K context +- `alibaba-coding-plan/qwen3-coder-plus` (Smart) — 1M context, coding optimized +- `alibaba-coding-plan/qwen3-coder-next` (Frontier) — 262K context +- `alibaba-coding-plan/glm-5` (Frontier) — Zhipu GLM-5, 202K context +- `alibaba-coding-plan/glm-4.7` (Smart) — Zhipu GLM-4.7, 202K context +- `alibaba-coding-plan/kimi-k2.5` (Smart) — Moonshot Kimi, 262K context +- `alibaba-coding-plan/minimax-m2.5` (Smart) — 204K context + +**Setup:** +1. Go to [Alibaba Cloud Model Studio Coding Plan](https://modelstudio.console.alibabacloud.com/ap-southeast-1/?tab=globalset#/efm/coding_plan) +2. Subscribe to the Pro plan ($50/month) +3. Copy your plan-specific API key (format: `sk-sp-xxxxx`) +4. `export ALIBABA_CODING_PLAN_API_KEY="sk-sp-..."` + +**Important Notes:** +- **Subscription-based pricing**: $50/month Pro plan with quota limits (90,000 requests/month, 45,000/week, 6,000 per 5 hours). Per-token costs are $0 — actual cost is the fixed monthly fee. +- **API Key format**: Must use plan-specific key starting with `sk-sp-`. Regular Model Studio keys (`sk-xxxxx`) will not work. +- **Base URL**: Must use the Coding Plan endpoint (`coding-intl.dashscope.aliyuncs.com`). The general Model Studio URL will fail. +- **Multi-provider access**: Single subscription provides access to models from Qwen (Alibaba), GLM (Zhipu), Kimi (Moonshot), and MiniMax. +- **Vision**: No vision support — this is a coding-specialized endpoint. 
+ +--- + ## Model Catalog -The complete catalog of all 51 builtin models, sorted by provider. Pricing is per million tokens. +The complete catalog of all 61 builtin models, sorted by provider. Pricing is per million tokens. | # | Model ID | Display Name | Provider | Tier | Context Window | Max Output | Input $/M | Output $/M | Tools | Vision | |---|----------|-------------|----------|------|---------------|------------|-----------|------------|-------|--------| @@ -608,6 +646,16 @@ The complete catalog of all 51 builtin models, sorted by provider. Pricing is pe | 51 | `grok-2-mini` | Grok 2 Mini | xai | Fast | 131,072 | 32,768 | $0.30 | $0.50 | Yes | No | | 52 | `hf/meta-llama/Llama-3.3-70B-Instruct` | Llama 3.3 70B (HF) | huggingface | Balanced | 128,000 | 4,096 | $0.30 | $0.30 | No | No | | 53 | `replicate/meta-llama-3.3-70b-instruct` | Llama 3.3 70B (Replicate) | replicate | Balanced | 128,000 | 4,096 | $0.40 | $0.40 | No | No | +| 54 | `alibaba-coding-plan/qwen3.5-plus` | Qwen 3.5 Plus (Coding Plan) | alibaba_coding_plan | Smart | 1,000,000 | 65,536 | $0.00 | $0.00 | Yes | No | +| 55 | `alibaba-coding-plan/qwen3-max-2026-01-23` | Qwen 3 Max 2026-01-23 (Coding Plan) | alibaba_coding_plan | Frontier | 262,144 | 65,536 | $0.00 | $0.00 | Yes | No | +| 56 | `alibaba-coding-plan/qwen3-coder-plus` | Qwen 3 Coder Plus (Coding Plan) | alibaba_coding_plan | Smart | 1,000,000 | 65,536 | $0.00 | $0.00 | Yes | No | +| 57 | `alibaba-coding-plan/qwen3-coder-next` | Qwen 3 Coder Next (Coding Plan) | alibaba_coding_plan | Frontier | 262,144 | 65,536 | $0.00 | $0.00 | Yes | No | +| 58 | `alibaba-coding-plan/glm-5` | GLM-5 (Coding Plan) | alibaba_coding_plan | Frontier | 202,752 | 32,768 | $0.00 | $0.00 | Yes | No | +| 59 | `alibaba-coding-plan/glm-4.7` | GLM-4.7 (Coding Plan) | alibaba_coding_plan | Smart | 202,752 | 32,768 | $0.00 | $0.00 | Yes | No | +| 60 | `alibaba-coding-plan/kimi-k2.5` | Kimi K2.5 (Coding Plan) | alibaba_coding_plan | Smart | 262,144 | 32,768 | $0.00 | 
$0.00 | Yes | No |
+| 61 | `alibaba-coding-plan/minimax-m2.5` | MiniMax M2.5 (Coding Plan) | alibaba_coding_plan | Smart | 204,800 | 32,768 | $0.00 | $0.00 | Yes | No |
+
+\* Provider ID uses underscores (`alibaba_coding_plan`); model IDs use hyphens (`alibaba-coding-plan/...`). The driver matches the underscore form of the provider ID — it is not normalized.
 
 **Model Tiers:**
 
@@ -621,13 +669,13 @@ The complete catalog of all 51 builtin models, sorted by provider. Pricing is pe
 
 **Notes:**
 
 - Local providers (Ollama, vLLM, LM Studio) auto-discover models at runtime. Any model you download and serve will be merged into the catalog with `Local` tier and zero cost.
-- The 46 entries above are the builtin models. The total of 51 referenced in the catalog includes runtime auto-discovered models that vary per installation.
+- The 61 entries above are the builtin models. The total may vary per installation due to runtime auto-discovered models from local providers.
 
 ---
 
 ## Model Aliases
 
-All 23 aliases resolve to canonical model IDs. Aliases are case-insensitive.
+All 25 aliases resolve to canonical model IDs. Aliases are case-insensitive.
 
 | Alias | Resolves To |
 |-------|------------|
@@ -654,6 +702,8 @@ All 23 aliases resolve to canonical model IDs. Aliases are case-insensitive.
 | `sonar` | `sonar-pro` |
 | `jamba` | `jamba-1.5-large` |
 | `command-r` | `command-r-plus` |
+| `alibaba-coding-plan` | `alibaba-coding-plan/qwen3.5-plus` |
+| `qwen3-coder` | `alibaba-coding-plan/qwen3-coder-plus` |
 
 You can use aliases anywhere a model ID is accepted: in config files, REST API calls, chat commands, and the model routing configuration.
 
@@ -778,6 +828,7 @@ The `MeteringEngine` first checks the **model catalog** for exact pricing. 
If th | `*replicate*` | $0.40 | $0.40 | | `*llama*` / `*mixtral*` | $0.05 | $0.10 | | `*qwen*` | $0.20 | $0.60 | +| `*alibaba-coding-plan*` | $0.00 | $0.00 **(subscription)** | | `mistral-large*` | $2.00 | $6.00 | | `*mistral*` (other) | $0.10 | $0.30 | | `command-r-plus` | $2.50 | $10.00 | @@ -903,7 +954,7 @@ Returns a map of all alias-to-canonical-ID mappings. GET /api/providers ``` -Returns all 20 providers with auth status and model counts. +Returns all 21 providers with auth status and model counts. **Response:** ```json @@ -998,7 +1049,7 @@ Local: ### `/providers` -Lists all 20 providers with their authentication status. +Lists all 21 providers with their authentication status. ``` /providers @@ -1006,7 +1057,7 @@ Lists all 20 providers with their authentication status. Example output: ``` -LLM Providers (20): +LLM Providers (21): Anthropic ANTHROPIC_API_KEY Configured 3 models OpenAI OPENAI_API_KEY Missing 6 models @@ -1047,6 +1098,7 @@ Quick reference for all provider environment variables: | Hugging Face | `HF_API_KEY` | Yes | | xAI | `XAI_API_KEY` | Yes | | Replicate | `REPLICATE_API_TOKEN` | Yes | +| Alibaba Coding Plan (Intl) | `ALIBABA_CODING_PLAN_API_KEY` | Yes | ---