diff --git a/crates/openfang-kernel/src/metering.rs b/crates/openfang-kernel/src/metering.rs index e34dff4e9..79a0f6f96 100644 --- a/crates/openfang-kernel/src/metering.rs +++ b/crates/openfang-kernel/src/metering.rs @@ -374,6 +374,14 @@ fn estimate_cost_rates(model: &str) -> (f64, f64) { if model.contains("llama") || model.contains("mixtral") { return (0.05, 0.10); } + // ── Alibaba Coding Plan (subscription-based: $50/month) ───── + // Must come BEFORE individual provider checks (qwen, glm, kimi, minimax) + // because model IDs like "alibaba-coding-plan/qwen3.5-plus" contain "qwen". + // All models in the Coding Plan use flat-rate subscription pricing. + // Per-token costs are $0 — actual cost is the fixed monthly fee. + if model.contains("alibaba-coding-plan") || model.contains("alibaba_coding_plan") { + return (0.0, 0.0); + } // ── Qwen (Alibaba) ────────────────────────────────────────── if model.contains("qwen-max") { return (4.00, 12.00); @@ -751,6 +759,32 @@ mod tests { assert!((cost - 0.12).abs() < 0.01); // $0.06 + $0.06 } + #[test] + fn test_estimate_cost_alibaba_coding_plan_subscription() { + // Alibaba Coding Plan is subscription-based ($50/month) — per-token cost is $0 + let qwen35_cost = MeteringEngine::estimate_cost( + "alibaba-coding-plan/qwen3.5-plus", + 1_000_000, + 1_000_000, + ); + assert!((qwen35_cost).abs() < 0.001, "Qwen 3.5 Plus should be $0 (subscription)"); + + let glm5_cost = + MeteringEngine::estimate_cost("alibaba-coding-plan/glm-5", 1_000_000, 1_000_000); + assert!((glm5_cost).abs() < 0.001, "GLM-5 should be $0 (subscription)"); + + let kimi_cost = + MeteringEngine::estimate_cost("alibaba-coding-plan/kimi-k2.5", 1_000_000, 1_000_000); + assert!((kimi_cost).abs() < 0.001, "Kimi K2.5 should be $0 (subscription)"); + + let minimax_cost = MeteringEngine::estimate_cost( + "alibaba-coding-plan/MiniMax-M2.5", + 1_000_000, + 1_000_000, + ); + assert!((minimax_cost).abs() < 0.001, "MiniMax M2.5 should be $0 (subscription)"); + } + #[test] fn 
test_estimate_cost_with_catalog() { let catalog = openfang_runtime::model_catalog::ModelCatalog::new(); diff --git a/crates/openfang-runtime/src/drivers/mod.rs b/crates/openfang-runtime/src/drivers/mod.rs index 2df8923d4..30242ed6e 100644 --- a/crates/openfang-runtime/src/drivers/mod.rs +++ b/crates/openfang-runtime/src/drivers/mod.rs @@ -15,14 +15,15 @@ pub mod vertex; use crate::llm_driver::{DriverConfig, LlmDriver, LlmError}; use openfang_types::model_catalog::{ - AI21_BASE_URL, ANTHROPIC_BASE_URL, AZURE_OPENAI_BASE_URL, CEREBRAS_BASE_URL, CHUTES_BASE_URL, - COHERE_BASE_URL, DEEPSEEK_BASE_URL, FIREWORKS_BASE_URL, GEMINI_BASE_URL, GROQ_BASE_URL, - HUGGINGFACE_BASE_URL, KIMI_CODING_BASE_URL, LEMONADE_BASE_URL, LMSTUDIO_BASE_URL, - MINIMAX_BASE_URL, MISTRAL_BASE_URL, MOONSHOT_BASE_URL, NVIDIA_NIM_BASE_URL, OLLAMA_BASE_URL, - OPENAI_BASE_URL, OPENROUTER_BASE_URL, PERPLEXITY_BASE_URL, QIANFAN_BASE_URL, QWEN_BASE_URL, - REPLICATE_BASE_URL, SAMBANOVA_BASE_URL, TOGETHER_BASE_URL, VENICE_BASE_URL, VLLM_BASE_URL, - VOLCENGINE_BASE_URL, VOLCENGINE_CODING_BASE_URL, XAI_BASE_URL, ZAI_BASE_URL, - ZAI_CODING_BASE_URL, ZHIPU_BASE_URL, ZHIPU_CODING_BASE_URL, + AI21_BASE_URL, ALIBABA_CODING_PLAN_BASE_URL, ANTHROPIC_BASE_URL, AZURE_OPENAI_BASE_URL, + CEREBRAS_BASE_URL, CHUTES_BASE_URL, COHERE_BASE_URL, DEEPSEEK_BASE_URL, FIREWORKS_BASE_URL, + GEMINI_BASE_URL, GROQ_BASE_URL, HUGGINGFACE_BASE_URL, KIMI_CODING_BASE_URL, + LEMONADE_BASE_URL, LMSTUDIO_BASE_URL, MINIMAX_BASE_URL, MISTRAL_BASE_URL, MOONSHOT_BASE_URL, + NVIDIA_NIM_BASE_URL, OLLAMA_BASE_URL, OPENAI_BASE_URL, OPENROUTER_BASE_URL, + PERPLEXITY_BASE_URL, QIANFAN_BASE_URL, QWEN_BASE_URL, REPLICATE_BASE_URL, SAMBANOVA_BASE_URL, + TOGETHER_BASE_URL, VENICE_BASE_URL, VLLM_BASE_URL, VOLCENGINE_BASE_URL, + VOLCENGINE_CODING_BASE_URL, XAI_BASE_URL, ZAI_BASE_URL, ZAI_CODING_BASE_URL, + ZHIPU_BASE_URL, ZHIPU_CODING_BASE_URL, }; use std::sync::Arc; @@ -207,6 +208,11 @@ fn provider_defaults(provider: &str) -> Option { api_key_env: 
"VOLCENGINE_API_KEY", key_required: true, }), + "alibaba_coding_plan" => Some(ProviderDefaults { + base_url: ALIBABA_CODING_PLAN_BASE_URL, + api_key_env: "ALIBABA_CODING_PLAN_API_KEY", + key_required: true, + }), "chutes" => Some(ProviderDefaults { base_url: CHUTES_BASE_URL, api_key_env: "CHUTES_API_KEY", @@ -591,6 +597,7 @@ pub fn known_providers() -> &'static [&'static str] { "claude-code", "qwen-code", "azure", + "alibaba_coding_plan", ] } @@ -695,7 +702,9 @@ mod tests { assert!(providers.contains(&"claude-code")); assert!(providers.contains(&"qwen-code")); assert!(providers.contains(&"azure")); - assert_eq!(providers.len(), 37); + // Alibaba Coding Plan + assert!(providers.contains(&"alibaba_coding_plan")); + assert_eq!(providers.len(), 38); } #[test] @@ -768,6 +777,84 @@ mod tests { assert!(driver.is_err()); } + #[test] + fn test_alibaba_coding_plan_defaults() { + let d = provider_defaults("alibaba_coding_plan").unwrap(); + assert_eq!(d.base_url, "https://coding-intl.dashscope.aliyuncs.com/v1"); + assert_eq!(d.api_key_env, "ALIBABA_CODING_PLAN_API_KEY"); + assert!(d.key_required); + } + + #[test] + fn test_alibaba_coding_plan_driver_with_key() { + // Alibaba Coding Plan requires API key + let config = DriverConfig { + provider: "alibaba_coding_plan".to_string(), + api_key: Some("sk-sp-test-key-12345".to_string()), + base_url: None, + skip_permissions: true, + }; + let driver = create_driver(&config); + assert!( + driver.is_ok(), + "Alibaba Coding Plan with API key should succeed" + ); + } + + #[test] + fn test_alibaba_coding_plan_driver_from_env() { + // Test that API key can be loaded from environment variable + let unique_key = "sk-sp-test-env-key-67890"; + std::env::set_var("ALIBABA_CODING_PLAN_API_KEY", unique_key); + let config = DriverConfig { + provider: "alibaba_coding_plan".to_string(), + api_key: None, + base_url: None, + skip_permissions: true, + }; + let driver = create_driver(&config); + assert!( + driver.is_ok(), + "Alibaba Coding Plan with env 
var should succeed" + ); + std::env::remove_var("ALIBABA_CODING_PLAN_API_KEY"); + } + + #[test] + fn test_alibaba_coding_plan_no_key_errors() { + // Alibaba Coding Plan with no API key should error + // Clear env var first to avoid pollution from parallel tests + std::env::remove_var("ALIBABA_CODING_PLAN_API_KEY"); + + let config = DriverConfig { + provider: "alibaba_coding_plan".to_string(), + api_key: None, + base_url: None, + skip_permissions: true, + }; + let driver = create_driver(&config); + assert!(driver.is_err()); + let err = driver.err().unwrap().to_string(); + assert!( + err.contains("ALIBABA_CODING_PLAN_API_KEY"), + "Error should mention ALIBABA_CODING_PLAN_API_KEY: {}", + err + ); + } + + #[test] + fn test_alibaba_coding_plan_custom_base_url() { + // Test custom base URL override + let config = DriverConfig { + provider: "alibaba_coding_plan".to_string(), + api_key: Some("sk-sp-test-key".to_string()), + base_url: Some("https://custom-endpoint.example.com/v1".to_string()), + skip_permissions: true, + }; + let driver = create_driver(&config); + assert!(driver.is_ok()); + } + #[test] fn test_custom_provider_key_no_url_helpful_error() { // Custom provider with key set (via env) but no base_url should give helpful error. diff --git a/crates/openfang-runtime/src/model_catalog.rs b/crates/openfang-runtime/src/model_catalog.rs index 62b81c74e..15a7d4111 100644 --- a/crates/openfang-runtime/src/model_catalog.rs +++ b/crates/openfang-runtime/src/model_catalog.rs @@ -4,15 +4,16 @@ //! with alias resolution, auth status detection, and pricing lookups. 
use openfang_types::model_catalog::{ - AuthStatus, ModelCatalogEntry, ModelTier, ProviderInfo, AI21_BASE_URL, ANTHROPIC_BASE_URL, - AZURE_OPENAI_BASE_URL, BEDROCK_BASE_URL, CEREBRAS_BASE_URL, CHUTES_BASE_URL, COHERE_BASE_URL, - DEEPSEEK_BASE_URL, FIREWORKS_BASE_URL, GEMINI_BASE_URL, GITHUB_COPILOT_BASE_URL, GROQ_BASE_URL, - HUGGINGFACE_BASE_URL, KIMI_CODING_BASE_URL, LEMONADE_BASE_URL, LMSTUDIO_BASE_URL, - MINIMAX_BASE_URL, MISTRAL_BASE_URL, MOONSHOT_BASE_URL, NVIDIA_NIM_BASE_URL, OLLAMA_BASE_URL, - OPENAI_BASE_URL, OPENROUTER_BASE_URL, PERPLEXITY_BASE_URL, QIANFAN_BASE_URL, QWEN_BASE_URL, - REPLICATE_BASE_URL, SAMBANOVA_BASE_URL, TOGETHER_BASE_URL, VENICE_BASE_URL, VLLM_BASE_URL, - VOLCENGINE_BASE_URL, VOLCENGINE_CODING_BASE_URL, XAI_BASE_URL, ZAI_BASE_URL, - ZAI_CODING_BASE_URL, ZHIPU_BASE_URL, ZHIPU_CODING_BASE_URL, + AuthStatus, ModelCatalogEntry, ModelTier, ProviderInfo, AI21_BASE_URL, + ALIBABA_CODING_PLAN_BASE_URL, ANTHROPIC_BASE_URL, AZURE_OPENAI_BASE_URL, BEDROCK_BASE_URL, + CEREBRAS_BASE_URL, CHUTES_BASE_URL, COHERE_BASE_URL, DEEPSEEK_BASE_URL, FIREWORKS_BASE_URL, + GEMINI_BASE_URL, GITHUB_COPILOT_BASE_URL, GROQ_BASE_URL, HUGGINGFACE_BASE_URL, + KIMI_CODING_BASE_URL, LEMONADE_BASE_URL, LMSTUDIO_BASE_URL, MINIMAX_BASE_URL, MISTRAL_BASE_URL, + MOONSHOT_BASE_URL, NVIDIA_NIM_BASE_URL, OLLAMA_BASE_URL, OPENAI_BASE_URL, OPENROUTER_BASE_URL, + PERPLEXITY_BASE_URL, QIANFAN_BASE_URL, QWEN_BASE_URL, REPLICATE_BASE_URL, SAMBANOVA_BASE_URL, + TOGETHER_BASE_URL, VENICE_BASE_URL, VLLM_BASE_URL, VOLCENGINE_BASE_URL, + VOLCENGINE_CODING_BASE_URL, XAI_BASE_URL, ZAI_BASE_URL, ZAI_CODING_BASE_URL, ZHIPU_BASE_URL, + ZHIPU_CODING_BASE_URL, }; use std::collections::HashMap; @@ -765,7 +766,7 @@ fn builtin_providers() -> Vec { auth_status: AuthStatus::Missing, model_count: 0, }, - // ── Chinese providers (5) ──────────────────────────────────── + // ── Chinese providers (6) ──────────────────────────────────── ProviderInfo { id: "qwen".into(), display_name: "Qwen 
(Alibaba)".into(), @@ -775,6 +776,15 @@ fn builtin_providers() -> Vec { auth_status: AuthStatus::Missing, model_count: 0, }, + ProviderInfo { + id: "alibaba_coding_plan".into(), + display_name: "Alibaba Coding Plan (Intl)".into(), + api_key_env: "ALIBABA_CODING_PLAN_API_KEY".into(), + base_url: ALIBABA_CODING_PLAN_BASE_URL.into(), + key_required: true, + auth_status: AuthStatus::Missing, + model_count: 0, + }, ProviderInfo { id: "minimax".into(), display_name: "MiniMax".into(), @@ -979,6 +989,8 @@ fn builtin_aliases() -> HashMap { ("minimax-highspeed", "MiniMax-M2.5-highspeed"), ("minimax-m2.1", "MiniMax-M2.1"), ("codegeex", "codegeex-4"), + // Alibaba Coding Plan aliases + ("alibaba-coding-plan", "alibaba-coding-plan/qwen3.5-plus"), // Codex aliases ("codex", "codex/gpt-5.4"), ("codex-5.4", "codex/gpt-5.4"), @@ -3193,6 +3205,131 @@ fn builtin_models() -> Vec { aliases: vec!["abab7".into()], }, // ══════════════════════════════════════════════════════════════ + // Alibaba Coding Plan International (8) + // All pricing set to $0 — actual cost is fixed monthly subscription. + // Provides multi-provider access: Qwen, Zhipu GLM, Moonshot Kimi, MiniMax. 
+ // See: https://www.alibabacloud.com/help/en/model-studio/coding-plan + // ══════════════════════════════════════════════════════════════ + // ── Qwen models (4) ────────────────────────────────────────── + ModelCatalogEntry { + id: "alibaba-coding-plan/qwen3.5-plus".into(), + display_name: "Qwen 3.5 Plus (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Smart, + context_window: 1_000_000, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: true, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "alibaba-coding-plan/qwen3-max-2026-01-23".into(), + display_name: "Qwen 3 Max 2026-01-23 (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Frontier, + context_window: 262_144, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "alibaba-coding-plan/qwen3-coder-plus".into(), + display_name: "Qwen 3 Coder Plus (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Smart, + context_window: 1_000_000, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec!["qwen3-coder".into()], + }, + ModelCatalogEntry { + id: "alibaba-coding-plan/qwen3-coder-next".into(), + display_name: "Qwen 3 Coder Next (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Frontier, + context_window: 262_144, + max_output_tokens: 65_536, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + // ── Zhipu / GLM via Coding Plan (2) ───────────────────────── + // API receives "glm-5" 
/ "glm-4.7" after prefix stripping + ModelCatalogEntry { + id: "alibaba-coding-plan/glm-5".into(), + display_name: "GLM-5 (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Frontier, + context_window: 202_752, + max_output_tokens: 32_768, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + ModelCatalogEntry { + id: "alibaba-coding-plan/glm-4.7".into(), + display_name: "GLM-4.7 (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Smart, + context_window: 202_752, + max_output_tokens: 32_768, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + // ── Moonshot / Kimi via Coding Plan (1) ───────────────────── + // API receives "kimi-k2.5" after prefix stripping + ModelCatalogEntry { + id: "alibaba-coding-plan/kimi-k2.5".into(), + display_name: "Kimi K2.5 (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Smart, + context_window: 262_144, + max_output_tokens: 32_768, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: true, + supports_streaming: true, + aliases: vec![], + }, + // ── MiniMax via Coding Plan (1) ────────────────────────────── + // API receives "MiniMax-M2.5" after prefix stripping + ModelCatalogEntry { + id: "alibaba-coding-plan/MiniMax-M2.5".into(), + display_name: "MiniMax M2.5 (Alibaba Coding Plan)".into(), + provider: "alibaba_coding_plan".into(), + tier: ModelTier::Smart, + context_window: 204_800, + max_output_tokens: 32_768, + input_cost_per_m: 0.0, + output_cost_per_m: 0.0, + supports_tools: true, + supports_vision: false, + supports_streaming: true, + aliases: vec![], + }, + // ══════════════════════════════════════════════════════════════ // Zhipu AI / GLM (6) // 
══════════════════════════════════════════════════════════════ ModelCatalogEntry { @@ -3905,7 +4042,7 @@ mod tests { #[test] fn test_catalog_has_providers() { let catalog = ModelCatalog::new(); - assert_eq!(catalog.list_providers().len(), 41); + assert_eq!(catalog.list_providers().len(), 42); } #[test] @@ -4137,6 +4274,10 @@ mod tests { assert!(catalog.get_provider("moonshot").is_some()); assert!(catalog.get_provider("qianfan").is_some()); assert!(catalog.get_provider("bedrock").is_some()); + // Alibaba Coding Plan provider + assert!(catalog.get_provider("alibaba_coding_plan").is_some()); + let provider = catalog.get_provider("alibaba_coding_plan").unwrap(); + assert_eq!(provider.api_key_env, "ALIBABA_CODING_PLAN_API_KEY"); } #[test] @@ -4176,6 +4317,95 @@ mod tests { assert!(abab7.supports_vision); } + #[test] + fn test_alibaba_coding_plan_models() { + let catalog = ModelCatalog::new(); + + // Test all 8 models are present + let models = catalog.models_by_provider("alibaba_coding_plan"); + assert_eq!(models.len(), 8); + + // Test qwen3.5-plus — flagship model with 1M context and vision + let qwen35 = catalog + .find_model("alibaba-coding-plan/qwen3.5-plus") + .unwrap(); + assert_eq!(qwen35.display_name, "Qwen 3.5 Plus (Alibaba Coding Plan)"); + assert_eq!(qwen35.tier, ModelTier::Smart); + assert_eq!(qwen35.context_window, 1_000_000); + assert_eq!(qwen35.max_output_tokens, 65_536); + assert!(qwen35.supports_tools); + assert!(qwen35.supports_vision); + assert!(qwen35.supports_streaming); + // Subscription-based pricing + assert!((qwen35.input_cost_per_m).abs() < f64::EPSILON); + assert!((qwen35.output_cost_per_m).abs() < f64::EPSILON); + + // Test qwen3-max-2026-01-23 — frontier model + let qwen3max = catalog + .find_model("alibaba-coding-plan/qwen3-max-2026-01-23") + .unwrap(); + assert_eq!(qwen3max.tier, ModelTier::Frontier); + assert_eq!(qwen3max.context_window, 262_144); + assert!(!qwen3max.supports_vision); + assert!(qwen3max.supports_tools); + + // Test 
qwen3-coder-plus — coding model with 1M context + let qwen3coder = catalog + .find_model("alibaba-coding-plan/qwen3-coder-plus") + .unwrap(); + assert_eq!(qwen3coder.context_window, 1_000_000); + assert!(!qwen3coder.supports_vision); + assert!(qwen3coder.supports_tools); + + // Test qwen3-coder-alias + let qwen3coder_alias = catalog.find_model("qwen3-coder").unwrap(); + assert_eq!(qwen3coder_alias.id, "alibaba-coding-plan/qwen3-coder-plus"); + + // Test glm-5 — Zhipu model via Coding Plan + let glm5 = catalog.find_model("alibaba-coding-plan/glm-5").unwrap(); + assert_eq!(glm5.display_name, "GLM-5 (Alibaba Coding Plan)"); + assert_eq!(glm5.tier, ModelTier::Frontier); + assert_eq!(glm5.context_window, 202_752); + assert_eq!(glm5.max_output_tokens, 32_768); + assert!(glm5.supports_tools); + assert!(!glm5.supports_vision); + + // Test glm-4.7 + let glm47 = catalog.find_model("alibaba-coding-plan/glm-4.7").unwrap(); + assert_eq!(glm47.tier, ModelTier::Smart); + assert_eq!(glm47.context_window, 202_752); + + // Test kimi-k2.5 — Moonshot model with vision support + let kimi = catalog.find_model("alibaba-coding-plan/kimi-k2.5").unwrap(); + assert_eq!(kimi.display_name, "Kimi K2.5 (Alibaba Coding Plan)"); + assert_eq!(kimi.context_window, 262_144); + assert!(kimi.supports_vision); + assert!(kimi.supports_tools); + + // Test MiniMax-M2.5 via Coding Plan + let minimax = catalog + .find_model("alibaba-coding-plan/MiniMax-M2.5") + .unwrap(); + assert_eq!(minimax.display_name, "MiniMax M2.5 (Alibaba Coding Plan)"); + assert_eq!(minimax.context_window, 204_800); + assert_eq!(minimax.max_output_tokens, 32_768); + assert!(minimax.supports_tools); + assert!(!minimax.supports_vision); + } + + #[test] + fn test_alibaba_coding_plan_aliases() { + let catalog = ModelCatalog::new(); + + // Test alibaba-coding-plan alias resolves to qwen3.5-plus + let alias1 = catalog.find_model("alibaba-coding-plan").unwrap(); + assert_eq!(alias1.id, "alibaba-coding-plan/qwen3.5-plus"); + + // Test 
case-insensitive alias resolution + let alias_lower = catalog.find_model("Alibaba-Coding-Plan").unwrap(); + assert_eq!(alias_lower.id, "alibaba-coding-plan/qwen3.5-plus"); + } + #[test] fn test_bedrock_models() { let catalog = ModelCatalog::new(); diff --git a/crates/openfang-types/src/model_catalog.rs b/crates/openfang-types/src/model_catalog.rs index a7d2627ca..ad4430c81 100644 --- a/crates/openfang-types/src/model_catalog.rs +++ b/crates/openfang-types/src/model_catalog.rs @@ -49,6 +49,9 @@ pub const KIMI_CODING_BASE_URL: &str = "https://api.kimi.com/coding"; pub const QIANFAN_BASE_URL: &str = "https://qianfan.baidubce.com/v2"; pub const VOLCENGINE_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/v3"; pub const VOLCENGINE_CODING_BASE_URL: &str = "https://ark.cn-beijing.volces.com/api/coding/v3"; +// Alibaba Cloud Coding Plan International — subscription-based +// See: https://www.alibabacloud.com/help/en/model-studio/coding-plan +pub const ALIBABA_CODING_PLAN_BASE_URL: &str = "https://coding-intl.dashscope.aliyuncs.com/v1"; // ── Chutes.ai ──────────────────────────────────────────────────── pub const CHUTES_BASE_URL: &str = "https://llm.chutes.ai/v1"; diff --git a/docs/providers.md b/docs/providers.md index 9b3ccb59f..ccdf9e6eb 100644 --- a/docs/providers.md +++ b/docs/providers.md @@ -1,6 +1,6 @@ # LLM Providers Guide -OpenFang ships with a comprehensive model catalog covering **3 native LLM drivers**, **20 providers**, **51 builtin models**, and **23 aliases**. Every provider uses one of three battle-tested drivers: the native **Anthropic** driver, the native **Gemini** driver, or the universal **OpenAI-compatible** driver. This guide is the single source of truth for configuring, selecting, and managing LLM providers in OpenFang. +OpenFang ships with a comprehensive model catalog covering **3 native LLM drivers**, **21 providers**, **61 builtin models**, and **24 aliases**. 
Every provider uses one of three battle-tested drivers: the native **Anthropic** driver, the native **Gemini** driver, or the universal **OpenAI-compatible** driver. This guide is the single source of truth for configuring, selecting, and managing LLM providers in OpenFang. --- @@ -549,9 +549,47 @@ For Gemini specifically, either `GEMINI_API_KEY` or `GOOGLE_API_KEY` will work. --- +### 21. Alibaba Coding Plan (International) + +| | | +|---|---| +| **Display Name** | Alibaba Coding Plan (Intl) | +| **Driver** | OpenAI-compatible | +| **Env Var** | `ALIBABA_CODING_PLAN_API_KEY` | +| **Base URL** | `https://coding-intl.dashscope.aliyuncs.com/v1` | +| **Key Required** | Yes | +| **Free Tier** | No (subscription-based) | +| **Auth** | `Authorization: Bearer` header | +| **Models** | 8 | + +**Available Models:** +- `alibaba-coding-plan/qwen3.5-plus` (Smart) — 1M context, vision ✅ +- `alibaba-coding-plan/qwen3-max-2026-01-23` (Frontier) — 262K context +- `alibaba-coding-plan/qwen3-coder-plus` (Smart) — 1M context, coding optimized +- `alibaba-coding-plan/qwen3-coder-next` (Frontier) — 262K context +- `alibaba-coding-plan/glm-5` (Frontier) — Zhipu GLM-5, 202K context +- `alibaba-coding-plan/glm-4.7` (Smart) — Zhipu GLM-4.7, 202K context +- `alibaba-coding-plan/kimi-k2.5` (Smart) — Moonshot Kimi, 262K context, vision ✅ +- `alibaba-coding-plan/MiniMax-M2.5` (Smart) — 204K context + +**Setup:** +1. Go to [Alibaba Cloud Model Studio Coding Plan](https://modelstudio.console.alibabacloud.com/ap-southeast-1/?tab=globalset#/efm/coding_plan) +2. Subscribe to the Pro plan ($50/month) +3. Copy your plan-specific API key (format: `sk-sp-xxxxx`) +4. `export ALIBABA_CODING_PLAN_API_KEY="sk-sp-..."` + +**Important Notes:** +- **Subscription-based pricing**: $50/month Pro plan with quota limits (90,000 requests/month, 45,000/week, 6,000 per 5 hours). Per-token costs are $0 — actual cost is the fixed monthly fee. 
+- **API Key format**: Must use plan-specific key starting with `sk-sp-`. Regular Model Studio keys (`sk-xxxxx`) will not work. +- **Base URL**: Must use the Coding Plan endpoint (`coding-intl.dashscope.aliyuncs.com`). The general Model Studio URL will fail. +- **Multi-provider access**: Single subscription provides access to models from Qwen (Alibaba), GLM (Zhipu), Kimi (Moonshot), and MiniMax. +- **Vision support**: qwen3.5-plus and kimi-k2.5 support image understanding. + +--- + ## Model Catalog -The complete catalog of all 51 builtin models, sorted by provider. Pricing is per million tokens. +The complete catalog of all 61 builtin models, sorted by provider. Pricing is per million tokens. | # | Model ID | Display Name | Provider | Tier | Context Window | Max Output | Input $/M | Output $/M | Tools | Vision | |---|----------|-------------|----------|------|---------------|------------|-----------|------------|-------|--------| @@ -608,6 +646,14 @@ The complete catalog of all 51 builtin models, sorted by provider. 
Pricing is pe | 51 | `grok-2-mini` | Grok 2 Mini | xai | Fast | 131,072 | 32,768 | $0.30 | $0.50 | Yes | No | | 52 | `hf/meta-llama/Llama-3.3-70B-Instruct` | Llama 3.3 70B (HF) | huggingface | Balanced | 128,000 | 4,096 | $0.30 | $0.30 | No | No | | 53 | `replicate/meta-llama-3.3-70b-instruct` | Llama 3.3 70B (Replicate) | replicate | Balanced | 128,000 | 4,096 | $0.40 | $0.40 | No | No | +| 54 | `alibaba-coding-plan/qwen3.5-plus` | Qwen 3.5 Plus (Coding Plan) | alibaba_coding_plan | Smart | 1,000,000 | 65,536 | $0.00 | $0.00 | Yes | Yes | +| 55 | `alibaba-coding-plan/qwen3-max-2026-01-23` | Qwen 3 Max 2026-01-23 (Coding Plan) | alibaba_coding_plan | Frontier | 262,144 | 65,536 | $0.00 | $0.00 | Yes | No | +| 56 | `alibaba-coding-plan/qwen3-coder-plus` | Qwen 3 Coder Plus (Coding Plan) | alibaba_coding_plan | Smart | 1,000,000 | 65,536 | $0.00 | $0.00 | Yes | No | +| 57 | `alibaba-coding-plan/qwen3-coder-next` | Qwen 3 Coder Next (Coding Plan) | alibaba_coding_plan | Frontier | 262,144 | 65,536 | $0.00 | $0.00 | Yes | No | +| 58 | `alibaba-coding-plan/glm-5` | GLM-5 (Coding Plan) | alibaba_coding_plan | Frontier | 202,752 | 32,768 | $0.00 | $0.00 | Yes | No | +| 59 | `alibaba-coding-plan/glm-4.7` | GLM-4.7 (Coding Plan) | alibaba_coding_plan | Smart | 202,752 | 32,768 | $0.00 | $0.00 | Yes | No | +| 60 | `alibaba-coding-plan/kimi-k2.5` | Kimi K2.5 (Coding Plan) | alibaba_coding_plan | Smart | 262,144 | 32,768 | $0.00 | $0.00 | Yes | Yes | +| 61 | `alibaba-coding-plan/MiniMax-M2.5` | MiniMax M2.5 (Coding Plan) | alibaba_coding_plan | Smart | 204,800 | 32,768 | $0.00 | $0.00 | Yes | No | **Model Tiers:** @@ -621,13 +667,13 @@ The complete catalog of all 51 builtin models, sorted by provider. Pricing is pe **Notes:** - Local providers (Ollama, vLLM, LM Studio) auto-discover models at runtime. Any model you download and serve will be merged into the catalog with `Local` tier and zero cost. -- The 46 entries above are the builtin models. 
The total of 51 referenced in the catalog includes runtime auto-discovered models that vary per installation. +- The 61 entries above are the builtin models. The total may vary per installation due to runtime auto-discovered models from local providers. --- ## Model Aliases -All 23 aliases resolve to canonical model IDs. Aliases are case-insensitive. +All 24 aliases resolve to canonical model IDs. Aliases are case-insensitive. | Alias | Resolves To | |-------|------------| @@ -654,6 +700,7 @@ All 23 aliases resolve to canonical model IDs. Aliases are case-insensitive. | `sonar` | `sonar-pro` | | `jamba` | `jamba-1.5-large` | | `command-r` | `command-r-plus` | +| `alibaba-coding-plan` | `alibaba-coding-plan/qwen3.5-plus` | You can use aliases anywhere a model ID is accepted: in config files, REST API calls, chat commands, and the model routing configuration. @@ -778,6 +825,7 @@ The `MeteringEngine` first checks the **model catalog** for exact pricing. If th | `*replicate*` | $0.40 | $0.40 | | `*llama*` / `*mixtral*` | $0.05 | $0.10 | +| `*alibaba-coding-plan*` / `*alibaba_coding_plan*` | $0.00 | $0.00 **(subscription)** | | `*qwen*` | $0.20 | $0.60 | | `mistral-large*` | $2.00 | $6.00 | | `*mistral*` (other) | $0.10 | $0.30 | | `command-r-plus` | $2.50 | $10.00 | @@ -903,7 +951,7 @@ Returns a map of all alias-to-canonical-ID mappings. GET /api/providers ``` -Returns all 20 providers with auth status and model counts. +Returns all 21 providers with auth status and model counts. **Response:** ```json @@ -998,7 +1046,7 @@ Local: ### `/providers` -Lists all 20 providers with their authentication status. +Lists all 21 providers with their authentication status. ``` /providers @@ -1006,7 +1054,7 @@ 
Example output: ``` -LLM Providers (20): +LLM Providers (21): Anthropic ANTHROPIC_API_KEY Configured 3 models OpenAI OPENAI_API_KEY Missing 6 models @@ -1047,6 +1095,7 @@ Quick reference for all provider environment variables: | Hugging Face | `HF_API_KEY` | Yes | | xAI | `XAI_API_KEY` | Yes | | Replicate | `REPLICATE_API_TOKEN` | Yes | +| Alibaba Coding Plan (Intl) | `ALIBABA_CODING_PLAN_API_KEY` | Yes | ---