From 1e2617aeedaf8849de8dbbe8a0b436c12aacfb48 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Sun, 28 Sep 2025 05:17:28 +0800 Subject: [PATCH 01/10] Add LiteLLM OpenRouter configuration files and Docker setup - Created .env.openrouter for environment variables including API keys and database URL. - Added docker-compose.openrouter.yml to define services for LiteLLM OpenRouter and PostgreSQL database. - Introduced openrouter_config.yaml with model configurations and general settings for LiteLLM. --- infra/litellm/chutesai/.env.chutes | 8 + infra/litellm/chutesai/textModelsList.txt | 1796 +++++++++++++++++ infra/litellm/openrouter/.env.openrouter | 10 + .../docker-compose.openrouter.yml} | 6 +- .../openrouter_config.yaml} | 0 5 files changed, 1817 insertions(+), 3 deletions(-) create mode 100644 infra/litellm/chutesai/.env.chutes create mode 100644 infra/litellm/chutesai/textModelsList.txt create mode 100644 infra/litellm/openrouter/.env.openrouter rename infra/litellm/{docker-compose.litellm.yml => openrouter/docker-compose.openrouter.yml} (88%) rename infra/litellm/{litellm_config.yaml => openrouter/openrouter_config.yaml} (100%) diff --git a/infra/litellm/chutesai/.env.chutes b/infra/litellm/chutesai/.env.chutes new file mode 100644 index 0000000..5f24924 --- /dev/null +++ b/infra/litellm/chutesai/.env.chutes @@ -0,0 +1,8 @@ +# LiteLLM Proxy Configuration +LITELLM_MASTER_KEY=sk-TiKnmxLUrn9aLdO1MHUGkdL-w8bwPUieo0aiqAosTQ0 + +# Model Provider API Keys (used by LiteLLM) +CHUTESAI_API_KEY=cpk_5e4867be39504f8b887fd539ebaf599c.d6116916424e5e9c8c88ed6e1bc1b05a.TUps4clNMPQOECD5lDDpKbmkheo2JWry + +# Database (Postgres) +LITELLM_DATABASE_URL=postgresql://litellm:litellm123@db:5432/litellm \ No newline at end of file diff --git a/infra/litellm/chutesai/textModelsList.txt b/infra/litellm/chutesai/textModelsList.txt new file mode 100644 index 0000000..c2370b6 --- /dev/null +++ b/infra/litellm/chutesai/textModelsList.txt @@ -0,0 +1,1796 @@ +{ + "object": "list", + "data": [ + { + "id": "unsloth/gemma-3-12b-it", + "root": "unsloth/gemma-3-12b-it", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 96000 + }, + { + "id": "deepseek-ai/DeepSeek-R1", + "root": "deepseek-ai/DeepSeek-R1", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "NousResearch/Hermes-4-405B-FP8", + "root": "NousResearch/Hermes-4-405B-FP8", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "deepseek-ai/DeepSeek-V3-0324", + "root": "deepseek-ai/DeepSeek-V3-0324", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": 
"deepseek-ai/DeepSeek-V3.1", + "root": "deepseek-ai/DeepSeek-V3.1", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "deepseek-ai/DeepSeek-R1-0528", + "root": "deepseek-ai/DeepSeek-R1-0528", + "price": { + "input": { + "tao": 0.0013427045157166921, + "usd": 0.4 + }, + "output": { + "tao": 0.005874332256260527, + "usd": 1.75 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.4, + "completion": 1.75 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "zai-org/GLM-4.5-Air", + "root": "zai-org/GLM-4.5-Air", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "NousResearch/DeepHermes-3-Llama-3-8B-Preview", + "root": "NousResearch/DeepHermes-3-Llama-3-8B-Preview", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.00016783806446458652, + "usd": 0.05 + } + }, + "object": "model", + "parent": null, + "created": 1758999329, + "pricing": { + "prompt": 0.01, + "completion": 0.05 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-85501f5e49754c08a052a755b67e9797", + "group": null, + "object": "model_permission", + "created": 1758999329, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 131072 + }, + { + "id": "unsloth/Llama-3.2-3B-Instruct", + "root": "unsloth/Llama-3.2-3B-Instruct", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.0000335676128929173, + "usd": 0.01 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.01, + "completion": 0.01 + }, + "owned_by": "sglang", + "max_model_len": 16384 + }, + { + "id": "Qwen/Qwen3-32B", + "root": "Qwen/Qwen3-32B", + "price": { + "input": { + "tao": 0.0001007028386787519, + "usd": 0.03 + }, + "output": { + "tao": 0.0004363789676079249, + "usd": 0.13 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.03, + "completion": 0.13 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "deepseek-ai/DeepSeek-V3", + "root": "deepseek-ai/DeepSeek-V3", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "zai-org/GLM-4.5-FP8", + "root": "zai-org/GLM-4.5-FP8", + "price": { + "input": { + "tao": 0.0013762721286096093, + "usd": 0.41 + }, + "output": { + "tao": 0.005538656127331354, + "usd": 1.65 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.41, + "completion": 1.65 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "Qwen/Qwen3-14B", + "root": "Qwen/Qwen3-14B", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + 
"output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "NousResearch/DeepHermes-3-Mistral-24B-Preview", + "root": "NousResearch/DeepHermes-3-Mistral-24B-Preview", + "price": { + "input": { + "tao": 0.0004363789676079249, + "usd": 0.13 + }, + "output": { + "tao": 0.0017119482575387822, + "usd": 0.51 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.13, + "completion": 0.51 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "NousResearch/Hermes-4-70B", + "root": "NousResearch/Hermes-4-70B", + "price": { + "input": { + "tao": 0.0003692437418220903, + "usd": 0.11 + }, + "output": { + "tao": 0.0012755692899308574, + "usd": 0.38 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.11, + "completion": 0.38 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", + "root": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.00016783806446458652, + "usd": 0.05 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.01, + "completion": 0.05 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "root": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "price": { + "input": { + "tao": 0.0007384874836441806, + "usd": 0.22 + }, + "output": { + "tao": 0.003188923224827143, + "usd": 0.95 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.22, + "completion": 0.95 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "unsloth/Mistral-Small-24B-Instruct-2501", + "root": "unsloth/Mistral-Small-24B-Instruct-2501", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0005035141933937595, + "usd": 0.15 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.15 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "root": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "price": { + "input": { + "tao": 0.00033567612892917303, + "usd": 0.1 + }, + "output": { + "tao": 0.0013091369028237747, + "usd": 0.39 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.1, + "completion": 0.39 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "tngtech/DeepSeek-TNG-R1T2-Chimera", + "root": "tngtech/DeepSeek-TNG-R1T2-Chimera", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "tngtech/DeepSeek-R1T-Chimera", + "root": "tngtech/DeepSeek-R1T-Chimera", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "Tesslate/UIGEN-X-32B-0727", + "root": "Tesslate/UIGEN-X-32B-0727", + "price": 
{ + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "Qwen/Qwen2.5-72B-Instruct", + "root": "Qwen/Qwen2.5-72B-Instruct", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0008727579352158498, + "usd": 0.26 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.07, + "completion": 0.26 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "chutesai/Mistral-Small-3.2-24B-Instruct-2506", + "root": "chutesai/Mistral-Small-3.2-24B-Instruct-2506", + "price": { + "input": { + "tao": 0.0004699465805008422, + "usd": 0.14 + }, + "output": { + "tao": 0.0019133539348962858, + "usd": 0.57 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.14, + "completion": 0.57 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "moonshotai/Kimi-K2-Instruct-0905", + "root": "moonshotai/Kimi-K2-Instruct-0905", + "price": { + "input": { + "tao": 0.0013427045157166921, + "usd": 0.4 + }, + "output": { + "tao": 0.0075527129009063925, + "usd": 2.25 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.4, + "completion": 2.25 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "zai-org/GLM-4-32B-0414", + "root": "zai-org/GLM-4-32B-0414", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "deepseek-ai/DeepSeek-V3.1-Terminus", + "root": "deepseek-ai/DeepSeek-V3.1-Terminus", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "root": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "price": { + "input": { + "tao": 0.0001007028386787519, + "usd": 0.03 + }, + "output": { + "tao": 0.0004363789676079249, + "usd": 0.13 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.03, + "completion": 0.13 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "microsoft/MAI-DS-R1-FP8", + "root": "microsoft/MAI-DS-R1-FP8", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "unsloth/gemma-3-27b-it", + "root": "unsloth/gemma-3-27b-it", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0008727579352158498, + "usd": 0.26 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.07, + "completion": 0.26 + }, + "owned_by": "sglang", + "max_model_len": 96000 + }, + { + "id": "openai/gpt-oss-120b", + "root": "openai/gpt-oss-120b", + "price": { + "input": { + 
"tao": 0.00016783806446458652, + "usd": 0.05 + }, + "output": { + "tao": 0.0008391903223229325, + "usd": 0.25 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.05, + "completion": 0.25 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "OpenGVLab/InternVL3-78B", + "root": "OpenGVLab/InternVL3-78B", + "price": { + "input": { + "tao": 0.0001007028386787519, + "usd": 0.03 + }, + "output": { + "tao": 0.0004363789676079249, + "usd": 0.13 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.03, + "completion": 0.13 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "TheDrummer/Gemmasutra-Pro-27B-v1.1", + "root": "TheDrummer/Gemmasutra-Pro-27B-v1.1", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.0001007028386787519, + "usd": 0.03 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.01, + "completion": 0.03 + }, + "owned_by": "sglang", + "max_model_len": 8192 + }, + { + "id": "Qwen/Qwen2.5-Coder-32B-Instruct", + "root": "Qwen/Qwen2.5-Coder-32B-Instruct", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "root": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "price": { + "input": { + "tao": 0.00033567612892917303, + "usd": 0.1 + }, + "output": { + "tao": 0.0013091369028237747, + "usd": 0.39 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.1, + "completion": 0.39 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "Qwen/Qwen3-235B-A22B", + "root": "Qwen/Qwen3-235B-A22B", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "root": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0009398931610016844, + "usd": 0.28 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 0.07, + "completion": 0.28 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "TheDrummer/Tunguska-39B-v1", + "root": "TheDrummer/Tunguska-39B-v1", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0005370818062866768, + "usd": 0.16 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.04, + "completion": 0.16 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "unsloth/Mistral-Nemo-Instruct-2407", + "root": "unsloth/Mistral-Nemo-Instruct-2407", + "price": { + "input": { + "tao": 0.0000671352257858346, + "usd": 0.02 + }, + "output": { + "tao": 0.0002349732902504211, + "usd": 0.07 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.02, + "completion": 0.07 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "TheDrummer/Skyfall-36B-v2", + "root": "TheDrummer/Skyfall-36B-v2", + "price": { + "input": { + "tao": 
0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0005370818062866768, + "usd": 0.16 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.16 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "root": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0009398931610016844, + "usd": 0.28 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.07, + "completion": 0.28 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "root": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "price": { + "input": { + "tao": 0.00033567612892917303, + "usd": 0.1 + }, + "output": { + "tao": 0.0026854090314333843, + "usd": 0.8 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.1, + "completion": 0.8 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "chutesai/Mistral-Small-3.1-24B-Instruct-2503", + "root": "chutesai/Mistral-Small-3.1-24B-Instruct-2503", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0005035141933937595, + "usd": 0.15 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 0.04, + "completion": 0.15 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "ArliAI/QwQ-32B-ArliAI-RpR-v1", + "root": "ArliAI/QwQ-32B-ArliAI-RpR-v1", + "price": { + "input": { + "tao": 0.0000671352257858346, + "usd": 0.02 + }, + "output": { + "tao": 0.0002349732902504211, + "usd": 0.07 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.02, + "completion": 0.07 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "chutesai/Devstral-Small-2505", + "root": "chutesai/Devstral-Small-2505", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "Qwen/Qwen2.5-VL-72B-Instruct", + "root": "Qwen/Qwen2.5-VL-72B-Instruct", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0009398931610016844, + "usd": 0.28 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.07, + "completion": 0.28 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "Qwen/Qwen3-30B-A3B", + "root": "Qwen/Qwen3-30B-A3B", + "price": { + "input": { + "tao": 0.0002014056773575038, + "usd": 0.06 + }, + "output": { + "tao": 0.0007384874836441806, + "usd": 0.22 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.06, + "completion": 0.22 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "Qwen/Qwen2.5-VL-32B-Instruct", + "root": "Qwen/Qwen2.5-VL-32B-Instruct", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 16384 + }, + { + "id": "zai-org/GLM-Z1-32B-0414", + "root": "zai-org/GLM-Z1-32B-0414", + "price": { + "input": { + "tao": 
0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "zai-org/GLM-4.5-turbo", + "root": "zai-org/GLM-4.5-turbo", + "price": { + "input": { + "tao": 0.00335676128929173, + "usd": 1.0 + }, + "output": { + "tao": 0.01007028386787519, + "usd": 3.0 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 1.0, + "completion": 3.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "openai/gpt-oss-20b", + "root": "openai/gpt-oss-20b", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "unsloth/gemma-2-9b-it", + "root": "unsloth/gemma-2-9b-it", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.0000671352257858346, + "usd": 0.02 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.01, + "completion": 0.02 + }, + "owned_by": "sglang", + "max_model_len": 8192 + }, + { + "id": "cognitivecomputations/Dolphin3.0-Mistral-24B", + "root": "cognitivecomputations/Dolphin3.0-Mistral-24B", + "price": { + "input": { + "tao": 0.0001007028386787519, + "usd": 0.03 + }, + "output": { + "tao": 0.0003692437418220903, + "usd": 0.11 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.03, + "completion": 0.11 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "Qwen/Qwen3-8B", + "root": "Qwen/Qwen3-8B", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "shisa-ai/shisa-v2-llama3.3-70b", + "root": "shisa-ai/shisa-v2-llama3.3-70b", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "deepseek-ai/DeepSeek-V3.1-turbo", + "root": "deepseek-ai/DeepSeek-V3.1-turbo", + "price": { + "input": { + "tao": 0.00335676128929173, + "usd": 1.0 + }, + "output": { + "tao": 0.01007028386787519, + "usd": 3.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 1.0, + "completion": 3.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "moonshotai/Kimi-Dev-72B", + "root": "moonshotai/Kimi-Dev-72B", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0008727579352158498, + "usd": 0.26 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.07, + "completion": 0.26 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "deepseek-ai/DeepSeek-V3-0324-turbo", + "root": "deepseek-ai/DeepSeek-V3-0324-turbo", + "price": { + "input": { + "tao": 0.00335676128929173, + "usd": 1.0 + }, + "output": { + "tao": 0.01007028386787519, + "usd": 3.0 + } + }, + "object": 
"model", + "created": 1758999330, + "pricing": { + "prompt": 1.0, + "completion": 3.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "meituan-longcat/LongCat-Flash-Chat-FP8", + "root": "meituan-longcat/LongCat-Flash-Chat-FP8", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "agentica-org/DeepCoder-14B-Preview", + "root": "agentica-org/DeepCoder-14B-Preview", + "price": { + "input": { + "tao": 0.0000671352257858346, + "usd": 0.02 + }, + "output": { + "tao": 0.0002349732902504211, + "usd": 0.07 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 0.02, + "completion": 0.07 + }, + "owned_by": "sglang", + "max_model_len": 96000 + }, + { + "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "root": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "price": { + "input": { + "tao": 0.00033567612892917303, + "usd": 0.1 + }, + "output": { + "tao": 0.0026854090314333843, + "usd": 0.8 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.1, + "completion": 0.8 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "tencent/Hunyuan-A13B-Instruct", + "root": "tencent/Hunyuan-A13B-Instruct", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "moonshotai/Kimi-VL-A3B-Thinking", + "root": "moonshotai/Kimi-VL-A3B-Thinking", + "price": { + "input": { + "tao": 0.0000671352257858346, + "usd": 0.02 + }, + "output": { + "tao": 0.0002349732902504211, + "usd": 0.07 + } + }, + "object": "model", + "parent": null, + "created": 1758999327, + "pricing": { + "prompt": 0.02, + "completion": 0.07 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-3dcbdf5a380c416fb2fbd0db315efd85", + "group": null, + "object": "model_permission", + "created": 1758999327, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 131072 + }, + { + "id": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", + "root": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.0001007028386787519, + "usd": 0.03 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.01, + "completion": 0.03 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "meituan-longcat/LongCat-Flash-Thinking-FP8", + "root": "meituan-longcat/LongCat-Flash-Thinking-FP8", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "deepseek-ai/DeepSeek-V3.1-Base", + "root": "deepseek-ai/DeepSeek-V3.1-Base", + "price": { + "input": { + "tao": s0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": 
"model", + "created": 1758999330, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", + "root": "Qwen/Qwen3-VL-235B-A22B-Thinking", + "price": { + "input": { + "tao": 0.0005370818062866768, + "usd": 0.16 + }, + "output": { + "tao": 0.0021818948380396244, + "usd": 0.65 + } + }, + "object": "model", + "parent": null, + "created": 1758999329, + "pricing": { + "prompt": 0.16, + "completion": 0.65 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-ed83cf3457094ad0be21124eeceffb2b", + "group": null, + "object": "model_permission", + "created": 1758999329, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 262144 + }, + { + "id": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B", + "root": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "zai-org/GLM-4.5V", + "root": "zai-org/GLM-4.5V", + "price": { + "input": { + "tao": 0.0002685409031433384, + "usd": 0.08 + }, + "output": { + "tao": 0.001107731225466271, + "usd": 0.33 + } + }, + "object": "model", + "parent": null, + "created": 1758999327, + "pricing": { + "prompt": 0.08, + "completion": 0.33 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-7e45ae7399bd45e7851cc0bc864b352e", + "group": null, + "object": "model_permission", + "created": 1758999327, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 65536 + }, + { + "id": "unsloth/gemma-3-4b-it", + "root": "unsloth/gemma-3-4b-it", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 96000 + }, + { + "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "root": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "price": { + "input": { + "tao": 0.0002685409031433384, + "usd": 0.08 + }, + "output": { + "tao": 0.0009734607738946016, + "usd": 0.29 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.08, + "completion": 0.29 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "ByteDance-Seed/Seed-OSS-36B-Instruct", + "root": "ByteDance-Seed/Seed-OSS-36B-Instruct", + "price": { + "input": { + "tao": 0.0005370818062866768, + "usd": 0.16 + }, + "output": { + "tao": 0.0021818948380396244, + "usd": 0.65 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.16, + "completion": 0.65 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "NousResearch/Hermes-4-14B", + "root": "NousResearch/Hermes-4-14B", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.00016783806446458652, + "usd": 0.05 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.01, + "completion": 0.05 + }, + 
"owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "unsloth/Llama-3.2-1B-Instruct", + "root": "unsloth/Llama-3.2-1B-Instruct", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.0000335676128929173, + "usd": 0.01 + } + }, + "object": "model", + "parent": null, + "created": 1758999326, + "pricing": { + "prompt": 0.01, + "completion": 0.01 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-c5aef38bb2e849aeb8b9bdb89ffb08bb", + "group": null, + "object": "model_permission", + "created": 1758999326, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 16384 + }, + { + "id": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", + "root": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0008727579352158498, + "usd": 0.26 + } + }, + "object": "model", + "parent": null, + "created": 1758999326, + "pricing": { + "prompt": 0.07, + "completion": 0.26 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-3eb629c0adaa4d358130bc1adbde4432", + "group": null, + "object": "model_permission", + "created": 1758999326, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 131072 + }, + { + "id": "all-hands/openhands-lm-32b-v0.1-ep3", + "root": "all-hands/openhands-lm-32b-v0.1-ep3", + "price": { + "input": { + "tao": 0.0000671352257858346, + "usd": 0.02 + }, + "output": { + "tao": 0.0002349732902504211, + "usd": 0.07 + } + }, + "object": "model", + "parent": null, + "created": 1758999326, + "pricing": { + "prompt": 0.02, + "completion": 0.07 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-e829488c88be4eaba9fba9c18365a02f", + "group": null, + "object": "model_permission", + "created": 1758999326, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 16384 + } + ] +} diff --git a/infra/litellm/openrouter/.env.openrouter b/infra/litellm/openrouter/.env.openrouter new file mode 100644 index 0000000..770eecd --- /dev/null +++ b/infra/litellm/openrouter/.env.openrouter @@ -0,0 +1,10 @@ +# LiteLLM Proxy Configuration +LITELLM_MASTER_KEY=sk-TiKnmxLUrn9aLdO1MHUGkdL-w8bwPUieo0aiqAosTQ0 + +# Model Provider API Keys (used by LiteLLM) +OPENROUTER_API_KEY=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 + +CHUTESAI_API_KEY=cpk_5e4867be39504f8b887fd539ebaf599c.d6116916424e5e9c8c88ed6e1bc1b05a.TUps4clNMPQOECD5lDDpKbmkheo2JWry + +# Database (Postgres) +LITELLM_DATABASE_URL=postgresql://litellm:litellm123@db:5432/litellm \ No newline at end of file diff --git a/infra/litellm/docker-compose.litellm.yml b/infra/litellm/openrouter/docker-compose.openrouter.yml similarity index 88% rename from infra/litellm/docker-compose.litellm.yml rename to infra/litellm/openrouter/docker-compose.openrouter.yml index 5118e33..4ef2742 100644 --- a/infra/litellm/docker-compose.litellm.yml +++ b/infra/litellm/openrouter/docker-compose.openrouter.yml @@ -1,11 +1,11 @@ 
services: - litellm-proxy: + litellm-openrouter: image: ghcr.io/berriai/litellm:main-latest - container_name: litellm-proxy + container_name: litellm-openrouter ports: - "4000:4000" volumes: - - ./litellm_config.yaml:/app/config.yaml + - ./openrouter_config.yaml:/app/config.yaml - ./data:/data env_file: - .env.litellm diff --git a/infra/litellm/litellm_config.yaml b/infra/litellm/openrouter/openrouter_config.yaml similarity index 100% rename from infra/litellm/litellm_config.yaml rename to infra/litellm/openrouter/openrouter_config.yaml From a24634ec3d9e7a758af5edfd9d289bf255ac88b5 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Mon, 29 Sep 2025 14:55:40 +0800 Subject: [PATCH 02/10] feat: Add Nebula Block provider integration with Docker support - Updated .gitignore to include new environment variable files for Nebula Block, ChutesAI, and OpenRouter. - Removed obsolete .env.litellm.example and .env.chutes files. - Added .env.nebulablock.example with configuration for Nebula Block. - Created README.md for Nebula Block service with setup instructions and model details. - Implemented docker-compose configuration for Nebula Block service. - Added nebulablock_config.yaml for model configurations and pricing. - Introduced textModelList.txt for available models and their pricing. - Created shared Docker Compose file for infrastructure services including PostgreSQL, Redis, Prometheus, and Grafana. - Added initialization script for setting up database schemas for multiple LiteLLM services. - Ensured data directories are tracked with .gitkeep files. --- .gitignore | 5 +- infra/litellm/.env.litellm.example | 5 - infra/litellm/chutesai/.env.chutes | 8 - infra/litellm/chutesai/data/.gitkeep | 1 + .../chutesai/docker-compose.chutesai.yml | 25 ++ infra/litellm/data/.gitkeep | 0 infra/litellm/manage-all-services.sh | 301 ++++++++++++++++++ .../nebulablock/.env.nebulablock.example | 18 ++ infra/litellm/nebulablock/README.md | 163 ++++++++++ infra/litellm/nebulablock/data/.gitkeep | 1 + .../docker-compose.nebulablock.yml | 25 ++ .../nebulablock/nebulablock_config.yaml | 190 +++++++++++ infra/litellm/nebulablock/textModelList.txt | 154 +++++++++ infra/litellm/openrouter/data/.gitkeep | 1 + .../litellm/shared/docker-compose.shared.yml | 105 ++++++ .../shared/init-scripts/01-init-schemas.sh | 63 ++++ 16 files changed, 1051 insertions(+), 14 deletions(-) delete mode 100644 infra/litellm/.env.litellm.example delete mode 100644 infra/litellm/chutesai/.env.chutes create mode 100644 infra/litellm/chutesai/data/.gitkeep create mode 100644 infra/litellm/chutesai/docker-compose.chutesai.yml delete mode 100644 infra/litellm/data/.gitkeep create mode 100644 infra/litellm/manage-all-services.sh create mode 100644 infra/litellm/nebulablock/.env.nebulablock.example create mode 100644 infra/litellm/nebulablock/README.md create mode 100644 infra/litellm/nebulablock/data/.gitkeep create mode 100644 infra/litellm/nebulablock/docker-compose.nebulablock.yml create mode 100644 infra/litellm/nebulablock/nebulablock_config.yaml create mode 100644 infra/litellm/nebulablock/textModelList.txt create mode 100644 infra/litellm/openrouter/data/.gitkeep create mode 100644 infra/litellm/shared/docker-compose.shared.yml create mode 100755 infra/litellm/shared/init-scripts/01-init-schemas.sh diff --git a/.gitignore b/.gitignore index c6b938c..8cef037 100644 --- a/.gitignore +++ b/.gitignore @@ -327,4 +327,7 @@ data/prompts/* # VS Code .vscode/ -.env.litellm +# Environment variables files +.env.nebulablock +.env.chutesai +.env.openrouter \ No 
newline at end of file diff --git a/infra/litellm/.env.litellm.example b/infra/litellm/.env.litellm.example deleted file mode 100644 index 070432c..0000000 --- a/infra/litellm/.env.litellm.example +++ /dev/null @@ -1,5 +0,0 @@ -# LiteLLM Proxy Configuration -LITELLM_MASTER_KEY=sk-LITELLM_MASTER_KEY - -# Model Provider API Keys (used by LiteLLM) -OPENROUTER_API_KEY=sk-OPENROUTER_API_KEY diff --git a/infra/litellm/chutesai/.env.chutes b/infra/litellm/chutesai/.env.chutes deleted file mode 100644 index 5f24924..0000000 --- a/infra/litellm/chutesai/.env.chutes +++ /dev/null @@ -1,8 +0,0 @@ -# LiteLLM Proxy Configuration -LITELLM_MASTER_KEY=sk-TiKnmxLUrn9aLdO1MHUGkdL-w8bwPUieo0aiqAosTQ0 - -# Model Provider API Keys (used by LiteLLM) -CHUTESAI_API_KEY=cpk_5e4867be39504f8b887fd539ebaf599c.d6116916424e5e9c8c88ed6e1bc1b05a.TUps4clNMPQOECD5lDDpKbmkheo2JWry - -# Database (Postgres) -LITELLM_DATABASE_URL=postgresql://litellm:litellm123@db:5432/litellm \ No newline at end of file diff --git a/infra/litellm/chutesai/data/.gitkeep b/infra/litellm/chutesai/data/.gitkeep new file mode 100644 index 0000000..6e9476f --- /dev/null +++ b/infra/litellm/chutesai/data/.gitkeep @@ -0,0 +1 @@ +# This file ensures the data directory is tracked by git but its contents are ignored \ No newline at end of file diff --git a/infra/litellm/chutesai/docker-compose.chutesai.yml b/infra/litellm/chutesai/docker-compose.chutesai.yml new file mode 100644 index 0000000..5e68336 --- /dev/null +++ b/infra/litellm/chutesai/docker-compose.chutesai.yml @@ -0,0 +1,25 @@ +services: + litellm-chutesai: + image: ghcr.io/berriai/litellm:main-latest + container_name: litellm-chutesai + ports: + - "4004:4000" # Different port to avoid conflicts with other providers + volumes: + - ./chutesai_config.yaml:/app/config.yaml + - ./data:/data + env_file: + - .env.chutesai + command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:4000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + networks: + - aimo-llm-network # Use shared network + +networks: + aimo-llm-network: + external: true # Reference external shared network \ No newline at end of file diff --git a/infra/litellm/data/.gitkeep b/infra/litellm/data/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/infra/litellm/manage-all-services.sh b/infra/litellm/manage-all-services.sh new file mode 100644 index 0000000..8e1f391 --- /dev/null +++ b/infra/litellm/manage-all-services.sh @@ -0,0 +1,301 @@ +#!/bin/bash + +# AIMO Multi-Provider LLM Services Management Script +# Usage: ./manage-all-services.sh [command] [service] + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SHARED_DIR="$SCRIPT_DIR/shared" +PROVIDERS=("openrouter" "nebulablock" "phala" "chutesai") + +log() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" +} + +check_network() { + if ! docker network ls | grep -q aimo-llm-network; then + log "🌐 Creating shared network..." + docker network create aimo-llm-network --subnet=172.20.0.0/16 + fi +} + +start_shared_services() { + log "πŸ—οΈ Starting shared infrastructure (database, monitoring)..." + check_network + cd "$SHARED_DIR" + docker-compose -f docker-compose.shared.yml up -d + + # Wait for database to be ready + log "⏳ Waiting for shared database to be ready..." 
+ timeout=60 + while [ $timeout -gt 0 ]; do + if docker exec aimo-shared-db pg_isready -U litellm -d litellm >/dev/null 2>&1; then + log "βœ… Shared database is ready" + break + fi + sleep 2 + ((timeout--)) + done + + if [ $timeout -eq 0 ]; then + log "❌ Shared database failed to start within timeout" + exit 1 + fi +} + +stop_shared_services() { + log "πŸ›‘ Stopping shared infrastructure..." + cd "$SHARED_DIR" + docker-compose -f docker-compose.shared.yml down +} + +start_provider() { + local provider=$1 + if [ ! -d "$SCRIPT_DIR/$provider" ]; then + log "❌ Provider '$provider' not found" + return 1 + fi + + log "πŸš€ Starting $provider service..." + cd "$SCRIPT_DIR/$provider" + + # Find the compose file + compose_file="" + for file in docker-compose.*.yml; do + if [ -f "$file" ]; then + compose_file="$file" + break + fi + done + + if [ -z "$compose_file" ]; then + log "❌ No docker-compose file found for $provider" + return 1 + fi + + docker-compose -f "$compose_file" up -d + log "βœ… $provider service started" +} + +stop_provider() { + local provider=$1 + if [ ! -d "$SCRIPT_DIR/$provider" ]; then + log "❌ Provider '$provider' not found" + return 1 + fi + + log "πŸ›‘ Stopping $provider service..." + cd "$SCRIPT_DIR/$provider" + + compose_file="" + for file in docker-compose.*.yml; do + if [ -f "$file" ]; then + compose_file="$file" + break + fi + done + + if [ -n "$compose_file" ]; then + docker-compose -f "$compose_file" down + fi + log "βœ… $provider service stopped" +} + +start_all_providers() { + for provider in "${PROVIDERS[@]}"; do + if [ -d "$SCRIPT_DIR/$provider" ]; then + start_provider "$provider" + else + log "⚠️ Provider '$provider' directory not found, skipping..." + fi + done +} + +stop_all_providers() { + for provider in "${PROVIDERS[@]}"; do + if [ -d "$SCRIPT_DIR/$provider" ]; then + stop_provider "$provider" + fi + done +} + +show_status() { + log "πŸ“Š AIMO LLM Services Status" + echo "==================================" + + # Check shared services + log "πŸ—οΈ Shared Infrastructure:" + docker ps --filter "name=aimo-shared-" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" + + # Check provider services + log "πŸ€– Provider Services:" + docker ps --filter "name=litellm-" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" + + # Health checks + log "πŸ₯ Health Status:" + + # Check database + if docker exec aimo-shared-db pg_isready -U litellm -d litellm >/dev/null 2>&1; then + log "βœ… Shared database: healthy" + else + log "❌ Shared database: unhealthy" + fi + + # Check providers + ports=(4001 4002 4003 4004) + provider_names=("openrouter" "nebulablock" "phala" "chutesai") + + for i in "${!ports[@]}"; do + port=${ports[$i]} + name=${provider_names[$i]} + + if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then + log "βœ… $name (port $port): healthy" + else + log "❌ $name (port $port): unhealthy or not running" + fi + done +} + +show_logs() { + local service=$1 + if [ "$service" = "shared" ]; then + cd "$SHARED_DIR" + docker-compose -f docker-compose.shared.yml logs -f + elif [ -n "$service" ] && [ -d "$SCRIPT_DIR/$service" ]; then + cd "$SCRIPT_DIR/$service" + compose_file="" + for file in docker-compose.*.yml; do + if [ -f "$file" ]; then + compose_file="$file" + break + fi + done + + if [ -n "$compose_file" ]; then + docker-compose -f "$compose_file" logs -f + fi + else + log "πŸ“‹ Showing logs for all services (press Ctrl+C to exit):" + docker logs -f --tail=100 $(docker ps --filter "name=aimo-" --filter "name=litellm-" -q) + fi +} + 
+test_all_services() { + log "πŸ§ͺ Testing all LLM services..." + + # Test shared database + if docker exec aimo-shared-db pg_isready -U litellm -d litellm >/dev/null 2>&1; then + log "βœ… Database connection test passed" + else + log "❌ Database connection test failed" + fi + + # Test provider services + ports=(4001 4002 4003 4004) + provider_names=("openrouter" "nebulablock" "phala" "chutesai") + + for i in "${!ports[@]}"; do + port=${ports[$i]} + name=${provider_names[$i]} + + log "Testing $name service on port $port..." + + if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then + log "βœ… $name health check passed" + + # Test models endpoint + if curl -sf "http://localhost:$port/v1/models" >/dev/null 2>&1; then + log "βœ… $name models endpoint accessible" + else + log "⚠️ $name models endpoint not accessible" + fi + else + log "❌ $name service not responding" + fi + done + + log "πŸŽ‰ Service testing completed!" +} + +case "${1:-}" in + start) + if [ "$2" = "shared" ]; then + start_shared_services + elif [ "$2" = "all" ] || [ -z "$2" ]; then + start_shared_services + sleep 5 + start_all_providers + elif [ -n "$2" ]; then + check_network + start_provider "$2" + fi + ;; + stop) + if [ "$2" = "shared" ]; then + stop_shared_services + elif [ "$2" = "all" ] || [ -z "$2" ]; then + stop_all_providers + stop_shared_services + elif [ -n "$2" ]; then + stop_provider "$2" + fi + ;; + restart) + if [ "$2" = "all" ] || [ -z "$2" ]; then + stop_all_providers + stop_shared_services + sleep 3 + start_shared_services + sleep 5 + start_all_providers + elif [ "$2" = "shared" ]; then + stop_shared_services + sleep 3 + start_shared_services + elif [ -n "$2" ]; then + stop_provider "$2" + sleep 2 + start_provider "$2" + fi + ;; + status) + show_status + ;; + logs) + show_logs "$2" + ;; + test) + test_all_services + ;; + *) + echo "AIMO Multi-Provider LLM Services Manager" + echo "" + echo "Usage: $0 {start|stop|restart|status|logs|test} [service]" + echo "" + echo "Commands:" + echo " start [service] - Start services (all, shared, or specific provider)" + echo " stop [service] - Stop services" + echo " restart [service] - Restart services" + echo " status - Show status of all services" + echo " logs [service] - Show logs (all, shared, or specific provider)" + echo " test - Test all services" + echo "" + echo "Services:" + echo " all - All services (default)" + echo " shared - Shared infrastructure (database, monitoring)" + printf " %s\n" "${PROVIDERS[@]}" + echo "" + echo "Examples:" + echo " $0 start # Start all services" + echo " $0 start shared # Start only shared infrastructure" + echo " $0 start nebulablock # Start only Nebula Block service" + echo " $0 stop all # Stop all services" + echo " $0 restart openrouter # Restart OpenRouter service" + echo " $0 status # Show service status" + echo " $0 logs nebulablock # Show Nebula Block logs" + echo " $0 test # Test all services" + exit 1 + ;; +esac \ No newline at end of file diff --git a/infra/litellm/nebulablock/.env.nebulablock.example b/infra/litellm/nebulablock/.env.nebulablock.example new file mode 100644 index 0000000..888c253 --- /dev/null +++ b/infra/litellm/nebulablock/.env.nebulablock.example @@ -0,0 +1,18 @@ +# LiteLLM Proxy Configuration for Nebula Block +LITELLM_MASTER_KEY=sk-nebulablock-proxy-key + +# Nebula Block API Configuration +NEBULABLOCK_API_KEY=your_nebulablock_api_key_here + +# Shared Database Configuration - connects to shared LiteLLM database +# Uses table prefix 'nebulablock_' to separate data logically 
+LITELLM_DATABASE_URL=postgresql://litellm:litellm123@aimo-shared-db:5432/litellm +LITELLM_TABLE_PREFIX=nebulablock_ + +# Service Configuration +SERVICE_NAME=nebulablock-llm-proxy +LOG_LEVEL=INFO + +# Additional Provider Keys (if needed for fallbacks) +# OPENAI_API_KEY=your_openai_key_for_fallbacks +# ANTHROPIC_API_KEY=your_anthropic_key_for_fallbacks \ No newline at end of file diff --git a/infra/litellm/nebulablock/README.md b/infra/litellm/nebulablock/README.md new file mode 100644 index 0000000..ca2baff --- /dev/null +++ b/infra/litellm/nebulablock/README.md @@ -0,0 +1,163 @@ +# Nebula Block LiteLLM Service + +This directory contains the Docker configuration for running LiteLLM proxy with Nebula Block provider integration. + +## Files Structure + +``` +nebulablock/ +├── docker-compose.nebulablock.yml # Docker Compose configuration +├── nebulablock_config.yaml # LiteLLM model configuration +├── .env.nebulablock # Environment variables +├── README.md # This file +└── data/ # Persistent data directory +``` + +## Available Models + +### Premium Models (Paid) +- **OpenAI**: gpt-4o-mini ($0.40/$1.60 per 1M tokens) +- **Google Gemini**: + - gemini-2.5-pro ($1.00/$8.00 per 1M tokens) + - gemini-2.5-flash ($0.24/$2.00 per 1M tokens) + - gemini-2.5-flash-lite ($0.08/$0.32 per 1M tokens) + - gemini-2.0-flash ($0.08/$0.32 per 1M tokens) + - gemini-2.0-flash-lite ($0.06/$0.24 per 1M tokens) +- **Meta Llama**: llama-3.3-70b ($0.10/$0.30 per 1M tokens) +- **Qwen**: qwq-32b ($0.15/$0.40 per 1M tokens) + +### Free Models +- **Community Models**: L3.3-MS-Nevoria-70b, L3-70B-Euryale-v2.1, L3-8B-Stheno-v3.2 +- **Mistral**: Mistral-Small-3.2-24B-Instruct-2506 +- **DeepSeek**: DeepSeek-R1-0528, DeepSeek-V3-0324, DeepSeek-R1 + +## Setup Instructions + +### 1. Configure Environment Variables + +Copy and edit the environment file: +```bash +cp .env.nebulablock.example .env.nebulablock +``` + +Edit `.env.nebulablock` and add your Nebula Block API key: +```bash +NEBULABLOCK_API_KEY=your_actual_api_key_here +``` + +### 2. Start the Service + +```bash +# Start Nebula Block LLM service +docker-compose -f docker-compose.nebulablock.yml up -d + +# Check service status +docker-compose -f docker-compose.nebulablock.yml ps + +# View logs +docker-compose -f docker-compose.nebulablock.yml logs -f +``` + +### 3. Test the Service + +```bash +# Health check +curl http://localhost:4002/health + +# List available models +curl http://localhost:4002/v1/models + +# Test chat completion with a free model +curl -X POST http://localhost:4002/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-nebulablock-proxy-key" \ + -d '{ + "model": "deepseek-r1-free", + "messages": [{"role": "user", "content": "Hello!"}], + "max_tokens": 100 + }' +``` + +### 4. 
Stop the Service + +```bash +# Stop the service +docker-compose -f docker-compose.nebulablock.yml down + +# Stop and remove volumes (caution: this deletes database data) +docker-compose -f docker-compose.nebulablock.yml down -v +``` + +## Configuration Details + +### Model Naming Convention +- Models are prefixed with `nebulablock/` to identify the provider +- Free models are explicitly marked in the configuration +- Pricing information is included for cost tracking + +### Network Configuration +- Service runs on port 4002 to avoid conflicts with other LLM services +- Uses dedicated `nebulablock-network` for isolation +- PostgreSQL database for logging and analytics + +### Fallback Strategy +- Free models are configured as fallbacks for paid models +- Routing strategy set to "least-busy" for load balancing +- Request timeout set to 10 minutes for complex queries + +## Integration with Main AIMO Service + +To use this service in your main AIMO application, configure: + +```bash +# Add to main .env file +LLM_BASE_URL=http://localhost:4002 +LLM_API_KEY=sk-nebulablock-proxy-key +LLM_MODEL_DEFAULT=deepseek-r1-free # Use free model as default +``` + +## Monitoring and Maintenance + +### Health Monitoring +- Health check endpoint: `http://localhost:4002/health` +- Database status included in health checks +- Automatic restart on failure + +### Logs and Analytics +- JSON formatted logs for structured analysis +- Database logging for request analytics +- Optional integration with Langfuse for advanced tracking + +### Resource Management +- Single worker process for development +- Configurable timeout and rate limiting +- Automatic parameter validation and cleanup + +## Troubleshooting + +### Common Issues +1. **Port 4002 already in use**: Change the port in docker-compose.yml +2. **API key invalid**: Verify NEBULABLOCK_API_KEY in .env.nebulablock +3. **Models not loading**: Check nebulablock_config.yaml syntax +4. 
**Database connection issues**: Ensure PostgreSQL container is healthy + +### Debug Mode +Enable debug logging by setting in .env.nebulablock: +```bash +LOG_LEVEL=DEBUG +``` + +### Performance Tuning +For production use, consider: +- Increasing `num_workers` in docker-compose.yml +- Adjusting rate limits in configuration +- Setting up external PostgreSQL database +- Adding Redis for caching + +## Security Considerations + +- Change default master key in production +- Use strong database passwords +- Implement network-level access controls +- Regular API key rotation +- Monitor usage for anomalies \ No newline at end of file diff --git a/infra/litellm/nebulablock/data/.gitkeep new file mode 100644 index 0000000..6e9476f --- /dev/null +++ b/infra/litellm/nebulablock/data/.gitkeep @@ -0,0 +1 @@ +# This file ensures the data directory is tracked by git but its contents are ignored \ No newline at end of file diff --git a/infra/litellm/nebulablock/docker-compose.nebulablock.yml b/infra/litellm/nebulablock/docker-compose.nebulablock.yml new file mode 100644 index 0000000..1965737 --- /dev/null +++ b/infra/litellm/nebulablock/docker-compose.nebulablock.yml @@ -0,0 +1,25 @@ +services: + litellm-nebulablock: + image: ghcr.io/berriai/litellm:main-latest + container_name: litellm-nebulablock + ports: + - "4002:4000" # Different port to avoid conflicts with OpenRouter + volumes: + - ./nebulablock_config.yaml:/app/config.yaml + - ./data:/data + env_file: + - .env.nebulablock + command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:4000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + networks: + - aimo-llm-network # Use shared network + +networks: + aimo-llm-network: + external: true # Reference external shared network \ No newline at end of file diff --git a/infra/litellm/nebulablock/nebulablock_config.yaml b/infra/litellm/nebulablock/nebulablock_config.yaml new file mode 100644 index 0000000..9c7c045 --- /dev/null +++ b/infra/litellm/nebulablock/nebulablock_config.yaml @@ -0,0 +1,190 @@ +model_list: + # OpenAI Models + - model_name: gpt-4o-mini + litellm_params: + model: nebulablock/openai/gpt-4o-mini + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.40 # per 1M tokens + completion: 1.60 # per 1M tokens + + # Google Gemini Models + - model_name: gemini-2_5-pro + litellm_params: + model: nebulablock/gemini/gemini-2.5-pro + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 1.00 + completion: 8.00 + + - model_name: gemini-2_5-flash + litellm_params: + model: nebulablock/gemini/gemini-2.5-flash + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.24 + completion: 2.00 + + - model_name: gemini-2_5-flash-lite + litellm_params: + model: nebulablock/gemini/gemini-2.5-flash-lite + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.08 + completion: 0.32 + + - model_name: gemini-2_0-flash + litellm_params: + model: nebulablock/gemini/gemini-2.0-flash + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.08 + completion: 0.32 + + - model_name: gemini-2_0-flash-lite + litellm_params: + model: nebulablock/gemini/gemini-2.0-flash-lite + api_base: 
https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.06 + completion: 0.24 + + # Community Models (Free) + - model_name: l3-ms-nevoria-70b + litellm_params: + model: nebulablock/Steelskull/L3.3-MS-Nevoria-70b + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free model + completion: 0 + + - model_name: mistral-small-free + litellm_params: + model: nebulablock/mistralai/Mistral-Small-3.2-24B-Instruct-2506 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free model + completion: 0 + + - model_name: l3-70b-euryale + litellm_params: + model: nebulablock/Sao10K/L3-70B-Euryale-v2.1 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free model + completion: 0 + + - model_name: l3-8b-stheno + litellm_params: + model: nebulablock/Sao10K/L3-8B-Stheno-v3.2 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free model + completion: 0 + + # DeepSeek Models + - model_name: deepseek-r1-0528-free + litellm_params: + model: nebulablock/deepseek-ai/DeepSeek-R1-0528 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free version + completion: 0 + + - model_name: deepseek-v3-0324-free + litellm_params: + model: nebulablock/deepseek-ai/DeepSeek-V3-0324 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free version + completion: 0 + + - model_name: deepseek-r1-free + litellm_params: + model: nebulablock/deepseek-ai/DeepSeek-R1 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free version + completion: 0 + + # Meta Llama Models + - model_name: llama-3_3-70b + litellm_params: + model: nebulablock/meta-llama/Llama-3.3-70B-Instruct + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.10 + completion: 0.30 + + # Qwen Models + - model_name: qwq-32b + litellm_params: + model: nebulablock/Qwen/QwQ-32B + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.15 + completion: 0.40 + +# General settings +general_settings: + master_key: os.environ/LITELLM_MASTER_KEY # Set a master key for proxy auth + database_url: os.environ/LITELLM_DATABASE_URL # Database connection + database_logging: true + service_name: "nebulablock-llm-proxy" + database_params: + # Use specific schema for this service + options: "-c search_path=nebulablock,public" + +# Logging configuration +litellm_settings: + drop_params: true # Drop unsupported params instead of erroring + set_verbose: true + json_logs: true + request_timeout: 600 # 10 minutes timeout + +# Rate limiting and routing +router_settings: + enable_pre_call_checks: true + enable_admin_api: true + model_fallbacks: + # Fallback strategy for paid models to free alternatives + default: ["l3-ms-nevoria-70b", "mistral-small-free", "deepseek-r1-free"] + routing_strategy: "least-busy" + +logging: + level: DEBUG + format: json + +# Health check configuration +health_check: + enable: true + endpoint: "/health" + +# Success/Error callbacks (optional) +# success_callback: ["langfuse"] # Track successful calls +# failure_callback: ["langfuse"] # Track failed calls + +# Custom provider settings for Nebula Block +provider_settings: + nebulablock: + 
base_url: "https://api.nebulablock.ai/v1" + headers: + "User-Agent": "LiteLLM-NebulaBlock/1.0" + rate_limit: + requests_per_minute: 1000 + tokens_per_minute: 100000 \ No newline at end of file diff --git a/infra/litellm/nebulablock/textModelList.txt b/infra/litellm/nebulablock/textModelList.txt new file mode 100644 index 0000000..028107b --- /dev/null +++ b/infra/litellm/nebulablock/textModelList.txt @@ -0,0 +1,154 @@ +{ + "data": [ + { + "id": "openai/gpt-4o-mini", + "object": "model", + "created": 1677610602, + "owned_by": "openai", + "pricing": { + "prompt": 0.40, + "completion": 1.60 + } + }, + { + "id": "gemini/gemini-2.5-pro", + "object": "model", + "created": 1677610602, + "owned_by": "google", + "pricing": { + "prompt": 1.00, + "completion": 8.00 + } + }, + { + "id": "gemini/gemini-2.5-flash", + "object": "model", + "created": 1677610602, + "owned_by": "google", + "pricing": { + "prompt": 0.24, + "completion": 2.00 + } + }, + { + "id": "gemini/gemini-2.5-flash-lite", + "object": "model", + "created": 1677610602, + "owned_by": "google", + "pricing": { + "prompt": 0.08, + "completion": 0.32 + } + }, + { + "id": "gemini/gemini-2.0-flash", + "object": "model", + "created": 1677610602, + "owned_by": "google", + "pricing": { + "prompt": 0.08, + "completion": 0.32 + } + }, + { + "id": "gemini/gemini-2.0-flash-lite", + "object": "model", + "created": 1677610602, + "owned_by": "google", + "pricing": { + "prompt": 0.06, + "completion": 0.24 + } + }, + { + "id": "Steelskull/L3.3-MS-Nevoria-70b", + "object": "model", + "created": 1677610602, + "owned_by": "Steelskull", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "object": "model", + "created": 1677610602, + "owned_by": "mistralai", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "Sao10K/L3-70B-Euryale-v2.1", + "object": "model", + "created": 1677610602, + "owned_by": "Sao10K", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "Sao10K/L3-8B-Stheno-v3.2", + "object": "model", + "created": 1677610602, + "owned_by": "Sao10K", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "deepseek-ai/DeepSeek-R1-0528", + "object": "model", + "created": 1677610602, + "owned_by": "deepseek-ai", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "deepseek-ai/DeepSeek-V3-0324", + "object": "model", + "created": 1677610602, + "owned_by": "deepseek-ai", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "deepseek-ai/DeepSeek-R1", + "object": "model", + "created": 1677610602, + "owned_by": "deepseek-ai", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "meta-llama/Llama-3.3-70B-Instruct", + "object": "model", + "created": 1677610602, + "owned_by": "meta-llama", + "pricing": { + "prompt": 0.10, + "completion": 0.30 + } + }, + { + "id": "Qwen/QwQ-32B", + "object": "model", + "created": 1677610602, + "owned_by": "Qwen", + "pricing": { + "prompt": 0.15, + "completion": 0.40 + } + } + ] +} \ No newline at end of file diff --git a/infra/litellm/openrouter/data/.gitkeep b/infra/litellm/openrouter/data/.gitkeep new file mode 100644 index 0000000..6e9476f --- /dev/null +++ b/infra/litellm/openrouter/data/.gitkeep @@ -0,0 +1 @@ +# This file ensures the data directory is tracked by git but its contents are ignored \ No newline at end of file diff --git a/infra/litellm/shared/docker-compose.shared.yml b/infra/litellm/shared/docker-compose.shared.yml new file mode 100644 index 
0000000..5a0d566 --- /dev/null +++ b/infra/litellm/shared/docker-compose.shared.yml @@ -0,0 +1,105 @@ +version: '3.8' + +services: + # Shared PostgreSQL Database for all LiteLLM services + aimo-shared-db: + image: postgres:15 + container_name: aimo-shared-db + restart: unless-stopped + environment: + POSTGRES_USER: litellm + POSTGRES_PASSWORD: litellm123 + POSTGRES_DB: litellm + # Enable multiple databases if needed + POSTGRES_MULTIPLE_DATABASES: litellm,analytics,monitoring + volumes: + - shared-db-data:/var/lib/postgresql/data + - ./init-scripts:/docker-entrypoint-initdb.d # Database initialization scripts + ports: + - "5432:5432" # Expose for external access if needed + networks: + - aimo-llm-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U litellm -d litellm"] + interval: 30s + timeout: 10s + retries: 5 + + # Redis for shared caching and session management + aimo-shared-redis: + image: redis:7-alpine + container_name: aimo-shared-redis + restart: unless-stopped + command: redis-server --requirepass redis123 + volumes: + - shared-redis-data:/data + ports: + - "6379:6379" + networks: + - aimo-llm-network + healthcheck: + test: ["CMD", "redis-cli", "--raw", "incr", "ping"] + interval: 30s + timeout: 10s + retries: 5 + + # Shared monitoring and metrics + aimo-prometheus: + image: prom/prometheus:latest + container_name: aimo-prometheus + restart: unless-stopped + ports: + - "9090:9090" + volumes: + - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus-data:/prometheus + networks: + - aimo-llm-network + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/etc/prometheus/console_libraries' + - '--web.console.templates=/etc/prometheus/consoles' + - '--storage.tsdb.retention.time=200h' + - '--web.enable-lifecycle' + + # Shared Grafana for visualization + aimo-grafana: + image: grafana/grafana:latest + container_name: aimo-grafana + restart: unless-stopped + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin123 + - GF_DATABASE_TYPE=postgres + - GF_DATABASE_HOST=aimo-shared-db:5432 + - GF_DATABASE_NAME=litellm + - GF_DATABASE_USER=litellm + - GF_DATABASE_PASSWORD=litellm123 + volumes: + - grafana-data:/var/lib/grafana + - ./monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards + - ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources + networks: + - aimo-llm-network + depends_on: + - aimo-shared-db + - aimo-prometheus + +volumes: + shared-db-data: + driver: local + shared-redis-data: + driver: local + prometheus-data: + driver: local + grafana-data: + driver: local + +networks: + aimo-llm-network: + driver: bridge + ipam: + config: + - subnet: 172.20.0.0/16 \ No newline at end of file diff --git a/infra/litellm/shared/init-scripts/01-init-schemas.sh b/infra/litellm/shared/init-scripts/01-init-schemas.sh new file mode 100755 index 0000000..117a569 --- /dev/null +++ b/infra/litellm/shared/init-scripts/01-init-schemas.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +# Database initialization script for multiple services +# This script creates separate schemas for different LiteLLM services + +set -e + +echo "Initializing shared database for multiple LiteLLM services..." 
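+ +# The unquoted <<-EOSQL delimiter below lets the shell expand $POSTGRES_USER and +# $POSTGRES_DB before psql runs, and -v ON_ERROR_STOP=1 aborts the whole block on +# the first failing statement; both variables are supplied by the postgres image +# when it executes scripts from /docker-entrypoint-initdb.d. Note that the +# WHERE EXISTS clauses in the unified view only filter rows at query time, so +# CREATE VIEW still requires the underlying litellm_requestlogs tables to exist.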
+ +# Create schemas for different providers +psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL + -- Create schemas for different providers + CREATE SCHEMA IF NOT EXISTS openrouter; + CREATE SCHEMA IF NOT EXISTS nebulablock; + CREATE SCHEMA IF NOT EXISTS phala; + CREATE SCHEMA IF NOT EXISTS chutesai; + CREATE SCHEMA IF NOT EXISTS shared_analytics; + + -- Grant permissions + GRANT ALL PRIVILEGES ON SCHEMA openrouter TO $POSTGRES_USER; + GRANT ALL PRIVILEGES ON SCHEMA nebulablock TO $POSTGRES_USER; + GRANT ALL PRIVILEGES ON SCHEMA phala TO $POSTGRES_USER; + GRANT ALL PRIVILEGES ON SCHEMA chutesai TO $POSTGRES_USER; + GRANT ALL PRIVILEGES ON SCHEMA shared_analytics TO $POSTGRES_USER; + + -- Create a view for unified analytics across all providers + CREATE OR REPLACE VIEW shared_analytics.unified_requests AS + SELECT + 'openrouter' as provider, + * + FROM openrouter.litellm_requestlogs + WHERE EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'openrouter' AND table_name = 'litellm_requestlogs') + + UNION ALL + + SELECT + 'nebulablock' as provider, + * + FROM nebulablock.litellm_requestlogs + WHERE EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'nebulablock' AND table_name = 'litellm_requestlogs') + + UNION ALL + + SELECT + 'phala' as provider, + * + FROM phala.litellm_requestlogs + WHERE EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'phala' AND table_name = 'litellm_requestlogs') + + UNION ALL + + SELECT + 'chutesai' as provider, + * + FROM chutesai.litellm_requestlogs + WHERE EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'chutesai' AND table_name = 'litellm_requestlogs'); + + -- Create indexes for better performance + -- These will be created automatically when tables are created by LiteLLM + +EOSQL + +echo "Shared database initialization completed!" 
\ No newline at end of file From 7754c4452df27a8fa00f2182a30f1c987ab2edb5 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Mon, 29 Sep 2025 15:02:14 +0800 Subject: [PATCH 03/10] Stop tracking .env.openrouter file --- .gitignore | 4 +- infra/litellm/chutesai/.env.chutesai.example | 13 + infra/litellm/chutesai/README.md | 250 ++++++++++++ infra/litellm/chutesai/chutesai_config.yaml | 369 ++++++++++++++++++ ...shared.yml => docker-compose.database.yml} | 0 5 files changed, 635 insertions(+), 1 deletion(-) create mode 100644 infra/litellm/chutesai/.env.chutesai.example create mode 100644 infra/litellm/chutesai/README.md create mode 100644 infra/litellm/chutesai/chutesai_config.yaml rename infra/litellm/shared/{docker-compose.shared.yml => docker-compose.database.yml} (100%) diff --git a/.gitignore b/.gitignore index 8cef037..2a13895 100644 --- a/.gitignore +++ b/.gitignore @@ -330,4 +330,6 @@ data/prompts/* # Environment variables files .env.nebulablock .env.chutesai -.env.openrouter \ No newline at end of file +.env.openrouter +infra/litellm/openrouter/.env.openrouter +infra/litellm/openrouter/.env.openrouter diff --git a/infra/litellm/chutesai/.env.chutesai.example b/infra/litellm/chutesai/.env.chutesai.example new file mode 100644 index 0000000..07e7fc2 --- /dev/null +++ b/infra/litellm/chutesai/.env.chutesai.example @@ -0,0 +1,13 @@ +# LiteLLM Proxy Configuration +LITELLM_MASTER_KEY=sk-chutesai-proxy-key + +# Model Provider API Keys (used by LiteLLM) +CHUTESAI_API_KEY=your_chutesai_api_key_here + +# Database (Shared PostgreSQL with schema separation) +LITELLM_DATABASE_URL=postgresql://litellm:litellm123@aimo-shared-db:5432/litellm +LITELLM_TABLE_PREFIX=chutesai_ + +# Service Configuration +SERVICE_NAME=chutesai-llm-proxy +LOG_LEVEL=INFO \ No newline at end of file diff --git a/infra/litellm/chutesai/README.md b/infra/litellm/chutesai/README.md new file mode 100644 index 0000000..a156342 --- /dev/null +++ b/infra/litellm/chutesai/README.md @@ -0,0 +1,250 @@ +# ChutesAI LiteLLM Service + +This directory contains the Docker configuration for running LiteLLM proxy with ChutesAI provider integration. 
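+ +This service assumes the shared infrastructure under `infra/litellm/shared` is already running: the external `aimo-llm-network` Docker network and the shared PostgreSQL instance (`aimo-shared-db`). A minimal bootstrap sketch, assuming the compose file and network names used elsewhere in this repo: + +```bash +# Create the external network once (ignore the error if it already exists) +docker network create aimo-llm-network || true + +# Bring up the shared database (and monitoring) before starting this service +docker-compose -f ../shared/docker-compose.database.yml up -d +```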
+ +## Files Structure + +``` +chutesai/ +├── docker-compose.chutesai.yml # Docker Compose configuration +├── chutesai_config.yaml # LiteLLM model configuration +├── .env.chutesai # Environment variables (create from example) +├── .env.chutesai.example # Environment variables template +├── README.md # This file +├── textModelsList.txt # Complete model list with pricing +└── data/ # Persistent data directory +``` + +## Available Models + +### Free Models (0.0 pricing) +- **GLM Models**: glm-4.5-air-free (Free, 131K context) +- **OpenAI OSS**: openai-gpt-oss-20b-free (Free, 131K context) +- **Google Gemma**: gemma-3-4b-it-free (Free, 96K context) +- **LongCat Models**: longcat-flash-chat-fp8-free, longcat-flash-thinking-fp8-free (Free, 131K context) +- **Alibaba**: tongyi-deepresearch-30b-free (Free, 131K context) + +### Budget Models ($0.01-$0.07 per 1M tokens) +- **Meta Llama**: llama-3.2-1b-instruct ($0.01/$0.01), llama-3.2-3b-instruct ($0.01/$0.01) +- **Google Gemma**: gemma-2-9b-it ($0.01/$0.02), gemmasutra-pro-27b ($0.01/$0.03) +- **NousResearch**: hermes-4-14b ($0.01/$0.05), deephermes-3-llama-3-8b ($0.01/$0.05) +- **DeepSeek**: deepseek-r1-0528-qwen3-8b ($0.01/$0.05) +- **Mistral**: mistral-nemo-instruct ($0.02/$0.07) +- **Moonshot**: kimi-dev-72b ($0.07/$0.26), kimi-vl-a3b-thinking ($0.02/$0.07) + +### Mid-range Models ($0.04-$0.29 per 1M tokens) +- **Google Gemma**: gemma-3-12b-it ($0.04/$0.14) +- **Qwen**: qwen3-30b-a3b-thinking ($0.08/$0.29) +- **GLM**: glm-4.5v ($0.08/$0.33) +- **Tencent**: hunyuan-a13b-instruct ($0.04/$0.14) +- **NVIDIA**: llama-3.3-nemotron-super-49b ($0.07/$0.26) + +### Premium Models ($0.14-$3.0 per 1M tokens) +- **ChutesAI Mistral**: mistral-small-3.2-24b ($0.14/$0.57) +- **Qwen Advanced**: qwen3-next-80b-a3b-thinking ($0.1/$0.8), qwen3-vl-235b-a22b-thinking ($0.16/$0.65) +- **DeepSeek Premium**: deepseek-v3.1-turbo ($1.0/$3.0), deepseek-r1-0528 ($0.55/$1.75) +- **ByteDance**: seed-oss-36b-instruct ($0.16/$0.65) + +### Ultra Premium Models ($0.25-$1.0+ per 1M tokens) +- **DeepSeek Flagship**: deepseek-r1, deepseek-v3, deepseek-v3.1 ($0.25/$1.0) +- **NousResearch**: hermes-4-405b-fp8 ($0.25/$1.0) + +## Setup Instructions + +### 1. Configure Environment Variables + +Copy and edit the environment file: +```bash +cp .env.chutesai.example .env.chutesai +``` + +Edit `.env.chutesai` and add your ChutesAI API key: +```bash +# Update this with your actual API key +CHUTESAI_API_KEY=your_actual_chutesai_api_key_here +``` + +### 2. Start the Service + +```bash +# Start ChutesAI LLM service +docker-compose -f docker-compose.chutesai.yml up -d + +# Check service status +docker-compose -f docker-compose.chutesai.yml ps + +# View logs +docker-compose -f docker-compose.chutesai.yml logs -f +``` + +### 3. Test the Service + +```bash +# Health check +curl http://localhost:4004/health + +# List available models +curl http://localhost:4004/v1/models + +# Test chat completion with a free model +curl -X POST http://localhost:4004/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-chutesai-proxy-key" \ + -d '{ + "model": "glm-4_5-air-free", + "messages": [{"role": "user", "content": "Hello! 
Can you help me with coding?"}], + "max_tokens": 100 + }' + +# Test with a premium model +curl -X POST http://localhost:4004/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-chutesai-proxy-key" \ + -d '{ + "model": "deepseek-r1", + "messages": [{"role": "user", "content": "Explain quantum computing in simple terms"}], + "max_tokens": 150 + }' +``` + +### 4. Stop the Service + +```bash +# Stop the service +docker-compose -f docker-compose.chutesai.yml down + +# Stop and remove volumes (caution: this also removes any named volumes) +docker-compose -f docker-compose.chutesai.yml down -v +``` + +## Configuration Details + +### Model Naming Convention +- Models are prefixed with `chutesai/` to identify the provider +- Free models are explicitly marked with "-free" suffix +- Pricing information is included for cost tracking and routing decisions + +### Network Configuration +- Service runs on port 4004 to avoid conflicts with other LLM services +- Uses shared `aimo-llm-network` for integration with other services +- Shared PostgreSQL database with `chutesai` schema for isolation + +### Fallback Strategy +- Free models (glm-4.5-air-free, openai-gpt-oss-20b-free, etc.) are configured as fallbacks +- Routing strategy set to "least-busy" for optimal load distribution +- Request timeout set to 10 minutes for complex reasoning queries + +## Integration with Main AIMO Service + +To use this service in your main AIMO application, configure: + +```bash +# Add to main .env file +LLM_BASE_URL=http://localhost:4004 +LLM_API_KEY=sk-chutesai-proxy-key +LLM_MODEL_DEFAULT=glm-4_5-air-free # Use free model as default +``` + +## Model Categories and Use Cases + +### Free Tier (Perfect for Development) +- **General Chat**: glm-4.5-air-free, openai-gpt-oss-20b-free +- **Code Generation**: gemma-3-4b-it-free +- **Long Context**: longcat-flash-chat-fp8-free (131K tokens) +- **Research**: tongyi-deepresearch-30b-free + +### Production Ready (Cost-Effective) +- **Balanced Performance**: hermes-4-14b, deephermes-3-llama-3-8b +- **Reasoning Tasks**: deepseek-r1-0528-qwen3-8b +- **Multimodal**: kimi-vl-a3b-thinking +- **Code Assistant**: deepcoder-14b-preview + +### Enterprise Grade (High Performance) +- **Advanced Reasoning**: deepseek-r1, deepseek-v3.1 +- **Large Context**: qwen3-vl-235b-a22b-thinking (262K context) +- **Specialized Tasks**: mistral-small-3.2-24b +- **Vision Models**: glm-4.5v + +### Ultra Premium (Cutting Edge) +- **Best Reasoning**: deepseek-v3.1-turbo +- **Largest Models**: hermes-4-405b-fp8 +- **Advanced Multimodal**: qwen3-vl-235b-a22b-thinking + +## Monitoring and Maintenance + +### Health Monitoring +- Health check endpoint: `http://localhost:4004/health` +- Database connectivity included in health checks +- Automatic container restart on failure + +### Logs and Analytics +- JSON formatted logs for structured analysis +- Database logging for request analytics and cost tracking +- Schema-based data separation from other providers + +### Resource Management +- Single worker process optimized for development +- Configurable timeout and rate limiting +- Automatic parameter validation and cleanup +- Memory-efficient model loading + +## Troubleshooting + +### Common Issues +1. **Port 4004 already in use**: Change the host port mapping in `docker-compose.chutesai.yml` +2. **API key invalid**: Verify CHUTESAI_API_KEY in .env.chutesai +3. **Models not loading**: Check chutesai_config.yaml syntax +4. 
**Database connection issues**: Ensure the shared PostgreSQL container is healthy + +### Debug Mode +Enable debug logging by setting the following in `.env.chutesai`: +```bash +LOG_LEVEL=DEBUG +``` + +### Performance Tuning +For production use, consider: +- Increasing `num_workers` in `docker-compose.chutesai.yml` +- Adjusting rate limits in configuration +- Setting up external PostgreSQL database +- Adding Redis for caching +- Using a load balancer for high availability + +### Cost Management +- Use free models for development and testing +- Set up model fallbacks to prevent overspending +- Monitor usage through database logs +- Consider budget models for production workloads + +## Security Considerations + +- Change default master key in production +- Use strong database passwords +- Implement network-level access controls +- Rotate API keys regularly +- Monitor usage for anomalies +- Set up rate limiting per user/API key + +## API Compatibility + +The ChutesAI service is fully compatible with the OpenAI API format: +- `/v1/chat/completions` - Chat completions +- `/v1/models` - List available models +- `/health` - Service health check +- Standard OpenAI headers and request/response format + +## Cost Optimization Tips + +1. **Start with Free Models**: Use glm-4.5-air-free, openai-gpt-oss-20b-free for development +2. **Fallback Strategy**: Configure fallbacks from premium to free models +3. **Right-size Models**: Use smaller models for simple tasks +4. **Monitor Usage**: Track costs through database logging +5. **Batch Requests**: Group multiple requests when possible + +## Support and Documentation + +For issues specific to ChutesAI integration: +1. Check service logs: `docker-compose -f docker-compose.chutesai.yml logs` +2. Verify API connectivity: `curl http://localhost:4004/health` +3. Test model availability: `curl http://localhost:4004/v1/models` +4. 
Check database schema: Ensure `chutesai` schema exists \ No newline at end of file diff --git a/infra/litellm/chutesai/chutesai_config.yaml b/infra/litellm/chutesai/chutesai_config.yaml new file mode 100644 index 0000000..4674430 --- /dev/null +++ b/infra/litellm/chutesai/chutesai_config.yaml @@ -0,0 +1,369 @@ +model_list: + # Popular Free Models (0.0 pricing) + - model_name: glm-4_5-air-free + litellm_params: + model: chutesai/zai-org/GLM-4.5-Air + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + - model_name: openai-gpt-oss-20b-free + litellm_params: + model: chutesai/openai/gpt-oss-20b + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + - model_name: gemma-3-4b-it-free + litellm_params: + model: chutesai/unsloth/gemma-3-4b-it + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + - model_name: longcat-flash-chat-fp8-free + litellm_params: + model: chutesai/meituan-longcat/LongCat-Flash-Chat-FP8 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + - model_name: longcat-flash-thinking-fp8-free + litellm_params: + model: chutesai/meituan-longcat/LongCat-Flash-Thinking-FP8 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + - model_name: tongyi-deepresearch-30b-free + litellm_params: + model: chutesai/Alibaba-NLP/Tongyi-DeepResearch-30B-A3B + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + # Budget Models (Low cost) + - model_name: llama-3_2-1b-instruct + litellm_params: + model: chutesai/unsloth/Llama-3.2-1B-Instruct + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.01 + + - model_name: llama-3_2-3b-instruct + litellm_params: + model: chutesai/unsloth/Llama-3.2-3B-Instruct + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.01 + + - model_name: gemma-2-9b-it + litellm_params: + model: chutesai/unsloth/gemma-2-9b-it + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.02 + + - model_name: gemmasutra-pro-27b + litellm_params: + model: chutesai/TheDrummer/Gemmasutra-Pro-27B-v1.1 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.03 + + - model_name: dolphin3-r1-mistral-24b + litellm_params: + model: chutesai/cognitivecomputations/Dolphin3.0-R1-Mistral-24B + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.03 + + # Mid-range Models + - model_name: gemma-3-12b-it + litellm_params: + model: chutesai/unsloth/gemma-3-12b-it + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.04 + completion: 0.14 + + - model_name: hermes-4-14b + litellm_params: + model: chutesai/NousResearch/Hermes-4-14B + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.05 + + - model_name: deephermes-3-llama-3-8b + litellm_params: + model: 
chutesai/NousResearch/DeepHermes-3-Llama-3-8B-Preview + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.05 + + - model_name: deepseek-r1-0528-qwen3-8b + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-R1-0528-Qwen3-8B + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.05 + + - model_name: mistral-nemo-instruct + litellm_params: + model: chutesai/unsloth/Mistral-Nemo-Instruct-2407 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.02 + completion: 0.07 + + - model_name: deepcoder-14b-preview + litellm_params: + model: chutesai/agentica-org/DeepCoder-14B-Preview + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.02 + completion: 0.07 + + - model_name: kimi-vl-a3b-thinking + litellm_params: + model: chutesai/moonshotai/Kimi-VL-A3B-Thinking + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.02 + completion: 0.07 + + - model_name: openhands-lm-32b + litellm_params: + model: chutesai/all-hands/openhands-lm-32b-v0.1-ep3 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.02 + completion: 0.07 + + - model_name: kimi-dev-72b + litellm_params: + model: chutesai/moonshotai/Kimi-Dev-72B + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.07 + completion: 0.26 + + - model_name: llama-3_3-nemotron-super-49b + litellm_params: + model: chutesai/nvidia/Llama-3_3-Nemotron-Super-49B-v1_5 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.07 + completion: 0.26 + + # High-end Models + - model_name: qwen3-30b-a3b-thinking + litellm_params: + model: chutesai/Qwen/Qwen3-30B-A3B-Thinking-2507 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.08 + completion: 0.29 + + - model_name: glm-4_5v + litellm_params: + model: chutesai/zai-org/GLM-4.5V + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.08 + completion: 0.33 + + - model_name: hunyuan-a13b-instruct + litellm_params: + model: chutesai/tencent/Hunyuan-A13B-Instruct + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.04 + completion: 0.14 + + - model_name: mistral-small-3_2-24b + litellm_params: + model: chutesai/chutesai/Mistral-Small-3.2-24B-Instruct-2506 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.14 + completion: 0.57 + + - model_name: qwen3-next-80b-a3b-thinking + litellm_params: + model: chutesai/Qwen/Qwen3-Next-80B-A3B-Thinking + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.1 + completion: 0.8 + + - model_name: seed-oss-36b-instruct + litellm_params: + model: chutesai/ByteDance-Seed/Seed-OSS-36B-Instruct + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.16 + completion: 0.65 + + - model_name: qwen3-vl-235b-a22b-thinking + litellm_params: + model: chutesai/Qwen/Qwen3-VL-235B-A22B-Thinking + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.16 + completion: 0.65 + + - model_name: deepseek-v3_1-base + litellm_params: + model: 
chutesai/deepseek-ai/DeepSeek-V3.1-Base + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.25 + completion: 1.0 + + # Premium DeepSeek Models + - model_name: deepseek-r1 + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-R1 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.25 + completion: 1.0 + + - model_name: deepseek-v3 + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-V3 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.25 + completion: 1.0 + + - model_name: deepseek-v3_1 + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-V3.1 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.25 + completion: 1.0 + + - model_name: deepseek-r1-0528 + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-R1-0528 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.55 + completion: 1.75 + + - model_name: deepseek-v3_1-turbo + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-V3.1-turbo + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 1.0 + completion: 3.0 + + - model_name: hermes-4-405b-fp8 + litellm_params: + model: chutesai/NousResearch/Hermes-4-405B-FP8 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.25 + completion: 1.0 + +# General settings +general_settings: + master_key: os.environ/LITELLM_MASTER_KEY # Set a master key for proxy auth + database_url: os.environ/LITELLM_DATABASE_URL # Database connection + database_logging: true + service_name: "chutesai-llm-proxy" + database_params: + # Use specific schema for this service + options: "-c search_path=chutesai,public" + +# Logging configuration +litellm_settings: + drop_params: true # Drop unsupported params instead of erroring + set_verbose: true + json_logs: true + request_timeout: 600 # 10 minutes timeout + +# Rate limiting and routing +router_settings: + enable_pre_call_checks: true + enable_admin_api: true + model_fallbacks: + # Fallback strategy: paid models fall back to free alternatives + default: ["glm-4_5-air-free", "openai-gpt-oss-20b-free", "gemma-3-4b-it-free", "longcat-flash-chat-fp8-free"] + routing_strategy: "least-busy" + +logging: + level: DEBUG + format: json + +# Health check configuration +health_check: + enable: true + endpoint: "/health" + +# Success/Error callbacks (optional) +# success_callback: ["langfuse"] # Track successful calls +# failure_callback: ["langfuse"] # Track failed calls + +# Custom provider settings for ChutesAI +provider_settings: + chutesai: + base_url: "https://api.chutesai.com/v1" + headers: + "User-Agent": "LiteLLM-ChutesAI/1.0" + rate_limit: + requests_per_minute: 1000 + tokens_per_minute: 100000 \ No newline at end of file diff --git a/infra/litellm/shared/docker-compose.shared.yml b/infra/litellm/shared/docker-compose.database.yml similarity index 100% rename from infra/litellm/shared/docker-compose.shared.yml rename to infra/litellm/shared/docker-compose.database.yml From 9a86b844480a532355638fb95c184cc37deb6744 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Mon, 29 Sep 2025 15:28:53 +0800 Subject: [PATCH 04/10] feat: Update service configurations and documentation for LiteLLM providers --- .gitignore | 2 -- infra/litellm/README.md | 36 +++++++++++++++++++ .../chutesai/docker-compose.chutesai.yml | 2 +- 
infra/litellm/manage-all-services.sh | 12 +++---- .../docker-compose.nebulablock.yml | 8 ++--- .../openrouter/docker-compose.openrouter.yml | 25 +++---------- .../shared/init-scripts/01-init-schemas.sh | 10 ------ 7 files changed, 52 insertions(+), 43 deletions(-) create mode 100644 infra/litellm/README.md diff --git a/.gitignore b/.gitignore index 2a13895..b3fa34a 100644 --- a/.gitignore +++ b/.gitignore @@ -331,5 +331,3 @@ data/prompts/* .env.nebulablock .env.chutesai .env.openrouter -infra/litellm/openrouter/.env.openrouter -infra/litellm/openrouter/.env.openrouter diff --git a/infra/litellm/README.md b/infra/litellm/README.md new file mode 100644 index 0000000..aa24687 --- /dev/null +++ b/infra/litellm/README.md @@ -0,0 +1,36 @@ +# AIMO Multi-Provider LLM Services Manager + +This script provides unified management for all LLM provider services and shared infrastructure (database, monitoring) in the AIMO project. + +## Usage + +```bash +./manage-all-services.sh {start|stop|restart|status|logs|test} [service] +``` + +- `start [service]` Start all, shared, or a specific provider service +- `stop [service]` Stop all, shared, or a specific provider service +- `restart [service]` Restart all, shared, or a specific provider service +- `status` Show status and health of all services +- `logs [service]` Show logs for all, shared, or a specific provider +- `test` Test health and endpoints of all services + +## Examples + +```bash +./manage-all-services.sh start # Start all services +./manage-all-services.sh start shared # Start only shared infrastructure +./manage-all-services.sh start openrouter # Start only OpenRouter service +./manage-all-services.sh stop all # Stop all services +./manage-all-services.sh status # Show service status +./manage-all-services.sh logs nebulablock # Show Nebula Block logs +./manage-all-services.sh test # Test all services +``` + +## Notes + +- Services managed: openrouter, nebulablock, chutesai +- Shared infrastructure includes database, Redis, Prometheus, Grafana +- Requires Docker and docker-compose installed + +--- \ No newline at end of file diff --git a/infra/litellm/chutesai/docker-compose.chutesai.yml b/infra/litellm/chutesai/docker-compose.chutesai.yml index 5e68336..ece7f8c 100644 --- a/infra/litellm/chutesai/docker-compose.chutesai.yml +++ b/infra/litellm/chutesai/docker-compose.chutesai.yml @@ -3,7 +3,7 @@ services: image: ghcr.io/berriai/litellm:main-latest container_name: litellm-chutesai ports: - - "4004:4000" # Different port to avoid conflicts with other providers + - "4003:4000" volumes: - ./chutesai_config.yaml:/app/config.yaml - ./data:/data diff --git a/infra/litellm/manage-all-services.sh b/infra/litellm/manage-all-services.sh index 8e1f391..696b0f6 100644 --- a/infra/litellm/manage-all-services.sh +++ b/infra/litellm/manage-all-services.sh @@ -7,7 +7,7 @@ set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SHARED_DIR="$SCRIPT_DIR/shared" -PROVIDERS=("openrouter" "nebulablock" "phala" "chutesai") +PROVIDERS=("openrouter" "nebulablock" "chutesai") log() { echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" @@ -143,8 +143,8 @@ show_status() { fi # Check providers - ports=(4001 4002 4003 4004) - provider_names=("openrouter" "nebulablock" "phala" "chutesai") + ports=(4001 4002 4004) + provider_names=("openrouter" "nebulablock" "chutesai") for i in "${!ports[@]}"; do port=${ports[$i]} @@ -193,8 +193,8 @@ test_all_services() { fi # Test provider services - ports=(4001 4002 4003 4004) - provider_names=("openrouter" "nebulablock" "phala" "chutesai") 
+ ports=(4001 4002 4004) + provider_names=("openrouter" "nebulablock" "chutesai") for i in "${!ports[@]}"; do port=${ports[$i]} @@ -285,7 +285,7 @@ case "${1:-}" in echo "Services:" echo " all - All services (default)" echo " shared - Shared infrastructure (database, monitoring)" - printf " %s\n" "${PROVIDERS[@]}" + printf " %s\n" "${PROVIDERS[@]}" echo "" echo "Examples:" echo " $0 start # Start all services" diff --git a/infra/litellm/nebulablock/docker-compose.nebulablock.yml b/infra/litellm/nebulablock/docker-compose.nebulablock.yml index 1965737..7e52595 100644 --- a/infra/litellm/nebulablock/docker-compose.nebulablock.yml +++ b/infra/litellm/nebulablock/docker-compose.nebulablock.yml @@ -1,9 +1,9 @@ -Íservices: +services: litellm-nebulablock: image: ghcr.io/berriai/litellm:main-latest container_name: litellm-nebulablock ports: - - "4002:4000" # Different port to avoid conflicts with OpenRouter + - "4002:4000" volumes: - ./nebulablock_config.yaml:/app/config.yaml - ./data:/data @@ -18,8 +18,8 @@ retries: 3 start_period: 10s networks: - - aimo-llm-network # Use shared network + - aimo-llm-network networks: aimo-llm-network: - external: true # Reference external shared network \ No newline at end of file + external: true \ No newline at end of file diff --git a/infra/litellm/openrouter/docker-compose.openrouter.yml b/infra/litellm/openrouter/docker-compose.openrouter.yml index 4ef2742..5297368 100644 --- a/infra/litellm/openrouter/docker-compose.openrouter.yml +++ b/infra/litellm/openrouter/docker-compose.openrouter.yml @@ -3,16 +3,14 @@ services: image: ghcr.io/berriai/litellm:main-latest container_name: litellm-openrouter ports: - - "4000:4000" + - "4001:4000" volumes: - ./openrouter_config.yaml:/app/config.yaml - ./data:/data env_file: - - .env.litellm + - .env.openrouter command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] restart: unless-stopped - depends_on: - - db healthcheck: test: ["CMD", "curl", "-f", "http://localhost:4000/health"] interval: 30s @@ -20,21 +18,8 @@ services: retries: 3 start_period: 10s networks: - - litellm-network - - db: - image: postgres:15 - container_name: litellm-db - restart: unless-stopped - environment: - POSTGRES_USER: litellm - POSTGRES_PASSWORD: litellm123 - POSTGRES_DB: litellm - volumes: - - ./pgdata:/var/lib/postgresql/data - networks: - - litellm-network + - aimo-llm-network networks: - litellm-network: - driver: bridge + aimo-llm-network: + external: true diff --git a/infra/litellm/shared/init-scripts/01-init-schemas.sh b/infra/litellm/shared/init-scripts/01-init-schemas.sh index 117a569..b99aeed 100755 --- a/infra/litellm/shared/init-scripts/01-init-schemas.sh +++ b/infra/litellm/shared/init-scripts/01-init-schemas.sh @@ -12,14 +12,12 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-E -- Create schemas for different providers CREATE SCHEMA IF NOT EXISTS openrouter; CREATE SCHEMA IF NOT EXISTS nebulablock; - CREATE SCHEMA IF NOT EXISTS phala; CREATE SCHEMA IF NOT EXISTS chutesai; CREATE SCHEMA IF NOT EXISTS shared_analytics; -- Grant permissions GRANT ALL PRIVILEGES ON SCHEMA openrouter TO $POSTGRES_USER; GRANT ALL PRIVILEGES ON SCHEMA nebulablock TO $POSTGRES_USER; - GRANT ALL PRIVILEGES ON SCHEMA phala TO $POSTGRES_USER; GRANT ALL PRIVILEGES ON SCHEMA chutesai TO $POSTGRES_USER; GRANT ALL PRIVILEGES ON SCHEMA shared_analytics TO $POSTGRES_USER; @@ -41,14 +39,6 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-E UNION ALL - SELECT - 'phala' as 
provider, - * - FROM phala.litellm_requestlogs - WHERE EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'phala' AND table_name = 'litellm_requestlogs') - - UNION ALL - SELECT 'chutesai' as provider, * From d77f4dc8c9d454ac49222d432fa81afd1257312b Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Tue, 30 Sep 2025 00:49:02 +0100 Subject: [PATCH 05/10] feat: Update ChutesAI model pricing and remove .env.openrouter file --- infra/litellm/chutesai/chutesai_config.yaml | 2 +- infra/litellm/openrouter/.env.openrouter | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) delete mode 100644 infra/litellm/openrouter/.env.openrouter diff --git a/infra/litellm/chutesai/chutesai_config.yaml b/infra/litellm/chutesai/chutesai_config.yaml index 4674430..e8fb2a2 100644 --- a/infra/litellm/chutesai/chutesai_config.yaml +++ b/infra/litellm/chutesai/chutesai_config.yaml @@ -6,7 +6,7 @@ model_list: api_base: https://api.chutesai.com/v1 api_key: os.environ/CHUTESAI_API_KEY pricing: - prompt: 0.0 # Free model + prompt: 0.0 completion: 0.0 - model_name: openai-gpt-oss-20b-free diff --git a/infra/litellm/openrouter/.env.openrouter b/infra/litellm/openrouter/.env.openrouter deleted file mode 100644 index 770eecd..0000000 --- a/infra/litellm/openrouter/.env.openrouter +++ /dev/null @@ -1,10 +0,0 @@ -# LiteLLM Proxy Configuration -LITELLM_MASTER_KEY=sk-TiKnmxLUrn9aLdO1MHUGkdL-w8bwPUieo0aiqAosTQ0 - -# Model Provider API Keys (used by LiteLLM) -OPENROUTER_API_KEY=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 - -CHUTESAI_API_KEY=cpk_5e4867be39504f8b887fd539ebaf599c.d6116916424e5e9c8c88ed6e1bc1b05a.TUps4clNMPQOECD5lDDpKbmkheo2JWry - -# Database (Postgres) -LITELLM_DATABASE_URL=postgresql://litellm:litellm123@db:5432/litellm \ No newline at end of file From 9f8b5b52046e566bd8d25fb77b82b50eadacc253 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Tue, 30 Sep 2025 02:48:31 +0100 Subject: [PATCH 06/10] feat: Update database service configuration and add Prometheus monitoring setup --- infra/litellm/manage-all-services.sh | 2 +- .../shared/docker-compose.database.yml | 5 +-- .../litellm/shared/monitoring/prometheus.yml | 31 +++++++++++++++++++ 3 files changed, 33 insertions(+), 5 deletions(-) mode change 100644 => 100755 infra/litellm/manage-all-services.sh create mode 100644 infra/litellm/shared/monitoring/prometheus.yml diff --git a/infra/litellm/manage-all-services.sh b/infra/litellm/manage-all-services.sh old mode 100644 new mode 100755 index 696b0f6..7087a7f --- a/infra/litellm/manage-all-services.sh +++ b/infra/litellm/manage-all-services.sh @@ -24,7 +24,7 @@ start_shared_services() { log "πŸ—οΈ Starting shared infrastructure (database, monitoring)..." check_network cd "$SHARED_DIR" - docker-compose -f docker-compose.shared.yml up -d + docker-compose -f docker-compose.database.yml up -d # Wait for database to be ready log "⏳ Waiting for shared database to be ready..." 
diff --git a/infra/litellm/shared/docker-compose.database.yml b/infra/litellm/shared/docker-compose.database.yml index 5a0d566..6b4e695 100644 --- a/infra/litellm/shared/docker-compose.database.yml +++ b/infra/litellm/shared/docker-compose.database.yml @@ -99,7 +99,4 @@ volumes: networks: aimo-llm-network: - driver: bridge - ipam: - config: - - subnet: 172.20.0.0/16 \ No newline at end of file + external: true \ No newline at end of file diff --git a/infra/litellm/shared/monitoring/prometheus.yml b/infra/litellm/shared/monitoring/prometheus.yml new file mode 100644 index 0000000..7e56a62 --- /dev/null +++ b/infra/litellm/shared/monitoring/prometheus.yml @@ -0,0 +1,31 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'litellm-openrouter' + static_configs: + - targets: ['host.docker.internal:4001'] + metrics_path: /metrics + + - job_name: 'litellm-nebulablock' + static_configs: + - targets: ['host.docker.internal:4002'] + metrics_path: /metrics + + - job_name: 'litellm-chutesai' + static_configs: + - targets: ['host.docker.internal:4003'] + metrics_path: /metrics + + - job_name: 'postgres' + static_configs: + - targets: ['aimo-shared-db:5432'] + + - job_name: 'redis' + static_configs: + - targets: ['aimo-shared-redis:6379'] From 15f38716d901d682b183e5bea4e1b637576738eb Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Tue, 30 Sep 2025 22:13:55 +0100 Subject: [PATCH 07/10] feat: Enhance healthcheck for services to include authorization support --- .../chutesai/docker-compose.chutesai.yml | 2 +- infra/litellm/manage-all-services.sh | 123 ++++++++++++++++-- .../docker-compose.nebulablock.yml | 2 +- .../openrouter/docker-compose.openrouter.yml | 2 +- 4 files changed, 112 insertions(+), 17 deletions(-) diff --git a/infra/litellm/chutesai/docker-compose.chutesai.yml b/infra/litellm/chutesai/docker-compose.chutesai.yml index ece7f8c..69811c4 100644 --- a/infra/litellm/chutesai/docker-compose.chutesai.yml +++ b/infra/litellm/chutesai/docker-compose.chutesai.yml @@ -12,7 +12,7 @@ services: command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:4000/health"] + test: ["CMD-SHELL", "if [ -n \"$${LITELLM_MASTER_KEY}\" ]; then curl -sf -H \"Authorization: Bearer $${LITELLM_MASTER_KEY}\" http://localhost:4000/health; else curl -sf http://localhost:4000/health; fi"] interval: 30s timeout: 10s retries: 3 diff --git a/infra/litellm/manage-all-services.sh b/infra/litellm/manage-all-services.sh index 7087a7f..5cd4eb6 100755 --- a/infra/litellm/manage-all-services.sh +++ b/infra/litellm/manage-all-services.sh @@ -24,7 +24,20 @@ start_shared_services() { log "🏗️ Starting shared infrastructure (database, monitoring)..." check_network cd "$SHARED_DIR" - docker-compose -f docker-compose.database.yml up -d + # support either docker-compose.database.yml or docker-compose.shared.yml + compose_file="" + if [ -f docker-compose.database.yml ]; then + compose_file="docker-compose.database.yml" + elif [ -f docker-compose.shared.yml ]; then + compose_file="docker-compose.shared.yml" + fi + + if [ -z "$compose_file" ]; then + log "❌ No shared docker-compose file found in $SHARED_DIR" + return 1 + fi + + docker-compose -f "$compose_file" up -d # Wait for database to be ready log "⏳ Waiting for shared database to be ready..." 
@@ -47,7 +60,19 @@ start_shared_services() { stop_shared_services() { log "πŸ›‘ Stopping shared infrastructure..." cd "$SHARED_DIR" - docker-compose -f docker-compose.shared.yml down + compose_file="" + if [ -f docker-compose.shared.yml ]; then + compose_file="docker-compose.shared.yml" + elif [ -f docker-compose.database.yml ]; then + compose_file="docker-compose.database.yml" + fi + + if [ -z "$compose_file" ]; then + log "⚠️ No shared docker-compose file found in $SHARED_DIR, skipping" + return 0 + fi + + docker-compose -f "$compose_file" down } start_provider() { @@ -143,17 +168,54 @@ show_status() { fi # Check providers - ports=(4001 4002 4004) + ports=(4001 4002 4003) provider_names=("openrouter" "nebulablock" "chutesai") - + + # helper: find LITELLM_MASTER_KEY for a provider from .env files or environment + get_provider_master_key() { + local provider=$1 + local key="" + # look for .env* files in the provider dir + for f in "$SCRIPT_DIR/$provider"/.env*; do + if [ -f "$f" ]; then + key=$(grep -E '^\s*LITELLM_MASTER_KEY=' "$f" 2>/dev/null | head -n1 | cut -d'=' -f2- | tr -d '\r' | tr -d '"') + if [ -n "$key" ]; then + echo "$key" + return 0 + fi + fi + done + + # fallback: check env var named LITELLM_MASTER_KEY_ + local up=$(echo "$provider" | tr '[:lower:]' '[:upper:]') + local varname="LITELLM_MASTER_KEY_$up" + eval val=\$$varname + if [ -n "$val" ]; then + echo "$val" + return 0 + fi + + # nothing found + echo "" + } + for i in "${!ports[@]}"; do port=${ports[$i]} name=${provider_names[$i]} - - if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then - log "βœ… $name (port $port): healthy" + + key=$(get_provider_master_key "$name") + if [ -n "$key" ]; then + if curl -sf -H "Authorization: Bearer $key" "http://localhost:$port/health" >/dev/null 2>&1; then + log "βœ… $name (port $port): healthy" + else + log "❌ $name (port $port): unhealthy or not running" + fi else - log "❌ $name (port $port): unhealthy or not running" + if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then + log "βœ… $name (port $port): healthy" + else + log "❌ $name (port $port): unhealthy or not running (no master key found)" + fi fi done } @@ -193,7 +255,7 @@ test_all_services() { fi # Test provider services - ports=(4001 4002 4004) + ports=(4001 4002 4003) provider_names=("openrouter" "nebulablock" "chutesai") for i in "${!ports[@]}"; do @@ -202,14 +264,47 @@ test_all_services() { log "Testing $name service on port $port..." 
- if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then + # include Authorization header if provider has a master key configured + key="" + for f in "$SCRIPT_DIR/$name"/.env*; do + if [ -f "$f" ]; then + key=$(grep -E '^\s*LITELLM_MASTER_KEY=' "$f" 2>/dev/null | head -n1 | cut -d'=' -f2- | tr -d '\r' | tr -d '"') + if [ -n "$key" ]; then + break + fi + fi + done + + if [ -n "$key" ]; then + if curl -sf -H "Authorization: Bearer $key" "http://localhost:$port/health" >/dev/null 2>&1; then + ok=1 + else + ok=0 + fi + else + if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then + ok=1 + else + ok=0 + fi + fi + + if [ "$ok" -eq 1 ]; then log "βœ… $name health check passed" - + # Test models endpoint - if curl -sf "http://localhost:$port/v1/models" >/dev/null 2>&1; then - log "βœ… $name models endpoint accessible" + if [ -n "$key" ]; then + if curl -sf -H "Authorization: Bearer $key" "http://localhost:$port/v1/models" >/dev/null 2>&1; then + log "βœ… $name models endpoint accessible" + else + log "⚠️ $name models endpoint not accessible" + fi else - log "⚠️ $name models endpoint not accessible" + if curl -sf "http://localhost:$port/v1/models" >/dev/null 2>&1; then + log "βœ… $name models endpoint accessible" + else + log "⚠️ $name models endpoint not accessible" + fi fi else log "❌ $name service not responding" diff --git a/infra/litellm/nebulablock/docker-compose.nebulablock.yml b/infra/litellm/nebulablock/docker-compose.nebulablock.yml index 7e52595..cea21ee 100644 --- a/infra/litellm/nebulablock/docker-compose.nebulablock.yml +++ b/infra/litellm/nebulablock/docker-compose.nebulablock.yml @@ -12,7 +12,7 @@ services: command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:4000/health"] + test: ["CMD-SHELL", "if [ -n \"$${LITELLM_MASTER_KEY}\" ]; then curl -sf -H \"Authorization: Bearer $${LITELLM_MASTER_KEY}\" http://localhost:4000/health; else curl -sf http://localhost:4000/health; fi"] interval: 30s timeout: 10s retries: 3 diff --git a/infra/litellm/openrouter/docker-compose.openrouter.yml b/infra/litellm/openrouter/docker-compose.openrouter.yml index 5297368..bcb52c4 100644 --- a/infra/litellm/openrouter/docker-compose.openrouter.yml +++ b/infra/litellm/openrouter/docker-compose.openrouter.yml @@ -12,7 +12,7 @@ services: command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:4000/health"] + test: ["CMD-SHELL", "if [ -n \"$${LITELLM_MASTER_KEY}\" ]; then curl -sf -H \"Authorization: Bearer $${LITELLM_MASTER_KEY}\" http://localhost:4000/health; else curl -sf http://localhost:4000/health; fi"] interval: 30s timeout: 10s retries: 3 From de8af37a5dd51e794dcfe66b51153151c77690c9 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Tue, 30 Sep 2025 22:26:11 +0100 Subject: [PATCH 08/10] feat: Migrate from docker-compose to docker compose for service management --- infra/litellm/manage-all-services.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/infra/litellm/manage-all-services.sh b/infra/litellm/manage-all-services.sh index 5cd4eb6..7be0a7d 100755 --- a/infra/litellm/manage-all-services.sh +++ b/infra/litellm/manage-all-services.sh @@ -37,7 +37,7 @@ start_shared_services() { return 1 fi - docker-compose -f "$compose_file" up -d + docker compose -f "$compose_file" up -d # Wait for database to be ready log "⏳ Waiting for shared 
database to be ready..." @@ -72,7 +72,7 @@ stop_shared_services() { return 0 fi - docker-compose -f "$compose_file" down + docker compose -f "$compose_file" down } start_provider() { @@ -99,7 +99,7 @@ start_provider() { return 1 fi - docker-compose -f "$compose_file" up -d + docker compose -f "$compose_file" up -d log "βœ… $provider service started" } @@ -122,7 +122,7 @@ stop_provider() { done if [ -n "$compose_file" ]; then - docker-compose -f "$compose_file" down + docker compose -f "$compose_file" down fi log "βœ… $provider service stopped" } @@ -224,7 +224,7 @@ show_logs() { local service=$1 if [ "$service" = "shared" ]; then cd "$SHARED_DIR" - docker-compose -f docker-compose.shared.yml logs -f + docker compose -f docker-compose.shared.yml logs -f elif [ -n "$service" ] && [ -d "$SCRIPT_DIR/$service" ]; then cd "$SCRIPT_DIR/$service" compose_file="" @@ -236,7 +236,7 @@ show_logs() { done if [ -n "$compose_file" ]; then - docker-compose -f "$compose_file" logs -f + docker compose -f "$compose_file" logs -f fi else log "πŸ“‹ Showing logs for all services (press Ctrl+C to exit):" From 0dc62871cf26266e6ec37200fae1a1c31ec2eb5e Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Wed, 1 Oct 2025 04:12:17 +0100 Subject: [PATCH 09/10] feat: Add configuration files for ChutesAI, NebulaBlock, and OpenRouter models --- .../chutesai/proxy/proxy.chutesai.toml | 342 ++++++++++++++++++ .../nebulablock/proxy/proxy.nebulablock.toml | 165 +++++++++ .../openrouter/proxy/proxy.openrouter.toml | 181 +++++++++ 3 files changed, 688 insertions(+) create mode 100644 infra/litellm/chutesai/proxy/proxy.chutesai.toml create mode 100644 infra/litellm/nebulablock/proxy/proxy.nebulablock.toml create mode 100644 infra/litellm/openrouter/proxy/proxy.openrouter.toml diff --git a/infra/litellm/chutesai/proxy/proxy.chutesai.toml b/infra/litellm/chutesai/proxy/proxy.chutesai.toml new file mode 100644 index 0000000..c037605 --- /dev/null +++ b/infra/litellm/chutesai/proxy/proxy.chutesai.toml @@ -0,0 +1,342 @@ +[router] +url = "http://localhost:8001" +api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg" + +[endpoint] +url = "http://127.0.0.1:4001/v1/chat/completions" +api-key = "sk-EyO5DwID9Sm_WwzJawRPug" + +[metadata] +id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq" +name = "AiMo Network - ChutesAI" +category = "completion_model" + +# ------------------------------- +# Free Models (0.0 pricing) +# ------------------------------- +[[metadata.models]] +name = "glm-4_5-air-free" +display_name = "GLM 4.5 Air" +provider_name = "zhipu" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "openai-gpt-oss-20b-free" +display_name = "GPT-OSS 20B" +provider_name = "openai" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "gemma-3-4b-it-free" +display_name = "Gemma 3 4B IT" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "longcat-flash-chat-fp8-free" +display_name = "LongCat Flash Chat FP8" +provider_name = "meituan" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "longcat-flash-thinking-fp8-free" +display_name = "LongCat Flash Thinking FP8" +provider_name = "meituan" +[[metadata.models.pricing]] +token = "USDC_9" 
+input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "tongyi-deepresearch-30b-free" +display_name = "Tongyi DeepResearch 30B" +provider_name = "alibaba" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +# ------------------------------- +# Budget Models (Low cost) +# ------------------------------- +[[metadata.models]] +name = "llama-3_2-1b-instruct" +display_name = "LLaMA 3.2 1B Instruct" +provider_name = "meta" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 10 + +[[metadata.models]] +name = "llama-3_2-3b-instruct" +display_name = "LLaMA 3.2 3B Instruct" +provider_name = "meta" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 10 + +[[metadata.models]] +name = "gemma-2-9b-it" +display_name = "Gemma 2 9B IT" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 20 + +[[metadata.models]] +name = "gemmasutra-pro-27b" +display_name = "Gemmasutra Pro 27B" +provider_name = "thedrummer" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 30 + +[[metadata.models]] +name = "dolphin3-r1-mistral-24b" +display_name = "Dolphin 3.0 R1 Mistral 24B" +provider_name = "cognitivecomputations" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 30 + +# ------------------------------- +# Mid-range Models +# ------------------------------- +[[metadata.models]] +name = "gemma-3-12b-it" +display_name = "Gemma 3 12B IT" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 40 +output_price = 140 + +[[metadata.models]] +name = "hermes-4-14b" +display_name = "Hermes 4 14B" +provider_name = "nousresearch" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 50 + +[[metadata.models]] +name = "deephermes-3-llama-3-8b" +display_name = "DeepHermes 3 LLaMA 3 8B" +provider_name = "nousresearch" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 50 + +[[metadata.models]] +name = "deepseek-r1-0528-qwen3-8b" +display_name = "DeepSeek R1 0528 Qwen3 8B" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 50 + +[[metadata.models]] +name = "mistral-nemo-instruct" +display_name = "Mistral Nemo Instruct" +provider_name = "mistral" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 20 +output_price = 70 + +[[metadata.models]] +name = "deepcoder-14b-preview" +display_name = "DeepCoder 14B Preview" +provider_name = "agentica" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 20 +output_price = 70 + +[[metadata.models]] +name = "kimi-vl-a3b-thinking" +display_name = "Kimi VL A3B Thinking" +provider_name = "moonshot" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 20 +output_price = 70 + +[[metadata.models]] +name = "openhands-lm-32b" +display_name = "OpenHands LM 32B" +provider_name = "allhands" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 20 +output_price = 70 + +[[metadata.models]] +name = "kimi-dev-72b" +display_name = "Kimi Dev 72B" +provider_name = "moonshot" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 70 +output_price = 260 + +[[metadata.models]] +name = "llama-3_3-nemotron-super-49b" +display_name = "LLaMA 3.3 Nemotron Super 49B" +provider_name = "nvidia" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 70 +output_price = 260 + +# ------------------------------- +# High-end 
+# -------------------------------
+[[metadata.models]]
+name = "qwen3-30b-a3b-thinking"
+display_name = "Qwen3 30B A3B Thinking"
+provider_name = "alibaba"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 80
+output_price = 290
+
+[[metadata.models]]
+name = "glm-4_5v"
+display_name = "GLM 4.5V"
+provider_name = "zhipu"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 80
+output_price = 330
+
+[[metadata.models]]
+name = "hunyuan-a13b-instruct"
+display_name = "Hunyuan A13B Instruct"
+provider_name = "tencent"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 40
+output_price = 140
+
+[[metadata.models]]
+name = "mistral-small-3_2-24b"
+display_name = "Mistral Small 3.2 24B"
+provider_name = "mistral"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 140
+output_price = 570
+
+[[metadata.models]]
+name = "qwen3-next-80b-a3b-thinking"
+display_name = "Qwen3 Next 80B A3B Thinking"
+provider_name = "alibaba"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 100
+output_price = 800
+
+[[metadata.models]]
+name = "seed-oss-36b-instruct"
+display_name = "Seed OSS 36B Instruct"
+provider_name = "bytedance"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 160
+output_price = 650
+
+[[metadata.models]]
+name = "qwen3-vl-235b-a22b-thinking"
+display_name = "Qwen3 VL 235B A22B Thinking"
+provider_name = "alibaba"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 160
+output_price = 650
+
+[[metadata.models]]
+name = "deepseek-v3_1-base"
+display_name = "DeepSeek V3.1 Base"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 250
+output_price = 1000
+
+# -------------------------------
+# Premium DeepSeek Models
+# -------------------------------
+[[metadata.models]]
+name = "deepseek-r1"
+display_name = "DeepSeek R1"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 250
+output_price = 1000
+
+[[metadata.models]]
+name = "deepseek-v3"
+display_name = "DeepSeek V3"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 250
+output_price = 1000
+
+[[metadata.models]]
+name = "deepseek-v3_1"
+display_name = "DeepSeek V3.1"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 250
+output_price = 1000
+
+[[metadata.models]]
+name = "deepseek-r1-0528"
+display_name = "DeepSeek R1 0528"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 550
+output_price = 1750
+
+[[metadata.models]]
+name = "deepseek-v3_1-turbo"
+display_name = "DeepSeek V3.1 Turbo"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 1000
+output_price = 3000
+
+[[metadata.models]]
+name = "hermes-4-405b-fp8"
+display_name = "Hermes 4 405B FP8"
+provider_name = "nousresearch"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 250
+output_price = 1000
diff --git a/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml b/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml
new file mode 100644
index 0000000..6a373ed
--- /dev/null
+++ b/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml
@@ -0,0 +1,165 @@
+[router]
+url = "http://localhost:8002"
+api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg"
+
+[endpoint]
+url = "http://127.0.0.1:4002/v1/chat/completions"
+api-key = "sk-EyO5DwID9Sm_WwzJawRPug" + +[metadata] +id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq" +name = "AiMo Network - NebulaBlock" +category = "completion_model" + +# ------------------------------- +# OpenAI Models +# ------------------------------- +[[metadata.models]] +name = "gpt-4o-mini" +display_name = "GPT-4o Mini" +provider_name = "openai" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 400 +output_price = 1600 + +# ------------------------------- +# Google Gemini Models +# ------------------------------- +[[metadata.models]] +name = "gemini-2_5-pro" +display_name = "Gemini 2.5 Pro" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 1000 +output_price = 8000 + +[[metadata.models]] +name = "gemini-2_5-flash" +display_name = "Gemini 2.5 Flash" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 240 +output_price = 2000 + +[[metadata.models]] +name = "gemini-2_5-flash-lite" +display_name = "Gemini 2.5 Flash Lite" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 80 +output_price = 320 + +[[metadata.models]] +name = "gemini-2_0-flash" +display_name = "Gemini 2.0 Flash" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 80 +output_price = 320 + +[[metadata.models]] +name = "gemini-2_0-flash-lite" +display_name = "Gemini 2.0 Flash Lite" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 60 +output_price = 240 + +# ------------------------------- +# Community Models (Free) +# ------------------------------- +[[metadata.models]] +name = "l3-ms-nevoria-70b" +display_name = "L3.3 MS Nevoria 70B" +provider_name = "steelskull" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "mistral-small-free" +display_name = "Mistral Small 3.2 24B" +provider_name = "mistral" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "l3-70b-euryale" +display_name = "L3 70B Euryale v2.1" +provider_name = "sao10k" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "l3-8b-stheno" +display_name = "L3 8B Stheno v3.2" +provider_name = "sao10k" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +# ------------------------------- +# DeepSeek Models (Free) +# ------------------------------- +[[metadata.models]] +name = "deepseek-r1-0528-free" +display_name = "DeepSeek R1 0528" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "deepseek-v3-0324-free" +display_name = "DeepSeek V3 0324" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "deepseek-r1-free" +display_name = "DeepSeek R1" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +# ------------------------------- +# Meta Llama Models +# ------------------------------- +[[metadata.models]] +name = "llama-3_3-70b" +display_name = "LLaMA 3.3 70B Instruct" +provider_name = "meta" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 100 +output_price = 300 + +# ------------------------------- +# Qwen Models +# ------------------------------- +[[metadata.models]] +name = "qwq-32b" +display_name = "QwQ 32B" 
+provider_name = "alibaba" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 150 +output_price = 400 diff --git a/infra/litellm/openrouter/proxy/proxy.openrouter.toml b/infra/litellm/openrouter/proxy/proxy.openrouter.toml new file mode 100644 index 0000000..69c44d7 --- /dev/null +++ b/infra/litellm/openrouter/proxy/proxy.openrouter.toml @@ -0,0 +1,181 @@ +[router] +url = "http://localhost:8000" +api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg" + +[endpoint] +url = "http://127.0.0.1:4000/v1/chat/completions" +api-key = "sk-EyO5DwID9Sm_WwzJawRPug" + +[metadata] +id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq" +name = "AiMo Network" +category = "completion_model" + +# ------------------------------- +# DeepSeek +# ------------------------------- +[[metadata.models]] +name = "deepseek-chat-v3" +display_name = "DeepSeek Chat V3 0324" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 200 +output_price = 800 + +[[metadata.models]] +name = "deepseek-chat-v3_1" +display_name = "DeepSeek Chat V3.1" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 200 +output_price = 800 + +[[metadata.models]] +name = "deepseek-r1" +display_name = "DeepSeek R1" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 200 +output_price = 800 + +# ------------------------------- +# OpenAI +# ------------------------------- + +[[metadata.models]] +name = "gpt-oss-20b-free" +display_name = "GPT-OSS 20B" +provider_name = "openai" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 72 +output_price = 280 + +# ------------------------------- +# Meta (LLaMA) +# ------------------------------- +[[metadata.models]] +name = "llama-3_3-70b-free" +display_name = "LLaMA 3.3 70B Instruct" +provider_name = "meta" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 38 +output_price = 120 + +# ------------------------------- +# Moonshot +# ------------------------------- +#[[metadata.models]] +#name = "kimi-k2-free" +#display_name = "Kimi K2" +#provider_name = "moonshot" +#[[metadata.models.pricing]] +#token = "USDC_9" +#input_price = 140 +#output_price = 2490 + +# ------------------------------- +# Qwen +# ------------------------------- +[[metadata.models]] +name = "qwq-32b-free" +display_name = "QwQ 32B" +provider_name = "alibaba" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 75 +output_price = 150 + +[[metadata.models]] +name = "qwen3-235b-a22b" +display_name = "Qwen3 235B" +provider_name = "alibaba" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 75 +output_price = 150 + +# ------------------------------- +# Google +# ------------------------------- +[[metadata.models]] +name = "gemma-3-27b-free" +display_name = "Gemma 3 27B" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 67 +output_price = 267 + +# ------------------------------- +# Mistral +# ------------------------------- +[[metadata.models]] +name = "mistral-small-free" +display_name = "Mistral Small 3.2 24B" +provider_name = "mistral" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 50 +output_price = 100 + +# ------------------------------- +# Venice (Dolphin) +# ------------------------------- +[[metadata.models]] +name = "dolphin-mistral-free" +display_name = "Venice Uncensored" 
+provider_name = "cognitivecomputations" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +# ------------------------------- +# xAI +# ------------------------------- +[[metadata.models]] +name = "grok-3-mini" +display_name = "Grok 3 Mini" +provider_name = "xai" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 300 +output_price = 500 + +# ------------------------------- +# Zhipu +# ------------------------------- +[[metadata.models]] +name = "glm-4_5" +display_name = "GLM 4.5" +provider_name = "zhipu" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 200 +output_price = 800 + +# ------------------------------- +# NousResearch +# ------------------------------- +[[metadata.models]] +name = "nousresearch-hermes-4-70b" +display_name = "Hermes 4 70B" +provider_name = "nousresearch" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 380 +output_price = 1200 + +[[metadata.models]] +name = "nousresearch-hermes-3-llama-3_1-70b" +display_name = "Hermes 3 70B" +provider_name = "nousresearch" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 93 +output_price = 373 \ No newline at end of file From 057c1c042a9b798653f5aa4f9bbd319c9b75802d Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Thu, 2 Oct 2025 01:32:46 +0100 Subject: [PATCH 10/10] feat: Update proxy configuration files for ChutesAI, NebulaBlock, and OpenRouter models --- infra/litellm/chutesai/proxy/proxy.chutesai.toml | 8 ++++---- infra/litellm/nebulablock/proxy/proxy.nebulablock.toml | 8 ++++---- infra/litellm/openrouter/proxy/proxy.openrouter.toml | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/infra/litellm/chutesai/proxy/proxy.chutesai.toml b/infra/litellm/chutesai/proxy/proxy.chutesai.toml index c037605..0f914b6 100644 --- a/infra/litellm/chutesai/proxy/proxy.chutesai.toml +++ b/infra/litellm/chutesai/proxy/proxy.chutesai.toml @@ -1,13 +1,13 @@ [router] -url = "http://localhost:8001" -api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg" +url = "http://localhost:8000" +api-key = "aimo-sk-dev-key" [endpoint] url = "http://127.0.0.1:4001/v1/chat/completions" -api-key = "sk-EyO5DwID9Sm_WwzJawRPug" +api-key = "sk-key" [metadata] -id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq" +id = "solana id" name = "AiMo Network - ChutesAI" category = "completion_model" diff --git a/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml b/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml index 6a373ed..481971c 100644 --- a/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml +++ b/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml @@ -1,13 +1,13 @@ [router] -url = "http://localhost:8002" -api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg" +url = "http://localhost:8000" +api-key = "aimo-sk-dev-key" [endpoint] url = "http://127.0.0.1:4002/v1/chat/completions" -api-key = "sk-EyO5DwID9Sm_WwzJawRPug" +api-key = "api-key" [metadata] -id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq" +id = "solana id" name = "AiMo Network - NebulaBlock" category = "completion_model" diff --git a/infra/litellm/openrouter/proxy/proxy.openrouter.toml b/infra/litellm/openrouter/proxy/proxy.openrouter.toml index 69c44d7..4897cd9 100644 --- 
+++ b/infra/litellm/openrouter/proxy/proxy.openrouter.toml
@@ -1,13 +1,13 @@
 [router]
 url = "http://localhost:8000"
-api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg"
+api-key = "aimo-sk-dev-key"
 
 [endpoint]
 url = "http://127.0.0.1:4000/v1/chat/completions"
-api-key = "sk-EyO5DwID9Sm_WwzJawRPug"
+api-key = "sk-key"
 
 [metadata]
-id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq"
+id = "solana ID"
 name = "AiMo Network"
 category = "completion_model"
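
Each [endpoint] block in these configs points at a LiteLLM proxy exposing the
OpenAI-compatible /v1/chat/completions API, so the quickest way to check that a
provider is wired up is a curl against that URL with the endpoint api-key and
one of the model names declared under [[metadata.models]]. A minimal sketch,
assuming the "sk-key" placeholder from PATCH 10 and the "deepseek-r1" entry
from the OpenRouter config (substitute whatever key and model your LiteLLM
instance actually serves):

    # Smoke-test the OpenRouter-backed proxy on port 4000.
    curl -s http://127.0.0.1:4000/v1/chat/completions \
      -H "Authorization: Bearer sk-key" \
      -H "Content-Type: application/json" \
      -d '{"model": "deepseek-r1", "messages": [{"role": "user", "content": "ping"}]}'

A 200 response carrying a "choices" array confirms the endpoint URL and key
match; a 401 means the api-key here disagrees with the key the LiteLLM proxy
was started with.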
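PATCH 10 replaces the live router and endpoint keys with placeholders. A simple
guard against the original secrets creeping back in is a grep over the config
tree for their distinctive prefixes, a sketch assuming the repo root as the
working directory:

    # Fail loudly if any of the previously committed key prefixes reappear.
    if grep -rn 'aimo-sk-dev-2zpj\|sk-EyO5' infra/litellm/; then
        echo "live-looking keys found above; scrub before pushing" >&2
        exit 1
    fi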