From 1e2617aeedaf8849de8dbbe8a0b436c12aacfb48 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Sun, 28 Sep 2025 05:17:28 +0800 Subject: [PATCH 01/10] Add LiteLLM OpenRouter configuration files and Docker setup - Created .env.openrouter for environment variables including API keys and database URL. - Added docker-compose.openrouter.yml to define services for LiteLLM OpenRouter and PostgreSQL database. - Introduced openrouter_config.yaml with model configurations and general settings for LiteLLM. --- infra/litellm/chutesai/.env.chutes | 8 + infra/litellm/chutesai/textModelsList.txt | 1796 +++++++++++++++++ infra/litellm/openrouter/.env.openrouter | 10 + .../docker-compose.openrouter.yml} | 6 +- .../openrouter_config.yaml} | 0 5 files changed, 1817 insertions(+), 3 deletions(-) create mode 100644 infra/litellm/chutesai/.env.chutes create mode 100644 infra/litellm/chutesai/textModelsList.txt create mode 100644 infra/litellm/openrouter/.env.openrouter rename infra/litellm/{docker-compose.litellm.yml => openrouter/docker-compose.openrouter.yml} (88%) rename infra/litellm/{litellm_config.yaml => openrouter/openrouter_config.yaml} (100%) diff --git a/infra/litellm/chutesai/.env.chutes b/infra/litellm/chutesai/.env.chutes new file mode 100644 index 0000000..5f24924 --- /dev/null +++ b/infra/litellm/chutesai/.env.chutes @@ -0,0 +1,8 @@ +# LiteLLM Proxy Configuration +LITELLM_MASTER_KEY=sk-TiKnmxLUrn9aLdO1MHUGkdL-w8bwPUieo0aiqAosTQ0 + +# Model Provider API Keys (used by LiteLLM) +CHUTESAI_API_KEY=cpk_5e4867be39504f8b887fd539ebaf599c.d6116916424e5e9c8c88ed6e1bc1b05a.TUps4clNMPQOECD5lDDpKbmkheo2JWry + +# Database (Postgres) +LITELLM_DATABASE_URL=postgresql://litellm:litellm123@db:5432/litellm \ No newline at end of file diff --git a/infra/litellm/chutesai/textModelsList.txt b/infra/litellm/chutesai/textModelsList.txt new file mode 100644 index 0000000..c2370b6 --- /dev/null +++ b/infra/litellm/chutesai/textModelsList.txt @@ -0,0 +1,1796 @@ +{ + "object": "list", + "data": [ + { + "id": "unsloth/gemma-3-12b-it", + "root": "unsloth/gemma-3-12b-it", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 96000 + }, + { + "id": "deepseek-ai/DeepSeek-R1", + "root": "deepseek-ai/DeepSeek-R1", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "NousResearch/Hermes-4-405B-FP8", + "root": "NousResearch/Hermes-4-405B-FP8", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "deepseek-ai/DeepSeek-V3-0324", + "root": "deepseek-ai/DeepSeek-V3-0324", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": 
"deepseek-ai/DeepSeek-V3.1", + "root": "deepseek-ai/DeepSeek-V3.1", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "deepseek-ai/DeepSeek-R1-0528", + "root": "deepseek-ai/DeepSeek-R1-0528", + "price": { + "input": { + "tao": 0.0013427045157166921, + "usd": 0.4 + }, + "output": { + "tao": 0.005874332256260527, + "usd": 1.75 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.4, + "completion": 1.75 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "zai-org/GLM-4.5-Air", + "root": "zai-org/GLM-4.5-Air", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "NousResearch/DeepHermes-3-Llama-3-8B-Preview", + "root": "NousResearch/DeepHermes-3-Llama-3-8B-Preview", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.00016783806446458652, + "usd": 0.05 + } + }, + "object": "model", + "parent": null, + "created": 1758999329, + "pricing": { + "prompt": 0.01, + "completion": 0.05 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-85501f5e49754c08a052a755b67e9797", + "group": null, + "object": "model_permission", + "created": 1758999329, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 131072 + }, + { + "id": "unsloth/Llama-3.2-3B-Instruct", + "root": "unsloth/Llama-3.2-3B-Instruct", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.0000335676128929173, + "usd": 0.01 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.01, + "completion": 0.01 + }, + "owned_by": "sglang", + "max_model_len": 16384 + }, + { + "id": "Qwen/Qwen3-32B", + "root": "Qwen/Qwen3-32B", + "price": { + "input": { + "tao": 0.0001007028386787519, + "usd": 0.03 + }, + "output": { + "tao": 0.0004363789676079249, + "usd": 0.13 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.03, + "completion": 0.13 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "deepseek-ai/DeepSeek-V3", + "root": "deepseek-ai/DeepSeek-V3", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "zai-org/GLM-4.5-FP8", + "root": "zai-org/GLM-4.5-FP8", + "price": { + "input": { + "tao": 0.0013762721286096093, + "usd": 0.41 + }, + "output": { + "tao": 0.005538656127331354, + "usd": 1.65 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.41, + "completion": 1.65 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "Qwen/Qwen3-14B", + "root": "Qwen/Qwen3-14B", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + 
"output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "NousResearch/DeepHermes-3-Mistral-24B-Preview", + "root": "NousResearch/DeepHermes-3-Mistral-24B-Preview", + "price": { + "input": { + "tao": 0.0004363789676079249, + "usd": 0.13 + }, + "output": { + "tao": 0.0017119482575387822, + "usd": 0.51 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.13, + "completion": 0.51 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "NousResearch/Hermes-4-70B", + "root": "NousResearch/Hermes-4-70B", + "price": { + "input": { + "tao": 0.0003692437418220903, + "usd": 0.11 + }, + "output": { + "tao": 0.0012755692899308574, + "usd": 0.38 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.11, + "completion": 0.38 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", + "root": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.00016783806446458652, + "usd": 0.05 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.01, + "completion": 0.05 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "root": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "price": { + "input": { + "tao": 0.0007384874836441806, + "usd": 0.22 + }, + "output": { + "tao": 0.003188923224827143, + "usd": 0.95 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.22, + "completion": 0.95 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "unsloth/Mistral-Small-24B-Instruct-2501", + "root": "unsloth/Mistral-Small-24B-Instruct-2501", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0005035141933937595, + "usd": 0.15 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.15 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "root": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "price": { + "input": { + "tao": 0.00033567612892917303, + "usd": 0.1 + }, + "output": { + "tao": 0.0013091369028237747, + "usd": 0.39 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.1, + "completion": 0.39 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "tngtech/DeepSeek-TNG-R1T2-Chimera", + "root": "tngtech/DeepSeek-TNG-R1T2-Chimera", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "tngtech/DeepSeek-R1T-Chimera", + "root": "tngtech/DeepSeek-R1T-Chimera", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "Tesslate/UIGEN-X-32B-0727", + "root": "Tesslate/UIGEN-X-32B-0727", + "price": 
{ + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "Qwen/Qwen2.5-72B-Instruct", + "root": "Qwen/Qwen2.5-72B-Instruct", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0008727579352158498, + "usd": 0.26 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.07, + "completion": 0.26 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "chutesai/Mistral-Small-3.2-24B-Instruct-2506", + "root": "chutesai/Mistral-Small-3.2-24B-Instruct-2506", + "price": { + "input": { + "tao": 0.0004699465805008422, + "usd": 0.14 + }, + "output": { + "tao": 0.0019133539348962858, + "usd": 0.57 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.14, + "completion": 0.57 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "moonshotai/Kimi-K2-Instruct-0905", + "root": "moonshotai/Kimi-K2-Instruct-0905", + "price": { + "input": { + "tao": 0.0013427045157166921, + "usd": 0.4 + }, + "output": { + "tao": 0.0075527129009063925, + "usd": 2.25 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.4, + "completion": 2.25 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "zai-org/GLM-4-32B-0414", + "root": "zai-org/GLM-4-32B-0414", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "deepseek-ai/DeepSeek-V3.1-Terminus", + "root": "deepseek-ai/DeepSeek-V3.1-Terminus", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "root": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "price": { + "input": { + "tao": 0.0001007028386787519, + "usd": 0.03 + }, + "output": { + "tao": 0.0004363789676079249, + "usd": 0.13 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.03, + "completion": 0.13 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "microsoft/MAI-DS-R1-FP8", + "root": "microsoft/MAI-DS-R1-FP8", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "unsloth/gemma-3-27b-it", + "root": "unsloth/gemma-3-27b-it", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0008727579352158498, + "usd": 0.26 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.07, + "completion": 0.26 + }, + "owned_by": "sglang", + "max_model_len": 96000 + }, + { + "id": "openai/gpt-oss-120b", + "root": "openai/gpt-oss-120b", + "price": { + "input": { + 
"tao": 0.00016783806446458652, + "usd": 0.05 + }, + "output": { + "tao": 0.0008391903223229325, + "usd": 0.25 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.05, + "completion": 0.25 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "OpenGVLab/InternVL3-78B", + "root": "OpenGVLab/InternVL3-78B", + "price": { + "input": { + "tao": 0.0001007028386787519, + "usd": 0.03 + }, + "output": { + "tao": 0.0004363789676079249, + "usd": 0.13 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.03, + "completion": 0.13 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "TheDrummer/Gemmasutra-Pro-27B-v1.1", + "root": "TheDrummer/Gemmasutra-Pro-27B-v1.1", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.0001007028386787519, + "usd": 0.03 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.01, + "completion": 0.03 + }, + "owned_by": "sglang", + "max_model_len": 8192 + }, + { + "id": "Qwen/Qwen2.5-Coder-32B-Instruct", + "root": "Qwen/Qwen2.5-Coder-32B-Instruct", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "root": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "price": { + "input": { + "tao": 0.00033567612892917303, + "usd": 0.1 + }, + "output": { + "tao": 0.0013091369028237747, + "usd": 0.39 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.1, + "completion": 0.39 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "Qwen/Qwen3-235B-A22B", + "root": "Qwen/Qwen3-235B-A22B", + "price": { + "input": { + "tao": 0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "root": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0009398931610016844, + "usd": 0.28 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 0.07, + "completion": 0.28 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "TheDrummer/Tunguska-39B-v1", + "root": "TheDrummer/Tunguska-39B-v1", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0005370818062866768, + "usd": 0.16 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.04, + "completion": 0.16 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "unsloth/Mistral-Nemo-Instruct-2407", + "root": "unsloth/Mistral-Nemo-Instruct-2407", + "price": { + "input": { + "tao": 0.0000671352257858346, + "usd": 0.02 + }, + "output": { + "tao": 0.0002349732902504211, + "usd": 0.07 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.02, + "completion": 0.07 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "TheDrummer/Skyfall-36B-v2", + "root": "TheDrummer/Skyfall-36B-v2", + "price": { + "input": { + "tao": 
0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0005370818062866768, + "usd": 0.16 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.16 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "root": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0009398931610016844, + "usd": 0.28 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.07, + "completion": 0.28 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "root": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "price": { + "input": { + "tao": 0.00033567612892917303, + "usd": 0.1 + }, + "output": { + "tao": 0.0026854090314333843, + "usd": 0.8 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.1, + "completion": 0.8 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "chutesai/Mistral-Small-3.1-24B-Instruct-2503", + "root": "chutesai/Mistral-Small-3.1-24B-Instruct-2503", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0005035141933937595, + "usd": 0.15 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 0.04, + "completion": 0.15 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "ArliAI/QwQ-32B-ArliAI-RpR-v1", + "root": "ArliAI/QwQ-32B-ArliAI-RpR-v1", + "price": { + "input": { + "tao": 0.0000671352257858346, + "usd": 0.02 + }, + "output": { + "tao": 0.0002349732902504211, + "usd": 0.07 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.02, + "completion": 0.07 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "chutesai/Devstral-Small-2505", + "root": "chutesai/Devstral-Small-2505", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "Qwen/Qwen2.5-VL-72B-Instruct", + "root": "Qwen/Qwen2.5-VL-72B-Instruct", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0009398931610016844, + "usd": 0.28 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.07, + "completion": 0.28 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "Qwen/Qwen3-30B-A3B", + "root": "Qwen/Qwen3-30B-A3B", + "price": { + "input": { + "tao": 0.0002014056773575038, + "usd": 0.06 + }, + "output": { + "tao": 0.0007384874836441806, + "usd": 0.22 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.06, + "completion": 0.22 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "Qwen/Qwen2.5-VL-32B-Instruct", + "root": "Qwen/Qwen2.5-VL-32B-Instruct", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 16384 + }, + { + "id": "zai-org/GLM-Z1-32B-0414", + "root": "zai-org/GLM-Z1-32B-0414", + "price": { + "input": { + "tao": 
0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "zai-org/GLM-4.5-turbo", + "root": "zai-org/GLM-4.5-turbo", + "price": { + "input": { + "tao": 0.00335676128929173, + "usd": 1.0 + }, + "output": { + "tao": 0.01007028386787519, + "usd": 3.0 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 1.0, + "completion": 3.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "openai/gpt-oss-20b", + "root": "openai/gpt-oss-20b", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "unsloth/gemma-2-9b-it", + "root": "unsloth/gemma-2-9b-it", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.0000671352257858346, + "usd": 0.02 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.01, + "completion": 0.02 + }, + "owned_by": "sglang", + "max_model_len": 8192 + }, + { + "id": "cognitivecomputations/Dolphin3.0-Mistral-24B", + "root": "cognitivecomputations/Dolphin3.0-Mistral-24B", + "price": { + "input": { + "tao": 0.0001007028386787519, + "usd": 0.03 + }, + "output": { + "tao": 0.0003692437418220903, + "usd": 0.11 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.03, + "completion": 0.11 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "Qwen/Qwen3-8B", + "root": "Qwen/Qwen3-8B", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "shisa-ai/shisa-v2-llama3.3-70b", + "root": "shisa-ai/shisa-v2-llama3.3-70b", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "deepseek-ai/DeepSeek-V3.1-turbo", + "root": "deepseek-ai/DeepSeek-V3.1-turbo", + "price": { + "input": { + "tao": 0.00335676128929173, + "usd": 1.0 + }, + "output": { + "tao": 0.01007028386787519, + "usd": 3.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 1.0, + "completion": 3.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "moonshotai/Kimi-Dev-72B", + "root": "moonshotai/Kimi-Dev-72B", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0008727579352158498, + "usd": 0.26 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.07, + "completion": 0.26 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "deepseek-ai/DeepSeek-V3-0324-turbo", + "root": "deepseek-ai/DeepSeek-V3-0324-turbo", + "price": { + "input": { + "tao": 0.00335676128929173, + "usd": 1.0 + }, + "output": { + "tao": 0.01007028386787519, + "usd": 3.0 + } + }, + "object": 
"model", + "created": 1758999330, + "pricing": { + "prompt": 1.0, + "completion": 3.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "meituan-longcat/LongCat-Flash-Chat-FP8", + "root": "meituan-longcat/LongCat-Flash-Chat-FP8", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "agentica-org/DeepCoder-14B-Preview", + "root": "agentica-org/DeepCoder-14B-Preview", + "price": { + "input": { + "tao": 0.0000671352257858346, + "usd": 0.02 + }, + "output": { + "tao": 0.0002349732902504211, + "usd": 0.07 + } + }, + "object": "model", + "created": 1758999330, + "pricing": { + "prompt": 0.02, + "completion": 0.07 + }, + "owned_by": "sglang", + "max_model_len": 96000 + }, + { + "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "root": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "price": { + "input": { + "tao": 0.00033567612892917303, + "usd": 0.1 + }, + "output": { + "tao": 0.0026854090314333843, + "usd": 0.8 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.1, + "completion": 0.8 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "tencent/Hunyuan-A13B-Instruct", + "root": "tencent/Hunyuan-A13B-Instruct", + "price": { + "input": { + "tao": 0.0001342704515716692, + "usd": 0.04 + }, + "output": { + "tao": 0.0004699465805008422, + "usd": 0.14 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.04, + "completion": 0.14 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "moonshotai/Kimi-VL-A3B-Thinking", + "root": "moonshotai/Kimi-VL-A3B-Thinking", + "price": { + "input": { + "tao": 0.0000671352257858346, + "usd": 0.02 + }, + "output": { + "tao": 0.0002349732902504211, + "usd": 0.07 + } + }, + "object": "model", + "parent": null, + "created": 1758999327, + "pricing": { + "prompt": 0.02, + "completion": 0.07 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-3dcbdf5a380c416fb2fbd0db315efd85", + "group": null, + "object": "model_permission", + "created": 1758999327, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 131072 + }, + { + "id": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", + "root": "cognitivecomputations/Dolphin3.0-R1-Mistral-24B", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.0001007028386787519, + "usd": 0.03 + } + }, + "object": "model", + "created": 1758999329, + "pricing": { + "prompt": 0.01, + "completion": 0.03 + }, + "owned_by": "sglang", + "max_model_len": 32768 + }, + { + "id": "meituan-longcat/LongCat-Flash-Thinking-FP8", + "root": "meituan-longcat/LongCat-Flash-Thinking-FP8", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "deepseek-ai/DeepSeek-V3.1-Base", + "root": "deepseek-ai/DeepSeek-V3.1-Base", + "price": { + "input": { + "tao": s0.0008391903223229325, + "usd": 0.25 + }, + "output": { + "tao": 0.00335676128929173, + "usd": 1.0 + } + }, + "object": 
"model", + "created": 1758999330, + "pricing": { + "prompt": 0.25, + "completion": 1.0 + }, + "owned_by": "sglang", + "max_model_len": 163840 + }, + { + "id": "Qwen/Qwen3-VL-235B-A22B-Thinking", + "root": "Qwen/Qwen3-VL-235B-A22B-Thinking", + "price": { + "input": { + "tao": 0.0005370818062866768, + "usd": 0.16 + }, + "output": { + "tao": 0.0021818948380396244, + "usd": 0.65 + } + }, + "object": "model", + "parent": null, + "created": 1758999329, + "pricing": { + "prompt": 0.16, + "completion": 0.65 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-ed83cf3457094ad0be21124eeceffb2b", + "group": null, + "object": "model_permission", + "created": 1758999329, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 262144 + }, + { + "id": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B", + "root": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "zai-org/GLM-4.5V", + "root": "zai-org/GLM-4.5V", + "price": { + "input": { + "tao": 0.0002685409031433384, + "usd": 0.08 + }, + "output": { + "tao": 0.001107731225466271, + "usd": 0.33 + } + }, + "object": "model", + "parent": null, + "created": 1758999327, + "pricing": { + "prompt": 0.08, + "completion": 0.33 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-7e45ae7399bd45e7851cc0bc864b352e", + "group": null, + "object": "model_permission", + "created": 1758999327, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 65536 + }, + { + "id": "unsloth/gemma-3-4b-it", + "root": "unsloth/gemma-3-4b-it", + "price": { + "input": { + "tao": 0.0, + "usd": 0.0 + }, + "output": { + "tao": 0.0, + "usd": 0.0 + } + }, + "object": "model", + "created": 1758999327, + "pricing": { + "prompt": 0.0, + "completion": 0.0 + }, + "owned_by": "sglang", + "max_model_len": 96000 + }, + { + "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "root": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "price": { + "input": { + "tao": 0.0002685409031433384, + "usd": 0.08 + }, + "output": { + "tao": 0.0009734607738946016, + "usd": 0.29 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.08, + "completion": 0.29 + }, + "owned_by": "sglang", + "max_model_len": 262144 + }, + { + "id": "ByteDance-Seed/Seed-OSS-36B-Instruct", + "root": "ByteDance-Seed/Seed-OSS-36B-Instruct", + "price": { + "input": { + "tao": 0.0005370818062866768, + "usd": 0.16 + }, + "output": { + "tao": 0.0021818948380396244, + "usd": 0.65 + } + }, + "object": "model", + "created": 1758999328, + "pricing": { + "prompt": 0.16, + "completion": 0.65 + }, + "owned_by": "sglang", + "max_model_len": 131072 + }, + { + "id": "NousResearch/Hermes-4-14B", + "root": "NousResearch/Hermes-4-14B", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.00016783806446458652, + "usd": 0.05 + } + }, + "object": "model", + "created": 1758999326, + "pricing": { + "prompt": 0.01, + "completion": 0.05 + }, + 
"owned_by": "sglang", + "max_model_len": 40960 + }, + { + "id": "unsloth/Llama-3.2-1B-Instruct", + "root": "unsloth/Llama-3.2-1B-Instruct", + "price": { + "input": { + "tao": 0.0000335676128929173, + "usd": 0.01 + }, + "output": { + "tao": 0.0000335676128929173, + "usd": 0.01 + } + }, + "object": "model", + "parent": null, + "created": 1758999326, + "pricing": { + "prompt": 0.01, + "completion": 0.01 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-c5aef38bb2e849aeb8b9bdb89ffb08bb", + "group": null, + "object": "model_permission", + "created": 1758999326, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 16384 + }, + { + "id": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", + "root": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", + "price": { + "input": { + "tao": 0.0002349732902504211, + "usd": 0.07 + }, + "output": { + "tao": 0.0008727579352158498, + "usd": 0.26 + } + }, + "object": "model", + "parent": null, + "created": 1758999326, + "pricing": { + "prompt": 0.07, + "completion": 0.26 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-3eb629c0adaa4d358130bc1adbde4432", + "group": null, + "object": "model_permission", + "created": 1758999326, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 131072 + }, + { + "id": "all-hands/openhands-lm-32b-v0.1-ep3", + "root": "all-hands/openhands-lm-32b-v0.1-ep3", + "price": { + "input": { + "tao": 0.0000671352257858346, + "usd": 0.02 + }, + "output": { + "tao": 0.0002349732902504211, + "usd": 0.07 + } + }, + "object": "model", + "parent": null, + "created": 1758999326, + "pricing": { + "prompt": 0.02, + "completion": 0.07 + }, + "owned_by": "vllm", + "permission": [ + { + "id": "modelperm-e829488c88be4eaba9fba9c18365a02f", + "group": null, + "object": "model_permission", + "created": 1758999326, + "allow_view": true, + "is_blocking": false, + "organization": "*", + "allow_logprobs": true, + "allow_sampling": true, + "allow_fine_tuning": false, + "allow_create_engine": false, + "allow_search_indices": false + } + ], + "max_model_len": 16384 + } + ] +} diff --git a/infra/litellm/openrouter/.env.openrouter b/infra/litellm/openrouter/.env.openrouter new file mode 100644 index 0000000..770eecd --- /dev/null +++ b/infra/litellm/openrouter/.env.openrouter @@ -0,0 +1,10 @@ +# LiteLLM Proxy Configuration +LITELLM_MASTER_KEY=sk-TiKnmxLUrn9aLdO1MHUGkdL-w8bwPUieo0aiqAosTQ0 + +# Model Provider API Keys (used by LiteLLM) +OPENROUTER_API_KEY=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 + +CHUTESAI_API_KEY=cpk_5e4867be39504f8b887fd539ebaf599c.d6116916424e5e9c8c88ed6e1bc1b05a.TUps4clNMPQOECD5lDDpKbmkheo2JWry + +# Database (Postgres) +LITELLM_DATABASE_URL=postgresql://litellm:litellm123@db:5432/litellm \ No newline at end of file diff --git a/infra/litellm/docker-compose.litellm.yml b/infra/litellm/openrouter/docker-compose.openrouter.yml similarity index 88% rename from infra/litellm/docker-compose.litellm.yml rename to infra/litellm/openrouter/docker-compose.openrouter.yml index 5118e33..4ef2742 100644 --- a/infra/litellm/docker-compose.litellm.yml +++ b/infra/litellm/openrouter/docker-compose.openrouter.yml @@ -1,11 +1,11 @@ 
services: - litellm-proxy: + litellm-openrouter: image: ghcr.io/berriai/litellm:main-latest - container_name: litellm-proxy + container_name: litellm-openrouter ports: - "4000:4000" volumes: - - ./litellm_config.yaml:/app/config.yaml + - ./openrouter_config.yaml:/app/config.yaml - ./data:/data env_file: - .env.litellm diff --git a/infra/litellm/litellm_config.yaml b/infra/litellm/openrouter/openrouter_config.yaml similarity index 100% rename from infra/litellm/litellm_config.yaml rename to infra/litellm/openrouter/openrouter_config.yaml From a24634ec3d9e7a758af5edfd9d289bf255ac88b5 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Mon, 29 Sep 2025 14:55:40 +0800 Subject: [PATCH 02/10] feat: Add Nebula Block provider integration with Docker support - Updated .gitignore to include new environment variable files for Nebula Block, ChutesAI, and OpenRouter. - Removed obsolete .env.litellm.example and .env.chutes files. - Added .env.nebulablock.example with configuration for Nebula Block. - Created README.md for Nebula Block service with setup instructions and model details. - Implemented docker-compose configuration for Nebula Block service. - Added nebulablock_config.yaml for model configurations and pricing. - Introduced textModelList.txt for available models and their pricing. - Created shared Docker Compose file for infrastructure services including PostgreSQL, Redis, Prometheus, and Grafana. - Added initialization script for setting up database schemas for multiple LiteLLM services. - Ensured data directories are tracked with .gitkeep files. --- .gitignore | 5 +- infra/litellm/.env.litellm.example | 5 - infra/litellm/chutesai/.env.chutes | 8 - infra/litellm/chutesai/data/.gitkeep | 1 + .../chutesai/docker-compose.chutesai.yml | 25 ++ infra/litellm/data/.gitkeep | 0 infra/litellm/manage-all-services.sh | 301 ++++++++++++++++++ .../nebulablock/.env.nebulablock.example | 18 ++ infra/litellm/nebulablock/README.md | 163 ++++++++++ infra/litellm/nebulablock/data/.gitkeep | 1 + .../docker-compose.nebulablock.yml | 25 ++ .../nebulablock/nebulablock_config.yaml | 190 +++++++++++ infra/litellm/nebulablock/textModelList.txt | 154 +++++++++ infra/litellm/openrouter/data/.gitkeep | 1 + .../litellm/shared/docker-compose.shared.yml | 105 ++++++ .../shared/init-scripts/01-init-schemas.sh | 63 ++++ 16 files changed, 1051 insertions(+), 14 deletions(-) delete mode 100644 infra/litellm/.env.litellm.example delete mode 100644 infra/litellm/chutesai/.env.chutes create mode 100644 infra/litellm/chutesai/data/.gitkeep create mode 100644 infra/litellm/chutesai/docker-compose.chutesai.yml delete mode 100644 infra/litellm/data/.gitkeep create mode 100644 infra/litellm/manage-all-services.sh create mode 100644 infra/litellm/nebulablock/.env.nebulablock.example create mode 100644 infra/litellm/nebulablock/README.md create mode 100644 infra/litellm/nebulablock/data/.gitkeep create mode 100644 infra/litellm/nebulablock/docker-compose.nebulablock.yml create mode 100644 infra/litellm/nebulablock/nebulablock_config.yaml create mode 100644 infra/litellm/nebulablock/textModelList.txt create mode 100644 infra/litellm/openrouter/data/.gitkeep create mode 100644 infra/litellm/shared/docker-compose.shared.yml create mode 100755 infra/litellm/shared/init-scripts/01-init-schemas.sh diff --git a/.gitignore b/.gitignore index c6b938c..8cef037 100644 --- a/.gitignore +++ b/.gitignore @@ -327,4 +327,7 @@ data/prompts/* # VS Code .vscode/ -.env.litellm +# Environment variables files +.env.nebulablock +.env.chutesai +.env.openrouter \ No 
newline at end of file diff --git a/infra/litellm/.env.litellm.example b/infra/litellm/.env.litellm.example deleted file mode 100644 index 070432c..0000000 --- a/infra/litellm/.env.litellm.example +++ /dev/null @@ -1,5 +0,0 @@ -# LiteLLM Proxy Configuration -LITELLM_MASTER_KEY=sk-LITELLM_MASTER_KEY - -# Model Provider API Keys (used by LiteLLM) -OPENROUTER_API_KEY=sk-OPENROUTER_API_KEY diff --git a/infra/litellm/chutesai/.env.chutes b/infra/litellm/chutesai/.env.chutes deleted file mode 100644 index 5f24924..0000000 --- a/infra/litellm/chutesai/.env.chutes +++ /dev/null @@ -1,8 +0,0 @@ -# LiteLLM Proxy Configuration -LITELLM_MASTER_KEY=sk-TiKnmxLUrn9aLdO1MHUGkdL-w8bwPUieo0aiqAosTQ0 - -# Model Provider API Keys (used by LiteLLM) -CHUTESAI_API_KEY=cpk_5e4867be39504f8b887fd539ebaf599c.d6116916424e5e9c8c88ed6e1bc1b05a.TUps4clNMPQOECD5lDDpKbmkheo2JWry - -# Database (Postgres) -LITELLM_DATABASE_URL=postgresql://litellm:litellm123@db:5432/litellm \ No newline at end of file diff --git a/infra/litellm/chutesai/data/.gitkeep b/infra/litellm/chutesai/data/.gitkeep new file mode 100644 index 0000000..6e9476f --- /dev/null +++ b/infra/litellm/chutesai/data/.gitkeep @@ -0,0 +1 @@ +# This file ensures the data directory is tracked by git but its contents are ignored \ No newline at end of file diff --git a/infra/litellm/chutesai/docker-compose.chutesai.yml b/infra/litellm/chutesai/docker-compose.chutesai.yml new file mode 100644 index 0000000..5e68336 --- /dev/null +++ b/infra/litellm/chutesai/docker-compose.chutesai.yml @@ -0,0 +1,25 @@ +services: + litellm-chutesai: + image: ghcr.io/berriai/litellm:main-latest + container_name: litellm-chutesai + ports: + - "4004:4000" # Different port to avoid conflicts with other providers + volumes: + - ./chutesai_config.yaml:/app/config.yaml + - ./data:/data + env_file: + - .env.chutesai + command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:4000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + networks: + - aimo-llm-network # Use shared network + +networks: + aimo-llm-network: + external: true # Reference external shared network \ No newline at end of file diff --git a/infra/litellm/data/.gitkeep b/infra/litellm/data/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/infra/litellm/manage-all-services.sh b/infra/litellm/manage-all-services.sh new file mode 100644 index 0000000..8e1f391 --- /dev/null +++ b/infra/litellm/manage-all-services.sh @@ -0,0 +1,301 @@ +#!/bin/bash + +# AIMO Multi-Provider LLM Services Management Script +# Usage: ./manage-all-services.sh [command] [service] + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SHARED_DIR="$SCRIPT_DIR/shared" +PROVIDERS=("openrouter" "nebulablock" "phala" "chutesai") + +log() { + echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" +} + +check_network() { + if ! docker network ls | grep -q aimo-llm-network; then + log "🌐 Creating shared network..." + docker network create aimo-llm-network --subnet=172.20.0.0/16 + fi +} + +start_shared_services() { + log "πŸ—οΈ Starting shared infrastructure (database, monitoring)..." + check_network + cd "$SHARED_DIR" + docker-compose -f docker-compose.shared.yml up -d + + # Wait for database to be ready + log "⏳ Waiting for shared database to be ready..." 
+ timeout=60 + while [ $timeout -gt 0 ]; do + if docker exec aimo-shared-db pg_isready -U litellm -d litellm >/dev/null 2>&1; then + log "βœ… Shared database is ready" + break + fi + sleep 2 + ((timeout--)) + done + + if [ $timeout -eq 0 ]; then + log "❌ Shared database failed to start within timeout" + exit 1 + fi +} + +stop_shared_services() { + log "πŸ›‘ Stopping shared infrastructure..." + cd "$SHARED_DIR" + docker-compose -f docker-compose.shared.yml down +} + +start_provider() { + local provider=$1 + if [ ! -d "$SCRIPT_DIR/$provider" ]; then + log "❌ Provider '$provider' not found" + return 1 + fi + + log "πŸš€ Starting $provider service..." + cd "$SCRIPT_DIR/$provider" + + # Find the compose file + compose_file="" + for file in docker-compose.*.yml; do + if [ -f "$file" ]; then + compose_file="$file" + break + fi + done + + if [ -z "$compose_file" ]; then + log "❌ No docker-compose file found for $provider" + return 1 + fi + + docker-compose -f "$compose_file" up -d + log "βœ… $provider service started" +} + +stop_provider() { + local provider=$1 + if [ ! -d "$SCRIPT_DIR/$provider" ]; then + log "❌ Provider '$provider' not found" + return 1 + fi + + log "πŸ›‘ Stopping $provider service..." + cd "$SCRIPT_DIR/$provider" + + compose_file="" + for file in docker-compose.*.yml; do + if [ -f "$file" ]; then + compose_file="$file" + break + fi + done + + if [ -n "$compose_file" ]; then + docker-compose -f "$compose_file" down + fi + log "βœ… $provider service stopped" +} + +start_all_providers() { + for provider in "${PROVIDERS[@]}"; do + if [ -d "$SCRIPT_DIR/$provider" ]; then + start_provider "$provider" + else + log "⚠️ Provider '$provider' directory not found, skipping..." + fi + done +} + +stop_all_providers() { + for provider in "${PROVIDERS[@]}"; do + if [ -d "$SCRIPT_DIR/$provider" ]; then + stop_provider "$provider" + fi + done +} + +show_status() { + log "πŸ“Š AIMO LLM Services Status" + echo "==================================" + + # Check shared services + log "πŸ—οΈ Shared Infrastructure:" + docker ps --filter "name=aimo-shared-" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" + + # Check provider services + log "πŸ€– Provider Services:" + docker ps --filter "name=litellm-" --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" + + # Health checks + log "πŸ₯ Health Status:" + + # Check database + if docker exec aimo-shared-db pg_isready -U litellm -d litellm >/dev/null 2>&1; then + log "βœ… Shared database: healthy" + else + log "❌ Shared database: unhealthy" + fi + + # Check providers + ports=(4001 4002 4003 4004) + provider_names=("openrouter" "nebulablock" "phala" "chutesai") + + for i in "${!ports[@]}"; do + port=${ports[$i]} + name=${provider_names[$i]} + + if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then + log "βœ… $name (port $port): healthy" + else + log "❌ $name (port $port): unhealthy or not running" + fi + done +} + +show_logs() { + local service=$1 + if [ "$service" = "shared" ]; then + cd "$SHARED_DIR" + docker-compose -f docker-compose.shared.yml logs -f + elif [ -n "$service" ] && [ -d "$SCRIPT_DIR/$service" ]; then + cd "$SCRIPT_DIR/$service" + compose_file="" + for file in docker-compose.*.yml; do + if [ -f "$file" ]; then + compose_file="$file" + break + fi + done + + if [ -n "$compose_file" ]; then + docker-compose -f "$compose_file" logs -f + fi + else + log "πŸ“‹ Showing logs for all services (press Ctrl+C to exit):" + docker logs -f --tail=100 $(docker ps --filter "name=aimo-" --filter "name=litellm-" -q) + fi +} + 
+test_all_services() { + log "πŸ§ͺ Testing all LLM services..." + + # Test shared database + if docker exec aimo-shared-db pg_isready -U litellm -d litellm >/dev/null 2>&1; then + log "βœ… Database connection test passed" + else + log "❌ Database connection test failed" + fi + + # Test provider services + ports=(4001 4002 4003 4004) + provider_names=("openrouter" "nebulablock" "phala" "chutesai") + + for i in "${!ports[@]}"; do + port=${ports[$i]} + name=${provider_names[$i]} + + log "Testing $name service on port $port..." + + if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then + log "βœ… $name health check passed" + + # Test models endpoint + if curl -sf "http://localhost:$port/v1/models" >/dev/null 2>&1; then + log "βœ… $name models endpoint accessible" + else + log "⚠️ $name models endpoint not accessible" + fi + else + log "❌ $name service not responding" + fi + done + + log "πŸŽ‰ Service testing completed!" +} + +case "${1:-}" in + start) + if [ "$2" = "shared" ]; then + start_shared_services + elif [ "$2" = "all" ] || [ -z "$2" ]; then + start_shared_services + sleep 5 + start_all_providers + elif [ -n "$2" ]; then + check_network + start_provider "$2" + fi + ;; + stop) + if [ "$2" = "shared" ]; then + stop_shared_services + elif [ "$2" = "all" ] || [ -z "$2" ]; then + stop_all_providers + stop_shared_services + elif [ -n "$2" ]; then + stop_provider "$2" + fi + ;; + restart) + if [ "$2" = "all" ] || [ -z "$2" ]; then + stop_all_providers + stop_shared_services + sleep 3 + start_shared_services + sleep 5 + start_all_providers + elif [ "$2" = "shared" ]; then + stop_shared_services + sleep 3 + start_shared_services + elif [ -n "$2" ]; then + stop_provider "$2" + sleep 2 + start_provider "$2" + fi + ;; + status) + show_status + ;; + logs) + show_logs "$2" + ;; + test) + test_all_services + ;; + *) + echo "AIMO Multi-Provider LLM Services Manager" + echo "" + echo "Usage: $0 {start|stop|restart|status|logs|test} [service]" + echo "" + echo "Commands:" + echo " start [service] - Start services (all, shared, or specific provider)" + echo " stop [service] - Stop services" + echo " restart [service] - Restart services" + echo " status - Show status of all services" + echo " logs [service] - Show logs (all, shared, or specific provider)" + echo " test - Test all services" + echo "" + echo "Services:" + echo " all - All services (default)" + echo " shared - Shared infrastructure (database, monitoring)" + printf " %s\n" "${PROVIDERS[@]}" + echo "" + echo "Examples:" + echo " $0 start # Start all services" + echo " $0 start shared # Start only shared infrastructure" + echo " $0 start nebulablock # Start only Nebula Block service" + echo " $0 stop all # Stop all services" + echo " $0 restart openrouter # Restart OpenRouter service" + echo " $0 status # Show service status" + echo " $0 logs nebulablock # Show Nebula Block logs" + echo " $0 test # Test all services" + exit 1 + ;; +esac \ No newline at end of file diff --git a/infra/litellm/nebulablock/.env.nebulablock.example b/infra/litellm/nebulablock/.env.nebulablock.example new file mode 100644 index 0000000..888c253 --- /dev/null +++ b/infra/litellm/nebulablock/.env.nebulablock.example @@ -0,0 +1,18 @@ +# LiteLLM Proxy Configuration for Nebula Block +LITELLM_MASTER_KEY=sk-nebulablock-proxy-key + +# Nebula Block API Configuration +NEBULABLOCK_API_KEY=your_nebulablock_api_key_here + +# Shared Database Configuration - connects to shared LiteLLM database +# Uses table prefix 'nebulablock_' to separate data logically 
+LITELLM_DATABASE_URL=postgresql://litellm:litellm123@aimo-shared-db:5432/litellm +LITELLM_TABLE_PREFIX=nebulablock_ + +# Service Configuration +SERVICE_NAME=nebulablock-llm-proxy +LOG_LEVEL=INFO + +# Additional Provider Keys (if needed for fallbacks) +# OPENAI_API_KEY=your_openai_key_for_fallbacks +# ANTHROPIC_API_KEY=your_anthropic_key_for_fallbacks \ No newline at end of file diff --git a/infra/litellm/nebulablock/README.md b/infra/litellm/nebulablock/README.md new file mode 100644 index 0000000..ca2baff --- /dev/null +++ b/infra/litellm/nebulablock/README.md @@ -0,0 +1,163 @@ +# Nebula Block LiteLLM Service + +This directory contains the Docker configuration for running LiteLLM proxy with Nebula Block provider integration. + +## Files Structure + +``` +nebulablock/ +├── docker-compose.nebulablock.yml # Docker Compose configuration +├── nebulablock_config.yaml # LiteLLM model configuration +├── .env.nebulablock # Environment variables +├── README.md # This file +└── data/ # Persistent data directory +``` + +## Available Models + +### Premium Models (Paid) +- **OpenAI**: gpt-4o-mini ($0.40/$1.60 per 1M tokens) +- **Google Gemini**: + - gemini-2.5-pro ($1.00/$8.00 per 1M tokens) + - gemini-2.5-flash ($0.24/$2.00 per 1M tokens) + - gemini-2.5-flash-lite ($0.08/$0.32 per 1M tokens) + - gemini-2.0-flash ($0.08/$0.32 per 1M tokens) + - gemini-2.0-flash-lite ($0.06/$0.24 per 1M tokens) +- **Meta Llama**: llama-3.3-70b ($0.10/$0.30 per 1M tokens) +- **Qwen**: qwq-32b ($0.15/$0.40 per 1M tokens) + +### Free Models +- **Community Models**: L3.3-MS-Nevoria-70b, L3-70B-Euryale-v2.1, L3-8B-Stheno-v3.2 +- **Mistral**: Mistral-Small-3.2-24B-Instruct-2506 +- **DeepSeek**: DeepSeek-R1-0528, DeepSeek-V3-0324, DeepSeek-R1 + +## Setup Instructions + +### 1. Configure Environment Variables + +Copy and edit the environment file: +```bash +cp .env.nebulablock.example .env.nebulablock +``` + +Edit `.env.nebulablock` and add your Nebula Block API key: +```bash +NEBULABLOCK_API_KEY=your_actual_api_key_here +``` + +### 2. Start the Service + +```bash +# Start Nebula Block LLM service +docker-compose -f docker-compose.nebulablock.yml up -d + +# Check service status +docker-compose -f docker-compose.nebulablock.yml ps + +# View logs +docker-compose -f docker-compose.nebulablock.yml logs -f +``` + +### 3. Test the Service + +```bash +# Health check +curl http://localhost:4002/health + +# List available models +curl http://localhost:4002/v1/models + +# Test chat completion with a free model +curl -X POST http://localhost:4002/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-nebulablock-proxy-key" \ + -d '{ + "model": "deepseek-r1-free", + "messages": [{"role": "user", "content": "Hello!"}], + "max_tokens": 100 + }' +``` + +### 4. 
Stop the Service + +```bash +# Stop the service +docker-compose -f docker-compose.nebulablock.yml down + +# Stop and remove volumes (caution: this deletes database data) +docker-compose -f docker-compose.nebulablock.yml down -v +``` + +## Configuration Details + +### Model Naming Convention +- Models are prefixed with `nebulablock/` to identify the provider +- Free models are explicitly marked in the configuration +- Pricing information is included for cost tracking + +### Network Configuration +- Service runs on port 4002 to avoid conflicts with other LLM services +- Uses dedicated `nebulablock-network` for isolation +- PostgreSQL database for logging and analytics + +### Fallback Strategy +- Free models are configured as fallbacks for paid models +- Routing strategy set to "least-busy" for load balancing +- Request timeout set to 10 minutes for complex queries + +## Integration with Main AIMO Service + +To use this service in your main AIMO application, configure: + +```bash +# Add to main .env file +LLM_BASE_URL=http://localhost:4002 +LLM_API_KEY=sk-nebulablock-proxy-key +LLM_MODEL_DEFAULT=deepseek-r1-free # Use free model as default +``` + +## Monitoring and Maintenance + +### Health Monitoring +- Health check endpoint: `http://localhost:4002/health` +- Database status included in health checks +- Automatic restart on failure + +### Logs and Analytics +- JSON formatted logs for structured analysis +- Database logging for request analytics +- Optional integration with Langfuse for advanced tracking + +### Resource Management +- Single worker process for development +- Configurable timeout and rate limiting +- Automatic parameter validation and cleanup + +## Troubleshooting + +### Common Issues +1. **Port 4002 already in use**: Change the port in docker-compose.yml +2. **API key invalid**: Verify NEBULABLOCK_API_KEY in .env.nebulablock +3. **Models not loading**: Check nebulablock_config.yaml syntax +4. 
**Database connection issues**: Ensure PostgreSQL container is healthy + +### Debug Mode +Enable debug logging by setting in .env.nebulablock: +```bash +LOG_LEVEL=DEBUG +``` + +### Performance Tuning +For production use, consider: +- Increasing `num_workers` in docker-compose.yml +- Adjusting rate limits in configuration +- Setting up external PostgreSQL database +- Adding Redis for caching + +## Security Considerations + +- Change default master key in production +- Use strong database passwords +- Implement network-level access controls +- Regular API key rotation +- Monitor usage for anomalies \ No newline at end of file diff --git a/infra/litellm/nebulablock/data/.gitkeep new file mode 100644 index 0000000..6e9476f --- /dev/null +++ b/infra/litellm/nebulablock/data/.gitkeep @@ -0,0 +1 @@ +# This file ensures the data directory is tracked by git but its contents are ignored \ No newline at end of file diff --git a/infra/litellm/nebulablock/docker-compose.nebulablock.yml b/infra/litellm/nebulablock/docker-compose.nebulablock.yml new file mode 100644 index 0000000..1965737 --- /dev/null +++ b/infra/litellm/nebulablock/docker-compose.nebulablock.yml @@ -0,0 +1,25 @@ +services: + litellm-nebulablock: + image: ghcr.io/berriai/litellm:main-latest + container_name: litellm-nebulablock + ports: + - "4002:4000" # Different port to avoid conflicts with OpenRouter + volumes: + - ./nebulablock_config.yaml:/app/config.yaml + - ./data:/data + env_file: + - .env.nebulablock + command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:4000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + networks: + - aimo-llm-network # Use shared network + +networks: + aimo-llm-network: + external: true # Reference external shared network \ No newline at end of file diff --git a/infra/litellm/nebulablock/nebulablock_config.yaml b/infra/litellm/nebulablock/nebulablock_config.yaml new file mode 100644 index 0000000..9c7c045 --- /dev/null +++ b/infra/litellm/nebulablock/nebulablock_config.yaml @@ -0,0 +1,190 @@ +model_list: + # OpenAI Models + - model_name: gpt-4o-mini + litellm_params: + model: nebulablock/openai/gpt-4o-mini + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.40 # per 1M tokens + completion: 1.60 # per 1M tokens + + # Google Gemini Models + - model_name: gemini-2_5-pro + litellm_params: + model: nebulablock/gemini/gemini-2.5-pro + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 1.00 + completion: 8.00 + + - model_name: gemini-2_5-flash + litellm_params: + model: nebulablock/gemini/gemini-2.5-flash + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.24 + completion: 2.00 + + - model_name: gemini-2_5-flash-lite + litellm_params: + model: nebulablock/gemini/gemini-2.5-flash-lite + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.08 + completion: 0.32 + + - model_name: gemini-2_0-flash + litellm_params: + model: nebulablock/gemini/gemini-2.0-flash + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.08 + completion: 0.32 + + - model_name: gemini-2_0-flash-lite + litellm_params: + model: nebulablock/gemini/gemini-2.0-flash-lite + api_base: 
https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.06 + completion: 0.24 + + # Community Models (Free) + - model_name: l3-ms-nevoria-70b + litellm_params: + model: nebulablock/Steelskull/L3.3-MS-Nevoria-70b + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free model + completion: 0 + + - model_name: mistral-small-free + litellm_params: + model: nebulablock/mistralai/Mistral-Small-3.2-24B-Instruct-2506 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free model + completion: 0 + + - model_name: l3-70b-euryale + litellm_params: + model: nebulablock/Sao10K/L3-70B-Euryale-v2.1 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free model + completion: 0 + + - model_name: l3-8b-stheno + litellm_params: + model: nebulablock/Sao10K/L3-8B-Stheno-v3.2 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free model + completion: 0 + + # DeepSeek Models + - model_name: deepseek-r1-0528-free + litellm_params: + model: nebulablock/deepseek-ai/DeepSeek-R1-0528 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free version + completion: 0 + + - model_name: deepseek-v3-0324-free + litellm_params: + model: nebulablock/deepseek-ai/DeepSeek-V3-0324 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free version + completion: 0 + + - model_name: deepseek-r1-free + litellm_params: + model: nebulablock/deepseek-ai/DeepSeek-R1 + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0 # Free version + completion: 0 + + # Meta Llama Models + - model_name: llama-3_3-70b + litellm_params: + model: nebulablock/meta-llama/Llama-3.3-70B-Instruct + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.10 + completion: 0.30 + + # Qwen Models + - model_name: qwq-32b + litellm_params: + model: nebulablock/Qwen/QwQ-32B + api_base: https://api.nebulablock.ai/v1 + api_key: os.environ/NEBULABLOCK_API_KEY + pricing: + prompt: 0.15 + completion: 0.40 + +# General settings +general_settings: + master_key: os.environ/LITELLM_MASTER_KEY # Set a master key for proxy auth + database_url: os.environ/LITELLM_DATABASE_URL # Database connection + database_logging: true + service_name: "nebulablock-llm-proxy" + database_params: + # Use specific schema for this service + options: "-c search_path=nebulablock,public" + +# Logging configuration +litellm_settings: + drop_params: true # Drop unsupported params instead of erroring + set_verbose: true + json_logs: true + request_timeout: 600 # 10 minutes timeout + +# Rate limiting and routing +router_settings: + enable_pre_call_checks: true + enable_admin_api: true + model_fallbacks: + # Fallback strategy for paid models to free alternatives + default: ["l3-ms-nevoria-70b", "mistral-small-free", "deepseek-r1-free"] + routing_strategy: "least-busy" + +logging: + level: DEBUG + format: json + +# Health check configuration +health_check: + enable: true + endpoint: "/health" + +# Success/Error callbacks (optional) +# success_callback: ["langfuse"] # Track successful calls +# failure_callback: ["langfuse"] # Track failed calls + +# Custom provider settings for Nebula Block +provider_settings: + nebulablock: + 
base_url: "https://api.nebulablock.ai/v1" + headers: + "User-Agent": "LiteLLM-NebulaBlock/1.0" + rate_limit: + requests_per_minute: 1000 + tokens_per_minute: 100000 \ No newline at end of file diff --git a/infra/litellm/nebulablock/textModelList.txt b/infra/litellm/nebulablock/textModelList.txt new file mode 100644 index 0000000..028107b --- /dev/null +++ b/infra/litellm/nebulablock/textModelList.txt @@ -0,0 +1,154 @@ +{ + "data": [ + { + "id": "openai/gpt-4o-mini", + "object": "model", + "created": 1677610602, + "owned_by": "openai", + "pricing": { + "prompt": 0.40, + "completion": 1.60 + } + }, + { + "id": "gemini/gemini-2.5-pro", + "object": "model", + "created": 1677610602, + "owned_by": "google", + "pricing": { + "prompt": 1.00, + "completion": 8.00 + } + }, + { + "id": "gemini/gemini-2.5-flash", + "object": "model", + "created": 1677610602, + "owned_by": "google", + "pricing": { + "prompt": 0.24, + "completion": 2.00 + } + }, + { + "id": "gemini/gemini-2.5-flash-lite", + "object": "model", + "created": 1677610602, + "owned_by": "google", + "pricing": { + "prompt": 0.08, + "completion": 0.32 + } + }, + { + "id": "gemini/gemini-2.0-flash", + "object": "model", + "created": 1677610602, + "owned_by": "google", + "pricing": { + "prompt": 0.08, + "completion": 0.32 + } + }, + { + "id": "gemini/gemini-2.0-flash-lite", + "object": "model", + "created": 1677610602, + "owned_by": "google", + "pricing": { + "prompt": 0.06, + "completion": 0.24 + } + }, + { + "id": "Steelskull/L3.3-MS-Nevoria-70b", + "object": "model", + "created": 1677610602, + "owned_by": "Steelskull", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "object": "model", + "created": 1677610602, + "owned_by": "mistralai", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "Sao10K/L3-70B-Euryale-v2.1", + "object": "model", + "created": 1677610602, + "owned_by": "Sao10K", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "Sao10K/L3-8B-Stheno-v3.2", + "object": "model", + "created": 1677610602, + "owned_by": "Sao10K", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "deepseek-ai/DeepSeek-R1-0528", + "object": "model", + "created": 1677610602, + "owned_by": "deepseek-ai", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "deepseek-ai/DeepSeek-V3-0324", + "object": "model", + "created": 1677610602, + "owned_by": "deepseek-ai", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "deepseek-ai/DeepSeek-R1", + "object": "model", + "created": 1677610602, + "owned_by": "deepseek-ai", + "pricing": { + "prompt": 0, + "completion": 0 + } + }, + { + "id": "meta-llama/Llama-3.3-70B-Instruct", + "object": "model", + "created": 1677610602, + "owned_by": "meta-llama", + "pricing": { + "prompt": 0.10, + "completion": 0.30 + } + }, + { + "id": "Qwen/QwQ-32B", + "object": "model", + "created": 1677610602, + "owned_by": "Qwen", + "pricing": { + "prompt": 0.15, + "completion": 0.40 + } + } + ] +} \ No newline at end of file diff --git a/infra/litellm/openrouter/data/.gitkeep b/infra/litellm/openrouter/data/.gitkeep new file mode 100644 index 0000000..6e9476f --- /dev/null +++ b/infra/litellm/openrouter/data/.gitkeep @@ -0,0 +1 @@ +# This file ensures the data directory is tracked by git but its contents are ignored \ No newline at end of file diff --git a/infra/litellm/shared/docker-compose.shared.yml b/infra/litellm/shared/docker-compose.shared.yml new file mode 100644 index 
0000000..5a0d566 --- /dev/null +++ b/infra/litellm/shared/docker-compose.shared.yml @@ -0,0 +1,105 @@ +version: '3.8' + +services: + # Shared PostgreSQL Database for all LiteLLM services + aimo-shared-db: + image: postgres:15 + container_name: aimo-shared-db + restart: unless-stopped + environment: + POSTGRES_USER: litellm + POSTGRES_PASSWORD: litellm123 + POSTGRES_DB: litellm + # Enable multiple databases if needed + POSTGRES_MULTIPLE_DATABASES: litellm,analytics,monitoring + volumes: + - shared-db-data:/var/lib/postgresql/data + - ./init-scripts:/docker-entrypoint-initdb.d # Database initialization scripts + ports: + - "5432:5432" # Expose for external access if needed + networks: + - aimo-llm-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U litellm -d litellm"] + interval: 30s + timeout: 10s + retries: 5 + + # Redis for shared caching and session management + aimo-shared-redis: + image: redis:7-alpine + container_name: aimo-shared-redis + restart: unless-stopped + command: redis-server --requirepass redis123 + volumes: + - shared-redis-data:/data + ports: + - "6379:6379" + networks: + - aimo-llm-network + healthcheck: + test: ["CMD", "redis-cli", "--raw", "incr", "ping"] + interval: 30s + timeout: 10s + retries: 5 + + # Shared monitoring and metrics + aimo-prometheus: + image: prom/prometheus:latest + container_name: aimo-prometheus + restart: unless-stopped + ports: + - "9090:9090" + volumes: + - ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus-data:/prometheus + networks: + - aimo-llm-network + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/etc/prometheus/console_libraries' + - '--web.console.templates=/etc/prometheus/consoles' + - '--storage.tsdb.retention.time=200h' + - '--web.enable-lifecycle' + + # Shared Grafana for visualization + aimo-grafana: + image: grafana/grafana:latest + container_name: aimo-grafana + restart: unless-stopped + ports: + - "3000:3000" + environment: + - GF_SECURITY_ADMIN_PASSWORD=admin123 + - GF_DATABASE_TYPE=postgres + - GF_DATABASE_HOST=aimo-shared-db:5432 + - GF_DATABASE_NAME=litellm + - GF_DATABASE_USER=litellm + - GF_DATABASE_PASSWORD=litellm123 + volumes: + - grafana-data:/var/lib/grafana + - ./monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards + - ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources + networks: + - aimo-llm-network + depends_on: + - aimo-shared-db + - aimo-prometheus + +volumes: + shared-db-data: + driver: local + shared-redis-data: + driver: local + prometheus-data: + driver: local + grafana-data: + driver: local + +networks: + aimo-llm-network: + driver: bridge + ipam: + config: + - subnet: 172.20.0.0/16 \ No newline at end of file diff --git a/infra/litellm/shared/init-scripts/01-init-schemas.sh b/infra/litellm/shared/init-scripts/01-init-schemas.sh new file mode 100755 index 0000000..117a569 --- /dev/null +++ b/infra/litellm/shared/init-scripts/01-init-schemas.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +# Database initialization script for multiple services +# This script creates separate schemas for different LiteLLM services + +set -e + +echo "Initializing shared database for multiple LiteLLM services..." 
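+ +# The unquoted <<-EOSQL delimiter below lets the shell expand $POSTGRES_USER and +# $POSTGRES_DB before psql runs, and -v ON_ERROR_STOP=1 aborts the whole block on +# the first failing statement; both variables are supplied by the postgres image +# when it executes scripts from /docker-entrypoint-initdb.d. Note that the +# WHERE EXISTS clauses in the unified view only filter rows at query time, so +# CREATE VIEW still requires the underlying litellm_requestlogs tables to exist.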
+ +# Create schemas for different providers +psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL + -- Create schemas for different providers + CREATE SCHEMA IF NOT EXISTS openrouter; + CREATE SCHEMA IF NOT EXISTS nebulablock; + CREATE SCHEMA IF NOT EXISTS phala; + CREATE SCHEMA IF NOT EXISTS chutesai; + CREATE SCHEMA IF NOT EXISTS shared_analytics; + + -- Grant permissions + GRANT ALL PRIVILEGES ON SCHEMA openrouter TO $POSTGRES_USER; + GRANT ALL PRIVILEGES ON SCHEMA nebulablock TO $POSTGRES_USER; + GRANT ALL PRIVILEGES ON SCHEMA phala TO $POSTGRES_USER; + GRANT ALL PRIVILEGES ON SCHEMA chutesai TO $POSTGRES_USER; + GRANT ALL PRIVILEGES ON SCHEMA shared_analytics TO $POSTGRES_USER; + + -- Create a view for unified analytics across all providers + CREATE OR REPLACE VIEW shared_analytics.unified_requests AS + SELECT + 'openrouter' as provider, + * + FROM openrouter.litellm_requestlogs + WHERE EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'openrouter' AND table_name = 'litellm_requestlogs') + + UNION ALL + + SELECT + 'nebulablock' as provider, + * + FROM nebulablock.litellm_requestlogs + WHERE EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'nebulablock' AND table_name = 'litellm_requestlogs') + + UNION ALL + + SELECT + 'phala' as provider, + * + FROM phala.litellm_requestlogs + WHERE EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'phala' AND table_name = 'litellm_requestlogs') + + UNION ALL + + SELECT + 'chutesai' as provider, + * + FROM chutesai.litellm_requestlogs + WHERE EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'chutesai' AND table_name = 'litellm_requestlogs'); + + -- Create indexes for better performance + -- These will be created automatically when tables are created by LiteLLM + +EOSQL + +echo "Shared database initialization completed!" 
\ No newline at end of file From 7754c4452df27a8fa00f2182a30f1c987ab2edb5 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Mon, 29 Sep 2025 15:02:14 +0800 Subject: [PATCH 03/10] Stop tracking .env.openrouter file --- .gitignore | 4 +- infra/litellm/chutesai/.env.chutesai.example | 13 + infra/litellm/chutesai/README.md | 250 ++++++++++++ infra/litellm/chutesai/chutesai_config.yaml | 369 ++++++++++++++++++ ...shared.yml => docker-compose.database.yml} | 0 5 files changed, 635 insertions(+), 1 deletion(-) create mode 100644 infra/litellm/chutesai/.env.chutesai.example create mode 100644 infra/litellm/chutesai/README.md create mode 100644 infra/litellm/chutesai/chutesai_config.yaml rename infra/litellm/shared/{docker-compose.shared.yml => docker-compose.database.yml} (100%) diff --git a/.gitignore b/.gitignore index 8cef037..2a13895 100644 --- a/.gitignore +++ b/.gitignore @@ -330,4 +330,6 @@ data/prompts/* # Environment variables files .env.nebulablock .env.chutesai -.env.openrouter \ No newline at end of file +.env.openrouter +infra/litellm/openrouter/.env.openrouter +infra/litellm/openrouter/.env.openrouter diff --git a/infra/litellm/chutesai/.env.chutesai.example b/infra/litellm/chutesai/.env.chutesai.example new file mode 100644 index 0000000..07e7fc2 --- /dev/null +++ b/infra/litellm/chutesai/.env.chutesai.example @@ -0,0 +1,13 @@ +# LiteLLM Proxy Configuration +LITELLM_MASTER_KEY=sk-chutesai-proxy-key + +# Model Provider API Keys (used by LiteLLM) +CHUTESAI_API_KEY=your_chutesai_api_key_here + +# Database (Shared PostgreSQL with schema separation) +LITELLM_DATABASE_URL=postgresql://litellm:litellm123@aimo-shared-db:5432/litellm +LITELLM_TABLE_PREFIX=chutesai_ + +# Service Configuration +SERVICE_NAME=chutesai-llm-proxy +LOG_LEVEL=INFO \ No newline at end of file diff --git a/infra/litellm/chutesai/README.md b/infra/litellm/chutesai/README.md new file mode 100644 index 0000000..a156342 --- /dev/null +++ b/infra/litellm/chutesai/README.md @@ -0,0 +1,250 @@ +# ChutesAI LiteLLM Service + +This directory contains the Docker configuration for running LiteLLM proxy with ChutesAI provider integration. 
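+ +This service assumes the shared infrastructure under `infra/litellm/shared` is already running: the external `aimo-llm-network` Docker network and the shared PostgreSQL instance (`aimo-shared-db`). A minimal bootstrap sketch, assuming the compose file and network names used elsewhere in this repo: + +```bash +# Create the external network once (ignore the error if it already exists) +docker network create aimo-llm-network || true + +# Bring up the shared database (and monitoring) before starting this service +docker-compose -f ../shared/docker-compose.database.yml up -d +```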
+ +## Files Structure + +``` +chutesai/ +├── docker-compose.chutesai.yml # Docker Compose configuration +├── chutesai_config.yaml # LiteLLM model configuration +├── .env.chutesai # Environment variables (create from example) +├── .env.chutesai.example # Environment variables template +├── README.md # This file +├── textModelsList.txt # Complete model list with pricing +└── data/ # Persistent data directory +``` + +## Available Models + +### Free Models (0.0 pricing) +- **GLM Models**: glm-4.5-air-free (Free, 131K context) +- **OpenAI OSS**: openai-gpt-oss-20b-free (Free, 131K context) +- **Google Gemma**: gemma-3-4b-it-free (Free, 96K context) +- **LongCat Models**: longcat-flash-chat-fp8-free, longcat-flash-thinking-fp8-free (Free, 131K context) +- **Alibaba**: tongyi-deepresearch-30b-free (Free, 131K context) + +### Budget Models ($0.01-$0.07 per 1M tokens) +- **Meta Llama**: llama-3.2-1b-instruct ($0.01/$0.01), llama-3.2-3b-instruct ($0.01/$0.01) +- **Google Gemma**: gemma-2-9b-it ($0.01/$0.02), gemmasutra-pro-27b ($0.01/$0.03) +- **NousResearch**: hermes-4-14b ($0.01/$0.05), deephermes-3-llama-3-8b ($0.01/$0.05) +- **DeepSeek**: deepseek-r1-0528-qwen3-8b ($0.01/$0.05) +- **Mistral**: mistral-nemo-instruct ($0.02/$0.07) +- **Moonshot**: kimi-dev-72b ($0.07/$0.26), kimi-vl-a3b-thinking ($0.02/$0.07) + +### Mid-range Models ($0.04-$0.29 per 1M tokens) +- **Google Gemma**: gemma-3-12b-it ($0.04/$0.14) +- **Qwen**: qwen3-30b-a3b-thinking ($0.08/$0.29) +- **GLM**: glm-4.5v ($0.08/$0.33) +- **Tencent**: hunyuan-a13b-instruct ($0.04/$0.14) +- **NVIDIA**: llama-3.3-nemotron-super-49b ($0.07/$0.26) + +### Premium Models ($0.14-$3.0 per 1M tokens) +- **ChutesAI Mistral**: mistral-small-3.2-24b ($0.14/$0.57) +- **Qwen Advanced**: qwen3-next-80b-a3b-thinking ($0.1/$0.8), qwen3-vl-235b-a22b-thinking ($0.16/$0.65) +- **DeepSeek Premium**: deepseek-v3.1-turbo ($1.0/$3.0), deepseek-r1-0528 ($0.55/$1.75) +- **ByteDance**: seed-oss-36b-instruct ($0.16/$0.65) + +### Ultra Premium Models ($0.25-$1.0+ per 1M tokens) +- **DeepSeek Flagship**: deepseek-r1, deepseek-v3, deepseek-v3.1 ($0.25/$1.0) +- **NousResearch**: hermes-4-405b-fp8 ($0.25/$1.0) + +## Setup Instructions + +### 1. Configure Environment Variables + +Copy and edit the environment file: +```bash +cp .env.chutesai.example .env.chutesai +``` + +Edit `.env.chutesai` and add your ChutesAI API key: +```bash +# Update this with your actual API key +CHUTESAI_API_KEY=your_actual_chutesai_api_key_here +``` + +### 2. Start the Service + +```bash +# Start ChutesAI LLM service +docker-compose -f docker-compose.chutesai.yml up -d + +# Check service status +docker-compose -f docker-compose.chutesai.yml ps + +# View logs +docker-compose -f docker-compose.chutesai.yml logs -f +``` + +### 3. Test the Service + +```bash +# Health check +curl http://localhost:4004/health + +# List available models +curl http://localhost:4004/v1/models + +# Test chat completion with a free model +curl -X POST http://localhost:4004/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-chutesai-proxy-key" \ + -d '{ + "model": "glm-4_5-air-free", + "messages": [{"role": "user", "content": "Hello! 
Can you help me with coding?"}], + "max_tokens": 100 + }' + +# Test with a premium model +curl -X POST http://localhost:4004/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-chutesai-proxy-key" \ + -d '{ + "model": "deepseek-r1", + "messages": [{"role": "user", "content": "Explain quantum computing in simple terms"}], + "max_tokens": 150 + }' +``` + +### 4. Stop the Service + +```bash +# Stop the service +docker-compose -f docker-compose.chutesai.yml down + +# Stop and remove volumes (caution: this also removes any named volumes) +docker-compose -f docker-compose.chutesai.yml down -v +``` + +## Configuration Details + +### Model Naming Convention +- Models are prefixed with `chutesai/` to identify the provider +- Free models are explicitly marked with "-free" suffix +- Pricing information is included for cost tracking and routing decisions + +### Network Configuration +- Service runs on port 4004 to avoid conflicts with other LLM services +- Uses shared `aimo-llm-network` for integration with other services +- Shared PostgreSQL database with `chutesai` schema for isolation + +### Fallback Strategy +- Free models (glm-4.5-air-free, openai-gpt-oss-20b-free, etc.) are configured as fallbacks +- Routing strategy set to "least-busy" for optimal load distribution +- Request timeout set to 10 minutes for complex reasoning queries + +## Integration with Main AIMO Service + +To use this service in your main AIMO application, configure: + +```bash +# Add to main .env file +LLM_BASE_URL=http://localhost:4004 +LLM_API_KEY=sk-chutesai-proxy-key +LLM_MODEL_DEFAULT=glm-4_5-air-free # Use free model as default +``` + +## Model Categories and Use Cases + +### Free Tier (Perfect for Development) +- **General Chat**: glm-4.5-air-free, openai-gpt-oss-20b-free +- **Code Generation**: gemma-3-4b-it-free +- **Long Context**: longcat-flash-chat-fp8-free (131K tokens) +- **Research**: tongyi-deepresearch-30b-free + +### Production Ready (Cost-Effective) +- **Balanced Performance**: hermes-4-14b, deephermes-3-llama-3-8b +- **Reasoning Tasks**: deepseek-r1-0528-qwen3-8b +- **Multimodal**: kimi-vl-a3b-thinking +- **Code Assistant**: deepcoder-14b-preview + +### Enterprise Grade (High Performance) +- **Advanced Reasoning**: deepseek-r1, deepseek-v3.1 +- **Large Context**: qwen3-vl-235b-a22b-thinking (262K context) +- **Specialized Tasks**: mistral-small-3.2-24b +- **Vision Models**: glm-4.5v + +### Ultra Premium (Cutting Edge) +- **Best Reasoning**: deepseek-v3.1-turbo +- **Largest Models**: hermes-4-405b-fp8 +- **Advanced Multimodal**: qwen3-vl-235b-a22b-thinking + +## Monitoring and Maintenance + +### Health Monitoring +- Health check endpoint: `http://localhost:4004/health` +- Database connectivity included in health checks +- Automatic container restart on failure + +### Logs and Analytics +- JSON formatted logs for structured analysis +- Database logging for request analytics and cost tracking +- Schema-based data separation from other providers + +### Resource Management +- Single worker process optimized for development +- Configurable timeout and rate limiting +- Automatic parameter validation and cleanup +- Memory-efficient model loading + +## Troubleshooting + +### Common Issues +1. **Port 4004 already in use**: Change the host port mapping in `docker-compose.chutesai.yml` +2. **API key invalid**: Verify CHUTESAI_API_KEY in .env.chutesai +3. **Models not loading**: Check chutesai_config.yaml syntax +4. 
**Database connection issues**: Ensure the shared PostgreSQL container is healthy + +### Debug Mode +Enable debug logging by setting the following in `.env.chutesai`: +```bash +LOG_LEVEL=DEBUG +``` + +### Performance Tuning +For production use, consider: +- Increasing `num_workers` in `docker-compose.chutesai.yml` +- Adjusting rate limits in configuration +- Setting up external PostgreSQL database +- Adding Redis for caching +- Using a load balancer for high availability + +### Cost Management +- Use free models for development and testing +- Set up model fallbacks to prevent overspending +- Monitor usage through database logs +- Consider budget models for production workloads + +## Security Considerations + +- Change default master key in production +- Use strong database passwords +- Implement network-level access controls +- Rotate API keys regularly +- Monitor usage for anomalies +- Set up rate limiting per user/API key + +## API Compatibility + +The ChutesAI service is fully compatible with the OpenAI API format: +- `/v1/chat/completions` - Chat completions +- `/v1/models` - List available models +- `/health` - Service health check +- Standard OpenAI headers and request/response format + +## Cost Optimization Tips + +1. **Start with Free Models**: Use glm-4.5-air-free, openai-gpt-oss-20b-free for development +2. **Fallback Strategy**: Configure fallbacks from premium to free models +3. **Right-size Models**: Use smaller models for simple tasks +4. **Monitor Usage**: Track costs through database logging +5. **Batch Requests**: Group multiple requests when possible + +## Support and Documentation + +For issues specific to ChutesAI integration: +1. Check service logs: `docker-compose -f docker-compose.chutesai.yml logs` +2. Verify API connectivity: `curl http://localhost:4004/health` +3. Test model availability: `curl http://localhost:4004/v1/models` +4. 
Check database schema: Ensure `chutesai` schema exists \ No newline at end of file diff --git a/infra/litellm/chutesai/chutesai_config.yaml b/infra/litellm/chutesai/chutesai_config.yaml new file mode 100644 index 0000000..4674430 --- /dev/null +++ b/infra/litellm/chutesai/chutesai_config.yaml @@ -0,0 +1,369 @@ +model_list: + # Popular Free Models (0.0 pricing) + - model_name: glm-4_5-air-free + litellm_params: + model: chutesai/zai-org/GLM-4.5-Air + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + - model_name: openai-gpt-oss-20b-free + litellm_params: + model: chutesai/openai/gpt-oss-20b + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + - model_name: gemma-3-4b-it-free + litellm_params: + model: chutesai/unsloth/gemma-3-4b-it + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + - model_name: longcat-flash-chat-fp8-free + litellm_params: + model: chutesai/meituan-longcat/LongCat-Flash-Chat-FP8 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + - model_name: longcat-flash-thinking-fp8-free + litellm_params: + model: chutesai/meituan-longcat/LongCat-Flash-Thinking-FP8 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + - model_name: tongyi-deepresearch-30b-free + litellm_params: + model: chutesai/Alibaba-NLP/Tongyi-DeepResearch-30B-A3B + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.0 # Free model + completion: 0.0 + + # Budget Models (Low cost) + - model_name: llama-3_2-1b-instruct + litellm_params: + model: chutesai/unsloth/Llama-3.2-1B-Instruct + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.01 + + - model_name: llama-3_2-3b-instruct + litellm_params: + model: chutesai/unsloth/Llama-3.2-3B-Instruct + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.01 + + - model_name: gemma-2-9b-it + litellm_params: + model: chutesai/unsloth/gemma-2-9b-it + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.02 + + - model_name: gemmasutra-pro-27b + litellm_params: + model: chutesai/TheDrummer/Gemmasutra-Pro-27B-v1.1 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.03 + + - model_name: dolphin3-r1-mistral-24b + litellm_params: + model: chutesai/cognitivecomputations/Dolphin3.0-R1-Mistral-24B + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.03 + + # Mid-range Models + - model_name: gemma-3-12b-it + litellm_params: + model: chutesai/unsloth/gemma-3-12b-it + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.04 + completion: 0.14 + + - model_name: hermes-4-14b + litellm_params: + model: chutesai/NousResearch/Hermes-4-14B + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.05 + + - model_name: deephermes-3-llama-3-8b + litellm_params: + model: 
chutesai/NousResearch/DeepHermes-3-Llama-3-8B-Preview + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.05 + + - model_name: deepseek-r1-0528-qwen3-8b + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-R1-0528-Qwen3-8B + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.01 + completion: 0.05 + + - model_name: mistral-nemo-instruct + litellm_params: + model: chutesai/unsloth/Mistral-Nemo-Instruct-2407 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.02 + completion: 0.07 + + - model_name: deepcoder-14b-preview + litellm_params: + model: chutesai/agentica-org/DeepCoder-14B-Preview + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.02 + completion: 0.07 + + - model_name: kimi-vl-a3b-thinking + litellm_params: + model: chutesai/moonshotai/Kimi-VL-A3B-Thinking + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.02 + completion: 0.07 + + - model_name: openhands-lm-32b + litellm_params: + model: chutesai/all-hands/openhands-lm-32b-v0.1-ep3 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.02 + completion: 0.07 + + - model_name: kimi-dev-72b + litellm_params: + model: chutesai/moonshotai/Kimi-Dev-72B + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.07 + completion: 0.26 + + - model_name: llama-3_3-nemotron-super-49b + litellm_params: + model: chutesai/nvidia/Llama-3_3-Nemotron-Super-49B-v1_5 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.07 + completion: 0.26 + + # High-end Models + - model_name: qwen3-30b-a3b-thinking + litellm_params: + model: chutesai/Qwen/Qwen3-30B-A3B-Thinking-2507 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.08 + completion: 0.29 + + - model_name: glm-4_5v + litellm_params: + model: chutesai/zai-org/GLM-4.5V + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.08 + completion: 0.33 + + - model_name: hunyuan-a13b-instruct + litellm_params: + model: chutesai/tencent/Hunyuan-A13B-Instruct + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.04 + completion: 0.14 + + - model_name: mistral-small-3_2-24b + litellm_params: + model: chutesai/chutesai/Mistral-Small-3.2-24B-Instruct-2506 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.14 + completion: 0.57 + + - model_name: qwen3-next-80b-a3b-thinking + litellm_params: + model: chutesai/Qwen/Qwen3-Next-80B-A3B-Thinking + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.1 + completion: 0.8 + + - model_name: seed-oss-36b-instruct + litellm_params: + model: chutesai/ByteDance-Seed/Seed-OSS-36B-Instruct + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.16 + completion: 0.65 + + - model_name: qwen3-vl-235b-a22b-thinking + litellm_params: + model: chutesai/Qwen/Qwen3-VL-235B-A22B-Thinking + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.16 + completion: 0.65 + + - model_name: deepseek-v3_1-base + litellm_params: + model: 
chutesai/deepseek-ai/DeepSeek-V3.1-Base + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.25 + completion: 1.0 + + # Premium DeepSeek Models + - model_name: deepseek-r1 + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-R1 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.25 + completion: 1.0 + + - model_name: deepseek-v3 + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-V3 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.25 + completion: 1.0 + + - model_name: deepseek-v3_1 + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-V3.1 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.25 + completion: 1.0 + + - model_name: deepseek-r1-0528 + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-R1-0528 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.55 + completion: 1.75 + + - model_name: deepseek-v3_1-turbo + litellm_params: + model: chutesai/deepseek-ai/DeepSeek-V3.1-turbo + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 1.0 + completion: 3.0 + + - model_name: hermes-4-405b-fp8 + litellm_params: + model: chutesai/NousResearch/Hermes-4-405B-FP8 + api_base: https://api.chutesai.com/v1 + api_key: os.environ/CHUTESAI_API_KEY + pricing: + prompt: 0.25 + completion: 1.0 + +# General settings +general_settings: + master_key: os.environ/LITELLM_MASTER_KEY # Set a master key for proxy auth + database_url: os.environ/LITELLM_DATABASE_URL # Database connection + database_logging: true + service_name: "chutesai-llm-proxy" + database_params: + # Use specific schema for this service + options: "-c search_path=chutesai,public" + +# Logging configuration +litellm_settings: + drop_params: true # Drop unsupported params instead of erroring + set_verbose: true + json_logs: true + request_timeout: 600 # 10 minutes timeout + +# Rate limiting and routing +router_settings: + enable_pre_call_checks: true + enable_admin_api: true + model_fallbacks: + # Fallback strategy: paid models fall back to free alternatives + default: ["glm-4_5-air-free", "openai-gpt-oss-20b-free", "gemma-3-4b-it-free", "longcat-flash-chat-fp8-free"] + routing_strategy: "least-busy" + +logging: + level: DEBUG + format: json + +# Health check configuration +health_check: + enable: true + endpoint: "/health" + +# Success/Error callbacks (optional) +# success_callback: ["langfuse"] # Track successful calls +# failure_callback: ["langfuse"] # Track failed calls + +# Custom provider settings for ChutesAI +provider_settings: + chutesai: + base_url: "https://api.chutesai.com/v1" + headers: + "User-Agent": "LiteLLM-ChutesAI/1.0" + rate_limit: + requests_per_minute: 1000 + tokens_per_minute: 100000 \ No newline at end of file diff --git a/infra/litellm/shared/docker-compose.shared.yml b/infra/litellm/shared/docker-compose.database.yml similarity index 100% rename from infra/litellm/shared/docker-compose.shared.yml rename to infra/litellm/shared/docker-compose.database.yml From 9a86b844480a532355638fb95c184cc37deb6744 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Mon, 29 Sep 2025 15:28:53 +0800 Subject: [PATCH 04/10] feat: Update service configurations and documentation for LiteLLM providers --- .gitignore | 2 -- infra/litellm/README.md | 36 +++++++++++++++++++ .../chutesai/docker-compose.chutesai.yml | 2 +- 
infra/litellm/manage-all-services.sh | 12 +++---- .../docker-compose.nebulablock.yml | 8 ++--- .../openrouter/docker-compose.openrouter.yml | 25 +++---------- .../shared/init-scripts/01-init-schemas.sh | 10 ------ 7 files changed, 52 insertions(+), 43 deletions(-) create mode 100644 infra/litellm/README.md diff --git a/.gitignore b/.gitignore index 2a13895..b3fa34a 100644 --- a/.gitignore +++ b/.gitignore @@ -331,5 +331,3 @@ data/prompts/* .env.nebulablock .env.chutesai .env.openrouter -infra/litellm/openrouter/.env.openrouter -infra/litellm/openrouter/.env.openrouter diff --git a/infra/litellm/README.md b/infra/litellm/README.md new file mode 100644 index 0000000..aa24687 --- /dev/null +++ b/infra/litellm/README.md @@ -0,0 +1,36 @@ +# AIMO Multi-Provider LLM Services Manager + +This script provides unified management for all LLM provider services and shared infrastructure (database, monitoring) in the AIMO project. + +## Usage + +```bash +./manage-all-services.sh {start|stop|restart|status|logs|test} [service] +``` + +- `start [service]` Start all, shared, or a specific provider service +- `stop [service]` Stop all, shared, or a specific provider service +- `restart [service]` Restart all, shared, or a specific provider service +- `status` Show status and health of all services +- `logs [service]` Show logs for all, shared, or a specific provider +- `test` Test health and endpoints of all services + +## Examples + +```bash +./manage-all-services.sh start # Start all services +./manage-all-services.sh start shared # Start only shared infrastructure +./manage-all-services.sh start openrouter # Start only OpenRouter service +./manage-all-services.sh stop all # Stop all services +./manage-all-services.sh status # Show service status +./manage-all-services.sh logs nebulablock # Show Nebula Block logs +./manage-all-services.sh test # Test all services +``` + +## Notes + +- Services managed: openrouter, nebulablock, chutesai +- Shared infrastructure includes database, Redis, Prometheus, Grafana +- Requires Docker and docker-compose installed + +--- \ No newline at end of file diff --git a/infra/litellm/chutesai/docker-compose.chutesai.yml b/infra/litellm/chutesai/docker-compose.chutesai.yml index 5e68336..ece7f8c 100644 --- a/infra/litellm/chutesai/docker-compose.chutesai.yml +++ b/infra/litellm/chutesai/docker-compose.chutesai.yml @@ -3,7 +3,7 @@ services: image: ghcr.io/berriai/litellm:main-latest container_name: litellm-chutesai ports: - - "4004:4000" # Different port to avoid conflicts with other providers + - "4003:4000" volumes: - ./chutesai_config.yaml:/app/config.yaml - ./data:/data diff --git a/infra/litellm/manage-all-services.sh b/infra/litellm/manage-all-services.sh index 8e1f391..696b0f6 100644 --- a/infra/litellm/manage-all-services.sh +++ b/infra/litellm/manage-all-services.sh @@ -7,7 +7,7 @@ set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SHARED_DIR="$SCRIPT_DIR/shared" -PROVIDERS=("openrouter" "nebulablock" "phala" "chutesai") +PROVIDERS=("openrouter" "nebulablock" "chutesai") log() { echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" @@ -143,8 +143,8 @@ show_status() { fi # Check providers - ports=(4001 4002 4003 4004) - provider_names=("openrouter" "nebulablock" "phala" "chutesai") + ports=(4001 4002 4004) + provider_names=("openrouter" "nebulablock" "chutesai") for i in "${!ports[@]}"; do port=${ports[$i]} @@ -193,8 +193,8 @@ test_all_services() { fi # Test provider services - ports=(4001 4002 4003 4004) - provider_names=("openrouter" "nebulablock" "phala" "chutesai") 
+ ports=(4001 4002 4004) + provider_names=("openrouter" "nebulablock" "chutesai") for i in "${!ports[@]}"; do port=${ports[$i]} @@ -285,7 +285,7 @@ case "${1:-}" in echo "Services:" echo " all - All services (default)" echo " shared - Shared infrastructure (database, monitoring)" - printf " %s\n" "${PROVIDERS[@]}" + printf " %s\n" "${PROVIDERS[@]}" echo "" echo "Examples:" echo " $0 start # Start all services" diff --git a/infra/litellm/nebulablock/docker-compose.nebulablock.yml b/infra/litellm/nebulablock/docker-compose.nebulablock.yml index 1965737..7e52595 100644 --- a/infra/litellm/nebulablock/docker-compose.nebulablock.yml +++ b/infra/litellm/nebulablock/docker-compose.nebulablock.yml @@ -1,9 +1,9 @@ -Íservices: +services: litellm-nebulablock: image: ghcr.io/berriai/litellm:main-latest container_name: litellm-nebulablock ports: - - "4002:4000" # Different port to avoid conflicts with OpenRouter + - "4002:4000" volumes: - ./nebulablock_config.yaml:/app/config.yaml - ./data:/data @@ -18,8 +18,8 @@ retries: 3 start_period: 10s networks: - - aimo-llm-network # Use shared network + - aimo-llm-network networks: aimo-llm-network: - external: true # Reference external shared network \ No newline at end of file + external: true \ No newline at end of file diff --git a/infra/litellm/openrouter/docker-compose.openrouter.yml b/infra/litellm/openrouter/docker-compose.openrouter.yml index 4ef2742..5297368 100644 --- a/infra/litellm/openrouter/docker-compose.openrouter.yml +++ b/infra/litellm/openrouter/docker-compose.openrouter.yml @@ -3,16 +3,14 @@ services: image: ghcr.io/berriai/litellm:main-latest container_name: litellm-openrouter ports: - - "4000:4000" + - "4001:4000" volumes: - ./openrouter_config.yaml:/app/config.yaml - ./data:/data env_file: - - .env.litellm + - .env.openrouter command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] restart: unless-stopped - depends_on: - - db healthcheck: test: ["CMD", "curl", "-f", "http://localhost:4000/health"] interval: 30s @@ -20,21 +18,8 @@ services: retries: 3 start_period: 10s networks: - - litellm-network - - db: - image: postgres:15 - container_name: litellm-db - restart: unless-stopped - environment: - POSTGRES_USER: litellm - POSTGRES_PASSWORD: litellm123 - POSTGRES_DB: litellm - volumes: - - ./pgdata:/var/lib/postgresql/data - networks: - - litellm-network + - aimo-llm-network networks: - litellm-network: - driver: bridge + aimo-llm-network: + external: true diff --git a/infra/litellm/shared/init-scripts/01-init-schemas.sh b/infra/litellm/shared/init-scripts/01-init-schemas.sh index 117a569..b99aeed 100755 --- a/infra/litellm/shared/init-scripts/01-init-schemas.sh +++ b/infra/litellm/shared/init-scripts/01-init-schemas.sh @@ -12,14 +12,12 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-E -- Create schemas for different providers CREATE SCHEMA IF NOT EXISTS openrouter; CREATE SCHEMA IF NOT EXISTS nebulablock; - CREATE SCHEMA IF NOT EXISTS phala; CREATE SCHEMA IF NOT EXISTS chutesai; CREATE SCHEMA IF NOT EXISTS shared_analytics; -- Grant permissions GRANT ALL PRIVILEGES ON SCHEMA openrouter TO $POSTGRES_USER; GRANT ALL PRIVILEGES ON SCHEMA nebulablock TO $POSTGRES_USER; - GRANT ALL PRIVILEGES ON SCHEMA phala TO $POSTGRES_USER; GRANT ALL PRIVILEGES ON SCHEMA chutesai TO $POSTGRES_USER; GRANT ALL PRIVILEGES ON SCHEMA shared_analytics TO $POSTGRES_USER; @@ -41,14 +39,6 @@ psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-E UNION ALL - SELECT - 'phala' as 
provider, - * - FROM phala.litellm_requestlogs - WHERE EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = 'phala' AND table_name = 'litellm_requestlogs') - - UNION ALL - SELECT 'chutesai' as provider, * From d77f4dc8c9d454ac49222d432fa81afd1257312b Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Tue, 30 Sep 2025 00:49:02 +0100 Subject: [PATCH 05/10] feat: Update ChutesAI model pricing and remove .env.openrouter file --- infra/litellm/chutesai/chutesai_config.yaml | 2 +- infra/litellm/openrouter/.env.openrouter | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) delete mode 100644 infra/litellm/openrouter/.env.openrouter diff --git a/infra/litellm/chutesai/chutesai_config.yaml b/infra/litellm/chutesai/chutesai_config.yaml index 4674430..e8fb2a2 100644 --- a/infra/litellm/chutesai/chutesai_config.yaml +++ b/infra/litellm/chutesai/chutesai_config.yaml @@ -6,7 +6,7 @@ model_list: api_base: https://api.chutesai.com/v1 api_key: os.environ/CHUTESAI_API_KEY pricing: - prompt: 0.0 # Free model + prompt: 0.0 completion: 0.0 - model_name: openai-gpt-oss-20b-free diff --git a/infra/litellm/openrouter/.env.openrouter b/infra/litellm/openrouter/.env.openrouter deleted file mode 100644 index 770eecd..0000000 --- a/infra/litellm/openrouter/.env.openrouter +++ /dev/null @@ -1,10 +0,0 @@ -# LiteLLM Proxy Configuration -LITELLM_MASTER_KEY=sk-TiKnmxLUrn9aLdO1MHUGkdL-w8bwPUieo0aiqAosTQ0 - -# Model Provider API Keys (used by LiteLLM) -OPENROUTER_API_KEY=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 - -CHUTESAI_API_KEY=cpk_5e4867be39504f8b887fd539ebaf599c.d6116916424e5e9c8c88ed6e1bc1b05a.TUps4clNMPQOECD5lDDpKbmkheo2JWry - -# Database (Postgres) -LITELLM_DATABASE_URL=postgresql://litellm:litellm123@db:5432/litellm \ No newline at end of file From 9f8b5b52046e566bd8d25fb77b82b50eadacc253 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Tue, 30 Sep 2025 02:48:31 +0100 Subject: [PATCH 06/10] feat: Update database service configuration and add Prometheus monitoring setup --- infra/litellm/manage-all-services.sh | 2 +- .../shared/docker-compose.database.yml | 5 +-- .../litellm/shared/monitoring/prometheus.yml | 31 +++++++++++++++++++ 3 files changed, 33 insertions(+), 5 deletions(-) mode change 100644 => 100755 infra/litellm/manage-all-services.sh create mode 100644 infra/litellm/shared/monitoring/prometheus.yml diff --git a/infra/litellm/manage-all-services.sh b/infra/litellm/manage-all-services.sh old mode 100644 new mode 100755 index 696b0f6..7087a7f --- a/infra/litellm/manage-all-services.sh +++ b/infra/litellm/manage-all-services.sh @@ -24,7 +24,7 @@ start_shared_services() { log "πŸ—οΈ Starting shared infrastructure (database, monitoring)..." check_network cd "$SHARED_DIR" - docker-compose -f docker-compose.shared.yml up -d + docker-compose -f docker-compose.database.yml up -d # Wait for database to be ready log "⏳ Waiting for shared database to be ready..." 
diff --git a/infra/litellm/shared/docker-compose.database.yml b/infra/litellm/shared/docker-compose.database.yml index 5a0d566..6b4e695 100644 --- a/infra/litellm/shared/docker-compose.database.yml +++ b/infra/litellm/shared/docker-compose.database.yml @@ -99,7 +99,4 @@ volumes: networks: aimo-llm-network: - driver: bridge - ipam: - config: - - subnet: 172.20.0.0/16 \ No newline at end of file + external: true \ No newline at end of file diff --git a/infra/litellm/shared/monitoring/prometheus.yml b/infra/litellm/shared/monitoring/prometheus.yml new file mode 100644 index 0000000..7e56a62 --- /dev/null +++ b/infra/litellm/shared/monitoring/prometheus.yml @@ -0,0 +1,31 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'litellm-openrouter' + static_configs: + - targets: ['host.docker.internal:4001'] + metrics_path: /metrics + + - job_name: 'litellm-nebulablock' + static_configs: + - targets: ['host.docker.internal:4002'] + metrics_path: /metrics + + - job_name: 'litellm-chutesai' + static_configs: + - targets: ['host.docker.internal:4003'] + metrics_path: /metrics + + - job_name: 'postgres' + static_configs: + - targets: ['aimo-shared-db:5432'] + + - job_name: 'redis' + static_configs: + - targets: ['aimo-shared-redis:6379'] From 15f38716d901d682b183e5bea4e1b637576738eb Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Tue, 30 Sep 2025 22:13:55 +0100 Subject: [PATCH 07/10] feat: Enhance healthcheck for services to include authorization support --- .../chutesai/docker-compose.chutesai.yml | 2 +- infra/litellm/manage-all-services.sh | 123 ++++++++++++++++-- .../docker-compose.nebulablock.yml | 2 +- .../openrouter/docker-compose.openrouter.yml | 2 +- 4 files changed, 112 insertions(+), 17 deletions(-) diff --git a/infra/litellm/chutesai/docker-compose.chutesai.yml b/infra/litellm/chutesai/docker-compose.chutesai.yml index ece7f8c..69811c4 100644 --- a/infra/litellm/chutesai/docker-compose.chutesai.yml +++ b/infra/litellm/chutesai/docker-compose.chutesai.yml @@ -12,7 +12,7 @@ services: command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:4000/health"] + test: ["CMD-SHELL", "if [ -n \"$${LITELLM_MASTER_KEY}\" ]; then curl -sf -H \"Authorization: Bearer $${LITELLM_MASTER_KEY}\" http://localhost:4000/health; else curl -sf http://localhost:4000/health; fi"] interval: 30s timeout: 10s retries: 3 diff --git a/infra/litellm/manage-all-services.sh b/infra/litellm/manage-all-services.sh index 7087a7f..5cd4eb6 100755 --- a/infra/litellm/manage-all-services.sh +++ b/infra/litellm/manage-all-services.sh @@ -24,7 +24,20 @@ start_shared_services() { log "🏗️ Starting shared infrastructure (database, monitoring)..." check_network cd "$SHARED_DIR" - docker-compose -f docker-compose.database.yml up -d + # support either docker-compose.database.yml or docker-compose.shared.yml + compose_file="" + if [ -f docker-compose.database.yml ]; then + compose_file="docker-compose.database.yml" + elif [ -f docker-compose.shared.yml ]; then + compose_file="docker-compose.shared.yml" + fi + + if [ -z "$compose_file" ]; then + log "❌ No shared docker-compose file found in $SHARED_DIR" + return 1 + fi + + docker-compose -f "$compose_file" up -d # Wait for database to be ready log "⏳ Waiting for shared database to be ready..." 
@@ -47,7 +60,19 @@ start_shared_services() { stop_shared_services() { log "πŸ›‘ Stopping shared infrastructure..." cd "$SHARED_DIR" - docker-compose -f docker-compose.shared.yml down + compose_file="" + if [ -f docker-compose.shared.yml ]; then + compose_file="docker-compose.shared.yml" + elif [ -f docker-compose.database.yml ]; then + compose_file="docker-compose.database.yml" + fi + + if [ -z "$compose_file" ]; then + log "⚠️ No shared docker-compose file found in $SHARED_DIR, skipping" + return 0 + fi + + docker-compose -f "$compose_file" down } start_provider() { @@ -143,17 +168,54 @@ show_status() { fi # Check providers - ports=(4001 4002 4004) + ports=(4001 4002 4003) provider_names=("openrouter" "nebulablock" "chutesai") - + + # helper: find LITELLM_MASTER_KEY for a provider from .env files or environment + get_provider_master_key() { + local provider=$1 + local key="" + # look for .env* files in the provider dir + for f in "$SCRIPT_DIR/$provider"/.env*; do + if [ -f "$f" ]; then + key=$(grep -E '^\s*LITELLM_MASTER_KEY=' "$f" 2>/dev/null | head -n1 | cut -d'=' -f2- | tr -d '\r' | tr -d '"') + if [ -n "$key" ]; then + echo "$key" + return 0 + fi + fi + done + + # fallback: check env var named LITELLM_MASTER_KEY_ + local up=$(echo "$provider" | tr '[:lower:]' '[:upper:]') + local varname="LITELLM_MASTER_KEY_$up" + eval val=\$$varname + if [ -n "$val" ]; then + echo "$val" + return 0 + fi + + # nothing found + echo "" + } + for i in "${!ports[@]}"; do port=${ports[$i]} name=${provider_names[$i]} - - if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then - log "βœ… $name (port $port): healthy" + + key=$(get_provider_master_key "$name") + if [ -n "$key" ]; then + if curl -sf -H "Authorization: Bearer $key" "http://localhost:$port/health" >/dev/null 2>&1; then + log "βœ… $name (port $port): healthy" + else + log "❌ $name (port $port): unhealthy or not running" + fi else - log "❌ $name (port $port): unhealthy or not running" + if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then + log "βœ… $name (port $port): healthy" + else + log "❌ $name (port $port): unhealthy or not running (no master key found)" + fi fi done } @@ -193,7 +255,7 @@ test_all_services() { fi # Test provider services - ports=(4001 4002 4004) + ports=(4001 4002 4003) provider_names=("openrouter" "nebulablock" "chutesai") for i in "${!ports[@]}"; do @@ -202,14 +264,47 @@ test_all_services() { log "Testing $name service on port $port..." 
- if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then + # include Authorization header if provider has a master key configured + key="" + for f in "$SCRIPT_DIR/$name"/.env*; do + if [ -f "$f" ]; then + key=$(grep -E '^\s*LITELLM_MASTER_KEY=' "$f" 2>/dev/null | head -n1 | cut -d'=' -f2- | tr -d '\r' | tr -d '"') + if [ -n "$key" ]; then + break + fi + fi + done + + if [ -n "$key" ]; then + if curl -sf -H "Authorization: Bearer $key" "http://localhost:$port/health" >/dev/null 2>&1; then + ok=1 + else + ok=0 + fi + else + if curl -sf "http://localhost:$port/health" >/dev/null 2>&1; then + ok=1 + else + ok=0 + fi + fi + + if [ "$ok" -eq 1 ]; then log "βœ… $name health check passed" - + # Test models endpoint - if curl -sf "http://localhost:$port/v1/models" >/dev/null 2>&1; then - log "βœ… $name models endpoint accessible" + if [ -n "$key" ]; then + if curl -sf -H "Authorization: Bearer $key" "http://localhost:$port/v1/models" >/dev/null 2>&1; then + log "βœ… $name models endpoint accessible" + else + log "⚠️ $name models endpoint not accessible" + fi else - log "⚠️ $name models endpoint not accessible" + if curl -sf "http://localhost:$port/v1/models" >/dev/null 2>&1; then + log "βœ… $name models endpoint accessible" + else + log "⚠️ $name models endpoint not accessible" + fi fi else log "❌ $name service not responding" diff --git a/infra/litellm/nebulablock/docker-compose.nebulablock.yml b/infra/litellm/nebulablock/docker-compose.nebulablock.yml index 7e52595..cea21ee 100644 --- a/infra/litellm/nebulablock/docker-compose.nebulablock.yml +++ b/infra/litellm/nebulablock/docker-compose.nebulablock.yml @@ -12,7 +12,7 @@ services: command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:4000/health"] + test: ["CMD-SHELL", "if [ -n \"$${LITELLM_MASTER_KEY}\" ]; then curl -sf -H \"Authorization: Bearer $${LITELLM_MASTER_KEY}\" http://localhost:4000/health; else curl -sf http://localhost:4000/health; fi"] interval: 30s timeout: 10s retries: 3 diff --git a/infra/litellm/openrouter/docker-compose.openrouter.yml b/infra/litellm/openrouter/docker-compose.openrouter.yml index 5297368..bcb52c4 100644 --- a/infra/litellm/openrouter/docker-compose.openrouter.yml +++ b/infra/litellm/openrouter/docker-compose.openrouter.yml @@ -12,7 +12,7 @@ services: command: ["--config", "/app/config.yaml", "--port", "4000", "--num_workers", "1"] restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:4000/health"] + test: ["CMD-SHELL", "if [ -n \"$${LITELLM_MASTER_KEY}\" ]; then curl -sf -H \"Authorization: Bearer $${LITELLM_MASTER_KEY}\" http://localhost:4000/health; else curl -sf http://localhost:4000/health; fi"] interval: 30s timeout: 10s retries: 3 From de8af37a5dd51e794dcfe66b51153151c77690c9 Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Tue, 30 Sep 2025 22:26:11 +0100 Subject: [PATCH 08/10] feat: Migrate from docker-compose to docker compose for service management --- infra/litellm/manage-all-services.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/infra/litellm/manage-all-services.sh b/infra/litellm/manage-all-services.sh index 5cd4eb6..7be0a7d 100755 --- a/infra/litellm/manage-all-services.sh +++ b/infra/litellm/manage-all-services.sh @@ -37,7 +37,7 @@ start_shared_services() { return 1 fi - docker-compose -f "$compose_file" up -d + docker compose -f "$compose_file" up -d # Wait for database to be ready log "⏳ Waiting for shared 
database to be ready..." @@ -72,7 +72,7 @@ stop_shared_services() { return 0 fi - docker-compose -f "$compose_file" down + docker compose -f "$compose_file" down } start_provider() { @@ -99,7 +99,7 @@ start_provider() { return 1 fi - docker-compose -f "$compose_file" up -d + docker compose -f "$compose_file" up -d log "βœ… $provider service started" } @@ -122,7 +122,7 @@ stop_provider() { done if [ -n "$compose_file" ]; then - docker-compose -f "$compose_file" down + docker compose -f "$compose_file" down fi log "βœ… $provider service stopped" } @@ -224,7 +224,7 @@ show_logs() { local service=$1 if [ "$service" = "shared" ]; then cd "$SHARED_DIR" - docker-compose -f docker-compose.shared.yml logs -f + docker compose -f docker-compose.shared.yml logs -f elif [ -n "$service" ] && [ -d "$SCRIPT_DIR/$service" ]; then cd "$SCRIPT_DIR/$service" compose_file="" @@ -236,7 +236,7 @@ show_logs() { done if [ -n "$compose_file" ]; then - docker-compose -f "$compose_file" logs -f + docker compose -f "$compose_file" logs -f fi else log "πŸ“‹ Showing logs for all services (press Ctrl+C to exit):" From 0dc62871cf26266e6ec37200fae1a1c31ec2eb5e Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Wed, 1 Oct 2025 04:12:17 +0100 Subject: [PATCH 09/10] feat: Add configuration files for ChutesAI, NebulaBlock, and OpenRouter models --- .../chutesai/proxy/proxy.chutesai.toml | 342 ++++++++++++++++++ .../nebulablock/proxy/proxy.nebulablock.toml | 165 +++++++++ .../openrouter/proxy/proxy.openrouter.toml | 181 +++++++++ 3 files changed, 688 insertions(+) create mode 100644 infra/litellm/chutesai/proxy/proxy.chutesai.toml create mode 100644 infra/litellm/nebulablock/proxy/proxy.nebulablock.toml create mode 100644 infra/litellm/openrouter/proxy/proxy.openrouter.toml diff --git a/infra/litellm/chutesai/proxy/proxy.chutesai.toml b/infra/litellm/chutesai/proxy/proxy.chutesai.toml new file mode 100644 index 0000000..c037605 --- /dev/null +++ b/infra/litellm/chutesai/proxy/proxy.chutesai.toml @@ -0,0 +1,342 @@ +[router] +url = "http://localhost:8001" +api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg" + +[endpoint] +url = "http://127.0.0.1:4001/v1/chat/completions" +api-key = "sk-EyO5DwID9Sm_WwzJawRPug" + +[metadata] +id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq" +name = "AiMo Network - ChutesAI" +category = "completion_model" + +# ------------------------------- +# Free Models (0.0 pricing) +# ------------------------------- +[[metadata.models]] +name = "glm-4_5-air-free" +display_name = "GLM 4.5 Air" +provider_name = "zhipu" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "openai-gpt-oss-20b-free" +display_name = "GPT-OSS 20B" +provider_name = "openai" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "gemma-3-4b-it-free" +display_name = "Gemma 3 4B IT" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "longcat-flash-chat-fp8-free" +display_name = "LongCat Flash Chat FP8" +provider_name = "meituan" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "longcat-flash-thinking-fp8-free" +display_name = "LongCat Flash Thinking FP8" +provider_name = "meituan" +[[metadata.models.pricing]] +token = "USDC_9" 
+input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "tongyi-deepresearch-30b-free" +display_name = "Tongyi DeepResearch 30B" +provider_name = "alibaba" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +# ------------------------------- +# Budget Models (Low cost) +# ------------------------------- +[[metadata.models]] +name = "llama-3_2-1b-instruct" +display_name = "LLaMA 3.2 1B Instruct" +provider_name = "meta" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 10 + +[[metadata.models]] +name = "llama-3_2-3b-instruct" +display_name = "LLaMA 3.2 3B Instruct" +provider_name = "meta" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 10 + +[[metadata.models]] +name = "gemma-2-9b-it" +display_name = "Gemma 2 9B IT" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 20 + +[[metadata.models]] +name = "gemmasutra-pro-27b" +display_name = "Gemmasutra Pro 27B" +provider_name = "thedrummer" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 30 + +[[metadata.models]] +name = "dolphin3-r1-mistral-24b" +display_name = "Dolphin 3.0 R1 Mistral 24B" +provider_name = "cognitivecomputations" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 30 + +# ------------------------------- +# Mid-range Models +# ------------------------------- +[[metadata.models]] +name = "gemma-3-12b-it" +display_name = "Gemma 3 12B IT" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 40 +output_price = 140 + +[[metadata.models]] +name = "hermes-4-14b" +display_name = "Hermes 4 14B" +provider_name = "nousresearch" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 50 + +[[metadata.models]] +name = "deephermes-3-llama-3-8b" +display_name = "DeepHermes 3 LLaMA 3 8B" +provider_name = "nousresearch" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 50 + +[[metadata.models]] +name = "deepseek-r1-0528-qwen3-8b" +display_name = "DeepSeek R1 0528 Qwen3 8B" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 10 +output_price = 50 + +[[metadata.models]] +name = "mistral-nemo-instruct" +display_name = "Mistral Nemo Instruct" +provider_name = "mistral" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 20 +output_price = 70 + +[[metadata.models]] +name = "deepcoder-14b-preview" +display_name = "DeepCoder 14B Preview" +provider_name = "agentica" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 20 +output_price = 70 + +[[metadata.models]] +name = "kimi-vl-a3b-thinking" +display_name = "Kimi VL A3B Thinking" +provider_name = "moonshot" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 20 +output_price = 70 + +[[metadata.models]] +name = "openhands-lm-32b" +display_name = "OpenHands LM 32B" +provider_name = "allhands" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 20 +output_price = 70 + +[[metadata.models]] +name = "kimi-dev-72b" +display_name = "Kimi Dev 72B" +provider_name = "moonshot" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 70 +output_price = 260 + +[[metadata.models]] +name = "llama-3_3-nemotron-super-49b" +display_name = "LLaMA 3.3 Nemotron Super 49B" +provider_name = "nvidia" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 70 +output_price = 260 + +# ------------------------------- +# High-end 
+# -------------------------------
+[[metadata.models]]
+name = "qwen3-30b-a3b-thinking"
+display_name = "Qwen3 30B A3B Thinking"
+provider_name = "alibaba"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 80
+output_price = 290
+
+[[metadata.models]]
+name = "glm-4_5v"
+display_name = "GLM 4.5V"
+provider_name = "zhipu"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 80
+output_price = 330
+
+[[metadata.models]]
+name = "hunyuan-a13b-instruct"
+display_name = "Hunyuan A13B Instruct"
+provider_name = "tencent"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 40
+output_price = 140
+
+[[metadata.models]]
+name = "mistral-small-3_2-24b"
+display_name = "Mistral Small 3.2 24B"
+provider_name = "mistral"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 140
+output_price = 570
+
+[[metadata.models]]
+name = "qwen3-next-80b-a3b-thinking"
+display_name = "Qwen3 Next 80B A3B Thinking"
+provider_name = "alibaba"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 100
+output_price = 800
+
+[[metadata.models]]
+name = "seed-oss-36b-instruct"
+display_name = "Seed OSS 36B Instruct"
+provider_name = "bytedance"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 160
+output_price = 650
+
+[[metadata.models]]
+name = "qwen3-vl-235b-a22b-thinking"
+display_name = "Qwen3 VL 235B A22B Thinking"
+provider_name = "alibaba"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 160
+output_price = 650
+
+[[metadata.models]]
+name = "deepseek-v3_1-base"
+display_name = "DeepSeek V3.1 Base"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 250
+output_price = 1000
+
+# -------------------------------
+# Premium DeepSeek Models
+# -------------------------------
+[[metadata.models]]
+name = "deepseek-r1"
+display_name = "DeepSeek R1"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 250
+output_price = 1000
+
+[[metadata.models]]
+name = "deepseek-v3"
+display_name = "DeepSeek V3"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 250
+output_price = 1000
+
+[[metadata.models]]
+name = "deepseek-v3_1"
+display_name = "DeepSeek V3.1"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 250
+output_price = 1000
+
+[[metadata.models]]
+name = "deepseek-r1-0528"
+display_name = "DeepSeek R1 0528"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 550
+output_price = 1750
+
+[[metadata.models]]
+name = "deepseek-v3_1-turbo"
+display_name = "DeepSeek V3.1 Turbo"
+provider_name = "deepseek"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 1000
+output_price = 3000
+
+[[metadata.models]]
+name = "hermes-4-405b-fp8"
+display_name = "Hermes 4 405B FP8"
+provider_name = "nousresearch"
+[[metadata.models.pricing]]
+token = "USDC_9"
+input_price = 250
+output_price = 1000
diff --git a/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml b/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml
new file mode 100644
index 0000000..6a373ed
--- /dev/null
+++ b/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml
@@ -0,0 +1,165 @@
+[router]
+url = "http://localhost:8002"
+api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg"
+
+[endpoint]
+url = "http://127.0.0.1:4002/v1/chat/completions"
+api-key = "sk-EyO5DwID9Sm_WwzJawRPug" + +[metadata] +id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq" +name = "AiMo Network - NebulaBlock" +category = "completion_model" + +# ------------------------------- +# OpenAI Models +# ------------------------------- +[[metadata.models]] +name = "gpt-4o-mini" +display_name = "GPT-4o Mini" +provider_name = "openai" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 400 +output_price = 1600 + +# ------------------------------- +# Google Gemini Models +# ------------------------------- +[[metadata.models]] +name = "gemini-2_5-pro" +display_name = "Gemini 2.5 Pro" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 1000 +output_price = 8000 + +[[metadata.models]] +name = "gemini-2_5-flash" +display_name = "Gemini 2.5 Flash" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 240 +output_price = 2000 + +[[metadata.models]] +name = "gemini-2_5-flash-lite" +display_name = "Gemini 2.5 Flash Lite" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 80 +output_price = 320 + +[[metadata.models]] +name = "gemini-2_0-flash" +display_name = "Gemini 2.0 Flash" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 80 +output_price = 320 + +[[metadata.models]] +name = "gemini-2_0-flash-lite" +display_name = "Gemini 2.0 Flash Lite" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 60 +output_price = 240 + +# ------------------------------- +# Community Models (Free) +# ------------------------------- +[[metadata.models]] +name = "l3-ms-nevoria-70b" +display_name = "L3.3 MS Nevoria 70B" +provider_name = "steelskull" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "mistral-small-free" +display_name = "Mistral Small 3.2 24B" +provider_name = "mistral" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "l3-70b-euryale" +display_name = "L3 70B Euryale v2.1" +provider_name = "sao10k" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "l3-8b-stheno" +display_name = "L3 8B Stheno v3.2" +provider_name = "sao10k" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +# ------------------------------- +# DeepSeek Models (Free) +# ------------------------------- +[[metadata.models]] +name = "deepseek-r1-0528-free" +display_name = "DeepSeek R1 0528" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "deepseek-v3-0324-free" +display_name = "DeepSeek V3 0324" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +[[metadata.models]] +name = "deepseek-r1-free" +display_name = "DeepSeek R1" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +# ------------------------------- +# Meta Llama Models +# ------------------------------- +[[metadata.models]] +name = "llama-3_3-70b" +display_name = "LLaMA 3.3 70B Instruct" +provider_name = "meta" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 100 +output_price = 300 + +# ------------------------------- +# Qwen Models +# ------------------------------- +[[metadata.models]] +name = "qwq-32b" +display_name = "QwQ 32B" 
+provider_name = "alibaba" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 150 +output_price = 400 diff --git a/infra/litellm/openrouter/proxy/proxy.openrouter.toml b/infra/litellm/openrouter/proxy/proxy.openrouter.toml new file mode 100644 index 0000000..69c44d7 --- /dev/null +++ b/infra/litellm/openrouter/proxy/proxy.openrouter.toml @@ -0,0 +1,181 @@ +[router] +url = "http://localhost:8000" +api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg" + +[endpoint] +url = "http://127.0.0.1:4000/v1/chat/completions" +api-key = "sk-EyO5DwID9Sm_WwzJawRPug" + +[metadata] +id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq" +name = "AiMo Network" +category = "completion_model" + +# ------------------------------- +# DeepSeek +# ------------------------------- +[[metadata.models]] +name = "deepseek-chat-v3" +display_name = "DeepSeek Chat V3 0324" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 200 +output_price = 800 + +[[metadata.models]] +name = "deepseek-chat-v3_1" +display_name = "DeepSeek Chat V3.1" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 200 +output_price = 800 + +[[metadata.models]] +name = "deepseek-r1" +display_name = "DeepSeek R1" +provider_name = "deepseek" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 200 +output_price = 800 + +# ------------------------------- +# OpenAI +# ------------------------------- + +[[metadata.models]] +name = "gpt-oss-20b-free" +display_name = "GPT-OSS 20B" +provider_name = "openai" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 72 +output_price = 280 + +# ------------------------------- +# Meta (LLaMA) +# ------------------------------- +[[metadata.models]] +name = "llama-3_3-70b-free" +display_name = "LLaMA 3.3 70B Instruct" +provider_name = "meta" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 38 +output_price = 120 + +# ------------------------------- +# Moonshot +# ------------------------------- +#[[metadata.models]] +#name = "kimi-k2-free" +#display_name = "Kimi K2" +#provider_name = "moonshot" +#[[metadata.models.pricing]] +#token = "USDC_9" +#input_price = 140 +#output_price = 2490 + +# ------------------------------- +# Qwen +# ------------------------------- +[[metadata.models]] +name = "qwq-32b-free" +display_name = "QwQ 32B" +provider_name = "alibaba" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 75 +output_price = 150 + +[[metadata.models]] +name = "qwen3-235b-a22b" +display_name = "Qwen3 235B" +provider_name = "alibaba" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 75 +output_price = 150 + +# ------------------------------- +# Google +# ------------------------------- +[[metadata.models]] +name = "gemma-3-27b-free" +display_name = "Gemma 3 27B" +provider_name = "google" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 67 +output_price = 267 + +# ------------------------------- +# Mistral +# ------------------------------- +[[metadata.models]] +name = "mistral-small-free" +display_name = "Mistral Small 3.2 24B" +provider_name = "mistral" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 50 +output_price = 100 + +# ------------------------------- +# Venice (Dolphin) +# ------------------------------- +[[metadata.models]] +name = "dolphin-mistral-free" +display_name = "Venice Uncensored" 
+provider_name = "cognitivecomputations" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 0 +output_price = 0 + +# ------------------------------- +# xAI +# ------------------------------- +[[metadata.models]] +name = "grok-3-mini" +display_name = "Grok 3 Mini" +provider_name = "xai" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 300 +output_price = 500 + +# ------------------------------- +# Zhipu +# ------------------------------- +[[metadata.models]] +name = "glm-4_5" +display_name = "GLM 4.5" +provider_name = "zhipu" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 200 +output_price = 800 + +# ------------------------------- +# NousResearch +# ------------------------------- +[[metadata.models]] +name = "nousresearch-hermes-4-70b" +display_name = "Hermes 4 70B" +provider_name = "nousresearch" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 380 +output_price = 1200 + +[[metadata.models]] +name = "nousresearch-hermes-3-llama-3_1-70b" +display_name = "Hermes 3 70B" +provider_name = "nousresearch" +[[metadata.models.pricing]] +token = "USDC_9" +input_price = 93 +output_price = 373 \ No newline at end of file From 057c1c042a9b798653f5aa4f9bbd319c9b75802d Mon Sep 17 00:00:00 2001 From: Wes1eyyy Date: Thu, 2 Oct 2025 01:32:46 +0100 Subject: [PATCH 10/10] feat: Update proxy configuration files for ChutesAI, NebulaBlock, and OpenRouter models --- infra/litellm/chutesai/proxy/proxy.chutesai.toml | 8 ++++---- infra/litellm/nebulablock/proxy/proxy.nebulablock.toml | 8 ++++---- infra/litellm/openrouter/proxy/proxy.openrouter.toml | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/infra/litellm/chutesai/proxy/proxy.chutesai.toml b/infra/litellm/chutesai/proxy/proxy.chutesai.toml index c037605..0f914b6 100644 --- a/infra/litellm/chutesai/proxy/proxy.chutesai.toml +++ b/infra/litellm/chutesai/proxy/proxy.chutesai.toml @@ -1,13 +1,13 @@ [router] -url = "http://localhost:8001" -api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg" +url = "http://localhost:8000" +api-key = "aimo-sk-dev-key" [endpoint] url = "http://127.0.0.1:4001/v1/chat/completions" -api-key = "sk-EyO5DwID9Sm_WwzJawRPug" +api-key = "sk-key" [metadata] -id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq" +id = "solana id" name = "AiMo Network - ChutesAI" category = "completion_model" diff --git a/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml b/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml index 6a373ed..481971c 100644 --- a/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml +++ b/infra/litellm/nebulablock/proxy/proxy.nebulablock.toml @@ -1,13 +1,13 @@ [router] -url = "http://localhost:8002" -api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg" +url = "http://localhost:8000" +api-key = "aimo-sk-dev-key" [endpoint] url = "http://127.0.0.1:4002/v1/chat/completions" -api-key = "sk-EyO5DwID9Sm_WwzJawRPug" +api-key = "api-key" [metadata] -id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq" +id = "solana id" name = "AiMo Network - NebulaBlock" category = "completion_model" diff --git a/infra/litellm/openrouter/proxy/proxy.openrouter.toml b/infra/litellm/openrouter/proxy/proxy.openrouter.toml index 69c44d7..4897cd9 100644 --- 
+++ b/infra/litellm/openrouter/proxy/proxy.openrouter.toml
@@ -1,13 +1,13 @@
 [router]
 url = "http://localhost:8000"
-api-key = "aimo-sk-dev-2zpjTUg1bwBvnLEGuw14H7YgYirodkpi1VLf8KV2PA1eEBRFanUPispRpqSBu7fqSX6Fhj4KGFzmQ5gpcRG4CnXgyTu6WLD7g8jU8LoVkXpQ6K3dLYwHJgmmSBui3FryyJ9Cqu821RRERnzXWPbuLXzXRmM6BFf1y5Kj773Ch8BLBZNGg"
+api-key = "aimo-sk-dev-key"
 
 [endpoint]
 url = "http://127.0.0.1:4000/v1/chat/completions"
-api-key = "sk-EyO5DwID9Sm_WwzJawRPug"
+api-key = "sk-key"
 
 [metadata]
-id = "8W7X1tGnWh9CXwnPD7wgke31Gdcqmex4LapJvQ2afBUq"
+id = "solana ID"
 name = "AiMo Network"
 category = "completion_model"
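
Each [endpoint] block in these configs points at a LiteLLM proxy exposing the
OpenAI-compatible /v1/chat/completions API, so the quickest way to check that a
provider is wired up is a curl against that URL with the endpoint api-key and
one of the model names declared under [[metadata.models]]. A minimal sketch,
assuming the "sk-key" placeholder from PATCH 10 and the "deepseek-r1" entry
from the OpenRouter config (substitute whatever key and model your LiteLLM
instance actually serves):

    # Smoke-test the OpenRouter-backed proxy on port 4000.
    curl -s http://127.0.0.1:4000/v1/chat/completions \
      -H "Authorization: Bearer sk-key" \
      -H "Content-Type: application/json" \
      -d '{"model": "deepseek-r1", "messages": [{"role": "user", "content": "ping"}]}'

A 200 response carrying a "choices" array confirms the endpoint URL and key
match; a 401 means the api-key here disagrees with the key the LiteLLM proxy
was started with.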
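PATCH 10 replaces the live router and endpoint keys with placeholders. A simple
guard against the original secrets creeping back in is a grep over the config
tree for their distinctive prefixes, a sketch assuming the repo root as the
working directory:

    # Fail loudly if any of the previously committed key prefixes reappear.
    if grep -rn 'aimo-sk-dev-2zpj\|sk-EyO5' infra/litellm/; then
        echo "live-looking keys found above; scrub before pushing" >&2
        exit 1
    fi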