From ec7e33bfb0e981becf6208553149d4be9fafa50c Mon Sep 17 00:00:00 2001
From: realityinspector
Date: Sat, 14 Mar 2026 08:12:14 -0600
Subject: [PATCH] feat: add NVIDIA Nemotron and NousResearch Hermes model families
---
 app/api/v1/models.py           |  30 ++++++
 app/api/v1/timepoints.py       |  10 +-
 app/config.py                  |  17 ++++
 app/core/model_capabilities.py | 167 +++++++++++++++++++++++++++++++++
 app/core/model_policy.py       |  10 +-
 5 files changed, 227 insertions(+), 7 deletions(-)

diff --git a/app/api/v1/models.py b/app/api/v1/models.py
index 4ae5ec7..14480fa 100644
--- a/app/api/v1/models.py
+++ b/app/api/v1/models.py
@@ -141,6 +141,36 @@ def get_configured_models() -> list[ModelInfo]:
             capabilities=["image_generation"],
             pricing={"prompt": 0.00012, "completion": 0.0},
         ),
+        # NVIDIA Nemotron
+        ModelInfo(
+            id="nvidia/llama-3.3-nemotron-super-49b-v1.5",
+            name="Nemotron Super 49B v1.5",
+            provider="openrouter",
+            capabilities=["text"],
+            context_length=131072,
+        ),
+        ModelInfo(
+            id="nvidia/llama-3.1-nemotron-70b-instruct",
+            name="Nemotron 70B Instruct",
+            provider="openrouter",
+            capabilities=["text"],
+            context_length=131072,
+        ),
+        # NousResearch Hermes
+        ModelInfo(
+            id="nousresearch/hermes-4-70b",
+            name="Hermes 4 70B",
+            provider="openrouter",
+            capabilities=["text"],
+            context_length=131072,
+        ),
+        ModelInfo(
+            id="nousresearch/hermes-3-llama-3.1-405b",
+            name="Hermes 3 405B",
+            provider="openrouter",
+            capabilities=["text"],
+            context_length=131072,
+        ),
     ])
 
     return models
diff --git a/app/api/v1/timepoints.py b/app/api/v1/timepoints.py
index fd40669..db9d2fb 100644
--- a/app/api/v1/timepoints.py
+++ b/app/api/v1/timepoints.py
@@ -234,10 +234,14 @@ def _get_permissive_text_model() -> str:
     preference = [
         "meta-llama/llama-4-scout-17b-16e-instruct",
         "meta-llama/llama-4-maverick-17b-128e-instruct",
-        "deepseek/deepseek-chat-v3-0324", # Fast chat model
-        "qwen/qwen3-30b-a3b", # Fast MoE model
+        "nvidia/llama-3.3-nemotron-super-49b-v1.5",  # Nemotron Super, fast dense model
+        "nousresearch/hermes-4-70b",  # Hermes 4 70B, strong reasoning
+        "deepseek/deepseek-chat-v3-0324",  # Fast chat model
+        "qwen/qwen3-30b-a3b",  # Fast MoE model
+        "nvidia/nemotron-3-nano-30b-a3b",  # Nemotron Nano, very fast
+        "nousresearch/hermes-3-llama-3.1-70b",  # Hermes 3 70B fallback
         "mistralai/mistral-small-3.2-24b-instruct",
-        "qwen/qwen3-235b-a22b", # Large but non-thinking
+        "qwen/qwen3-235b-a22b",  # Large but non-thinking
         "deepseek/deepseek-r1-0528", # Thinking model — slow, last resort
     ]
     for model_id in preference:
diff --git a/app/config.py b/app/config.py
index 593e57f..b05f1d5 100644
--- a/app/config.py
+++ b/app/config.py
@@ -104,6 +104,23 @@ class VerifiedModels:
         "google/gemini-2.0-flash-001", # Fast, handles JSON well
         "google/gemini-2.0-flash-001:free", # Free tier (rate limited)
         "google/gemini-3-flash-preview", # Latest thinking model, agentic workflows
+        # NVIDIA Nemotron family
+        "nvidia/llama-3.1-nemotron-70b-instruct",
+        "nvidia/llama-3.3-nemotron-super-49b-v1.5",
+        "nvidia/nemotron-3-nano-30b-a3b",
+        "nvidia/nemotron-3-nano-30b-a3b:free",
+        "nvidia/nemotron-3-super-120b-a12b:free",
+        "nvidia/nemotron-nano-12b-v2-vl",
+        "nvidia/nemotron-nano-12b-v2-vl:free",
+        "nvidia/nemotron-nano-9b-v2",
+        "nvidia/nemotron-nano-9b-v2:free",
+        # NousResearch Hermes family
+        "nousresearch/hermes-2-pro-llama-3-8b",
+        "nousresearch/hermes-3-llama-3.1-405b",
+        "nousresearch/hermes-3-llama-3.1-405b:free",
+        "nousresearch/hermes-3-llama-3.1-70b",
+        "nousresearch/hermes-4-405b",
+        "nousresearch/hermes-4-70b",
     ]
 
     # Fallback chains - ordered by preference
diff --git a/app/core/model_capabilities.py b/app/core/model_capabilities.py
index 6719121..628cb69 100644
--- a/app/core/model_capabilities.py
+++ b/app/core/model_capabilities.py
@@ -404,6 +404,173 @@ class TextModelConfig:
         max_output_tokens=16384,
         notes="GPT-4o Mini via OpenRouter",
     ),
+    # NVIDIA Nemotron family (via OpenRouter)
+    "nvidia/llama-3.1-nemotron-70b-instruct": TextModelConfig(
+        model_id="nvidia/llama-3.1-nemotron-70b-instruct",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Nemotron 70B instruct, strong reasoning",
+    ),
+    "nvidia/llama-3.3-nemotron-super-49b-v1.5": TextModelConfig(
+        model_id="nvidia/llama-3.3-nemotron-super-49b-v1.5",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Nemotron Super 49B v1.5, efficient NAS-pruned dense model",
+    ),
+    "nvidia/nemotron-3-nano-30b-a3b": TextModelConfig(
+        model_id="nvidia/nemotron-3-nano-30b-a3b",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Nemotron 3 Nano 30B, fast MoE (3B active)",
+    ),
+    "nvidia/nemotron-3-nano-30b-a3b:free": TextModelConfig(
+        model_id="nvidia/nemotron-3-nano-30b-a3b:free",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Nemotron 3 Nano 30B free tier",
+    ),
+    "nvidia/nemotron-3-super-120b-a12b:free": TextModelConfig(
+        model_id="nvidia/nemotron-3-super-120b-a12b:free",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Nemotron 3 Super 120B free tier, large MoE (12B active)",
+    ),
+    "nvidia/nemotron-nano-12b-v2-vl": TextModelConfig(
+        model_id="nvidia/nemotron-nano-12b-v2-vl",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Nemotron Nano 12B v2 with vision",
+    ),
+    "nvidia/nemotron-nano-12b-v2-vl:free": TextModelConfig(
+        model_id="nvidia/nemotron-nano-12b-v2-vl:free",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Nemotron Nano 12B v2 with vision, free tier",
+    ),
+    "nvidia/nemotron-nano-9b-v2": TextModelConfig(
+        model_id="nvidia/nemotron-nano-9b-v2",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Nemotron Nano 9B v2, compact and fast",
+    ),
+    "nvidia/nemotron-nano-9b-v2:free": TextModelConfig(
+        model_id="nvidia/nemotron-nano-9b-v2:free",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Nemotron Nano 9B v2 free tier",
+    ),
+    # NousResearch Hermes family (via OpenRouter)
+    "nousresearch/hermes-2-pro-llama-3-8b": TextModelConfig(
+        model_id="nousresearch/hermes-2-pro-llama-3-8b",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Hermes 2 Pro 8B, compact function-calling model",
+    ),
+    "nousresearch/hermes-3-llama-3.1-405b": TextModelConfig(
+        model_id="nousresearch/hermes-3-llama-3.1-405b",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Hermes 3 405B, flagship open-weight model",
+    ),
+    "nousresearch/hermes-3-llama-3.1-405b:free": TextModelConfig(
+        model_id="nousresearch/hermes-3-llama-3.1-405b:free",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Hermes 3 405B free tier",
+    ),
+    "nousresearch/hermes-3-llama-3.1-70b": TextModelConfig(
+        model_id="nousresearch/hermes-3-llama-3.1-70b",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Hermes 3 70B, strong general-purpose model",
+    ),
+    "nousresearch/hermes-4-405b": TextModelConfig(
+        model_id="nousresearch/hermes-4-405b",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Hermes 4 405B, latest flagship",
+    ),
+    "nousresearch/hermes-4-70b": TextModelConfig(
+        model_id="nousresearch/hermes-4-70b",
+        provider="openrouter",
+        supports_json_schema=False,
+        supports_json_mode=True,
+        supports_function_calling=True,
+        supports_streaming=True,
+        supports_extended_thinking=False,
+        max_output_tokens=8192,
+        notes="Hermes 4 70B, strong reasoning",
+    ),
 }
 
 # Default config for unknown models (conservative - assume JSON mode works)
diff --git a/app/core/model_policy.py b/app/core/model_policy.py
index ff8145c..74679ca 100644
--- a/app/core/model_policy.py
+++ b/app/core/model_policy.py
@@ -13,7 +13,8 @@
     "microsoft/",  # Phi family
     "google/gemma",  # Gemma open-weight
     "allenai/",
-    "nvidia/",
+    "nvidia/",  # Nemotron family
+    "nousresearch/",  # Hermes family (open-weight)
     "black-forest-labs/",  # FLUX open-weight image models
 )
 
@@ -21,7 +22,7 @@
 GOOGLE_MODEL_PREFIXES = ("gemini", "imagen", "flux-schnell")
 
 # Prefixes routed through OpenRouter (may be restricted or permissive)
-OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/", "deepseek/", "qwen/", "microsoft/", "black-forest-labs/")
+OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/", "deepseek/", "qwen/", "microsoft/", "nvidia/", "nousresearch/", "black-forest-labs/")
 
 
 def derive_model_provider(model_id: str | None) -> str:
@@ -47,8 +48,9 @@ def is_model_permissive(model_id: str | None) -> bool:
 def derive_model_permissiveness(model_id: str | None) -> str:
     """Derive distillation licensing permissiveness from a model ID.
 
-    Open-weight models (Llama, DeepSeek, Qwen, Mistral, Phi, Gemma) are
-    'permissive' — safe for distillation and derivative works.
+    Open-weight models (Llama, DeepSeek, Qwen, Mistral, Phi, Gemma,
+    Nemotron, Hermes) are 'permissive' — safe for distillation and
+    derivative works.
     Frontier models (Google Gemini, Anthropic, OpenAI) are 'restricted'.
     """
     if not model_id:
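
Reviewer note: for a quick sanity check of the routing change without booting the app, the sketch below mirrors the prefix logic this patch extends. It is an illustration, not the module itself: the constant name OPEN_WEIGHT_PREFIXES and the body of is_model_permissive are assumptions (the hunks show only the tuple's tail and the function's signature), and the first four prefixes are inferred from the docstring rather than visible in the diff.

    # Standalone sketch of app/core/model_policy.py's prefix routing after
    # this patch. OPEN_WEIGHT_PREFIXES is an assumed name, and its first four
    # entries are inferred from the docstring; only the tail appears in the
    # hunk. is_model_permissive's body is likewise assumed from its signature.
    OPEN_WEIGHT_PREFIXES = (
        "meta-llama/",
        "deepseek/",
        "qwen/",
        "mistralai/",
        "microsoft/",          # Phi family
        "google/gemma",        # Gemma open-weight
        "allenai/",
        "nvidia/",             # Nemotron family (added by this patch)
        "nousresearch/",       # Hermes family (added by this patch)
        "black-forest-labs/",  # FLUX open-weight image models
    )


    def is_model_permissive(model_id: str | None) -> bool:
        """Assumed implementation: permissive iff the ID has an open-weight prefix."""
        if not model_id:
            return False
        # str.startswith accepts a tuple of prefixes, so one call covers the list.
        return model_id.startswith(OPEN_WEIGHT_PREFIXES)


    # Every model this patch registers should now report as permissive, which
    # is what lets _get_permissive_text_model() place the new Nemotron and
    # Hermes IDs in its preference list. The ":free" variants share the same
    # "nvidia/" prefix, so they need no separate handling.
    assert is_model_permissive("nvidia/llama-3.3-nemotron-super-49b-v1.5")
    assert is_model_permissive("nousresearch/hermes-4-70b")
    assert is_model_permissive("nvidia/nemotron-nano-9b-v2:free")
    assert not is_model_permissive("anthropic/claude-3.5-sonnet")
    assert not is_model_permissive(None)

If the real constant or helper differs, only the names change; the behavioral claim of the patch is simply two new entries in each prefix tuple.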