Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions app/api/v1/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,36 @@ def get_configured_models() -> list[ModelInfo]:
capabilities=["image_generation"],
pricing={"prompt": 0.00012, "completion": 0.0},
),
# NVIDIA Nemotron
ModelInfo(
id="nvidia/llama-3.3-nemotron-super-49b-v1.5",
name="Nemotron Super 49B v1.5",
provider="openrouter",
capabilities=["text"],
context_length=131072,
),
ModelInfo(
id="nvidia/llama-3.1-nemotron-70b-instruct",
name="Nemotron 70B Instruct",
provider="openrouter",
capabilities=["text"],
context_length=131072,
),
# NousResearch Hermes
ModelInfo(
id="nousresearch/hermes-4-70b",
name="Hermes 4 70B",
provider="openrouter",
capabilities=["text"],
context_length=131072,
),
ModelInfo(
id="nousresearch/hermes-3-llama-3.1-405b",
name="Hermes 3 405B",
provider="openrouter",
capabilities=["text"],
context_length=131072,
),
])

return models
Expand Down
10 changes: 7 additions & 3 deletions app/api/v1/timepoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,10 +234,14 @@ def _get_permissive_text_model() -> str:
preference = [
"meta-llama/llama-4-scout-17b-16e-instruct",
"meta-llama/llama-4-maverick-17b-128e-instruct",
"deepseek/deepseek-chat-v3-0324", # Fast chat model
"qwen/qwen3-30b-a3b", # Fast MoE model
"nvidia/llama-3.3-nemotron-super-49b-v1.5", # Nemotron Super, fast MoE
"nousresearch/hermes-4-70b", # Hermes 4 70B, strong reasoning
"deepseek/deepseek-chat-v3-0324", # Fast chat model
"qwen/qwen3-30b-a3b", # Fast MoE model
"nvidia/nemotron-3-nano-30b-a3b", # Nemotron Nano, very fast
"nousresearch/hermes-3-llama-3.1-70b", # Hermes 3 70B fallback
"mistralai/mistral-small-3.2-24b-instruct",
"qwen/qwen3-235b-a22b", # Large but non-thinking
"qwen/qwen3-235b-a22b", # Large but non-thinking
"deepseek/deepseek-r1-0528", # Thinking model — slow, last resort
]
for model_id in preference:
Expand Down
17 changes: 17 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,23 @@ class VerifiedModels:
"google/gemini-2.0-flash-001", # Fast, handles JSON well
"google/gemini-2.0-flash-001:free", # Free tier (rate limited)
"google/gemini-3-flash-preview", # Latest thinking model, agentic workflows
# NVIDIA Nemotron family
"nvidia/llama-3.1-nemotron-70b-instruct",
"nvidia/llama-3.3-nemotron-super-49b-v1.5",
"nvidia/nemotron-3-nano-30b-a3b",
"nvidia/nemotron-3-nano-30b-a3b:free",
"nvidia/nemotron-3-super-120b-a12b:free",
"nvidia/nemotron-nano-12b-v2-vl",
"nvidia/nemotron-nano-12b-v2-vl:free",
"nvidia/nemotron-nano-9b-v2",
"nvidia/nemotron-nano-9b-v2:free",
# NousResearch Hermes family
"nousresearch/hermes-2-pro-llama-3-8b",
"nousresearch/hermes-3-llama-3.1-405b",
"nousresearch/hermes-3-llama-3.1-405b:free",
"nousresearch/hermes-3-llama-3.1-70b",
"nousresearch/hermes-4-405b",
"nousresearch/hermes-4-70b",
]

# Fallback chains - ordered by preference
Expand Down
167 changes: 167 additions & 0 deletions app/core/model_capabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,173 @@ class TextModelConfig:
max_output_tokens=16384,
notes="GPT-4o Mini via OpenRouter",
),
# NVIDIA Nemotron family (via OpenRouter)
"nvidia/llama-3.1-nemotron-70b-instruct": TextModelConfig(
model_id="nvidia/llama-3.1-nemotron-70b-instruct",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron 70B instruct, strong reasoning",
),
"nvidia/llama-3.3-nemotron-super-49b-v1.5": TextModelConfig(
model_id="nvidia/llama-3.3-nemotron-super-49b-v1.5",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron Super 49B v1.5, efficient MoE",
),
"nvidia/nemotron-3-nano-30b-a3b": TextModelConfig(
model_id="nvidia/nemotron-3-nano-30b-a3b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron 3 Nano 30B, fast MoE (3B active)",
),
"nvidia/nemotron-3-nano-30b-a3b:free": TextModelConfig(
model_id="nvidia/nemotron-3-nano-30b-a3b:free",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron 3 Nano 30B free tier",
),
"nvidia/nemotron-3-super-120b-a12b:free": TextModelConfig(
model_id="nvidia/nemotron-3-super-120b-a12b:free",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron 3 Super 120B free tier, large MoE (12B active)",
),
"nvidia/nemotron-nano-12b-v2-vl": TextModelConfig(
model_id="nvidia/nemotron-nano-12b-v2-vl",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron Nano 12B v2 with vision",
),
"nvidia/nemotron-nano-12b-v2-vl:free": TextModelConfig(
model_id="nvidia/nemotron-nano-12b-v2-vl:free",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron Nano 12B v2 with vision, free tier",
),
"nvidia/nemotron-nano-9b-v2": TextModelConfig(
model_id="nvidia/nemotron-nano-9b-v2",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron Nano 9B v2, compact and fast",
),
"nvidia/nemotron-nano-9b-v2:free": TextModelConfig(
model_id="nvidia/nemotron-nano-9b-v2:free",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron Nano 9B v2 free tier",
),
# NousResearch Hermes family (via OpenRouter)
"nousresearch/hermes-2-pro-llama-3-8b": TextModelConfig(
model_id="nousresearch/hermes-2-pro-llama-3-8b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 2 Pro 8B, compact function-calling model",
),
"nousresearch/hermes-3-llama-3.1-405b": TextModelConfig(
model_id="nousresearch/hermes-3-llama-3.1-405b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 3 405B, flagship open-weight model",
),
"nousresearch/hermes-3-llama-3.1-405b:free": TextModelConfig(
model_id="nousresearch/hermes-3-llama-3.1-405b:free",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 3 405B free tier",
),
"nousresearch/hermes-3-llama-3.1-70b": TextModelConfig(
model_id="nousresearch/hermes-3-llama-3.1-70b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 3 70B, strong general-purpose model",
),
"nousresearch/hermes-4-405b": TextModelConfig(
model_id="nousresearch/hermes-4-405b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 4 405B, latest flagship",
),
"nousresearch/hermes-4-70b": TextModelConfig(
model_id="nousresearch/hermes-4-70b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 4 70B, strong reasoning",
),
}

# Default config for unknown models (conservative - assume JSON mode works)
Expand Down
10 changes: 6 additions & 4 deletions app/core/model_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,16 @@
"microsoft/", # Phi family
"google/gemma", # Gemma open-weight
"allenai/",
"nvidia/",
"nvidia/", # Nemotron family
"nousresearch/", # Hermes family (open-weight)
"black-forest-labs/", # FLUX open-weight image models
)

# Google-native model prefixes (always restricted)
GOOGLE_MODEL_PREFIXES = ("gemini", "imagen", "flux-schnell")

# Prefixes routed through OpenRouter (may be restricted or permissive)
OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/", "deepseek/", "qwen/", "microsoft/", "black-forest-labs/")
OPENROUTER_PREFIXES = ("meta-llama/", "anthropic/", "mistralai/", "openai/", "deepseek/", "qwen/", "microsoft/", "nvidia/", "nousresearch/", "black-forest-labs/")


def derive_model_provider(model_id: str | None) -> str:
Expand All @@ -47,8 +48,9 @@ def is_model_permissive(model_id: str | None) -> bool:
def derive_model_permissiveness(model_id: str | None) -> str:
"""Derive distillation licensing permissiveness from a model ID.

Open-weight models (Llama, DeepSeek, Qwen, Mistral, Phi, Gemma) are
'permissive' — safe for distillation and derivative works.
Open-weight models (Llama, DeepSeek, Qwen, Mistral, Phi, Gemma,
Nemotron, Hermes) are 'permissive' — safe for distillation and
derivative works.
Frontier models (Google Gemini, Anthropic, OpenAI) are 'restricted'.
"""
if not model_id:
Expand Down
Loading