Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,10 +307,10 @@ Open-source engines for temporal AI. Render the past. Simulate the future. Score
| **SNAG Bench** | Open Source | timepoint-snag-bench | Quality Certifier — measures Causal Resolution across renderings |
| **Proteus** | Open Source | proteus | Settlement Layer — prediction markets that validate Rendered Futures |
| **TDF** | Open Source | timepoint-tdf | Data Format — JSON-LD interchange across all services |
| **Web App** | Private | timepoint-web-app | Browser client at app.timepointai.com |
| **iPhone App** | Private | timepoint-iphone-app | iOS client — Synthetic Time Travel on mobile |
| **Billing** | Private | timepoint-billing | Payment processing — Apple IAP + Stripe |
| **Landing** | Private | timepoint-landing | Marketing site at timepointai.com |
| **Web App** | | | Browser client at app.timepointai.com |
| **iPhone App** | | | iOS client — Synthetic Time Travel on mobile |
| **Billing** | | | Payment processing — Apple IAP + Stripe |
| **Landing** | | | Marketing site at timepointai.com |

---

Expand Down
30 changes: 30 additions & 0 deletions app/api/v1/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,36 @@ def get_configured_models() -> list[ModelInfo]:
capabilities=["image_generation"],
pricing={"prompt": 0.00012, "completion": 0.0},
),
# NVIDIA Nemotron
ModelInfo(
id="nvidia/llama-3.3-nemotron-super-49b-v1.5",
name="Nemotron Super 49B v1.5",
provider="openrouter",
capabilities=["text"],
context_length=131072,
),
ModelInfo(
id="nvidia/llama-3.1-nemotron-70b-instruct",
name="Nemotron 70B Instruct",
provider="openrouter",
capabilities=["text"],
context_length=131072,
),
# NousResearch Hermes
ModelInfo(
id="nousresearch/hermes-4-70b",
name="Hermes 4 70B",
provider="openrouter",
capabilities=["text"],
context_length=131072,
),
ModelInfo(
id="nousresearch/hermes-3-llama-3.1-405b",
name="Hermes 3 405B",
provider="openrouter",
capabilities=["text"],
context_length=131072,
),
])

return models
Expand Down
10 changes: 7 additions & 3 deletions app/api/v1/timepoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,10 +234,14 @@ def _get_permissive_text_model() -> str:
preference = [
"meta-llama/llama-4-scout-17b-16e-instruct",
"meta-llama/llama-4-maverick-17b-128e-instruct",
"deepseek/deepseek-chat-v3-0324", # Fast chat model
"qwen/qwen3-30b-a3b", # Fast MoE model
"nvidia/llama-3.3-nemotron-super-49b-v1.5", # Nemotron Super, fast MoE
"nousresearch/hermes-4-70b", # Hermes 4 70B, strong reasoning
"deepseek/deepseek-chat-v3-0324", # Fast chat model
"qwen/qwen3-30b-a3b", # Fast MoE model
"nvidia/nemotron-3-nano-30b-a3b", # Nemotron Nano, very fast
"nousresearch/hermes-3-llama-3.1-70b", # Hermes 3 70B fallback
"mistralai/mistral-small-3.2-24b-instruct",
"qwen/qwen3-235b-a22b", # Large but non-thinking
"qwen/qwen3-235b-a22b", # Large but non-thinking
"deepseek/deepseek-r1-0528", # Thinking model — slow, last resort
]
for model_id in preference:
Expand Down
37 changes: 36 additions & 1 deletion app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,14 @@ class QualityPreset(str, Enum):
- HYPER: Fastest speed with Gemini 2.0 Flash via OpenRouter
- BALANCED: Default balance of quality and speed
- GEMINI3: Latest Gemini 3 Flash Preview via OpenRouter (thinking model)
- FREE_DISTILLABLE: Free distillable models — $0 cost, outputs usable for training/distillation
"""

HD = "hd"
HYPER = "hyper"
BALANCED = "balanced"
GEMINI3 = "gemini3"
FREE_DISTILLABLE = "free_distillable"


class Environment(str, Enum):
Expand Down Expand Up @@ -104,6 +106,26 @@ class VerifiedModels:
"google/gemini-2.0-flash-001", # Fast, handles JSON well
"google/gemini-2.0-flash-001:free", # Free tier (rate limited)
"google/gemini-3-flash-preview", # Latest thinking model, agentic workflows
# NVIDIA Nemotron family
"nvidia/llama-3.1-nemotron-70b-instruct",
"nvidia/llama-3.3-nemotron-super-49b-v1.5",
"nvidia/nemotron-3-nano-30b-a3b",
"nvidia/nemotron-3-nano-30b-a3b:free",
"nvidia/nemotron-3-super-120b-a12b:free",
"nvidia/nemotron-nano-12b-v2-vl",
"nvidia/nemotron-nano-12b-v2-vl:free",
"nvidia/nemotron-nano-9b-v2",
"nvidia/nemotron-nano-9b-v2:free",
# NousResearch Hermes family
"nousresearch/hermes-2-pro-llama-3-8b",
"nousresearch/hermes-3-llama-3.1-405b",
"nousresearch/hermes-3-llama-3.1-405b:free",
"nousresearch/hermes-3-llama-3.1-70b",
"nousresearch/hermes-4-405b",
"nousresearch/hermes-4-70b",
# OpenRouter free distillable models
"openrouter/hunter-alpha",
"openrouter/healer-alpha",
]

# Fallback chains - ordered by preference
Expand Down Expand Up @@ -228,6 +250,18 @@ def is_verified_or_available(cls, model: str, provider: "ProviderType") -> bool:
"thinking_level": "medium", # Gemini 3 supports configurable thinking
"image_supported": True,
},
QualityPreset.FREE_DISTILLABLE: {
"name": "Free Distillable",
"description": "Free models with distillation rights — $0 cost, text-only (no image gen)",
"text_model": "openrouter/hunter-alpha",
"judge_model": "openrouter/healer-alpha",
"image_model": None, # No free distillable image models available yet
"image_provider": None,
"text_provider": ProviderType.OPENROUTER,
"max_tokens": 4096,
"thinking_level": None,
"image_supported": False, # Text-only mode
},
}


Expand All @@ -238,6 +272,7 @@ def is_verified_or_available(cls, model: str, provider: "ProviderType") -> bool:
QualityPreset.BALANCED: ParallelismMode.NORMAL, # Default behavior
QualityPreset.HYPER: ParallelismMode.MAX, # Speed focus, maximum parallelism
QualityPreset.GEMINI3: ParallelismMode.AGGRESSIVE, # Thinking model, moderate parallelism
QualityPreset.FREE_DISTILLABLE: ParallelismMode.SEQUENTIAL, # Free models need sequential
}

# Provider rate limits (requests per minute and safe concurrent calls)
Expand Down Expand Up @@ -444,7 +479,7 @@ class Settings(BaseSettings):
)
CORS_ORIGINS: str = Field(
default="",
description="Comma-separated additional CORS origins (e.g. https://your-app.up.railway.app)",
description="Comma-separated additional CORS origins (e.g. https://your-domain.example.com)",
)

# Share URL
Expand Down
11 changes: 10 additions & 1 deletion app/core/llm_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ class ModelTier(str, Enum):
}


# Model IDs that are free of charge despite lacking the ":free" suffix.
# is_free_model() consults this set after its suffix heuristics.
FREE_DISTILLABLE_MODELS = {
    "openrouter/healer-alpha",
    "openrouter/hunter-alpha",
}


def is_free_model(model_id: str) -> bool:
    """Check if a model is a free tier model on OpenRouter.

    A model counts as free when its ID carries a ":free" or "/free"
    marker (OpenRouter's free-tier naming convention), or when it is
    one of the known free distillable models that lack the suffix
    (see FREE_DISTILLABLE_MODELS).

    Args:
        model_id: OpenRouter model identifier, e.g.
            "google/gemini-2.0-flash-001:free". May be empty/None.

    Returns:
        True if the model is free to use, False otherwise (including
        for an empty or falsy model_id).
    """
    if not model_id:
        return False
    model_lower = model_id.lower()
    # Suffix heuristic first — covers every ":free" variant without
    # needing an entry in the explicit set.
    if ":free" in model_lower or "/free" in model_lower:
        return True
    return model_lower in FREE_DISTILLABLE_MODELS


class LLMRouter:
Expand Down
190 changes: 190 additions & 0 deletions app/core/model_capabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,196 @@ class TextModelConfig:
max_output_tokens=16384,
notes="GPT-4o Mini via OpenRouter",
),
# NVIDIA Nemotron family (via OpenRouter)
"nvidia/llama-3.1-nemotron-70b-instruct": TextModelConfig(
model_id="nvidia/llama-3.1-nemotron-70b-instruct",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron 70B instruct, strong reasoning",
),
"nvidia/llama-3.3-nemotron-super-49b-v1.5": TextModelConfig(
model_id="nvidia/llama-3.3-nemotron-super-49b-v1.5",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron Super 49B v1.5, efficient MoE",
),
"nvidia/nemotron-3-nano-30b-a3b": TextModelConfig(
model_id="nvidia/nemotron-3-nano-30b-a3b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron 3 Nano 30B, fast MoE (3B active)",
),
"nvidia/nemotron-3-nano-30b-a3b:free": TextModelConfig(
model_id="nvidia/nemotron-3-nano-30b-a3b:free",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron 3 Nano 30B free tier",
),
"nvidia/nemotron-3-super-120b-a12b:free": TextModelConfig(
model_id="nvidia/nemotron-3-super-120b-a12b:free",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron 3 Super 120B free tier, large MoE (12B active)",
),
"nvidia/nemotron-nano-12b-v2-vl": TextModelConfig(
model_id="nvidia/nemotron-nano-12b-v2-vl",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron Nano 12B v2 with vision",
),
"nvidia/nemotron-nano-12b-v2-vl:free": TextModelConfig(
model_id="nvidia/nemotron-nano-12b-v2-vl:free",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron Nano 12B v2 with vision, free tier",
),
"nvidia/nemotron-nano-9b-v2": TextModelConfig(
model_id="nvidia/nemotron-nano-9b-v2",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron Nano 9B v2, compact and fast",
),
"nvidia/nemotron-nano-9b-v2:free": TextModelConfig(
model_id="nvidia/nemotron-nano-9b-v2:free",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Nemotron Nano 9B v2 free tier",
),
# NousResearch Hermes family (via OpenRouter)
"nousresearch/hermes-2-pro-llama-3-8b": TextModelConfig(
model_id="nousresearch/hermes-2-pro-llama-3-8b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 2 Pro 8B, compact function-calling model",
),
"nousresearch/hermes-3-llama-3.1-405b": TextModelConfig(
model_id="nousresearch/hermes-3-llama-3.1-405b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 3 405B, flagship open-weight model",
),
"nousresearch/hermes-3-llama-3.1-405b:free": TextModelConfig(
model_id="nousresearch/hermes-3-llama-3.1-405b:free",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 3 405B free tier",
),
"nousresearch/hermes-3-llama-3.1-70b": TextModelConfig(
model_id="nousresearch/hermes-3-llama-3.1-70b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 3 70B, strong general-purpose model",
),
"nousresearch/hermes-4-405b": TextModelConfig(
model_id="nousresearch/hermes-4-405b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 4 405B, latest flagship",
),
"nousresearch/hermes-4-70b": TextModelConfig(
model_id="nousresearch/hermes-4-70b",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=8192,
notes="Hermes 4 70B, strong reasoning",
),
# OpenRouter free distillable models
"openrouter/hunter-alpha": TextModelConfig(
model_id="openrouter/hunter-alpha",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=32000,
notes="Free distillable, 1M context, text+image input",
),
"openrouter/healer-alpha": TextModelConfig(
model_id="openrouter/healer-alpha",
provider="openrouter",
supports_json_schema=False,
supports_json_mode=True,
supports_function_calling=True,
supports_streaming=True,
supports_extended_thinking=False,
max_output_tokens=32000,
notes="Free distillable, 262K context, multimodal input",
),
}

# Default config for unknown models (conservative - assume JSON mode works)
Expand Down
Loading
Loading