8 changes: 8 additions & 0 deletions README.md
@@ -28,6 +28,7 @@ Create a `.env` file in the project root (or export these in your shell):
```bash
ANTHROPIC_API_KEY=<your-anthropic-api-key> # if using anthropic models
OPENAI_API_KEY=<your-openai-api-key> # if using openai models
TOKENROUTER_API_KEY=<your-tokenrouter-api-key> # if using tokenrouter models
HF_TOKEN=<your-hugging-face-token>
GITHUB_TOKEN=<github-personal-access-token>
```
@@ -52,10 +53,17 @@ ml-intern "fine-tune llama on my dataset"
```bash
ml-intern --model anthropic/claude-opus-4-6 "your prompt"
ml-intern --model openai/gpt-5.5 "your prompt"
ml-intern --model tokenrouter/auto:balance "your prompt"
ml-intern --max-iterations 100 "your prompt"
ml-intern --no-stream "your prompt"
```

TokenRouter models use the `tokenrouter/` prefix and are served through
TokenRouter's OpenAI-compatible endpoint. ML Intern strips the prefix and
sends the remainder as the TokenRouter model id, so examples include
`tokenrouter/auto:balance`, `tokenrouter/auto:fast`, and
`tokenrouter/openai:gpt-4o`. Set `TOKENROUTER_API_BASE` only if you need to
override the default `https://api.tokenrouter.io/v1` endpoint.
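
For example, to point ML Intern at a self-hosted OpenAI-compatible proxy
instead of the default endpoint (the proxy URL below is a placeholder):

```bash
export TOKENROUTER_API_KEY=<your-tokenrouter-api-key>
export TOKENROUTER_API_BASE=https://tokenrouter-proxy.internal.example/v1
ml-intern --model tokenrouter/auto:balance "your prompt"
```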

## Supported Gateways

ML Intern currently supports one-way notification gateways from CLI sessions.
1 change: 1 addition & 0 deletions agent/core/agent_loop.py
@@ -311,6 +311,7 @@ def _friendly_error_message(error: Exception) -> str | None:
"To fix this, set the API key for your model provider:\n"
" • Anthropic: export ANTHROPIC_API_KEY=sk-...\n"
" • OpenAI: export OPENAI_API_KEY=sk-...\n"
" • TokenRouter: export TOKENROUTER_API_KEY=tr_...\n"
" • HF Router: export HF_TOKEN=hf_...\n\n"
"You can also add it to a .env file in the project root.\n"
"To switch models, use the /model command."
41 changes: 41 additions & 0 deletions agent/core/llm_params.py
@@ -5,6 +5,8 @@
creating circular imports.
"""

import os

from agent.core.hf_tokens import get_hf_bill_to, resolve_hf_router_token


@@ -13,6 +15,23 @@ def _resolve_hf_router_token(session_hf_token: str | None = None) -> str | None:
return resolve_hf_router_token(session_hf_token)


def _clean_env_value(value: str | None) -> str | None:
if value is None:
return None
return value.replace("\r", "").replace("\n", "").strip() or None
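# A minimal sketch of the cleaning behaviour (hypothetical inputs):
#   _clean_env_value(" tokenrouter-key \n") -> "tokenrouter-key"
#   _clean_env_value("") -> None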


def _resolve_tokenrouter_api_key() -> str | None:
return _clean_env_value(os.environ.get("TOKENROUTER_API_KEY"))


def _resolve_tokenrouter_api_base() -> str:
return (
_clean_env_value(os.environ.get("TOKENROUTER_API_BASE"))
or "https://api.tokenrouter.io/v1"
).rstrip("/")
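# Resolution sketch (the override value mirrors tests/unit/test_llm_params.py):
#   TOKENROUTER_API_BASE=" https://proxy.example.test/v1/ " -> "https://proxy.example.test/v1"
#   TOKENROUTER_API_BASE unset -> default "https://api.tokenrouter.io/v1"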


def _patch_litellm_effort_validation() -> None:
"""Neuter LiteLLM 1.83's hardcoded effort-level validation.

@@ -72,6 +91,7 @@ def _widened(model: str) -> bool:
# Effort levels accepted on the wire.
# Anthropic (4.6+): low | medium | high | xhigh | max (output_config.effort)
# OpenAI direct: minimal | low | medium | high | xhigh (reasoning_effort top-level)
# TokenRouter: no effort parameter documented on OpenAI-compatible chat
# HF router: low | medium | high (extra_body.reasoning_effort)
#
# We validate *shape* here and let the probe cascade walk down on rejection;
@@ -114,6 +134,15 @@ def _resolve_llm_params(
• ``openai/<model>`` — ``reasoning_effort`` forwarded as a top-level
kwarg (GPT-5 / o-series). LiteLLM uses the user's ``OPENAI_API_KEY``.

• ``tokenrouter/<model>`` — TokenRouter's OpenAI-compatible endpoint at
``https://api.tokenrouter.io/v1``. The prefix is stripped and the
remainder is sent through LiteLLM's OpenAI adapter, so ids like
``tokenrouter/auto:balance`` and ``tokenrouter/openai:gpt-4o`` work.
Set ``TOKENROUTER_API_KEY`` for authentication and optionally
``TOKENROUTER_API_BASE`` for a custom compatible endpoint. TokenRouter's
chat-completions compatibility docs do not list ``reasoning_effort``, so
we omit it instead of forwarding a provider-specific parameter.

• Anything else is treated as a HuggingFace router id. We hit the
auto-routing OpenAI-compatible endpoint at
``https://router.huggingface.co/v1``. The id can be bare or carry an
@@ -180,6 +209,18 @@ def _resolve_llm_params(
params["reasoning_effort"] = reasoning_effort
return params

if model_name.startswith("tokenrouter/"):
if reasoning_effort and strict:
raise UnsupportedEffortError(
f"TokenRouter doesn't accept effort={reasoning_effort!r}"
)
tokenrouter_model = model_name.removeprefix("tokenrouter/")
return {
"model": f"openai/{tokenrouter_model}",
"api_base": _resolve_tokenrouter_api_base(),
"api_key": _resolve_tokenrouter_api_key(),
}
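# e.g. (mirrors tests/unit/test_llm_params.py):
#   _resolve_llm_params("tokenrouter/auto:balance") ->
#   {"model": "openai/auto:balance",
#    "api_base": "https://api.tokenrouter.io/v1",
#    "api_key": <TOKENROUTER_API_KEY, whitespace-cleaned>}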

hf_model = model_name.removeprefix("huggingface/")
api_key = _resolve_hf_router_token(session_hf_token)
params = {
24 changes: 15 additions & 9 deletions agent/core/model_switcher.py
@@ -19,13 +19,14 @@


# Suggested models shown by `/model` (not a gate). Users can paste any HF
# model id (e.g. "MiniMaxAI/MiniMax-M2.7") or an `anthropic/` / `openai/`
# prefix for direct API access. For HF ids, append ":fastest" /
# ":cheapest" / ":preferred" / ":<provider>" to override the default
# routing policy (auto = fastest with failover).
# model id (e.g. "MiniMaxAI/MiniMax-M2.7") or an `anthropic/`, `openai/`,
# `bedrock/`, or `tokenrouter/` prefix for direct API access. For HF ids,
# append ":fastest" / ":cheapest" / ":preferred" / ":<provider>" to override
# the default routing policy (auto = fastest with failover).
SUGGESTED_MODELS = [
{"id": "openai/gpt-5.5", "label": "GPT-5.5"},
{"id": "openai/gpt-5.4", "label": "GPT-5.4"},
{"id": "tokenrouter/auto:balance", "label": "TokenRouter Auto Balance"},
{"id": "anthropic/claude-opus-4-7", "label": "Claude Opus 4.7"},
{"id": "anthropic/claude-opus-4-6", "label": "Claude Opus 4.6"},
{"id": "bedrock/us.anthropic.claude-opus-4-6-v1", "label": "Claude Opus 4.6 via Bedrock"},
@@ -44,6 +45,8 @@ def is_valid_model_id(model_id: str) -> bool:
Accepts:
• anthropic/<model>
• openai/<model>
• bedrock/<model>
• tokenrouter/<model>
• <org>/<model>[:<tag>] (HF router; tag = provider or policy)
• huggingface/<org>/<model>[:<tag>] (same, accepts legacy prefix)

@@ -63,10 +66,10 @@ def _print_hf_routing_info(model_id: str, console) -> bool:
proceed with the switch, ``False`` to indicate a hard problem the user
should notice before we fire the effort probe.

Anthropic / OpenAI ids return ``True`` without printing anything —
the probe below covers "does this model exist".
Anthropic / OpenAI / Bedrock / TokenRouter ids return ``True`` without
printing anything — the probe below covers "does this model exist".
"""
if model_id.startswith(("anthropic/", "openai/")):
if model_id.startswith(("anthropic/", "openai/", "bedrock/", "tokenrouter/")):
return True

from agent.core import hf_router_catalog as cat
@@ -139,7 +142,8 @@ def print_model_listing(config, console) -> None:
console.print(
"\n[dim]Paste any HF model id (e.g. 'MiniMaxAI/MiniMax-M2.7').\n"
"Add ':fastest', ':cheapest', ':preferred', or ':<provider>' to override routing.\n"
"Use 'anthropic/<model>' or 'openai/<model>' for direct API access.[/dim]"
"Use 'anthropic/<model>', 'openai/<model>', 'bedrock/<model>', or "
"'tokenrouter/<model>' for direct API access.[/dim]"
)


@@ -149,7 +153,9 @@ def print_invalid_id(arg: str, console) -> None:
"[dim]Expected:\n"
" • <org>/<model>[:tag] (HF router — paste from huggingface.co)\n"
" • anthropic/<model>\n"
" • openai/<model>[/dim]"
" • openai/<model>\n"
" • bedrock/<model>\n"
" • tokenrouter/<model>[/dim]"
)


18 changes: 13 additions & 5 deletions backend/routes/agent.py
@@ -56,6 +56,12 @@
"tier": "pro",
"recommended": True,
},
{
"id": "tokenrouter/auto:balance",
"label": "TokenRouter Auto Balance",
"provider": "tokenrouter",
"tier": "external",
},
{
"id": "MiniMaxAI/MiniMax-M2.7",
"label": "MiniMax M2.7",
@@ -72,16 +78,18 @@


def _is_anthropic_model(model_id: str) -> bool:
return "anthropic" in model_id
return model_id.startswith("anthropic/") or (
model_id.startswith("bedrock/") and "anthropic" in model_id
)
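# e.g. _is_anthropic_model("anthropic/claude-opus-4-7") -> True
#      _is_anthropic_model("bedrock/us.anthropic.claude-opus-4-6-v1") -> True
#      _is_anthropic_model("tokenrouter/auto:balance") -> False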


async def _require_hf_for_anthropic(request: Request, model_id: str) -> None:
"""403 if a non-``huggingface``-org user tries to select an Anthropic model.

Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; every
other model in ``AVAILABLE_MODELS`` is routed through HF Router and
billed via ``X-HF-Bill-To``. The gate only fires for Anthropic so
non-HF users can still freely switch between the free models.
Anthropic models are billed to the Space's ``ANTHROPIC_API_KEY``; HF Router
models are billed via ``X-HF-Bill-To`` and TokenRouter models use the
server's ``TOKENROUTER_API_KEY``. The gate only fires for direct Anthropic
models so non-HF users can still switch between the other models.

Pattern: https://github.com/huggingface/ml-intern/pull/63
"""
7 changes: 7 additions & 0 deletions frontend/src/components/Chat/ChatInput.tsx
@@ -42,6 +42,13 @@ const MODEL_OPTIONS: ModelOption[] = [
avatarUrl: 'https://huggingface.co/api/avatars/Anthropic',
recommended: true,
},
{
id: 'tokenrouter-auto-balance',
name: 'TokenRouter Auto',
description: 'TokenRouter',
modelPath: 'tokenrouter/auto:balance',
avatarUrl: 'https://www.google.com/s2/favicons?domain=tokenrouter.io&sz=64',
},
{
id: 'minimax-m2.7',
name: 'MiniMax M2.7',
37 changes: 37 additions & 0 deletions tests/unit/test_llm_params.py
@@ -30,6 +30,43 @@ def test_openai_max_effort_is_still_rejected():
raise AssertionError("Expected UnsupportedEffortError for max effort")


def test_tokenrouter_params_use_openai_compatible_endpoint(monkeypatch):
monkeypatch.setenv("TOKENROUTER_API_KEY", " tokenrouter-key ")
monkeypatch.delenv("TOKENROUTER_API_BASE", raising=False)

params = _resolve_llm_params("tokenrouter/auto:balance")

assert params == {
"model": "openai/auto:balance",
"api_base": "https://api.tokenrouter.io/v1",
"api_key": "tokenrouter-key",
}


def test_tokenrouter_api_base_can_be_overridden(monkeypatch):
monkeypatch.setenv("TOKENROUTER_API_KEY", "tokenrouter-key")
monkeypatch.setenv("TOKENROUTER_API_BASE", " https://proxy.example.test/v1/ ")

params = _resolve_llm_params("tokenrouter/openai:gpt-4o")

assert params["model"] == "openai/openai:gpt-4o"
assert params["api_base"] == "https://proxy.example.test/v1"
assert params["api_key"] == "tokenrouter-key"


def test_tokenrouter_effort_is_rejected_in_strict_probe_mode():
try:
_resolve_llm_params(
"tokenrouter/auto:balance",
reasoning_effort="high",
strict=True,
)
except UnsupportedEffortError as exc:
assert "TokenRouter doesn't accept effort='high'" in str(exc)
else:
raise AssertionError("Expected UnsupportedEffortError for TokenRouter effort")


def test_hf_router_token_prefers_inference_token(monkeypatch):
monkeypatch.setenv("INFERENCE_TOKEN", " inference-token ")
monkeypatch.setenv("HF_TOKEN", "hf-token")