8 changes: 7 additions & 1 deletion README.md
@@ -28,6 +28,7 @@ Create a `.env` file in the project root (or export these in your shell):
```bash
ANTHROPIC_API_KEY=<your-anthropic-api-key> # if using anthropic models
OPENAI_API_KEY=<your-openai-api-key> # if using openai models
+OPENROUTER_API_KEY=<your-openrouter-api-key> # if using openrouter models
HF_TOKEN=<your-hugging-face-token>
GITHUB_TOKEN=<github-personal-access-token>
```
@@ -52,12 +53,17 @@ ml-intern "fine-tune llama on my dataset"
```bash
ml-intern --model anthropic/claude-opus-4-7 "your prompt" # requires ANTHROPIC_API_KEY
ml-intern --model openai/gpt-5.5 "your prompt" # requires OPENAI_API_KEY
+ml-intern --model openrouter/anthropic/claude-opus-4.7 "your prompt" # requires OPENROUTER_API_KEY
ml-intern --max-iterations 100 "your prompt"
ml-intern --no-stream "your prompt"
```

Run `ml-intern` then `/model` to see the full list of suggested model ids
-(Claude, GPT, and HF-router models like MiniMax, Kimi, GLM, DeepSeek).
+(Claude, GPT, OpenRouter, and HF-router models like MiniMax, Kimi, GLM, DeepSeek).
+OpenRouter models must use the explicit `openrouter/<provider>/<model>` prefix.
+Optional OpenRouter env vars `OPENROUTER_API_BASE`, `OR_SITE_URL`, and
+`OR_APP_NAME` are passed through by LiteLLM. `OPENAI_BASE_URL` is not used;
+`openai/...` remains reserved for direct OpenAI models.
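
For reference, a minimal shell setup for the new OpenRouter path might look like the sketch below. Only `OPENROUTER_API_KEY` is required; every value shown is a placeholder.

```bash
# Required for openrouter/ model ids.
export OPENROUTER_API_KEY=<your-openrouter-api-key>

# Optional passthroughs picked up by LiteLLM (placeholder values).
export OPENROUTER_API_BASE=https://openrouter.ai/api/v1
export OR_SITE_URL=https://example.com
export OR_APP_NAME=ml-intern

# Note the explicit openrouter/<provider>/<model> prefix.
ml-intern --model openrouter/anthropic/claude-opus-4.7 "your prompt"
```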

## Sharing Traces

21 changes: 21 additions & 0 deletions agent/core/llm_params.py
@@ -79,6 +79,7 @@ def _widened(model: str) -> bool:
# Effort levels accepted on the wire.
# Anthropic (4.6+): low | medium | high | xhigh | max (output_config.effort)
# OpenAI direct: minimal | low | medium | high | xhigh (reasoning_effort top-level)
+# OpenRouter: minimal | low | medium | high | xhigh (reasoning_effort top-level)
# HF router: low | medium | high (extra_body.reasoning_effort)
#
# We validate *shape* here and let the probe cascade walk down on rejection;
@@ -121,6 +122,12 @@ def _resolve_llm_params(
• ``openai/<model>`` — ``reasoning_effort`` forwarded as a top-level
kwarg (GPT-5 / o-series). LiteLLM uses the user's ``OPENAI_API_KEY``.

+• ``openrouter/<provider>/<model>`` — preserved as-is so LiteLLM routes via
+its native OpenRouter provider. LiteLLM uses ``OPENROUTER_API_KEY`` and
+optionally ``OPENROUTER_API_BASE``, ``OR_SITE_URL``, and ``OR_APP_NAME``.
+``reasoning_effort`` is forwarded as a top-level kwarg for providers that
+accept the OpenAI-compatible shape.
+
• Anything else is treated as a HuggingFace router id. We hit the
auto-routing OpenAI-compatible endpoint at
``https://router.huggingface.co/v1``. The id can be bare or carry an
@@ -138,6 +145,8 @@ def _resolve_llm_params(
can't crash a turn — it just doesn't get sent.

Token precedence (first non-empty wins):
+For ``openrouter/...``, LiteLLM handles OpenRouter-specific env vars.
+For Hugging Face Router ids:
1. INFERENCE_TOKEN env — shared key on the hosted Space (inference is
free for users, billed to the Space owner via ``X-HF-Bill-To``).
2. session.hf_token — the user's own token (CLI / OAuth / cache file).
@@ -187,6 +196,18 @@ def _resolve_llm_params(
params["reasoning_effort"] = reasoning_effort
return params

if model_name.startswith("openrouter/"):
params = {"model": model_name}
if reasoning_effort:
if reasoning_effort not in _OPENAI_EFFORTS:
if strict:
raise UnsupportedEffortError(
f"OpenRouter doesn't accept effort={reasoning_effort!r}"
)
else:
params["reasoning_effort"] = reasoning_effort
return params

    hf_model = model_name.removeprefix("huggingface/")
    api_key = _resolve_hf_router_token(session_hf_token)
    params = {
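To make the new branch concrete, here is an illustrative sketch of the resolved params. It assumes `_resolve_llm_params` and `UnsupportedEffortError` import from `agent.core.llm_params`, as the unit tests below do.

```python
from agent.core.llm_params import UnsupportedEffortError, _resolve_llm_params

# OpenRouter ids pass through untouched: no HF router token, no base URL.
params = _resolve_llm_params(
    "openrouter/anthropic/claude-opus-4.7",
    reasoning_effort="high",
    strict=True,
)
assert params == {
    "model": "openrouter/anthropic/claude-opus-4.7",
    "reasoning_effort": "high",
}

# An out-of-range effort raises in strict mode...
try:
    _resolve_llm_params(
        "openrouter/anthropic/claude-opus-4.7",
        reasoning_effort="max",
        strict=True,
    )
except UnsupportedEffortError:
    pass

# ...and is dropped otherwise, so a bad value never crashes a turn.
assert _resolve_llm_params(
    "openrouter/anthropic/claude-opus-4.7",
    reasoning_effort="max",
    strict=False,
) == {"model": "openrouter/anthropic/claude-opus-4.7"}
```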
22 changes: 14 additions & 8 deletions agent/core/model_switcher.py
@@ -18,9 +18,10 @@
from agent.core.effort_probe import ProbeInconclusive, probe_effort


-# Suggested models shown by `/model` (not a gate). Users can paste any HF
-# model id (e.g. "MiniMaxAI/MiniMax-M2.7") or an `anthropic/` / `openai/`
-# prefix for direct API access. For HF ids, append ":fastest" /
+# Suggested models shown by `/model` (not a gate). Users can paste any HF model
+# id (e.g. "MiniMaxAI/MiniMax-M2.7") or an `anthropic/` / `openai/` /
+# `openrouter/` / `bedrock/` prefix for direct provider access. For HF ids,
+# append ":fastest" /
# ":cheapest" / ":preferred" / ":<provider>" to override the default
# routing policy (auto = fastest with failover).
SUGGESTED_MODELS = [
@@ -48,6 +49,8 @@ def is_valid_model_id(model_id: str) -> bool:
Accepts:
• anthropic/<model>
• openai/<model>
+• openrouter/<provider>/<model>
+• bedrock/<model>
• <org>/<model>[:<tag>] (HF router; tag = provider or policy)
• huggingface/<org>/<model>[:<tag>] (same, accepts legacy prefix)

@@ -67,10 +70,10 @@ def _print_hf_routing_info(model_id: str, console) -> bool:
proceed with the switch, ``False`` to indicate a hard problem the user
should notice before we fire the effort probe.

-Anthropic / OpenAI ids return ``True`` without printing anything —
-the probe below covers "does this model exist".
+Direct provider ids return ``True`` without printing anything — the probe
+below covers "does this model exist".
"""
if model_id.startswith(("anthropic/", "openai/")):
if model_id.startswith(("anthropic/", "openai/", "openrouter/", "bedrock/")):
return True

    from agent.core import hf_router_catalog as cat
@@ -141,7 +144,8 @@ def print_model_listing(config, console) -> None:
    console.print(
        "\n[dim]Paste any HF model id (e.g. 'MiniMaxAI/MiniMax-M2.7').\n"
        "Add ':fastest', ':cheapest', ':preferred', or ':<provider>' to override routing.\n"
-        "Use 'anthropic/<model>' or 'openai/<model>' for direct API access.[/dim]"
+        "Use 'anthropic/<model>', 'openai/<model>', 'openrouter/<provider>/<model>', "
+        "or 'bedrock/<model>' for direct provider access.[/dim]"
    )


@@ -151,7 +155,9 @@ def print_invalid_id(arg: str, console) -> None:
"[dim]Expected:\n"
" • <org>/<model>[:tag] (HF router — paste from huggingface.co)\n"
" • anthropic/<model>\n"
" • openai/<model>[/dim]"
" • openai/<model>\n"
" • openrouter/<provider>/<model>\n"
" • bedrock/<model>[/dim]"
)


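As a quick illustration of the id shapes `is_valid_model_id` now accepts (the specific model names below are placeholders, not catalog entries):

```python
from agent.core import model_switcher

# One id per accepted shape from the docstring; names are illustrative only.
accepted = [
    "anthropic/claude-opus-4-7",             # anthropic/<model>
    "openai/gpt-5.5",                        # openai/<model>
    "openrouter/anthropic/claude-opus-4.7",  # openrouter/<provider>/<model>
    "bedrock/some-model",                    # bedrock/<model>
    "MiniMaxAI/MiniMax-M2.7:fastest",        # <org>/<model>[:<tag>]
    "huggingface/MiniMaxAI/MiniMax-M2.7",    # legacy huggingface/ prefix
]
for model_id in accepted:
    assert model_switcher.is_valid_model_id(model_id)
```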
15 changes: 15 additions & 0 deletions tests/unit/test_cli_rendering.py
@@ -7,6 +7,7 @@
import pytest

import agent.main as main_mod
+from agent.core import model_switcher
from agent.tools.research_tool import _get_research_model
from agent.utils import terminal_display

@@ -29,6 +30,20 @@ def test_non_anthropic_research_model_is_unchanged():
    assert _get_research_model("openai/gpt-5.4") == "openai/gpt-5.4"


+def test_openrouter_model_switch_bypasses_hf_router_catalog(monkeypatch):
+    def fail_lookup(_model_id):
+        raise AssertionError("OpenRouter ids should not query the HF router catalog")
+
+    monkeypatch.setattr("agent.core.hf_router_catalog.lookup", fail_lookup)
+
+    console = SimpleNamespace(print=lambda *_args, **_kwargs: None)
+
+    assert model_switcher._print_hf_routing_info(
+        "openrouter/anthropic/claude-opus-4.7",
+        console,
+    )
+
+
def test_subagent_display_does_not_spawn_background_redraw(monkeypatch):
    calls: list[object] = []

38 changes: 38 additions & 0 deletions tests/unit/test_llm_params.py
@@ -30,6 +30,44 @@ def test_openai_max_effort_is_still_rejected():
raise AssertionError("Expected UnsupportedEffortError for max effort")


+def test_openai_base_url_is_not_forwarded(monkeypatch):
+    monkeypatch.setenv("OPENAI_BASE_URL", "https://openrouter.ai/api/v1")
+
+    params = _resolve_llm_params("openai/gpt-5.5")
+
+    assert params == {"model": "openai/gpt-5.5"}
+
+
+def test_openrouter_params_preserve_model_and_skip_hf_router_auth(monkeypatch):
+    monkeypatch.setenv("INFERENCE_TOKEN", "inference-token")
+    monkeypatch.setenv("HF_BILL_TO", "test-org")
+
+    params = _resolve_llm_params(
+        "openrouter/anthropic/claude-opus-4.7",
+        session_hf_token="session-token",
+        reasoning_effort="high",
+        strict=True,
+    )
+
+    assert params == {
+        "model": "openrouter/anthropic/claude-opus-4.7",
+        "reasoning_effort": "high",
+    }
+
+
+def test_openrouter_max_effort_is_rejected_in_strict_mode():
+    try:
+        _resolve_llm_params(
+            "openrouter/anthropic/claude-opus-4.7",
+            reasoning_effort="max",
+            strict=True,
+        )
+    except UnsupportedEffortError as exc:
+        assert "OpenRouter doesn't accept effort='max'" in str(exc)
+    else:
+        raise AssertionError("Expected UnsupportedEffortError for max effort")
+
+
def test_hf_router_token_prefers_inference_token(monkeypatch):
monkeypatch.setenv("INFERENCE_TOKEN", " inference-token ")
monkeypatch.setenv("HF_TOKEN", "hf-token")