From 1311a6873475b10196850ccf5fc00d8e2276ea87 Mon Sep 17 00:00:00 2001 From: Lewis Tunstall Date: Wed, 6 May 2026 16:24:37 +0200 Subject: [PATCH] Add native OpenRouter model support Co-authored-by: OpenAI Codex --- README.md | 8 ++++++- agent/core/llm_params.py | 21 ++++++++++++++++++ agent/core/model_switcher.py | 22 +++++++++++------- tests/unit/test_cli_rendering.py | 15 +++++++++++++ tests/unit/test_llm_params.py | 38 ++++++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ab2f7d52..5fa533ce 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ Create a `.env` file in the project root (or export these in your shell): ```bash ANTHROPIC_API_KEY= # if using anthropic models OPENAI_API_KEY= # if using openai models +OPENROUTER_API_KEY= # if using openrouter models HF_TOKEN= GITHUB_TOKEN= ``` @@ -52,12 +53,17 @@ ml-intern "fine-tune llama on my dataset" ```bash ml-intern --model anthropic/claude-opus-4-7 "your prompt" # requires ANTHROPIC_API_KEY ml-intern --model openai/gpt-5.5 "your prompt" # requires OPENAI_API_KEY +ml-intern --model openrouter/anthropic/claude-opus-4.7 "your prompt" # requires OPENROUTER_API_KEY ml-intern --max-iterations 100 "your prompt" ml-intern --no-stream "your prompt" ``` Run `ml-intern` then `/model` to see the full list of suggested model ids -(Claude, GPT, and HF-router models like MiniMax, Kimi, GLM, DeepSeek). +(Claude, GPT, OpenRouter, and HF-router models like MiniMax, Kimi, GLM, DeepSeek). +OpenRouter models must use the explicit `openrouter/<provider>/<model>` prefix. +Optional OpenRouter env vars `OPENROUTER_API_BASE`, `OR_SITE_URL`, and +`OR_APP_NAME` are passed through by LiteLLM. `OPENAI_BASE_URL` is not used; +`openai/...` remains reserved for direct OpenAI models. 
## Sharing Traces diff --git a/agent/core/llm_params.py b/agent/core/llm_params.py index 028dd6df..f5c412b4 100644 --- a/agent/core/llm_params.py +++ b/agent/core/llm_params.py @@ -79,6 +79,7 @@ def _widened(model: str) -> bool: # Effort levels accepted on the wire. # Anthropic (4.6+): low | medium | high | xhigh | max (output_config.effort) # OpenAI direct: minimal | low | medium | high | xhigh (reasoning_effort top-level) +# OpenRouter: minimal | low | medium | high | xhigh (reasoning_effort top-level) # HF router: low | medium | high (extra_body.reasoning_effort) # # We validate *shape* here and let the probe cascade walk down on rejection; @@ -121,6 +122,12 @@ def _resolve_llm_params( • ``openai/<model>`` — ``reasoning_effort`` forwarded as a top-level kwarg (GPT-5 / o-series). LiteLLM uses the user's ``OPENAI_API_KEY``. + • ``openrouter/<provider>/<model>`` — preserved as-is so LiteLLM routes via + its native OpenRouter provider. LiteLLM uses ``OPENROUTER_API_KEY`` and + optionally ``OPENROUTER_API_BASE``, ``OR_SITE_URL``, and ``OR_APP_NAME``. + ``reasoning_effort`` is forwarded as a top-level kwarg for providers that + accept the OpenAI-compatible shape. + • Anything else is treated as a HuggingFace router id. We hit the auto-routing OpenAI-compatible endpoint at ``https://router.huggingface.co/v1``. The id can be bare or carry an @@ -138,6 +145,8 @@ def _resolve_llm_params( can't crash a turn — it just doesn't get sent. Token precedence (first non-empty wins): + For ``openrouter/...``, LiteLLM handles OpenRouter-specific env vars. + For Hugging Face Router ids: 1. INFERENCE_TOKEN env — shared key on the hosted Space (inference is free for users, billed to the Space owner via ``X-HF-Bill-To``). 2. session.hf_token — the user's own token (CLI / OAuth / cache file). 
@@ -187,6 +196,18 @@ def _resolve_llm_params( params["reasoning_effort"] = reasoning_effort return params + if model_name.startswith("openrouter/"): + params = {"model": model_name} + if reasoning_effort: + if reasoning_effort not in _OPENAI_EFFORTS: + if strict: + raise UnsupportedEffortError( + f"OpenRouter doesn't accept effort={reasoning_effort!r}" + ) + else: + params["reasoning_effort"] = reasoning_effort + return params + hf_model = model_name.removeprefix("huggingface/") api_key = _resolve_hf_router_token(session_hf_token) params = { diff --git a/agent/core/model_switcher.py b/agent/core/model_switcher.py index 14b5233d..081bf08f 100644 --- a/agent/core/model_switcher.py +++ b/agent/core/model_switcher.py @@ -18,9 +18,10 @@ from agent.core.effort_probe import ProbeInconclusive, probe_effort -# Suggested models shown by `/model` (not a gate). Users can paste any HF -# model id (e.g. "MiniMaxAI/MiniMax-M2.7") or an `anthropic/` / `openai/` -# prefix for direct API access. For HF ids, append ":fastest" / +# Suggested models shown by `/model` (not a gate). Users can paste any HF model +# id (e.g. "MiniMaxAI/MiniMax-M2.7") or an `anthropic/` / `openai/` / +# `openrouter/` / `bedrock/` prefix for direct provider access. For HF ids, +# append ":fastest" / # ":cheapest" / ":preferred" / ":<provider>" to override the default # routing policy (auto = fastest with failover). SUGGESTED_MODELS = [ @@ -48,6 +49,8 @@ def is_valid_model_id(model_id: str) -> bool: Accepts: • anthropic/<model> • openai/<model> + • openrouter/<provider>/<model> + • bedrock/<model> • <org>/<name>[:<tag>] (HF router; tag = provider or policy) • huggingface/<org>/<name>[:<tag>] (same, accepts legacy prefix) @@ -67,10 +70,10 @@ def _print_hf_routing_info(model_id: str, console) -> bool: proceed with the switch, ``False`` to indicate a hard problem the user should notice before we fire the effort probe. - Anthropic / OpenAI ids return ``True`` without printing anything — - the probe below covers "does this model exist". 
+    Direct provider ids return ``True`` without printing anything — the probe +    below covers "does this model exist". """ -    if model_id.startswith(("anthropic/", "openai/")): +    if model_id.startswith(("anthropic/", "openai/", "openrouter/", "bedrock/")): return True from agent.core import hf_router_catalog as cat @@ -141,7 +144,8 @@ def print_model_listing(config, console) -> None: console.print( "\n[dim]Paste any HF model id (e.g. 'MiniMaxAI/MiniMax-M2.7').\n" "Add ':fastest', ':cheapest', ':preferred', or ':<provider>' to override routing.\n" - "Use 'anthropic/' or 'openai/' for direct API access.[/dim]" + "Use 'anthropic/', 'openai/', 'openrouter/<provider>/<model>', " + "or 'bedrock/' for direct provider access.[/dim]" ) @@ -151,7 +155,9 @@ def print_invalid_id(arg: str, console) -> None: "[dim]Expected:\n" " • <org>/<name>[:tag] (HF router — paste from huggingface.co)\n" " • anthropic/<model>\n" - " • openai/<model>[/dim]" + " • openai/<model>\n" + " • openrouter/<provider>/<model>\n" + " • bedrock/<model>[/dim]" ) diff --git a/tests/unit/test_cli_rendering.py b/tests/unit/test_cli_rendering.py index e94700bf..cc4a0cb6 100644 --- a/tests/unit/test_cli_rendering.py +++ b/tests/unit/test_cli_rendering.py @@ -7,6 +7,7 @@ import pytest import agent.main as main_mod +from agent.core import model_switcher from agent.tools.research_tool import _get_research_model from agent.utils import terminal_display @@ -29,6 +30,20 @@ def test_non_anthropic_research_model_is_unchanged(): assert _get_research_model("openai/gpt-5.4") == "openai/gpt-5.4" +def test_openrouter_model_switch_bypasses_hf_router_catalog(monkeypatch): + def fail_lookup(_model_id): + raise AssertionError("OpenRouter ids should not query the HF router catalog") + + monkeypatch.setattr("agent.core.hf_router_catalog.lookup", fail_lookup) + + console = SimpleNamespace(print=lambda *_args, **_kwargs: None) + + assert model_switcher._print_hf_routing_info( + "openrouter/anthropic/claude-opus-4.7", + console, + ) + + def test_subagent_display_does_not_spawn_background_redraw(monkeypatch): calls: 
list[object] = [] diff --git a/tests/unit/test_llm_params.py b/tests/unit/test_llm_params.py index 5234461a..01025bfb 100644 --- a/tests/unit/test_llm_params.py +++ b/tests/unit/test_llm_params.py @@ -30,6 +30,44 @@ def test_openai_max_effort_is_still_rejected(): raise AssertionError("Expected UnsupportedEffortError for max effort") +def test_openai_base_url_is_not_forwarded(monkeypatch): + monkeypatch.setenv("OPENAI_BASE_URL", "https://openrouter.ai/api/v1") + + params = _resolve_llm_params("openai/gpt-5.5") + + assert params == {"model": "openai/gpt-5.5"} + + +def test_openrouter_params_preserve_model_and_skip_hf_router_auth(monkeypatch): + monkeypatch.setenv("INFERENCE_TOKEN", "inference-token") + monkeypatch.setenv("HF_BILL_TO", "test-org") + + params = _resolve_llm_params( + "openrouter/anthropic/claude-opus-4.7", + session_hf_token="session-token", + reasoning_effort="high", + strict=True, + ) + + assert params == { + "model": "openrouter/anthropic/claude-opus-4.7", + "reasoning_effort": "high", + } + + +def test_openrouter_max_effort_is_rejected_in_strict_mode(): + try: + _resolve_llm_params( + "openrouter/anthropic/claude-opus-4.7", + reasoning_effort="max", + strict=True, + ) + except UnsupportedEffortError as exc: + assert "OpenRouter doesn't accept effort='max'" in str(exc) + else: + raise AssertionError("Expected UnsupportedEffortError for max effort") + + def test_hf_router_token_prefers_inference_token(monkeypatch): monkeypatch.setenv("INFERENCE_TOKEN", " inference-token ") monkeypatch.setenv("HF_TOKEN", "hf-token")