From 1311a6873475b10196850ccf5fc00d8e2276ea87 Mon Sep 17 00:00:00 2001 From: Lewis Tunstall Date: Wed, 6 May 2026 16:24:37 +0200 Subject: [PATCH] Add native OpenRouter model support Co-authored-by: OpenAI Codex --- README.md | 8 ++++++- agent/core/llm_params.py | 21 ++++++++++++++++++ agent/core/model_switcher.py | 22 +++++++++++------- tests/unit/test_cli_rendering.py | 15 +++++++++++++ tests/unit/test_llm_params.py | 38 ++++++++++++++++++++++++++++++++ 5 files changed, 95 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index ab2f7d52..5fa533ce 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ Create a `.env` file in the project root (or export these in your shell): ```bash ANTHROPIC_API_KEY= # if using anthropic models OPENAI_API_KEY= # if using openai models +OPENROUTER_API_KEY= # if using openrouter models HF_TOKEN= GITHUB_TOKEN= ``` @@ -52,12 +53,17 @@ ml-intern "fine-tune llama on my dataset" ```bash ml-intern --model anthropic/claude-opus-4-7 "your prompt" # requires ANTHROPIC_API_KEY ml-intern --model openai/gpt-5.5 "your prompt" # requires OPENAI_API_KEY +ml-intern --model openrouter/anthropic/claude-opus-4.7 "your prompt" # requires OPENROUTER_API_KEY ml-intern --max-iterations 100 "your prompt" ml-intern --no-stream "your prompt" ``` Run `ml-intern` then `/model` to see the full list of suggested model ids -(Claude, GPT, and HF-router models like MiniMax, Kimi, GLM, DeepSeek). +(Claude, GPT, OpenRouter, and HF-router models like MiniMax, Kimi, GLM, DeepSeek). +OpenRouter models must use the explicit `openrouter/<provider>/<model>` prefix. +Optional OpenRouter env vars `OPENROUTER_API_BASE`, `OR_SITE_URL`, and +`OR_APP_NAME` are passed through by LiteLLM. `OPENAI_BASE_URL` is not used; +`openai/...` remains reserved for direct OpenAI models. 
## Sharing Traces diff --git a/agent/core/llm_params.py b/agent/core/llm_params.py index 028dd6df..f5c412b4 100644 --- a/agent/core/llm_params.py +++ b/agent/core/llm_params.py @@ -79,6 +79,7 @@ def _widened(model: str) -> bool: # Effort levels accepted on the wire. # Anthropic (4.6+): low | medium | high | xhigh | max (output_config.effort) # OpenAI direct: minimal | low | medium | high | xhigh (reasoning_effort top-level) +# OpenRouter: minimal | low | medium | high | xhigh (reasoning_effort top-level) # HF router: low | medium | high (extra_body.reasoning_effort) # # We validate *shape* here and let the probe cascade walk down on rejection; @@ -121,6 +122,12 @@ def _resolve_llm_params( • ``openai/<model>`` — ``reasoning_effort`` forwarded as a top-level kwarg (GPT-5 / o-series). LiteLLM uses the user's ``OPENAI_API_KEY``. + • ``openrouter/<provider>/<model>`` — preserved as-is so LiteLLM routes via + its native OpenRouter provider. LiteLLM uses ``OPENROUTER_API_KEY`` and + optionally ``OPENROUTER_API_BASE``, ``OR_SITE_URL``, and ``OR_APP_NAME``. + ``reasoning_effort`` is forwarded as a top-level kwarg for providers that + accept the OpenAI-compatible shape. + • Anything else is treated as a HuggingFace router id. We hit the auto-routing OpenAI-compatible endpoint at ``https://router.huggingface.co/v1``. The id can be bare or carry an @@ -138,6 +145,8 @@ def _resolve_llm_params( can't crash a turn — it just doesn't get sent. Token precedence (first non-empty wins): + For ``openrouter/...``, LiteLLM handles OpenRouter-specific env vars. + For Hugging Face Router ids: 1. INFERENCE_TOKEN env — shared key on the hosted Space (inference is free for users, billed to the Space owner via ``X-HF-Bill-To``). 2. session.hf_token — the user's own token (CLI / OAuth / cache file). 
@@ -187,6 +196,18 @@ def _resolve_llm_params( params["reasoning_effort"] = reasoning_effort return params + if model_name.startswith("openrouter/"): + params = {"model": model_name} + if reasoning_effort: + if reasoning_effort not in _OPENAI_EFFORTS: + if strict: + raise UnsupportedEffortError( + f"OpenRouter doesn't accept effort={reasoning_effort!r}" + ) + else: + params["reasoning_effort"] = reasoning_effort + return params + hf_model = model_name.removeprefix("huggingface/") api_key = _resolve_hf_router_token(session_hf_token) params = { diff --git a/agent/core/model_switcher.py b/agent/core/model_switcher.py index 14b5233d..081bf08f 100644 --- a/agent/core/model_switcher.py +++ b/agent/core/model_switcher.py @@ -18,9 +18,10 @@ from agent.core.effort_probe import ProbeInconclusive, probe_effort -# Suggested models shown by `/model` (not a gate). Users can paste any HF -# model id (e.g. "MiniMaxAI/MiniMax-M2.7") or an `anthropic/` / `openai/` -# prefix for direct API access. For HF ids, append ":fastest" / +# Suggested models shown by `/model` (not a gate). Users can paste any HF model +# id (e.g. "MiniMaxAI/MiniMax-M2.7") or an `anthropic/` / `openai/` / +# `openrouter/` / `bedrock/` prefix for direct provider access. For HF ids, +# append ":fastest" / # ":cheapest" / ":preferred" / ":<provider>" to override the default # routing policy (auto = fastest with failover). SUGGESTED_MODELS = [ @@ -48,6 +49,8 @@ def is_valid_model_id(model_id: str) -> bool: Accepts: • anthropic/<model> • openai/<model> + • openrouter/<provider>/<model> + • bedrock/<model> • <org>/<name>[:<tag>] (HF router; tag = provider or policy) • huggingface/<org>/<name>[:<tag>] (same, accepts legacy prefix) @@ -67,10 +70,10 @@ def _print_hf_routing_info(model_id: str, console) -> bool: proceed with the switch, ``False`` to indicate a hard problem the user should notice before we fire the effort probe. - Anthropic / OpenAI ids return ``True`` without printing anything — - the probe below covers "does this model exist". 
+    Direct provider ids return ``True`` without printing anything — the probe +    below covers "does this model exist". """ -    if model_id.startswith(("anthropic/", "openai/")): +    if model_id.startswith(("anthropic/", "openai/", "openrouter/", "bedrock/")): return True from agent.core import hf_router_catalog as cat @@ -141,7 +144,8 @@ def print_model_listing(config, console) -> None: console.print( "\n[dim]Paste any HF model id (e.g. 'MiniMaxAI/MiniMax-M2.7').\n" "Add ':fastest', ':cheapest', ':preferred', or ':<provider>' to override routing.\n" - "Use 'anthropic/' or 'openai/' for direct API access.[/dim]" + "Use 'anthropic/', 'openai/', 'openrouter/<provider>/<model>', " + "or 'bedrock/' for direct provider access.[/dim]" ) @@ -151,7 +155,9 @@ def print_invalid_id(arg: str, console) -> None: "[dim]Expected:\n" " • <org>/<name>[:tag] (HF router — paste from huggingface.co)\n" " • anthropic/<model>\n" - " • openai/<model>[/dim]" + " • openai/<model>\n" + " • openrouter/<provider>/<model>\n" + " • bedrock/<model>[/dim]" ) diff --git a/tests/unit/test_cli_rendering.py b/tests/unit/test_cli_rendering.py index e94700bf..cc4a0cb6 100644 --- a/tests/unit/test_cli_rendering.py +++ b/tests/unit/test_cli_rendering.py @@ -7,6 +7,7 @@ import pytest import agent.main as main_mod +from agent.core import model_switcher from agent.tools.research_tool import _get_research_model from agent.utils import terminal_display @@ -29,6 +30,20 @@ def test_non_anthropic_research_model_is_unchanged(): assert _get_research_model("openai/gpt-5.4") == "openai/gpt-5.4" +def test_openrouter_model_switch_bypasses_hf_router_catalog(monkeypatch): + def fail_lookup(_model_id): + raise AssertionError("OpenRouter ids should not query the HF router catalog") + + monkeypatch.setattr("agent.core.hf_router_catalog.lookup", fail_lookup) + + console = SimpleNamespace(print=lambda *_args, **_kwargs: None) + + assert model_switcher._print_hf_routing_info( + "openrouter/anthropic/claude-opus-4.7", + console, + ) + + def test_subagent_display_does_not_spawn_background_redraw(monkeypatch): calls: 
list[object] = [] diff --git a/tests/unit/test_llm_params.py b/tests/unit/test_llm_params.py index 5234461a..01025bfb 100644 --- a/tests/unit/test_llm_params.py +++ b/tests/unit/test_llm_params.py @@ -30,6 +30,44 @@ def test_openai_max_effort_is_still_rejected(): raise AssertionError("Expected UnsupportedEffortError for max effort") +def test_openai_base_url_is_not_forwarded(monkeypatch): + monkeypatch.setenv("OPENAI_BASE_URL", "https://openrouter.ai/api/v1") + + params = _resolve_llm_params("openai/gpt-5.5") + + assert params == {"model": "openai/gpt-5.5"} + + +def test_openrouter_params_preserve_model_and_skip_hf_router_auth(monkeypatch): + monkeypatch.setenv("INFERENCE_TOKEN", "inference-token") + monkeypatch.setenv("HF_BILL_TO", "test-org") + + params = _resolve_llm_params( + "openrouter/anthropic/claude-opus-4.7", + session_hf_token="session-token", + reasoning_effort="high", + strict=True, + ) + + assert params == { + "model": "openrouter/anthropic/claude-opus-4.7", + "reasoning_effort": "high", + } + + +def test_openrouter_max_effort_is_rejected_in_strict_mode(): + try: + _resolve_llm_params( + "openrouter/anthropic/claude-opus-4.7", + reasoning_effort="max", + strict=True, + ) + except UnsupportedEffortError as exc: + assert "OpenRouter doesn't accept effort='max'" in str(exc) + else: + raise AssertionError("Expected UnsupportedEffortError for max effort") + + def test_hf_router_token_prefers_inference_token(monkeypatch): monkeypatch.setenv("INFERENCE_TOKEN", " inference-token ") monkeypatch.setenv("HF_TOKEN", "hf-token")