diff --git a/README.md b/README.md
index 8a6c1ccd..9b8934a6 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,7 @@ Create a `.env` file in the project root (or export these in your shell):
 ```bash
 ANTHROPIC_API_KEY=  # if using anthropic models
 OPENAI_API_KEY=     # if using openai models
+OPENROUTER_API_KEY= # if using openrouter models
 HF_TOKEN=
 GITHUB_TOKEN=
 ```
@@ -52,10 +53,15 @@ ml-intern "fine-tune llama on my dataset"
 ```bash
 ml-intern --model anthropic/claude-opus-4-6 "your prompt"
 ml-intern --model openai/gpt-5.5 "your prompt"
+ml-intern --model openrouter/anthropic/claude-sonnet-4-5 "your prompt"
 ml-intern --max-iterations 100 "your prompt"
 ml-intern --no-stream "your prompt"
 ```
 
+OpenRouter models use the `openrouter/<provider>/<model>` naming convention and
+the `OPENROUTER_API_KEY` environment variable. Optional attribution headers can
+be set with `OR_SITE_URL` and `OR_APP_NAME`.
+
 ## Supported Gateways
 
 ML Intern currently supports one-way notification gateways from CLI sessions.
diff --git a/agent/core/llm_params.py b/agent/core/llm_params.py
index 880886b3..b1b17126 100644
--- a/agent/core/llm_params.py
+++ b/agent/core/llm_params.py
@@ -5,6 +5,8 @@ creating circular imports.
 """
 
+import os
+
 from agent.core.hf_tokens import get_hf_bill_to, resolve_hf_router_token
 
 
 
@@ -72,12 +74,14 @@ def _widened(model: str) -> bool:
 # Effort levels accepted on the wire.
 # Anthropic (4.6+): low | medium | high | xhigh | max (output_config.effort)
 # OpenAI direct: minimal | low | medium | high | xhigh (reasoning_effort top-level)
+# OpenRouter: minimal | low | medium | high | xhigh (reasoning_effort top-level)
 # HF router: low | medium | high (extra_body.reasoning_effort)
 #
 # We validate *shape* here and let the probe cascade walk down on rejection;
 # we deliberately do NOT maintain a per-model capability table.
 
 _ANTHROPIC_EFFORTS = {"low", "medium", "high", "xhigh", "max"}
 _OPENAI_EFFORTS = {"minimal", "low", "medium", "high", "xhigh"}
+_OPENROUTER_EFFORTS = {"minimal", "low", "medium", "high", "xhigh"}
 _HF_EFFORTS = {"low", "medium", "high"}
 
@@ -114,6 +118,10 @@ def _resolve_llm_params(
     • ``openai/`` — ``reasoning_effort`` forwarded as a top-level kwarg
       (GPT-5 / o-series). LiteLLM uses the user's ``OPENAI_API_KEY``.
 
+    • ``openrouter/<provider>/<model>`` — routed through LiteLLM's OpenRouter
+      provider. LiteLLM uses ``OPENROUTER_API_KEY``; optional attribution
+      headers can be set with ``OR_SITE_URL`` and ``OR_APP_NAME``.
+
     • Anything else is treated as a HuggingFace router id. We hit the
       auto-routing OpenAI-compatible endpoint at
       ``https://router.huggingface.co/v1``. The id can be bare or carry an
@@ -180,6 +188,31 @@ def _resolve_llm_params(
             params["reasoning_effort"] = reasoning_effort
         return params
 
+    if model_name.startswith("openrouter/"):
+        params = {"model": model_name}
+        if api_key := os.getenv("OPENROUTER_API_KEY", "").strip():
+            params["api_key"] = api_key
+        if api_base := os.getenv("OPENROUTER_API_BASE", "").strip():
+            params["api_base"] = api_base
+
+        extra_headers = {}
+        if site_url := os.getenv("OR_SITE_URL", "").strip():
+            extra_headers["HTTP-Referer"] = site_url
+        if app_name := os.getenv("OR_APP_NAME", "").strip():
+            extra_headers["X-Title"] = app_name
+        if extra_headers:
+            params["extra_headers"] = extra_headers
+
+        if reasoning_effort:
+            if reasoning_effort not in _OPENROUTER_EFFORTS:
+                if strict:
+                    raise UnsupportedEffortError(
+                        f"OpenRouter doesn't accept effort={reasoning_effort!r}"
+                    )
+            else:
+                params["reasoning_effort"] = reasoning_effort
+        return params
+
     hf_model = model_name.removeprefix("huggingface/")
     api_key = _resolve_hf_router_token(session_hf_token)
     params = {
diff --git a/tests/unit/test_llm_params.py b/tests/unit/test_llm_params.py
index 5234461a..262c7528 100644
--- a/tests/unit/test_llm_params.py
+++ b/tests/unit/test_llm_params.py
@@ -30,6 +30,47 @@
def test_openai_max_effort_is_still_rejected():
         raise AssertionError("Expected UnsupportedEffortError for max effort")
 
 
+def test_openrouter_params_use_openrouter_provider(monkeypatch):
+    monkeypatch.setenv("OPENROUTER_API_KEY", " openrouter-token ")
+    monkeypatch.setenv("OR_SITE_URL", " https://example.com/ml-intern ")
+    monkeypatch.setenv("OR_APP_NAME", " ML Intern ")
+
+    params = _resolve_llm_params(
+        "openrouter/anthropic/claude-sonnet-4-5",
+        reasoning_effort="high",
+        strict=True,
+    )
+
+    assert params["model"] == "openrouter/anthropic/claude-sonnet-4-5"
+    assert params["api_key"] == "openrouter-token"
+    assert params["reasoning_effort"] == "high"
+    assert params["extra_headers"] == {
+        "HTTP-Referer": "https://example.com/ml-intern",
+        "X-Title": "ML Intern",
+    }
+
+
+def test_openrouter_api_base_override(monkeypatch):
+    monkeypatch.setenv("OPENROUTER_API_BASE", " https://openrouter.example/v1 ")
+
+    params = _resolve_llm_params("openrouter/openai/gpt-5.5")
+
+    assert params["api_base"] == "https://openrouter.example/v1"
+
+
+def test_openrouter_rejects_max_effort_in_strict_mode():
+    try:
+        _resolve_llm_params(
+            "openrouter/openai/gpt-5.5",
+            reasoning_effort="max",
+            strict=True,
+        )
+    except UnsupportedEffortError as exc:
+        assert "OpenRouter doesn't accept effort='max'" in str(exc)
+    else:
+        raise AssertionError("Expected UnsupportedEffortError for max effort")
+
+
 def test_hf_router_token_prefers_inference_token(monkeypatch):
     monkeypatch.setenv("INFERENCE_TOKEN", " inference-token ")
     monkeypatch.setenv("HF_TOKEN", "hf-token")