Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Create a `.env` file in the project root (or export these in your shell):
```bash
ANTHROPIC_API_KEY=<your-anthropic-api-key> # if using anthropic models
OPENAI_API_KEY=<your-openai-api-key> # if using openai models
OPENROUTER_API_KEY=<your-openrouter-api-key> # if using openrouter models
HF_TOKEN=<your-hugging-face-token>
GITHUB_TOKEN=<github-personal-access-token>
```
Expand All @@ -52,10 +53,15 @@ ml-intern "fine-tune llama on my dataset"
```bash
ml-intern --model anthropic/claude-opus-4-6 "your prompt"
ml-intern --model openai/gpt-5.5 "your prompt"
ml-intern --model openrouter/anthropic/claude-sonnet-4-5 "your prompt"
ml-intern --max-iterations 100 "your prompt"
ml-intern --no-stream "your prompt"
```

OpenRouter models use the `openrouter/<provider>/<model>` naming convention and
the `OPENROUTER_API_KEY` environment variable. Optional attribution headers can
be set with `OR_SITE_URL` and `OR_APP_NAME`.

## Supported Gateways

ML Intern currently supports one-way notification gateways from CLI sessions.
Expand Down
33 changes: 33 additions & 0 deletions agent/core/llm_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
creating circular imports.
"""

import os

from agent.core.hf_tokens import get_hf_bill_to, resolve_hf_router_token


Expand Down Expand Up @@ -72,12 +74,14 @@ def _widened(model: str) -> bool:
# Effort levels accepted on the wire.
# Anthropic (4.6+): low | medium | high | xhigh | max (output_config.effort)
# OpenAI direct: minimal | low | medium | high | xhigh (reasoning_effort top-level)
# OpenRouter: minimal | low | medium | high | xhigh (reasoning_effort top-level)
# HF router: low | medium | high (extra_body.reasoning_effort)
#
# We validate *shape* here and let the probe cascade walk down on rejection;
# we deliberately do NOT maintain a per-model capability table.
_ANTHROPIC_EFFORTS = {"low", "medium", "high", "xhigh", "max"}  # sent via output_config.effort
_OPENAI_EFFORTS = {"minimal", "low", "medium", "high", "xhigh"}  # top-level reasoning_effort kwarg
_OPENROUTER_EFFORTS = {"minimal", "low", "medium", "high", "xhigh"}  # top-level reasoning_effort kwarg
_HF_EFFORTS = {"low", "medium", "high"}  # sent via extra_body.reasoning_effort


Expand Down Expand Up @@ -114,6 +118,10 @@ def _resolve_llm_params(
• ``openai/<model>`` — ``reasoning_effort`` forwarded as a top-level
kwarg (GPT-5 / o-series). LiteLLM uses the user's ``OPENAI_API_KEY``.

• ``openrouter/<provider>/<model>`` — routed through LiteLLM's OpenRouter
provider. LiteLLM uses ``OPENROUTER_API_KEY``; optional attribution
headers can be set with ``OR_SITE_URL`` and ``OR_APP_NAME``.

• Anything else is treated as a HuggingFace router id. We hit the
auto-routing OpenAI-compatible endpoint at
``https://router.huggingface.co/v1``. The id can be bare or carry an
Expand Down Expand Up @@ -180,6 +188,31 @@ def _resolve_llm_params(
params["reasoning_effort"] = reasoning_effort
return params

if model_name.startswith("openrouter/"):
params = {"model": model_name}
if api_key := os.getenv("OPENROUTER_API_KEY", "").strip():
params["api_key"] = api_key
if api_base := os.getenv("OPENROUTER_API_BASE", "").strip():
params["api_base"] = api_base

extra_headers = {}
if site_url := os.getenv("OR_SITE_URL", "").strip():
extra_headers["HTTP-Referer"] = site_url
if app_name := os.getenv("OR_APP_NAME", "").strip():
extra_headers["X-Title"] = app_name
if extra_headers:
params["extra_headers"] = extra_headers

if reasoning_effort:
if reasoning_effort not in _OPENROUTER_EFFORTS:
if strict:
raise UnsupportedEffortError(
f"OpenRouter doesn't accept effort={reasoning_effort!r}"
)
else:
params["reasoning_effort"] = reasoning_effort
return params

hf_model = model_name.removeprefix("huggingface/")
api_key = _resolve_hf_router_token(session_hf_token)
params = {
Expand Down
41 changes: 41 additions & 0 deletions tests/unit/test_llm_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,47 @@ def test_openai_max_effort_is_still_rejected():
raise AssertionError("Expected UnsupportedEffortError for max effort")


def test_openrouter_params_use_openrouter_provider(monkeypatch):
    """OpenRouter env vars are trimmed and mapped into the LiteLLM params."""
    env = {
        "OPENROUTER_API_KEY": " openrouter-token ",
        "OR_SITE_URL": " https://example.com/ml-intern ",
        "OR_APP_NAME": " ML Intern ",
    }
    for name, value in env.items():
        monkeypatch.setenv(name, value)

    resolved = _resolve_llm_params(
        "openrouter/anthropic/claude-sonnet-4-5",
        reasoning_effort="high",
        strict=True,
    )

    expected_headers = {
        "HTTP-Referer": "https://example.com/ml-intern",
        "X-Title": "ML Intern",
    }
    assert resolved["model"] == "openrouter/anthropic/claude-sonnet-4-5"
    assert resolved["api_key"] == "openrouter-token"
    assert resolved["reasoning_effort"] == "high"
    assert resolved["extra_headers"] == expected_headers


def test_openrouter_api_base_override(monkeypatch):
    """A custom OPENROUTER_API_BASE is trimmed and forwarded as ``api_base``.

    Also pins that, with no key or attribution env vars set, nothing else
    is injected into the params — the original test passed even if the
    developer's shell exported ``OPENROUTER_API_KEY``/``OR_*`` vars, so it
    could not detect accidental key/header leakage.
    """
    # Isolate from ambient developer environment before asserting contents.
    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
    monkeypatch.delenv("OR_SITE_URL", raising=False)
    monkeypatch.delenv("OR_APP_NAME", raising=False)
    monkeypatch.setenv("OPENROUTER_API_BASE", " https://openrouter.example/v1 ")

    params = _resolve_llm_params("openrouter/openai/gpt-5.5")

    assert params["model"] == "openrouter/openai/gpt-5.5"
    assert params["api_base"] == "https://openrouter.example/v1"
    # Nothing beyond model/api_base should appear when the env is clean.
    assert "api_key" not in params
    assert "extra_headers" not in params


def test_openrouter_rejects_max_effort_in_strict_mode():
    """Strict mode raises for efforts outside the OpenRouter accepted set."""
    raised = False
    try:
        _resolve_llm_params(
            "openrouter/openai/gpt-5.5",
            reasoning_effort="max",
            strict=True,
        )
    except UnsupportedEffortError as exc:
        raised = True
        assert "OpenRouter doesn't accept effort='max'" in str(exc)
    assert raised, "Expected UnsupportedEffortError for max effort"


def test_hf_router_token_prefers_inference_token(monkeypatch):
monkeypatch.setenv("INFERENCE_TOKEN", " inference-token ")
monkeypatch.setenv("HF_TOKEN", "hf-token")
Expand Down
Loading