Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,16 @@ Inside interactive mode, switch with `/model`:
/model ollama/llama3.1:8b
/model lm_studio/google/gemma-3-4b
/model llamacpp/llama-3.1-8b-instruct
/model openrouter/anthropic/claude-3.5-sonnet
/model openai-compat/custom-model
```

Supported local prefixes are `ollama/`, `vllm/`, `lm_studio/`, and
`llamacpp/`. Set `LOCAL_LLM_BASE_URL` and optional `LOCAL_LLM_API_KEY` to use
one shared local endpoint, or override a specific provider with its matching
`*_BASE_URL` / `*_API_KEY` variable, such as `OLLAMA_BASE_URL` or
`VLLM_API_KEY`. Provider-specific variables take precedence over the shared
local variables. Base URLs may include or omit `/v1`.
Supported local and custom prefixes are `ollama/`, `vllm/`, `lm_studio/`,
`llamacpp/`, `openrouter/`, and `openai-compat/`. Set `LOCAL_LLM_BASE_URL` and
optional `LOCAL_LLM_API_KEY` to use one shared local endpoint, or override a
specific provider with its matching `*_BASE_URL` / `*_API_KEY` variable, such as
`OPENROUTER_API_KEY` or `VLLM_BASE_URL`. Provider-specific variables take
precedence over the shared local variables. Base URLs may include or omit `/v1`.

## Sharing Traces

Expand Down
12 changes: 11 additions & 1 deletion agent/core/local_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,20 @@
"base_url_default": "http://localhost:8080",
"api_key_env": "LLAMACPP_API_KEY",
},
"openrouter/": {
"base_url_env": "OPENROUTER_BASE_URL",
"base_url_default": "https://openrouter.ai/api/v1",
"api_key_env": "OPENROUTER_API_KEY",
},
"openai-compat/": {
"base_url_env": "LOCAL_LLM_BASE_URL",
"base_url_default": "http://localhost:8080",
"api_key_env": "LOCAL_LLM_API_KEY",
},
}

LOCAL_MODEL_PREFIXES = tuple(LOCAL_MODEL_PROVIDERS)
RESERVED_LOCAL_MODEL_PREFIXES = ("openai-compat/",)
RESERVED_LOCAL_MODEL_PREFIXES: tuple[str, ...] = ()
LOCAL_MODEL_BASE_URL_ENV = "LOCAL_LLM_BASE_URL"
LOCAL_MODEL_API_KEY_ENV = "LOCAL_LLM_API_KEY"
LOCAL_MODEL_API_KEY_DEFAULT = "sk-local-no-key-required"
Expand Down
7 changes: 4 additions & 3 deletions agent/core/model_switcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ def print_model_listing(config, console) -> None:
"\n[dim]Paste any HF model id (e.g. 'MiniMaxAI/MiniMax-M2.7').\n"
"Add ':fastest', ':cheapest', ':preferred', or ':<provider>' to override routing.\n"
"Use 'anthropic/<model>' or 'openai/<model>' for direct API access.\n"
"Use 'ollama/<model>', 'vllm/<model>', 'lm_studio/<model>', or "
"'llamacpp/<model>' for local OpenAI-compatible endpoints.[/dim]"
"Use 'ollama/<model>', 'vllm/<model>', 'lm_studio/<model>', 'llamacpp/<model>',\n"
"'openrouter/<model>', or 'openai-compat/<model>' for OpenAI-compatible endpoints.[/dim]"
)


Expand All @@ -175,7 +175,8 @@ def print_invalid_id(arg: str, console) -> None:
" • <org>/<model>[:tag] (HF router — paste from huggingface.co)\n"
" • anthropic/<model>\n"
" • openai/<model>\n"
" • ollama/<model> | vllm/<model> | lm_studio/<model> | llamacpp/<model>[/dim]"
" • ollama/<model> | vllm/<model> | lm_studio/<model> | llamacpp/<model>\n"
" • openrouter/<model> | openai-compat/<model>[/dim]"
)


Expand Down
14 changes: 13 additions & 1 deletion agent/tools/research_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,20 @@ def _get_research_model(main_model: str) -> str:
"""Pick a cheaper model for research based on the main model."""
if main_model.startswith("anthropic/"):
return "anthropic/claude-sonnet-4-6"

if main_model.startswith("bedrock/") and "anthropic" in main_model:
return "bedrock/us.anthropic.claude-sonnet-4-6"
# Extract region/profile prefix if present (e.g. "us.", "eu.")
# bedrock/us.anthropic... -> us.
# bedrock/anthropic... -> ""
model_part = main_model.removeprefix("bedrock/")
prefix = ""
if "." in model_part:
first_part = model_part.split(".")[0]
if first_part != "anthropic":
prefix = f"{first_part}."

return f"bedrock/{prefix}anthropic.claude-sonnet-4-6"

# For non-Anthropic models (HF router etc.), use the same model
return main_model

Expand Down
10 changes: 8 additions & 2 deletions tests/unit/test_cli_local_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,31 @@ def test_local_model_helper_accepts_supported_prefixes():
assert is_local_model_id("vllm/meta-llama/Llama-3.1-8B-Instruct")
assert is_local_model_id("lm_studio/google/gemma-3-4b")
assert is_local_model_id("llamacpp/unsloth/Qwen3.5-2B")
assert is_local_model_id("openrouter/anthropic/claude-3.5-sonnet")
assert is_local_model_id("openai-compat/my-model")


def test_model_switcher_accepts_supported_local_prefixes():
    """Every supported OpenAI-compatible provider prefix should validate."""
    accepted_ids = [
        "ollama/llama3.1:8b",
        "vllm/meta-llama/Llama-3.1-8B",
        "lm_studio/google/gemma-3-4b",
        "llamacpp/llama-3.1-8b",
        "openrouter/google/gemini-pro-1.5",
        "openai-compat/some-custom-model",
    ]
    for model_id in accepted_ids:
        assert model_switcher.is_valid_model_id(model_id)


def test_model_switcher_rejects_empty_or_whitespace_local_ids():
    """Bare prefixes and ids containing spaces must be rejected."""
    rejected_ids = [
        "ollama/",
        "vllm/",
        "lm_studio/",
        "llamacpp/",
        "openrouter/",
        "openai-compat/",
        "ollama/llama 3.1",  # embedded whitespace is invalid
    ]
    for model_id in rejected_ids:
        assert not model_switcher.is_valid_model_id(model_id)


def test_openai_compat_prefix_is_not_supported():
assert not model_switcher.is_valid_model_id("openai-compat/custom-model")
def test_openai_compat_prefix_is_now_supported():
    # The openai-compat/ prefix was previously reserved; it is now accepted.
    model_id = "openai-compat/custom-model"
    assert model_switcher.is_valid_model_id(model_id)


def test_local_models_skip_hf_router_catalog_output():
Expand Down
33 changes: 30 additions & 3 deletions tests/unit/test_llm_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,32 @@ def test_resolve_llamacpp_params_strips_provider_prefix(monkeypatch):
assert params["api_base"] == "http://localhost:8080/v1"


def test_resolve_openrouter_params(monkeypatch):
    """openrouter/ ids resolve to the OpenRouter endpoint with its API key."""
    monkeypatch.setenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
    monkeypatch.setenv("OPENROUTER_API_KEY", "openrouter-secret")

    resolved = _resolve_llm_params("openrouter/anthropic/claude-3")

    expected = {
        "model": "openai/anthropic/claude-3",
        "api_base": "https://openrouter.ai/api/v1",
        "api_key": "openrouter-secret",
    }
    assert resolved == expected


def test_resolve_openai_compat_params(monkeypatch):
    """openai-compat/ ids resolve via the shared local endpoint variables."""
    monkeypatch.setenv("LOCAL_LLM_BASE_URL", "http://my-proxy:8888")
    monkeypatch.setenv("LOCAL_LLM_API_KEY", "proxy-secret")

    resolved = _resolve_llm_params("openai-compat/custom-model")

    expected = {
        "model": "openai/custom-model",
        "api_base": "http://my-proxy:8888/v1",
        "api_key": "proxy-secret",
    }
    assert resolved == expected


def test_local_params_reject_reasoning_effort_in_strict_mode():
    """Strict mode surfaces an unsupported reasoning_effort as an error."""
    strict_kwargs = {"reasoning_effort": "high", "strict": True}
    with pytest.raises(UnsupportedEffortError, match="reasoning_effort"):
        _resolve_llm_params("ollama/llama3.1", **strict_kwargs)
Expand All @@ -109,9 +135,10 @@ def test_local_params_drop_reasoning_effort_in_non_strict_mode():
assert "extra_body" not in params


def test_openai_compat_prefix_is_not_a_local_escape_hatch():
with pytest.raises(ValueError, match="Unsupported local model id"):
_resolve_llm_params("openai-compat/custom-model")
def test_openai_compat_prefix_is_now_a_local_escape_hatch(monkeypatch):
    """openai-compat/ models resolve instead of raising as unsupported."""
    monkeypatch.setenv("LOCAL_LLM_BASE_URL", "http://localhost:8080")
    resolved = _resolve_llm_params("openai-compat/custom-model")
    assert resolved["model"] == "openai/custom-model"


def test_empty_local_model_id_is_not_treated_as_hf_router():
Expand Down
54 changes: 54 additions & 0 deletions tests/unit/test_research_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from agent.tools.research_tool import _get_research_model


def test_get_research_model_anthropic():
    """All direct Anthropic models map to the cheaper Sonnet research model."""
    for main_model in (
        "anthropic/claude-3-opus-20240229",
        "anthropic/claude-3-5-sonnet-20240620",
    ):
        assert _get_research_model(main_model) == "anthropic/claude-sonnet-4-6"


def test_get_research_model_bedrock_with_prefix():
    """Bedrock region prefixes (us./eu./ap.) are preserved in the mapping."""
    cases = {
        "bedrock/us.anthropic.claude-v3-opus:1": (
            "bedrock/us.anthropic.claude-sonnet-4-6"
        ),
        "bedrock/eu.anthropic.claude-v3-sonnet:1": (
            "bedrock/eu.anthropic.claude-sonnet-4-6"
        ),
        "bedrock/ap.anthropic.claude-v3-5-sonnet:1": (
            "bedrock/ap.anthropic.claude-sonnet-4-6"
        ),
    }
    for main_model, expected in cases.items():
        assert _get_research_model(main_model) == expected


def test_get_research_model_bedrock_no_prefix():
    """Bedrock ids without a region prefix map to the bare anthropic id."""
    main_model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
    assert _get_research_model(main_model) == "bedrock/anthropic.claude-sonnet-4-6"


def test_get_research_model_non_anthropic():
    """Non-Anthropic (HF router) models are passed through unchanged."""
    for main_model in (
        "meta-llama/Llama-3.1-8B-Instruct",
        "huggingface/deepseek-ai/DeepSeek-V3",
    ):
        assert _get_research_model(main_model) == main_model


def test_get_research_model_openai():
    """OpenAI models get no cheaper substitute; they pass through unchanged."""
    main_model = "openai/gpt-4o"
    assert _get_research_model(main_model) == main_model
Loading