diff --git a/README.md b/README.md index b9db1863..fb257013 100644 --- a/README.md +++ b/README.md @@ -88,14 +88,16 @@ Inside interactive mode, switch with `/model`: /model ollama/llama3.1:8b /model lm_studio/google/gemma-3-4b /model llamacpp/llama-3.1-8b-instruct +/model openrouter/anthropic/claude-3.5-sonnet +/model openai-compat/custom-model ``` -Supported local prefixes are `ollama/`, `vllm/`, `lm_studio/`, and -`llamacpp/`. Set `LOCAL_LLM_BASE_URL` and optional `LOCAL_LLM_API_KEY` to use -one shared local endpoint, or override a specific provider with its matching -`*_BASE_URL` / `*_API_KEY` variable, such as `OLLAMA_BASE_URL` or -`VLLM_API_KEY`. Provider-specific variables take precedence over the shared -local variables. Base URLs may include or omit `/v1`. +Supported local and custom prefixes are `ollama/`, `vllm/`, `lm_studio/`, +`llamacpp/`, `openrouter/`, and `openai-compat/`. Set `LOCAL_LLM_BASE_URL` and +optional `LOCAL_LLM_API_KEY` to use one shared local endpoint, or override a +specific provider with its matching `*_BASE_URL` / `*_API_KEY` variable, such as +`OPENROUTER_API_KEY` or `VLLM_BASE_URL`. Provider-specific variables take +precedence over the shared local variables. Base URLs may include or omit `/v1`. ## Sharing Traces diff --git a/agent/core/local_models.py b/agent/core/local_models.py index 9f8a9491..a371731c 100644 --- a/agent/core/local_models.py +++ b/agent/core/local_models.py @@ -21,10 +21,20 @@ "base_url_default": "http://localhost:8080", "api_key_env": "LLAMACPP_API_KEY", }, + "openrouter/": { + "base_url_env": "OPENROUTER_BASE_URL", + "base_url_default": "https://openrouter.ai/api/v1", + "api_key_env": "OPENROUTER_API_KEY", + }, + "openai-compat/": { + "base_url_env": "LOCAL_LLM_BASE_URL", + "base_url_default": "http://localhost:8080", + "api_key_env": "LOCAL_LLM_API_KEY", + }, } LOCAL_MODEL_PREFIXES = tuple(LOCAL_MODEL_PROVIDERS) -RESERVED_LOCAL_MODEL_PREFIXES = ("openai-compat/",) +RESERVED_LOCAL_MODEL_PREFIXES: tuple[str, ...] = () LOCAL_MODEL_BASE_URL_ENV = "LOCAL_LLM_BASE_URL" LOCAL_MODEL_API_KEY_ENV = "LOCAL_LLM_API_KEY" LOCAL_MODEL_API_KEY_DEFAULT = "sk-local-no-key-required" diff --git a/agent/core/model_switcher.py b/agent/core/model_switcher.py index 34eaccdd..497228a0 100644 --- a/agent/core/model_switcher.py +++ b/agent/core/model_switcher.py @@ -163,8 +163,8 @@ def print_model_listing(config, console) -> None: "\n[dim]Paste any HF model id (e.g. 'MiniMaxAI/MiniMax-M2.7').\n" "Add ':fastest', ':cheapest', ':preferred', or ':' to override routing.\n" "Use 'anthropic/' or 'openai/' for direct API access.\n" - "Use 'ollama/', 'vllm/', 'lm_studio/', or " - "'llamacpp/' for local OpenAI-compatible endpoints.[/dim]" + "Use 'ollama/', 'vllm/', 'lm_studio/', 'llamacpp/',\n" + "'openrouter/', or 'openai-compat/' for OpenAI-compatible endpoints.[/dim]" ) @@ -175,7 +175,8 @@ def print_invalid_id(arg: str, console) -> None: " • /[:tag] (HF router — paste from huggingface.co)\n" " • anthropic/\n" " • openai/\n" - " • ollama/ | vllm/ | lm_studio/ | llamacpp/[/dim]" + " • ollama/ | vllm/ | lm_studio/ | llamacpp/\n" + " • openrouter/ | openai-compat/[/dim]" ) diff --git a/agent/tools/research_tool.py b/agent/tools/research_tool.py index f5815be8..fe0c34cd 100644 --- a/agent/tools/research_tool.py +++ b/agent/tools/research_tool.py @@ -223,8 +223,20 @@ def _get_research_model(main_model: str) -> str: """Pick a cheaper model for research based on the main model.""" if main_model.startswith("anthropic/"): return "anthropic/claude-sonnet-4-6" + if main_model.startswith("bedrock/") and "anthropic" in main_model: - return "bedrock/us.anthropic.claude-sonnet-4-6" + # Extract region/profile prefix if present (e.g. "us.", "eu.") + # bedrock/us.anthropic... -> us. + # bedrock/anthropic... -> "" + model_part = main_model.removeprefix("bedrock/") + prefix = "" + if "." in model_part: + first_part = model_part.split(".")[0] + if first_part != "anthropic": + prefix = f"{first_part}." + + return f"bedrock/{prefix}anthropic.claude-sonnet-4-6" + # For non-Anthropic models (HF router etc.), use the same model return main_model diff --git a/tests/unit/test_cli_local_models.py b/tests/unit/test_cli_local_models.py index 836fb3fd..7489bee5 100644 --- a/tests/unit/test_cli_local_models.py +++ b/tests/unit/test_cli_local_models.py @@ -9,6 +9,8 @@ def test_local_model_helper_accepts_supported_prefixes(): assert is_local_model_id("vllm/meta-llama/Llama-3.1-8B-Instruct") assert is_local_model_id("lm_studio/google/gemma-3-4b") assert is_local_model_id("llamacpp/unsloth/Qwen3.5-2B") + assert is_local_model_id("openrouter/anthropic/claude-3.5-sonnet") + assert is_local_model_id("openai-compat/my-model") def test_model_switcher_accepts_supported_local_prefixes(): @@ -16,6 +18,8 @@ def test_model_switcher_accepts_supported_local_prefixes(): assert model_switcher.is_valid_model_id("vllm/meta-llama/Llama-3.1-8B") assert model_switcher.is_valid_model_id("lm_studio/google/gemma-3-4b") assert model_switcher.is_valid_model_id("llamacpp/llama-3.1-8b") + assert model_switcher.is_valid_model_id("openrouter/google/gemini-pro-1.5") + assert model_switcher.is_valid_model_id("openai-compat/some-custom-model") def test_model_switcher_rejects_empty_or_whitespace_local_ids(): @@ -23,11 +27,13 @@ def test_model_switcher_rejects_empty_or_whitespace_local_ids(): assert not model_switcher.is_valid_model_id("vllm/") assert not model_switcher.is_valid_model_id("lm_studio/") assert not model_switcher.is_valid_model_id("llamacpp/") + assert not model_switcher.is_valid_model_id("openrouter/") + assert not model_switcher.is_valid_model_id("openai-compat/") assert not model_switcher.is_valid_model_id("ollama/llama 3.1") -def test_openai_compat_prefix_is_not_supported(): - assert not model_switcher.is_valid_model_id("openai-compat/custom-model") +def test_openai_compat_prefix_is_now_supported(): + assert model_switcher.is_valid_model_id("openai-compat/custom-model") def test_local_models_skip_hf_router_catalog_output(): diff --git a/tests/unit/test_llm_params.py b/tests/unit/test_llm_params.py index a7c7b4cd..e920921b 100644 --- a/tests/unit/test_llm_params.py +++ b/tests/unit/test_llm_params.py @@ -92,6 +92,32 @@ def test_resolve_llamacpp_params_strips_provider_prefix(monkeypatch): assert params["api_base"] == "http://localhost:8080/v1" +def test_resolve_openrouter_params(monkeypatch): + monkeypatch.setenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") + monkeypatch.setenv("OPENROUTER_API_KEY", "openrouter-secret") + + params = _resolve_llm_params("openrouter/anthropic/claude-3") + + assert params == { + "model": "openai/anthropic/claude-3", + "api_base": "https://openrouter.ai/api/v1", + "api_key": "openrouter-secret", + } + + +def test_resolve_openai_compat_params(monkeypatch): + monkeypatch.setenv("LOCAL_LLM_BASE_URL", "http://my-proxy:8888") + monkeypatch.setenv("LOCAL_LLM_API_KEY", "proxy-secret") + + params = _resolve_llm_params("openai-compat/custom-model") + + assert params == { + "model": "openai/custom-model", + "api_base": "http://my-proxy:8888/v1", + "api_key": "proxy-secret", + } + + def test_local_params_reject_reasoning_effort_in_strict_mode(): with pytest.raises(UnsupportedEffortError, match="reasoning_effort"): _resolve_llm_params("ollama/llama3.1", reasoning_effort="high", strict=True) @@ -109,9 +135,10 @@ def test_local_params_drop_reasoning_effort_in_non_strict_mode(): assert "extra_body" not in params -def test_openai_compat_prefix_is_not_a_local_escape_hatch(): - with pytest.raises(ValueError, match="Unsupported local model id"): - _resolve_llm_params("openai-compat/custom-model") +def test_openai_compat_prefix_is_now_a_local_escape_hatch(monkeypatch): + monkeypatch.setenv("LOCAL_LLM_BASE_URL", "http://localhost:8080") + params = _resolve_llm_params("openai-compat/custom-model") + assert params["model"] == "openai/custom-model" def test_empty_local_model_id_is_not_treated_as_hf_router(): diff --git a/tests/unit/test_research_tool.py b/tests/unit/test_research_tool.py new file mode 100644 index 00000000..940874b7 --- /dev/null +++ b/tests/unit/test_research_tool.py @@ -0,0 +1,54 @@ +from agent.tools.research_tool import _get_research_model + + +def test_get_research_model_anthropic(): + assert ( + _get_research_model("anthropic/claude-3-opus-20240229") + == "anthropic/claude-sonnet-4-6" + ) + assert ( + _get_research_model("anthropic/claude-3-5-sonnet-20240620") + == "anthropic/claude-sonnet-4-6" + ) + + +def test_get_research_model_bedrock_with_prefix(): + # US prefix + assert ( + _get_research_model("bedrock/us.anthropic.claude-v3-opus:1") + == "bedrock/us.anthropic.claude-sonnet-4-6" + ) + # EU prefix + assert ( + _get_research_model("bedrock/eu.anthropic.claude-v3-sonnet:1") + == "bedrock/eu.anthropic.claude-sonnet-4-6" + ) + # AP prefix + assert ( + _get_research_model("bedrock/ap.anthropic.claude-v3-5-sonnet:1") + == "bedrock/ap.anthropic.claude-sonnet-4-6" + ) + + +def test_get_research_model_bedrock_no_prefix(): + assert ( + _get_research_model("bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0") + == "bedrock/anthropic.claude-sonnet-4-6" + ) + + +def test_get_research_model_non_anthropic(): + # HF router models should remain unchanged + assert ( + _get_research_model("meta-llama/Llama-3.1-8B-Instruct") + == "meta-llama/Llama-3.1-8B-Instruct" + ) + assert ( + _get_research_model("huggingface/deepseek-ai/DeepSeek-V3") + == "huggingface/deepseek-ai/DeepSeek-V3" + ) + + +def test_get_research_model_openai(): + # OpenAI models should remain unchanged + assert _get_research_model("openai/gpt-4o") == "openai/gpt-4o"