diff --git a/agent/core/agent_loop.py b/agent/core/agent_loop.py index 0eaa6e9d..484250d7 100644 --- a/agent/core/agent_loop.py +++ b/agent/core/agent_loop.py @@ -27,6 +27,7 @@ from agent.core import telemetry from agent.core.doom_loop import check_for_doom_loop from agent.core.hub_artifacts import start_session_artifact_collection_task +from agent.core.llm_errors import friendly_llm_error_message, render_llm_error_message from agent.core.llm_params import _resolve_llm_params from agent.core.prompt_caching import with_prompt_caching from agent.core.session import Event, OpType, Session @@ -505,47 +506,7 @@ async def _heal_effort_and_rebuild_params( def _friendly_error_message(error: Exception) -> str | None: """Return a user-friendly message for known error types, or None to fall back to traceback.""" - err_str = str(error).lower() - - if ( - "authentication" in err_str - or "unauthorized" in err_str - or "invalid x-api-key" in err_str - ): - return ( - "Authentication failed — your API key is missing or invalid.\n\n" - "To fix this, set the API key for your model provider:\n" - " • Anthropic: export ANTHROPIC_API_KEY=sk-...\n" - " • OpenAI: export OPENAI_API_KEY=sk-...\n" - " • HF Router: export HF_TOKEN=hf_...\n\n" - "You can also add it to a .env file in the project root.\n" - "To switch models, use the /model command." - ) - - if "insufficient" in err_str and "credit" in err_str: - return ( - "Insufficient API credits. Please check your account balance " - "at your model provider's dashboard." - ) - - if "not supported by provider" in err_str or "no provider supports" in err_str: - return ( - "The model isn't served by the provider you pinned.\n\n" - "Drop the ':' suffix to let the HF router auto-pick a " - "provider, or use '/model' (no arg) to see which providers host " - "which models." - ) - - if "model_not_found" in err_str or ( - "model" in err_str and ("not found" in err_str or "does not exist" in err_str) - ): - return ( - "Model not found. Use '/model' to list suggestions, or paste an " - "HF model id like 'MiniMaxAI/MiniMax-M2.7'. Availability is shown " - "when you switch." 
- ) - - return None + return friendly_llm_error_message(error) async def _compact_and_notify(session: Session) -> None: @@ -1622,11 +1583,9 @@ async def _exec_tool( continue except Exception as e: - import traceback - - error_msg = _friendly_error_message(e) - if error_msg is None: - error_msg = str(e) + "\n" + traceback.format_exc() + logger.info("Agent turn failed: %s", e) + logger.debug("Agent turn failed", exc_info=True) + error_msg = render_llm_error_message(e) await session.send_event( Event( diff --git a/agent/core/llm_errors.py b/agent/core/llm_errors.py new file mode 100644 index 00000000..a41fe4b5 --- /dev/null +++ b/agent/core/llm_errors.py @@ -0,0 +1,147 @@ +"""Shared LLM error classification and user-facing messages.""" + +from __future__ import annotations + +from typing import Literal + +LlmErrorType = Literal[ + "auth", + "credits", + "model", + "provider", + "rate_limit", + "network", + "unknown", +] + +_AUTH_MARKERS = ( + "authentication failed", + "authentication_error", + "authentication error", + "unauthorized", + "invalid x-api-key", + "invalid api key", + "incorrect api key", + "didn't provide an api key", + "did not provide an api key", + "no api key provided", + "provide your api key", + "x-api-key header is required", + "api key header is required", + "api key required", + "api key is missing or invalid", + "api_key_invalid", + "401", +) +_CREDITS_MARKERS = ( + "insufficient credit", + "insufficient credits", + "out of credits", + "insufficient_quota", + "credit balance is too low", + "balance is too low", + "purchase credits", + "plans & billing", + "quota", + "billing", + "payment required", + "402", +) +_RATE_LIMIT_MARKERS = ("429", "rate limit", "too many requests") +_NETWORK_MARKERS = ( + "timeout", + "timed out", + "connect", + "connection error", + "connection refused", + "connection reset", + "network", + "service unavailable", + "bad gateway", + "overloaded", + "capacity", +) + + +def _has_any(err_str: str, markers: tuple[str, ...]) -> bool: + return any(marker in err_str for marker in markers) + + +def classify_llm_error(error: Exception) -> LlmErrorType: + """Classify common provider/API failures from the exception text.""" + err_str = str(error).lower() + + if _has_any(err_str, _AUTH_MARKERS): + return "auth" + if _has_any(err_str, _CREDITS_MARKERS): + return "credits" + if "not supported by provider" in err_str or "no provider supports" in err_str: + return "provider" + if "model_not_found" in err_str or "unknown model" in err_str: + return "model" + if "model" in err_str and ( + "not found" in err_str + or "does not exist" in err_str + or "not available" in err_str + ): + return "model" + if _has_any(err_str, _RATE_LIMIT_MARKERS): + return "rate_limit" + if _has_any(err_str, _NETWORK_MARKERS): + return "network" + return "unknown" + + +def friendly_llm_error_message(error: Exception) -> str | None: + """Return a clean user-facing message for common LLM failures.""" + error_type = classify_llm_error(error) + + if error_type == "auth": + return ( + "Authentication failed — your API key is missing or invalid.\n\n" + "To fix this, set the API key for your model provider:\n" + " • Anthropic: export ANTHROPIC_API_KEY=sk-...\n" + " • OpenAI: export OPENAI_API_KEY=sk-...\n" + " • HF Router: export HF_TOKEN=hf_...\n\n" + "You can also add it to a .env file in the project root.\n" + "To switch models, use the /model command." 
+ ) + if error_type == "credits": + return ( + "Insufficient API credits or quota for this model/provider.\n\n" + "Check billing for the current provider, or switch models with /model." + ) + if error_type == "provider": + return ( + "The model isn't served by the provider you pinned.\n\n" + "Drop the ':' suffix to let the HF router auto-pick a " + "provider, or use '/model' (no arg) to see which providers host " + "which models." + ) + if error_type == "model": + return ( + "Model not found. Use '/model' to list suggestions, or paste an " + "HF model id like 'MiniMaxAI/MiniMax-M2.7'. Availability is shown " + "when you switch." + ) + if error_type == "rate_limit": + return ( + "Rate limit reached. Wait a moment and retry, or switch models/providers " + "with /model." + ) + if error_type == "network": + return "The model provider is unavailable or timed out. Retry in a moment." + return None + + +def render_llm_error_message(error: Exception) -> str: + """Return the message safe to show to users.""" + return friendly_llm_error_message(error) or str(error) + + +def health_error_type(error: Exception) -> str: + """Map LLM failures to the backend health endpoint error_type values.""" + error_type = classify_llm_error(error) + if error_type in {"auth", "credits", "rate_limit", "network"}: + return error_type + return "unknown" diff --git a/agent/core/llm_params.py b/agent/core/llm_params.py index f95695fb..45cd4e67 100644 --- a/agent/core/llm_params.py +++ b/agent/core/llm_params.py @@ -5,16 +5,10 @@ creating circular imports. """ -import os - -from agent.core.hf_tokens import get_hf_bill_to, resolve_hf_router_token -from agent.core.local_models import ( - LOCAL_MODEL_API_KEY_DEFAULT, - LOCAL_MODEL_API_KEY_ENV, - LOCAL_MODEL_BASE_URL_ENV, - is_reserved_local_model_id, - local_model_name, - local_model_provider, +from agent.core.hf_tokens import resolve_hf_router_token +from agent.core.provider_adapters import ( + UnsupportedEffortError, + resolve_adapter, ) @@ -23,30 +17,15 @@ def _resolve_hf_router_token(session_hf_token: str | None = None) -> str | None: return resolve_hf_router_token(session_hf_token) -def _patch_litellm_effort_validation() -> None: - """Neuter LiteLLM 1.83's hardcoded effort-level validation. - - Context: at ``litellm/llms/anthropic/chat/transformation.py:~1443`` the - Anthropic adapter validates ``output_config.effort ∈ {high, medium, - low, max}`` and gates ``max`` behind an ``_is_opus_4_6_model`` check - that only matches the substring ``opus-4-6`` / ``opus_4_6``. Result: +__all__ = [ + "UnsupportedEffortError", + "_resolve_hf_router_token", + "_resolve_llm_params", +] - * ``xhigh`` — valid on Anthropic's real API for Claude 4.7 — is - rejected pre-flight with "Invalid effort value: xhigh". - * ``max`` on Opus 4.7 is rejected with "effort='max' is only supported - by Claude Opus 4.6", even though Opus 4.7 accepts it in practice. - We don't want to maintain a parallel model table, so we let the - Anthropic API itself be the validator: widen ``_is_opus_4_6_model`` - to also match ``opus-4-7``+ families, and drop the valid-effort-set - check entirely. If Anthropic rejects an effort level, we see a 400 - and the cascade walks down — exactly the behavior we want for any - future model family. - - Removable once litellm ships 1.83.8-stable (which merges PR #25867, - "Litellm day 0 opus 4.7 support") — see commit 0868a82 on their main - branch. Until then, this one-time patch is the escape hatch. 
- """ +def _patch_litellm_effort_validation() -> None: + """Patch LiteLLM's Anthropic effort validation for Claude Opus 4.7.""" try: from litellm.llms.anthropic.chat import transformation as _t except Exception: @@ -86,105 +65,15 @@ def _widened(model: str) -> bool: _patch_litellm_effort_validation() -# Effort levels accepted on the wire. -# Anthropic (4.6+): low | medium | high | xhigh | max (output_config.effort) -# OpenAI direct: minimal | low | medium | high | xhigh (reasoning_effort top-level) -# HF router: low | medium | high (extra_body.reasoning_effort) -# -# We validate *shape* here and let the probe cascade walk down on rejection; -# we deliberately do NOT maintain a per-model capability table. -_ANTHROPIC_EFFORTS = {"low", "medium", "high", "xhigh", "max"} -_OPENAI_EFFORTS = {"minimal", "low", "medium", "high", "xhigh"} -_HF_EFFORTS = {"low", "medium", "high"} - - -class UnsupportedEffortError(ValueError): - """The requested effort isn't valid for this provider's API surface. - - Raised synchronously before any network call so the probe cascade can - skip levels the provider can't accept (e.g. ``max`` on HF router). - """ - - -def _local_api_base(base_url: str) -> str: - base = base_url.strip().rstrip("/") - if base.endswith("/v1"): - return base - return f"{base}/v1" - - -def _resolve_local_model_params( - model_name: str, - reasoning_effort: str | None = None, - strict: bool = False, -) -> dict: - if reasoning_effort and strict: - raise UnsupportedEffortError( - "Local OpenAI-compatible endpoints don't accept reasoning_effort" - ) - - local_name = local_model_name(model_name) - if local_name is None: - raise ValueError(f"Unsupported local model id: {model_name}") - - provider = local_model_provider(model_name) - assert provider is not None - raw_base = ( - os.environ.get(provider["base_url_env"]) - or os.environ.get(LOCAL_MODEL_BASE_URL_ENV) - or provider["base_url_default"] - ) - api_key = ( - os.environ.get(provider["api_key_env"]) - or os.environ.get(LOCAL_MODEL_API_KEY_ENV) - or LOCAL_MODEL_API_KEY_DEFAULT - ) - return { - "model": f"openai/{local_name}", - "api_base": _local_api_base(raw_base), - "api_key": api_key, - } - - def _resolve_llm_params( model_name: str, session_hf_token: str | None = None, reasoning_effort: str | None = None, strict: bool = False, ) -> dict: - """ - Build LiteLLM kwargs for a given model id. - - • ``anthropic/`` — native thinking config. We bypass LiteLLM's - ``reasoning_effort`` → ``thinking`` mapping (which lags new Claude - releases like 4.7 and sends the wrong API shape). Instead we pass - both ``thinking={"type": "adaptive"}`` and ``output_config= - {"effort": }`` as top-level kwargs — LiteLLM's Anthropic - adapter forwards unknown top-level kwargs into the request body - verbatim (confirmed by live probe; ``extra_body`` does NOT work - here because Anthropic's API rejects it as "Extra inputs are not - permitted"). This is the stable API for 4.6 and 4.7. Older - extended-thinking models that only accept ``thinking.type.enabled`` - will reject this; the probe's cascade catches that and falls back - to no thinking. + """Build LiteLLM kwargs for a given model id. - • ``openai/`` — ``reasoning_effort`` forwarded as a top-level - kwarg (GPT-5 / o-series). LiteLLM uses the user's ``OPENAI_API_KEY``. - - • ``ollama/``, ``vllm/``, ``lm_studio/``, and - ``llamacpp/`` — local OpenAI-compatible endpoints. The id prefix - selects a configurable localhost base URL, and the model suffix is sent - to LiteLLM as ``openai/``. 
These endpoints don't receive - ``reasoning_effort``. - - • Anything else is treated as a HuggingFace router id. We hit the - auto-routing OpenAI-compatible endpoint at - ``https://router.huggingface.co/v1``. The id can be bare or carry an - HF routing suffix (``:fastest`` / ``:cheapest`` / ``:``). - A leading ``huggingface/`` is stripped. ``reasoning_effort`` is - forwarded via ``extra_body`` (LiteLLM's OpenAI adapter refuses it as - a top-level kwarg for non-OpenAI models). "minimal" normalizes to - "low". + Delegates to the matching provider adapter. ``strict=True`` raises ``UnsupportedEffortError`` when the requested effort isn't in the provider's accepted set, instead of silently @@ -200,71 +89,12 @@ def _resolve_llm_params( 3. huggingface_hub cache — ``HF_TOKEN`` / ``HUGGING_FACE_HUB_TOKEN`` / local ``hf auth login`` cache. """ - if model_name.startswith("anthropic/"): - params: dict = {"model": model_name} - if reasoning_effort: - level = reasoning_effort - if level == "minimal": - level = "low" - if level not in _ANTHROPIC_EFFORTS: - if strict: - raise UnsupportedEffortError( - f"Anthropic doesn't accept effort={level!r}" - ) - else: - # Adaptive thinking + output_config.effort is the stable - # Anthropic API for Claude 4.6 / 4.7. Both kwargs are - # passed top-level: LiteLLM forwards unknown params into - # the request body for Anthropic, so ``output_config`` - # reaches the API. ``extra_body`` does NOT work here — - # Anthropic rejects it as "Extra inputs are not - # permitted". - params["thinking"] = {"type": "adaptive"} - params["output_config"] = {"effort": level} - return params - - if model_name.startswith("bedrock/"): - # LiteLLM routes ``bedrock/...`` through the Converse adapter, which - # picks up AWS credentials from the standard env vars - # (``AWS_ACCESS_KEY_ID`` / ``AWS_SECRET_ACCESS_KEY`` / ``AWS_REGION``). - # The Anthropic thinking/effort shape is not forwarded through Converse - # the same way, so we leave it off for now. 
- return {"model": model_name} - - if model_name.startswith("openai/"): - params = {"model": model_name} - if reasoning_effort: - if reasoning_effort not in _OPENAI_EFFORTS: - if strict: - raise UnsupportedEffortError( - f"OpenAI doesn't accept effort={reasoning_effort!r}" - ) - else: - params["reasoning_effort"] = reasoning_effort - return params - - if is_reserved_local_model_id(model_name): - raise ValueError(f"Unsupported local model id: {model_name}") - - if local_model_provider(model_name) is not None: - return _resolve_local_model_params(model_name, reasoning_effort, strict) - - hf_model = model_name.removeprefix("huggingface/") - api_key = _resolve_hf_router_token(session_hf_token) - params = { - "model": f"openai/{hf_model}", - "api_base": "https://router.huggingface.co/v1", - "api_key": api_key, - } - if bill_to := get_hf_bill_to(): - params["extra_headers"] = {"X-HF-Bill-To": bill_to} - if reasoning_effort: - hf_level = "low" if reasoning_effort == "minimal" else reasoning_effort - if hf_level not in _HF_EFFORTS: - if strict: - raise UnsupportedEffortError( - f"HF router doesn't accept effort={hf_level!r}" - ) - else: - params["extra_body"] = {"reasoning_effort": hf_level} - return params + adapter = resolve_adapter(model_name) + if adapter is None: + raise ValueError(f"No provider adapter for model: {model_name}") + return adapter.build_params( + model_name, + session_hf_token=session_hf_token, + reasoning_effort=reasoning_effort, + strict=strict, + ) diff --git a/agent/core/model_switcher.py b/agent/core/model_switcher.py index 34eaccdd..ce4d6c74 100644 --- a/agent/core/model_switcher.py +++ b/agent/core/model_switcher.py @@ -20,12 +20,13 @@ from litellm import acompletion from agent.core.effort_probe import ProbeInconclusive, probe_effort +from agent.core.llm_errors import render_llm_error_message from agent.core.llm_params import _resolve_llm_params from agent.core.local_models import ( LOCAL_MODEL_PREFIXES, is_local_model_id, - is_reserved_local_model_id, ) +from agent.core.provider_adapters import is_valid_model_name # Suggested models shown by `/model` (not a gate). Users can paste any HF @@ -50,7 +51,7 @@ _ROUTING_POLICIES = {"fastest", "cheapest", "preferred"} -_DIRECT_PREFIXES = ("anthropic/", "openai/", *LOCAL_MODEL_PREFIXES) +_DIRECT_PREFIXES = ("anthropic/", "openai/", "bedrock/", *LOCAL_MODEL_PREFIXES) _LOCAL_PROBE_TIMEOUT = 15.0 @@ -67,19 +68,7 @@ def is_valid_model_id(model_id: str) -> bool: Actual availability is verified against the HF router catalog on switch, and by the provider on the probe's ping call. 
""" - if not model_id: - return False - if is_local_model_id(model_id): - return True - if is_reserved_local_model_id(model_id): - return False - if any(model_id.startswith(prefix) for prefix in LOCAL_MODEL_PREFIXES): - return False - if "/" not in model_id: - return False - head = model_id.split(":", 1)[0] - parts = head.split("/") - return len(parts) >= 2 and all(parts) + return is_valid_model_name(model_id) def _print_hf_routing_info(model_id: str, console) -> bool: @@ -175,6 +164,7 @@ def print_invalid_id(arg: str, console) -> None: " • /[:tag] (HF router — paste from huggingface.co)\n" " • anthropic/\n" " • openai/\n" + " • bedrock/\n" " • ollama/ | vllm/ | lm_studio/ | llamacpp/[/dim]" ) @@ -248,14 +238,15 @@ async def probe_and_switch_model( outcome = await probe_effort(model_id, preference, hf_token, session=session) except ProbeInconclusive as e: _commit_switch(model_id, config, session, effective=None, cache=False) + warning = render_llm_error_message(e) console.print( f"[yellow]Model switched to {model_id}[/yellow] " - f"[dim](couldn't validate: {e}; will verify on first message)[/dim]" + f"[dim](couldn't validate: {warning}; will verify on first message)[/dim]" ) return except Exception as e: # Hard persistent error — auth, unknown model, quota. Don't switch. - console.print(f"[bold red]Switch failed:[/bold red] {e}") + console.print(f"[bold red]Switch failed:[/bold red] {render_llm_error_message(e)}") console.print(f"[dim]Keeping current model: {config.model_name}[/dim]") return diff --git a/agent/core/provider_adapters.py b/agent/core/provider_adapters.py new file mode 100644 index 00000000..4c12a224 --- /dev/null +++ b/agent/core/provider_adapters.py @@ -0,0 +1,288 @@ +"""Provider adapters for runtime params and model-name validation.""" + +import os +from dataclasses import dataclass +from typing import Any, ClassVar + +from agent.core.hf_tokens import get_hf_bill_to, resolve_hf_router_token +from agent.core.local_models import ( + LOCAL_MODEL_API_KEY_DEFAULT, + LOCAL_MODEL_API_KEY_ENV, + LOCAL_MODEL_BASE_URL_ENV, + LOCAL_MODEL_PREFIXES, + RESERVED_LOCAL_MODEL_PREFIXES, + is_local_model_id, + is_reserved_local_model_id, + local_model_name, + local_model_provider, +) + + +class UnsupportedEffortError(ValueError): + """The requested effort isn't valid for this provider's API surface. + + Raised synchronously before any network call so the probe cascade can + skip levels the provider can't accept (e.g. ``max`` on HF router). + """ + + +def _has_model_suffix(model_name: str, prefix: str) -> bool: + if not model_name.startswith(prefix): + return False + tail = model_name[len(prefix) :].split(":", 1)[0] + return bool(tail) and all(tail.split("/")) + + +def _is_hf_model_name(model_name: str) -> bool: + if model_name.startswith(("anthropic/", "openai/", "bedrock/")): + return False + if model_name.startswith(LOCAL_MODEL_PREFIXES): + return False + if model_name.startswith(RESERVED_LOCAL_MODEL_PREFIXES): + return False + bare = model_name.removeprefix("huggingface/").split(":", 1)[0] + parts = bare.split("/") + return len(parts) >= 2 and all(parts) + + +@dataclass(frozen=True) +class ProviderAdapter: + provider_id: str + prefixes: tuple[str, ...] 
= () + + def matches(self, model_name: str) -> bool: + return bool(self.prefixes) and model_name.startswith(self.prefixes) + + def build_params( + self, + model_name: str, + *, + session_hf_token: str | None = None, + reasoning_effort: str | None = None, + strict: bool = False, + ) -> dict: + raise NotImplementedError + + def allows_model_name(self, model_name: str) -> bool: + return self.matches(model_name) + + +@dataclass(frozen=True) +class AnthropicAdapter(ProviderAdapter): + """Anthropic models via native API (thinking + output_config.effort).""" + + prefixes: tuple[str, ...] = ("anthropic/",) + _EFFORTS: ClassVar[frozenset[str]] = frozenset( + {"low", "medium", "high", "xhigh", "max"} + ) + + def allows_model_name(self, model_name: str) -> bool: + return _has_model_suffix(model_name, "anthropic/") + + def build_params( + self, + model_name: str, + *, + session_hf_token: str | None = None, + reasoning_effort: str | None = None, + strict: bool = False, + ) -> dict: + params: dict[str, Any] = {"model": model_name} + if reasoning_effort: + level = "low" if reasoning_effort == "minimal" else reasoning_effort + if level not in self._EFFORTS: + if strict: + raise UnsupportedEffortError( + f"Anthropic doesn't accept effort={level!r}" + ) + else: + params["thinking"] = {"type": "adaptive"} + params["output_config"] = {"effort": level} + return params + + +@dataclass(frozen=True) +class OpenAIAdapter(ProviderAdapter): + """OpenAI models via native API (reasoning_effort top-level kwarg).""" + + prefixes: tuple[str, ...] = ("openai/",) + _EFFORTS: ClassVar[frozenset[str]] = frozenset( + {"minimal", "low", "medium", "high", "xhigh"} + ) + + def allows_model_name(self, model_name: str) -> bool: + return _has_model_suffix(model_name, "openai/") + + def build_params( + self, + model_name: str, + *, + session_hf_token: str | None = None, + reasoning_effort: str | None = None, + strict: bool = False, + ) -> dict: + params: dict[str, Any] = {"model": model_name} + if reasoning_effort: + if reasoning_effort not in self._EFFORTS: + if strict: + raise UnsupportedEffortError( + f"OpenAI doesn't accept effort={reasoning_effort!r}" + ) + else: + params["reasoning_effort"] = reasoning_effort + return params + + +@dataclass(frozen=True) +class BedrockAdapter(ProviderAdapter): + """AWS Bedrock models via LiteLLM Converse adapter. + + Picks up AWS credentials from standard env vars. + Thinking/effort not forwarded through Converse for now. + """ + + prefixes: tuple[str, ...] 
= ("bedrock/",) + + def allows_model_name(self, model_name: str) -> bool: + return _has_model_suffix(model_name, "bedrock/") + + def build_params( + self, + model_name: str, + *, + session_hf_token: str | None = None, + reasoning_effort: str | None = None, + strict: bool = False, + ) -> dict: + return {"model": model_name} + + +@dataclass(frozen=True) +class HfRouterAdapter(ProviderAdapter): + """HuggingFace router — OpenAI-compat endpoint with HF token chain.""" + + _EFFORTS: ClassVar[frozenset[str]] = frozenset({"low", "medium", "high"}) + + def matches(self, model_name: str) -> bool: + if model_name.startswith(("anthropic/", "openai/", "bedrock/")): + return False + if model_name.startswith(LOCAL_MODEL_PREFIXES): + return False + if model_name.startswith(RESERVED_LOCAL_MODEL_PREFIXES): + return False + return True + + def allows_model_name(self, model_name: str) -> bool: + return _is_hf_model_name(model_name) + + def build_params( + self, + model_name: str, + *, + session_hf_token: str | None = None, + reasoning_effort: str | None = None, + strict: bool = False, + ) -> dict: + hf_model = model_name.removeprefix("huggingface/") + api_key = resolve_hf_router_token(session_hf_token) + + params: dict[str, Any] = { + "model": f"openai/{hf_model}", + "api_base": "https://router.huggingface.co/v1", + "api_key": api_key, + } + + if bill_to := get_hf_bill_to(): + params["extra_headers"] = {"X-HF-Bill-To": bill_to} + + if reasoning_effort: + hf_level = "low" if reasoning_effort == "minimal" else reasoning_effort + if hf_level not in self._EFFORTS: + if strict: + raise UnsupportedEffortError( + f"HF router doesn't accept effort={hf_level!r}" + ) + else: + params["extra_body"] = {"reasoning_effort": hf_level} + + return params + + +@dataclass(frozen=True) +class LocalModelAdapter(ProviderAdapter): + """Local OpenAI-compatible endpoints (ollama / vllm / lm_studio / llamacpp). + + The id prefix selects a configurable localhost base URL, and the model + suffix is sent to LiteLLM as ``openai/``. Reserved prefixes + (e.g. ``openai-compat/``) are matched here so they reject cleanly + instead of falling through to the HF router. + """ + + prefixes: tuple[str, ...] 
= LOCAL_MODEL_PREFIXES + + def matches(self, model_name: str) -> bool: + return model_name.startswith(self.prefixes) or model_name.startswith( + RESERVED_LOCAL_MODEL_PREFIXES + ) + + def allows_model_name(self, model_name: str) -> bool: + return is_local_model_id(model_name) + + def build_params( + self, + model_name: str, + *, + session_hf_token: str | None = None, + reasoning_effort: str | None = None, + strict: bool = False, + ) -> dict: + if is_reserved_local_model_id(model_name) or not is_local_model_id(model_name): + raise ValueError(f"Unsupported local model id: {model_name}") + + if reasoning_effort and strict: + raise UnsupportedEffortError( + "Local OpenAI-compatible endpoints don't accept reasoning_effort" + ) + + local_name = local_model_name(model_name) + provider = local_model_provider(model_name) + assert local_name is not None and provider is not None + + raw_base = ( + os.environ.get(provider["base_url_env"]) + or os.environ.get(LOCAL_MODEL_BASE_URL_ENV) + or provider["base_url_default"] + ) + api_key = ( + os.environ.get(provider["api_key_env"]) + or os.environ.get(LOCAL_MODEL_API_KEY_ENV) + or LOCAL_MODEL_API_KEY_DEFAULT + ) + base = raw_base.strip().rstrip("/") + api_base = base if base.endswith("/v1") else f"{base}/v1" + return { + "model": f"openai/{local_name}", + "api_base": api_base, + "api_key": api_key, + } + + +ADAPTERS: tuple[ProviderAdapter, ...] = ( + AnthropicAdapter(provider_id="anthropic"), + BedrockAdapter(provider_id="bedrock"), + OpenAIAdapter(provider_id="openai"), + LocalModelAdapter(provider_id="local"), + HfRouterAdapter(provider_id="huggingface"), +) + + +def resolve_adapter(model_name: str) -> ProviderAdapter | None: + for adapter in ADAPTERS: + if adapter.matches(model_name): + return adapter + return None + + +def is_valid_model_name(model_name: str) -> bool: + adapter = resolve_adapter(model_name) + return adapter is not None and adapter.allows_model_name(model_name) diff --git a/backend/routes/agent.py b/backend/routes/agent.py index c93aef8f..b5d90f4a 100644 --- a/backend/routes/agent.py +++ b/backend/routes/agent.py @@ -46,6 +46,7 @@ from agent.core.hf_access import get_jobs_access from agent.core.hf_tokens import resolve_hf_request_token, resolve_hf_router_token +from agent.core.llm_errors import health_error_type, render_llm_error_message from agent.core.llm_params import _resolve_llm_params logger = logging.getLogger(__name__) @@ -285,34 +286,12 @@ async def llm_health_check() -> LLMHealthResponse: ) return LLMHealthResponse(status="ok", model=model) except Exception as e: - err_str = str(e).lower() - error_type = "unknown" - - if ( - "401" in err_str - or "auth" in err_str - or "invalid" in err_str - or "api key" in err_str - ): - error_type = "auth" - elif ( - "402" in err_str - or "credit" in err_str - or "quota" in err_str - or "insufficient" in err_str - or "billing" in err_str - ): - error_type = "credits" - elif "429" in err_str or "rate" in err_str: - error_type = "rate_limit" - elif "timeout" in err_str or "connect" in err_str or "network" in err_str: - error_type = "network" - + error_type = health_error_type(e) logger.warning(f"LLM health check failed ({error_type}): {e}") return LLMHealthResponse( status="error", model=model, - error=str(e)[:500], + error=render_llm_error_message(e)[:500], error_type=error_type, ) diff --git a/backend/session_manager.py b/backend/session_manager.py index 449ce3a0..26e3b01d 100644 --- a/backend/session_manager.py +++ b/backend/session_manager.py @@ -11,6 +11,7 @@ from typing import Any, 
Optional from agent.config import load_config +from agent.core.llm_errors import render_llm_error_message from agent.core.agent_loop import process_submission from agent.core.hub_artifacts import start_session_artifact_collection_task from agent.core.session import Event, OpType, Session @@ -987,7 +988,7 @@ async def _run_session( except Exception as e: logger.error(f"Error in session {session_id}: {e}") await session.send_event( - Event(event_type="error", data={"error": str(e)}) + Event(event_type="error", data={"error": render_llm_error_message(e)}) ) finally: diff --git a/tests/test_llm_errors.py b/tests/test_llm_errors.py new file mode 100644 index 00000000..d36ebed7 --- /dev/null +++ b/tests/test_llm_errors.py @@ -0,0 +1,124 @@ +import asyncio +from types import SimpleNamespace + +from rich.console import Console + +import agent.core.model_switcher as model_switcher +from agent.core.effort_probe import ProbeInconclusive +from agent.core.llm_errors import ( + classify_llm_error, + friendly_llm_error_message, + health_error_type, + render_llm_error_message, +) + + +def test_auth_errors_get_clean_message() -> None: + error = Exception("401 unauthorized: invalid api key") + + assert classify_llm_error(error) == "auth" + assert "Authentication failed" in friendly_llm_error_message(error) + + +def test_missing_api_key_header_gets_clean_message() -> None: + error = Exception("authentication_error: x-api-key header is required") + + assert classify_llm_error(error) == "auth" + assert render_llm_error_message(error).startswith("Authentication failed") + + +def test_openai_missing_api_key_gets_clean_message() -> None: + error = Exception( + "You didn't provide an API key. You need to provide your API key in an Authorization header." + ) + + assert classify_llm_error(error) == "auth" + assert render_llm_error_message(error).startswith("Authentication failed") + + +def test_anthropic_low_credit_error_gets_clean_message() -> None: + error = Exception( + "Your credit balance is too low to access the Anthropic API. " + "Please go to Plans & Billing to upgrade or purchase credits." + ) + + assert classify_llm_error(error) == "credits" + assert render_llm_error_message(error).startswith( + "Insufficient API credits or quota" + ) + + +def test_model_not_found_error_gets_clean_message() -> None: + error = Exception("model_not_found: requested model does not exist") + + assert classify_llm_error(error) == "model" + assert render_llm_error_message(error).startswith("Model not found") + + +def test_unknown_errors_fall_back_to_plain_exception_text() -> None: + error = RuntimeError("boom") + + assert classify_llm_error(error) == "unknown" + assert render_llm_error_message(error) == "boom" + + +def test_health_error_type_keeps_public_categories_stable() -> None: + assert health_error_type(Exception("invalid api key")) == "auth" + assert health_error_type(Exception("credit balance is too low")) == "credits" + assert health_error_type(Exception("rate limit exceeded")) == "rate_limit" + assert health_error_type(Exception("model_not_found")) == "unknown" + + +def test_model_switcher_shows_clean_hard_failure(monkeypatch) -> None: + async def fake_probe_effort(*args, **kwargs): + raise Exception( + "Your credit balance is too low to access the Anthropic API. " + "Please go to Plans & Billing to upgrade or purchase credits." 
+ ) + + monkeypatch.setattr(model_switcher, "probe_effort", fake_probe_effort) + console = Console(record=True, width=120) + config = SimpleNamespace( + reasoning_effort="high", + model_name="anthropic/claude-opus-4-6", + ) + + asyncio.run( + model_switcher.probe_and_switch_model( + "anthropic/claude-opus-4-7", + config, + None, + console, + None, + ) + ) + + output = console.export_text() + assert "Insufficient API credits or quota" in output + assert "credit balance is too low" not in output.lower() + + +def test_model_switcher_shows_clean_inconclusive_warning(monkeypatch) -> None: + async def fake_probe_effort(*args, **kwargs): + raise ProbeInconclusive("timeout talking to provider") + + monkeypatch.setattr(model_switcher, "probe_effort", fake_probe_effort) + console = Console(record=True, width=120) + config = SimpleNamespace( + reasoning_effort="high", + model_name="anthropic/claude-opus-4-6", + ) + + asyncio.run( + model_switcher.probe_and_switch_model( + "anthropic/claude-opus-4-7", + config, + None, + console, + None, + ) + ) + + output = console.export_text() + assert "The model provider is unavailable or timed out" in output + assert "timeout talking to provider" not in output.lower() diff --git a/tests/test_provider_adapters.py b/tests/test_provider_adapters.py new file mode 100644 index 00000000..3f809c96 --- /dev/null +++ b/tests/test_provider_adapters.py @@ -0,0 +1,165 @@ +import pytest + +from agent.core.llm_params import _resolve_llm_params +from agent.core.model_switcher import is_valid_model_id +from agent.core.provider_adapters import ( + UnsupportedEffortError, + is_valid_model_name, +) + + +# -- Anthropic adapter ------------------------------------------------------- + + +def test_anthropic_adapter_builds_thinking_config(): + params = _resolve_llm_params("anthropic/claude-opus-4-6", reasoning_effort="high") + + assert params == { + "model": "anthropic/claude-opus-4-6", + "thinking": {"type": "adaptive"}, + "output_config": {"effort": "high"}, + } + + +def test_anthropic_adapter_normalizes_minimal_to_low(): + params = _resolve_llm_params( + "anthropic/claude-opus-4-7", reasoning_effort="minimal" + ) + + assert params["output_config"] == {"effort": "low"} + + +def test_anthropic_adapter_no_effort(): + params = _resolve_llm_params("anthropic/claude-opus-4-6") + + assert params == {"model": "anthropic/claude-opus-4-6"} + + +def test_anthropic_adapter_strict_rejects_invalid(): + with pytest.raises(UnsupportedEffortError): + _resolve_llm_params( + "anthropic/claude-opus-4-6", reasoning_effort="turbo", strict=True + ) + + +def test_anthropic_adapter_nonstrict_drops_invalid(): + params = _resolve_llm_params( + "anthropic/claude-opus-4-6", reasoning_effort="turbo", strict=False + ) + assert "thinking" not in params + assert "output_config" not in params + + +# -- OpenAI adapter ----------------------------------------------------------- + + +def test_openai_adapter_passes_reasoning_effort(): + params = _resolve_llm_params("openai/gpt-5", reasoning_effort="medium") + + assert params == {"model": "openai/gpt-5", "reasoning_effort": "medium"} + + +def test_openai_adapter_strict_rejects_max(): + with pytest.raises(UnsupportedEffortError): + _resolve_llm_params("openai/gpt-5", reasoning_effort="max", strict=True) + + +# -- Bedrock adapter ---------------------------------------------------------- + + +def test_bedrock_adapter_returns_model_only(): + params = _resolve_llm_params("bedrock/us.anthropic.claude-opus-4-7") + assert params == {"model": 
"bedrock/us.anthropic.claude-opus-4-7"} + + +def test_bedrock_adapter_ignores_effort(): + params = _resolve_llm_params( + "bedrock/us.anthropic.claude-opus-4-6-v1", reasoning_effort="high" + ) + assert params == {"model": "bedrock/us.anthropic.claude-opus-4-6-v1"} + + +def test_bedrock_validation(): + assert is_valid_model_name("bedrock/us.anthropic.claude-opus-4-7") is True + assert is_valid_model_name("bedrock/") is False + + +# -- HF Router adapter -------------------------------------------------------- + + +def test_hf_adapter_builds_router_params(monkeypatch): + monkeypatch.setenv("HF_TOKEN", "hf-test") + + params = _resolve_llm_params( + "moonshotai/Kimi-K2.6:novita", reasoning_effort="minimal" + ) + + assert params == { + "model": "openai/moonshotai/Kimi-K2.6:novita", + "api_base": "https://router.huggingface.co/v1", + "api_key": "hf-test", + "extra_body": {"reasoning_effort": "low"}, + } + + +def test_hf_adapter_adds_bill_to_header(monkeypatch): + monkeypatch.setenv("INFERENCE_TOKEN", "hf-space-token") + monkeypatch.delenv("HF_TOKEN", raising=False) + + params = _resolve_llm_params("MiniMaxAI/MiniMax-M2.7") + + assert params["extra_headers"] == {"X-HF-Bill-To": "smolagents"} + assert params["api_key"] == "hf-space-token" + + +def test_hf_adapter_strict_rejects_max(): + with pytest.raises(UnsupportedEffortError): + _resolve_llm_params( + "MiniMaxAI/MiniMax-M2.7", reasoning_effort="max", strict=True + ) + + +# -- Validation --------------------------------------------------------------- + + +def test_model_validation_accepts_free_form_hf_ids(): + assert is_valid_model_name("moonshotai/Kimi-K2.6:fastest") is True + assert is_valid_model_name("huggingface/moonshotai/Kimi-K2.6:novita") is True + + +def test_model_validation_accepts_direct_provider_ids(): + assert is_valid_model_name("anthropic/claude-opus-4-7") is True + assert is_valid_model_name("openai/gpt-5") is True + assert is_valid_model_name("bedrock/us.anthropic.claude-opus-4-7") is True + + +def test_model_validation_rejects_garbage(): + assert is_valid_model_name("") is False + assert is_valid_model_name("no-slash") is False + assert is_valid_model_name("anthropic/") is False + assert is_valid_model_name("openai/") is False + assert is_valid_model_name("huggingface/nope") is False + assert is_valid_model_name("moonshotai/") is False + + +def test_cli_validation_matches_provider_validation(): + assert is_valid_model_id("openai/gpt-5") is True + assert is_valid_model_id("moonshotai/Kimi-K2.6:fastest") is True + assert is_valid_model_id("openai/") is False + assert is_valid_model_id("anthropic/") is False + + +def test_resolve_raises_on_no_adapter(monkeypatch): + from agent.core import llm_params + + monkeypatch.setattr(llm_params, "resolve_adapter", lambda _: None) + with pytest.raises(ValueError, match="No provider adapter"): + _resolve_llm_params("anything") + + +def test_unsupported_effort_reexport(): + """UnsupportedEffortError must be importable from llm_params (backward compat).""" + from agent.core.llm_params import UnsupportedEffortError as FromLlm + from agent.core.provider_adapters import UnsupportedEffortError as FromAdapters + + assert FromLlm is FromAdapters