"""OpenRouter chat model built on top of the OpenAI chat-completions model.

Defines the OpenRouter-specific request settings, the pydantic models used to validate
OpenRouter's extended chat-completion payload, and `OpenRouterModel` itself.
"""

from typing import Any, Literal, cast

from openai import AsyncOpenAI
from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionMessageParam
from openai.types.chat.chat_completion import Choice
from pydantic import BaseModel
from typing_extensions import TypedDict

from ..exceptions import ModelHTTPError, UnexpectedModelBehavior
from ..messages import (
    ModelMessage,
    ModelResponse,
    ThinkingPart,
)
from ..profiles import ModelProfileSpec
from ..providers import Provider
from ..settings import ModelSettings
from . import ModelRequestParameters
from .openai import OpenAIChatModel, OpenAIChatModelSettings


class OpenRouterMaxPrice(TypedDict, total=False):
    """The object specifying the maximum price you want to pay for this request.

    USD price per million tokens, for prompt and completion.
    """

    # Prices are expressed in USD per million tokens, so fractional values must be accepted;
    # `float` also accepts the `int` values that were previously required.
    prompt: float
    completion: float
    image: float
    audio: float
    request: float


KnownOpenRouterProviders = Literal[
    'z-ai',
    'cerebras',
    'venice',
    'moonshotai',
    'morph',
    'stealth',
    'wandb',
    'klusterai',
    'openai',
    'sambanova',
    'amazon-bedrock',
    'mistral',
    'nextbit',
    'atoma',
    'ai21',
    'minimax',
    'baseten',
    'anthropic',
    'featherless',
    'groq',
    'lambda',
    'azure',
    'ncompass',
    'deepseek',
    'hyperbolic',
    'crusoe',
    'cohere',
    'mancer',
    'avian',
    'perplexity',
    'novita',
    'siliconflow',
    'switchpoint',
    'xai',
    'inflection',
    'fireworks',
    'deepinfra',
    'inference-net',
    'inception',
    'atlas-cloud',
    'nvidia',
    'alibaba',
    'friendli',
    'infermatic',
    'targon',
    'ubicloud',
    'aion-labs',
    'liquid',
    'nineteen',
    'cloudflare',
    'nebius',
    'chutes',
    'enfer',
    'crofai',
    'open-inference',
    'phala',
    'gmicloud',
    'meta',
    'relace',
    'parasail',
    'together',
    'google-ai-studio',
    'google-vertex',
]
"""Known providers in the OpenRouter marketplace"""

OpenRouterProvider = str | KnownOpenRouterProviders
"""Possible OpenRouter provider slugs.

Since OpenRouter is constantly updating their list of providers, we explicitly list some known providers but
allow any name in the type hints.
See [the OpenRouter API](https://openrouter.ai/docs/api-reference/list-available-providers) for a full list.
"""

Transforms = Literal['middle-out']
"""Available messages transforms for OpenRouter models with limited token windows.

Currently only supports 'middle-out', but is expected to grow in the future.
"""


class OpenRouterProviderConfig(TypedDict, total=False):
    """Represents the 'Provider' object from the OpenRouter API."""

    order: list[OpenRouterProvider]
    """List of provider slugs to try in order (e.g. ["anthropic", "openai"]).

    [See details](https://openrouter.ai/docs/features/provider-routing#ordering-specific-providers)"""

    allow_fallbacks: bool
    """Whether to allow backup providers when the primary is unavailable. [See details](https://openrouter.ai/docs/features/provider-routing#disabling-fallbacks)"""

    require_parameters: bool
    """Only use providers that support all parameters in your request."""

    data_collection: Literal['allow', 'deny']
    """Control whether to use providers that may store data. [See details](https://openrouter.ai/docs/features/provider-routing#requiring-providers-to-comply-with-data-policies)"""

    zdr: bool
    """Restrict routing to only ZDR (Zero Data Retention) endpoints. [See details](https://openrouter.ai/docs/features/provider-routing#zero-data-retention-enforcement)"""

    only: list[OpenRouterProvider]
    """List of provider slugs to allow for this request. [See details](https://openrouter.ai/docs/features/provider-routing#allowing-only-specific-providers)"""

    ignore: list[OpenRouterProvider]
    """List of provider slugs to skip for this request. [See details](https://openrouter.ai/docs/features/provider-routing#ignoring-providers)"""

    quantizations: list[Literal['int4', 'int8', 'fp4', 'fp6', 'fp8', 'fp16', 'bf16', 'fp32', 'unknown']]
    """List of quantization levels to filter by (e.g. ["int4", "int8"]). [See details](https://openrouter.ai/docs/features/provider-routing#quantization)"""

    sort: Literal['price', 'throughput', 'latency']
    """Sort providers by price, throughput or latency (e.g. "price"). [See details](https://openrouter.ai/docs/features/provider-routing#provider-sorting)"""

    max_price: OpenRouterMaxPrice
    """The maximum pricing you want to pay for this request. [See details](https://openrouter.ai/docs/features/provider-routing#max-price)"""


class OpenRouterReasoning(TypedDict, total=False):
    """Configuration for reasoning tokens in OpenRouter requests.

    Reasoning tokens allow models to show their step-by-step thinking process.
    You can configure this using either OpenAI-style effort levels or Anthropic-style
    token limits, but not both simultaneously.
    """

    effort: Literal['high', 'medium', 'low']
    """OpenAI-style reasoning effort level. Cannot be used with max_tokens."""

    max_tokens: int
    """Anthropic-style specific token limit for reasoning. Cannot be used with effort."""

    exclude: bool
    """Whether to exclude reasoning tokens from the response. Default is False. All models support this."""

    enabled: bool
    """Whether to enable reasoning with default parameters. Default is inferred from effort or max_tokens."""


class WebPlugin(TypedDict, total=False):
    """You can incorporate relevant web search results for any model on OpenRouter by activating and customizing the web plugin.

    The web search plugin uses native search for Anthropic and OpenAI models and Exa for other models. For Exa, it uses their "auto" method (a combination of keyword search and embeddings-based web search) to find the most relevant results and augment/ground your prompt.
    """

    id: Literal['web']

    engine: Literal['native', 'exa', 'undefined']
    """The web search plugin supports the following options for the engine parameter:

    `native`: Always uses the model provider's built-in web search capabilities
    `exa`: Uses Exa's search API for web results
    `undefined` (not specified): Uses native search if available for the provider, otherwise falls back to Exa

    Native search is used by default for OpenAI and Anthropic models that support it
    Exa search is used for all other models or when native search is not supported.

    When you explicitly specify "engine": "native", it will always attempt to use the provider's native search, even if the model doesn't support it (which may result in an error)."""

    max_results: int
    """The maximum results allowed by the web plugin."""

    search_prompt: str
    """The prompt used to attach results to your message."""


OpenRouterPlugin = WebPlugin


class OpenRouterModelSettings(ModelSettings, total=False):
    """Settings used for an OpenRouter model request."""

    # ALL FIELDS MUST BE `openrouter_` PREFIXED SO YOU CAN MERGE THEM WITH OTHER MODELS.

    openrouter_models: list[str]
    """A list of fallback models.

    These models will be tried, in order, if the main model returns an error. [See details](https://openrouter.ai/docs/features/model-routing#the-models-parameter)
    """

    openrouter_provider: OpenRouterProviderConfig
    """OpenRouter routes requests to the best available providers for your model. By default, requests are load balanced across the top providers to maximize uptime.

    You can customize how your requests are routed using the provider object. [See more](https://openrouter.ai/docs/features/provider-routing)"""

    openrouter_preset: str
    """Presets allow you to separate your LLM configuration from your code.

    Create and manage presets through the OpenRouter web application to control provider routing, model selection, system prompts, and other parameters, then reference them in OpenRouter API requests. [See more](https://openrouter.ai/docs/features/presets)"""

    openrouter_transforms: list[Transforms]
    """To help with prompts that exceed the maximum context size of a model.

    Transforms work by removing or truncating messages from the middle of the prompt, until the prompt fits within the model's context window. [See more](https://openrouter.ai/docs/features/message-transforms)
    """

    openrouter_reasoning: OpenRouterReasoning
    """To control the reasoning tokens in the request.

    The reasoning config object consolidates settings for controlling reasoning strength across different models. [See more](https://openrouter.ai/docs/use-cases/reasoning-tokens)
    """

    openrouter_plugins: list[OpenRouterPlugin]
    """Plugins to activate for the request, such as the web search plugin described by `WebPlugin`."""


class OpenRouterError(BaseModel):
    """Utility class to validate error messages from OpenRouter."""

    code: int
    message: str


class BaseReasoningDetail(BaseModel):
    """Common fields shared across all reasoning detail types."""

    id: str | None = None
    format: Literal['unknown', 'openai-responses-v1', 'anthropic-claude-v1', 'xai-responses-v1']
    # Declared optional, so it also gets a default: a detail without `index` must not fail validation.
    index: int | None = None


class ReasoningSummary(BaseReasoningDetail):
    """Represents a high-level summary of the reasoning process."""

    type: Literal['reasoning.summary']
    summary: str


class ReasoningEncrypted(BaseReasoningDetail):
    """Represents encrypted reasoning data."""

    type: Literal['reasoning.encrypted']
    data: str


class ReasoningText(BaseReasoningDetail):
    """Represents raw text reasoning."""

    type: Literal['reasoning.text']
    text: str
    signature: str | None = None


OpenRouterReasoningDetail = ReasoningSummary | ReasoningEncrypted | ReasoningText


class OpenRouterCompletionMessage(ChatCompletionMessage):
    """Wrapped chat completion message with OpenRouter specific attributes."""

    reasoning: str | None = None
    """The reasoning text associated with the message, if any."""

    reasoning_details: list[OpenRouterReasoningDetail] | None = None
    """The reasoning details associated with the message, if any."""


class OpenRouterChoice(Choice):
    """Wraps OpenAI chat completion choice with OpenRouter specific attributes."""

    native_finish_reason: str
    """The provided finish reason by the downstream provider from OpenRouter."""

    finish_reason: Literal['stop', 'length', 'tool_calls', 'content_filter', 'error']  # type: ignore[reportIncompatibleVariableOverride]
    """OpenRouter specific finish reasons.

    Notably, removes 'function_call' and adds 'error' finish reasons.
    """

    message: OpenRouterCompletionMessage  # type: ignore[reportIncompatibleVariableOverride]
    """A wrapped chat completion message with OpenRouter specific attributes."""


class OpenRouterChatCompletion(ChatCompletion):
    """Wraps OpenAI chat completion with OpenRouter specific attributes."""

    provider: str
    """The downstream provider that was used by OpenRouter."""

    choices: list[OpenRouterChoice]  # type: ignore[reportIncompatibleVariableOverride]
    """A list of chat completion choices modified with OpenRouter specific attributes."""

    error: OpenRouterError | None = None
    """OpenRouter specific error attribute."""


# Maps each `openrouter_`-prefixed setting to the request-body field it is sent as.
_EXTRA_BODY_FIELDS: dict[str, str] = {
    'openrouter_models': 'models',
    'openrouter_provider': 'provider',
    'openrouter_preset': 'preset',
    'openrouter_transforms': 'transforms',
    'openrouter_reasoning': 'reasoning',
    'openrouter_plugins': 'plugins',
}


def _openrouter_settings_to_openai_settings(model_settings: OpenRouterModelSettings) -> OpenAIChatModelSettings:
    """Transforms a 'OpenRouterModelSettings' object into an 'OpenAIChatModelSettings' object.

    Every `openrouter_`-prefixed setting is removed from the result and, when it has a truthy
    value, placed in `extra_body` under its OpenRouter request field name.

    Args:
        model_settings: The 'OpenRouterModelSettings' object to transform. It is not modified.

    Returns:
        An 'OpenAIChatModelSettings' object with equivalent settings. It always has an
        `extra_body` key, which is an empty dict when there is nothing to forward.
    """
    # Work on copies: the argument may be a settings dict that is reused across requests,
    # so popping keys from it (or writing into its `extra_body`) would silently drop them later.
    settings = cast(dict[str, Any], dict(model_settings))
    # `extra_body` is taken out of `settings` so that it is not supplied twice below.
    extra_body = dict(cast(dict[str, Any], settings.pop('extra_body', None) or {}))

    for setting_key, body_key in _EXTRA_BODY_FIELDS.items():
        if value := settings.pop(setting_key, None):
            extra_body[body_key] = value

    return cast(OpenAIChatModelSettings, {**settings, 'extra_body': extra_body})


def _thinking_part_from_detail(detail: OpenRouterReasoningDetail, provider_name: str) -> ThinkingPart:
    """Builds the `ThinkingPart` that corresponds to a single OpenRouter reasoning detail."""
    if isinstance(detail, ReasoningText):
        part = ThinkingPart(
            id=detail.id,
            content=detail.text,
            signature=detail.signature,
            provider_name=provider_name,
        )
    elif isinstance(detail, ReasoningSummary):
        part = ThinkingPart(id=detail.id, content=detail.summary, provider_name=provider_name)
    else:
        # Encrypted reasoning has no readable content; the opaque payload is kept as the signature.
        part = ThinkingPart(id=detail.id, content='', signature=detail.data, provider_name=provider_name)

    # TODO: Find a better way to store these attributes
    part.openrouter_type = detail.type  # pyright: ignore[reportAttributeAccessIssue]
    part.openrouter_format = detail.format  # pyright: ignore[reportAttributeAccessIssue]
    return part


def _reasoning_details_from_parts(parts: list[Any]) -> list[dict[str, Any]]:
    """Rebuilds OpenRouter `reasoning_details` entries from the thinking parts of a response.

    Only `ThinkingPart`s tagged by `_thinking_part_from_detail` are converted; thinking parts
    from any other source carry no OpenRouter type and are skipped.
    """
    details: list[dict[str, Any]] = []
    for part in parts:
        if not isinstance(part, ThinkingPart):
            continue
        detail_type = getattr(part, 'openrouter_type', None)
        if detail_type is None:
            continue

        detail: dict[str, Any] = {
            'type': detail_type,
            'id': part.id,
            'format': getattr(part, 'openrouter_format', 'unknown'),
            'index': len(details),
        }
        if detail_type == 'reasoning.summary':
            detail['summary'] = part.content
        elif detail_type == 'reasoning.text':
            detail['text'] = part.content
            detail['signature'] = part.signature
        elif detail_type == 'reasoning.encrypted':
            detail['data'] = part.signature
        details.append(detail)
    return details


class OpenRouterModel(OpenAIChatModel):
    """Extends `OpenAIChatModel` to support OpenRouter-specific settings and response metadata."""

    def __init__(
        self,
        model_name: str,
        *,
        provider: Literal['openrouter'] | Provider[AsyncOpenAI] = 'openrouter',
        profile: ModelProfileSpec | None = None,
        settings: ModelSettings | None = None,
    ):
        """Initialize an OpenRouter model.

        Args:
            model_name: The name of the model to use.
            provider: The provider to use for authentication and API access. Currently, uses OpenAI as the internal client.
                Can be either the string 'openrouter' (the default) or an instance of `Provider[AsyncOpenAI]`.
            profile: The model profile to use. Defaults to a profile picked by the provider based on the model name.
            settings: Model-specific settings that will be used as defaults for this model.
        """
        super().__init__(model_name, provider=provider, profile=profile, settings=settings)

    def prepare_request(
        self,
        model_settings: ModelSettings | None,
        model_request_parameters: ModelRequestParameters,
    ) -> tuple[ModelSettings | None, ModelRequestParameters]:
        """Prepares the request and rewrites `openrouter_` settings into `extra_body` fields."""
        merged_settings, customized_parameters = super().prepare_request(model_settings, model_request_parameters)
        new_settings = _openrouter_settings_to_openai_settings(cast(OpenRouterModelSettings, merged_settings or {}))
        return new_settings, customized_parameters

    def _process_response(self, response: ChatCompletion | str) -> ModelResponse:
        """Converts an OpenRouter chat completion into a `ModelResponse`.

        Raises:
            UnexpectedModelBehavior: If the response is not a `ChatCompletion`, has no choices,
                or reports an 'error' finish reason without error data.
            ModelHTTPError: If the response body carries an OpenRouter `error` object.
        """
        if not isinstance(response, ChatCompletion):
            raise UnexpectedModelBehavior(
                'Invalid response from OpenRouter chat completions endpoint, expected JSON data'
            )

        native_response = OpenRouterChatCompletion.model_validate(response.model_dump())

        # Check for an error payload before touching `choices`, which may be empty in that case.
        if error := native_response.error:
            raise ModelHTTPError(status_code=error.code, model_name=response.model, body=error.message)
        if not native_response.choices:
            raise UnexpectedModelBehavior(
                'Invalid response from OpenRouter chat completions endpoint, no choices returned'
            )

        choice = native_response.choices[0]
        if choice.finish_reason == 'error':
            raise UnexpectedModelBehavior(
                'Invalid response from OpenRouter chat completions endpoint, error finish_reason without error data'
            )

        reasoning_details = choice.message.reasoning_details or []

        # 'super()._process_response' reads 'reasoning' to create a ThinkingPart, and this method creates
        # ThinkingParts from 'reasoning_details'. Delete 'reasoning' to avoid duplication, but only when
        # 'reasoning_details' will actually replace it; otherwise the reasoning text would be lost.
        if reasoning_details and choice.message.reasoning is not None:
            delattr(response.choices[0].message, 'reasoning')

        model_response = super()._process_response(response=response)

        thinking_parts = [
            _thinking_part_from_detail(detail, native_response.provider) for detail in reasoning_details
        ]
        if thinking_parts:
            model_response.parts = [*thinking_parts, *model_response.parts]

        provider_details = model_response.provider_details or {}
        provider_details['downstream_provider'] = native_response.provider
        provider_details['native_finish_reason'] = choice.native_finish_reason
        model_response.provider_details = provider_details

        return model_response

    async def _map_messages(self, messages: list[ModelMessage]) -> list[ChatCompletionMessageParam]:
        """Maps a `pydantic_ai.Message` to a `openai.types.ChatCompletionMessageParam` and adds OpenRouter specific parameters."""
        openai_messages = await super()._map_messages(messages)

        # The mapped list is not index-aligned with `messages` (one request can expand to several
        # chat messages), so responses are paired with assistant messages in order instead.
        # NOTE(review): assumes the parent mapping emits exactly one 'assistant' message per
        # `ModelResponse` and none for requests; confirm against `OpenAIChatModel._map_messages`.
        responses = (message for message in messages if isinstance(message, ModelResponse))
        assistant_messages = (
            openai_message for openai_message in openai_messages if openai_message.get('role') == 'assistant'
        )
        for response, assistant_message in zip(responses, assistant_messages):
            if reasoning_details := _reasoning_details_from_parts(response.parts):
                cast(dict[str, Any], assistant_message)['reasoning_details'] = reasoning_details

        return openai_messages
) + attribution_headers: dict[str, str] = {} + if http_referer := http_referer or os.getenv('OPENROUTER_HTTP_REFERER'): + attribution_headers['HTTP-Referer'] = http_referer + if x_title := x_title or os.getenv('OPENROUTER_X_TITLE'): + attribution_headers['X-Title'] = x_title + if openai_client is not None: self._client = openai_client elif http_client is not None: - self._client = AsyncOpenAI(base_url=self.base_url, api_key=api_key, http_client=http_client) + self._client = AsyncOpenAI( + base_url=self.base_url, api_key=api_key, http_client=http_client, default_headers=attribution_headers + ) else: http_client = cached_async_http_client(provider='openrouter') - self._client = AsyncOpenAI(base_url=self.base_url, api_key=api_key, http_client=http_client) + self._client = AsyncOpenAI( + base_url=self.base_url, api_key=api_key, http_client=http_client, default_headers=attribution_headers + ) diff --git a/tests/models/cassettes/test_openrouter/test_openrouter_errors_raised.yaml b/tests/models/cassettes/test_openrouter/test_openrouter_errors_raised.yaml new file mode 100644 index 0000000000..dacb9f72c9 --- /dev/null +++ b/tests/models/cassettes/test_openrouter/test_openrouter_errors_raised.yaml @@ -0,0 +1,161 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '158' + content-type: + - application/json + host: + - openrouter.ai + method: POST + parsed_body: + messages: + - content: Be helpful. + role: system + - content: Tell me a joke. 
+ role: user + model: google/gemini-2.0-flash-exp:free + stream: false + uri: https://openrouter.ai/api/v1/chat/completions + response: + headers: + access-control-allow-origin: + - '*' + connection: + - keep-alive + content-type: + - application/json + permissions-policy: + - payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com" "https://js.stripe.com" "https://*.js.stripe.com" + "https://hooks.stripe.com") + referrer-policy: + - no-referrer, strict-origin-when-cross-origin + transfer-encoding: + - chunked + vary: + - Accept-Encoding + parsed_body: + error: + code: 429 + message: Provider returned error + metadata: + provider_name: Google + raw: 'google/gemini-2.0-flash-exp:free is temporarily rate-limited upstream. Please retry shortly, or add your own + key to accumulate your rate limits: https://openrouter.ai/settings/integrations' + user_id: user_2wT5ElBE4Es3R4QrNLpZiXICmQP + status: + code: 429 + message: Too Many Requests +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '158' + content-type: + - application/json + host: + - openrouter.ai + method: POST + parsed_body: + messages: + - content: Be helpful. + role: system + - content: Tell me a joke. 
+ role: user + model: google/gemini-2.0-flash-exp:free + stream: false + uri: https://openrouter.ai/api/v1/chat/completions + response: + headers: + access-control-allow-origin: + - '*' + connection: + - keep-alive + content-type: + - application/json + permissions-policy: + - payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com" "https://js.stripe.com" "https://*.js.stripe.com" + "https://hooks.stripe.com") + referrer-policy: + - no-referrer, strict-origin-when-cross-origin + transfer-encoding: + - chunked + vary: + - Accept-Encoding + parsed_body: + error: + code: 429 + message: Provider returned error + metadata: + provider_name: Google + raw: 'google/gemini-2.0-flash-exp:free is temporarily rate-limited upstream. Please retry shortly, or add your own + key to accumulate your rate limits: https://openrouter.ai/settings/integrations' + user_id: user_2wT5ElBE4Es3R4QrNLpZiXICmQP + status: + code: 429 + message: Too Many Requests +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '158' + content-type: + - application/json + host: + - openrouter.ai + method: POST + parsed_body: + messages: + - content: Be helpful. + role: system + - content: Tell me a joke. 
+ role: user + model: google/gemini-2.0-flash-exp:free + stream: false + uri: https://openrouter.ai/api/v1/chat/completions + response: + headers: + access-control-allow-origin: + - '*' + connection: + - keep-alive + content-type: + - application/json + permissions-policy: + - payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com" "https://js.stripe.com" "https://*.js.stripe.com" + "https://hooks.stripe.com") + referrer-policy: + - no-referrer, strict-origin-when-cross-origin + transfer-encoding: + - chunked + vary: + - Accept-Encoding + parsed_body: + error: + code: 429 + message: Provider returned error + metadata: + provider_name: Google + raw: 'google/gemini-2.0-flash-exp:free is temporarily rate-limited upstream. Please retry shortly, or add your own + key to accumulate your rate limits: https://openrouter.ai/settings/integrations' + user_id: user_2wT5ElBE4Es3R4QrNLpZiXICmQP + status: + code: 429 + message: Too Many Requests +version: 1 diff --git a/tests/models/cassettes/test_openrouter/test_openrouter_map_messages_reasoning.yaml b/tests/models/cassettes/test_openrouter/test_openrouter_map_messages_reasoning.yaml new file mode 100644 index 0000000000..aa634b6658 --- /dev/null +++ b/tests/models/cassettes/test_openrouter/test_openrouter_map_messages_reasoning.yaml @@ -0,0 +1,96 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '133' + content-type: + - application/json + host: + - openrouter.ai + method: POST + parsed_body: + messages: + - content: Who are you. Think about it. 
+ role: user + model: anthropic/claude-3.7-sonnet:thinking + stream: false + uri: https://openrouter.ai/api/v1/chat/completions + response: + headers: + access-control-allow-origin: + - '*' + connection: + - keep-alive + content-length: + - '4024' + content-type: + - application/json + permissions-policy: + - payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com" "https://js.stripe.com" "https://*.js.stripe.com" + "https://hooks.stripe.com") + referrer-policy: + - no-referrer, strict-origin-when-cross-origin + transfer-encoding: + - chunked + vary: + - Accept-Encoding + parsed_body: + choices: + - finish_reason: stop + index: 0 + logprobs: null + message: + content: "I am Claude, an AI assistant created by Anthropic. I'm a large language model designed to be helpful, + harmless, and honest.\n\nI don't have consciousness or sentience like humans do - I'm a sophisticated text prediction + system trained on a large dataset of human text. I don't have personal experiences, emotions, or a physical existence. + \n\nMy purpose is to assist you with information, tasks, and conversation in a helpful way, while acknowledging + my limitations. I have knowledge cutoffs, can occasionally make mistakes, and don't have the ability to access + the internet or take actions in the physical world.\n\nIs there something specific you'd like to know about me + or how I can assist you?" + reasoning: |- + This question is asking me about my identity. Let me think about how to respond clearly and accurately. + + I am Claude, an AI assistant created by Anthropic. I'm designed to be helpful, harmless, and honest in my interactions with humans. I don't have a physical form - I exist as a large language model running on computer hardware. I don't have consciousness, sentience, or feelings in the way humans do. I don't have personal experiences or a life outside of these conversations. 
+ + My capabilities include understanding and generating natural language text, reasoning about various topics, and attempting to be helpful to users in a wide range of contexts. I have been trained on a large corpus of text data, but my training data has a cutoff date, so I don't have knowledge of events that occurred after my training. + + I have certain limitations - I don't have the ability to access the internet, run code, or interact with external systems unless given specific tools to do so. I don't have perfect knowledge and can make mistakes. + + I'm designed to be conversational and to engage with users in a way that's helpful and informative, while respecting important ethical boundaries. + reasoning_details: + - format: anthropic-claude-v1 + index: 0 + signature: ErcBCkgICBACGAIiQHtMxpqcMhnwgGUmSDWGoOL9ZHTbDKjWnhbFm0xKzFl0NmXFjQQxjFj5mieRYY718fINsJMGjycTVYeiu69npakSDDrsnKYAD/fdcpI57xoMHlQBxI93RMa5CSUZIjAFVCMQF5GfLLQCibyPbb7LhZ4kLIFxw/nqsTwDDt6bx3yipUcq7G7eGts8MZ6LxOYqHTlIDx0tfHRIlkkcNCdB2sUeMqP8e7kuQqIHoD52GAI= + text: |- + This question is asking me about my identity. Let me think about how to respond clearly and accurately. + + I am Claude, an AI assistant created by Anthropic. I'm designed to be helpful, harmless, and honest in my interactions with humans. I don't have a physical form - I exist as a large language model running on computer hardware. I don't have consciousness, sentience, or feelings in the way humans do. I don't have personal experiences or a life outside of these conversations. + + My capabilities include understanding and generating natural language text, reasoning about various topics, and attempting to be helpful to users in a wide range of contexts. I have been trained on a large corpus of text data, but my training data has a cutoff date, so I don't have knowledge of events that occurred after my training. 
+ + I have certain limitations - I don't have the ability to access the internet, run code, or interact with external systems unless given specific tools to do so. I don't have perfect knowledge and can make mistakes. + + I'm designed to be conversational and to engage with users in a way that's helpful and informative, while respecting important ethical boundaries. + type: reasoning.text + refusal: null + role: assistant + native_finish_reason: stop + created: 1760051228 + id: gen-1760051228-zUtCCQbb0vkaM4UXZmcb + model: anthropic/claude-3.7-sonnet:thinking + object: chat.completion + provider: Google + usage: + completion_tokens: 402 + prompt_tokens: 43 + total_tokens: 445 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_openrouter/test_openrouter_with_native_options.yaml b/tests/models/cassettes/test_openrouter/test_openrouter_with_native_options.yaml new file mode 100644 index 0000000000..b073b87179 --- /dev/null +++ b/tests/models/cassettes/test_openrouter/test_openrouter_with_native_options.yaml @@ -0,0 +1,82 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '193' + content-type: + - application/json + host: + - openrouter.ai + method: POST + parsed_body: + messages: + - content: Who are you + role: user + model: google/gemini-2.0-flash-exp:free + models: + - x-ai/grok-4 + provider: + only: + - xai + stream: false + transforms: + - middle-out + uri: https://openrouter.ai/api/v1/chat/completions + response: + headers: + access-control-allow-origin: + - '*' + connection: + - keep-alive + content-length: + - '1067' + content-type: + - application/json + permissions-policy: + - payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com" "https://js.stripe.com" "https://*.js.stripe.com" + "https://hooks.stripe.com") + referrer-policy: + - no-referrer, strict-origin-when-cross-origin + 
transfer-encoding: + - chunked + vary: + - Accept-Encoding + parsed_body: + choices: + - finish_reason: stop + index: 0 + logprobs: null + message: + content: |- + I'm Grok, a helpful and maximally truthful AI built by xAI. I'm not based on any other companies' models—instead, I'm inspired by the Hitchhiker's Guide to the Galaxy and JARVIS from Iron Man. My goal is to assist with questions, provide information, and maybe crack a joke or two along the way. + + What can I help you with today? + reasoning: null + refusal: null + role: assistant + native_finish_reason: stop + created: 1759509677 + id: gen-1759509677-MpJiZ3ZkiGU3lnbM8QKo + model: x-ai/grok-4 + object: chat.completion + provider: xAI + system_fingerprint: fp_19e21a36c0 + usage: + completion_tokens: 240 + completion_tokens_details: + reasoning_tokens: 165 + prompt_tokens: 687 + prompt_tokens_details: + audio_tokens: 0 + cached_tokens: 682 + total_tokens: 927 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_openrouter/test_openrouter_with_preset.yaml b/tests/models/cassettes/test_openrouter/test_openrouter_with_preset.yaml new file mode 100644 index 0000000000..bd85de5b07 --- /dev/null +++ b/tests/models/cassettes/test_openrouter/test_openrouter_with_preset.yaml @@ -0,0 +1,75 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '131' + content-type: + - application/json + host: + - openrouter.ai + method: POST + parsed_body: + messages: + - content: Trains + role: user + model: google/gemini-2.5-flash-lite + preset: '@preset/comedian' + stream: false + uri: https://openrouter.ai/api/v1/chat/completions + response: + headers: + access-control-allow-origin: + - '*' + connection: + - keep-alive + content-length: + - '617' + content-type: + - application/json + permissions-policy: + - payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com" 
"https://js.stripe.com" "https://*.js.stripe.com" + "https://hooks.stripe.com") + referrer-policy: + - no-referrer, strict-origin-when-cross-origin + transfer-encoding: + - chunked + vary: + - Accept-Encoding + parsed_body: + choices: + - finish_reason: stop + index: 0 + logprobs: null + message: + content: |- + Why did the train break up with the track? + + Because it felt like their relationship was going nowhere. + reasoning: null + refusal: null + role: assistant + native_finish_reason: STOP + created: 1759510642 + id: gen-1759510642-J9qupM2EtKoYTfG7ehDn + model: google/gemini-2.5-flash-lite + object: chat.completion + provider: Google + usage: + completion_tokens: 21 + completion_tokens_details: + image_tokens: 0 + reasoning_tokens: 0 + prompt_tokens: 31 + prompt_tokens_details: + cached_tokens: 0 + total_tokens: 52 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/cassettes/test_openrouter/test_openrouter_with_reasoning.yaml b/tests/models/cassettes/test_openrouter/test_openrouter_with_reasoning.yaml new file mode 100644 index 0000000000..6952863a80 --- /dev/null +++ b/tests/models/cassettes/test_openrouter/test_openrouter_with_reasoning.yaml @@ -0,0 +1,103 @@ +interactions: +- request: + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '92' + content-type: + - application/json + host: + - openrouter.ai + method: POST + parsed_body: + messages: + - content: Who are you + role: user + model: z-ai/glm-4.6 + stream: false + uri: https://openrouter.ai/api/v1/chat/completions + response: + headers: + access-control-allow-origin: + - '*' + connection: + - keep-alive + content-length: + - '3750' + content-type: + - application/json + permissions-policy: + - payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com" "https://js.stripe.com" "https://*.js.stripe.com" + "https://hooks.stripe.com") + referrer-policy: + - no-referrer, 
strict-origin-when-cross-origin + transfer-encoding: + - chunked + vary: + - Accept-Encoding + parsed_body: + choices: + - finish_reason: stop + index: 0 + logprobs: null + message: + content: |2- + + + I'm GLM, a large language model developed by Zhipu AI. I'm designed to have natural conversations, answer questions, and assist with various tasks through text-based interactions. I've been trained on a diverse range of data to help users with information and creative tasks. + + I continuously learn to improve my capabilities, though I don't store your personal data. Is there something specific you'd like to know about me or how I can help you today? + reasoning: |- + Let me process this query about who I am. First, I should consider what the user really wants to know - they're likely seeking to understand my identity and capabilities as an AI assistant. + + I need to be clear and accurate about my nature. I'm a GLM large language model developed by Zhipu AI, not a human. This distinction is fundamental to our interaction. + + Looking at my core functions, I should highlight my ability to engage in natural conversations, answer questions, and assist with various tasks. My training involves processing vast amounts of text data, which enables me to understand and generate human-like responses. + + It's important to mention my commitment to being helpful, harmless, and honest. These principles guide my interactions and ensure I provide appropriate assistance. + + I should also emphasize my continuous learning aspect. While I don't store personal data, I'm regularly updated to improve my capabilities and knowledge base. + + The response should be welcoming and encourage further questions about specific areas where I can help. This creates an open dialogue and shows my willingness to assist with various topics. + + Let me structure this information in a clear, friendly manner that addresses the user's question while inviting further interaction. 
+ reasoning_details: + - format: unknown + index: 0 + text: |- + Let me process this query about who I am. First, I should consider what the user really wants to know - they're likely seeking to understand my identity and capabilities as an AI assistant. + + I need to be clear and accurate about my nature. I'm a GLM large language model developed by Zhipu AI, not a human. This distinction is fundamental to our interaction. + + Looking at my core functions, I should highlight my ability to engage in natural conversations, answer questions, and assist with various tasks. My training involves processing vast amounts of text data, which enables me to understand and generate human-like responses. + + It's important to mention my commitment to being helpful, harmless, and honest. These principles guide my interactions and ensure I provide appropriate assistance. + + I should also emphasize my continuous learning aspect. While I don't store personal data, I'm regularly updated to improve my capabilities and knowledge base. + + The response should be welcoming and encourage further questions about specific areas where I can help. This creates an open dialogue and shows my willingness to assist with various topics. + + Let me structure this information in a clear, friendly manner that addresses the user's question while inviting further interaction. 
+ type: reasoning.text + refusal: null + role: assistant + native_finish_reason: stop + created: 1759944663 + id: gen-1759944663-AyClfEwG6WFB1puHZNXg + model: z-ai/glm-4.6 + object: chat.completion + provider: GMICloud + usage: + completion_tokens: 331 + prompt_tokens: 8 + prompt_tokens_details: null + total_tokens: 339 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/models/test_openrouter.py b/tests/models/test_openrouter.py new file mode 100644 index 0000000000..35119a5255 --- /dev/null +++ b/tests/models/test_openrouter.py @@ -0,0 +1,192 @@ +from typing import cast + +import pytest +from inline_snapshot import snapshot + +from pydantic_ai import ( + Agent, + ModelHTTPError, + ModelRequest, + TextPart, + ThinkingPart, + UnexpectedModelBehavior, +) +from pydantic_ai.direct import model_request + +from ..conftest import try_import + +with try_import() as imports_successful: + from openai.types.chat import ChatCompletion + from openai.types.chat.chat_completion import Choice + + from pydantic_ai.models.openrouter import OpenRouterModel, OpenRouterModelSettings + from pydantic_ai.providers.openrouter import OpenRouterProvider + +pytestmark = [ + pytest.mark.skipif(not imports_successful(), reason='openai not installed'), + pytest.mark.vcr, + pytest.mark.anyio, +] + + +async def test_openrouter_with_preset(allow_model_requests: None, openrouter_api_key: str) -> None: + provider = OpenRouterProvider(api_key=openrouter_api_key) + model = OpenRouterModel('google/gemini-2.5-flash-lite', provider=provider) + settings = OpenRouterModelSettings(openrouter_preset='@preset/comedian') + response = await model_request(model, [ModelRequest.user_text_prompt('Trains')], model_settings=settings) + text_part = cast(TextPart, response.parts[0]) + assert text_part.content == snapshot( + """\ +Why did the train break up with the track? 
+ +Because it felt like their relationship was going nowhere.\ +""" + ) + + +async def test_openrouter_with_native_options(allow_model_requests: None, openrouter_api_key: str) -> None: + provider = OpenRouterProvider(api_key=openrouter_api_key) + model = OpenRouterModel('google/gemini-2.0-flash-exp:free', provider=provider) + # These specific settings will force OpenRouter to use the fallback model, since Gemini is not available via the xAI provider. + settings = OpenRouterModelSettings( + openrouter_models=['x-ai/grok-4'], + openrouter_transforms=['middle-out'], + openrouter_provider={'only': ['xai']}, + ) + response = await model_request(model, [ModelRequest.user_text_prompt('Who are you')], model_settings=settings) + text_part = cast(TextPart, response.parts[0]) + assert text_part.content == snapshot( + """\ +I'm Grok, a helpful and maximally truthful AI built by xAI. I'm not based on any other companies' models—instead, I'm inspired by the Hitchhiker's Guide to the Galaxy and JARVIS from Iron Man. My goal is to assist with questions, provide information, and maybe crack a joke or two along the way. + +What can I help you with today?\ +""" + ) + assert response.provider_details is not None + assert response.provider_details['downstream_provider'] == 'xAI' + assert response.provider_details['native_finish_reason'] == 'stop' + + +async def test_openrouter_with_reasoning(allow_model_requests: None, openrouter_api_key: str) -> None: + provider = OpenRouterProvider(api_key=openrouter_api_key) + model = OpenRouterModel('z-ai/glm-4.6', provider=provider) + response = await model_request(model, [ModelRequest.user_text_prompt('Who are you')]) + + assert len(response.parts) == 2 + assert isinstance(thinking_part := response.parts[0], ThinkingPart) + assert isinstance(response.parts[1], TextPart) + assert thinking_part.content == snapshot( + """\ +Let me process this query about who I am. 
First, I should consider what the user really wants to know - they're likely seeking to understand my identity and capabilities as an AI assistant. + +I need to be clear and accurate about my nature. I'm a GLM large language model developed by Zhipu AI, not a human. This distinction is fundamental to our interaction. + +Looking at my core functions, I should highlight my ability to engage in natural conversations, answer questions, and assist with various tasks. My training involves processing vast amounts of text data, which enables me to understand and generate human-like responses. + +It's important to mention my commitment to being helpful, harmless, and honest. These principles guide my interactions and ensure I provide appropriate assistance. + +I should also emphasize my continuous learning aspect. While I don't store personal data, I'm regularly updated to improve my capabilities and knowledge base. + +The response should be welcoming and encourage further questions about specific areas where I can help. This creates an open dialogue and shows my willingness to assist with various topics. 
+ +Let me structure this information in a clear, friendly manner that addresses the user's question while inviting further interaction.\ +""" + ) + assert thinking_part.openrouter_type == snapshot('reasoning.text') + assert thinking_part.openrouter_format == snapshot('unknown') + + +async def test_openrouter_errors_raised(allow_model_requests: None, openrouter_api_key: str) -> None: + provider = OpenRouterProvider(api_key=openrouter_api_key) + model = OpenRouterModel('google/gemini-2.0-flash-exp:free', provider=provider) + agent = Agent(model, instructions='Be helpful.', retries=1) + with pytest.raises(ModelHTTPError) as exc_info: + await agent.run('Tell me a joke.') + assert str(exc_info.value) == snapshot( + "status_code: 429, model_name: google/gemini-2.0-flash-exp:free, body: {'code': 429, 'message': 'Provider returned error', 'metadata': {'provider_name': 'Google', 'raw': 'google/gemini-2.0-flash-exp:free is temporarily rate-limited upstream. Please retry shortly, or add your own key to accumulate your rate limits: https://openrouter.ai/settings/integrations'}}" + ) + + +async def test_openrouter_validate_non_json_response(openrouter_api_key: str) -> None: + provider = OpenRouterProvider(api_key=openrouter_api_key) + model = OpenRouterModel('google/gemini-2.0-flash-exp:free', provider=provider) + + with pytest.raises(UnexpectedModelBehavior) as exc_info: + model._process_response('This is not JSON!') # type: ignore[reportPrivateUsage] + + assert str(exc_info.value) == snapshot( + 'Invalid response from OpenRouter chat completions endpoint, expected JSON data' + ) + + +async def test_openrouter_validate_error_response(openrouter_api_key: str) -> None: + provider = OpenRouterProvider(api_key=openrouter_api_key) + model = OpenRouterModel('google/gemini-2.0-flash-exp:free', provider=provider) + + choice = Choice.model_construct( + index=0, message={'role': 'assistant'}, finish_reason='error', native_finish_reason='stop' + ) + response = 
ChatCompletion.model_construct( + id='', choices=[choice], created=0, object='chat.completion', model='test', provider='test' + ) + response.error = {'message': 'This response has an error attribute', 'code': 200} # type: ignore[reportAttributeAccessIssue] + + with pytest.raises(ModelHTTPError) as exc_info: + model._process_response(response) # type: ignore[reportPrivateUsage] + + assert str(exc_info.value) == snapshot( + 'status_code: 200, model_name: test, body: This response has an error attribute' + ) + + +async def test_openrouter_validate_error_finish_reason(openrouter_api_key: str) -> None: + provider = OpenRouterProvider(api_key=openrouter_api_key) + model = OpenRouterModel('google/gemini-2.0-flash-exp:free', provider=provider) + + choice = Choice.model_construct( + index=0, message={'role': 'assistant'}, finish_reason='error', native_finish_reason='stop' + ) + response = ChatCompletion.model_construct( + id='', choices=[choice], created=0, object='chat.completion', model='test', provider='test' + ) + + with pytest.raises(UnexpectedModelBehavior) as exc_info: + model._process_response(response) # type: ignore[reportPrivateUsage] + + assert str(exc_info.value) == snapshot( + 'Invalid response from OpenRouter chat completions endpoint, error finish_reason without error data' + ) + + +async def test_openrouter_map_messages_reasoning(allow_model_requests: None, openrouter_api_key: str) -> None: + provider = OpenRouterProvider(api_key=openrouter_api_key) + model = OpenRouterModel('anthropic/claude-3.7-sonnet:thinking', provider=provider) + + user_message = ModelRequest.user_text_prompt('Who are you. 
Think about it.') + response = await model_request(model, [user_message]) + + mapped_messages = await model._map_messages([user_message, response]) # type: ignore[reportPrivateUsage] + + assert len(mapped_messages) == 2 + assert mapped_messages[1]['reasoning_details'] == snapshot( # type: ignore[reportGeneralTypeIssues] + [ + { + 'id': None, + 'type': 'reasoning.text', + 'text': """\ +This question is asking me about my identity. Let me think about how to respond clearly and accurately. + +I am Claude, an AI assistant created by Anthropic. I'm designed to be helpful, harmless, and honest in my interactions with humans. I don't have a physical form - I exist as a large language model running on computer hardware. I don't have consciousness, sentience, or feelings in the way humans do. I don't have personal experiences or a life outside of these conversations. + +My capabilities include understanding and generating natural language text, reasoning about various topics, and attempting to be helpful to users in a wide range of contexts. I have been trained on a large corpus of text data, but my training data has a cutoff date, so I don't have knowledge of events that occurred after my training. + +I have certain limitations - I don't have the ability to access the internet, run code, or interact with external systems unless given specific tools to do so. I don't have perfect knowledge and can make mistakes. 
+ +I'm designed to be conversational and to engage with users in a way that's helpful and informative, while respecting important ethical boundaries.\ +""", + 'signature': 'ErcBCkgICBACGAIiQHtMxpqcMhnwgGUmSDWGoOL9ZHTbDKjWnhbFm0xKzFl0NmXFjQQxjFj5mieRYY718fINsJMGjycTVYeiu69npakSDDrsnKYAD/fdcpI57xoMHlQBxI93RMa5CSUZIjAFVCMQF5GfLLQCibyPbb7LhZ4kLIFxw/nqsTwDDt6bx3yipUcq7G7eGts8MZ6LxOYqHTlIDx0tfHRIlkkcNCdB2sUeMqP8e7kuQqIHoD52GAI=', + 'format': 'anthropic-claude-v1', + 'index': 0, + } + ] + ) diff --git a/tests/providers/test_openrouter.py b/tests/providers/test_openrouter.py index acdf166c50..a070b936b7 100644 --- a/tests/providers/test_openrouter.py +++ b/tests/providers/test_openrouter.py @@ -25,7 +25,7 @@ with try_import() as imports_successful: import openai - from pydantic_ai.models.openai import OpenAIChatModel + from pydantic_ai.models.openrouter import OpenRouterModel from pydantic_ai.providers.openrouter import OpenRouterProvider @@ -44,6 +44,16 @@ def test_openrouter_provider(): assert provider.client.api_key == 'api-key' +def test_openrouter_provider_with_app_attribution(): + provider = OpenRouterProvider(api_key='api-key', http_referer='test.com', x_title='test') + assert provider.name == 'openrouter' + assert provider.base_url == 'https://openrouter.ai/api/v1' + assert isinstance(provider.client, openai.AsyncOpenAI) + assert provider.client.api_key == 'api-key' + assert provider.client.default_headers['X-Title'] == 'test' + assert provider.client.default_headers['HTTP-Referer'] == 'test.com' + + def test_openrouter_provider_need_api_key(env: TestEnv) -> None: env.remove('OPENROUTER_API_KEY') with pytest.raises( @@ -70,7 +80,7 @@ def test_openrouter_pass_openai_client() -> None: async def test_openrouter_with_google_model(allow_model_requests: None, openrouter_api_key: str) -> None: provider = OpenRouterProvider(api_key=openrouter_api_key) - model = OpenAIChatModel('google/gemini-2.0-flash-exp:free', provider=provider) + model = 
OpenRouterModel('google/gemini-2.0-flash-exp:free', provider=provider) agent = Agent(model, instructions='Be helpful.') response = await agent.run('Tell me a joke.') assert response.output == snapshot("""\