Add support for aspect ratio in gemini image generation (#3672)

ajac-zero · mwildehahn · DouweM · web-flow · commit f6d1152b04db · 2025-12-09T18:05:19.000-06:00
Co-authored-by: Michael Hahn <mwhahn@gmail.com>
Co-authored-by: Douwe Maan <douwe@pydantic.dev>
diff --git a/docs/builtin-tools.md b/docs/builtin-tools.md
@@ -243,7 +243,7 @@ The [`ImageGenerationTool`][pydantic_ai.builtin_tools.ImageGenerationTool] enabl
 | Provider | Supported | Notes |
 |----------|-----------|-------|
 | OpenAI Responses | ✅ | Full feature support. Only supported by models newer than `gpt-5`. Metadata about the generated image, like the [`revised_prompt`](https://platform.openai.com/docs/guides/tools-image-generation#revised-prompt) sent to the underlying image model, is available on the [`BuiltinToolReturnPart`][pydantic_ai.messages.BuiltinToolReturnPart] that's available via [`ModelResponse.builtin_tool_calls`][pydantic_ai.messages.ModelResponse.builtin_tool_calls]. |
-| Google | ✅ | No parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image` and `gemini-3-pro-image-preview`. These models do not support [function tools](tools.md). These models will always have the option of generating images, even if this built-in tool is not explicitly specified. |
+| Google | ✅ | Limited parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image` and `gemini-3-pro-image-preview`. These models do not support [function tools](tools.md) and will always have the option of generating images, even if this built-in tool is not explicitly specified. |
 | Anthropic | ❌ | |
 | Groq | ❌ | |
 | Bedrock | ❌ | |
@@ -332,6 +332,27 @@ assert isinstance(result.output, BinaryImage)
 
 _(This example is complete, it can be run "as is")_
 
+OpenAI Responses models also respect the `aspect_ratio` parameter. Because the OpenAI API only exposes discrete image sizes,
+Pydantic AI maps `'1:1'` -> `1024x1024`, `'2:3'` -> `1024x1536`, and `'3:2'` -> `1536x1024`. Providing any other aspect ratio
+results in an error, and if you also set `size` it must match the computed value.
+
+To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly:
+
+```py {title="image_generation_google_aspect_ratio.py"}
+from pydantic_ai import Agent, BinaryImage, ImageGenerationTool
+
+agent = Agent(
+    'google-gla:gemini-2.5-flash-image',
+    builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')],
+    output_type=BinaryImage,
+)
+
+result = agent.run_sync('Generate a wide illustration of an axolotl city skyline.')
+assert isinstance(result.output, BinaryImage)
+```
+
+_(This example is complete, it can be run "as is")_
+
 For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool].
 
 #### Provider Support
@@ -346,6 +367,7 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG
 | `partial_images` | ✅ | ❌ |
 | `quality` | ✅ | ❌ |
 | `size` | ✅ | ❌ |
+| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ |
 
 ## Web Fetch Tool
 
diff --git a/pydantic_ai_slim/pydantic_ai/builtin_tools.py b/pydantic_ai_slim/pydantic_ai/builtin_tools.py
@@ -22,6 +22,9 @@
 
 _BUILTIN_TOOL_TYPES: dict[str, type[AbstractBuiltinTool]] = {}
 
+ImageAspectRatio = Literal['21:9', '16:9', '4:3', '3:2', '1:1', '9:16', '3:4', '2:3', '5:4', '4:5']
+"""Supported aspect ratios for image generation tools."""
+
 
 @dataclass(kw_only=True)
 class AbstractBuiltinTool(ABC):
@@ -316,6 +319,15 @@ class ImageGenerationTool(AbstractBuiltinTool):
     * OpenAI Responses
     """
 
+    aspect_ratio: ImageAspectRatio | None = None
+    """The aspect ratio to use for generated images.
+
+    Supported by:
+
+    * Google image-generation models (Gemini)
+    * OpenAI Responses (maps '1:1', '2:3', and '3:2' to supported sizes)
+    """
+
     kind: str = 'image_generation'
     """The kind of tool."""
 
diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py
@@ -74,6 +74,7 @@
         GoogleSearchDict,
         GroundingMetadata,
         HttpOptionsDict,
+        ImageConfigDict,
         MediaResolution,
         Modality,
         Part,
@@ -335,12 +336,16 @@ async def request_stream(
         response = await self._generate_content(messages, True, model_settings, model_request_parameters)
         yield await self._process_streamed_response(response, model_request_parameters)  # type: ignore
 
-    def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[ToolDict] | None:
+    def _get_tools(
+        self, model_request_parameters: ModelRequestParameters
+    ) -> tuple[list[ToolDict] | None, ImageConfigDict | None]:
         tools: list[ToolDict] = [
             ToolDict(function_declarations=[_function_declaration_from_tool(t)])
             for t in model_request_parameters.tool_defs.values()
         ]
 
+        image_config: ImageConfigDict | None = None
+
         if model_request_parameters.builtin_tools:
             if model_request_parameters.function_tools:
                 raise UserError('Google does not support function tools and built-in tools at the same time.')
@@ -357,11 +362,13 @@ def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[T
                         raise UserError(
                             "`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead."
                         )
+                    if tool.aspect_ratio:
+                        image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio)
                 else:  # pragma: no cover
                     raise UserError(
                         f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.'
                     )
-        return tools or None
+        return tools or None, image_config
 
     def _get_tool_config(
         self, model_request_parameters: ModelRequestParameters, tools: list[ToolDict] | None
@@ -420,7 +427,7 @@ async def _build_content_and_config(
         model_settings: GoogleModelSettings,
         model_request_parameters: ModelRequestParameters,
     ) -> tuple[list[ContentUnionDict], GenerateContentConfigDict]:
-        tools = self._get_tools(model_request_parameters)
+        tools, image_config = self._get_tools(model_request_parameters)
         if model_request_parameters.function_tools and not self.profile.supports_tools:
             raise UserError('Tools are not supported by this model.')
 
@@ -476,7 +483,9 @@ async def _build_content_and_config(
             response_mime_type=response_mime_type,
             response_json_schema=response_schema,
             response_modalities=modalities,
+            image_config=image_config,
         )
+
         return contents, config
 
     def _process_response(self, response: GenerateContentResponse) -> ModelResponse:
diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -19,7 +19,7 @@
 from .._run_context import RunContext
 from .._thinking_part import split_content_into_text_and_thinking
 from .._utils import guard_tool_call_id as _guard_tool_call_id, now_utc as _now_utc, number_to_datetime
-from ..builtin_tools import CodeExecutionTool, ImageGenerationTool, MCPServerTool, WebSearchTool
+from ..builtin_tools import CodeExecutionTool, ImageAspectRatio, ImageGenerationTool, MCPServerTool, WebSearchTool
 from ..exceptions import UserError
 from ..messages import (
     AudioUrl,
@@ -159,6 +159,36 @@
     'failed': 'error',
 }
 
+_OPENAI_ASPECT_RATIO_TO_SIZE: dict[ImageAspectRatio, Literal['1024x1024', '1024x1536', '1536x1024']] = {
+    '1:1': '1024x1024',
+    '2:3': '1024x1536',
+    '3:2': '1536x1024',
+}
+
+
+def _resolve_openai_image_generation_size(
+    tool: ImageGenerationTool,
+) -> Literal['auto', '1024x1024', '1024x1536', '1536x1024']:
+    """Map `ImageGenerationTool.aspect_ratio` to an OpenAI size string when provided."""
+    aspect_ratio = tool.aspect_ratio
+    if aspect_ratio is None:
+        return tool.size
+
+    mapped_size = _OPENAI_ASPECT_RATIO_TO_SIZE.get(aspect_ratio)
+    if mapped_size is None:
+        supported = ', '.join(_OPENAI_ASPECT_RATIO_TO_SIZE)
+        raise UserError(
+            f'OpenAI image generation only supports `aspect_ratio` values: {supported}. '
+            'Specify one of those values or omit `aspect_ratio`.'
+        )
+
+    if tool.size not in ('auto', mapped_size):
+        raise UserError(
+            '`ImageGenerationTool` cannot combine `aspect_ratio` with a conflicting `size` when using OpenAI.'
+        )
+
+    return mapped_size
+
 
 class OpenAIChatModelSettings(ModelSettings, total=False):
     """Settings used for an OpenAI model request."""
@@ -1469,6 +1499,7 @@ def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) -
                 tools.append(mcp_tool)
             elif isinstance(tool, ImageGenerationTool):  # pragma: no branch
                 has_image_generating_tool = True
+                size = _resolve_openai_image_generation_size(tool)
                 tools.append(
                     responses.tool_param.ImageGeneration(
                         type='image_generation',
@@ -1479,7 +1510,7 @@ def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) -
                         output_format=tool.output_format or 'png',
                         partial_images=tool.partial_images,
                         quality=tool.quality,
-                        size=tool.size,
+                        size=size,
                     )
                 )
             else:
diff --git a/tests/models/test_google.py b/tests/models/test_google.py
@@ -3600,6 +3600,15 @@ async def test_google_image_generation_tool(allow_model_requests: None, google_p
         await agent.run('Generate an image of an axolotl.')
 
 
+async def test_google_image_generation_tool_aspect_ratio(google_provider: GoogleProvider) -> None:
+    model = GoogleModel('gemini-2.5-flash-image', provider=google_provider)
+    params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')])
+
+    tools, image_config = model._get_tools(params)  # pyright: ignore[reportPrivateUsage]
+    assert tools is None
+    assert image_config == {'aspect_ratio': '16:9'}
+
+
 async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider):
     model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider)
 
diff --git a/tests/models/test_model_request_parameters.py b/tests/models/test_model_request_parameters.py
@@ -98,6 +98,7 @@ def test_model_request_parameters_are_serializable():
                     'partial_images': 0,
                     'quality': 'auto',
                     'size': '1024x1024',
+                    'aspect_ratio': None,
                 },
                 {'kind': 'memory'},
                 {
diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py
@@ -1,7 +1,7 @@
 import json
 import re
 from dataclasses import replace
-from typing import Any, cast
+from typing import Any, Literal, cast
 
 import pytest
 from inline_snapshot import snapshot
@@ -32,17 +32,19 @@
     ToolCallPartDelta,
     ToolReturnPart,
     UnexpectedModelBehavior,
+    UserError,
     UserPromptPart,
     capture_run_messages,
 )
 from pydantic_ai.agent import Agent
-from pydantic_ai.builtin_tools import CodeExecutionTool, MCPServerTool, WebSearchTool
+from pydantic_ai.builtin_tools import CodeExecutionTool, ImageAspectRatio, MCPServerTool, WebSearchTool
 from pydantic_ai.exceptions import ModelHTTPError, ModelRetry
 from pydantic_ai.messages import (
     BuiltinToolCallEvent,  # pyright: ignore[reportDeprecated]
     BuiltinToolResultEvent,  # pyright: ignore[reportDeprecated]
 )
 from pydantic_ai.models import ModelRequestParameters
+from pydantic_ai.models.openai import _resolve_openai_image_generation_size  # pyright: ignore[reportPrivateUsage]
 from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput
 from pydantic_ai.profiles.openai import openai_model_profile
 from pydantic_ai.tools import ToolDefinition
@@ -128,6 +130,37 @@ async def test_openai_responses_image_detail_vendor_metadata(allow_model_request
     assert all(part['detail'] == 'high' for part in image_parts)
 
 
+@pytest.mark.parametrize(
+    ('aspect_ratio', 'explicit_size', 'expected_size'),
+    [
+        ('1:1', 'auto', '1024x1024'),
+        ('2:3', '1024x1536', '1024x1536'),
+        ('3:2', 'auto', '1536x1024'),
+    ],
+)
+def test_openai_responses_image_generation_tool_aspect_ratio_mapping(
+    aspect_ratio: ImageAspectRatio,
+    explicit_size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'],
+    expected_size: Literal['1024x1024', '1024x1536', '1536x1024'],
+) -> None:
+    tool = ImageGenerationTool(aspect_ratio=aspect_ratio, size=explicit_size)
+    assert _resolve_openai_image_generation_size(tool) == expected_size
+
+
+def test_openai_responses_image_generation_tool_aspect_ratio_invalid() -> None:
+    tool = ImageGenerationTool(aspect_ratio='16:9')
+
+    with pytest.raises(UserError, match='OpenAI image generation only supports `aspect_ratio` values'):
+        _resolve_openai_image_generation_size(tool)
+
+
+def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size() -> None:
+    tool = ImageGenerationTool(aspect_ratio='1:1', size='1536x1024')
+
+    with pytest.raises(UserError, match='cannot combine `aspect_ratio` with a conflicting `size`'):
+        _resolve_openai_image_generation_size(tool)
+
+
 async def test_openai_responses_model_simple_response_with_tool_call(allow_model_requests: None, openai_api_key: str):
     model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))
 
diff --git a/tests/test_examples.py b/tests/test_examples.py
@@ -692,6 +692,12 @@ async def model_logic(  # noqa: C901
                     FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='160d47')),
                 ]
             )
+        elif m.content == 'Generate a wide illustration of an axolotl city skyline.':
+            return ModelResponse(
+                parts=[
+                    FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')),
+                ]
+            )
         elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.':
             return ModelResponse(
                 parts=[

Original file line number	Diff line number	Diff line change
`@@ -98,6 +98,7 @@ def test_model_request_parameters_are_serializable():`
`98`	`98`	`'partial_images': 0,`
`99`	`99`	`'quality': 'auto',`
`100`	`100`	`'size': '1024x1024',`
	`101`	`+ 'aspect_ratio': None,`
`101`	`102`	`},`
`102`	`103`	`{'kind': 'memory'},`
`103`	`104`	`{`
Original file line number	Diff line number	Diff line change
`@@ -692,6 +692,12 @@ async def model_logic( # noqa: C901`
`692`	`692`	`FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='160d47')),`
`693`	`693`	`]`
`694`	`694`	`)`
	`695`	`+ elif m.content == 'Generate a wide illustration of an axolotl city skyline.':`
	`696`	`+ return ModelResponse(`
	`697`	`+ parts=[`
	`698`	`+ FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')),`
	`699`	`+ ]`
	`700`	`+ )`
`695`	`701`	`elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.':`
`696`	`702`	`return ModelResponse(`
`697`	`703`	`parts=[`