Skip to content

Commit f6d1152

Browse files
ajac-zero, mwildehahn, DouweM
authored
Add support for aspect ratio in gemini image generation (#3672)
Co-authored-by: Michael Hahn <mwhahn@gmail.com> Co-authored-by: Douwe Maan <douwe@pydantic.dev>
1 parent 34314b3 commit f6d1152

File tree

8 files changed

+131
-8
lines changed

8 files changed

+131
-8
lines changed

docs/builtin-tools.md

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ The [`ImageGenerationTool`][pydantic_ai.builtin_tools.ImageGenerationTool] enabl
243243
| Provider | Supported | Notes |
244244
|----------|-----------|-------|
245245
| OpenAI Responses || Full feature support. Only supported by models newer than `gpt-5`. Metadata about the generated image, like the [`revised_prompt`](https://platform.openai.com/docs/guides/tools-image-generation#revised-prompt) sent to the underlying image model, is available on the [`BuiltinToolReturnPart`][pydantic_ai.messages.BuiltinToolReturnPart] that's available via [`ModelResponse.builtin_tool_calls`][pydantic_ai.messages.ModelResponse.builtin_tool_calls]. |
246-
| Google || No parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image` and `gemini-3-pro-image-preview`. These models do not support [function tools](tools.md). These models will always have the option of generating images, even if this built-in tool is not explicitly specified. |
246+
| Google || Limited parameter support. Only supported by [image generation models](https://ai.google.dev/gemini-api/docs/image-generation) like `gemini-2.5-flash-image` and `gemini-3-pro-image-preview`. These models do not support [function tools](tools.md) and will always have the option of generating images, even if this built-in tool is not explicitly specified. |
247247
| Anthropic || |
248248
| Groq || |
249249
| Bedrock || |
@@ -332,6 +332,27 @@ assert isinstance(result.output, BinaryImage)
332332

333333
_(This example is complete, it can be run "as is")_
334334

335+
OpenAI Responses models also respect the `aspect_ratio` parameter. Because the OpenAI API only exposes discrete image sizes,
336+
Pydantic AI maps `'1:1'` -> `1024x1024`, `'2:3'` -> `1024x1536`, and `'3:2'` -> `1536x1024`. Providing any other aspect ratio
337+
results in a `UserError`. If you also set `size`, it must be either `'auto'` or the size that the aspect ratio maps to.
338+
339+
To control the aspect ratio when using Gemini image models, include the `ImageGenerationTool` explicitly:
340+
341+
```py {title="image_generation_google_aspect_ratio.py"}
342+
from pydantic_ai import Agent, BinaryImage, ImageGenerationTool
343+
344+
agent = Agent(
345+
'google-gla:gemini-2.5-flash-image',
346+
builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')],
347+
output_type=BinaryImage,
348+
)
349+
350+
result = agent.run_sync('Generate a wide illustration of an axolotl city skyline.')
351+
assert isinstance(result.output, BinaryImage)
352+
```
353+
354+
_(This example is complete, it can be run "as is")_
355+
335356
For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool].
336357

337358
#### Provider Support
@@ -346,6 +367,7 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG
346367
| `partial_images` |||
347368
| `quality` |||
348369
| `size` |||
370+
| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ |
349371

350372
## Web Fetch Tool
351373

pydantic_ai_slim/pydantic_ai/builtin_tools.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222

2323
_BUILTIN_TOOL_TYPES: dict[str, type[AbstractBuiltinTool]] = {}
2424

25+
ImageAspectRatio = Literal['21:9', '16:9', '4:3', '3:2', '1:1', '9:16', '3:4', '2:3', '5:4', '4:5']
26+
"""Supported aspect ratios for image generation tools."""
27+
2528

2629
@dataclass(kw_only=True)
2730
class AbstractBuiltinTool(ABC):
@@ -316,6 +319,15 @@ class ImageGenerationTool(AbstractBuiltinTool):
316319
* OpenAI Responses
317320
"""
318321

322+
aspect_ratio: ImageAspectRatio | None = None
323+
"""The aspect ratio to use for generated images.
324+
325+
Supported by:
326+
327+
* Google image-generation models (Gemini)
328+
* OpenAI Responses (maps '1:1', '2:3', and '3:2' to supported sizes)
329+
"""
330+
319331
kind: str = 'image_generation'
320332
"""The kind of tool."""
321333

pydantic_ai_slim/pydantic_ai/models/google.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@
7474
GoogleSearchDict,
7575
GroundingMetadata,
7676
HttpOptionsDict,
77+
ImageConfigDict,
7778
MediaResolution,
7879
Modality,
7980
Part,
@@ -335,12 +336,16 @@ async def request_stream(
335336
response = await self._generate_content(messages, True, model_settings, model_request_parameters)
336337
yield await self._process_streamed_response(response, model_request_parameters) # type: ignore
337338

338-
def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[ToolDict] | None:
339+
def _get_tools(
340+
self, model_request_parameters: ModelRequestParameters
341+
) -> tuple[list[ToolDict] | None, ImageConfigDict | None]:
339342
tools: list[ToolDict] = [
340343
ToolDict(function_declarations=[_function_declaration_from_tool(t)])
341344
for t in model_request_parameters.tool_defs.values()
342345
]
343346

347+
image_config: ImageConfigDict | None = None
348+
344349
if model_request_parameters.builtin_tools:
345350
if model_request_parameters.function_tools:
346351
raise UserError('Google does not support function tools and built-in tools at the same time.')
@@ -357,11 +362,13 @@ def _get_tools(self, model_request_parameters: ModelRequestParameters) -> list[T
357362
raise UserError(
358363
"`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead."
359364
)
365+
if tool.aspect_ratio:
366+
image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio)
360367
else: # pragma: no cover
361368
raise UserError(
362369
f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.'
363370
)
364-
return tools or None
371+
return tools or None, image_config
365372

366373
def _get_tool_config(
367374
self, model_request_parameters: ModelRequestParameters, tools: list[ToolDict] | None
@@ -420,7 +427,7 @@ async def _build_content_and_config(
420427
model_settings: GoogleModelSettings,
421428
model_request_parameters: ModelRequestParameters,
422429
) -> tuple[list[ContentUnionDict], GenerateContentConfigDict]:
423-
tools = self._get_tools(model_request_parameters)
430+
tools, image_config = self._get_tools(model_request_parameters)
424431
if model_request_parameters.function_tools and not self.profile.supports_tools:
425432
raise UserError('Tools are not supported by this model.')
426433

@@ -476,7 +483,9 @@ async def _build_content_and_config(
476483
response_mime_type=response_mime_type,
477484
response_json_schema=response_schema,
478485
response_modalities=modalities,
486+
image_config=image_config,
479487
)
488+
480489
return contents, config
481490

482491
def _process_response(self, response: GenerateContentResponse) -> ModelResponse:

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from .._run_context import RunContext
2020
from .._thinking_part import split_content_into_text_and_thinking
2121
from .._utils import guard_tool_call_id as _guard_tool_call_id, now_utc as _now_utc, number_to_datetime
22-
from ..builtin_tools import CodeExecutionTool, ImageGenerationTool, MCPServerTool, WebSearchTool
22+
from ..builtin_tools import CodeExecutionTool, ImageAspectRatio, ImageGenerationTool, MCPServerTool, WebSearchTool
2323
from ..exceptions import UserError
2424
from ..messages import (
2525
AudioUrl,
@@ -159,6 +159,36 @@
159159
'failed': 'error',
160160
}
161161

162+
# Each aspect ratio that OpenAI can honour is pinned to one concrete size string;
# ratios missing from this table are rejected by `_resolve_openai_image_generation_size`.
_OPENAI_ASPECT_RATIO_TO_SIZE: dict[ImageAspectRatio, Literal['1024x1024', '1024x1536', '1536x1024']] = {
    '1:1': '1024x1024',
    '2:3': '1024x1536',
    '3:2': '1536x1024',
}


def _resolve_openai_image_generation_size(
    tool: ImageGenerationTool,
) -> Literal['auto', '1024x1024', '1024x1536', '1536x1024']:
    """Map `ImageGenerationTool.aspect_ratio` to an OpenAI size string when provided.

    Args:
        tool: The image generation tool whose `aspect_ratio` and `size` are resolved.

    Returns:
        `tool.size` unchanged when `tool.aspect_ratio` is `None`, otherwise the size
        that `_OPENAI_ASPECT_RATIO_TO_SIZE` maps the aspect ratio to.

    Raises:
        UserError: If the aspect ratio has no entry in `_OPENAI_ASPECT_RATIO_TO_SIZE`, or if
            `tool.size` is neither `'auto'` nor the size mapped from the aspect ratio.
    """
    aspect_ratio = tool.aspect_ratio
    if aspect_ratio is None:
        # No aspect ratio requested: the explicit (or default) size is used as-is.
        return tool.size

    mapped_size = _OPENAI_ASPECT_RATIO_TO_SIZE.get(aspect_ratio)
    if mapped_size is None:
        supported = ', '.join(_OPENAI_ASPECT_RATIO_TO_SIZE)
        raise UserError(
            f'OpenAI image generation only supports `aspect_ratio` values: {supported}. '
            'Specify one of those values or omit `aspect_ratio`.'
        )

    # `'auto'` defers to the aspect ratio; any other explicit size must agree with it.
    if tool.size not in ('auto', mapped_size):
        raise UserError(
            '`ImageGenerationTool` cannot combine `aspect_ratio` with a conflicting `size` when using OpenAI: '
            f'`aspect_ratio={aspect_ratio!r}` requires `size={mapped_size!r}` (or `size="auto"`), '
            f'but `size={tool.size!r}` was given.'
        )

    return mapped_size
191+
162192

163193
class OpenAIChatModelSettings(ModelSettings, total=False):
164194
"""Settings used for an OpenAI model request."""
@@ -1469,6 +1499,7 @@ def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) -
14691499
tools.append(mcp_tool)
14701500
elif isinstance(tool, ImageGenerationTool): # pragma: no branch
14711501
has_image_generating_tool = True
1502+
size = _resolve_openai_image_generation_size(tool)
14721503
tools.append(
14731504
responses.tool_param.ImageGeneration(
14741505
type='image_generation',
@@ -1479,7 +1510,7 @@ def _get_builtin_tools(self, model_request_parameters: ModelRequestParameters) -
14791510
output_format=tool.output_format or 'png',
14801511
partial_images=tool.partial_images,
14811512
quality=tool.quality,
1482-
size=tool.size,
1513+
size=size,
14831514
)
14841515
)
14851516
else:

tests/models/test_google.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3600,6 +3600,15 @@ async def test_google_image_generation_tool(allow_model_requests: None, google_p
36003600
await agent.run('Generate an image of an axolotl.')
36013601

36023602

3603+
async def test_google_image_generation_tool_aspect_ratio(google_provider: GoogleProvider) -> None:
    """Check that `_get_tools` returns the tool's `aspect_ratio` as the image config."""
    image_model = GoogleModel('gemini-2.5-flash-image', provider=google_provider)
    request_parameters = ModelRequestParameters(
        builtin_tools=[ImageGenerationTool(aspect_ratio='16:9')],
    )

    resolved = image_model._get_tools(request_parameters)  # pyright: ignore[reportPrivateUsage]
    tool_dicts, image_config = resolved

    # Expected: no tool dicts are produced, and the aspect ratio is carried by the image config.
    assert tool_dicts is None
    assert image_config == {'aspect_ratio': '16:9'}
3610+
3611+
36033612
async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider):
36043613
model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider)
36053614

tests/models/test_model_request_parameters.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def test_model_request_parameters_are_serializable():
9898
'partial_images': 0,
9999
'quality': 'auto',
100100
'size': '1024x1024',
101+
'aspect_ratio': None,
101102
},
102103
{'kind': 'memory'},
103104
{

tests/models/test_openai_responses.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import json
22
import re
33
from dataclasses import replace
4-
from typing import Any, cast
4+
from typing import Any, Literal, cast
55

66
import pytest
77
from inline_snapshot import snapshot
@@ -32,17 +32,19 @@
3232
ToolCallPartDelta,
3333
ToolReturnPart,
3434
UnexpectedModelBehavior,
35+
UserError,
3536
UserPromptPart,
3637
capture_run_messages,
3738
)
3839
from pydantic_ai.agent import Agent
39-
from pydantic_ai.builtin_tools import CodeExecutionTool, MCPServerTool, WebSearchTool
40+
from pydantic_ai.builtin_tools import CodeExecutionTool, ImageAspectRatio, MCPServerTool, WebSearchTool
4041
from pydantic_ai.exceptions import ModelHTTPError, ModelRetry
4142
from pydantic_ai.messages import (
4243
BuiltinToolCallEvent, # pyright: ignore[reportDeprecated]
4344
BuiltinToolResultEvent, # pyright: ignore[reportDeprecated]
4445
)
4546
from pydantic_ai.models import ModelRequestParameters
47+
from pydantic_ai.models.openai import _resolve_openai_image_generation_size # pyright: ignore[reportPrivateUsage]
4648
from pydantic_ai.output import NativeOutput, PromptedOutput, TextOutput, ToolOutput
4749
from pydantic_ai.profiles.openai import openai_model_profile
4850
from pydantic_ai.tools import ToolDefinition
@@ -128,6 +130,37 @@ async def test_openai_responses_image_detail_vendor_metadata(allow_model_request
128130
assert all(part['detail'] == 'high' for part in image_parts)
129131

130132

133+
@pytest.mark.parametrize(
    ('aspect_ratio', 'explicit_size', 'expected_size'),
    [
        ('1:1', 'auto', '1024x1024'),
        ('2:3', '1024x1536', '1024x1536'),
        ('3:2', 'auto', '1536x1024'),
    ],
)
def test_openai_responses_image_generation_tool_aspect_ratio_mapping(
    aspect_ratio: ImageAspectRatio,
    explicit_size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'],
    expected_size: Literal['1024x1024', '1024x1536', '1536x1024'],
) -> None:
    """Each supported aspect ratio resolves to its size when `size` is `'auto'` or already matches."""
    resolved_size = _resolve_openai_image_generation_size(
        ImageGenerationTool(size=explicit_size, aspect_ratio=aspect_ratio)
    )

    assert resolved_size == expected_size
148+
149+
150+
def test_openai_responses_image_generation_tool_aspect_ratio_invalid() -> None:
    """An aspect ratio without an OpenAI size mapping is rejected with a `UserError`."""
    expected_message = 'OpenAI image generation only supports `aspect_ratio` values'
    unsupported_tool = ImageGenerationTool(aspect_ratio='16:9')

    with pytest.raises(UserError, match=expected_message):
        _resolve_openai_image_generation_size(unsupported_tool)
155+
156+
157+
def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size() -> None:
    """An explicit `size` that disagrees with the size mapped from `aspect_ratio` raises a `UserError`."""
    expected_message = 'cannot combine `aspect_ratio` with a conflicting `size`'
    conflicting_tool = ImageGenerationTool(size='1536x1024', aspect_ratio='1:1')

    with pytest.raises(UserError, match=expected_message):
        _resolve_openai_image_generation_size(conflicting_tool)
162+
163+
131164
async def test_openai_responses_model_simple_response_with_tool_call(allow_model_requests: None, openai_api_key: str):
132165
model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))
133166

tests/test_examples.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,12 @@ async def model_logic( # noqa: C901
692692
FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='160d47')),
693693
]
694694
)
695+
elif m.content == 'Generate a wide illustration of an axolotl city skyline.':
696+
return ModelResponse(
697+
parts=[
698+
FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')),
699+
]
700+
)
695701
elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.':
696702
return ModelResponse(
697703
parts=[

0 commit comments

Comments
 (0)