From c5b45fb10e65834e358277f7179e1390796aa4d0 Mon Sep 17 00:00:00 2001 From: Alexandros Pappas Date: Thu, 9 Oct 2025 18:09:58 +0200 Subject: [PATCH] feat: add OpenAI gpt-image-1 and gpt-image-1-mini models with new parameters Add support for OpenAI's new GPT Image models (gpt-image-1 and gpt-image-1-mini) with all new model-specific parameters according to the official API specification. Changes: - Add GPT_IMAGE_1 and GPT_IMAGE_1_MINI to ImageModel enum - Update default image model from DALL_E_3 to GPT_IMAGE_1_MINI - Add 6 new gpt-image-1 specific parameters to OpenAiImageRequest: * background: transparency control (transparent/opaque/auto) * moderation: content moderation level (low/auto) * outputCompression: compression level 0-100% for webp/jpeg * outputFormat: output format (png/jpeg/webp) * partialImages: streaming partial images support (0-3) * stream: enable streaming mode - Update OpenAiImageOptions with new fields, getters/setters, and builder methods - Update documentation to reflect model-specific parameter support - Add comprehensive integration tests (OpenAiImageApiIT) with parameterized tests - Update existing tests to use new default model - Add streaming image generation: new StreamingImageModel interface, OpenAiImageModel.stream(), and OpenAiImageApi.streamImage() backed by WebClient - Expose OpenAiImageApi as a separate auto-configured bean Breaking Changes: - OpenAiImageRequest constructor signature updated with 6 new parameters - OpenAiImageApi constructor signature updated with a new WebClient.Builder parameter Reference: https://platform.openai.com/docs/models Signed-off-by: Alexandros Pappas --- .../OpenAiImageAutoConfiguration.java | 21 +- .../OpenAiImageAutoConfigurationIT.java | 96 +++++++ .../autoconfigure/OpenAiPropertiesTests.java | 40 +++ models/spring-ai-openai/pom.xml | 5 + .../ai/openai/OpenAiImageModel.java | 63 ++++- .../ai/openai/OpenAiImageOptions.java | 174 +++++++++++- .../ai/openai/api/OpenAiImageApi.java | 151 ++++++++++- .../image/OpenAiImageModelStreamingIT.java | 245 +++++++++++++++++ .../ai/openai/image/api/OpenAiImageApiIT.java | 255 ++++++++++++++++++ .../image/api/OpenAiImageApiStreamingIT.java | 152 +++++++++++ .../ROOT/pages/api/image/openai-image.adoc | 112 +++++++- 
.../modules/ROOT/pages/api/imageclient.adoc | 32 +++ .../ai/image/StreamingImageModel.java | 69 +++++ 13 files changed, 1369 insertions(+), 46 deletions(-) create mode 100644 auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageAutoConfigurationIT.java create mode 100644 models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/OpenAiImageModelStreamingIT.java create mode 100644 models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/api/OpenAiImageApiIT.java create mode 100644 models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/api/OpenAiImageApiStreamingIT.java create mode 100644 spring-ai-model/src/main/java/org/springframework/ai/image/StreamingImageModel.java diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageAutoConfiguration.java b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageAutoConfiguration.java index 07da6969a70..80fea3b6a45 100644 --- a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageAutoConfiguration.java +++ b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/main/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageAutoConfiguration.java @@ -23,7 +23,6 @@ import org.springframework.ai.model.SpringAIModelProperties; import org.springframework.ai.model.SpringAIModels; import org.springframework.ai.openai.OpenAiImageModel; -import org.springframework.ai.openai.api.OpenAiApi; import org.springframework.ai.openai.api.OpenAiImageApi; import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration; import org.springframework.beans.factory.ObjectProvider; @@ -38,6 +37,7 @@ import 
org.springframework.retry.support.RetryTemplate; import org.springframework.web.client.ResponseErrorHandler; import org.springframework.web.client.RestClient; +import org.springframework.web.reactive.function.client.WebClient; import static org.springframework.ai.model.openai.autoconfigure.OpenAIAutoConfigurationUtil.resolveConnectionProperties; @@ -53,7 +53,7 @@ */ @AutoConfiguration(after = { RestClientAutoConfiguration.class, WebClientAutoConfiguration.class, SpringAiRetryAutoConfiguration.class }) -@ConditionalOnClass(OpenAiApi.class) +@ConditionalOnClass(OpenAiImageApi.class) @ConditionalOnProperty(name = SpringAIModelProperties.IMAGE_MODEL, havingValue = SpringAIModels.OPENAI, matchIfMissing = true) @EnableConfigurationProperties({ OpenAiConnectionProperties.class, OpenAiImageProperties.class }) @@ -61,23 +61,30 @@ public class OpenAiImageAutoConfiguration { @Bean @ConditionalOnMissingBean - public OpenAiImageModel openAiImageModel(OpenAiConnectionProperties commonProperties, + public OpenAiImageApi openAiImageApi(OpenAiConnectionProperties commonProperties, OpenAiImageProperties imageProperties, ObjectProvider restClientBuilderProvider, - RetryTemplate retryTemplate, ResponseErrorHandler responseErrorHandler, - ObjectProvider observationRegistry, - ObjectProvider observationConvention) { + ObjectProvider webClientBuilderProvider, ResponseErrorHandler responseErrorHandler) { OpenAIAutoConfigurationUtil.ResolvedConnectionProperties resolved = resolveConnectionProperties( commonProperties, imageProperties, "image"); - var openAiImageApi = OpenAiImageApi.builder() + return OpenAiImageApi.builder() .baseUrl(resolved.baseUrl()) .apiKey(new SimpleApiKey(resolved.apiKey())) .headers(resolved.headers()) .imagesPath(imageProperties.getImagesPath()) .restClientBuilder(restClientBuilderProvider.getIfAvailable(RestClient::builder)) + .webClientBuilder(webClientBuilderProvider.getIfAvailable(WebClient::builder)) .responseErrorHandler(responseErrorHandler) .build(); + } + 
+ @Bean + @ConditionalOnMissingBean + public OpenAiImageModel openAiImageModel(OpenAiImageApi openAiImageApi, OpenAiImageProperties imageProperties, + RetryTemplate retryTemplate, ObjectProvider observationRegistry, + ObjectProvider observationConvention) { + var imageModel = new OpenAiImageModel(openAiImageApi, imageProperties.getOptions(), retryTemplate, observationRegistry.getIfUnique(() -> ObservationRegistry.NOOP)); diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageAutoConfigurationIT.java b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageAutoConfigurationIT.java new file mode 100644 index 00000000000..af36736f1c1 --- /dev/null +++ b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiImageAutoConfigurationIT.java @@ -0,0 +1,96 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.model.openai.autoconfigure; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; + +import org.springframework.ai.image.ImagePrompt; +import org.springframework.ai.image.ImageResponse; +import org.springframework.ai.model.tool.autoconfigure.ToolCallingAutoConfiguration; +import org.springframework.ai.openai.OpenAiImageModel; +import org.springframework.ai.openai.api.OpenAiImageApi; +import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration; +import org.springframework.boot.autoconfigure.AutoConfigurations; +import org.springframework.boot.autoconfigure.web.client.RestClientAutoConfiguration; +import org.springframework.boot.autoconfigure.web.reactive.function.client.WebClientAutoConfiguration; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Integration tests for {@link OpenAiImageAutoConfiguration}. 
+ * + * @author Alexandros Pappas + * @since 1.1.0 + */ +@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+") +public class OpenAiImageAutoConfigurationIT { + + private static final Log logger = LogFactory.getLog(OpenAiImageAutoConfigurationIT.class); + + private final ApplicationContextRunner contextRunner = new ApplicationContextRunner() + .withPropertyValues("spring.ai.openai.apiKey=" + System.getenv("OPENAI_API_KEY")) + .withConfiguration( + AutoConfigurations.of(SpringAiRetryAutoConfiguration.class, RestClientAutoConfiguration.class, + WebClientAutoConfiguration.class, ToolCallingAutoConfiguration.class)); + + @Test + void imageModelAutoConfigured() { + this.contextRunner.withConfiguration(AutoConfigurations.of(OpenAiImageAutoConfiguration.class)).run(context -> { + assertThat(context.getBeansOfType(OpenAiImageModel.class)).isNotEmpty(); + assertThat(context.getBeansOfType(OpenAiImageApi.class)).isNotEmpty(); + }); + } + + @Test + void generateImage() { + this.contextRunner + .withPropertyValues("spring.ai.openai.image.options.model=dall-e-2", + "spring.ai.openai.image.options.response-format=b64_json") + .withConfiguration(AutoConfigurations.of(OpenAiImageAutoConfiguration.class)) + .run(context -> { + OpenAiImageModel imageModel = context.getBean(OpenAiImageModel.class); + ImagePrompt prompt = new ImagePrompt("A simple red circle"); + ImageResponse response = imageModel.call(prompt); + + assertThat(response).isNotNull(); + assertThat(response.getResults()).hasSize(1); + assertThat(response.getResult().getOutput().getB64Json()).isNotEmpty(); + + logger.info("Generated image with base64 length: " + + response.getResult().getOutput().getB64Json().length()); + }); + } + + @Test + void imageModelDisabled() { + this.contextRunner.withPropertyValues("spring.ai.model.image=none") + .withConfiguration(AutoConfigurations.of(OpenAiImageAutoConfiguration.class)) + .run(context -> assertThat(context.getBeansOfType(OpenAiImageModel.class)).isEmpty()); + 
} + + @Test + void imageModelExplicitlyEnabled() { + this.contextRunner.withPropertyValues("spring.ai.model.image=openai") + .withConfiguration(AutoConfigurations.of(OpenAiImageAutoConfiguration.class)) + .run(context -> assertThat(context.getBeansOfType(OpenAiImageModel.class)).isNotEmpty()); + } + +} diff --git a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiPropertiesTests.java b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiPropertiesTests.java index 28378329628..56ef50dcff9 100644 --- a/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiPropertiesTests.java +++ b/auto-configurations/models/spring-ai-autoconfigure-model-openai/src/test/java/org/springframework/ai/model/openai/autoconfigure/OpenAiPropertiesTests.java @@ -539,6 +539,46 @@ public void imageOptionsTest() { }); } + @Test + public void imageGptImageOptionsTest() { + this.contextRunner.withPropertyValues( + // @formatter:off + "spring.ai.openai.api-key=API_KEY", + "spring.ai.openai.base-url=TEST_BASE_URL", + + "spring.ai.openai.image.options.model=gpt-image-1", + "spring.ai.openai.image.options.quality=high", + "spring.ai.openai.image.options.size=1024x1024", + "spring.ai.openai.image.options.background=transparent", + "spring.ai.openai.image.options.moderation=low", + "spring.ai.openai.image.options.output_compression=85", + "spring.ai.openai.image.options.output_format=png", + "spring.ai.openai.image.options.partial_images=2", + "spring.ai.openai.image.options.stream=true", + "spring.ai.openai.image.options.user=userXYZ" + ) + // @formatter:on + .withConfiguration(AutoConfigurations.of(OpenAiImageAutoConfiguration.class)) + .run(context -> { + var imageProperties = context.getBean(OpenAiImageProperties.class); + var connectionProperties = 
context.getBean(OpenAiConnectionProperties.class); + + assertThat(connectionProperties.getBaseUrl()).isEqualTo("TEST_BASE_URL"); + assertThat(connectionProperties.getApiKey()).isEqualTo("API_KEY"); + + assertThat(imageProperties.getOptions().getModel()).isEqualTo("gpt-image-1"); + assertThat(imageProperties.getOptions().getQuality()).isEqualTo("high"); + assertThat(imageProperties.getOptions().getSize()).isEqualTo("1024x1024"); + assertThat(imageProperties.getOptions().getBackground()).isEqualTo("transparent"); + assertThat(imageProperties.getOptions().getModeration()).isEqualTo("low"); + assertThat(imageProperties.getOptions().getOutputCompression()).isEqualTo(85); + assertThat(imageProperties.getOptions().getOutputFormat()).isEqualTo("png"); + assertThat(imageProperties.getOptions().getPartialImages()).isEqualTo(2); + assertThat(imageProperties.getOptions().getStream()).isTrue(); + assertThat(imageProperties.getOptions().getUser()).isEqualTo("userXYZ"); + }); + } + @Test void embeddingActivation() { diff --git a/models/spring-ai-openai/pom.xml b/models/spring-ai-openai/pom.xml index 3f9adec528e..8356045c81f 100644 --- a/models/spring-ai-openai/pom.xml +++ b/models/spring-ai-openai/pom.xml @@ -117,6 +117,11 @@ test + + io.projectreactor + reactor-test + test + diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageModel.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageModel.java index 68354662548..f64171564dd 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageModel.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageModel.java @@ -21,6 +21,7 @@ import io.micrometer.observation.ObservationRegistry; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import reactor.core.publisher.Flux; import org.springframework.ai.image.Image; import org.springframework.ai.image.ImageGeneration; @@ -29,6 +30,7 @@ import 
org.springframework.ai.image.ImagePrompt; import org.springframework.ai.image.ImageResponse; import org.springframework.ai.image.ImageResponseMetadata; +import org.springframework.ai.image.StreamingImageModel; import org.springframework.ai.image.observation.DefaultImageModelObservationConvention; import org.springframework.ai.image.observation.ImageModelObservationContext; import org.springframework.ai.image.observation.ImageModelObservationConvention; @@ -43,16 +45,26 @@ import org.springframework.util.Assert; /** - * OpenAiImageModel is a class that implements the ImageModel interface. It provides a - * client for calling the OpenAI image generation API. + * OpenAiImageModel is a class that implements the ImageModel and StreamingImageModel + * interfaces. It provides a client for calling the OpenAI image generation API with both + * synchronous and streaming capabilities. + * + *

<p>
+ * Streaming image generation is supported for GPT-Image models (gpt-image-1, + * gpt-image-1-mini) and allows receiving partial images as they are generated. DALL-E + * models do not support streaming. + *
</p>
* * @author Mark Pollack * @author Christian Tzolov * @author Hyunjoon Choi * @author Thomas Vitale + * @author Alexandros Pappas * @since 0.8.0 + * @see ImageModel + * @see StreamingImageModel */ -public class OpenAiImageModel implements ImageModel { +public class OpenAiImageModel implements ImageModel, StreamingImageModel { private static final Logger logger = LoggerFactory.getLogger(OpenAiImageModel.class);
@@ -205,6 +217,50 @@ private ImagePrompt buildRequestImagePrompt(ImagePrompt imagePrompt) {
 		return new ImagePrompt(imagePrompt.getInstructions(), requestOptions);
 	}
 
+	/**
+	 * Streams image generation results for the given prompt. Every streaming event
+	 * returned by the API is mapped to its own {@link ImageResponse} carrying a single
+	 * base64-encoded image.
+	 * @param imagePrompt the prompt and runtime options for the image to generate
+	 * @return a {@link Flux} of image responses; the {@link Flux} signals an
+	 * {@link IllegalArgumentException} when the resolved model is not a GPT-Image model
+	 */
+	@Override
+	public Flux<ImageResponse> stream(ImagePrompt imagePrompt) {
+		// Before moving any further, build the final request ImagePrompt,
+		// merging runtime and default options.
+		ImagePrompt requestImagePrompt = buildRequestImagePrompt(imagePrompt);
+
+		OpenAiImageApi.OpenAiImageRequest imageRequest = createRequest(requestImagePrompt);
+
+		// Streaming is only accepted for GPT-Image models; a null model is let through.
+		String model = imageRequest.model();
+		if (model != null && !model.startsWith("gpt-image-")) {
+			return Flux.error(new IllegalArgumentException(
+					"Streaming is only supported for GPT-Image models (gpt-image-1, gpt-image-1-mini). "
+							+ "Current model: " + model));
+		}
+
+		// Force the stream flag to true when the merged options left it null or false.
+		if (!Boolean.TRUE.equals(imageRequest.stream())) {
+			imageRequest = new OpenAiImageApi.OpenAiImageRequest(imageRequest.prompt(), imageRequest.model(),
+					imageRequest.n(), imageRequest.quality(), imageRequest.responseFormat(), imageRequest.size(),
+					imageRequest.style(), imageRequest.user(), imageRequest.background(), imageRequest.moderation(),
+					imageRequest.outputCompression(), imageRequest.outputFormat(), imageRequest.partialImages(), true);
+		}
+
+		// NOTE(review): streaming calls are not wrapped in an observation here.
+		Flux<OpenAiImageApi.OpenAiImageStreamEvent> eventStream = this.openAiImageApi.streamImage(imageRequest);
+
+		// Convert every streaming event into an ImageResponse.
+		return eventStream.map(event -> {
+			Image image = new Image(null, event.b64Json());
+			ImageGeneration generation = new ImageGeneration(image, new OpenAiImageGenerationMetadata(null));
+			// createdAt is handed over as-is, including when the event carries none.
+			return new ImageResponse(List.of(generation), new ImageResponseMetadata(event.createdAt()));
+		});
+	}
+
 	/**
 	 * Use the provided convention for reporting observation data
 	 * @param observationConvention The provided convention
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageOptions.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageOptions.java index 3b294a5b02b..68876d303d9 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageOptions.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageOptions.java @@ -67,27 +67,31 @@ public class OpenAiImageOptions implements ImageOptions { private Integer height; /** - * The quality of the image that will be generated. hd creates images with finer - * details and greater consistency across the image. This param is only supported for - * dall-e-3. + * The quality of the image that will be generated. auto (default value) will + * automatically select the best quality for the given model. high, medium and low are + * supported for gpt-image-1. hd and standard are supported for dall-e-3. standard is + * the only option for dall-e-2. */ @JsonProperty("quality") private String quality; /** - * The format in which the generated images are returned. Must be one of url or - * b64_json. + * The format in which generated images with dall-e-2 and dall-e-3 are returned. Must + * be one of url or b64_json. URLs are only valid for 60 minutes after the image has + * been generated. This parameter isn't supported for gpt-image-1 which will always + * return base64-encoded images. */ @JsonProperty("response_format") private String responseFormat; /** - * The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for - * dall-e-2. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models. 
- * This property is automatically computed when both width and height are set, - * following the format "widthxheight". When setting this property directly, it must - * follow the format "WxH" where W and H are valid integers. Invalid formats will - * result in null width and height values. + * The size of the generated images. Must be one of 1024x1024, 1536x1024 (landscape), + * 1024x1536 (portrait), or auto (default value) for gpt-image-1. Must be one of + * 256x256, 512x512, or 1024x1024 for dall-e-2. Must be one of 1024x1024, 1792x1024, + * or 1024x1792 for dall-e-3. This property is automatically computed when both width + * and height are set, following the format "widthxheight". When setting this property + * directly, it must follow the format "WxH" where W and H are valid integers. Invalid + * formats will result in null width and height values. */ @JsonProperty("size") private String size; @@ -108,6 +112,52 @@ public class OpenAiImageOptions implements ImageOptions { @JsonProperty("user") private String user; + /** + * Allows to set transparency for the background of the generated image(s). This + * parameter is only supported for gpt-image-1. Must be one of transparent, opaque or + * auto (default value). When auto is used, the model will automatically determine the + * best background for the image. + */ + @JsonProperty("background") + private String background; + + /** + * Control the content-moderation level for images generated by gpt-image-1. Must be + * either low for less restrictive filtering or auto (default value). + */ + @JsonProperty("moderation") + private String moderation; + + /** + * The compression level (0-100%) for the generated images. This parameter is only + * supported for gpt-image-1 with the webp or jpeg output formats, and defaults to + * 100. + */ + @JsonProperty("output_compression") + private Integer outputCompression; + + /** + * The format in which the generated images are returned. 
This parameter is only + * supported for gpt-image-1. Must be one of png, jpeg, or webp. + */ + @JsonProperty("output_format") + private String outputFormat; + + /** + * The number of partial images to generate. This parameter is used for streaming + * responses that return partial images. Value must be between 0 and 3. When set to 0, + * the response will be a single image sent in one streaming event. + */ + @JsonProperty("partial_images") + private Integer partialImages; + + /** + * Generate the image in streaming mode. Defaults to false. This parameter is only + * supported for gpt-image-1. + */ + @JsonProperty("stream") + private Boolean stream; + public static Builder builder() { return new Builder(); } @@ -128,6 +178,12 @@ public static OpenAiImageOptions fromOptions(OpenAiImageOptions fromOptions) { options.size = fromOptions.size; options.style = fromOptions.style; options.user = fromOptions.user; + options.background = fromOptions.background; + options.moderation = fromOptions.moderation; + options.outputCompression = fromOptions.outputCompression; + options.outputFormat = fromOptions.outputFormat; + options.partialImages = fromOptions.partialImages; + options.stream = fromOptions.stream; return options; } @@ -262,6 +318,54 @@ public void setSize(String size) { } } + public String getBackground() { + return this.background; + } + + public void setBackground(String background) { + this.background = background; + } + + public String getModeration() { + return this.moderation; + } + + public void setModeration(String moderation) { + this.moderation = moderation; + } + + public Integer getOutputCompression() { + return this.outputCompression; + } + + public void setOutputCompression(Integer outputCompression) { + this.outputCompression = outputCompression; + } + + public String getOutputFormat() { + return this.outputFormat; + } + + public void setOutputFormat(String outputFormat) { + this.outputFormat = outputFormat; + } + + public Integer getPartialImages() 
{ + return this.partialImages; + } + + public void setPartialImages(Integer partialImages) { + this.partialImages = partialImages; + } + + public Boolean getStream() { + return this.stream; + } + + public void setStream(Boolean stream) { + this.stream = stream; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -274,13 +378,18 @@ public boolean equals(Object o) { && Objects.equals(this.width, that.width) && Objects.equals(this.height, that.height) && Objects.equals(this.quality, that.quality) && Objects.equals(this.responseFormat, that.responseFormat) && Objects.equals(this.size, that.size) - && Objects.equals(this.style, that.style) && Objects.equals(this.user, that.user); + && Objects.equals(this.style, that.style) && Objects.equals(this.user, that.user) + && Objects.equals(this.background, that.background) && Objects.equals(this.moderation, that.moderation) + && Objects.equals(this.outputCompression, that.outputCompression) + && Objects.equals(this.outputFormat, that.outputFormat) + && Objects.equals(this.partialImages, that.partialImages) && Objects.equals(this.stream, that.stream); } @Override public int hashCode() { return Objects.hash(this.n, this.model, this.width, this.height, this.quality, this.responseFormat, this.size, - this.style, this.user); + this.style, this.user, this.background, this.moderation, this.outputCompression, this.outputFormat, + this.partialImages, this.stream); } @Override @@ -288,7 +397,9 @@ public String toString() { return "OpenAiImageOptions{" + "n=" + this.n + ", model='" + this.model + '\'' + ", width=" + this.width + ", height=" + this.height + ", quality='" + this.quality + '\'' + ", responseFormat='" + this.responseFormat + '\'' + ", size='" + this.size + '\'' + ", style='" + this.style + '\'' - + ", user='" + this.user + '\'' + '}'; + + ", user='" + this.user + '\'' + ", background='" + this.background + '\'' + ", moderation='" + + this.moderation + '\'' + ", outputCompression=" + this.outputCompression + 
", outputFormat='" + + this.outputFormat + '\'' + ", partialImages=" + this.partialImages + ", stream=" + this.stream + '}'; } /** @@ -341,6 +452,11 @@ public Builder height(Integer height) { return this; } + public Builder size(String size) { + this.options.setSize(size); + return this; + } + public Builder style(String style) { this.options.setStyle(style); return this; @@ -351,6 +467,36 @@ public Builder user(String user) { return this; } + public Builder background(String background) { + this.options.setBackground(background); + return this; + } + + public Builder moderation(String moderation) { + this.options.setModeration(moderation); + return this; + } + + public Builder outputCompression(Integer outputCompression) { + this.options.setOutputCompression(outputCompression); + return this; + } + + public Builder outputFormat(String outputFormat) { + this.options.setOutputFormat(outputFormat); + return this; + } + + public Builder partialImages(Integer partialImages) { + this.options.setPartialImages(partialImages); + return this; + } + + public Builder stream(Boolean stream) { + this.options.setStream(stream); + return this; + } + public OpenAiImageOptions build() { return this.options; } diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiImageApi.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiImageApi.java index fe82e30e56b..6f9acf442b0 100644 --- a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiImageApi.java +++ b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiImageApi.java @@ -17,10 +17,14 @@ package org.springframework.ai.openai.api; import java.util.List; +import java.util.function.Consumer; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonProcessingException; 
+import com.fasterxml.jackson.databind.ObjectMapper; +import reactor.core.publisher.Flux; import org.springframework.ai.model.ApiKey; import org.springframework.ai.model.NoopApiKey; @@ -35,6 +39,7 @@ import org.springframework.util.MultiValueMap; import org.springframework.web.client.ResponseErrorHandler; import org.springframework.web.client.RestClient; +import org.springframework.web.reactive.function.client.WebClient; /** * OpenAI Image API. @@ -49,6 +54,12 @@ public class OpenAiImageApi { private final RestClient restClient; + private final WebClient webClient; + + private final ApiKey apiKey; + + private final ObjectMapper objectMapper = new ObjectMapper(); + private final String imagesPath; /** @@ -58,24 +69,30 @@ public class OpenAiImageApi { * @param headers the http headers to use. * @param imagesPath the images path to use. * @param restClientBuilder the rest client builder to use. + * @param webClientBuilder the web client builder to use for streaming. * @param responseErrorHandler the response error handler to use. 
*/ public OpenAiImageApi(String baseUrl, ApiKey apiKey, MultiValueMap headers, String imagesPath, - RestClient.Builder restClientBuilder, ResponseErrorHandler responseErrorHandler) { + RestClient.Builder restClientBuilder, WebClient.Builder webClientBuilder, + ResponseErrorHandler responseErrorHandler) { + + this.apiKey = apiKey; // @formatter:off + Consumer defaultHeaders = h -> { + h.setContentType(MediaType.APPLICATION_JSON); + h.addAll(headers); + }; + this.restClient = restClientBuilder.clone() .baseUrl(baseUrl) - .defaultHeaders(h -> { - h.setContentType(MediaType.APPLICATION_JSON); - h.addAll(headers); - }) + .defaultHeaders(defaultHeaders) .defaultStatusHandler(responseErrorHandler) - .defaultRequest(requestHeadersSpec -> { - if (!(apiKey instanceof NoopApiKey)) { - requestHeadersSpec.header(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey.getValue()); - } - }) + .build(); + + this.webClient = webClientBuilder.clone() + .baseUrl(baseUrl) + .defaultHeaders(defaultHeaders) .build(); // @formatter:on @@ -86,11 +103,59 @@ public ResponseEntity createImage(OpenAiImageRequest openAi Assert.notNull(openAiImageRequest, "Image request cannot be null."); Assert.hasLength(openAiImageRequest.prompt(), "Prompt cannot be empty."); + // @formatter:off return this.restClient.post() .uri(this.imagesPath) + .headers(this::addDefaultHeadersIfMissing) .body(openAiImageRequest) .retrieve() .toEntity(OpenAiImageResponse.class); + // @formatter:on + } + + /** + * Creates a streaming image generation response for the given image request. + * @param imageRequest The image generation request. Must have the stream property set + * to true. + * @return Returns a {@link Flux} stream of image generation events including partial + * images (type: "image_generation.partial_image") and the final complete image (type: + * "image_generation.completed"). 
+ */ + public Flux streamImage(OpenAiImageRequest imageRequest) { + Assert.notNull(imageRequest, "Image request cannot be null."); + Assert.hasLength(imageRequest.prompt(), "Prompt cannot be empty."); + Assert.isTrue(imageRequest.stream() != null && imageRequest.stream(), + "Request must set the stream property to true."); + + // @formatter:off + return this.webClient.post() + .uri(this.imagesPath) + .headers(this::addDefaultHeadersIfMissing) + .bodyValue(imageRequest) + .retrieve() + .bodyToFlux(String.class) + // Parse the JSON event data - each chunk is a complete JSON object + .mapNotNull(content -> { + try { + // Skip empty lines + if (content == null || content.trim().isEmpty()) { + return null; + } + return this.objectMapper.readValue(content.trim(), OpenAiImageStreamEvent.class); + } + catch (JsonProcessingException ex) { + throw new RuntimeException("Failed to parse streaming image event: " + content, ex); + } + }) + // Complete the stream after receiving the "image_generation.completed" event + .takeUntil(event -> "image_generation.completed".equals(event.type())); + // @formatter:on + } + + private void addDefaultHeadersIfMissing(HttpHeaders headers) { + if (!headers.containsKey(HttpHeaders.AUTHORIZATION) && !(this.apiKey instanceof NoopApiKey)) { + headers.setBearerAuth(this.apiKey.getValue()); + } } public static Builder builder() { @@ -99,10 +164,22 @@ public static Builder builder() { /** * OpenAI Image API model. - * DALL·E + * Models */ public enum ImageModel { + /** + * Multimodal language model that accepts both text and image inputs, and produces + * image outputs. + */ + GPT_IMAGE_1("gpt-image-1"), + + /** + * A cost-efficient version of GPT Image 1. It is a natively multimodal language + * model that accepts both text and image inputs, and produces image outputs. + */ + GPT_IMAGE_1_MINI("gpt-image-1-mini"), + /** * The latest DALL·E model released in Nov 2023. 
*/ @@ -137,10 +214,16 @@ public record OpenAiImageRequest( @JsonProperty("response_format") String responseFormat, @JsonProperty("size") String size, @JsonProperty("style") String style, - @JsonProperty("user") String user) { + @JsonProperty("user") String user, + @JsonProperty("background") String background, + @JsonProperty("moderation") String moderation, + @JsonProperty("output_compression") Integer outputCompression, + @JsonProperty("output_format") String outputFormat, + @JsonProperty("partial_images") Integer partialImages, + @JsonProperty("stream") Boolean stream) { public OpenAiImageRequest(String prompt, String model) { - this(prompt, model, null, null, null, null, null, null); + this(prompt, model, null, null, null, null, null, null, null, null, null, null, null, null); } } @@ -159,6 +242,38 @@ public record Data(@JsonProperty("url") String url, @JsonProperty("b64_json") St } + /** + * Represents a Server-Sent Event (SSE) for streaming image generation. This event is + * emitted during streaming image generation when partial images become available + * (type: "image_generation.partial_image") or when generation completes (type: + * "image_generation.completed"). + */ + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonIgnoreProperties(ignoreUnknown = true) + public record OpenAiImageStreamEvent(@JsonProperty("type") String type, @JsonProperty("b64_json") String b64Json, + @JsonProperty("created_at") Long createdAt, @JsonProperty("size") String size, + @JsonProperty("quality") String quality, @JsonProperty("background") String background, + @JsonProperty("output_format") String outputFormat, + @JsonProperty("partial_image_index") Integer partialImageIndex, @JsonProperty("usage") Usage usage) { + + /** + * Token usage information for image generation (only present in + * image_generation.completed event). 
+ */ + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonIgnoreProperties(ignoreUnknown = true) + public record Usage(@JsonProperty("total_tokens") Integer totalTokens, + @JsonProperty("input_tokens") Integer inputTokens, @JsonProperty("output_tokens") Integer outputTokens, + @JsonProperty("input_tokens_details") InputTokensDetails inputTokensDetails) { + + @JsonInclude(JsonInclude.Include.NON_NULL) + @JsonIgnoreProperties(ignoreUnknown = true) + public record InputTokensDetails(@JsonProperty("text_tokens") Integer textTokens, + @JsonProperty("image_tokens") Integer imageTokens) { + } + } + } + /** * Builder to construct {@link OpenAiImageApi} instance. */ @@ -172,6 +287,8 @@ public static final class Builder { private RestClient.Builder restClientBuilder = RestClient.builder(); + private WebClient.Builder webClientBuilder = WebClient.builder(); + private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER; private String imagesPath = "v1/images/generations"; @@ -212,6 +329,12 @@ public Builder restClientBuilder(RestClient.Builder restClientBuilder) { return this; } + public Builder webClientBuilder(WebClient.Builder webClientBuilder) { + Assert.notNull(webClientBuilder, "webClientBuilder cannot be null"); + this.webClientBuilder = webClientBuilder; + return this; + } + public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) { Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null"); this.responseErrorHandler = responseErrorHandler; @@ -221,7 +344,7 @@ public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) { public OpenAiImageApi build() { Assert.notNull(this.apiKey, "apiKey must be set"); return new OpenAiImageApi(this.baseUrl, this.apiKey, this.headers, this.imagesPath, this.restClientBuilder, - this.responseErrorHandler); + this.webClientBuilder, this.responseErrorHandler); } } diff --git 
a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/OpenAiImageModelStreamingIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/OpenAiImageModelStreamingIT.java new file mode 100644 index 00000000000..c9e3330fd33 --- /dev/null +++ b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/OpenAiImageModelStreamingIT.java @@ -0,0 +1,245 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.springframework.ai.openai.image; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import reactor.core.publisher.Flux; +import reactor.test.StepVerifier; + +import org.springframework.ai.image.Image; +import org.springframework.ai.image.ImageMessage; +import org.springframework.ai.image.ImagePrompt; +import org.springframework.ai.image.ImageResponse; +import org.springframework.ai.model.SimpleApiKey; +import org.springframework.ai.openai.OpenAiImageModel; +import org.springframework.ai.openai.OpenAiImageOptions; +import org.springframework.ai.openai.api.OpenAiImageApi; +import org.springframework.ai.openai.api.OpenAiImageApi.ImageModel; +import org.springframework.web.reactive.function.client.WebClient; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Integration tests for streaming image generation with {@link OpenAiImageModel}. + * + * @author Alexandros Pappas + * @since 1.1.0 + */ +@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+") +public class OpenAiImageModelStreamingIT { + + OpenAiImageApi openAiImageApi = OpenAiImageApi.builder() + .apiKey(new SimpleApiKey(System.getenv("OPENAI_API_KEY"))) + .webClientBuilder(WebClient.builder()) + .build(); + + OpenAiImageModel openAiImageModel = new OpenAiImageModel(this.openAiImageApi); + + @Test + void streamImageWithGptImage1MiniAndPartialImages() { + // Create prompt with streaming options and 2 partial images + OpenAiImageOptions options = OpenAiImageOptions.builder() + .model(ImageModel.GPT_IMAGE_1_MINI.getValue()) + .quality("medium") + .size("1024x1024") + .background("opaque") + .moderation("auto") + .outputCompression(90) + .outputFormat("jpeg") + .partialImages(2) + .stream(true) + .build(); + + ImagePrompt prompt = new ImagePrompt(new ImageMessage("A simple red circle"), options); + + // Stream the image generation + Flux imageStream 
= this.openAiImageModel.stream(prompt); + + // Collect all responses + List responses = new ArrayList<>(); + StepVerifier.create(imageStream) + .recordWith(() -> responses) + .expectNextCount(1) // At least 1 event (final image is guaranteed) + .thenConsumeWhile(response -> true) // Consume any additional partial images + .verifyComplete(); + + // Verify we received responses + assertThat(responses).isNotEmpty(); + assertThat(responses.size()).isGreaterThanOrEqualTo(1); // At least final image + + // Verify each response has proper structure + for (ImageResponse response : responses) { + assertThat(response.getResults()).hasSize(1); + assertThat(response.getResult()).isNotNull(); + assertThat(response.getResult().getOutput()).isNotNull(); + + Image image = response.getResult().getOutput(); + assertThat(image.getB64Json()).isNotEmpty(); + assertThat(image.getUrl()).isNull(); // GPT-Image models return base64 + } + } + + @Test + void streamImageWithGptImage1() { + // Create prompt with streaming options and 1 partial image + // Using JPEG for compression < 100 (PNG only supports compression=100) + OpenAiImageOptions options = OpenAiImageOptions.builder() + .model(ImageModel.GPT_IMAGE_1.getValue()) + .quality("high") + .size("1024x1024") + .background("opaque") // JPEG doesn't support transparency + .moderation("auto") + .outputCompression(85) + .outputFormat("jpeg") + .partialImages(1) + .stream(true) + .build(); + + ImagePrompt prompt = new ImagePrompt(new ImageMessage("A blue square"), options); + + Flux imageStream = this.openAiImageModel.stream(prompt); + + StepVerifier.create(imageStream).expectNextMatches(response -> { + // Verify response structure + boolean hasResults = !response.getResults().isEmpty(); + boolean hasImage = response.getResult() != null && response.getResult().getOutput() != null; + boolean hasB64Json = response.getResult().getOutput().getB64Json() != null + && !response.getResult().getOutput().getB64Json().isEmpty(); + return hasResults && 
hasImage && hasB64Json; + }) + .thenConsumeWhile( + response -> response.getResult() != null && response.getResult().getOutput().getB64Json() != null) + .verifyComplete(); + } + + @Test + void streamImageWithNoPartialImages() { + // Create prompt with streaming but 0 partial images (only final image) + OpenAiImageOptions options = OpenAiImageOptions.builder() + .model(ImageModel.GPT_IMAGE_1_MINI.getValue()) + .quality("low") + .size("1024x1024") + .background("auto") + .moderation("auto") + .outputCompression(80) + .outputFormat("jpeg") + .partialImages(0) + .stream(true) + .build(); + + ImagePrompt prompt = new ImagePrompt(new ImageMessage("A green triangle"), options); + + Flux imageStream = this.openAiImageModel.stream(prompt); + + // Collect all responses + List responses = new ArrayList<>(); + StepVerifier.create(imageStream) + .recordWith(() -> responses) + .expectNextCount(1) // Only final image + .verifyComplete(); + + // Should only have one response + assertThat(responses).hasSize(1); + + ImageResponse response = responses.get(0); + assertThat(response.getResults()).hasSize(1); + assertThat(response.getResult().getOutput().getB64Json()).isNotEmpty(); + } + + @Test + void streamImageAutoSetsStreamParameter() { + // Create prompt WITHOUT stream parameter set + OpenAiImageOptions options = OpenAiImageOptions.builder() + .model(ImageModel.GPT_IMAGE_1_MINI.getValue()) + .quality("medium") + .partialImages(1) + // Note: NOT setting .stream(true) + .build(); + + ImagePrompt prompt = new ImagePrompt(new ImageMessage("A yellow star"), options); + + // Stream should auto-set stream=true + Flux imageStream = this.openAiImageModel.stream(prompt); + + StepVerifier.create(imageStream.take(1)).assertNext(response -> { + assertThat(response.getResults()).hasSize(1); + assertThat(response.getResult().getOutput().getB64Json()).isNotEmpty(); + }).verifyComplete(); + } + + @Test + void streamImageRejectsNonGptImageModels() { + // Try to stream with DALL-E 3 (should fail) + 
OpenAiImageOptions options = OpenAiImageOptions.builder() + .model(ImageModel.DALL_E_3.getValue()) + .stream(true) + .build(); + + ImagePrompt prompt = new ImagePrompt(new ImageMessage("A painting"), options); + + Flux imageStream = this.openAiImageModel.stream(prompt); + + StepVerifier.create(imageStream) + .expectErrorMatches(throwable -> throwable instanceof IllegalArgumentException + && throwable.getMessage().contains("Streaming is only supported for GPT-Image models")) + .verify(); + } + + @Test + void streamImageVerifyResponseMetadata() { + // Test that response metadata is properly populated + OpenAiImageOptions options = OpenAiImageOptions.builder() + .model(ImageModel.GPT_IMAGE_1_MINI.getValue()) + .quality("medium") + .size("1024x1024") + .background("transparent") + .outputCompression(100) // PNG only supports compression=100 + .outputFormat("png") + .partialImages(1) + .stream(true) + .build(); + + ImagePrompt prompt = new ImagePrompt(new ImageMessage("A purple hexagon"), options); + + Flux imageStream = this.openAiImageModel.stream(prompt); + + // Collect all events to ensure we get the final one + StepVerifier.create(imageStream).expectNextMatches(response -> { + // Verify response structure + assertThat(response.getResults()).hasSize(1); + assertThat(response.getResult()).isNotNull(); + assertThat(response.getResult().getOutput()).isNotNull(); + + // Verify image data is present + Image image = response.getResult().getOutput(); + assertThat(image.getB64Json()).isNotEmpty(); + + // Verify metadata is present + assertThat(response.getMetadata()).isNotNull(); + assertThat(response.getMetadata().getCreated()).isNotNull(); + + return true; + }) + .thenConsumeWhile(response -> true) // Consume any remaining events + .verifyComplete(); + } + +} diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/api/OpenAiImageApiIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/api/OpenAiImageApiIT.java 
new file mode 100644 index 00000000000..a526b17cedd --- /dev/null +++ b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/api/OpenAiImageApiIT.java @@ -0,0 +1,255 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.openai.image.api; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +import org.springframework.ai.model.SimpleApiKey; +import org.springframework.ai.openai.api.OpenAiImageApi; +import org.springframework.ai.openai.api.OpenAiImageApi.ImageModel; +import org.springframework.ai.openai.api.OpenAiImageApi.OpenAiImageRequest; +import org.springframework.ai.openai.api.OpenAiImageApi.OpenAiImageResponse; +import org.springframework.http.ResponseEntity; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Integration tests for {@link OpenAiImageApi}. 
+ * + * @author Alexandros Pappas + * @since 1.1.0 + */ +@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+") +public class OpenAiImageApiIT { + + OpenAiImageApi openAiImageApi = OpenAiImageApi.builder() + .apiKey(new SimpleApiKey(System.getenv("OPENAI_API_KEY"))) + .build(); + + @ParameterizedTest(name = "{0} : {displayName}") + @EnumSource(ImageModel.class) + void createImageWithAllModels(ImageModel model) { + OpenAiImageRequest request = new OpenAiImageRequest("A simple geometric pattern", model.getValue()); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().created()).isPositive(); + assertThat(response.getBody().data()).isNotEmpty(); + + // GPT-Image models return b64_json, DALL-E models can return url + boolean hasUrl = response.getBody().data().get(0).url() != null + && !response.getBody().data().get(0).url().isEmpty(); + boolean hasB64Json = response.getBody().data().get(0).b64Json() != null + && !response.getBody().data().get(0).b64Json().isEmpty(); + assertThat(hasUrl || hasB64Json).withFailMessage("Response must contain either url or b64_json").isTrue(); + } + + @ParameterizedTest(name = "{0} : {displayName}") + @EnumSource(names = { "DALL_E_3" }) + void createImageWithRevisedPrompt(ImageModel model) { + OpenAiImageRequest request = new OpenAiImageRequest("A painting of a sunset over the ocean", model.getValue()); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(1); + assertThat(response.getBody().data().get(0).url()).isNotEmpty(); + // DALL-E 3 provides a revised prompt + assertThat(response.getBody().data().get(0).revisedPrompt()).isNotEmpty(); + } + + @ParameterizedTest(name = "{0} : {displayName}") + @EnumSource(names = { "DALL_E_2", "DALL_E_3" 
}) + void createImageWithBase64Response(ImageModel model) { + // Note: Only DALL-E models support response_format parameter + OpenAiImageRequest request = new OpenAiImageRequest("A red apple", model.getValue(), null, null, "b64_json", + null, null, null, null, null, null, null, null, null); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(1); + assertThat(response.getBody().data().get(0).b64Json()).isNotEmpty(); + assertThat(response.getBody().data().get(0).url()).isNull(); + } + + @ParameterizedTest(name = "{0} : {displayName}") + @EnumSource(names = { "DALL_E_3" }) + void createImageWithCustomSize(ImageModel model) { + OpenAiImageRequest request = new OpenAiImageRequest("A minimalist logo", model.getValue(), null, null, null, + "1792x1024", null, null, null, null, null, null, null, null); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(1); + assertThat(response.getBody().data().get(0).url()).isNotEmpty(); + } + + @ParameterizedTest(name = "{0} : {displayName}") + @EnumSource(names = { "DALL_E_3" }) + void createImageWithHdQuality(ImageModel model) { + // Note: quality parameter is only supported by DALL-E 3 + OpenAiImageRequest request = new OpenAiImageRequest("A detailed architectural drawing", model.getValue(), null, + "hd", null, "1024x1024", null, null, null, null, null, null, null, null); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(1); + assertThat(response.getBody().data().get(0).url()).isNotEmpty(); + } + + @ParameterizedTest(name = "{0} : {displayName}") + @EnumSource(names = { "DALL_E_3" }) + 
void createImageWithVividStyle(ImageModel model) { + // Note: style parameter is only supported by DALL-E 3 + OpenAiImageRequest request = new OpenAiImageRequest("A vibrant abstract painting", model.getValue(), null, null, + null, "1024x1024", "vivid", null, null, null, null, null, null, null); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(1); + assertThat(response.getBody().data().get(0).url()).isNotEmpty(); + } + + @ParameterizedTest(name = "{0} : {displayName}") + @EnumSource(names = { "DALL_E_3" }) + void createImageWithNaturalStyle(ImageModel model) { + // Note: style parameter is only supported by DALL-E 3 + OpenAiImageRequest request = new OpenAiImageRequest("A realistic forest scene", model.getValue(), null, null, + null, "1024x1024", "natural", null, null, null, null, null, null, null); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(1); + assertThat(response.getBody().data().get(0).url()).isNotEmpty(); + } + + @Test + void createMultipleImagesWithDallE2() { + // DALL-E 2 supports multiple images (n > 1) + OpenAiImageRequest request = new OpenAiImageRequest("A simple icon", ImageModel.DALL_E_2.getValue(), 2, null, + null, "256x256", null, null, null, null, null, null, null, null); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(2); + assertThat(response.getBody().data().get(0).url()).isNotEmpty(); + assertThat(response.getBody().data().get(1).url()).isNotEmpty(); + } + + // Comprehensive model-specific tests with all parameters + + @Test + void gptImage1WithAllParameters() { + // Test 
GPT-Image-1 with all supported parameters (except partial which requires + // streaming) + // Using JPEG format to test compression parameter (Compression less than 100 is + // not supported for PNG output format) + OpenAiImageRequest request = new OpenAiImageRequest("A red apple floating in space", + ImageModel.GPT_IMAGE_1.getValue(), 1, "high", null, "1024x1024", null, "test-user", "opaque", "auto", + 85, "jpeg", null, false); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(1); + assertThat(response.getBody().data().get(0).b64Json()).isNotEmpty(); + } + + @Test + void gptImage1MiniWithAllParameters() { + // Test GPT-Image-1-Mini with all supported parameters (except partial which + // requires streaming) + OpenAiImageRequest request = new OpenAiImageRequest("A sunset over the ocean", + ImageModel.GPT_IMAGE_1_MINI.getValue(), 1, "medium", null, "1024x1024", null, "test-user", "opaque", + "low", 70, "jpeg", null, false); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(1); + assertThat(response.getBody().data().get(0).b64Json()).isNotEmpty(); + } + + @Test + void gptImage1MiniWithAllParametersNonStreaming() { + // Test GPT-Image-1-Mini with all supported parameters (non-streaming) + // Note: stream and partial parameters are not used for createImage() method + OpenAiImageRequest request = new OpenAiImageRequest("A colorful abstract pattern", + ImageModel.GPT_IMAGE_1_MINI.getValue(), 1, "auto", null, "1024x1024", null, "test-user", "auto", "auto", + null, "jpeg", null, false); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + 
assertThat(response.getBody().data()).hasSize(1); + assertThat(response.getBody().data().get(0).b64Json()).isNotEmpty(); + } + + @Test + void dallE3WithAllParameters() { + // Test DALL-E 3 with all supported parameters + OpenAiImageRequest request = new OpenAiImageRequest("A hyper-realistic portrait of a wise old wizard", + ImageModel.DALL_E_3.getValue(), 1, "hd", "url", "1024x1024", "vivid", "test-user", null, null, null, + null, null, null); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(1); + assertThat(response.getBody().data().get(0).url()).isNotEmpty(); + assertThat(response.getBody().data().get(0).revisedPrompt()).isNotEmpty(); + } + + @Test + void dallE2WithAllParameters() { + // Test DALL-E 2 with all supported parameters + OpenAiImageRequest request = new OpenAiImageRequest("A simple geometric pattern", + ImageModel.DALL_E_2.getValue(), 2, null, "b64_json", "512x512", null, "test-user", null, null, null, + null, null, null); + + ResponseEntity response = this.openAiImageApi.createImage(request); + + assertThat(response).isNotNull(); + assertThat(response.getBody()).isNotNull(); + assertThat(response.getBody().data()).hasSize(2); + assertThat(response.getBody().data().get(0).b64Json()).isNotEmpty(); + assertThat(response.getBody().data().get(1).b64Json()).isNotEmpty(); + } + +} diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/api/OpenAiImageApiStreamingIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/api/OpenAiImageApiStreamingIT.java new file mode 100644 index 00000000000..234cbc3f97b --- /dev/null +++ b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/image/api/OpenAiImageApiStreamingIT.java @@ -0,0 +1,152 @@ +/* + * Copyright 2023-2025 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.openai.image.api; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import reactor.core.publisher.Flux; +import reactor.test.StepVerifier; + +import org.springframework.ai.model.SimpleApiKey; +import org.springframework.ai.openai.api.OpenAiImageApi; +import org.springframework.ai.openai.api.OpenAiImageApi.ImageModel; +import org.springframework.ai.openai.api.OpenAiImageApi.OpenAiImageRequest; +import org.springframework.ai.openai.api.OpenAiImageApi.OpenAiImageStreamEvent; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Integration tests for streaming image generation with {@link OpenAiImageApi}. 
+ * + * @author Alexandros Pappas + * @since 1.1.0 + */ +@EnabledIfEnvironmentVariable(named = "OPENAI_API_KEY", matches = ".+") +public class OpenAiImageApiStreamingIT { + + OpenAiImageApi openAiImageApi = OpenAiImageApi.builder() + .apiKey(new SimpleApiKey(System.getenv("OPENAI_API_KEY"))) + .build(); + + @Test + void streamImageWithGptImage1Mini() { + // Create a streaming request with partial images + // Using JPEG format to support compression < 100 + OpenAiImageRequest request = new OpenAiImageRequest("A simple red circle", + ImageModel.GPT_IMAGE_1_MINI.getValue(), 1, "medium", null, "1024x1024", null, "test-user", "opaque", + "auto", 90, "jpeg", 2, true); + + Flux eventStream = this.openAiImageApi.streamImage(request); + + // Collect all events + List events = new ArrayList<>(); + StepVerifier.create(eventStream) + .recordWith(() -> events) + .expectNextCount(3) // Expecting 2 partial images + 1 final image + .verifyComplete(); + + // Verify we received events + assertThat(events).isNotEmpty(); + assertThat(events.size()).isGreaterThanOrEqualTo(1); // At least the final image + + // Verify the final event is "image_generation.completed" + OpenAiImageStreamEvent finalEvent = events.get(events.size() - 1); + assertThat(finalEvent.type()).isEqualTo("image_generation.completed"); + assertThat(finalEvent.b64Json()).isNotEmpty(); + assertThat(finalEvent.usage()).isNotNull(); + assertThat(finalEvent.usage().totalTokens()).isPositive(); + + // Verify partial images if present + if (events.size() > 1) { + for (int i = 0; i < events.size() - 1; i++) { + OpenAiImageStreamEvent partialEvent = events.get(i); + assertThat(partialEvent.type()).isEqualTo("image_generation.partial_image"); + assertThat(partialEvent.b64Json()).isNotEmpty(); + assertThat(partialEvent.partialImageIndex()).isNotNull(); + } + } + } + + @Test + void streamImageWithGptImage1MiniOnePartialImage() { + // Create a streaming request with 1 partial image + // Note: Only GPT-Image models support 
streaming (not DALL-E) + OpenAiImageRequest request = new OpenAiImageRequest("A blue square", ImageModel.GPT_IMAGE_1_MINI.getValue(), 1, + "medium", null, "1024x1024", null, null, "opaque", "auto", 85, "jpeg", 1, true); + + Flux eventStream = this.openAiImageApi.streamImage(request); + + StepVerifier.create(eventStream).expectNextMatches(event -> { + // First event should be partial or final + boolean isValidType = "image_generation.partial_image".equals(event.type()) + || "image_generation.completed".equals(event.type()); + return isValidType && event.b64Json() != null && !event.b64Json().isEmpty(); + }).thenConsumeWhile(event -> event.type() != null && event.b64Json() != null).verifyComplete(); + } + + @Test + void streamImageWithNoPartialImages() { + // Create a streaming request with 0 partial images (only final image) + OpenAiImageRequest request = new OpenAiImageRequest("A green triangle", ImageModel.GPT_IMAGE_1_MINI.getValue(), + 1, "low", null, "1024x1024", null, "test-user", "auto", "auto", 80, "jpeg", 0, true); + + Flux eventStream = this.openAiImageApi.streamImage(request); + + // Collect all events + List events = new ArrayList<>(); + StepVerifier.create(eventStream) + .recordWith(() -> events) + .expectNextCount(1) // Only final image + .verifyComplete(); + + // Should only have one event - the completed one + assertThat(events).hasSize(1); + assertThat(events.get(0).type()).isEqualTo("image_generation.completed"); + assertThat(events.get(0).b64Json()).isNotEmpty(); + assertThat(events.get(0).usage()).isNotNull(); + } + + @Test + void streamImageVerifyMetadata() { + // Test that all metadata fields are populated correctly + // Using compression=100 for PNG (PNG only supports compression=100) + OpenAiImageRequest request = new OpenAiImageRequest("A yellow star", ImageModel.GPT_IMAGE_1_MINI.getValue(), 1, + "medium", null, "1024x1024", null, "test-user", "transparent", "auto", 100, "png", 1, true); + + Flux eventStream = 
this.openAiImageApi.streamImage(request); + + StepVerifier.create(eventStream.takeLast(1)) // Take only the final event + .assertNext(event -> { + assertThat(event.type()).isEqualTo("image_generation.completed"); + assertThat(event.b64Json()).isNotEmpty(); + assertThat(event.createdAt()).isPositive(); + assertThat(event.size()).isEqualTo("1024x1024"); + assertThat(event.quality()).isEqualTo("medium"); + assertThat(event.background()).isEqualTo("transparent"); + assertThat(event.outputFormat()).isEqualTo("png"); + assertThat(event.usage()).isNotNull(); + assertThat(event.usage().totalTokens()).isPositive(); + assertThat(event.usage().inputTokens()).isNotNull(); + assertThat(event.usage().outputTokens()).isNotNull(); + }) + .verifyComplete(); + } + +} diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/image/openai-image.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/image/openai-image.adoc index 3d541d2d7f3..3333c80f97b 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/image/openai-image.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/image/openai-image.adoc @@ -1,7 +1,7 @@ = OpenAI Image Generation -Spring AI supports DALL-E, the Image generation model from OpenAI. +Spring AI supports OpenAI's image generation models including GPT-Image-1, GPT-Image-1-Mini, DALL-E 3, and DALL-E 2. == Prerequisites @@ -133,15 +133,21 @@ The prefix `spring.ai.openai.image` is the property prefix that lets you configu | spring.ai.openai.image.api-key | Optional overrides the spring.ai.openai.api-key to provide chat specific api-key | - | spring.ai.openai.image.organization-id | Optionally you can specify which organization used for an API request. | - | spring.ai.openai.image.project-id | Optionally, you can specify which project is used for an API request. | - -| spring.ai.openai.image.options.n | The number of images to generate. Must be between 1 and 10. For dall-e-3, only n=1 is supported. 
| - -| spring.ai.openai.image.options.model | The model to use for image generation. | OpenAiImageApi.DEFAULT_IMAGE_MODEL -| spring.ai.openai.image.options.quality | The quality of the image that will be generated. HD creates images with finer details and greater consistency across the image. This parameter is only supported for dall-e-3. | - -| spring.ai.openai.image.options.response_format | The format in which the generated images are returned. Must be one of URL or b64_json. | - -| `spring.ai.openai.image.options.size` | The size of the generated images. Must be one of 256x256, 512x512, or 1024x1024 for dall-e-2. Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models. | - +| spring.ai.openai.image.options.n | The number of images to generate. Must be between 1 and 10. For dall-e-3 and gpt-image models, only n=1 is supported. | - +| spring.ai.openai.image.options.model | The model to use for image generation. Available models: `dall-e-3` (default), `dall-e-2`, `gpt-image-1`, `gpt-image-1-mini`. | dall-e-3 +| spring.ai.openai.image.options.quality | The quality of the image. For gpt-image-1: `high`, `medium`, `low`, or `auto` (default). For dall-e-3: `hd` or `standard`. | - +| spring.ai.openai.image.options.response_format | The format in which the generated images are returned. Must be one of `url` or `b64_json`. Only supported for dall-e-2 and dall-e-3. GPT-Image models always return base64-encoded images. | - +| `spring.ai.openai.image.options.size` | The size of the generated images. For gpt-image-1: `1024x1024`, `1536x1024`, `1024x1536`, or `auto` (default). For dall-e-2: `256x256`, `512x512`, or `1024x1024`. For dall-e-3: `1024x1024`, `1792x1024`, or `1024x1792`. | - | `spring.ai.openai.image.options.size_width` | The width of the generated images. Must be one of 256, 512, or 1024 for dall-e-2. | - | `spring.ai.openai.image.options.size_height`| The height of the generated images. Must be one of 256, 512, or 1024 for dall-e-2. 
| - -| `spring.ai.openai.image.options.style` | The style of the generated images. Must be one of vivid or natural. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. This parameter is only supported for dall-e-3. | - +| `spring.ai.openai.image.options.style` | The style of the generated images. Must be one of `vivid` or `natural`. Vivid causes the model to lean towards generating hyper-real and dramatic images. Natural causes the model to produce more natural, less hyper-real looking images. This parameter is only supported for dall-e-3. | - | `spring.ai.openai.image.options.user` | A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. | - +| `spring.ai.openai.image.options.background` | The background type for the generated image. Must be one of `transparent`, `opaque`, or `auto` (default). Only supported for GPT-Image models. | - +| `spring.ai.openai.image.options.moderation` | The level of content moderation to apply. Must be one of `low` or `auto` (default). Only supported for GPT-Image models. | - +| `spring.ai.openai.image.options.output_compression` | The compression level for the output image. Integer between 0-100. Only supported for GPT-Image models. NOTE: Compression less than 100 is not supported for PNG output format. | - +| `spring.ai.openai.image.options.output_format` | The format of the output image. Must be one of `png`, `jpeg`, or `webp`. Only supported for GPT-Image models. | - +| `spring.ai.openai.image.options.partial_images` | The number of partial images to generate during streaming. Must be between 0 and 3. Only supported for GPT-Image models with streaming enabled. NOTE: Partial images are only supported with streaming (`stream=true`). | - +| `spring.ai.openai.image.options.stream` | Enable streaming image generation. When `true`, partial images are sent as they are generated. 
Only supported for GPT-Image models. | false |==== NOTE: You can override the common `spring.ai.openai.base-url`, `spring.ai.openai.api-key`, `spring.ai.openai.organization-id` and `spring.ai.openai.project-id` properties. @@ -164,7 +170,7 @@ For example to override the OpenAI specific options such as quality and the numb ImageResponse response = openaiImageModel.call( new ImagePrompt("A light cream colored mini golden doodle", OpenAiImageOptions.builder() - .quality("hd") + .model("dall-e-2") // dall-e-2 supports generating multiple images .N(4) .height(1024) .width(1024).build()) @@ -172,4 +178,94 @@ ImageResponse response = openaiImageModel.call( ); ---- +NOTE: Only DALL-E 2 supports generating multiple images (n > 1). For DALL-E 3 and GPT-Image models, n must be 1. + TIP: In addition to the model specific https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiImageOptions.java[OpenAiImageOptions] you can use a portable https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/image/ImageOptions.java[ImageOptions] instance, created with the https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/image/ImageOptionsBuilder.java[ImageOptionsBuilder#builder()]. + +== Streaming Image Generation + +The GPT-Image models (`gpt-image-1` and `gpt-image-1-mini`) support streaming image generation, which allows you to receive partial images as they are being generated. This provides a better user experience by showing progressive refinement of the image. + +NOTE: Streaming is only supported for GPT-Image models. DALL-E models do not support streaming. 
+ +=== Basic Streaming Example + +[source,java] +---- +OpenAiImageOptions options = OpenAiImageOptions.builder() + .model("gpt-image-1-mini") + .stream(true) + .partialImages(2) // Request 2 partial images before the final image + .size("1024x1024") + .build(); + +ImagePrompt prompt = new ImagePrompt("A serene mountain landscape", options); + +// Stream the image generation +Flux<ImageResponse> imageStream = openAiImageModel.stream(prompt); + +// Subscribe and handle partial + final images +imageStream.subscribe(response -> { + Image image = response.getResult().getOutput(); + String b64Json = image.getB64Json(); + // Display or save the partial/final image + displayImage(b64Json); +}); +---- + +=== Streaming Parameters + +[cols="3,5,1"] +|==== +| Parameter | Description | Default + +| `stream` | Enable streaming mode. Must be `true` for streaming. | false +| `partialImages` | Number of partial images to send during generation. Must be between 0 and 3. Only valid when `stream=true`. | 0 +| `outputCompression` | Compression level for streamed images (0-100). Lower values mean faster streaming but larger file sizes. | - +| `outputFormat` | Format for streamed images: `png`, `jpeg`, or `webp`. | - +|==== + +IMPORTANT: The `partialImages` parameter must be between 0 and 3. Values above 3 will result in an error. + +IMPORTANT: Partial images are only supported with streaming enabled (`stream=true`). Setting `partialImages` without `stream=true` will result in an error. + +IMPORTANT: Compression levels below 100 are not supported for PNG output format. Use JPEG or WebP format if you need compression below 100. 
+ +=== Understanding Partial Images + +- **partialImages = 0**: Only the final completed image is streamed +- **partialImages = 1**: One intermediate image is sent, followed by the final image +- **partialImages = 2**: Two intermediate images are sent, followed by the final image +- **partialImages = 3**: Three intermediate images are sent, followed by the final image (maximum) + +Each partial image shows progressive refinement, allowing you to display an increasingly detailed preview to users before the final high-quality image is ready. + +=== Advanced Streaming Example + +[source,java] +---- +OpenAiImageOptions options = OpenAiImageOptions.builder() + .model("gpt-image-1") + .stream(true) + .partialImages(3) + .size("1536x1024") + .quality("high") + .background("transparent") + .outputFormat("webp") // transparent backgrounds require png or webp; webp also supports compression + .outputCompression(85) + .build(); + +ImagePrompt prompt = new ImagePrompt("A futuristic cityscape at sunset", options); + +Flux<ImageResponse> imageStream = openAiImageModel.stream(prompt); + +// Handle each image progressively +imageStream + .doOnNext(response -> { + Image image = response.getResult().getOutput(); + System.out.println("Received image chunk"); + updatePreview(image.getB64Json()); + }) + .doOnComplete(() -> System.out.println("Image generation complete")) + .subscribe(); +---- diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/imageclient.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/imageclient.adoc index bd8d3a001bd..9bbfb9f4137 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/imageclient.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/imageclient.adoc @@ -28,6 +28,38 @@ public interface ImageModel extends Model { } ---- +=== Streaming Image Model + +For models that support streaming image generation, Spring AI provides the link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/image/StreamingImageModel.java[StreamingImageModel] interface: 
+ +[source,java] +---- +@FunctionalInterface +public interface StreamingImageModel extends StreamingModel<ImagePrompt, ImageResponse> { + + Flux<ImageResponse> stream(ImagePrompt prompt); + +} +---- + +The `StreamingImageModel` interface allows AI models to generate images progressively, emitting partial images as they are being created. This provides a better user experience by showing incremental progress rather than waiting for the final image. + +**Example Usage:** + +[source,java] +---- +ImageOptions options = OpenAiImageOptions.builder() + .model("gpt-image-1-mini") + .stream(true) + .partialImages(2) + .build(); + +ImagePrompt prompt = new ImagePrompt("A mountain landscape", options); +Flux<ImageResponse> stream = streamingImageModel.stream(prompt); +---- + +IMPORTANT: Streaming image generation is currently only supported by OpenAI's GPT-Image models (`gpt-image-1` and `gpt-image-1-mini`). + === ImagePrompt The https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/image/ImagePrompt.java[ImagePrompt] is a `ModelRequest` that encapsulates a list of https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/image/ImageMessage.java[ImageMessage] objects and optional model request options. diff --git a/spring-ai-model/src/main/java/org/springframework/ai/image/StreamingImageModel.java b/spring-ai-model/src/main/java/org/springframework/ai/image/StreamingImageModel.java new file mode 100644 index 00000000000..3360e417229 --- /dev/null +++ b/spring-ai-model/src/main/java/org/springframework/ai/image/StreamingImageModel.java @@ -0,0 +1,69 @@ +/* + * Copyright 2023-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.springframework.ai.image;
+
+import reactor.core.publisher.Flux;
+
+import org.springframework.ai.model.StreamingModel;
+
+/**
+ * Reactive API for streaming image generation. Emits progressive image updates during
+ * generation, enabling real-time user feedback.
+ *
+ * <p>
+ * The stream emits partial images (if configured) followed by the final complete image.
+ * Partial images are best-effort and model-dependent.
+ * </p>
+ *
+ * <p>
+ * Example usage:
+ * </p>
+ *
+ * <pre>{@code
+ * ImageOptions options = OpenAiImageOptions.builder()
+ *     .model("gpt-image-1-mini")
+ *     .stream(true)
+ *     .partialImages(2)
+ *     .build();
+ *
+ * ImagePrompt prompt = new ImagePrompt("A serene mountain landscape", options);
+ * Flux<ImageResponse> stream = streamingImageModel.stream(prompt);
+ *
+ * stream.subscribe(response -> {
+ *     Image image = response.getResult().getOutput();
+ *     displayImage(image.getB64Json());
+ * });
+ * }</pre>
+ *
+ * @author Alexandros Pappas
+ * @since 1.1.0
+ * @see ImageModel
+ * @see ImagePrompt
+ * @see ImageResponse
+ */
+@FunctionalInterface
+public interface StreamingImageModel extends StreamingModel<ImagePrompt, ImageResponse> {
+
+	/**
+	 * Streams image generation responses for the given prompt.
+	 * @param request the image prompt with generation options
+	 * @return reactive stream emitting partial images (if configured) and the final image
+	 */
+	@Override
+	Flux<ImageResponse> stream(ImagePrompt request);
+
+}