From feedfc6c7b4f08128d3ec2b918beeccb8dfb09e3 Mon Sep 17 00:00:00 2001
From: arlaneenalra
Date: Mon, 24 Nov 2025 19:12:33 -0600
Subject: [PATCH] Fix missing "thinking" key in Ollama message metadata when streaming.

Signed-off-by: arlaneenalra
---
 .../ai/ollama/OllamaChatModel.java            | 10 +++-
 .../ollama/OllamaChatModelMetadataTests.java  | 57 +++++++++++++++++++
 2 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java
index a0a4f09dec2..d621d202ac3 100644
--- a/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java
+++ b/models/spring-ai-ollama/src/main/java/org/springframework/ai/ollama/OllamaChatModel.java
@@ -345,12 +345,16 @@ private Flux<ChatResponse> internalStream(Prompt prompt, ChatResponse previousCh
 				.toolCalls(toolCalls)
 				.build();
 
-			ChatGenerationMetadata generationMetadata = ChatGenerationMetadata.NULL;
+			ChatGenerationMetadata.Builder generationMetadataBuilder = ChatGenerationMetadata.builder();
 			if (chunk.promptEvalCount() != null && chunk.evalCount() != null) {
-				generationMetadata = ChatGenerationMetadata.builder().finishReason(chunk.doneReason()).build();
+				generationMetadataBuilder.finishReason(chunk.doneReason());
 			}
 
-			var generator = new Generation(assistantMessage, generationMetadata);
+			if (chunk.message() != null && chunk.message().thinking() != null) {
+				generationMetadataBuilder.metadata("thinking", chunk.message().thinking());
+			}
+
+			var generator = new Generation(assistantMessage, generationMetadataBuilder.build());
 			return new ChatResponse(List.of(generator), from(chunk, previousChatResponse));
 		});
diff --git a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelMetadataTests.java b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelMetadataTests.java
index b8ccdbbbbbc..3fdbc374dfc 100644
--- a/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelMetadataTests.java
+++ b/models/spring-ai-ollama/src/test/java/org/springframework/ai/ollama/OllamaChatModelMetadataTests.java
@@ -16,9 +16,12 @@
 
 package org.springframework.ai.ollama;
 
+import java.util.concurrent.atomic.AtomicBoolean;
+
 import io.micrometer.observation.tck.TestObservationRegistry;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
+import reactor.core.publisher.Flux;
 
 import org.springframework.ai.chat.metadata.ChatGenerationMetadata;
 import org.springframework.ai.chat.model.ChatResponse;
@@ -70,6 +73,60 @@ void ollamaThinkingMetadataCaptured() {
 		});
 	}
 
+	@Test
+	void ollamaThinkingMetadataCapturedWhenStreaming() {
+		var options = OllamaChatOptions.builder().model(MODEL).enableThinking().build();
+		var response = new StringBuilder();
+		var thinking = new StringBuilder();
+		var foundThinking = new AtomicBoolean(false);
+
+		Prompt prompt = new Prompt("Why is the sky blue?", options);
+
+		Flux<ChatResponse> chatResponse = this.chatModel.stream(prompt);
+		var captured = chatResponse.collectList().block();
+
+		assertThat(captured).isNotEmpty();
+
+		captured.forEach(chunk -> {
+			ChatGenerationMetadata chatGenerationMetadata = chunk.getResult().getMetadata();
+			assertThat(chatGenerationMetadata).isNotNull();
+
+			if (chatGenerationMetadata.containsKey("thinking") && chatGenerationMetadata.get("thinking") != null) {
+				foundThinking.set(true);
+				thinking.append(chatGenerationMetadata.get("thinking").toString());
+			}
+
+			response.append(chunk.getResult().getOutput().getText());
+		});
+
+		assertThat(response.toString()).isNotEmpty();
+		assertThat(thinking.toString()).isNotEmpty();
+	}
+
+	@Test
+	void ollamaThinkingMetadataNotCapturedWhenStreamingWhenSetThinkingToFalse() {
+		var options = OllamaChatOptions.builder().model(MODEL).disableThinking().build();
+		var response = new StringBuilder();
+
+		Prompt prompt = new Prompt("Why is the sky blue?", options);
+
+		Flux<ChatResponse> chatResponse = this.chatModel.stream(prompt);
+		var captured = chatResponse.collectList().block();
+
+		assertThat(captured).isNotEmpty();
+
+		captured.forEach(chunk -> {
+			ChatGenerationMetadata chatGenerationMetadata = chunk.getResult().getMetadata();
+			assertThat(chatGenerationMetadata).isNotNull();
+			var thinking = chatGenerationMetadata.get("thinking");
+			assertThat(thinking).isNull();
+
+			response.append(chunk.getResult().getOutput().getText());
+		});
+
+		assertThat(response.toString()).isNotEmpty();
+	}
+
 	@Test
 	void ollamaThinkingMetadataNotCapturedWhenSetThinkFlagToFalse() {
 		// Note: Thinking-capable models (e.g., qwen3:*) auto-enable thinking by default