Skip to content

Commit ccaf87f

Browse files
committed
Add run_stream_sync
1 parent 78fb707 commit ccaf87f

File tree

3 files changed

+356
-1
lines changed

3 files changed

+356
-1
lines changed

pydantic_ai_slim/pydantic_ai/agent/abstract.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,86 @@ async def on_complete() -> None:
569569
if not yielded:
570570
raise exceptions.AgentRunError('Agent run finished without producing a final result') # pragma: no cover
571571

572+
@contextmanager
def run_stream_sync(
    self,
    user_prompt: str | Sequence[_messages.UserContent] | None = None,
    *,
    output_type: OutputSpec[RunOutputDataT] | None = None,
    message_history: Sequence[_messages.ModelMessage] | None = None,
    deferred_tool_results: DeferredToolResults | None = None,
    model: models.Model | models.KnownModelName | str | None = None,
    deps: AgentDepsT = None,
    model_settings: ModelSettings | None = None,
    usage_limits: _usage.UsageLimits | None = None,
    usage: _usage.RunUsage | None = None,
    infer_name: bool = True,
    toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None,
    builtin_tools: Sequence[AbstractBuiltinTool] | None = None,
    event_stream_handler: EventStreamHandler[AgentDepsT] | None = None,
) -> Iterator[result.CollectedRunResult[AgentDepsT, Any]]:
    """Run the agent with a user prompt in collected streaming mode.

    This method builds an internal agent graph (using system prompts, tools and output schemas) and then
    runs the graph until the model produces output matching the `output_type`, for example text or structured data.
    At this point, a streaming run result object is collected and -- once this output has completed streaming -- you can iterate over the complete output, message history, and usage.

    As this method will consider the first output matching the `output_type` to be the final output,
    it will stop running the agent graph and will not execute any tool calls made by the model after this "final" output.
    If you want to always run the agent graph to completion and stream events and output at the same time,
    use [`agent.run()`][pydantic_ai.agent.AbstractAgent.run] with an `event_stream_handler` or [`agent.iter()`][pydantic_ai.agent.AbstractAgent.iter] instead.

    Example:
    ```python
    from pydantic_ai import Agent

    agent = Agent('openai:gpt-4o')

    def main():
        with agent.run_stream_sync('What is the capital of the UK?') as response:
            print(response.get_output())
            #> The capital of the UK is London.
    ```

    Args:
        user_prompt: User input to start/continue the conversation.
        output_type: Custom output type to use for this run, `output_type` may only be used if the agent has no
            output validators since output validators would expect an argument that matches the agent's output type.
        message_history: History of the conversation so far.
        deferred_tool_results: Optional results for deferred tool calls in the message history.
        model: Optional model to use for this run, required if `model` was not set when creating the agent.
        deps: Optional dependencies to use for this run.
        model_settings: Optional settings to use for this model's request.
        usage_limits: Optional limits on model request count or token usage.
        usage: Optional usage to start with, useful for resuming a conversation or agents used in tools.
        infer_name: Whether to try to infer the agent name from the call frame if it's not set.
        toolsets: Optional additional toolsets for this run.
        builtin_tools: Optional additional builtin tools for this run.
        event_stream_handler: Optional handler for events from the model's streaming response and the agent's execution of tools to use for this run.
            It will receive all the events up until the final result is found, which you can then read or stream from inside the context manager.
            Note that it does _not_ receive any events after the final result is found.

    Returns:
        The result of the run.
    """
    async_cm = self.run_stream(
        user_prompt,
        output_type=output_type,
        message_history=message_history,
        deferred_tool_results=deferred_tool_results,
        model=model,
        deps=deps,
        model_settings=model_settings,
        usage_limits=usage_limits,
        usage=usage,
        infer_name=infer_name,
        toolsets=toolsets,
        builtin_tools=builtin_tools,
        event_stream_handler=event_stream_handler,
    )
    loop = get_event_loop()
    async_result = loop.run_until_complete(async_cm.__aenter__())
    try:
        yield result.CollectedRunResult.from_streamed_result(async_result)  # type: ignore[reportReturnType]
    except BaseException as exc:
        # Mirror `async with` semantics: give the async context manager a chance to
        # handle (and possibly suppress) the exception raised inside the `with` body.
        if not loop.run_until_complete(async_cm.__aexit__(type(exc), exc, exc.__traceback__)):
            raise
    else:
        # Bug fix: the async context manager was previously entered but never exited,
        # so `run_stream`'s cleanup/finalization (everything after its `yield`) never ran.
        loop.run_until_complete(async_cm.__aexit__(None, None, None))
651+
572652
@overload
573653
def run_stream_events(
574654
self,

pydantic_ai_slim/pydantic_ai/result.py

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations as _annotations
22

3-
from collections.abc import AsyncIterator, Awaitable, Callable, Iterable
3+
from collections.abc import AsyncIterator, Awaitable, Callable, Iterable, Iterator
44
from copy import deepcopy
55
from dataclasses import dataclass, field
66
from datetime import datetime
@@ -9,6 +9,8 @@
99
from pydantic import ValidationError
1010
from typing_extensions import TypeVar, deprecated
1111

12+
from pydantic_graph._utils import get_event_loop
13+
1214
from . import _utils, exceptions, messages as _messages, models
1315
from ._output import (
1416
OutputDataT_inv,
@@ -543,6 +545,85 @@ async def _marked_completed(self, message: _messages.ModelResponse | None = None
543545
await self._on_complete()
544546

545547

548+
@dataclass(init=False)
class CollectedRunResult(StreamedRunResult[AgentDepsT, OutputDataT]):
    """Synchronous facade over `StreamedRunResult` that eagerly drains the async stream.

    Each `stream_*` method runs the corresponding async stream to completion on the
    event loop, then yields the collected items as a plain synchronous iterator.
    """

    @classmethod
    def from_streamed_result(
        cls, streamed_run_result: StreamedRunResult[AgentDepsT, OutputDataT]
    ) -> CollectedRunResult[AgentDepsT, OutputDataT]:
        """Create a CollectedRunResult from an existing StreamedRunResult."""
        # `init=False` dataclass: bypass `__init__` and mirror the source's state directly.
        collected = cls.__new__(cls)
        for attr_name in (
            '_all_messages',
            '_new_message_index',
            '_stream_response',
            '_on_complete',
            '_run_result',
            'is_complete',
        ):
            setattr(collected, attr_name, getattr(streamed_run_result, attr_name))
        return collected

    def _collect_async_iterator(self, async_iter: AsyncIterator[T]) -> list[T]:
        # Drain the async iterator by driving the event loop to completion.
        async def _drain() -> list[T]:
            items: list[T] = []
            async for item in async_iter:
                items.append(item)
            return items

        return get_event_loop().run_until_complete(_drain())

    def stream_output(self, *, debounce_by: float | None = 0.1) -> Iterator[OutputDataT]:  # type: ignore[reportIncompatibleMethodOverride]
        """Collect and stream the output as an iterable.

        The pydantic validator for structured data will be called in
        [partial mode](https://docs.pydantic.dev/dev/concepts/experimental/#partial-validation)
        on each iteration.

        Args:
            debounce_by: by how much (if at all) to debounce/group the output chunks by. `None` means no debouncing.
                Debouncing is particularly important for long structured outputs to reduce the overhead of
                performing validation as each token is received.

        Returns:
            An iterable of the response data.
        """
        yield from self._collect_async_iterator(super().stream_output(debounce_by=debounce_by))

    def stream_text(self, *, delta: bool = False, debounce_by: float | None = 0.1) -> Iterator[str]:  # type: ignore[reportIncompatibleMethodOverride]
        """Collect and stream the text result as an iterable.

        !!! note
            Result validators will NOT be called on the text result if `delta=True`.

        Args:
            delta: if `True`, yield each chunk of text as it is received, if `False` (default), yield the full text
                up to the current point.
            debounce_by: by how much (if at all) to debounce/group the response chunks by. `None` means no debouncing.
                Debouncing is particularly important for long structured responses to reduce the overhead of
                performing validation as each token is received.
        """
        yield from self._collect_async_iterator(super().stream_text(delta=delta, debounce_by=debounce_by))

    def stream_responses(self, *, debounce_by: float | None = 0.1) -> Iterator[tuple[_messages.ModelResponse, bool]]:  # type: ignore[reportIncompatibleMethodOverride]
        """Collect and stream the response as an iterable of Structured LLM Messages.

        Args:
            debounce_by: by how much (if at all) to debounce/group the response chunks by. `None` means no debouncing.
                Debouncing is particularly important for long structured responses to reduce the overhead of
                performing validation as each token is received.

        Returns:
            An iterable of the structured response message and whether that is the last message.
        """
        yield from self._collect_async_iterator(super().stream_responses(debounce_by=debounce_by))

    def get_output(self) -> OutputDataT:  # type: ignore[reportIncompatibleMethodOverride]
        """Stream the whole response, validate and return it."""
        return get_event_loop().run_until_complete(super().get_output())
625+
626+
546627
@dataclass(repr=False)
547628
class FinalResult(Generic[OutputDataT]):
548629
"""Marker class storing the final output of an agent run and associated metadata."""

tests/test_streaming.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,86 @@ async def ret_a(x: str) -> str:
134134
)
135135

136136

137+
def test_streamed_text_sync_response():
    """`run_stream_sync` exposes messages and usage both before and after consuming the output."""
    m = TestModel()

    test_agent = Agent(m)
    assert test_agent.name is None

    @test_agent.tool_plain
    async def ret_a(x: str) -> str:
        return f'{x}-apple'

    with test_agent.run_stream_sync('Hello') as result:
        # assert test_agent.name == 'test_agent'
        assert not result.is_complete
        # Before the output is consumed, history stops at the tool-return request.
        assert result.all_messages() == snapshot(
            [
                ModelRequest(parts=[UserPromptPart(content='Hello', timestamp=IsNow(tz=timezone.utc))]),
                ModelResponse(
                    parts=[ToolCallPart(tool_name='ret_a', args={'x': 'a'}, tool_call_id=IsStr())],
                    usage=RequestUsage(input_tokens=51),
                    model_name='test',
                    timestamp=IsNow(tz=timezone.utc),
                    provider_name='test',
                ),
                ModelRequest(
                    parts=[
                        ToolReturnPart(
                            tool_name='ret_a', content='a-apple', timestamp=IsNow(tz=timezone.utc), tool_call_id=IsStr()
                        )
                    ]
                ),
            ]
        )
        assert result.usage() == snapshot(
            RunUsage(
                requests=2,
                input_tokens=103,
                output_tokens=5,
                tool_calls=1,
            )
        )
        response = result.get_output()
        assert response == snapshot('{"ret_a":"a-apple"}')
        assert result.is_complete
        assert result.timestamp() == IsNow(tz=timezone.utc)
        # After `get_output()`, the final model response is appended to the history.
        assert result.all_messages() == snapshot(
            [
                ModelRequest(parts=[UserPromptPart(content='Hello', timestamp=IsNow(tz=timezone.utc))]),
                ModelResponse(
                    parts=[ToolCallPart(tool_name='ret_a', args={'x': 'a'}, tool_call_id=IsStr())],
                    usage=RequestUsage(input_tokens=51),
                    model_name='test',
                    timestamp=IsNow(tz=timezone.utc),
                    provider_name='test',
                ),
                ModelRequest(
                    parts=[
                        ToolReturnPart(
                            tool_name='ret_a', content='a-apple', timestamp=IsNow(tz=timezone.utc), tool_call_id=IsStr()
                        )
                    ]
                ),
                ModelResponse(
                    parts=[TextPart(content='{"ret_a":"a-apple"}')],
                    usage=RequestUsage(input_tokens=52, output_tokens=11),
                    model_name='test',
                    timestamp=IsNow(tz=timezone.utc),
                    provider_name='test',
                ),
            ]
        )
        # Usage now reflects the fully streamed final response (output_tokens 5 -> 11).
        assert result.usage() == snapshot(
            RunUsage(
                requests=2,
                input_tokens=103,
                output_tokens=11,
                tool_calls=1,
            )
        )
215+
216+
137217
async def test_streamed_structured_response():
138218
m = TestModel()
139219

@@ -302,6 +382,120 @@ def upcase(text: str) -> str:
302382
)
303383

304384

385+
def test_streamed_text_stream_sync():
    """The synchronous `stream_*` helpers yield the same chunks as their async counterparts."""
    m = TestModel(custom_output_text='The cat sat on the mat.')

    agent = Agent(m)

    with agent.run_stream_sync('Hello') as result:
        # typehint to test (via static typing) that the stream type is correctly inferred
        chunks: list[str] = [c for c in result.stream_text()]
        # one chunk with `stream_text()` due to group_by_temporal
        assert chunks == snapshot(['The cat sat on the mat.'])
        assert result.is_complete

    with agent.run_stream_sync('Hello') as result:
        # typehint to test (via static typing) that the stream type is correctly inferred
        chunks: list[str] = [c for c in result.stream_output()]
        # two chunks with `stream()` due to not-final vs. final
        assert chunks == snapshot(['The cat sat on the mat.', 'The cat sat on the mat.'])
        assert result.is_complete

    with agent.run_stream_sync('Hello') as result:
        # With debouncing disabled, each cumulative chunk is yielded separately.
        assert [c for c in result.stream_text(debounce_by=None)] == snapshot(
            [
                'The ',
                'The cat ',
                'The cat sat ',
                'The cat sat on ',
                'The cat sat on the ',
                'The cat sat on the mat.',
            ]
        )

    with agent.run_stream_sync('Hello') as result:
        # with stream_text, there is no need to do partial validation, so we only get the final message once:
        assert [c for c in result.stream_text(delta=False, debounce_by=None)] == snapshot(
            ['The ', 'The cat ', 'The cat sat ', 'The cat sat on ', 'The cat sat on the ', 'The cat sat on the mat.']
        )

    with agent.run_stream_sync('Hello') as result:
        # delta=True yields only the newly received text per chunk.
        assert [c for c in result.stream_text(delta=True, debounce_by=None)] == snapshot(
            ['The ', 'cat ', 'sat ', 'on ', 'the ', 'mat.']
        )

    def upcase(text: str) -> str:
        return text.upper()

    with agent.run_stream_sync('Hello', output_type=TextOutput(upcase)) as result:
        # The output function is applied to each partial chunk; the final chunk appears twice
        # (not-final then final), matching `stream_output` semantics.
        assert [c for c in result.stream_output(debounce_by=None)] == snapshot(
            [
                'THE ',
                'THE CAT ',
                'THE CAT SAT ',
                'THE CAT SAT ON ',
                'THE CAT SAT ON THE ',
                'THE CAT SAT ON THE MAT.',
                'THE CAT SAT ON THE MAT.',
            ]
        )

    with agent.run_stream_sync('Hello') as result:
        # stream_responses yields (ModelResponse, is_last) pairs; usage grows with each chunk.
        assert [c for c, _is_last in result.stream_responses(debounce_by=None)] == snapshot(
            [
                ModelResponse(
                    parts=[TextPart(content='The ')],
                    usage=RequestUsage(input_tokens=51, output_tokens=1),
                    model_name='test',
                    timestamp=IsNow(tz=timezone.utc),
                    provider_name='test',
                ),
                ModelResponse(
                    parts=[TextPart(content='The cat ')],
                    usage=RequestUsage(input_tokens=51, output_tokens=2),
                    model_name='test',
                    timestamp=IsNow(tz=timezone.utc),
                    provider_name='test',
                ),
                ModelResponse(
                    parts=[TextPart(content='The cat sat ')],
                    usage=RequestUsage(input_tokens=51, output_tokens=3),
                    model_name='test',
                    timestamp=IsNow(tz=timezone.utc),
                    provider_name='test',
                ),
                ModelResponse(
                    parts=[TextPart(content='The cat sat on ')],
                    usage=RequestUsage(input_tokens=51, output_tokens=4),
                    model_name='test',
                    timestamp=IsNow(tz=timezone.utc),
                    provider_name='test',
                ),
                ModelResponse(
                    parts=[TextPart(content='The cat sat on the ')],
                    usage=RequestUsage(input_tokens=51, output_tokens=5),
                    model_name='test',
                    timestamp=IsNow(tz=timezone.utc),
                    provider_name='test',
                ),
                ModelResponse(
                    parts=[TextPart(content='The cat sat on the mat.')],
                    usage=RequestUsage(input_tokens=51, output_tokens=7),
                    model_name='test',
                    timestamp=IsNow(tz=timezone.utc),
                    provider_name='test',
                ),
                ModelResponse(
                    parts=[TextPart(content='The cat sat on the mat.')],
                    usage=RequestUsage(input_tokens=51, output_tokens=7),
                    model_name='test',
                    timestamp=IsNow(tz=timezone.utc),
                    provider_name='test',
                ),
            ]
        )
498+
305499
async def test_plain_response():
306500
call_index = 0
307501

0 commit comments

Comments
 (0)