openai · seratch · Nov 18, 2025 · Oct 30, 2025 · Nov 14, 2025 · Nov 14, 2025
diff --git a/src/agents/items.py b/src/agents/items.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
 import abc
-from dataclasses import dataclass
+import weakref
+from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union, cast
 
 import pydantic
@@ -84,6 +85,29 @@ class RunItemBase(Generic[T], abc.ABC):
     (i.e. `openai.types.responses.ResponseInputItemParam`).
     """
 
+    _agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+
+    def __post_init__(self) -> None:
+        # Keep a weak reference too, so `agent` can still resolve after `release_agent()`.
+        self._agent_ref = weakref.ref(self.agent)
+
+    def __getattr__(self, name: str) -> Any:
+        if name != "agent":  # fallback hook: only `agent` is resolved via the weak reference
+            raise AttributeError(name)
+        return self._agent_ref() if self._agent_ref else None
+
+    def release_agent(self) -> None:
+        """Drop the strong `agent` reference; a weak reference is kept for later lookups."""
+        state = self.__dict__
+        if "agent" in state:
+            held = state["agent"]
+            self._agent_ref = None if held is None else weakref.ref(held)
+            object.__delattr__(self, "agent")
+
     def to_input_item(self) -> TResponseInputItem:
         """Converts this item into an input item suitable for passing to the model."""
         if isinstance(self.raw_item, dict):
@@ -131,6 +155,41 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]):
 
     type: Literal["handoff_output_item"] = "handoff_output_item"
 
+    _source_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+    _target_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        # Weak references to both handoff endpoints; used once the strong ones are released.
+        self._source_agent_ref = weakref.ref(self.source_agent)
+        self._target_agent_ref = weakref.ref(self.target_agent)
+
+    def __getattr__(self, name: str) -> Any:
+        # Only invoked when normal attribute lookup fails (e.g. after `release_agent()`).
+        if name in ("source_agent", "target_agent"):
+            ref = self._source_agent_ref if name == "source_agent" else self._target_agent_ref
+            return ref() if ref else None
+        return super().__getattr__(name)
+
+    def release_agent(self) -> None:
+        """Release the strong `agent`, `source_agent` and `target_agent` references."""
+        super().release_agent()
+        state = self.__dict__
+        for attr in ("source_agent", "target_agent"):
+            if attr not in state:
+                continue
+            held = state[attr]
+            setattr(self, f"_{attr}_ref", None if held is None else weakref.ref(held))
+            object.__delattr__(self, attr)
+
 
 ToolCallItemTypes: TypeAlias = Union[
     ResponseFunctionToolCall,

diff --git a/src/agents/result.py b/src/agents/result.py
@@ -2,6 +2,7 @@
 
 import abc
 import asyncio
+import weakref
 from collections.abc import AsyncIterator
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Literal, cast
@@ -74,6 +75,32 @@ class RunResultBase(abc.ABC):
     def last_agent(self) -> Agent[Any]:
         """The last agent that was run."""
 
+    def release_agents(self) -> None:
+        """
+        Release strong references to agents held by this result. After calling this method,
+        `item.agent` may return `None` and `last_agent` may raise `AgentsException` once the
+        agent has been garbage collected. Callers can use this when they are done inspecting
+        the result and want to eagerly drop any associated agent graph.
+        """
+        for item in self.new_items:
+            release = getattr(item, "release_agent", None)
+            if callable(release):
+                release()
+        self._release_last_agent_reference()
+
+    def __del__(self) -> None:
+        try:
+            # Safety net when `release_agents()` was never called (no-leak guarantee in tests).
+            # NOTE(review): this also releases items that other code may still hold.
+            self.release_agents()
+        except Exception:
+            # Avoid raising from __del__.
+            pass
+
+    @abc.abstractmethod
+    def _release_last_agent_reference(self) -> None:
+        """Drop the strong agent reference backing `last_agent` (subclass-specific)."""
+
     def final_output_as(self, cls: type[T], raise_if_incorrect_type: bool = False) -> T:
         """A convenience method to cast the final output to a specific type. By default, the cast
         is only for the typechecker. If you set `raise_if_incorrect_type` to True, we'll raise a
@@ -111,11 +138,33 @@ def last_response_id(self) -> str | None:
 @dataclass
 class RunResult(RunResultBase):
     _last_agent: Agent[Any]
+    _last_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+
+    def __post_init__(self) -> None:
+        self._last_agent_ref = weakref.ref(self._last_agent)  # fallback for `last_agent`
 
     @property
     def last_agent(self) -> Agent[Any]:
         """The last agent that was run."""
-        return self._last_agent
+        strong = cast("Agent[Any] | None", self.__dict__.get("_last_agent"))
+        if strong is not None:
+            return strong
+        # Strong reference already released: use the weak one while the agent is still alive.
+        revived = self._last_agent_ref() if self._last_agent_ref else None
+        if revived is None:
+            raise AgentsException("Last agent reference is no longer available.")
+        return revived
+
+    def _release_last_agent_reference(self) -> None:
+        """Swap the strong `_last_agent` reference for a weak one; no-op if already released."""
+        agent = cast("Agent[Any] | None", self.__dict__.get("_last_agent"))
+        if agent is not None:
+            self._last_agent_ref = weakref.ref(agent)
+            self.__dict__["_last_agent"] = None  # keep the field: dataclass __repr__ reads it
 
     def __str__(self) -> str:
         return pretty_print_result(self)
@@ -150,6 +199,12 @@ class RunResultStreaming(RunResultBase):
     is_complete: bool = False
     """Whether the agent has finished running."""
 
+    _current_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+
     # Queues that the background run_loop writes to
     _event_queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel] = field(
         default_factory=asyncio.Queue, repr=False
@@ -167,12 +222,29 @@ class RunResultStreaming(RunResultBase):
     # Soft cancel state
     _cancel_mode: Literal["none", "immediate", "after_turn"] = field(default="none", repr=False)
 
+    def __post_init__(self) -> None:
+        self._current_agent_ref = weakref.ref(self.current_agent)  # fallback for `last_agent`
+
     @property
     def last_agent(self) -> Agent[Any]:
         """The last agent that was run. Updates as the agent run progresses, so the true last agent
         is only available after the agent run is complete.
         """
-        return self.current_agent
+        strong = cast("Agent[Any] | None", self.__dict__.get("current_agent"))
+        if strong is not None:
+            return strong
+        # Strong reference already released: use the weak one while the agent is still alive.
+        revived = self._current_agent_ref() if self._current_agent_ref else None
+        if revived is None:
+            raise AgentsException("Last agent reference is no longer available.")
+        return revived
+
+    def _release_last_agent_reference(self) -> None:
+        """Swap the strong `current_agent` reference for a weak one; no-op if already released."""
+        agent = cast("Agent[Any] | None", self.__dict__.get("current_agent"))
+        if agent is not None:
+            self._current_agent_ref = weakref.ref(agent)
+            self.__dict__["current_agent"] = None  # keep the field: dataclass __repr__ reads it
 
     def cancel(self, mode: Literal["immediate", "after_turn"] = "immediate") -> None:
         """Cancel the streaming run.

diff --git a/tests/test_agent_memory_leak.py b/tests/test_agent_memory_leak.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+import gc
+import weakref
+
+import pytest
+from openai.types.responses import ResponseOutputMessage, ResponseOutputText
+
+from agents import Agent, Runner
+from tests.fake_model import FakeModel
+
+
+def _make_message(text: str) -> ResponseOutputMessage:
+    """Build a completed assistant message whose only content part is `text`."""
+
+    text_part = ResponseOutputText(annotations=[], text=text, type="output_text")
+    message = ResponseOutputMessage(
+        id="msg-1", content=[text_part], role="assistant", status="completed", type="message"
+    )
+    return message
+
+
+@pytest.mark.asyncio
+async def test_agent_is_released_after_run() -> None:
+    fake_model = FakeModel(initial_output=[_make_message("Paris")])
+    agent = Agent(name="leak-test-agent", instructions="Answer questions.", model=fake_model)
+    agent_ref = weakref.ref(agent)
+
+    # The result is discarded right away, so no strong reference to the agent should survive it.
+    await Runner.run(agent, "What is the capital of France?")
+
+    del agent
+    gc.collect()
+
+    assert agent_ref() is None
diff --git a/tests/test_items_helpers.py b/tests/test_items_helpers.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import gc
 import json
 
 from openai.types.responses.response_computer_tool_call import (
@@ -148,6 +149,19 @@ def test_text_message_outputs_across_list_of_runitems() -> None:
     assert ItemHelpers.text_message_outputs([item1, non_message_item, item2]) == "foobar"
 
 
+def test_message_output_item_retains_agent_until_release() -> None:
+    # The inline agent has no other owner, so the item holds its only strong reference.
+    message = make_message([ResponseOutputText(annotations=[], text="hello", type="output_text")])
+    item = MessageOutputItem(agent=Agent(name="inline"), raw_item=message)
+    assert item.agent is not None
+    assert item.agent.name == "inline"
+
+    # Releasing drops that last strong reference, so after a GC pass the weak one is dead.
+    item.release_agent()
+    gc.collect()
+    assert item.agent is None
+
+
 def test_tool_call_output_item_constructs_function_call_output_dict():
     # Build a simple ResponseFunctionToolCall.
     call = ResponseFunctionToolCall(

diff --git a/tests/test_result_cast.py b/tests/test_result_cast.py
@@ -1,9 +1,13 @@
+import gc
+import weakref
 from typing import Any
 
 import pytest
+from openai.types.responses import ResponseOutputMessage, ResponseOutputText
 from pydantic import BaseModel
 
-from agents import Agent, RunContextWrapper, RunResult
+from agents import Agent, MessageOutputItem, RunContextWrapper, RunResult
+from agents.exceptions import AgentsException
 
 
 def create_run_result(final_output: Any) -> RunResult:
@@ -59,3 +63,39 @@ def test_bad_cast_with_param_raises():
     result = create_run_result(Foo(bar=1))
     with pytest.raises(TypeError):
         result.final_output_as(int, raise_if_incorrect_type=True)
+
+
+def test_run_result_release_agents_breaks_strong_refs() -> None:
+    message = ResponseOutputMessage(
+        id="msg",
+        content=[ResponseOutputText(annotations=[], text="hello", type="output_text")],
+        role="assistant",
+        status="completed",
+        type="message",
+    )
+    agent = Agent(name="leak-test-agent")
+    item = MessageOutputItem(agent=agent, raw_item=message)
+    result = RunResult(
+        input="test",
+        new_items=[item],
+        raw_responses=[],
+        final_output=None,
+        input_guardrail_results=[],
+        output_guardrail_results=[],
+        tool_input_guardrail_results=[],
+        tool_output_guardrail_results=[],
+        _last_agent=agent,
+        context_wrapper=RunContextWrapper(context=None),
+    )
+    assert item.agent is not None
+    assert item.agent.name == "leak-test-agent"
+    # Drop every strong reference (result, item, local name) before collecting.
+    agent_ref = weakref.ref(agent)
+    result.release_agents()
+    del agent
+    gc.collect()
+    # With the agent collected, the item reports None and the result raises.
+    assert agent_ref() is None
+    assert item.agent is None
+    with pytest.raises(AgentsException):
+        _ = result.last_agent