From bc352ccef085cef39280ce0b19ffbf7678199348 Mon Sep 17 00:00:00 2001
From: Daniele Morotti <58258368+DanieleMorotti@users.noreply.github.com>
Date: Thu, 30 Oct 2025 12:16:09 +0100
Subject: [PATCH 1/3] Fix agent memory leak with weakref

---
 src/agents/items.py             | 45 ++++++++++++++++++++++++++++++++-
 tests/test_agent_memory_leak.py | 34 +++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_agent_memory_leak.py

diff --git a/src/agents/items.py b/src/agents/items.py
index 8e7d1cfc3..96f9577a9 100644
--- a/src/agents/items.py
+++ b/src/agents/items.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
 import abc
-from dataclasses import dataclass
+import weakref
+from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union
 
 import pydantic
@@ -84,6 +85,22 @@ class RunItemBase(Generic[T], abc.ABC):
     (i.e. `openai.types.responses.ResponseInputItemParam`).
     """
 
+    _agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+
+    def __post_init__(self) -> None:
+        # Store the producing agent weakly to avoid keeping it alive after the run.
+        self._agent_ref = weakref.ref(self.agent)
+        object.__delattr__(self, "agent")
+
+    def __getattr__(self, name: str) -> Any:
+        if name == "agent":
+            return self._agent_ref() if self._agent_ref else None
+        raise AttributeError(name)
+
     def to_input_item(self) -> TResponseInputItem:
         """Converts this item into an input item suitable for passing to the model."""
         if isinstance(self.raw_item, dict):
@@ -131,6 +148,32 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]):
 
     type: Literal["handoff_output_item"] = "handoff_output_item"
 
+    _source_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+    _target_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+
+    def __post_init__(self) -> None:
+        super().__post_init__()
+        # Handoff metadata should not hold strong references to the agents either.
+        self._source_agent_ref = weakref.ref(self.source_agent)
+        self._target_agent_ref = weakref.ref(self.target_agent)
+        object.__delattr__(self, "source_agent")
+        object.__delattr__(self, "target_agent")
+
+    def __getattr__(self, name: str) -> Any:
+        if name == "source_agent":
+            return self._source_agent_ref() if self._source_agent_ref else None
+        if name == "target_agent":
+            return self._target_agent_ref() if self._target_agent_ref else None
+        return super().__getattr__(name)
+
 
 ToolCallItemTypes: TypeAlias = Union[
     ResponseFunctionToolCall,
diff --git a/tests/test_agent_memory_leak.py b/tests/test_agent_memory_leak.py
new file mode 100644
index 000000000..6690d2189
--- /dev/null
+++ b/tests/test_agent_memory_leak.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+import gc
+import weakref
+
+import pytest
+from openai.types.responses import ResponseOutputMessage, ResponseOutputText
+
+from agents import Agent, Runner
+from tests.fake_model import FakeModel
+
+
+def _make_message(text: str) -> ResponseOutputMessage:
+    return ResponseOutputMessage(
+        id="msg-1",
+        content=[ResponseOutputText(annotations=[], text=text, type="output_text")],
+        role="assistant",
+        status="completed",
+        type="message",
+    )
+
+
+@pytest.mark.asyncio
+async def test_agent_is_released_after_run() -> None:
+    fake_model = FakeModel(initial_output=[_make_message("Paris")])
+    agent = Agent(name="leaker", instructions="Answer questions.", model=fake_model)
+    agent_ref = weakref.ref(agent)
+
+    await Runner.run(agent, "What is the capital of France?")
+
+    del agent
+    gc.collect()
+
+    assert agent_ref() is None

From a228bdf567f2f6cef769980cd653042c2f002ebe Mon Sep 17 00:00:00 2001
From: Daniele Morotti <58258368+DanieleMorotti@users.noreply.github.com>
Date: Fri, 14 Nov 2025 13:05:10 +0100
Subject: [PATCH 2/3] Add suggested corrections

---
 src/agents/items.py             | 64 ++++++++++++++++++++++-----
 src/agents/result.py            | 76 ++++++++++++++++++++++++++++++++-
 tests/test_agent_memory_leak.py |  3 +-
 tests/test_items_helpers.py     | 44 ++++++++++++++-----
 tests/test_result_cast.py       | 42 +++++++++++++++++-
 5 files changed, 204 insertions(+), 25 deletions(-)

diff --git a/src/agents/items.py b/src/agents/items.py
index 96f9577a9..ab6f1f790 100644
--- a/src/agents/items.py
+++ b/src/agents/items.py
@@ -3,7 +3,7 @@
 import abc
 import weakref
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union, cast
 
 import pydantic
 from openai.types.responses import (
@@ -92,15 +92,22 @@ class RunItemBase(Generic[T], abc.ABC):
     )
 
     def __post_init__(self) -> None:
-        # Store the producing agent weakly to avoid keeping it alive after the run.
+        # Store a weak reference so we can release the strong reference later if desired.
         self._agent_ref = weakref.ref(self.agent)
-        object.__delattr__(self, "agent")
 
     def __getattr__(self, name: str) -> Any:
         if name == "agent":
             return self._agent_ref() if self._agent_ref else None
         raise AttributeError(name)
 
+    def release_agent(self) -> None:
+        """Release the strong reference to the agent while keeping a weak reference."""
+        if "agent" not in self.__dict__:
+            return
+        agent = self.__dict__["agent"]
+        self._agent_ref = weakref.ref(agent) if agent is not None else None
+        object.__delattr__(self, "agent")
+
     def to_input_item(self) -> TResponseInputItem:
         """Converts this item into an input item suitable for passing to the model."""
         if isinstance(self.raw_item, dict):
@@ -161,11 +168,9 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]):
 
     def __post_init__(self) -> None:
         super().__post_init__()
-        # Handoff metadata should not hold strong references to the agents either.
+        # Maintain weak references so downstream code can release the strong references when safe.
         self._source_agent_ref = weakref.ref(self.source_agent)
         self._target_agent_ref = weakref.ref(self.target_agent)
-        object.__delattr__(self, "source_agent")
-        object.__delattr__(self, "target_agent")
 
     def __getattr__(self, name: str) -> Any:
         if name == "source_agent":
@@ -174,6 +179,17 @@ def __getattr__(self, name: str) -> Any:
             return self._target_agent_ref() if self._target_agent_ref else None
         return super().__getattr__(name)
 
+    def release_agent(self) -> None:
+        super().release_agent()
+        if "source_agent" in self.__dict__:
+            source_agent = self.__dict__["source_agent"]
+            self._source_agent_ref = weakref.ref(source_agent) if source_agent is not None else None
+            object.__delattr__(self, "source_agent")
+        if "target_agent" in self.__dict__:
+            target_agent = self.__dict__["target_agent"]
+            self._target_agent_ref = weakref.ref(target_agent) if target_agent is not None else None
+            object.__delattr__(self, "target_agent")
+
 
 ToolCallItemTypes: TypeAlias = Union[
     ResponseFunctionToolCall,
@@ -184,12 +200,13 @@ def __getattr__(self, name: str) -> Any:
     ResponseCodeInterpreterToolCall,
     ImageGenerationCall,
     LocalShellCall,
+    dict[str, Any],
 ]
 """A type that represents a tool call item."""
 
 
 @dataclass
-class ToolCallItem(RunItemBase[ToolCallItemTypes]):
+class ToolCallItem(RunItemBase[Any]):
     """Represents a tool call e.g. a function call or computer action call."""
 
     raw_item: ToolCallItemTypes
@@ -198,13 +215,19 @@ class ToolCallItem(RunItemBase[ToolCallItemTypes]):
     type: Literal["tool_call_item"] = "tool_call_item"
 
 
+ToolCallOutputTypes: TypeAlias = Union[
+    FunctionCallOutput,
+    ComputerCallOutput,
+    LocalShellCallOutput,
+    dict[str, Any],
+]
+
+
 @dataclass
-class ToolCallOutputItem(
-    RunItemBase[Union[FunctionCallOutput, ComputerCallOutput, LocalShellCallOutput]]
-):
+class ToolCallOutputItem(RunItemBase[Any]):
     """Represents the output of a tool call."""
 
-    raw_item: FunctionCallOutput | ComputerCallOutput | LocalShellCallOutput
+    raw_item: ToolCallOutputTypes
     """The raw item from the model."""
 
     output: Any
@@ -214,6 +237,25 @@ class ToolCallOutputItem(
 
     type: Literal["tool_call_output_item"] = "tool_call_output_item"
 
+    def to_input_item(self) -> TResponseInputItem:
+        """Converts the tool output into an input item for the next model turn.
+
+        Hosted tool outputs (e.g. shell/apply_patch) carry a `status` field for the SDK's
+        book-keeping, but the Responses API does not yet accept that parameter. Strip it from the
+        payload we send back to the model while keeping the original raw item intact.
+        """
+
+        if isinstance(self.raw_item, dict):
+            payload = dict(self.raw_item)
+            payload_type = payload.get("type")
+            if payload_type == "shell_call_output":
+                payload.pop("status", None)
+                payload.pop("shell_output", None)
+                payload.pop("provider_data", None)
+            return cast(TResponseInputItem, payload)
+
+        return super().to_input_item()
+
 
 @dataclass
 class ReasoningItem(RunItemBase[ResponseReasoningItem]):
diff --git a/src/agents/result.py b/src/agents/result.py
index 3fe20cfa5..181fffcf1 100644
--- a/src/agents/result.py
+++ b/src/agents/result.py
@@ -2,6 +2,7 @@
 
 import abc
 import asyncio
+import weakref
 from collections.abc import AsyncIterator
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Any, Literal, cast
@@ -74,6 +75,32 @@ class RunResultBase(abc.ABC):
     def last_agent(self) -> Agent[Any]:
         """The last agent that was run."""
 
+    def release_agents(self) -> None:
+        """
+        Release strong references to agents held by this result. After calling this method,
+        accessing `item.agent` or `last_agent` may return `None` if the agent has been garbage
+        collected. Callers can use this when they are done inspecting the result and want to
+        eagerly drop any associated agent graph.
+        """
+        for item in self.new_items:
+            release = getattr(item, "release_agent", None)
+            if callable(release):
+                release()
+        self._release_last_agent_reference()
+
+    def __del__(self) -> None:
+        try:
+            # Fall back to releasing agents automatically in case the caller never invoked
+            # `release_agents()` explicitly. This keeps the no-leak guarantee confirmed by tests.
+            self.release_agents()
+        except Exception:
+            # Avoid raising from __del__.
+            pass
+
+    @abc.abstractmethod
+    def _release_last_agent_reference(self) -> None:
+        """Release stored agent reference specific to the concrete result type."""
+
     def final_output_as(self, cls: type[T], raise_if_incorrect_type: bool = False) -> T:
         """A convenience method to cast the final output to a specific type. By default, the cast
         is only for the typechecker. If you set `raise_if_incorrect_type` to True, we'll raise a
@@ -111,11 +138,33 @@ def last_response_id(self) -> str | None:
 @dataclass
 class RunResult(RunResultBase):
     _last_agent: Agent[Any]
+    _last_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+
+    def __post_init__(self) -> None:
+        self._last_agent_ref = weakref.ref(self._last_agent)
 
     @property
     def last_agent(self) -> Agent[Any]:
         """The last agent that was run."""
-        return self._last_agent
+        agent = cast("Agent[Any] | None", self.__dict__.get("_last_agent"))
+        if agent is not None:
+            return agent
+        if self._last_agent_ref:
+            agent = self._last_agent_ref()
+            if agent is not None:
+                return agent
+        raise AgentsException("Last agent reference is no longer available.")
+
+    def _release_last_agent_reference(self) -> None:
+        agent = cast("Agent[Any] | None", self.__dict__.get("_last_agent"))
+        if agent is None:
+            return
+        self._last_agent_ref = weakref.ref(agent)
+        object.__delattr__(self, "_last_agent")
 
     def __str__(self) -> str:
         return pretty_print_result(self)
@@ -150,6 +199,12 @@ class RunResultStreaming(RunResultBase):
     is_complete: bool = False
     """Whether the agent has finished running."""
 
+    _current_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
+        init=False,
+        repr=False,
+        default=None,
+    )
+
     # Queues that the background run_loop writes to
     _event_queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel] = field(
         default_factory=asyncio.Queue, repr=False
@@ -167,12 +222,29 @@ class RunResultStreaming(RunResultBase):
     # Soft cancel state
     _cancel_mode: Literal["none", "immediate", "after_turn"] = field(default="none", repr=False)
 
+    def __post_init__(self) -> None:
+        self._current_agent_ref = weakref.ref(self.current_agent)
+
     @property
     def last_agent(self) -> Agent[Any]:
         """The last agent that was run. Updates as the agent run progresses, so the true last agent
         is only available after the agent run is complete.
         """
-        return self.current_agent
+        agent = cast("Agent[Any] | None", self.__dict__.get("current_agent"))
+        if agent is not None:
+            return agent
+        if self._current_agent_ref:
+            agent = self._current_agent_ref()
+            if agent is not None:
+                return agent
+        raise AgentsException("Last agent reference is no longer available.")
+
+    def _release_last_agent_reference(self) -> None:
+        agent = cast("Agent[Any] | None", self.__dict__.get("current_agent"))
+        if agent is None:
+            return
+        self._current_agent_ref = weakref.ref(agent)
+        object.__delattr__(self, "current_agent")
 
     def cancel(self, mode: Literal["immediate", "after_turn"] = "immediate") -> None:
         """Cancel the streaming run.
diff --git a/tests/test_agent_memory_leak.py b/tests/test_agent_memory_leak.py
index 6690d2189..424aa399d 100644
--- a/tests/test_agent_memory_leak.py
+++ b/tests/test_agent_memory_leak.py
@@ -23,9 +23,10 @@ def _make_message(text: str) -> ResponseOutputMessage:
 @pytest.mark.asyncio
 async def test_agent_is_released_after_run() -> None:
     fake_model = FakeModel(initial_output=[_make_message("Paris")])
-    agent = Agent(name="leaker", instructions="Answer questions.", model=fake_model)
+    agent = Agent(name="leak-test-agent", instructions="Answer questions.", model=fake_model)
     agent_ref = weakref.ref(agent)
 
+    # Running the agent should not leave behind strong references once the result goes out of scope.
     await Runner.run(agent, "What is the capital of France?")
 
     del agent
diff --git a/tests/test_items_helpers.py b/tests/test_items_helpers.py
index a94d74547..408ba1319 100644
--- a/tests/test_items_helpers.py
+++ b/tests/test_items_helpers.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import gc
 import json
 
 from openai.types.responses.response_computer_tool_call import (
@@ -57,8 +58,8 @@ def make_message(
 
 def test_extract_last_content_of_text_message() -> None:
     # Build a message containing two text segments.
-    content1 = ResponseOutputText(annotations=[], text="Hello ", type="output_text")
-    content2 = ResponseOutputText(annotations=[], text="world!", type="output_text")
+    content1 = ResponseOutputText(annotations=[], text="Hello ", type="output_text", logprobs=[])
+    content2 = ResponseOutputText(annotations=[], text="world!", type="output_text", logprobs=[])
     message = make_message([content1, content2])
     # Helpers should yield the last segment's text.
     assert ItemHelpers.extract_last_content(message) == "world!"
@@ -66,7 +67,9 @@ def test_extract_last_content_of_text_message() -> None:
 
 def test_extract_last_content_of_refusal_message() -> None:
     # Build a message whose last content entry is a refusal.
-    content1 = ResponseOutputText(annotations=[], text="Before refusal", type="output_text")
+    content1 = ResponseOutputText(
+        annotations=[], text="Before refusal", type="output_text", logprobs=[]
+    )
     refusal = ResponseOutputRefusal(refusal="I cannot do that", type="refusal")
     message = make_message([content1, refusal])
     # Helpers should extract the refusal string when last content is a refusal.
@@ -87,8 +90,8 @@ def test_extract_last_content_non_message_returns_empty() -> None:
 
 def test_extract_last_text_returns_text_only() -> None:
     # A message whose last segment is text yields the text.
-    first_text = ResponseOutputText(annotations=[], text="part1", type="output_text")
-    second_text = ResponseOutputText(annotations=[], text="part2", type="output_text")
+    first_text = ResponseOutputText(annotations=[], text="part1", type="output_text", logprobs=[])
+    second_text = ResponseOutputText(annotations=[], text="part2", type="output_text", logprobs=[])
     message = make_message([first_text, second_text])
     assert ItemHelpers.extract_last_text(message) == "part2"
     # Whereas when last content is a refusal, extract_last_text returns None.
@@ -116,9 +119,9 @@ def test_input_to_new_input_list_deep_copies_lists() -> None:
 def test_text_message_output_concatenates_text_segments() -> None:
     # Build a message with both text and refusal segments, only text segments are concatenated.
     pieces: list[ResponseOutputText | ResponseOutputRefusal] = []
-    pieces.append(ResponseOutputText(annotations=[], text="a", type="output_text"))
+    pieces.append(ResponseOutputText(annotations=[], text="a", type="output_text", logprobs=[]))
     pieces.append(ResponseOutputRefusal(refusal="denied", type="refusal"))
-    pieces.append(ResponseOutputText(annotations=[], text="b", type="output_text"))
+    pieces.append(ResponseOutputText(annotations=[], text="b", type="output_text", logprobs=[]))
     message = make_message(pieces)
     # Wrap into MessageOutputItem to feed into text_message_output.
     item = MessageOutputItem(agent=Agent(name="test"), raw_item=message)
@@ -131,8 +134,12 @@ def test_text_message_outputs_across_list_of_runitems() -> None:
     that only MessageOutputItem instances contribute any text. The non-message
     (ReasoningItem) should be ignored by Helpers.text_message_outputs.
     """
-    message1 = make_message([ResponseOutputText(annotations=[], text="foo", type="output_text")])
-    message2 = make_message([ResponseOutputText(annotations=[], text="bar", type="output_text")])
+    message1 = make_message(
+        [ResponseOutputText(annotations=[], text="foo", type="output_text", logprobs=[])]
+    )
+    message2 = make_message(
+        [ResponseOutputText(annotations=[], text="bar", type="output_text", logprobs=[])]
+    )
     item1: RunItem = MessageOutputItem(agent=Agent(name="test"), raw_item=message1)
     item2: RunItem = MessageOutputItem(agent=Agent(name="test"), raw_item=message2)
     # Create a non-message run item of a different type, e.g., a reasoning trace.
@@ -142,6 +149,19 @@ def test_text_message_outputs_across_list_of_runitems() -> None:
     assert ItemHelpers.text_message_outputs([item1, non_message_item, item2]) == "foobar"
 
 
+def test_message_output_item_retains_agent_until_release() -> None:
+    # Construct the run item with an inline agent to ensure the run item keeps a strong reference.
+    message = make_message([ResponseOutputText(annotations=[], text="hello", type="output_text")])
+    item = MessageOutputItem(agent=Agent(name="inline"), raw_item=message)
+    assert item.agent is not None
+    assert item.agent.name == "inline"
+
+    # After explicitly releasing, the weak reference should drop once GC runs.
+    item.release_agent()
+    gc.collect()
+    assert item.agent is None
+
+
 def test_tool_call_output_item_constructs_function_call_output_dict():
     # Build a simple ResponseFunctionToolCall.
     call = ResponseFunctionToolCall(
@@ -171,7 +191,9 @@ def test_tool_call_output_item_constructs_function_call_output_dict():
 
 def test_to_input_items_for_message() -> None:
     """An output message should convert into an input dict matching the message's own structure."""
-    content = ResponseOutputText(annotations=[], text="hello world", type="output_text")
+    content = ResponseOutputText(
+        annotations=[], text="hello world", type="output_text", logprobs=[]
+    )
     message = ResponseOutputMessage(
         id="m1", content=[content], role="assistant", status="completed", type="message"
     )
@@ -184,6 +206,7 @@ def test_to_input_items_for_message() -> None:
         "content": [
             {
                 "annotations": [],
+                "logprobs": [],
                 "text": "hello world",
                 "type": "output_text",
             }
@@ -305,6 +328,7 @@ def test_input_to_new_input_list_copies_the_ones_produced_by_pydantic() -> None:
                 type="output_text",
                 text="Hey, what's up?",
                 annotations=[],
+                logprobs=[],
             )
         ],
         role="assistant",
diff --git a/tests/test_result_cast.py b/tests/test_result_cast.py
index 4ef1a293d..87f4fc2d0 100644
--- a/tests/test_result_cast.py
+++ b/tests/test_result_cast.py
@@ -1,9 +1,13 @@
+import gc
+import weakref
 from typing import Any
 
 import pytest
+from openai.types.responses import ResponseOutputMessage, ResponseOutputText
 from pydantic import BaseModel
 
-from agents import Agent, RunContextWrapper, RunResult
+from agents import Agent, MessageOutputItem, RunContextWrapper, RunResult
+from agents.exceptions import AgentsException
 
 
 def create_run_result(final_output: Any) -> RunResult:
@@ -59,3 +63,39 @@ def test_bad_cast_with_param_raises():
     result = create_run_result(Foo(bar=1))
     with pytest.raises(TypeError):
         result.final_output_as(int, raise_if_incorrect_type=True)
+
+
+def test_run_result_release_agents_breaks_strong_refs() -> None:
+    message = ResponseOutputMessage(
+        id="msg",
+        content=[ResponseOutputText(annotations=[], text="hello", type="output_text")],
+        role="assistant",
+        status="completed",
+        type="message",
+    )
+    agent = Agent(name="leak-test-agent")
+    item = MessageOutputItem(agent=agent, raw_item=message)
+    result = RunResult(
+        input="test",
+        new_items=[item],
+        raw_responses=[],
+        final_output=None,
+        input_guardrail_results=[],
+        output_guardrail_results=[],
+        tool_input_guardrail_results=[],
+        tool_output_guardrail_results=[],
+        _last_agent=agent,
+        context_wrapper=RunContextWrapper(context=None),
+    )
+    assert item.agent is not None
+    assert item.agent.name == "leak-test-agent"
+
+    agent_ref = weakref.ref(agent)
+    result.release_agents()
+    del agent
+    gc.collect()
+
+    assert agent_ref() is None
+    assert item.agent is None
+    with pytest.raises(AgentsException):
+        _ = result.last_agent

From bf3895c840316ca5ba3f28a63b3941ab246f2cf9 Mon Sep 17 00:00:00 2001
From: DanieleMorotti <daniele.morotti.99@gmail.com>
Date: Sat, 15 Nov 2025 14:30:07 +0100
Subject: [PATCH 3/3] Second round of corrections

---
 src/agents/items.py         |  54 +++++++---
 src/agents/result.py        |  23 +++--
 src/agents/run.py           |  93 ++++++++++--------
 tests/test_items_helpers.py |  53 +++++++++-
 tests/test_result_cast.py   | 191 ++++++++++++++++++++++++++++++++++--
 5 files changed, 340 insertions(+), 74 deletions(-)

diff --git a/src/agents/items.py b/src/agents/items.py
index ab6f1f790..991a7f877 100644
--- a/src/agents/items.py
+++ b/src/agents/items.py
@@ -73,6 +73,9 @@
 
 T = TypeVar("T", bound=Union[TResponseOutputItem, TResponseInputItem])
 
+# Distinguish a missing dict entry from an explicit None value.
+_MISSING_ATTR_SENTINEL = object()
+
 
 @dataclass
 class RunItemBase(Generic[T], abc.ABC):
@@ -95,18 +98,38 @@ def __post_init__(self) -> None:
         # Store a weak reference so we can release the strong reference later if desired.
         self._agent_ref = weakref.ref(self.agent)
 
-    def __getattr__(self, name: str) -> Any:
+    def __getattribute__(self, name: str) -> Any:
         if name == "agent":
-            return self._agent_ref() if self._agent_ref else None
-        raise AttributeError(name)
+            return self._get_agent_via_weakref("agent", "_agent_ref")
+        return super().__getattribute__(name)
 
     def release_agent(self) -> None:
         """Release the strong reference to the agent while keeping a weak reference."""
         if "agent" not in self.__dict__:
             return
         agent = self.__dict__["agent"]
+        if agent is None:
+            return
         self._agent_ref = weakref.ref(agent) if agent is not None else None
-        object.__delattr__(self, "agent")
+        # Set to None instead of deleting so dataclass repr/asdict keep working.
+        self.__dict__["agent"] = None
+
+    def _get_agent_via_weakref(self, attr_name: str, ref_name: str) -> Any:
+        # Preserve the dataclass field so repr/asdict still read it, but lazily resolve the weakref
+        # when the stored value is None (meaning release_agent already dropped the strong ref).
+        # If the attribute was never overridden we fall back to the default descriptor chain.
+        data = object.__getattribute__(self, "__dict__")
+        value = data.get(attr_name, _MISSING_ATTR_SENTINEL)
+        if value is _MISSING_ATTR_SENTINEL:
+            return object.__getattribute__(self, attr_name)
+        if value is not None:
+            return value
+        ref = object.__getattribute__(self, ref_name)
+        if ref is not None:
+            agent = ref()
+            if agent is not None:
+                return agent
+        return None
 
     def to_input_item(self) -> TResponseInputItem:
         """Converts this item into an input item suitable for passing to the model."""
@@ -172,23 +195,30 @@ def __post_init__(self) -> None:
         self._source_agent_ref = weakref.ref(self.source_agent)
         self._target_agent_ref = weakref.ref(self.target_agent)
 
-    def __getattr__(self, name: str) -> Any:
+    def __getattribute__(self, name: str) -> Any:
         if name == "source_agent":
-            return self._source_agent_ref() if self._source_agent_ref else None
+            # Provide lazy weakref access like the base `agent` field so HandoffOutputItem
+            # callers keep seeing the original agent until GC occurs.
+            return self._get_agent_via_weakref("source_agent", "_source_agent_ref")
         if name == "target_agent":
-            return self._target_agent_ref() if self._target_agent_ref else None
-        return super().__getattr__(name)
+            # Same as above but for the target of the handoff.
+            return self._get_agent_via_weakref("target_agent", "_target_agent_ref")
+        return super().__getattribute__(name)
 
     def release_agent(self) -> None:
         super().release_agent()
         if "source_agent" in self.__dict__:
             source_agent = self.__dict__["source_agent"]
-            self._source_agent_ref = weakref.ref(source_agent) if source_agent is not None else None
-            object.__delattr__(self, "source_agent")
+            if source_agent is not None:
+                self._source_agent_ref = weakref.ref(source_agent)
+            # Preserve dataclass fields for repr/asdict while dropping strong refs.
+            self.__dict__["source_agent"] = None
         if "target_agent" in self.__dict__:
             target_agent = self.__dict__["target_agent"]
-            self._target_agent_ref = weakref.ref(target_agent) if target_agent is not None else None
-            object.__delattr__(self, "target_agent")
+            if target_agent is not None:
+                self._target_agent_ref = weakref.ref(target_agent)
+            # Preserve dataclass fields for repr/asdict while dropping strong refs.
+            self.__dict__["target_agent"] = None
 
 
 ToolCallItemTypes: TypeAlias = Union[
diff --git a/src/agents/result.py b/src/agents/result.py
index 181fffcf1..438d53af2 100644
--- a/src/agents/result.py
+++ b/src/agents/result.py
@@ -75,24 +75,27 @@ class RunResultBase(abc.ABC):
     def last_agent(self) -> Agent[Any]:
         """The last agent that was run."""
 
-    def release_agents(self) -> None:
+    def release_agents(self, *, release_new_items: bool = True) -> None:
         """
         Release strong references to agents held by this result. After calling this method,
         accessing `item.agent` or `last_agent` may return `None` if the agent has been garbage
         collected. Callers can use this when they are done inspecting the result and want to
         eagerly drop any associated agent graph.
         """
-        for item in self.new_items:
-            release = getattr(item, "release_agent", None)
-            if callable(release):
-                release()
+        if release_new_items:
+            for item in self.new_items:
+                release = getattr(item, "release_agent", None)
+                if callable(release):
+                    release()
         self._release_last_agent_reference()
 
     def __del__(self) -> None:
         try:
             # Fall back to releasing agents automatically in case the caller never invoked
-            # `release_agents()` explicitly. This keeps the no-leak guarantee confirmed by tests.
-            self.release_agents()
+            # `release_agents()` explicitly so GC of the RunResult drops the last strong reference.
+            # We pass `release_new_items=False` so RunItems that the user intentionally keeps
+            # continue exposing their originating agent until that agent itself is collected.
+            self.release_agents(release_new_items=False)
         except Exception:
             # Avoid raising from __del__.
             pass
@@ -164,7 +167,8 @@ def _release_last_agent_reference(self) -> None:
         if agent is None:
             return
         self._last_agent_ref = weakref.ref(agent)
-        object.__delattr__(self, "_last_agent")
+        # Preserve dataclass field so repr/asdict continue to succeed.
+        self.__dict__["_last_agent"] = None
 
     def __str__(self) -> str:
         return pretty_print_result(self)
@@ -244,7 +248,8 @@ def _release_last_agent_reference(self) -> None:
         if agent is None:
             return
         self._current_agent_ref = weakref.ref(agent)
-        object.__delattr__(self, "current_agent")
+        # Preserve dataclass field so repr/asdict continue to succeed.
+        self.__dict__["current_agent"] = None
 
     def cancel(self, mode: Literal["immediate", "after_turn"] = "immediate") -> None:
         """Cancel the streaming run.
diff --git a/src/agents/run.py b/src/agents/run.py
index c14f13e3f..5ea1dbab1 100644
--- a/src/agents/run.py
+++ b/src/agents/run.py
@@ -648,51 +648,60 @@ async def run(
                     tool_input_guardrail_results.extend(turn_result.tool_input_guardrail_results)
                     tool_output_guardrail_results.extend(turn_result.tool_output_guardrail_results)
 
-                    if isinstance(turn_result.next_step, NextStepFinalOutput):
-                        output_guardrail_results = await self._run_output_guardrails(
-                            current_agent.output_guardrails + (run_config.output_guardrails or []),
-                            current_agent,
-                            turn_result.next_step.output,
-                            context_wrapper,
-                        )
-                        result = RunResult(
-                            input=original_input,
-                            new_items=generated_items,
-                            raw_responses=model_responses,
-                            final_output=turn_result.next_step.output,
-                            _last_agent=current_agent,
-                            input_guardrail_results=input_guardrail_results,
-                            output_guardrail_results=output_guardrail_results,
-                            tool_input_guardrail_results=tool_input_guardrail_results,
-                            tool_output_guardrail_results=tool_output_guardrail_results,
-                            context_wrapper=context_wrapper,
-                        )
-                        if not any(
-                            guardrail_result.output.tripwire_triggered
-                            for guardrail_result in input_guardrail_results
-                        ):
-                            await self._save_result_to_session(
-                                session, [], turn_result.new_step_items
+                    try:
+                        if isinstance(turn_result.next_step, NextStepFinalOutput):
+                            output_guardrail_results = await self._run_output_guardrails(
+                                current_agent.output_guardrails
+                                + (run_config.output_guardrails or []),
+                                current_agent,
+                                turn_result.next_step.output,
+                                context_wrapper,
+                            )
+                            result = RunResult(
+                                input=original_input,
+                                new_items=generated_items,
+                                raw_responses=model_responses,
+                                final_output=turn_result.next_step.output,
+                                _last_agent=current_agent,
+                                input_guardrail_results=input_guardrail_results,
+                                output_guardrail_results=output_guardrail_results,
+                                tool_input_guardrail_results=tool_input_guardrail_results,
+                                tool_output_guardrail_results=tool_output_guardrail_results,
+                                context_wrapper=context_wrapper,
                             )
+                            if not any(
+                                guardrail_result.output.tripwire_triggered
+                                for guardrail_result in input_guardrail_results
+                            ):
+                                await self._save_result_to_session(
+                                    session, [], turn_result.new_step_items
+                                )
 
-                        return result
-                    elif isinstance(turn_result.next_step, NextStepHandoff):
-                        current_agent = cast(Agent[TContext], turn_result.next_step.new_agent)
-                        current_span.finish(reset_current=True)
-                        current_span = None
-                        should_run_agent_start_hooks = True
-                    elif isinstance(turn_result.next_step, NextStepRunAgain):
-                        if not any(
-                            guardrail_result.output.tripwire_triggered
-                            for guardrail_result in input_guardrail_results
-                        ):
-                            await self._save_result_to_session(
-                                session, [], turn_result.new_step_items
+                            return result
+                        elif isinstance(turn_result.next_step, NextStepHandoff):
+                            current_agent = cast(Agent[TContext], turn_result.next_step.new_agent)
+                            current_span.finish(reset_current=True)
+                            current_span = None
+                            should_run_agent_start_hooks = True
+                        elif isinstance(turn_result.next_step, NextStepRunAgain):
+                            if not any(
+                                guardrail_result.output.tripwire_triggered
+                                for guardrail_result in input_guardrail_results
+                            ):
+                                await self._save_result_to_session(
+                                    session, [], turn_result.new_step_items
+                                )
+                        else:
+                            raise AgentsException(
+                                f"Unknown next step type: {type(turn_result.next_step)}"
                             )
-                    else:
-                        raise AgentsException(
-                            f"Unknown next step type: {type(turn_result.next_step)}"
-                        )
+                    finally:
+                        # RunImpl.execute_tools_and_side_effects returns a SingleStepResult that
+                        # stores direct references to the `pre_step_items` and `new_step_items`
+                        # lists it manages internally. Clear them here so the next turn does not
+                        # hold on to items from previous turns and to avoid leaking agent refs.
+                        turn_result.pre_step_items.clear()
+                        turn_result.new_step_items.clear()
             except AgentsException as exc:
                 exc.run_data = RunErrorDetails(
                     input=original_input,
diff --git a/tests/test_items_helpers.py b/tests/test_items_helpers.py
index 408ba1319..ad8da2266 100644
--- a/tests/test_items_helpers.py
+++ b/tests/test_items_helpers.py
@@ -2,6 +2,7 @@
 
 import gc
 import json
+import weakref
 
 from openai.types.responses.response_computer_tool_call import (
     ActionScreenshot,
@@ -30,6 +31,7 @@
 
 from agents import (
     Agent,
+    HandoffOutputItem,
     ItemHelpers,
     MessageOutputItem,
     ModelResponse,
@@ -152,14 +154,59 @@ def test_text_message_outputs_across_list_of_runitems() -> None:
 def test_message_output_item_retains_agent_until_release() -> None:
     # Construct the run item with an inline agent to ensure the run item keeps a strong reference.
     message = make_message([ResponseOutputText(annotations=[], text="hello", type="output_text")])
-    item = MessageOutputItem(agent=Agent(name="inline"), raw_item=message)
-    assert item.agent is not None
+    agent = Agent(name="inline")
+    item = MessageOutputItem(agent=agent, raw_item=message)
+    assert item.agent is agent
     assert item.agent.name == "inline"
 
-    # After explicitly releasing, the weak reference should drop once GC runs.
+    # Releasing the agent should keep the weak reference alive while strong refs remain.
     item.release_agent()
+    assert item.agent is agent
+
+    agent_ref = weakref.ref(agent)
+    del agent
     gc.collect()
+
+    # Once the original agent is collected, the weak reference should drop.
+    assert agent_ref() is None
+    assert item.agent is None
+
+
+def test_handoff_output_item_retains_agents_until_gc() -> None:
+    raw_item: TResponseInputItem = {
+        "call_id": "call1",
+        "output": "handoff",
+        "type": "function_call_output",
+    }
+    owner_agent = Agent(name="owner")
+    source_agent = Agent(name="source")
+    target_agent = Agent(name="target")
+    item = HandoffOutputItem(
+        agent=owner_agent,
+        raw_item=raw_item,
+        source_agent=source_agent,
+        target_agent=target_agent,
+    )
+
+    item.release_agent()
+    assert item.agent is owner_agent
+    assert item.source_agent is source_agent
+    assert item.target_agent is target_agent
+
+    owner_ref = weakref.ref(owner_agent)
+    source_ref = weakref.ref(source_agent)
+    target_ref = weakref.ref(target_agent)
+    del owner_agent
+    del source_agent
+    del target_agent
+    gc.collect()
+
+    assert owner_ref() is None
+    assert source_ref() is None
+    assert target_ref() is None
     assert item.agent is None
+    assert item.source_agent is None
+    assert item.target_agent is None
 
 
 def test_tool_call_output_item_constructs_function_call_output_dict():
diff --git a/tests/test_result_cast.py b/tests/test_result_cast.py
index 87f4fc2d0..e919171ae 100644
--- a/tests/test_result_cast.py
+++ b/tests/test_result_cast.py
@@ -1,3 +1,4 @@
+import dataclasses
 import gc
 import weakref
 from typing import Any
@@ -6,7 +7,7 @@
 from openai.types.responses import ResponseOutputMessage, ResponseOutputText
 from pydantic import BaseModel
 
-from agents import Agent, MessageOutputItem, RunContextWrapper, RunResult
+from agents import Agent, MessageOutputItem, RunContextWrapper, RunResult, RunResultStreaming
 from agents.exceptions import AgentsException
 
 
@@ -29,6 +30,16 @@ class Foo(BaseModel):
     bar: int
 
 
+def _create_message(text: str) -> ResponseOutputMessage:
+    return ResponseOutputMessage(
+        id="msg",
+        content=[ResponseOutputText(annotations=[], text=text, type="output_text")],
+        role="assistant",
+        status="completed",
+        type="message",
+    )
+
+
 def test_result_cast_typechecks():
     """Correct casts should work fine."""
     result = create_run_result(1)
@@ -66,13 +77,7 @@ def test_bad_cast_with_param_raises():
 
 
 def test_run_result_release_agents_breaks_strong_refs() -> None:
-    message = ResponseOutputMessage(
-        id="msg",
-        content=[ResponseOutputText(annotations=[], text="hello", type="output_text")],
-        role="assistant",
-        status="completed",
-        type="message",
-    )
+    message = _create_message("hello")
     agent = Agent(name="leak-test-agent")
     item = MessageOutputItem(agent=agent, raw_item=message)
     result = RunResult(
@@ -99,3 +104,173 @@ def test_run_result_release_agents_breaks_strong_refs() -> None:
     assert item.agent is None
     with pytest.raises(AgentsException):
         _ = result.last_agent
+
+
+def test_run_item_retains_agent_when_result_is_garbage_collected() -> None:
+    def build_item() -> tuple[MessageOutputItem, weakref.ReferenceType[RunResult]]:
+        message = _create_message("persist")
+        agent = Agent(name="persisted-agent")
+        item = MessageOutputItem(agent=agent, raw_item=message)
+        result = RunResult(
+            input="test",
+            new_items=[item],
+            raw_responses=[],
+            final_output=None,
+            input_guardrail_results=[],
+            output_guardrail_results=[],
+            tool_input_guardrail_results=[],
+            tool_output_guardrail_results=[],
+            _last_agent=agent,
+            context_wrapper=RunContextWrapper(context=None),
+        )
+        return item, weakref.ref(result)
+
+    item, result_ref = build_item()
+    gc.collect()
+
+    assert result_ref() is None
+    assert item.agent is not None
+    assert item.agent.name == "persisted-agent"
+
+
+def test_run_item_repr_and_asdict_after_release() -> None:
+    message = _create_message("repr")
+    agent = Agent(name="repr-agent")
+    item = MessageOutputItem(agent=agent, raw_item=message)
+
+    item.release_agent()
+    assert item.agent is agent
+
+    text = repr(item)
+    assert "MessageOutputItem" in text
+
+    serialized = dataclasses.asdict(item)
+    assert isinstance(serialized["agent"], dict)
+    assert serialized["agent"]["name"] == "repr-agent"
+
+    agent_ref = weakref.ref(agent)
+    del agent
+    gc.collect()
+
+    assert agent_ref() is None
+    assert item.agent is None
+
+    serialized_after_gc = dataclasses.asdict(item)
+    assert serialized_after_gc["agent"] is None
+
+
+def test_run_result_repr_and_asdict_after_release_agents() -> None:
+    agent = Agent(name="repr-result-agent")
+    result = RunResult(
+        input="test",
+        new_items=[],
+        raw_responses=[],
+        final_output=None,
+        input_guardrail_results=[],
+        output_guardrail_results=[],
+        tool_input_guardrail_results=[],
+        tool_output_guardrail_results=[],
+        _last_agent=agent,
+        context_wrapper=RunContextWrapper(context=None),
+    )
+
+    result.release_agents()
+
+    text = repr(result)
+    assert "RunResult" in text
+
+    serialized = dataclasses.asdict(result)
+    assert serialized["_last_agent"] is None
+
+
+def test_run_result_release_agents_without_releasing_new_items() -> None:
+    message = _create_message("keep")
+    item_agent = Agent(name="item-agent")
+    last_agent = Agent(name="last-agent")
+    item = MessageOutputItem(agent=item_agent, raw_item=message)
+    result = RunResult(
+        input="test",
+        new_items=[item],
+        raw_responses=[],
+        final_output=None,
+        input_guardrail_results=[],
+        output_guardrail_results=[],
+        tool_input_guardrail_results=[],
+        tool_output_guardrail_results=[],
+        _last_agent=last_agent,
+        context_wrapper=RunContextWrapper(context=None),
+    )
+
+    result.release_agents(release_new_items=False)
+
+    assert item.agent is item_agent
+
+    last_agent_ref = weakref.ref(last_agent)
+    del last_agent
+    gc.collect()
+
+    assert last_agent_ref() is None
+    with pytest.raises(AgentsException):
+        _ = result.last_agent
+
+
+def test_run_result_release_agents_is_idempotent() -> None:
+    message = _create_message("idempotent")
+    agent = Agent(name="idempotent-agent")
+    item = MessageOutputItem(agent=agent, raw_item=message)
+    result = RunResult(
+        input="test",
+        new_items=[item],
+        raw_responses=[],
+        final_output=None,
+        input_guardrail_results=[],
+        output_guardrail_results=[],
+        tool_input_guardrail_results=[],
+        tool_output_guardrail_results=[],
+        _last_agent=agent,
+        context_wrapper=RunContextWrapper(context=None),
+    )
+
+    result.release_agents()
+    result.release_agents()
+
+    assert item.agent is agent
+
+    agent_ref = weakref.ref(agent)
+    del agent
+    gc.collect()
+
+    assert agent_ref() is None
+    assert item.agent is None
+    with pytest.raises(AgentsException):
+        _ = result.last_agent
+
+
+def test_run_result_streaming_release_agents_releases_current_agent() -> None:
+    agent = Agent(name="streaming-agent")
+    streaming_result = RunResultStreaming(
+        input="stream",
+        new_items=[],
+        raw_responses=[],
+        final_output=None,
+        input_guardrail_results=[],
+        output_guardrail_results=[],
+        tool_input_guardrail_results=[],
+        tool_output_guardrail_results=[],
+        context_wrapper=RunContextWrapper(context=None),
+        current_agent=agent,
+        current_turn=0,
+        max_turns=1,
+        _current_agent_output_schema=None,
+        trace=None,
+    )
+
+    streaming_result.release_agents(release_new_items=False)
+
+    agent_ref = weakref.ref(agent)
+    del agent
+    gc.collect()
+
+    assert agent_ref() is None
+    with pytest.raises(AgentsException):
+        _ = streaming_result.last_agent