Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion src/agents/items.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from __future__ import annotations

import abc
from dataclasses import dataclass
import weakref
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, Union, cast

import pydantic
Expand Down Expand Up @@ -84,6 +85,29 @@ class RunItemBase(Generic[T], abc.ABC):
(i.e. `openai.types.responses.ResponseInputItemParam`).
"""

# Weak reference to the agent. `__post_init__` fills it in; it is not an `__init__`
# parameter (init=False) and is left out of the generated repr (repr=False).
_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
init=False,
repr=False,
default=None,
)

def __post_init__(self) -> None:
    """Record a weak reference to ``self.agent`` next to the strong one.

    The weak reference is what ``release_agent()`` relies on to drop the strong
    reference later while still resolving the agent for as long as something else
    keeps it alive.

    A ``None`` agent produces no weak reference rather than the ``TypeError`` that
    ``weakref.ref(None)`` raises, which matches how ``release_agent()`` already
    treats a ``None`` agent.
    """
    agent = self.agent
    self._agent_ref = weakref.ref(agent) if agent is not None else None

def __getattr__(self, name: str) -> Any:
    """Resolve ``agent`` through the weak reference once normal lookup has failed.

    Python only calls this hook after regular attribute lookup fails, so for
    ``agent`` it runs when the instance no longer stores the attribute itself.

    Returns:
        The referent of ``_agent_ref``, or ``None`` when there is no weak reference
        or its referent is gone.

    Raises:
        AttributeError: For every name other than ``agent``.
    """
    if name != "agent":
        raise AttributeError(name)
    ref = self._agent_ref
    if not ref:
        return None
    return ref()

def release_agent(self) -> None:
    """Swap the strong ``agent`` reference for a weak one.

    Does nothing when the instance dictionary has no ``agent`` entry. Otherwise
    ``_agent_ref`` is refreshed from the stored agent (``None`` when the stored value
    is ``None``) and the ``agent`` entry is removed from the instance.
    """
    try:
        held = self.__dict__["agent"]
    except KeyError:
        # Nothing is stored on the instance, e.g. an earlier call already released it.
        return
    if held is None:
        self._agent_ref = None
    else:
        self._agent_ref = weakref.ref(held)
    # Delete via `object` so the instance attribute is removed directly.
    object.__delattr__(self, "agent")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge release_agent deletes dataclass fields causing repr/asdict crashes

In RunItemBase.release_agent() (lines 103‑109) the code removes the dataclass field entirely via object.__delattr__(self, "agent") after storing the weakref. Deleting a dataclass field violates the assumptions of the generated __repr__, __eq__, dataclasses.asdict, etc., which immediately raise AttributeError once the field disappears. Because RunResult.release_agents() now calls this helper automatically (and tests encourage users to call it manually), any instrumentation that logs or serializes a RunItem after releasing agents will start crashing—e.g. repr(item) or dataclasses.asdict(item) now fail even though release was supposed to be a benign cleanup step. Setting the field to None (or keeping a lightweight struct with the metadata) would drop the strong reference without breaking dataclass behavior; please add coverage for repr/asdict after release so this regression is caught.

Useful? React with 👍 / 👎.


def to_input_item(self) -> TResponseInputItem:
"""Converts this item into an input item suitable for passing to the model."""
if isinstance(self.raw_item, dict):
Expand Down Expand Up @@ -131,6 +155,41 @@ class HandoffOutputItem(RunItemBase[TResponseInputItem]):

type: Literal["handoff_output_item"] = "handoff_output_item"

# Weak reference to `source_agent`. `__post_init__` fills it in; it is not an `__init__`
# parameter and is left out of the generated repr.
_source_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
init=False,
repr=False,
default=None,
)
# Weak reference to `target_agent`, handled the same way as `_source_agent_ref`.
_target_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
init=False,
repr=False,
default=None,
)

def __post_init__(self) -> None:
    """Record weak references to the source and target agents.

    Runs the parent ``__post_init__`` first, then mirrors ``source_agent`` and
    ``target_agent`` into ``_source_agent_ref`` / ``_target_agent_ref`` so that
    ``release_agent()`` can later drop the strong references.

    A ``None`` agent produces no weak reference rather than the ``TypeError`` that
    ``weakref.ref(None)`` raises, which matches how ``release_agent()`` already
    treats ``None`` values.
    """
    super().__post_init__()
    source, target = self.source_agent, self.target_agent
    self._source_agent_ref = weakref.ref(source) if source is not None else None
    self._target_agent_ref = weakref.ref(target) if target is not None else None

def __getattr__(self, name: str) -> Any:
    """Resolve ``source_agent`` / ``target_agent`` through their weak references.

    Python only calls this hook after regular attribute lookup fails. Any other name
    is delegated to the parent class's ``__getattr__``.

    Returns:
        The referent of the matching weak reference, or ``None`` when there is no
        weak reference or its referent is gone.
    """
    if name == "source_agent":
        ref = self._source_agent_ref
    elif name == "target_agent":
        ref = self._target_agent_ref
    else:
        return super().__getattr__(name)
    if not ref:
        return None
    return ref()

def release_agent(self) -> None:
    """Release the strong references to the agent, source agent and target agent.

    Calls the parent ``release_agent()`` first. Then, for each of ``source_agent``
    and ``target_agent`` still stored on the instance, the matching weak reference is
    refreshed (``None`` when the stored value is ``None``) and the instance attribute
    is deleted.
    """
    super().release_agent()
    stored = self.__dict__
    field_to_ref = (
        ("source_agent", "_source_agent_ref"),
        ("target_agent", "_target_agent_ref"),
    )
    for field_name, ref_name in field_to_ref:
        if field_name not in stored:
            continue
        held = stored[field_name]
        setattr(self, ref_name, weakref.ref(held) if held is not None else None)
        object.__delattr__(self, field_name)


ToolCallItemTypes: TypeAlias = Union[
ResponseFunctionToolCall,
Expand Down
76 changes: 74 additions & 2 deletions src/agents/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import abc
import asyncio
import weakref
from collections.abc import AsyncIterator
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Literal, cast
Expand Down Expand Up @@ -74,6 +75,32 @@ class RunResultBase(abc.ABC):
def last_agent(self) -> Agent[Any]:
"""The last agent that was run."""

def release_agents(self) -> None:
    """
    Drop the strong agent references held by this result.

    Every entry of `new_items` that exposes a callable `release_agent` has it invoked,
    in order; entries without one are skipped. Afterwards the result's own last-agent
    reference is released via `_release_last_agent_reference()`. Use this once you are
    done inspecting the result and want to let go of the associated agent graph.
    """
    releasers = (getattr(entry, "release_agent", None) for entry in self.new_items)
    for releaser in releasers:
        if callable(releaser):
            releaser()
    self._release_last_agent_reference()

def __del__(self) -> None:
try:
# Fall back to releasing agents automatically in case the caller never invoked
# `release_agents()` explicitly. This keeps the no-leak guarantee confirmed by tests.
self.release_agents()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Auto-release discards agent info for retained RunItems

Lines 78‑95 now invoke release_agents() from RunResultBase.__del__, which calls every RunItem’s release_agent() as soon as the RunResult instance is garbage-collected. release_agent() deletes the strong agent attribute and keeps only a weakref, so if the caller created the agent inline (the common Runner.run(Agent(...), input) case) and only keeps references to the MessageOutputItems for later auditing, those items lose their agent metadata the moment the enclosing result drops out of scope—the weakref is the last reference and is collected immediately. Previously the RunItem itself owned a strong reference, so item.agent.name stayed available even when the RunResult object was discarded. This regression silently breaks any code that stores RunItems but frees the RunResult to reduce memory, because their persisted items can no longer tell which agent produced them. Consider making the auto-release opt-in, or at least documenting/providing a way to keep RunItems’ agent data alive when needed (and adding a regression test for retaining items beyond the result lifecycle).

Useful? React with 👍 / 👎.

except Exception:
# Avoid raising from __del__.
pass

@abc.abstractmethod
def _release_last_agent_reference(self) -> None:
    """Release stored agent reference specific to the concrete result type.

    Abstract hook: `release_agents()` calls it after the items in `new_items` have
    been released, and each concrete result class supplies the implementation.
    """

def final_output_as(self, cls: type[T], raise_if_incorrect_type: bool = False) -> T:
"""A convenience method to cast the final output to a specific type. By default, the cast
is only for the typechecker. If you set `raise_if_incorrect_type` to True, we'll raise a
Expand Down Expand Up @@ -111,11 +138,33 @@ def last_response_id(self) -> str | None:
@dataclass
class RunResult(RunResultBase):
_last_agent: Agent[Any]
# Weak reference to `_last_agent`. `__post_init__` fills it in; it is not an `__init__`
# parameter and is left out of the generated repr.
_last_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
init=False,
repr=False,
default=None,
)

def __post_init__(self) -> None:
    """Record a weak reference to ``_last_agent`` next to the strong one.

    A ``None`` agent produces no weak reference rather than the ``TypeError`` that
    ``weakref.ref(None)`` raises, which matches ``_release_last_agent_reference()``
    treating a ``None`` agent as nothing to release.
    """
    last = self._last_agent
    self._last_agent_ref = weakref.ref(last) if last is not None else None

@property
def last_agent(self) -> Agent[Any]:
    """The last agent that was run.

    Prefers the strong reference stored on the instance. Once that has been released,
    falls back to the weak reference, which still resolves while the agent is alive.

    Raises:
        AgentsException: If the strong reference was released and the agent is gone.
    """
    # Read through `__dict__` so a released (deleted) `_last_agent` yields None
    # instead of raising AttributeError.
    agent = cast("Agent[Any] | None", self.__dict__.get("_last_agent"))
    if agent is not None:
        return agent
    ref = self._last_agent_ref
    agent = ref() if ref is not None else None
    if agent is None:
        raise AgentsException("Last agent reference is no longer available.")
    return agent

def _release_last_agent_reference(self) -> None:
    """Replace the strong ``_last_agent`` reference with a weak one.

    Nothing happens when the instance has no ``_last_agent`` entry or the entry is
    ``None``. Otherwise ``_last_agent_ref`` is refreshed from the stored agent and
    the ``_last_agent`` attribute is deleted from the instance.
    """
    held = cast("Agent[Any] | None", self.__dict__.get("_last_agent"))
    if held is not None:
        self._last_agent_ref = weakref.ref(held)
        object.__delattr__(self, "_last_agent")

def __str__(self) -> str:
return pretty_print_result(self)
Expand Down Expand Up @@ -150,6 +199,12 @@ class RunResultStreaming(RunResultBase):
is_complete: bool = False
"""Whether the agent has finished running."""

# Weak reference to `current_agent`. `__post_init__` fills it in; it is not an `__init__`
# parameter and is left out of the generated repr.
_current_agent_ref: weakref.ReferenceType[Agent[Any]] | None = field(
init=False,
repr=False,
default=None,
)

# Queues that the background run_loop writes to
_event_queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel] = field(
default_factory=asyncio.Queue, repr=False
Expand All @@ -167,12 +222,29 @@ class RunResultStreaming(RunResultBase):
# Soft cancel state
_cancel_mode: Literal["none", "immediate", "after_turn"] = field(default="none", repr=False)

def __post_init__(self) -> None:
    """Record a weak reference to ``current_agent`` next to the strong one.

    A ``None`` agent produces no weak reference rather than the ``TypeError`` that
    ``weakref.ref(None)`` raises, which matches ``_release_last_agent_reference()``
    treating a ``None`` agent as nothing to release.
    """
    current = self.current_agent
    self._current_agent_ref = weakref.ref(current) if current is not None else None

@property
def last_agent(self) -> Agent[Any]:
    """The last agent that was run. Updates as the agent run progresses, so the true last agent
    is only available after the agent run is complete.

    Prefers the strong ``current_agent`` reference stored on the instance. Once that
    has been released, falls back to the weak reference, which still resolves while
    the agent is alive.

    Raises:
        AgentsException: If the strong reference was released and the agent is gone.
    """
    # Read through `__dict__` so a released (deleted) `current_agent` yields None
    # instead of raising AttributeError.
    agent = cast("Agent[Any] | None", self.__dict__.get("current_agent"))
    if agent is not None:
        return agent
    ref = self._current_agent_ref
    agent = ref() if ref is not None else None
    if agent is None:
        raise AgentsException("Last agent reference is no longer available.")
    return agent

def _release_last_agent_reference(self) -> None:
    """Replace the strong ``current_agent`` reference with a weak one.

    Nothing happens when the instance has no ``current_agent`` entry or the entry is
    ``None``. Otherwise ``_current_agent_ref`` is refreshed from the stored agent and
    the ``current_agent`` attribute is deleted from the instance.
    """
    held = cast("Agent[Any] | None", self.__dict__.get("current_agent"))
    if held is not None:
        self._current_agent_ref = weakref.ref(held)
        object.__delattr__(self, "current_agent")

def cancel(self, mode: Literal["immediate", "after_turn"] = "immediate") -> None:
"""Cancel the streaming run.
Expand Down
35 changes: 35 additions & 0 deletions tests/test_agent_memory_leak.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from __future__ import annotations

import gc
import weakref

import pytest
from openai.types.responses import ResponseOutputMessage, ResponseOutputText

from agents import Agent, Runner
from tests.fake_model import FakeModel


def _make_message(text: str) -> ResponseOutputMessage:
    """Build a completed assistant message whose only content part carries `text`."""
    text_part = ResponseOutputText(annotations=[], text=text, type="output_text")
    return ResponseOutputMessage(
        id="msg-1",
        content=[text_part],
        role="assistant",
        status="completed",
        type="message",
    )


@pytest.mark.asyncio
async def test_agent_is_released_after_run() -> None:
    """An agent must become collectable after a run whose result is discarded."""
    fake_model = FakeModel(initial_output=[_make_message("Paris")])
    agent = Agent(name="leak-test-agent", instructions="Answer questions.", model=fake_model)
    # Track the agent weakly so the test itself does not keep it alive.
    agent_ref = weakref.ref(agent)

    # Running the agent should not leave behind strong references once the result goes out of scope.
    # The return value is deliberately not bound to a name.
    await Runner.run(agent, "What is the capital of France?")

    # Drop the only strong reference this test holds, then force a collection pass.
    del agent
    gc.collect()

    assert agent_ref() is None
14 changes: 14 additions & 0 deletions tests/test_items_helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import gc
import json

from openai.types.responses.response_computer_tool_call import (
Expand Down Expand Up @@ -148,6 +149,19 @@ def test_text_message_outputs_across_list_of_runitems() -> None:
assert ItemHelpers.text_message_outputs([item1, non_message_item, item2]) == "foobar"


def test_message_output_item_retains_agent_until_release() -> None:
    """A run item keeps its agent reachable until `release_agent()` is called."""
    # Construct the run item with an inline agent to ensure the run item keeps a strong reference.
    message = make_message([ResponseOutputText(annotations=[], text="hello", type="output_text")])
    item = MessageOutputItem(agent=Agent(name="inline"), raw_item=message)
    assert item.agent is not None
    assert item.agent.name == "inline"

    # After explicitly releasing, the weak reference should drop once GC runs.
    # No other name in this test refers to the inline agent.
    item.release_agent()
    gc.collect()
    assert item.agent is None


def test_tool_call_output_item_constructs_function_call_output_dict():
# Build a simple ResponseFunctionToolCall.
call = ResponseFunctionToolCall(
Expand Down
42 changes: 41 additions & 1 deletion tests/test_result_cast.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import gc
import weakref
from typing import Any

import pytest
from openai.types.responses import ResponseOutputMessage, ResponseOutputText
from pydantic import BaseModel

from agents import Agent, RunContextWrapper, RunResult
from agents import Agent, MessageOutputItem, RunContextWrapper, RunResult
from agents.exceptions import AgentsException


def create_run_result(final_output: Any) -> RunResult:
Expand Down Expand Up @@ -59,3 +63,39 @@ def test_bad_cast_with_param_raises():
result = create_run_result(Foo(bar=1))
with pytest.raises(TypeError):
result.final_output_as(int, raise_if_incorrect_type=True)


def test_run_result_release_agents_breaks_strong_refs() -> None:
    """`RunResult.release_agents()` must let the agent be collected.

    The same agent is referenced by a run item and by the result's `_last_agent`.
    After releasing and dropping the local name, the agent should be gone,
    `item.agent` should be `None`, and `result.last_agent` should raise.
    """
    message = ResponseOutputMessage(
        id="msg",
        content=[ResponseOutputText(annotations=[], text="hello", type="output_text")],
        role="assistant",
        status="completed",
        type="message",
    )
    agent = Agent(name="leak-test-agent")
    item = MessageOutputItem(agent=agent, raw_item=message)
    result = RunResult(
        input="test",
        new_items=[item],
        raw_responses=[],
        final_output=None,
        input_guardrail_results=[],
        output_guardrail_results=[],
        tool_input_guardrail_results=[],
        tool_output_guardrail_results=[],
        _last_agent=agent,
        context_wrapper=RunContextWrapper(context=None),
    )
    # Before release, the item still exposes the agent.
    assert item.agent is not None
    assert item.agent.name == "leak-test-agent"

    # Track the agent weakly, release the result's references, then drop the local
    # name and force a collection pass.
    agent_ref = weakref.ref(agent)
    result.release_agents()
    del agent
    gc.collect()

    assert agent_ref() is None
    assert item.agent is None
    with pytest.raises(AgentsException):
        _ = result.last_agent