diff --git a/sentry_sdk/integrations/openai_agents/patches/agent_run.py b/sentry_sdk/integrations/openai_agents/patches/agent_run.py index 57a68f2f5d..43944daa13 100644 --- a/sentry_sdk/integrations/openai_agents/patches/agent_run.py +++ b/sentry_sdk/integrations/openai_agents/patches/agent_run.py @@ -1,7 +1,14 @@ +import sys from functools import wraps from sentry_sdk.integrations import DidNotEnable -from ..spans import invoke_agent_span, update_invoke_agent_span, handoff_span +from sentry_sdk.utils import reraise +from ..spans import ( + invoke_agent_span, + end_invoke_agent_span, + handoff_span, +) +from ..utils import _record_exception_on_span from typing import TYPE_CHECKING @@ -38,15 +45,6 @@ def _start_invoke_agent_span(context_wrapper, agent, kwargs): return span - def _end_invoke_agent_span(context_wrapper, agent, output=None): - # type: (agents.RunContextWrapper, agents.Agent, Optional[Any]) -> None - """End the agent invocation span""" - # Clear the stored agent - if hasattr(context_wrapper, "_sentry_current_agent"): - delattr(context_wrapper, "_sentry_current_agent") - - update_invoke_agent_span(context_wrapper, agent, output) - def _has_active_agent_span(context_wrapper): # type: (agents.RunContextWrapper) -> bool """Check if there's an active agent span for this context""" @@ -69,19 +67,27 @@ async def patched_run_single_turn(cls, *args, **kwargs): context_wrapper = kwargs.get("context_wrapper") should_run_agent_start_hooks = kwargs.get("should_run_agent_start_hooks") + span = getattr(context_wrapper, "_sentry_agent_span", None) # Start agent span when agent starts (but only once per agent) if should_run_agent_start_hooks and agent and context_wrapper: # End any existing span for a different agent if _has_active_agent_span(context_wrapper): current_agent = _get_current_agent(context_wrapper) if current_agent and current_agent != agent: - _end_invoke_agent_span(context_wrapper, current_agent) + end_invoke_agent_span(context_wrapper, current_agent) span = _start_invoke_agent_span(context_wrapper, agent, kwargs) agent._sentry_agent_span = span # Call original method with all the correct parameters - result = await original_run_single_turn(*args, **kwargs) + try: + result = await original_run_single_turn(*args, **kwargs) + except Exception as exc: + if span is not None and span.timestamp is None: + _record_exception_on_span(span, exc) + end_invoke_agent_span(context_wrapper, agent) + + reraise(*sys.exc_info()) return result @@ -111,7 +117,7 @@ async def patched_execute_handoffs(cls, *args, **kwargs): finally: # End span for current agent after handoff processing is complete if agent and context_wrapper and _has_active_agent_span(context_wrapper): - _end_invoke_agent_span(context_wrapper, agent) + end_invoke_agent_span(context_wrapper, agent) return result @@ -134,7 +140,7 @@ async def patched_execute_final_output(cls, *args, **kwargs): finally: # End span for current agent after final output processing is complete if agent and context_wrapper and _has_active_agent_span(context_wrapper): - _end_invoke_agent_span(context_wrapper, agent, final_output) + end_invoke_agent_span(context_wrapper, agent, final_output) return result diff --git a/sentry_sdk/integrations/openai_agents/patches/error_tracing.py b/sentry_sdk/integrations/openai_agents/patches/error_tracing.py index 7d145267fc..2695f8a753 100644 --- a/sentry_sdk/integrations/openai_agents/patches/error_tracing.py +++ b/sentry_sdk/integrations/openai_agents/patches/error_tracing.py @@ -3,6 +3,7 @@ import sentry_sdk from sentry_sdk.consts import SPANSTATUS from sentry_sdk.tracing_utils import set_span_errored +from ..utils import _record_exception_on_span from typing import TYPE_CHECKING @@ -58,16 +59,7 @@ def sentry_attach_error_to_current_span(error, *args, **kwargs): # Set the current Sentry span to errored current_span = sentry_sdk.get_current_span() if current_span is not None: - set_span_errored(current_span) - current_span.set_data("span.status", "error") - - # Optionally capture the error details if we have them - if hasattr(error, "__class__"): - current_span.set_data("error.type", error.__class__.__name__) - if hasattr(error, "__str__"): - error_message = str(error) - if error_message: - current_span.set_data("error.message", error_message) + _record_exception_on_span(current_span, error) # Call the original function return original_attach_error(error, *args, **kwargs) diff --git a/sentry_sdk/integrations/openai_agents/patches/runner.py b/sentry_sdk/integrations/openai_agents/patches/runner.py index 05c15da4d1..736a820d35 100644 --- a/sentry_sdk/integrations/openai_agents/patches/runner.py +++ b/sentry_sdk/integrations/openai_agents/patches/runner.py @@ -1,9 +1,15 @@ from functools import wraps import sentry_sdk +from sentry_sdk.integrations import DidNotEnable -from ..spans import agent_workflow_span -from ..utils import _capture_exception +from ..spans import agent_workflow_span, end_invoke_agent_span +from ..utils import _capture_exception, _record_exception_on_span + +try: + from agents.exceptions import AgentsException +except ImportError: + raise DidNotEnable("OpenAI Agents not installed") from typing import TYPE_CHECKING @@ -29,19 +35,34 @@ async def wrapper(*args, **kwargs): # Clone agent because agent invocation spans are attached per run. agent = args[0].clone() with agent_workflow_span(agent): - result = None args = (agent, *args[1:]) try: - result = await original_func(*args, **kwargs) - return result - except Exception as exc: + run_result = await original_func(*args, **kwargs) + except AgentsException as exc: _capture_exception(exc) - # It could be that there is a "invoke agent" span still open - current_span = sentry_sdk.get_current_span() - if current_span is not None and current_span.timestamp is None: - current_span.__exit__(None, None, None) + context_wrapper = getattr(exc.run_data, "context_wrapper", None) + if context_wrapper is not None: + invoke_agent_span = getattr( + context_wrapper, "_sentry_agent_span", None + ) + + if ( + invoke_agent_span is not None + and invoke_agent_span.timestamp is None + ): + _record_exception_on_span(invoke_agent_span, exc) + end_invoke_agent_span(context_wrapper, agent) raise exc from None + except Exception as exc: + # Invoke agent span is not finished in this case. + # This is much less likely to occur than other cases because + # AgentRunner.run() is "just" a while loop around _run_single_turn. + _capture_exception(exc) + raise exc from None + + end_invoke_agent_span(run_result.context_wrapper, agent) + return run_result return wrapper diff --git a/sentry_sdk/integrations/openai_agents/spans/__init__.py b/sentry_sdk/integrations/openai_agents/spans/__init__.py index 3bc453cafa..64b979fc25 100644 --- a/sentry_sdk/integrations/openai_agents/spans/__init__.py +++ b/sentry_sdk/integrations/openai_agents/spans/__init__.py @@ -2,4 +2,8 @@ from .ai_client import ai_client_span, update_ai_client_span # noqa: F401 from .execute_tool import execute_tool_span, update_execute_tool_span # noqa: F401 from .handoff import handoff_span # noqa: F401 -from .invoke_agent import invoke_agent_span, update_invoke_agent_span # noqa: F401 +from .invoke_agent import ( + invoke_agent_span, + update_invoke_agent_span, + end_invoke_agent_span, +) # noqa: F401 diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index 5d1731f247..f6311a2150 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: import agents - from typing import Any + from typing import Any, Optional def invoke_agent_span(context, agent, kwargs): @@ -95,3 +95,13 @@ def update_invoke_agent_span(context, agent, output): span.__exit__(None, None, None) delattr(context, "_sentry_agent_span") + + +def end_invoke_agent_span(context_wrapper, agent, output=None): + # type: (agents.RunContextWrapper, agents.Agent, Optional[Any]) -> None + """End the agent invocation span""" + # Clear the stored agent + if hasattr(context_wrapper, "_sentry_current_agent"): + delattr(context_wrapper, "_sentry_current_agent") + + update_invoke_agent_span(context_wrapper, agent, output) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index 95ff44c1fd..ca7d4d80de 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -18,6 +18,8 @@ from typing import Any from agents import Usage + from sentry_sdk.tracing import Span + try: import agents @@ -37,6 +39,20 @@ def _capture_exception(exc): sentry_sdk.capture_event(event, hint=hint) +def _record_exception_on_span(span, error): + # type: (Span, Exception) -> Any + set_span_errored(span) + span.set_data("span.status", "error") + + # Optionally capture the error details if we have them + if hasattr(error, "__class__"): + span.set_data("error.type", error.__class__.__name__) + if hasattr(error, "__str__"): + error_message = str(error) + if error_message: + span.set_data("error.message", error_message) + + def _set_agent_data(span, agent): # type: (sentry_sdk.tracing.Span, agents.Agent) -> None span.set_data( diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 03cedd4447..c5cb25dfee 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -25,6 +25,7 @@ ResponseOutputText, ResponseFunctionToolCall, ) +from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError from agents.version import __version__ as OPENAI_AGENTS_VERSION from openai.types.responses.response_usage import ( @@ -353,6 +354,95 @@ async def test_handoff_span(sentry_init, capture_events, mock_usage): assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.asyncio +async def test_max_turns_before_handoff_span(sentry_init, capture_events, mock_usage): + """ + Example raising agents.exceptions.AgentsException after the agent invocation span is complete. + """ + # Create two simple agents with a handoff relationship + secondary_agent = agents.Agent( + name="secondary_agent", + instructions="You are a secondary agent.", + model="gpt-4o-mini", + ) + + primary_agent = agents.Agent( + name="primary_agent", + instructions="You are a primary agent that hands off to secondary agent.", + model="gpt-4o-mini", + handoffs=[secondary_agent], + ) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Mock two responses: + # 1. Primary agent calls handoff tool + # 2. Secondary agent provides final response + handoff_response = ModelResponse( + output=[ + ResponseFunctionToolCall( + id="call_handoff_123", + call_id="call_handoff_123", + name="transfer_to_secondary_agent", + type="function_call", + arguments="{}", + ) + ], + usage=mock_usage, + response_id="resp_handoff_123", + ) + + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="I'm the specialist and I can help with that!", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=mock_usage, + response_id="resp_final_123", + ) + + mock_get_response.side_effect = [handoff_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + ) + + events = capture_events() + + with pytest.raises(MaxTurnsExceeded): + await agents.Runner.run( + primary_agent, + "Please hand off to secondary agent", + run_config=test_run_config, + max_turns=1, + ) + + (error, transaction) = events + spans = transaction["spans"] + handoff_span = spans[2] + + # Verify handoff span was created + assert handoff_span is not None + assert ( + handoff_span["description"] == "handoff from primary_agent to secondary_agent" + ) + assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" + + @pytest.mark.asyncio async def test_tool_execution_span(sentry_init, capture_events, test_agent): """ @@ -605,6 +695,77 @@ def simple_test_tool(message: str) -> str: assert ai_client_span2["data"]["gen_ai.usage.total_tokens"] == 25 +@pytest.mark.asyncio +async def test_model_behavior_error(sentry_init, capture_events, test_agent): + """ + Example raising agents.exceptions.AgentsException before the agent invocation span is complete. + The mocked API response indicates that "wrong_tool" was called. + """ + + @agents.function_tool + def simple_test_tool(message: str) -> str: + """A simple tool""" + return f"Tool executed with: {message}" + + # Create agent with the tool + agent_with_tool = test_agent.clone(tools=[simple_test_tool]) + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', + ) + + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) + + mock_get_response.side_effect = [tool_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + with pytest.raises(ModelBehaviorError): + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (error, transaction) = events + spans = transaction["spans"] + ( + agent_span, + ai_client_span1, + ) = spans + + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + assert agent_span["description"] == "invoke_agent test_agent" + assert agent_span["origin"] == "auto.ai.openai_agents" + + # Error due to unrecognized tool in model response. + assert agent_span["status"] == "internal_error" + assert agent_span["tags"]["status"] == "internal_error" + + @pytest.mark.asyncio async def test_error_handling(sentry_init, capture_events, test_agent): """