From e895a779c79dacc4ff916cef3d607653c52dd002 Mon Sep 17 00:00:00 2001 From: James Hills <70035505+jhills20@users.noreply.github.com> Date: Mon, 27 Oct 2025 13:51:25 +0000 Subject: [PATCH 01/19] Nest handoff history by default --- docs/running_agents.md | 3 + src/agents/_run_impl.py | 17 +++- src/agents/extensions/handoff_filters.py | 101 +++++++++++++++++++++++ src/agents/run.py | 5 ++ tests/test_agent_runner.py | 60 +++++++++++++- tests/test_agent_runner_streamed.py | 6 +- tests/test_extension_filters.py | 47 ++++++++++- 7 files changed, 230 insertions(+), 9 deletions(-) diff --git a/docs/running_agents.md b/docs/running_agents.md index ab69d8463..a5f18eceb 100644 --- a/docs/running_agents.md +++ b/docs/running_agents.md @@ -51,11 +51,14 @@ The `run_config` parameter lets you configure some global settings for the agent - [`model_settings`][agents.run.RunConfig.model_settings]: Overrides agent-specific settings. For example, you can set a global `temperature` or `top_p`. - [`input_guardrails`][agents.run.RunConfig.input_guardrails], [`output_guardrails`][agents.run.RunConfig.output_guardrails]: A list of input or output guardrails to include on all runs. - [`handoff_input_filter`][agents.run.RunConfig.handoff_input_filter]: A global input filter to apply to all handoffs, if the handoff doesn't already have one. The input filter allows you to edit the inputs that are sent to the new agent. See the documentation in [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] for more details. +- [`nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]: When `True` (the default) the runner wraps the prior transcript in a developer-role summary message and keeps the latest user turn separate before invoking the next agent. Set this to `False` or provide a custom handoff filter if you prefer to pass through the raw transcript. 
You can also call [`nest_handoff_history`](agents.extensions.handoff_filters.nest_handoff_history) from your own filters to reuse the default behavior. - [`tracing_disabled`][agents.run.RunConfig.tracing_disabled]: Allows you to disable [tracing](tracing.md) for the entire run. - [`trace_include_sensitive_data`][agents.run.RunConfig.trace_include_sensitive_data]: Configures whether traces will include potentially sensitive data, such as LLM and tool call inputs/outputs. - [`workflow_name`][agents.run.RunConfig.workflow_name], [`trace_id`][agents.run.RunConfig.trace_id], [`group_id`][agents.run.RunConfig.group_id]: Sets the tracing workflow name, trace ID and trace group ID for the run. We recommend at least setting `workflow_name`. The group ID is an optional field that lets you link traces across multiple runs. - [`trace_metadata`][agents.run.RunConfig.trace_metadata]: Metadata to include on all traces. +By default, the SDK now nests prior turns inside a developer summary message whenever an agent hands off to another agent. This reduces repeated assistant messages and keeps the most recent user turn explicit for the receiving agent. If you'd like to return to the legacy behavior, pass `RunConfig(nest_handoff_history=False)` or supply a `handoff_input_filter` that forwards the conversation exactly as you need. + ## Conversations/chat threads Calling any of the run methods can result in one or more agents running (and hence one or more LLM calls), but it represents a single logical turn in a chat conversation. 
For example: diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index 88a770a56..9627ca3ec 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -51,6 +51,7 @@ ToolOutputGuardrailTripwireTriggered, UserError, ) +from .extensions.handoff_filters import nest_handoff_history from .guardrail import InputGuardrail, InputGuardrailResult, OutputGuardrail, OutputGuardrailResult from .handoffs import Handoff, HandoffInputData from .items import ( @@ -998,8 +999,8 @@ async def execute_handoffs( input_filter = handoff.input_filter or ( run_config.handoff_input_filter if run_config else None ) - if input_filter: - logger.debug("Filtering inputs for handoff") + handoff_input_data: HandoffInputData | None = None + if input_filter or run_config.nest_handoff_history: handoff_input_data = HandoffInputData( input_history=tuple(original_input) if isinstance(original_input, list) @@ -1008,6 +1009,9 @@ async def execute_handoffs( new_items=tuple(new_step_items), run_context=context_wrapper, ) + + if input_filter and handoff_input_data is not None: + logger.debug("Filtering inputs for handoff") if not callable(input_filter): _error_tracing.attach_error_to_span( span_handoff, @@ -1037,6 +1041,15 @@ async def execute_handoffs( ) pre_step_items = list(filtered.pre_handoff_items) new_step_items = list(filtered.new_items) + elif run_config.nest_handoff_history and handoff_input_data is not None: + nested = nest_handoff_history(handoff_input_data) + original_input = ( + nested.input_history + if isinstance(nested.input_history, str) + else list(nested.input_history) + ) + pre_step_items = list(nested.pre_handoff_items) + new_step_items = list(nested.new_items) return SingleStepResult( original_input=original_input, diff --git a/src/agents/extensions/handoff_filters.py b/src/agents/extensions/handoff_filters.py index a4433ae0c..03a6169d9 100644 --- a/src/agents/extensions/handoff_filters.py +++ b/src/agents/extensions/handoff_filters.py @@ -1,9 +1,14 @@ from 
__future__ import annotations +import json +from copy import deepcopy +from typing import Any + from ..handoffs import HandoffInputData from ..items import ( HandoffCallItem, HandoffOutputItem, + ItemHelpers, ReasoningItem, RunItem, ToolCallItem, @@ -34,6 +39,102 @@ def remove_all_tools(handoff_input_data: HandoffInputData) -> HandoffInputData: ) +def nest_handoff_history(handoff_input_data: HandoffInputData) -> HandoffInputData: + """Summarizes the previous transcript into a developer message for the next agent.""" + + normalized_history = _normalize_input_history(handoff_input_data.input_history) + pre_items_as_inputs = [ + _run_item_to_plain_input(item) for item in handoff_input_data.pre_handoff_items + ] + new_items_as_inputs = [_run_item_to_plain_input(item) for item in handoff_input_data.new_items] + transcript = normalized_history + pre_items_as_inputs + new_items_as_inputs + + developer_message = _build_developer_message(transcript) + latest_user = _find_latest_user_turn(transcript) + history_items: list[TResponseInputItem] = [developer_message] + if latest_user is not None: + history_items.append(latest_user) + + filtered_pre_items = tuple( + item + for item in handoff_input_data.pre_handoff_items + if _get_run_item_role(item) != "assistant" + ) + + return handoff_input_data.clone( + input_history=tuple(history_items), + pre_handoff_items=filtered_pre_items, + ) + + +def _normalize_input_history( + input_history: str | tuple[TResponseInputItem, ...], +) -> list[TResponseInputItem]: + if isinstance(input_history, str): + return ItemHelpers.input_to_new_input_list(input_history) + return [deepcopy(item) for item in input_history] + + +def _run_item_to_plain_input(run_item: RunItem) -> TResponseInputItem: + return deepcopy(run_item.to_input_item()) + + +def _build_developer_message(transcript: list[TResponseInputItem]) -> TResponseInputItem: + if transcript: + summary_lines = [ + f"{idx + 1}. 
{_format_transcript_item(item)}" for idx, item in enumerate(transcript) + ] + else: + summary_lines = ["(no previous turns recorded)"] + + content = "Previous conversation before this handoff:\n" + "\n".join(summary_lines) + return {"role": "developer", "content": content} + + +def _format_transcript_item(item: TResponseInputItem) -> str: + role = item.get("role") + if isinstance(role, str): + prefix = role + name = item.get("name") + if isinstance(name, str) and name: + prefix = f"{prefix} ({name})" + content_str = _stringify_content(item.get("content")) + return f"{prefix}: {content_str}" if content_str else prefix + + item_type = item.get("type", "item") + rest = {k: v for k, v in item.items() if k != "type"} + try: + serialized = json.dumps(rest, ensure_ascii=False, default=str) + except TypeError: + serialized = str(rest) + return f"{item_type}: {serialized}" if serialized else str(item_type) + + +def _stringify_content(content: Any) -> str: + if content is None: + return "" + if isinstance(content, str): + return content + try: + return json.dumps(content, ensure_ascii=False, default=str) + except TypeError: + return str(content) + + +def _find_latest_user_turn( + transcript: list[TResponseInputItem], +) -> TResponseInputItem | None: + for item in reversed(transcript): + if item.get("role") == "user": + return deepcopy(item) + return None + + +def _get_run_item_role(run_item: RunItem) -> str | None: + role_candidate = run_item.to_input_item().get("role") + return role_candidate if isinstance(role_candidate, str) else None + + def _remove_tools_from_items(items: tuple[RunItem, ...]) -> tuple[RunItem, ...]: filtered_items = [] for item in items: diff --git a/src/agents/run.py b/src/agents/run.py index 58eef335e..6ce2a3565 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -196,6 +196,11 @@ class RunConfig: agent. See the documentation in `Handoff.input_filter` for more details. 
""" + nest_handoff_history: bool = True + """Wrap prior run history in a developer message before handing off when no custom input + filter is set. Set to False to preserve the raw transcript behavior from previous releases. + """ + input_guardrails: list[InputGuardrail[Any]] | None = None """A list of input guardrails to run on the initial run input.""" diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 441054dd4..3125e84d1 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -164,9 +164,9 @@ async def test_handoffs(): assert result.final_output == "done" assert len(result.raw_responses) == 3, "should have three model responses" - assert len(result.to_input_list()) == 7, ( - "should have 7 inputs: orig input, tool call, tool result, message, handoff, handoff" - "result, and done message" + assert len(result.to_input_list()) == 8, ( + "should have 8 inputs: dev summary, latest user input, tool call, tool result, message, " + "handoff, handoff result, and done message" ) assert result.last_agent == agent_1, "should have handed off to agent_1" @@ -270,6 +270,60 @@ async def test_handoff_filters(): ) +@pytest.mark.asyncio +async def test_default_handoff_history_nested_and_filters_respected(): + model = FakeModel() + agent_1 = Agent( + name="delegate", + model=model, + ) + agent_2 = Agent( + name="triage", + model=model, + handoffs=[agent_1], + ) + + model.add_multiple_turn_outputs( + [ + [get_text_message("triage summary"), get_handoff_tool_call(agent_1)], + [get_text_message("resolution")], + ] + ) + + result = await Runner.run(agent_2, input="user_message") + + assert isinstance(result.input, list) + assert result.input[0]["role"] == "developer" + assert "Previous conversation" in result.input[0]["content"] + assert "triage summary" in result.input[0]["content"] + assert result.input[1]["role"] == "user" + assert result.input[1]["content"] == "user_message" + + passthrough_model = FakeModel() + delegate = 
Agent(name="delegate", model=passthrough_model) + + def passthrough_filter(data: HandoffInputData) -> HandoffInputData: + return data + + triage_with_filter = Agent( + name="triage", + model=passthrough_model, + handoffs=[handoff(delegate, input_filter=passthrough_filter)], + ) + + passthrough_model.add_multiple_turn_outputs( + [ + [get_text_message("triage summary"), get_handoff_tool_call(delegate)], + [get_text_message("resolution")], + ] + ) + + filtered_result = await Runner.run(triage_with_filter, input="user_message") + + assert isinstance(filtered_result.input, str) + assert filtered_result.input == "user_message" + + @pytest.mark.asyncio async def test_async_input_filter_supported(): # DO NOT rename this without updating pyproject.toml diff --git a/tests/test_agent_runner_streamed.py b/tests/test_agent_runner_streamed.py index eca23464b..dffa4f84f 100644 --- a/tests/test_agent_runner_streamed.py +++ b/tests/test_agent_runner_streamed.py @@ -175,9 +175,9 @@ async def test_handoffs(): assert result.final_output == "done" assert len(result.raw_responses) == 3, "should have three model responses" - assert len(result.to_input_list()) == 7, ( - "should have 7 inputs: orig input, tool call, tool result, message, handoff, handoff" - "result, and done message" + assert len(result.to_input_list()) == 8, ( + "should have 8 inputs: dev summary, latest user input, tool call, tool result, message, " + "handoff, handoff result, and done message" ) assert result.last_agent == agent_1, "should have handed off to agent_1" diff --git a/tests/test_extension_filters.py b/tests/test_extension_filters.py index 11fba51ba..79768f229 100644 --- a/tests/test_extension_filters.py +++ b/tests/test_extension_filters.py @@ -2,7 +2,7 @@ from openai.types.responses.response_reasoning_item import ResponseReasoningItem from agents import Agent, HandoffInputData, RunContextWrapper -from agents.extensions.handoff_filters import remove_all_tools +from agents.extensions.handoff_filters import 
nest_handoff_history, remove_all_tools from agents.items import ( HandoffOutputItem, MessageOutputItem, @@ -25,6 +25,13 @@ def _get_message_input_item(content: str) -> TResponseInputItem: } +def _get_user_input_item(content: str) -> TResponseInputItem: + return { + "role": "user", + "content": content, + } + + def _get_reasoning_input_item() -> TResponseInputItem: return {"id": "rid", "summary": [], "type": "reasoning"} @@ -219,3 +226,41 @@ def test_removes_handoffs_from_history(): assert len(filtered_data.input_history) == 1 assert len(filtered_data.pre_handoff_items) == 1 assert len(filtered_data.new_items) == 1 + + +def test_nest_handoff_history_wraps_transcript() -> None: + data = HandoffInputData( + input_history=(_get_user_input_item("Hello"),), + pre_handoff_items=(_get_message_output_run_item("Assist reply"),), + new_items=( + _get_message_output_run_item("Handoff request"), + _get_handoff_output_run_item("transfer"), + ), + run_context=RunContextWrapper(context=()), + ) + + nested = nest_handoff_history(data) + + assert isinstance(nested.input_history, tuple) + assert nested.input_history[0]["role"] == "developer" + assert "Assist reply" in nested.input_history[0]["content"] + assert nested.input_history[1]["role"] == "user" + assert nested.input_history[1]["content"] == "Hello" + assert len(nested.pre_handoff_items) == 0 + assert nested.new_items == data.new_items + + +def test_nest_handoff_history_handles_missing_user() -> None: + data = HandoffInputData( + input_history=(), + pre_handoff_items=(_get_reasoning_output_run_item(),), + new_items=(), + run_context=RunContextWrapper(context=()), + ) + + nested = nest_handoff_history(data) + + assert isinstance(nested.input_history, tuple) + assert len(nested.input_history) == 1 + assert nested.input_history[0]["role"] == "developer" + assert "reasoning" in nested.input_history[0]["content"].lower() From 94447af1a1b36303ebf81a2111753f9b2dd937ca Mon Sep 17 00:00:00 2001 From: James Hills 
<70035505+jhills20@users.noreply.github.com> Date: Mon, 27 Oct 2025 10:11:44 -0400 Subject: [PATCH 02/19] Document nested handoff history defaults --- docs/handoffs.md | 31 +++++++++++++ docs/running_agents.md | 2 +- examples/handoffs/log_handoff_history.py | 56 ++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 examples/handoffs/log_handoff_history.py diff --git a/docs/handoffs.md b/docs/handoffs.md index 85707c6b3..990878727 100644 --- a/docs/handoffs.md +++ b/docs/handoffs.md @@ -82,6 +82,8 @@ handoff_obj = handoff( When a handoff occurs, it's as though the new agent takes over the conversation, and gets to see the entire previous conversation history. If you want to change this, you can set an [`input_filter`][agents.handoffs.Handoff.input_filter]. An input filter is a function that receives the existing input via a [`HandoffInputData`][agents.handoffs.HandoffInputData], and must return a new `HandoffInputData`. +By default the runner now wraps the prior transcript inside a developer-role summary message (see [`RunConfig.nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]). That default only applies when neither the handoff nor the run supplies an explicit `input_filter`, so existing code that already customizes the payload (including the examples in this repository) keeps its current behavior without changes. + There are some common patterns (for example removing all tool calls from the history), which are implemented for you in [`agents.extensions.handoff_filters`][] ```python @@ -98,6 +100,35 @@ handoff_obj = handoff( 1. This will automatically remove all tools from the history when `FAQ agent` is called. +### Inspecting handoff payloads + +When you are debugging a workflow it is often useful to print the exact transcript that will be sent to the next agent. 
You can do this by inserting a lightweight filter that logs the `HandoffInputData` and then returns either the unmodified payload or the output from [`nest_handoff_history`](agents.extensions.handoff_filters.nest_handoff_history). + +```python +import json + +from agents import Agent, HandoffInputData, handoff +from agents.extensions.handoff_filters import nest_handoff_history + + +def log_handoff_payload(data: HandoffInputData) -> HandoffInputData: + nested = nest_handoff_history(data) + history_items = nested.input_history if isinstance(nested.input_history, tuple) else () + for idx, item in enumerate(history_items, start=1): + print(f"Turn {idx}: {json.dumps(item, indent=2, ensure_ascii=False)}") + return nested + + +math_agent = Agent(name="Math agent") + +router = Agent( + name="Router", + handoffs=[handoff(math_agent, input_filter=log_handoff_payload)], +) +``` + +The new [examples/handoffs/log_handoff_history.py](https://github.com/openai/openai-agents-python/tree/main/examples/handoffs/log_handoff_history.py) script contains a complete runnable sample that prints the nested transcript every time a handoff occurs. + ## Recommended prompts To make sure that LLMs understand handoffs properly, we recommend including information about handoffs in your agents. We have a suggested prefix in [`agents.extensions.handoff_prompt.RECOMMENDED_PROMPT_PREFIX`][], or you can call [`agents.extensions.handoff_prompt.prompt_with_handoff_instructions`][] to automatically add recommended data to your prompts. diff --git a/docs/running_agents.md b/docs/running_agents.md index a5f18eceb..f12f34afd 100644 --- a/docs/running_agents.md +++ b/docs/running_agents.md @@ -51,7 +51,7 @@ The `run_config` parameter lets you configure some global settings for the agent - [`model_settings`][agents.run.RunConfig.model_settings]: Overrides agent-specific settings. For example, you can set a global `temperature` or `top_p`. 
- [`input_guardrails`][agents.run.RunConfig.input_guardrails], [`output_guardrails`][agents.run.RunConfig.output_guardrails]: A list of input or output guardrails to include on all runs. - [`handoff_input_filter`][agents.run.RunConfig.handoff_input_filter]: A global input filter to apply to all handoffs, if the handoff doesn't already have one. The input filter allows you to edit the inputs that are sent to the new agent. See the documentation in [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] for more details. -- [`nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]: When `True` (the default) the runner wraps the prior transcript in a developer-role summary message and keeps the latest user turn separate before invoking the next agent. Set this to `False` or provide a custom handoff filter if you prefer to pass through the raw transcript. You can also call [`nest_handoff_history`](agents.extensions.handoff_filters.nest_handoff_history) from your own filters to reuse the default behavior. +- [`nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]: When `True` (the default) the runner wraps the prior transcript in a developer-role summary message and keeps the latest user turn separate before invoking the next agent. Set this to `False` or provide a custom handoff filter if you prefer to pass through the raw transcript. You can also call [`nest_handoff_history`](agents.extensions.handoff_filters.nest_handoff_history) from your own filters to reuse the default behavior. All [`Runner` methods](agents.run.Runner) automatically create a `RunConfig` when you do not pass one, so the quickstarts and examples pick up this default automatically, and any explicit [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] callbacks continue to override it. - [`tracing_disabled`][agents.run.RunConfig.tracing_disabled]: Allows you to disable [tracing](tracing.md) for the entire run. 
- [`trace_include_sensitive_data`][agents.run.RunConfig.trace_include_sensitive_data]: Configures whether traces will include potentially sensitive data, such as LLM and tool call inputs/outputs. - [`workflow_name`][agents.run.RunConfig.workflow_name], [`trace_id`][agents.run.RunConfig.trace_id], [`group_id`][agents.run.RunConfig.group_id]: Sets the tracing workflow name, trace ID and trace group ID for the run. We recommend at least setting `workflow_name`. The group ID is an optional field that lets you link traces across multiple runs. diff --git a/examples/handoffs/log_handoff_history.py b/examples/handoffs/log_handoff_history.py new file mode 100644 index 000000000..572b5555b --- /dev/null +++ b/examples/handoffs/log_handoff_history.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +import asyncio +import json + +from agents import Agent, HandoffInputData, Runner, handoff +from agents.extensions.handoff_filters import nest_handoff_history +from agents.items import ItemHelpers + + +math_agent = Agent( + name="Math Agent", + instructions=( + "You are a friendly math expert. Explain your reasoning and finish with a clear answer." + ), +) + + +def log_handoff_history(data: HandoffInputData) -> HandoffInputData: + """Print the transcript that will be forwarded to the next agent.""" + + nested = nest_handoff_history(data) + history_items = ( + nested.input_history + if isinstance(nested.input_history, tuple) + else tuple(ItemHelpers.input_to_new_input_list(nested.input_history)) + ) + + print("\n--- Handoff transcript ---") + for idx, item in enumerate(history_items, start=1): + print(f"Turn {idx}: {json.dumps(item, indent=2, ensure_ascii=False)}") + print("--- end of transcript ---\n") + + return nested + + +router_agent = Agent( + name="Router", + instructions=( + "You greet the user and then call the math handoff tool whenever the user asks for" + " calculation help so the specialist can respond." 
+ ), + handoffs=[handoff(math_agent, input_filter=log_handoff_history)], +) + + +async def main() -> None: + result = await Runner.run( + router_agent, + "Hi there! Could you compute 784 + 219 and explain how you got the result?", + ) + print("Final output:\n", result.final_output) + + +if __name__ == "__main__": + asyncio.run(main()) From 98d154c4724e00f1b10857dd81e2af184ff352ab Mon Sep 17 00:00:00 2001 From: James Hills <70035505+jhills20@users.noreply.github.com> Date: Mon, 27 Oct 2025 10:12:00 -0400 Subject: [PATCH 03/19] Improve nested handoff conversation history --- docs/handoffs.md | 4 +- docs/running_agents.md | 2 +- src/agents/extensions/handoff_filters.py | 59 ++++++++++++++++++++++-- tests/test_agent_runner.py | 35 +++++++++++++- tests/test_extension_filters.py | 50 +++++++++++++++++++- 5 files changed, 140 insertions(+), 10 deletions(-) diff --git a/docs/handoffs.md b/docs/handoffs.md index 990878727..64675a1b8 100644 --- a/docs/handoffs.md +++ b/docs/handoffs.md @@ -82,7 +82,7 @@ handoff_obj = handoff( When a handoff occurs, it's as though the new agent takes over the conversation, and gets to see the entire previous conversation history. If you want to change this, you can set an [`input_filter`][agents.handoffs.Handoff.input_filter]. An input filter is a function that receives the existing input via a [`HandoffInputData`][agents.handoffs.HandoffInputData], and must return a new `HandoffInputData`. -By default the runner now wraps the prior transcript inside a developer-role summary message (see [`RunConfig.nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]). That default only applies when neither the handoff nor the run supplies an explicit `input_filter`, so existing code that already customizes the payload (including the examples in this repository) keeps its current behavior without changes. 
+By default the runner now wraps the prior transcript inside a developer-role summary message (see [`RunConfig.nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]). The summary appears inside a `<CONVERSATION HISTORY>` block that keeps appending new turns when multiple handoffs happen during the same run. That default only applies when neither the handoff nor the run supplies an explicit `input_filter`, so existing code that already customizes the payload (including the examples in this repository) keeps its current behavior without changes. There are some common patterns (for example removing all tool calls from the history), which are implemented for you in [`agents.extensions.handoff_filters`][] @@ -127,7 +127,7 @@ router = Agent( ) ``` -The new [examples/handoffs/log_handoff_history.py](https://github.com/openai/openai-agents-python/tree/main/examples/handoffs/log_handoff_history.py) script contains a complete runnable sample that prints the nested transcript every time a handoff occurs. +The new [examples/handoffs/log_handoff_history.py](https://github.com/openai/openai-agents-python/tree/main/examples/handoffs/log_handoff_history.py) script contains a complete runnable sample that prints the nested transcript every time a handoff occurs so you can see the `<CONVERSATION HISTORY>` block that will be passed to the next agent. ## Recommended prompts diff --git a/docs/running_agents.md b/docs/running_agents.md index f12f34afd..6a53b20fb 100644 --- a/docs/running_agents.md +++ b/docs/running_agents.md @@ -51,7 +51,7 @@ The `run_config` parameter lets you configure some global settings for the agent - [`model_settings`][agents.run.RunConfig.model_settings]: Overrides agent-specific settings. For example, you can set a global `temperature` or `top_p`. - [`input_guardrails`][agents.run.RunConfig.input_guardrails], [`output_guardrails`][agents.run.RunConfig.output_guardrails]: A list of input or output guardrails to include on all runs. 
- [`handoff_input_filter`][agents.run.RunConfig.handoff_input_filter]: A global input filter to apply to all handoffs, if the handoff doesn't already have one. The input filter allows you to edit the inputs that are sent to the new agent. See the documentation in [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] for more details. -- [`nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]: When `True` (the default) the runner wraps the prior transcript in a developer-role summary message and keeps the latest user turn separate before invoking the next agent. Set this to `False` or provide a custom handoff filter if you prefer to pass through the raw transcript. You can also call [`nest_handoff_history`](agents.extensions.handoff_filters.nest_handoff_history) from your own filters to reuse the default behavior. All [`Runner` methods](agents.run.Runner) automatically create a `RunConfig` when you do not pass one, so the quickstarts and examples pick up this default automatically, and any explicit [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] callbacks continue to override it. +- [`nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]: When `True` (the default) the runner wraps the prior transcript in a developer-role summary message, placing the content inside a `<CONVERSATION HISTORY>` block while keeping the latest user turn separate before invoking the next agent. The block automatically appends new turns as subsequent handoffs occur. Set this to `False` or provide a custom handoff filter if you prefer to pass through the raw transcript. You can also call [`nest_handoff_history`](agents.extensions.handoff_filters.nest_handoff_history) from your own filters to reuse the default behavior. 
All [`Runner` methods](agents.run.Runner) automatically create a `RunConfig` when you do not pass one, so the quickstarts and examples pick up this default automatically, and any explicit [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] callbacks continue to override it. - [`tracing_disabled`][agents.run.RunConfig.tracing_disabled]: Allows you to disable [tracing](tracing.md) for the entire run. - [`trace_include_sensitive_data`][agents.run.RunConfig.trace_include_sensitive_data]: Configures whether traces will include potentially sensitive data, such as LLM and tool call inputs/outputs. - [`workflow_name`][agents.run.RunConfig.workflow_name], [`trace_id`][agents.run.RunConfig.trace_id], [`group_id`][agents.run.RunConfig.group_id]: Sets the tracing workflow name, trace ID and trace group ID for the run. We recommend at least setting `workflow_name`. The group ID is an optional field that lets you link traces across multiple runs. diff --git a/src/agents/extensions/handoff_filters.py b/src/agents/extensions/handoff_filters.py index 03a6169d9..055db2e8b 100644 --- a/src/agents/extensions/handoff_filters.py +++ b/src/agents/extensions/handoff_filters.py @@ -39,15 +39,22 @@ def remove_all_tools(handoff_input_data: HandoffInputData) -> HandoffInputData: ) +_CONVERSATION_HISTORY_START = "" +_CONVERSATION_HISTORY_END = "" +_NEST_HISTORY_METADATA_KEY = "nest_handoff_history" +_NEST_HISTORY_TRANSCRIPT_KEY = "transcript" + + def nest_handoff_history(handoff_input_data: HandoffInputData) -> HandoffInputData: """Summarizes the previous transcript into a developer message for the next agent.""" normalized_history = _normalize_input_history(handoff_input_data.input_history) + flattened_history = _flatten_nested_history_messages(normalized_history) pre_items_as_inputs = [ _run_item_to_plain_input(item) for item in handoff_input_data.pre_handoff_items ] new_items_as_inputs = [_run_item_to_plain_input(item) for item in handoff_input_data.new_items] - transcript = 
normalized_history + pre_items_as_inputs + new_items_as_inputs + transcript = flattened_history + pre_items_as_inputs + new_items_as_inputs developer_message = _build_developer_message(transcript) latest_user = _find_latest_user_turn(transcript) @@ -80,15 +87,23 @@ def _run_item_to_plain_input(run_item: RunItem) -> TResponseInputItem: def _build_developer_message(transcript: list[TResponseInputItem]) -> TResponseInputItem: - if transcript: + transcript_copy = [deepcopy(item) for item in transcript] + if transcript_copy: summary_lines = [ - f"{idx + 1}. {_format_transcript_item(item)}" for idx, item in enumerate(transcript) + f"{idx + 1}. {_format_transcript_item(item)}" for idx, item in enumerate(transcript_copy) ] else: summary_lines = ["(no previous turns recorded)"] - content = "Previous conversation before this handoff:\n" + "\n".join(summary_lines) - return {"role": "developer", "content": content} + content_lines = [_CONVERSATION_HISTORY_START, *summary_lines, _CONVERSATION_HISTORY_END] + content = "\n".join(content_lines) + return { + "role": "developer", + "content": content, + "metadata": { + _NEST_HISTORY_METADATA_KEY: {_NEST_HISTORY_TRANSCRIPT_KEY: transcript_copy} + }, + } def _format_transcript_item(item: TResponseInputItem) -> str: @@ -130,6 +145,40 @@ def _find_latest_user_turn( return None +def _flatten_nested_history_messages( + items: list[TResponseInputItem], +) -> list[TResponseInputItem]: + flattened: list[TResponseInputItem] = [] + for item in items: + nested_transcript = _extract_nested_history_transcript(item) + if nested_transcript is not None: + flattened.extend(nested_transcript) + continue + flattened.append(deepcopy(item)) + return flattened + + +def _extract_nested_history_transcript( + item: TResponseInputItem, +) -> list[TResponseInputItem] | None: + if item.get("role") != "developer": + return None + metadata = item.get("metadata") + if not isinstance(metadata, dict): + return None + payload = 
metadata.get(_NEST_HISTORY_METADATA_KEY) + if not isinstance(payload, dict): + return None + transcript = payload.get(_NEST_HISTORY_TRANSCRIPT_KEY) + if not isinstance(transcript, list): + return None + normalized: list[TResponseInputItem] = [] + for entry in transcript: + if isinstance(entry, dict): + normalized.append(deepcopy(entry)) + return normalized if normalized else [] + + def _get_run_item_role(run_item: RunItem) -> str | None: role_candidate = run_item.to_input_item().get("role") return role_candidate if isinstance(role_candidate, str) else None diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 3125e84d1..64a5e9ff8 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -294,7 +294,7 @@ async def test_default_handoff_history_nested_and_filters_respected(): assert isinstance(result.input, list) assert result.input[0]["role"] == "developer" - assert "Previous conversation" in result.input[0]["content"] + assert "" in result.input[0]["content"] assert "triage summary" in result.input[0]["content"] assert result.input[1]["role"] == "user" assert result.input[1]["content"] == "user_message" @@ -324,6 +324,39 @@ def passthrough_filter(data: HandoffInputData) -> HandoffInputData: assert filtered_result.input == "user_message" +@pytest.mark.asyncio +async def test_default_handoff_history_accumulates_across_multiple_handoffs(): + triage_model = FakeModel() + delegate_model = FakeModel() + closer_model = FakeModel() + + closer = Agent(name="closer", model=closer_model) + delegate = Agent(name="delegate", model=delegate_model, handoffs=[closer]) + triage = Agent(name="triage", model=triage_model, handoffs=[delegate]) + + triage_model.add_multiple_turn_outputs( + [[get_text_message("triage summary"), get_handoff_tool_call(delegate)]] + ) + delegate_model.add_multiple_turn_outputs( + [[get_text_message("delegate update"), get_handoff_tool_call(closer)]] + ) + 
closer_model.add_multiple_turn_outputs([[get_text_message("resolution")]]) + + result = await Runner.run(triage, input="user_question") + + assert result.final_output == "resolution" + assert closer_model.first_turn_args is not None + closer_input = closer_model.first_turn_args["input"] + assert isinstance(closer_input, list) + assert closer_input[0]["role"] == "developer" + developer_content = closer_input[0]["content"] + assert developer_content.count("") == 1 + assert "triage summary" in developer_content + assert "delegate update" in developer_content + assert closer_input[1]["role"] == "user" + assert closer_input[1]["content"] == "user_question" + + @pytest.mark.asyncio async def test_async_input_filter_supported(): # DO NOT rename this without updating pyproject.toml diff --git a/tests/test_extension_filters.py b/tests/test_extension_filters.py index 79768f229..afb12f18e 100644 --- a/tests/test_extension_filters.py +++ b/tests/test_extension_filters.py @@ -243,7 +243,19 @@ def test_nest_handoff_history_wraps_transcript() -> None: assert isinstance(nested.input_history, tuple) assert nested.input_history[0]["role"] == "developer" - assert "Assist reply" in nested.input_history[0]["content"] + developer_content = nested.input_history[0]["content"] + assert "" in developer_content + assert "" in developer_content + assert "Assist reply" in developer_content + metadata = nested.input_history[0].get("metadata") + assert isinstance(metadata, dict) + history_payload = metadata.get("nest_handoff_history") + assert isinstance(history_payload, dict) + transcript = history_payload.get("transcript") + assert isinstance(transcript, list) + assert len(transcript) == 4 + assert transcript[0]["role"] == "user" + assert transcript[1]["role"] == "assistant" assert nested.input_history[1]["role"] == "user" assert nested.input_history[1]["content"] == "Hello" assert len(nested.pre_handoff_items) == 0 @@ -264,3 +276,39 @@ def test_nest_handoff_history_handles_missing_user() -> 
None: assert len(nested.input_history) == 1 assert nested.input_history[0]["role"] == "developer" assert "reasoning" in nested.input_history[0]["content"].lower() + + +def test_nest_handoff_history_appends_existing_history() -> None: + first = HandoffInputData( + input_history=(_get_user_input_item("Hello"),), + pre_handoff_items=(_get_message_output_run_item("First reply"),), + new_items=(), + run_context=RunContextWrapper(context=()), + ) + + first_nested = nest_handoff_history(first) + developer_message = first_nested.input_history[0] + + follow_up_history = ( + developer_message, + _get_user_input_item("Another question"), + ) + + second = HandoffInputData( + input_history=follow_up_history, + pre_handoff_items=(_get_message_output_run_item("Second reply"),), + new_items=(_get_handoff_output_run_item("transfer"),), + run_context=RunContextWrapper(context=()), + ) + + second_nested = nest_handoff_history(second) + + assert isinstance(second_nested.input_history, tuple) + developer = second_nested.input_history[0] + assert developer["role"] == "developer" + content = developer["content"] + assert content.count("") == 1 + assert content.count("") == 1 + assert "First reply" in content + assert "Second reply" in content + assert "Another question" in content From 52f0a1ee5a1bbe1e9e38888da167def5912fdb65 Mon Sep 17 00:00:00 2001 From: James Hills <70035505+jhills20@users.noreply.github.com> Date: Mon, 27 Oct 2025 10:39:24 -0400 Subject: [PATCH 04/19] Remove metadata from nested handoff history --- docs/handoffs.md | 29 ----------- examples/handoffs/log_handoff_history.py | 56 --------------------- src/agents/extensions/handoff_filters.py | 62 ++++++++++++++++++------ tests/test_extension_filters.py | 9 ---- 4 files changed, 46 insertions(+), 110 deletions(-) delete mode 100644 examples/handoffs/log_handoff_history.py diff --git a/docs/handoffs.md b/docs/handoffs.md index 64675a1b8..c909b61bc 100644 --- a/docs/handoffs.md +++ b/docs/handoffs.md @@ -100,35 +100,6 @@ 
handoff_obj = handoff( 1. This will automatically remove all tools from the history when `FAQ agent` is called. -### Inspecting handoff payloads - -When you are debugging a workflow it is often useful to print the exact transcript that will be sent to the next agent. You can do this by inserting a lightweight filter that logs the `HandoffInputData` and then returns either the unmodified payload or the output from [`nest_handoff_history`](agents.extensions.handoff_filters.nest_handoff_history). - -```python -import json - -from agents import Agent, HandoffInputData, handoff -from agents.extensions.handoff_filters import nest_handoff_history - - -def log_handoff_payload(data: HandoffInputData) -> HandoffInputData: - nested = nest_handoff_history(data) - history_items = nested.input_history if isinstance(nested.input_history, tuple) else () - for idx, item in enumerate(history_items, start=1): - print(f"Turn {idx}: {json.dumps(item, indent=2, ensure_ascii=False)}") - return nested - - -math_agent = Agent(name="Math agent") - -router = Agent( - name="Router", - handoffs=[handoff(math_agent, input_filter=log_handoff_payload)], -) -``` - -The new [examples/handoffs/log_handoff_history.py](https://github.com/openai/openai-agents-python/tree/main/examples/handoffs/log_handoff_history.py) script contains a complete runnable sample that prints the nested transcript every time a handoff occurs so you can see the `` block that will be passed to the next agent. - ## Recommended prompts To make sure that LLMs understand handoffs properly, we recommend including information about handoffs in your agents. We have a suggested prefix in [`agents.extensions.handoff_prompt.RECOMMENDED_PROMPT_PREFIX`][], or you can call [`agents.extensions.handoff_prompt.prompt_with_handoff_instructions`][] to automatically add recommended data to your prompts. 
diff --git a/examples/handoffs/log_handoff_history.py b/examples/handoffs/log_handoff_history.py deleted file mode 100644 index 572b5555b..000000000 --- a/examples/handoffs/log_handoff_history.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import annotations - -import asyncio -import json - -from agents import Agent, HandoffInputData, Runner, handoff -from agents.extensions.handoff_filters import nest_handoff_history -from agents.items import ItemHelpers - - -math_agent = Agent( - name="Math Agent", - instructions=( - "You are a friendly math expert. Explain your reasoning and finish with a clear answer." - ), -) - - -def log_handoff_history(data: HandoffInputData) -> HandoffInputData: - """Print the transcript that will be forwarded to the next agent.""" - - nested = nest_handoff_history(data) - history_items = ( - nested.input_history - if isinstance(nested.input_history, tuple) - else tuple(ItemHelpers.input_to_new_input_list(nested.input_history)) - ) - - print("\n--- Handoff transcript ---") - for idx, item in enumerate(history_items, start=1): - print(f"Turn {idx}: {json.dumps(item, indent=2, ensure_ascii=False)}") - print("--- end of transcript ---\n") - - return nested - - -router_agent = Agent( - name="Router", - instructions=( - "You greet the user and then call the math handoff tool whenever the user asks for" - " calculation help so the specialist can respond." - ), - handoffs=[handoff(math_agent, input_filter=log_handoff_history)], -) - - -async def main() -> None: - result = await Runner.run( - router_agent, - "Hi there! 
Could you compute 784 + 219 and explain how you got the result?", - ) - print("Final output:\n", result.final_output) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/src/agents/extensions/handoff_filters.py b/src/agents/extensions/handoff_filters.py index 055db2e8b..33bebec1e 100644 --- a/src/agents/extensions/handoff_filters.py +++ b/src/agents/extensions/handoff_filters.py @@ -41,8 +41,6 @@ def remove_all_tools(handoff_input_data: HandoffInputData) -> HandoffInputData: _CONVERSATION_HISTORY_START = "" _CONVERSATION_HISTORY_END = "" -_NEST_HISTORY_METADATA_KEY = "nest_handoff_history" -_NEST_HISTORY_TRANSCRIPT_KEY = "transcript" def nest_handoff_history(handoff_input_data: HandoffInputData) -> HandoffInputData: @@ -100,9 +98,6 @@ def _build_developer_message(transcript: list[TResponseInputItem]) -> TResponseI return { "role": "developer", "content": content, - "metadata": { - _NEST_HISTORY_METADATA_KEY: {_NEST_HISTORY_TRANSCRIPT_KEY: transcript_copy} - }, } @@ -163,20 +158,55 @@ def _extract_nested_history_transcript( ) -> list[TResponseInputItem] | None: if item.get("role") != "developer": return None - metadata = item.get("metadata") - if not isinstance(metadata, dict): + content = item.get("content") + if not isinstance(content, str): return None - payload = metadata.get(_NEST_HISTORY_METADATA_KEY) - if not isinstance(payload, dict): + start_idx = content.find(_CONVERSATION_HISTORY_START) + end_idx = content.find(_CONVERSATION_HISTORY_END) + if start_idx == -1 or end_idx == -1 or end_idx <= start_idx: return None - transcript = payload.get(_NEST_HISTORY_TRANSCRIPT_KEY) - if not isinstance(transcript, list): + start_idx += len(_CONVERSATION_HISTORY_START) + body = content[start_idx:end_idx] + lines = [line.strip() for line in body.splitlines() if line.strip()] + parsed: list[TResponseInputItem] = [] + for line in lines: + parsed_item = _parse_summary_line(line) + if parsed_item is not None: + parsed.append(parsed_item) + return parsed + + +def 
_parse_summary_line(line: str) -> TResponseInputItem | None: + stripped = line.strip() + if not stripped: return None - normalized: list[TResponseInputItem] = [] - for entry in transcript: - if isinstance(entry, dict): - normalized.append(deepcopy(entry)) - return normalized if normalized else [] + dot_index = stripped.find(".") + if dot_index != -1 and stripped[:dot_index].isdigit(): + stripped = stripped[dot_index + 1 :].lstrip() + role_part, sep, remainder = stripped.partition(":") + if not sep: + return None + role_text = role_part.strip() + if not role_text: + return None + role, name = _split_role_and_name(role_text) + reconstructed: TResponseInputItem = {"role": role} + if name: + reconstructed["name"] = name + content = remainder.strip() + if content: + reconstructed["content"] = content + return reconstructed + + +def _split_role_and_name(role_text: str) -> tuple[str, str | None]: + if role_text.endswith(")") and "(" in role_text: + open_idx = role_text.rfind("(") + possible_name = role_text[open_idx + 1 : -1].strip() + role_candidate = role_text[:open_idx].strip() + if possible_name: + return (role_candidate or "developer", possible_name) + return (role_text or "developer", None) def _get_run_item_role(run_item: RunItem) -> str | None: diff --git a/tests/test_extension_filters.py b/tests/test_extension_filters.py index afb12f18e..95319d010 100644 --- a/tests/test_extension_filters.py +++ b/tests/test_extension_filters.py @@ -247,15 +247,6 @@ def test_nest_handoff_history_wraps_transcript() -> None: assert "" in developer_content assert "" in developer_content assert "Assist reply" in developer_content - metadata = nested.input_history[0].get("metadata") - assert isinstance(metadata, dict) - history_payload = metadata.get("nest_handoff_history") - assert isinstance(history_payload, dict) - transcript = history_payload.get("transcript") - assert isinstance(transcript, list) - assert len(transcript) == 4 - assert transcript[0]["role"] == "user" - assert 
transcript[1]["role"] == "assistant" assert nested.input_history[1]["role"] == "user" assert nested.input_history[1]["content"] == "Hello" assert len(nested.pre_handoff_items) == 0 From 71df74b4bfd0637071310053ee2080411fec21b0 Mon Sep 17 00:00:00 2001 From: James Hills <70035505+jhills20@users.noreply.github.com> Date: Mon, 27 Oct 2025 10:51:10 -0400 Subject: [PATCH 05/19] Fix mypy issues in nested handoff helper --- src/agents/extensions/handoff_filters.py | 12 +++++---- tests/test_agent_runner.py | 33 ++++++++++++++++------- tests/test_extension_filters.py | 34 ++++++++++++++++++------ 3 files changed, 57 insertions(+), 22 deletions(-) diff --git a/src/agents/extensions/handoff_filters.py b/src/agents/extensions/handoff_filters.py index 33bebec1e..b7c20e631 100644 --- a/src/agents/extensions/handoff_filters.py +++ b/src/agents/extensions/handoff_filters.py @@ -2,7 +2,7 @@ import json from copy import deepcopy -from typing import Any +from typing import Any, cast from ..handoffs import HandoffInputData from ..items import ( @@ -88,17 +88,19 @@ def _build_developer_message(transcript: list[TResponseInputItem]) -> TResponseI transcript_copy = [deepcopy(item) for item in transcript] if transcript_copy: summary_lines = [ - f"{idx + 1}. {_format_transcript_item(item)}" for idx, item in enumerate(transcript_copy) + f"{idx + 1}. 
{_format_transcript_item(item)}" + for idx, item in enumerate(transcript_copy) ] else: summary_lines = ["(no previous turns recorded)"] content_lines = [_CONVERSATION_HISTORY_START, *summary_lines, _CONVERSATION_HISTORY_END] content = "\n".join(content_lines) - return { + developer_message: dict[str, Any] = { "role": "developer", "content": content, } + return cast(TResponseInputItem, developer_message) def _format_transcript_item(item: TResponseInputItem) -> str: @@ -190,13 +192,13 @@ def _parse_summary_line(line: str) -> TResponseInputItem | None: if not role_text: return None role, name = _split_role_and_name(role_text) - reconstructed: TResponseInputItem = {"role": role} + reconstructed: dict[str, Any] = {"role": role} if name: reconstructed["name"] = name content = remainder.strip() if content: reconstructed["content"] = content - return reconstructed + return cast(TResponseInputItem, reconstructed) def _split_role_and_name(role_text: str) -> tuple[str, str | None]: diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 64a5e9ff8..92f00b522 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -43,6 +43,14 @@ from .utils.simple_session import SimpleListSession +def _as_message(item: Any) -> dict[str, Any]: + assert isinstance(item, dict) + role = item.get("role") + assert isinstance(role, str) + assert role in {"assistant", "user", "system", "developer"} + return cast(dict[str, Any], item) + + @pytest.mark.asyncio async def test_simple_first_run(): model = FakeModel() @@ -293,11 +301,15 @@ async def test_default_handoff_history_nested_and_filters_respected(): result = await Runner.run(agent_2, input="user_message") assert isinstance(result.input, list) - assert result.input[0]["role"] == "developer" - assert "" in result.input[0]["content"] - assert "triage summary" in result.input[0]["content"] - assert result.input[1]["role"] == "user" - assert result.input[1]["content"] == "user_message" + developer = 
_as_message(result.input[0]) + assert developer["role"] == "developer" + developer_content = developer["content"] + assert isinstance(developer_content, str) + assert "" in developer_content + assert "triage summary" in developer_content + latest_user = _as_message(result.input[1]) + assert latest_user["role"] == "user" + assert latest_user["content"] == "user_message" passthrough_model = FakeModel() delegate = Agent(name="delegate", model=passthrough_model) @@ -348,13 +360,16 @@ async def test_default_handoff_history_accumulates_across_multiple_handoffs(): assert closer_model.first_turn_args is not None closer_input = closer_model.first_turn_args["input"] assert isinstance(closer_input, list) - assert closer_input[0]["role"] == "developer" - developer_content = closer_input[0]["content"] + developer = _as_message(closer_input[0]) + assert developer["role"] == "developer" + developer_content = developer["content"] + assert isinstance(developer_content, str) assert developer_content.count("") == 1 assert "triage summary" in developer_content assert "delegate update" in developer_content - assert closer_input[1]["role"] == "user" - assert closer_input[1]["content"] == "user_question" + latest_user = _as_message(closer_input[1]) + assert latest_user["role"] == "user" + assert latest_user["content"] == "user_question" @pytest.mark.asyncio diff --git a/tests/test_extension_filters.py b/tests/test_extension_filters.py index 95319d010..068d5d188 100644 --- a/tests/test_extension_filters.py +++ b/tests/test_extension_filters.py @@ -1,3 +1,5 @@ +from typing import Any, cast + from openai.types.responses import ResponseOutputMessage, ResponseOutputText from openai.types.responses.response_reasoning_item import ResponseReasoningItem @@ -96,6 +98,14 @@ def _get_reasoning_output_run_item() -> ReasoningItem: ) +def _as_message(item: TResponseInputItem) -> dict[str, Any]: + assert isinstance(item, dict) + role = item.get("role") + assert isinstance(role, str) + assert role in 
{"assistant", "user", "system", "developer"} + return cast(dict[str, Any], item) + + def test_empty_data(): handoff_input_data = HandoffInputData( input_history=(), @@ -242,13 +252,16 @@ def test_nest_handoff_history_wraps_transcript() -> None: nested = nest_handoff_history(data) assert isinstance(nested.input_history, tuple) - assert nested.input_history[0]["role"] == "developer" - developer_content = nested.input_history[0]["content"] + developer = _as_message(nested.input_history[0]) + assert developer["role"] == "developer" + developer_content = developer["content"] + assert isinstance(developer_content, str) assert "" in developer_content assert "" in developer_content assert "Assist reply" in developer_content - assert nested.input_history[1]["role"] == "user" - assert nested.input_history[1]["content"] == "Hello" + latest_user = _as_message(nested.input_history[1]) + assert latest_user["role"] == "user" + assert latest_user["content"] == "Hello" assert len(nested.pre_handoff_items) == 0 assert nested.new_items == data.new_items @@ -265,8 +278,11 @@ def test_nest_handoff_history_handles_missing_user() -> None: assert isinstance(nested.input_history, tuple) assert len(nested.input_history) == 1 - assert nested.input_history[0]["role"] == "developer" - assert "reasoning" in nested.input_history[0]["content"].lower() + developer = _as_message(nested.input_history[0]) + assert developer["role"] == "developer" + developer_content = developer["content"] + assert isinstance(developer_content, str) + assert "reasoning" in developer_content.lower() def test_nest_handoff_history_appends_existing_history() -> None: @@ -278,9 +294,10 @@ def test_nest_handoff_history_appends_existing_history() -> None: ) first_nested = nest_handoff_history(first) + assert isinstance(first_nested.input_history, tuple) developer_message = first_nested.input_history[0] - follow_up_history = ( + follow_up_history: tuple[TResponseInputItem, ...] 
= ( developer_message, _get_user_input_item("Another question"), ) @@ -295,9 +312,10 @@ def test_nest_handoff_history_appends_existing_history() -> None: second_nested = nest_handoff_history(second) assert isinstance(second_nested.input_history, tuple) - developer = second_nested.input_history[0] + developer = _as_message(second_nested.input_history[0]) assert developer["role"] == "developer" content = developer["content"] + assert isinstance(content, str) assert content.count("") == 1 assert content.count("") == 1 assert "First reply" in content From 17aa135bb97481e394488a896ae9c03045c8818c Mon Sep 17 00:00:00 2001 From: James Hills <70035505+jhills20@users.noreply.github.com> Date: Fri, 7 Nov 2025 16:44:41 -0500 Subject: [PATCH 06/19] Add handoff history mapper --- docs/handoffs.md | 2 +- docs/running_agents.md | 5 +- src/agents/_run_impl.py | 5 +- src/agents/extensions/handoff_filters.py | 48 ++++++++-------- src/agents/handoffs.py | 3 + src/agents/run.py | 14 ++++- tests/test_agent_runner.py | 41 +++++++------- tests/test_agent_runner_streamed.py | 6 +- tests/test_extension_filters.py | 71 +++++++++++++++++------- 9 files changed, 118 insertions(+), 77 deletions(-) diff --git a/docs/handoffs.md b/docs/handoffs.md index c909b61bc..1211f3843 100644 --- a/docs/handoffs.md +++ b/docs/handoffs.md @@ -82,7 +82,7 @@ handoff_obj = handoff( When a handoff occurs, it's as though the new agent takes over the conversation, and gets to see the entire previous conversation history. If you want to change this, you can set an [`input_filter`][agents.handoffs.Handoff.input_filter]. An input filter is a function that receives the existing input via a [`HandoffInputData`][agents.handoffs.HandoffInputData], and must return a new `HandoffInputData`. -By default the runner now wraps the prior transcript inside a developer-role summary message (see [`RunConfig.nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]). 
The summary appears inside a `<CONVERSATION HISTORY>` block that keeps appending new turns when multiple handoffs happen during the same run. That default only applies when neither the handoff nor the run supplies an explicit `input_filter`, so existing code that already customizes the payload (including the examples in this repository) keeps its current behavior without changes. +By default the runner now collapses the prior transcript into a single assistant summary message (see [`RunConfig.nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]). The summary appears inside a `<CONVERSATION HISTORY>` block that keeps appending new turns when multiple handoffs happen during the same run. You can provide your own mapping function via [`RunConfig.handoff_history_mapper`][agents.run.RunConfig.handoff_history_mapper] to replace the generated message without writing a full `input_filter`. That default only applies when neither the handoff nor the run supplies an explicit `input_filter`, so existing code that already customizes the payload (including the examples in this repository) keeps its current behavior without changes. There are some common patterns (for example removing all tool calls from the history), which are implemented for you in [`agents.extensions.handoff_filters`][] diff --git a/docs/running_agents.md b/docs/running_agents.md index 6a53b20fb..7f6b8d2b5 100644 --- a/docs/running_agents.md +++ b/docs/running_agents.md @@ -51,13 +51,14 @@ The `run_config` parameter lets you configure some global settings for the agent - [`model_settings`][agents.run.RunConfig.model_settings]: Overrides agent-specific settings. For example, you can set a global `temperature` or `top_p`. - [`input_guardrails`][agents.run.RunConfig.input_guardrails], [`output_guardrails`][agents.run.RunConfig.output_guardrails]: A list of input or output guardrails to include on all runs. 
- [`handoff_input_filter`][agents.run.RunConfig.handoff_input_filter]: A global input filter to apply to all handoffs, if the handoff doesn't already have one. The input filter allows you to edit the inputs that are sent to the new agent. See the documentation in [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] for more details. -- [`nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]: When `True` (the default) the runner wraps the prior transcript in a developer-role summary message, placing the content inside a `<CONVERSATION HISTORY>` block while keeping the latest user turn separate before invoking the next agent. The block automatically appends new turns as subsequent handoffs occur. Set this to `False` or provide a custom handoff filter if you prefer to pass through the raw transcript. You can also call [`nest_handoff_history`](agents.extensions.handoff_filters.nest_handoff_history) from your own filters to reuse the default behavior. All [`Runner` methods](agents.run.Runner) automatically create a `RunConfig` when you do not pass one, so the quickstarts and examples pick up this default automatically, and any explicit [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] callbacks continue to override it. +- [`nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]: When `True` (the default) the runner collapses the prior transcript into a single assistant message before invoking the next agent. The helper places the content inside a `<CONVERSATION HISTORY>` block that keeps appending new turns as subsequent handoffs occur. Set this to `False` or provide a custom handoff filter if you prefer to pass through the raw transcript. All [`Runner` methods](agents.run.Runner) automatically create a `RunConfig` when you do not pass one, so the quickstarts and examples pick up this default automatically, and any explicit [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] callbacks continue to override it. 
+- [`handoff_history_mapper`][agents.run.RunConfig.handoff_history_mapper]: Optional callable that receives the normalized transcript (history + handoff items) whenever `nest_handoff_history` is `True`. It must return the exact list of input items to forward to the next agent, allowing you to replace the built-in summary without writing a full handoff filter. - [`tracing_disabled`][agents.run.RunConfig.tracing_disabled]: Allows you to disable [tracing](tracing.md) for the entire run. - [`trace_include_sensitive_data`][agents.run.RunConfig.trace_include_sensitive_data]: Configures whether traces will include potentially sensitive data, such as LLM and tool call inputs/outputs. - [`workflow_name`][agents.run.RunConfig.workflow_name], [`trace_id`][agents.run.RunConfig.trace_id], [`group_id`][agents.run.RunConfig.group_id]: Sets the tracing workflow name, trace ID and trace group ID for the run. We recommend at least setting `workflow_name`. The group ID is an optional field that lets you link traces across multiple runs. - [`trace_metadata`][agents.run.RunConfig.trace_metadata]: Metadata to include on all traces. -By default, the SDK now nests prior turns inside a developer summary message whenever an agent hands off to another agent. This reduces repeated assistant messages and keeps the most recent user turn explicit for the receiving agent. If you'd like to return to the legacy behavior, pass `RunConfig(nest_handoff_history=False)` or supply a `handoff_input_filter` that forwards the conversation exactly as you need. +By default, the SDK now nests prior turns inside a single assistant summary message whenever an agent hands off to another agent. This reduces repeated assistant messages and keeps the full transcript inside a single block that new agents can scan quickly. 
If you'd like to return to the legacy behavior, pass `RunConfig(nest_handoff_history=False)` or supply a `handoff_input_filter` (or `handoff_history_mapper`) that forwards the conversation exactly as you need. ## Conversations/chat threads diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index 9627ca3ec..839718aaf 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -1042,7 +1042,10 @@ async def execute_handoffs( pre_step_items = list(filtered.pre_handoff_items) new_step_items = list(filtered.new_items) elif run_config.nest_handoff_history and handoff_input_data is not None: - nested = nest_handoff_history(handoff_input_data) + nested = nest_handoff_history( + handoff_input_data, + history_mapper=run_config.handoff_history_mapper, + ) original_input = ( nested.input_history if isinstance(nested.input_history, str) diff --git a/src/agents/extensions/handoff_filters.py b/src/agents/extensions/handoff_filters.py index b7c20e631..48ad3fcb8 100644 --- a/src/agents/extensions/handoff_filters.py +++ b/src/agents/extensions/handoff_filters.py @@ -4,7 +4,7 @@ from copy import deepcopy from typing import Any, cast -from ..handoffs import HandoffInputData +from ..handoffs import HandoffHistoryMapper, HandoffInputData from ..items import ( HandoffCallItem, HandoffOutputItem, @@ -43,8 +43,12 @@ def remove_all_tools(handoff_input_data: HandoffInputData) -> HandoffInputData: _CONVERSATION_HISTORY_END = "" -def nest_handoff_history(handoff_input_data: HandoffInputData) -> HandoffInputData: - """Summarizes the previous transcript into a developer message for the next agent.""" +def nest_handoff_history( + handoff_input_data: HandoffInputData, + *, + history_mapper: HandoffHistoryMapper | None = None, +) -> HandoffInputData: + """Summarizes the previous transcript for the next agent.""" normalized_history = _normalize_input_history(handoff_input_data.input_history) flattened_history = _flatten_nested_history_messages(normalized_history) @@ -54,12 +58,8 
@@ def nest_handoff_history(handoff_input_data: HandoffInputData) -> HandoffInputDa new_items_as_inputs = [_run_item_to_plain_input(item) for item in handoff_input_data.new_items] transcript = flattened_history + pre_items_as_inputs + new_items_as_inputs - developer_message = _build_developer_message(transcript) - latest_user = _find_latest_user_turn(transcript) - history_items: list[TResponseInputItem] = [developer_message] - if latest_user is not None: - history_items.append(latest_user) - + mapper = history_mapper or default_handoff_history_mapper + history_items = mapper(transcript) filtered_pre_items = tuple( item for item in handoff_input_data.pre_handoff_items @@ -67,11 +67,20 @@ def nest_handoff_history(handoff_input_data: HandoffInputData) -> HandoffInputDa ) return handoff_input_data.clone( - input_history=tuple(history_items), + input_history=tuple(deepcopy(item) for item in history_items), pre_handoff_items=filtered_pre_items, ) +def default_handoff_history_mapper( + transcript: list[TResponseInputItem], +) -> list[TResponseInputItem]: + """Returns a single assistant message summarizing the transcript.""" + + summary_message = _build_summary_message(transcript) + return [summary_message] + + def _normalize_input_history( input_history: str | tuple[TResponseInputItem, ...], ) -> list[TResponseInputItem]: @@ -84,7 +93,7 @@ def _run_item_to_plain_input(run_item: RunItem) -> TResponseInputItem: return deepcopy(run_item.to_input_item()) -def _build_developer_message(transcript: list[TResponseInputItem]) -> TResponseInputItem: +def _build_summary_message(transcript: list[TResponseInputItem]) -> TResponseInputItem: transcript_copy = [deepcopy(item) for item in transcript] if transcript_copy: summary_lines = [ @@ -96,11 +105,11 @@ def _build_developer_message(transcript: list[TResponseInputItem]) -> TResponseI content_lines = [_CONVERSATION_HISTORY_START, *summary_lines, _CONVERSATION_HISTORY_END] content = "\n".join(content_lines) - developer_message: 
dict[str, Any] = { - "role": "developer", + assistant_message: dict[str, Any] = { + "role": "assistant", "content": content, } - return cast(TResponseInputItem, developer_message) + return cast(TResponseInputItem, assistant_message) def _format_transcript_item(item: TResponseInputItem) -> str: @@ -133,15 +142,6 @@ def _stringify_content(content: Any) -> str: return str(content) -def _find_latest_user_turn( - transcript: list[TResponseInputItem], -) -> TResponseInputItem | None: - for item in reversed(transcript): - if item.get("role") == "user": - return deepcopy(item) - return None - - def _flatten_nested_history_messages( items: list[TResponseInputItem], ) -> list[TResponseInputItem]: @@ -158,8 +158,6 @@ def _flatten_nested_history_messages( def _extract_nested_history_transcript( item: TResponseInputItem, ) -> list[TResponseInputItem] | None: - if item.get("role") != "developer": - return None content = item.get("content") if not isinstance(content, str): return None diff --git a/src/agents/handoffs.py b/src/agents/handoffs.py index 2c52737ad..84c012af4 100644 --- a/src/agents/handoffs.py +++ b/src/agents/handoffs.py @@ -69,6 +69,9 @@ def clone(self, **kwargs: Any) -> HandoffInputData: HandoffInputFilter: TypeAlias = Callable[[HandoffInputData], MaybeAwaitable[HandoffInputData]] """A function that filters the input data passed to the next agent.""" +HandoffHistoryMapper: TypeAlias = Callable[[list[TResponseInputItem]], list[TResponseInputItem]] +"""A function that rewrites the conversation history before the next agent sees it.""" + @dataclass class Handoff(Generic[TContext, TAgent]): diff --git a/src/agents/run.py b/src/agents/run.py index 6ce2a3565..bbf09197c 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -44,7 +44,7 @@ OutputGuardrail, OutputGuardrailResult, ) -from .handoffs import Handoff, HandoffInputFilter, handoff +from .handoffs import Handoff, HandoffHistoryMapper, HandoffInputFilter, handoff from .items import ( HandoffCallItem, 
ItemHelpers, @@ -197,8 +197,16 @@ class RunConfig: """ nest_handoff_history: bool = True - """Wrap prior run history in a developer message before handing off when no custom input - filter is set. Set to False to preserve the raw transcript behavior from previous releases. + """Wrap prior run history in a single assistant message before handing off when no custom + input filter is set. Set to False to preserve the raw transcript behavior from previous + releases. + """ + + handoff_history_mapper: HandoffHistoryMapper | None = None + """Optional function that receives the normalized transcript (history + handoff items) and + returns the input history that should be passed to the next agent. When left as `None`, the + runner collapses the transcript into a single assistant message. This function only runs when + `nest_handoff_history` is True. """ input_guardrails: list[InputGuardrail[Any]] | None = None diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 92f00b522..6704eed21 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -172,9 +172,9 @@ async def test_handoffs(): assert result.final_output == "done" assert len(result.raw_responses) == 3, "should have three model responses" - assert len(result.to_input_list()) == 8, ( - "should have 8 inputs: dev summary, latest user input, tool call, tool result, message, " - "handoff, handoff result, and done message" + assert len(result.to_input_list()) == 7, ( + "should have 7 inputs: summary message, tool call, tool result, message, handoff, " + "handoff result, and done message" ) assert result.last_agent == agent_1, "should have handed off to agent_1" @@ -301,15 +301,14 @@ async def test_default_handoff_history_nested_and_filters_respected(): result = await Runner.run(agent_2, input="user_message") assert isinstance(result.input, list) - developer = _as_message(result.input[0]) - assert developer["role"] == "developer" - developer_content = developer["content"] - assert 
isinstance(developer_content, str) - assert "" in developer_content - assert "triage summary" in developer_content - latest_user = _as_message(result.input[1]) - assert latest_user["role"] == "user" - assert latest_user["content"] == "user_message" + assert len(result.input) == 1 + summary = _as_message(result.input[0]) + assert summary["role"] == "assistant" + summary_content = summary["content"] + assert isinstance(summary_content, str) + assert "" in summary_content + assert "triage summary" in summary_content + assert "user_message" in summary_content passthrough_model = FakeModel() delegate = Agent(name="delegate", model=passthrough_model) @@ -360,16 +359,14 @@ async def test_default_handoff_history_accumulates_across_multiple_handoffs(): assert closer_model.first_turn_args is not None closer_input = closer_model.first_turn_args["input"] assert isinstance(closer_input, list) - developer = _as_message(closer_input[0]) - assert developer["role"] == "developer" - developer_content = developer["content"] - assert isinstance(developer_content, str) - assert developer_content.count("") == 1 - assert "triage summary" in developer_content - assert "delegate update" in developer_content - latest_user = _as_message(closer_input[1]) - assert latest_user["role"] == "user" - assert latest_user["content"] == "user_question" + summary = _as_message(closer_input[0]) + assert summary["role"] == "assistant" + summary_content = summary["content"] + assert isinstance(summary_content, str) + assert summary_content.count("") == 1 + assert "triage summary" in summary_content + assert "delegate update" in summary_content + assert "user_question" in summary_content @pytest.mark.asyncio diff --git a/tests/test_agent_runner_streamed.py b/tests/test_agent_runner_streamed.py index dffa4f84f..a49135fa8 100644 --- a/tests/test_agent_runner_streamed.py +++ b/tests/test_agent_runner_streamed.py @@ -175,9 +175,9 @@ async def test_handoffs(): assert result.final_output == "done" assert 
len(result.raw_responses) == 3, "should have three model responses" - assert len(result.to_input_list()) == 8, ( - "should have 8 inputs: dev summary, latest user input, tool call, tool result, message, " - "handoff, handoff result, and done message" + assert len(result.to_input_list()) == 7, ( + "should have 7 inputs: summary message, tool call, tool result, message, handoff, " + "handoff result, and done message" ) assert result.last_agent == agent_1, "should have handed off to agent_1" diff --git a/tests/test_extension_filters.py b/tests/test_extension_filters.py index 068d5d188..4affeffd8 100644 --- a/tests/test_extension_filters.py +++ b/tests/test_extension_filters.py @@ -1,3 +1,4 @@ +from copy import deepcopy from typing import Any, cast from openai.types.responses import ResponseOutputMessage, ResponseOutputText @@ -252,16 +253,15 @@ def test_nest_handoff_history_wraps_transcript() -> None: nested = nest_handoff_history(data) assert isinstance(nested.input_history, tuple) - developer = _as_message(nested.input_history[0]) - assert developer["role"] == "developer" - developer_content = developer["content"] - assert isinstance(developer_content, str) - assert "" in developer_content - assert "" in developer_content - assert "Assist reply" in developer_content - latest_user = _as_message(nested.input_history[1]) - assert latest_user["role"] == "user" - assert latest_user["content"] == "Hello" + assert len(nested.input_history) == 1 + summary = _as_message(nested.input_history[0]) + assert summary["role"] == "assistant" + summary_content = summary["content"] + assert isinstance(summary_content, str) + assert "" in summary_content + assert "" in summary_content + assert "Assist reply" in summary_content + assert "Hello" in summary_content assert len(nested.pre_handoff_items) == 0 assert nested.new_items == data.new_items @@ -278,11 +278,11 @@ def test_nest_handoff_history_handles_missing_user() -> None: assert isinstance(nested.input_history, tuple) assert 
len(nested.input_history) == 1 - developer = _as_message(nested.input_history[0]) - assert developer["role"] == "developer" - developer_content = developer["content"] - assert isinstance(developer_content, str) - assert "reasoning" in developer_content.lower() + summary = _as_message(nested.input_history[0]) + assert summary["role"] == "assistant" + summary_content = summary["content"] + assert isinstance(summary_content, str) + assert "reasoning" in summary_content.lower() def test_nest_handoff_history_appends_existing_history() -> None: @@ -295,10 +295,10 @@ def test_nest_handoff_history_appends_existing_history() -> None: first_nested = nest_handoff_history(first) assert isinstance(first_nested.input_history, tuple) - developer_message = first_nested.input_history[0] + summary_message = first_nested.input_history[0] follow_up_history: tuple[TResponseInputItem, ...] = ( - developer_message, + summary_message, _get_user_input_item("Another question"), ) @@ -312,12 +312,43 @@ def test_nest_handoff_history_appends_existing_history() -> None: second_nested = nest_handoff_history(second) assert isinstance(second_nested.input_history, tuple) - developer = _as_message(second_nested.input_history[0]) - assert developer["role"] == "developer" - content = developer["content"] + summary = _as_message(second_nested.input_history[0]) + assert summary["role"] == "assistant" + content = summary["content"] assert isinstance(content, str) assert content.count("") == 1 assert content.count("") == 1 assert "First reply" in content assert "Second reply" in content assert "Another question" in content + + +def test_nest_handoff_history_supports_custom_mapper() -> None: + data = HandoffInputData( + input_history=(_get_user_input_item("Hello"),), + pre_handoff_items=(_get_message_output_run_item("Assist reply"),), + new_items=(), + run_context=RunContextWrapper(context=()), + ) + + def map_history(items: list[TResponseInputItem]) -> list[TResponseInputItem]: + reversed_items = 
list(reversed(items)) + return [deepcopy(item) for item in reversed_items] + + nested = nest_handoff_history(data, history_mapper=map_history) + + assert isinstance(nested.input_history, tuple) + assert len(nested.input_history) == 2 + first = _as_message(nested.input_history[0]) + second = _as_message(nested.input_history[1]) + assert first["role"] == "assistant" + first_content = first.get("content") + assert isinstance(first_content, list) + assert any( + isinstance(chunk, dict) + and chunk.get("type") == "output_text" + and chunk.get("text") == "Assist reply" + for chunk in first_content + ) + assert second["role"] == "user" + assert second["content"] == "Hello" From 336420283a868add8039aa5e230ecfc79cb640b7 Mon Sep 17 00:00:00 2001 From: jhills20 Date: Fri, 7 Nov 2025 18:27:46 -0500 Subject: [PATCH 07/19] fixes --- docs/handoffs.md | 2 +- docs/running_agents.md | 6 +- src/agents/__init__.py | 17 +- src/agents/_run_impl.py | 23 +- src/agents/extensions/handoff_filters.py | 192 +------------- .../{handoffs.py => handoffs/__init__.py} | 168 ++++--------- src/agents/handoffs/history.py | 236 ++++++++++++++++++ tests/test_extension_filters.py | 50 +++- tests/test_run_step_processing.py | 104 +++++++- 9 files changed, 484 insertions(+), 314 deletions(-) rename src/agents/{handoffs.py => handoffs/__init__.py} (53%) create mode 100644 src/agents/handoffs/history.py diff --git a/docs/handoffs.md b/docs/handoffs.md index 1211f3843..8a9d1f1b3 100644 --- a/docs/handoffs.md +++ b/docs/handoffs.md @@ -82,7 +82,7 @@ handoff_obj = handoff( When a handoff occurs, it's as though the new agent takes over the conversation, and gets to see the entire previous conversation history. If you want to change this, you can set an [`input_filter`][agents.handoffs.Handoff.input_filter]. An input filter is a function that receives the existing input via a [`HandoffInputData`][agents.handoffs.HandoffInputData], and must return a new `HandoffInputData`. 
-By default the runner now collapses the prior transcript into a single assistant summary message (see [`RunConfig.nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]). The summary appears inside a `<CONVERSATION HISTORY>` block that keeps appending new turns when multiple handoffs happen during the same run. You can provide your own mapping function via [`RunConfig.handoff_history_mapper`][agents.run.RunConfig.handoff_history_mapper] to replace the generated message without writing a full `input_filter`. That default only applies when neither the handoff nor the run supplies an explicit `input_filter`, so existing code that already customizes the payload (including the examples in this repository) keeps its current behavior without changes. +By default the runner now collapses the prior transcript into a single assistant summary message (see [`RunConfig.nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]). The summary appears inside a `<CONVERSATION HISTORY>` block that keeps appending new turns when multiple handoffs happen during the same run. You can provide your own mapping function via [`RunConfig.handoff_history_mapper`][agents.run.RunConfig.handoff_history_mapper] to replace the generated message without writing a full `input_filter`. That default only applies when neither the handoff nor the run supplies an explicit `input_filter`, so existing code that already customizes the payload (including the examples in this repository) keeps its current behavior without changes. You can override the nesting behavior for a single handoff by passing `nest_handoff_history=True` or `False` to [`handoff(...)`][agents.handoffs.handoff], which sets [`Handoff.nest_handoff_history`][agents.handoffs.Handoff.nest_handoff_history].
If you just need to change the wrapper text for the generated summary, call [`set_conversation_history_wrappers`][agents.handoffs.set_conversation_history_wrappers] (and optionally [`reset_conversation_history_wrappers`][agents.handoffs.reset_conversation_history_wrappers]) before running your agents. There are some common patterns (for example removing all tool calls from the history), which are implemented for you in [`agents.extensions.handoff_filters`][] diff --git a/docs/running_agents.md b/docs/running_agents.md index 7f6b8d2b5..fb3e9aa47 100644 --- a/docs/running_agents.md +++ b/docs/running_agents.md @@ -51,14 +51,14 @@ The `run_config` parameter lets you configure some global settings for the agent - [`model_settings`][agents.run.RunConfig.model_settings]: Overrides agent-specific settings. For example, you can set a global `temperature` or `top_p`. - [`input_guardrails`][agents.run.RunConfig.input_guardrails], [`output_guardrails`][agents.run.RunConfig.output_guardrails]: A list of input or output guardrails to include on all runs. - [`handoff_input_filter`][agents.run.RunConfig.handoff_input_filter]: A global input filter to apply to all handoffs, if the handoff doesn't already have one. The input filter allows you to edit the inputs that are sent to the new agent. See the documentation in [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] for more details. -- [`nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]: When `True` (the default) the runner collapses the prior transcript into a single assistant message before invoking the next agent. The helper places the content inside a `<CONVERSATION HISTORY>` block that keeps appending new turns as subsequent handoffs occur. Set this to `False` or provide a custom handoff filter if you prefer to pass through the raw transcript.
All [`Runner` methods](agents.run.Runner) automatically create a `RunConfig` when you do not pass one, so the quickstarts and examples pick up this default automatically, and any explicit [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] callbacks continue to override it. +- [`nest_handoff_history`][agents.run.RunConfig.nest_handoff_history]: When `True` (the default) the runner collapses the prior transcript into a single assistant message before invoking the next agent. The helper places the content inside a `<CONVERSATION HISTORY>` block that keeps appending new turns as subsequent handoffs occur. Set this to `False` or provide a custom handoff filter if you prefer to pass through the raw transcript. All [`Runner` methods][agents.run.Runner] automatically create a `RunConfig` when you do not pass one, so the quickstarts and examples pick up this default automatically, and any explicit [`Handoff.input_filter`][agents.handoffs.Handoff.input_filter] callbacks continue to override it. Individual handoffs can override this setting via [`Handoff.nest_handoff_history`][agents.handoffs.Handoff.nest_handoff_history]. - [`handoff_history_mapper`][agents.run.RunConfig.handoff_history_mapper]: Optional callable that receives the normalized transcript (history + handoff items) whenever `nest_handoff_history` is `True`. It must return the exact list of input items to forward to the next agent, allowing you to replace the built-in summary without writing a full handoff filter. - [`tracing_disabled`][agents.run.RunConfig.tracing_disabled]: Allows you to disable [tracing](tracing.md) for the entire run. - [`trace_include_sensitive_data`][agents.run.RunConfig.trace_include_sensitive_data]: Configures whether traces will include potentially sensitive data, such as LLM and tool call inputs/outputs.
- [`workflow_name`][agents.run.RunConfig.workflow_name], [`trace_id`][agents.run.RunConfig.trace_id], [`group_id`][agents.run.RunConfig.group_id]: Sets the tracing workflow name, trace ID and trace group ID for the run. We recommend at least setting `workflow_name`. The group ID is an optional field that lets you link traces across multiple runs. - [`trace_metadata`][agents.run.RunConfig.trace_metadata]: Metadata to include on all traces. -By default, the SDK now nests prior turns inside a single assistant summary message whenever an agent hands off to another agent. This reduces repeated assistant messages and keeps the full transcript inside a single block that new agents can scan quickly. If you'd like to return to the legacy behavior, pass `RunConfig(nest_handoff_history=False)` or supply a `handoff_input_filter` (or `handoff_history_mapper`) that forwards the conversation exactly as you need. +By default, the SDK now nests prior turns inside a single assistant summary message whenever an agent hands off to another agent. This reduces repeated assistant messages and keeps the full transcript inside a single block that new agents can scan quickly. If you'd like to return to the legacy behavior, pass `RunConfig(nest_handoff_history=False)` or supply a `handoff_input_filter` (or `handoff_history_mapper`) that forwards the conversation exactly as you need. You can also opt out (or in) for a specific handoff by setting `handoff(..., nest_handoff_history=False)` or `True`. To change the wrapper text used in the generated summary without writing a custom mapper, call [`set_conversation_history_wrappers`][agents.handoffs.set_conversation_history_wrappers] (and [`reset_conversation_history_wrappers`][agents.handoffs.reset_conversation_history_wrappers] to restore the defaults). ## Conversations/chat threads @@ -204,4 +204,4 @@ The SDK raises exceptions in certain cases. 
The full list is in [`agents.excepti - Malformed JSON: When the model provides a malformed JSON structure for tool calls or in its direct output, especially if a specific `output_type` is defined. - Unexpected tool-related failures: When the model fails to use tools in an expected manner - [`UserError`][agents.exceptions.UserError]: This exception is raised when you (the person writing code using the SDK) make an error while using the SDK. This typically results from incorrect code implementation, invalid configuration, or misuse of the SDK's API. -- [`InputGuardrailTripwireTriggered`][agents.exceptions.InputGuardrailTripwireTriggered], [`OutputGuardrailTripwireTriggered`][agents.exceptions.OutputGuardrailTripwireTriggered]: This exception is raised when the conditions of an input guardrail or output guardrail are met, respectively. Input guardrails check incoming messages before processing, while output guardrails check the agent's final response before delivery. \ No newline at end of file +- [`InputGuardrailTripwireTriggered`][agents.exceptions.InputGuardrailTripwireTriggered], [`OutputGuardrailTripwireTriggered`][agents.exceptions.OutputGuardrailTripwireTriggered]: This exception is raised when the conditions of an input guardrail or output guardrail are met, respectively. Input guardrails check incoming messages before processing, while output guardrails check the agent's final response before delivery. 
diff --git a/src/agents/__init__.py b/src/agents/__init__.py index b285d6f8c..8488cd540 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -34,7 +34,17 @@ input_guardrail, output_guardrail, ) -from .handoffs import Handoff, HandoffInputData, HandoffInputFilter, handoff +from .handoffs import ( + Handoff, + HandoffInputData, + HandoffInputFilter, + default_handoff_history_mapper, + get_conversation_history_wrappers, + handoff, + nest_handoff_history, + reset_conversation_history_wrappers, + set_conversation_history_wrappers, +) from .items import ( HandoffCallItem, HandoffOutputItem, @@ -191,6 +201,11 @@ def enable_verbose_stdout_logging(): "StopAtTools", "ToolsToFinalOutputFunction", "ToolsToFinalOutputResult", + "default_handoff_history_mapper", + "get_conversation_history_wrappers", + "nest_handoff_history", + "reset_conversation_history_wrappers", + "set_conversation_history_wrappers", "Runner", "run_demo_loop", "Model", diff --git a/src/agents/_run_impl.py b/src/agents/_run_impl.py index 839718aaf..8be6fbb0d 100644 --- a/src/agents/_run_impl.py +++ b/src/agents/_run_impl.py @@ -51,9 +51,8 @@ ToolOutputGuardrailTripwireTriggered, UserError, ) -from .extensions.handoff_filters import nest_handoff_history from .guardrail import InputGuardrail, InputGuardrailResult, OutputGuardrail, OutputGuardrailResult -from .handoffs import Handoff, HandoffInputData +from .handoffs import Handoff, HandoffInputData, nest_handoff_history from .items import ( HandoffCallItem, HandoffOutputItem, @@ -999,8 +998,14 @@ async def execute_handoffs( input_filter = handoff.input_filter or ( run_config.handoff_input_filter if run_config else None ) + handoff_nest_setting = handoff.nest_handoff_history + should_nest_history = ( + handoff_nest_setting + if handoff_nest_setting is not None + else run_config.nest_handoff_history + ) handoff_input_data: HandoffInputData | None = None - if input_filter or run_config.nest_handoff_history: + if input_filter or should_nest_history: 
handoff_input_data = HandoffInputData( input_history=tuple(original_input) if isinstance(original_input, list) @@ -1011,7 +1016,15 @@ async def execute_handoffs( ) if input_filter and handoff_input_data is not None: - logger.debug("Filtering inputs for handoff") + filter_name = getattr(input_filter, "__qualname__", repr(input_filter)) + from_agent = getattr(agent, "name", agent.__class__.__name__) + to_agent = getattr(new_agent, "name", new_agent.__class__.__name__) + logger.debug( + "Filtering handoff inputs with %s for %s -> %s", + filter_name, + from_agent, + to_agent, + ) if not callable(input_filter): _error_tracing.attach_error_to_span( span_handoff, @@ -1041,7 +1054,7 @@ async def execute_handoffs( ) pre_step_items = list(filtered.pre_handoff_items) new_step_items = list(filtered.new_items) - elif run_config.nest_handoff_history and handoff_input_data is not None: + elif should_nest_history and handoff_input_data is not None: nested = nest_handoff_history( handoff_input_data, history_mapper=run_config.handoff_history_mapper, diff --git a/src/agents/extensions/handoff_filters.py b/src/agents/extensions/handoff_filters.py index 48ad3fcb8..85d68c1d8 100644 --- a/src/agents/extensions/handoff_filters.py +++ b/src/agents/extensions/handoff_filters.py @@ -1,14 +1,13 @@ from __future__ import annotations -import json -from copy import deepcopy -from typing import Any, cast - -from ..handoffs import HandoffHistoryMapper, HandoffInputData +from ..handoffs import ( + HandoffInputData, + default_handoff_history_mapper, + nest_handoff_history, +) from ..items import ( HandoffCallItem, HandoffOutputItem, - ItemHelpers, ReasoningItem, RunItem, ToolCallItem, @@ -18,6 +17,12 @@ """Contains common handoff input filters, for convenience. 
""" +__all__ = [ + "remove_all_tools", + "nest_handoff_history", + "default_handoff_history_mapper", +] + def remove_all_tools(handoff_input_data: HandoffInputData) -> HandoffInputData: """Filters out all tool items: file search, web search and function calls+output.""" @@ -39,181 +44,6 @@ def remove_all_tools(handoff_input_data: HandoffInputData) -> HandoffInputData: ) -_CONVERSATION_HISTORY_START = "" -_CONVERSATION_HISTORY_END = "" - - -def nest_handoff_history( - handoff_input_data: HandoffInputData, - *, - history_mapper: HandoffHistoryMapper | None = None, -) -> HandoffInputData: - """Summarizes the previous transcript for the next agent.""" - - normalized_history = _normalize_input_history(handoff_input_data.input_history) - flattened_history = _flatten_nested_history_messages(normalized_history) - pre_items_as_inputs = [ - _run_item_to_plain_input(item) for item in handoff_input_data.pre_handoff_items - ] - new_items_as_inputs = [_run_item_to_plain_input(item) for item in handoff_input_data.new_items] - transcript = flattened_history + pre_items_as_inputs + new_items_as_inputs - - mapper = history_mapper or default_handoff_history_mapper - history_items = mapper(transcript) - filtered_pre_items = tuple( - item - for item in handoff_input_data.pre_handoff_items - if _get_run_item_role(item) != "assistant" - ) - - return handoff_input_data.clone( - input_history=tuple(deepcopy(item) for item in history_items), - pre_handoff_items=filtered_pre_items, - ) - - -def default_handoff_history_mapper( - transcript: list[TResponseInputItem], -) -> list[TResponseInputItem]: - """Returns a single assistant message summarizing the transcript.""" - - summary_message = _build_summary_message(transcript) - return [summary_message] - - -def _normalize_input_history( - input_history: str | tuple[TResponseInputItem, ...], -) -> list[TResponseInputItem]: - if isinstance(input_history, str): - return ItemHelpers.input_to_new_input_list(input_history) - return [deepcopy(item) for 
item in input_history] - - -def _run_item_to_plain_input(run_item: RunItem) -> TResponseInputItem: - return deepcopy(run_item.to_input_item()) - - -def _build_summary_message(transcript: list[TResponseInputItem]) -> TResponseInputItem: - transcript_copy = [deepcopy(item) for item in transcript] - if transcript_copy: - summary_lines = [ - f"{idx + 1}. {_format_transcript_item(item)}" - for idx, item in enumerate(transcript_copy) - ] - else: - summary_lines = ["(no previous turns recorded)"] - - content_lines = [_CONVERSATION_HISTORY_START, *summary_lines, _CONVERSATION_HISTORY_END] - content = "\n".join(content_lines) - assistant_message: dict[str, Any] = { - "role": "assistant", - "content": content, - } - return cast(TResponseInputItem, assistant_message) - - -def _format_transcript_item(item: TResponseInputItem) -> str: - role = item.get("role") - if isinstance(role, str): - prefix = role - name = item.get("name") - if isinstance(name, str) and name: - prefix = f"{prefix} ({name})" - content_str = _stringify_content(item.get("content")) - return f"{prefix}: {content_str}" if content_str else prefix - - item_type = item.get("type", "item") - rest = {k: v for k, v in item.items() if k != "type"} - try: - serialized = json.dumps(rest, ensure_ascii=False, default=str) - except TypeError: - serialized = str(rest) - return f"{item_type}: {serialized}" if serialized else str(item_type) - - -def _stringify_content(content: Any) -> str: - if content is None: - return "" - if isinstance(content, str): - return content - try: - return json.dumps(content, ensure_ascii=False, default=str) - except TypeError: - return str(content) - - -def _flatten_nested_history_messages( - items: list[TResponseInputItem], -) -> list[TResponseInputItem]: - flattened: list[TResponseInputItem] = [] - for item in items: - nested_transcript = _extract_nested_history_transcript(item) - if nested_transcript is not None: - flattened.extend(nested_transcript) - continue - 
flattened.append(deepcopy(item)) - return flattened - - -def _extract_nested_history_transcript( - item: TResponseInputItem, -) -> list[TResponseInputItem] | None: - content = item.get("content") - if not isinstance(content, str): - return None - start_idx = content.find(_CONVERSATION_HISTORY_START) - end_idx = content.find(_CONVERSATION_HISTORY_END) - if start_idx == -1 or end_idx == -1 or end_idx <= start_idx: - return None - start_idx += len(_CONVERSATION_HISTORY_START) - body = content[start_idx:end_idx] - lines = [line.strip() for line in body.splitlines() if line.strip()] - parsed: list[TResponseInputItem] = [] - for line in lines: - parsed_item = _parse_summary_line(line) - if parsed_item is not None: - parsed.append(parsed_item) - return parsed - - -def _parse_summary_line(line: str) -> TResponseInputItem | None: - stripped = line.strip() - if not stripped: - return None - dot_index = stripped.find(".") - if dot_index != -1 and stripped[:dot_index].isdigit(): - stripped = stripped[dot_index + 1 :].lstrip() - role_part, sep, remainder = stripped.partition(":") - if not sep: - return None - role_text = role_part.strip() - if not role_text: - return None - role, name = _split_role_and_name(role_text) - reconstructed: dict[str, Any] = {"role": role} - if name: - reconstructed["name"] = name - content = remainder.strip() - if content: - reconstructed["content"] = content - return cast(TResponseInputItem, reconstructed) - - -def _split_role_and_name(role_text: str) -> tuple[str, str | None]: - if role_text.endswith(")") and "(" in role_text: - open_idx = role_text.rfind("(") - possible_name = role_text[open_idx + 1 : -1].strip() - role_candidate = role_text[:open_idx].strip() - if possible_name: - return (role_candidate or "developer", possible_name) - return (role_text or "developer", None) - - -def _get_run_item_role(run_item: RunItem) -> str | None: - role_candidate = run_item.to_input_item().get("role") - return role_candidate if isinstance(role_candidate, 
str) else None - - def _remove_tools_from_items(items: tuple[RunItem, ...]) -> tuple[RunItem, ...]: filtered_items = [] for item in items: diff --git a/src/agents/handoffs.py b/src/agents/handoffs/__init__.py similarity index 53% rename from src/agents/handoffs.py rename to src/agents/handoffs/__init__.py index 84c012af4..8974d39c0 100644 --- a/src/agents/handoffs.py +++ b/src/agents/handoffs/__init__.py @@ -9,22 +9,26 @@ from pydantic import TypeAdapter from typing_extensions import TypeAlias, TypeVar -from .exceptions import ModelBehaviorError, UserError -from .items import RunItem, TResponseInputItem -from .run_context import RunContextWrapper, TContext -from .strict_schema import ensure_strict_json_schema -from .tracing.spans import SpanError -from .util import _error_tracing, _json, _transforms -from .util._types import MaybeAwaitable +from ..exceptions import ModelBehaviorError, UserError +from ..items import RunItem, TResponseInputItem +from ..run_context import RunContextWrapper, TContext +from ..strict_schema import ensure_strict_json_schema +from ..tracing.spans import SpanError +from ..util import _error_tracing, _json, _transforms +from ..util._types import MaybeAwaitable +from .history import ( + default_handoff_history_mapper, + get_conversation_history_wrappers, + nest_handoff_history, + reset_conversation_history_wrappers, + set_conversation_history_wrappers, +) if TYPE_CHECKING: - from .agent import Agent, AgentBase + from ..agent import Agent, AgentBase -# The handoff input type is the type of data passed when the agent is called via a handoff. THandoffInput = TypeVar("THandoffInput", default=Any) - -# The agent type that the handoff returns TAgent = TypeVar("TAgent", bound="AgentBase[Any]", default="Agent[Any]") OnHandoffWithInput = Callable[[RunContextWrapper[Any], THandoffInput], Any] @@ -34,100 +38,31 @@ @dataclass(frozen=True) class HandoffInputData: input_history: str | tuple[TResponseInputItem, ...] 
- """ - The input history before `Runner.run()` was called. - """ - pre_handoff_items: tuple[RunItem, ...] - """ - The items generated before the agent turn where the handoff was invoked. - """ - new_items: tuple[RunItem, ...] - """ - The new items generated during the current agent turn, including the item that triggered the - handoff and the tool output message representing the response from the handoff output. - """ - run_context: RunContextWrapper[Any] | None = None - """ - The run context at the time the handoff was invoked. - Note that, since this property was added later on, it's optional for backwards compatibility. - """ def clone(self, **kwargs: Any) -> HandoffInputData: - """ - Make a copy of the handoff input data, with the given arguments changed. For example, you - could do: - ``` - new_handoff_input_data = handoff_input_data.clone(new_items=()) - ``` - """ return dataclasses_replace(self, **kwargs) HandoffInputFilter: TypeAlias = Callable[[HandoffInputData], MaybeAwaitable[HandoffInputData]] -"""A function that filters the input data passed to the next agent.""" - HandoffHistoryMapper: TypeAlias = Callable[[list[TResponseInputItem]], list[TResponseInputItem]] -"""A function that rewrites the conversation history before the next agent sees it.""" @dataclass class Handoff(Generic[TContext, TAgent]): - """A handoff is when an agent delegates a task to another agent. - For example, in a customer support scenario you might have a "triage agent" that determines - which agent should handle the user's request, and sub-agents that specialize in different - areas like billing, account management, etc. - """ - tool_name: str - """The name of the tool that represents the handoff.""" - tool_description: str - """The description of the tool that represents the handoff.""" - input_json_schema: dict[str, Any] - """The JSON schema for the handoff input. Can be empty if the handoff does not take an input. 
- """ - on_invoke_handoff: Callable[[RunContextWrapper[Any], str], Awaitable[TAgent]] - """The function that invokes the handoff. The parameters passed are: - 1. The handoff run context - 2. The arguments from the LLM, as a JSON string. Empty string if input_json_schema is empty. - - Must return an agent. - """ - agent_name: str - """The name of the agent that is being handed off to.""" - input_filter: HandoffInputFilter | None = None - """A function that filters the inputs that are passed to the next agent. By default, the new - agent sees the entire conversation history. In some cases, you may want to filter inputs e.g. - to remove older inputs, or remove tools from existing inputs. - - The function will receive the entire conversation history so far, including the input item - that triggered the handoff and a tool call output item representing the handoff tool's output. - - You are free to modify the input history or new items as you see fit. The next agent that - runs will receive `handoff_input_data.all_items`. - - IMPORTANT: in streaming mode, we will not stream anything as a result of this function. The - items generated before will already have been streamed. - """ - + nest_handoff_history: bool | None = None strict_json_schema: bool = True - """Whether the input JSON schema is in strict mode. We **strongly** recommend setting this to - True, as it increases the likelihood of correct JSON input. - """ - is_enabled: bool | Callable[[RunContextWrapper[Any], AgentBase[Any]], MaybeAwaitable[bool]] = ( True ) - """Whether the handoff is enabled. Either a bool or a Callable that takes the run context and - agent and returns whether the handoff is enabled. 
You can use this to dynamically enable/disable - a handoff based on your context/state.""" def get_transfer_message(self, agent: AgentBase[Any]) -> str: return json.dumps({"assistant": agent.name}) @@ -146,65 +81,54 @@ def default_tool_description(cls, agent: AgentBase[Any]) -> str: @overload def handoff( - agent: Agent[TContext], + agent: "Agent[TContext]", *, tool_name_override: str | None = None, tool_description_override: str | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, - is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, -) -> Handoff[TContext, Agent[TContext]]: ... + nest_handoff_history: bool | None = None, + is_enabled: bool | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, +) -> Handoff[TContext, "Agent[TContext]"]: ... @overload def handoff( - agent: Agent[TContext], + agent: "Agent[TContext]", *, on_handoff: OnHandoffWithInput[THandoffInput], input_type: type[THandoffInput], tool_description_override: str | None = None, tool_name_override: str | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, - is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, -) -> Handoff[TContext, Agent[TContext]]: ... + nest_handoff_history: bool | None = None, + is_enabled: bool | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, +) -> Handoff[TContext, "Agent[TContext]"]: ... @overload def handoff( - agent: Agent[TContext], + agent: "Agent[TContext]", *, on_handoff: OnHandoffWithoutInput, tool_description_override: str | None = None, tool_name_override: str | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, - is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, -) -> Handoff[TContext, Agent[TContext]]: ... 
+ nest_handoff_history: bool | None = None, + is_enabled: bool | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, +) -> Handoff[TContext, "Agent[TContext]"]: ... def handoff( - agent: Agent[TContext], + agent: "Agent[TContext]", tool_name_override: str | None = None, tool_description_override: str | None = None, on_handoff: OnHandoffWithInput[THandoffInput] | OnHandoffWithoutInput | None = None, input_type: type[THandoffInput] | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, + nest_handoff_history: bool | None = None, is_enabled: bool - | Callable[[RunContextWrapper[Any], Agent[TContext]], MaybeAwaitable[bool]] = True, -) -> Handoff[TContext, Agent[TContext]]: - """Create a handoff from an agent. - - Args: - agent: The agent to handoff to, or a function that returns an agent. - tool_name_override: Optional override for the name of the tool that represents the handoff. - tool_description_override: Optional override for the description of the tool that - represents the handoff. - on_handoff: A function that runs when the handoff is invoked. - input_type: the type of the input to the handoff. If provided, the input will be validated - against this type. Only relevant if you pass a function that takes an input. - input_filter: a function that filters the inputs that are passed to the next agent. - is_enabled: Whether the handoff is enabled. Can be a bool or a callable that takes the run - context and agent and returns whether the handoff is enabled. Disabled handoffs are - hidden from the LLM at runtime. 
- """ + | Callable[[RunContextWrapper[Any], "Agent[TContext]"], MaybeAwaitable[bool]] = True, +) -> Handoff[TContext, "Agent[TContext]"]: assert (on_handoff and input_type) or not (on_handoff and input_type), ( "You must provide either both on_handoff and input_type, or neither" ) @@ -227,7 +151,7 @@ def handoff( async def _invoke_handoff( ctx: RunContextWrapper[Any], input_json: str | None = None - ) -> Agent[TContext]: + ) -> "Agent[TContext]": if input_type is not None and type_adapter is not None: if input_json is None: _error_tracing.attach_error_to_current_span( @@ -259,22 +183,17 @@ async def _invoke_handoff( tool_name = tool_name_override or Handoff.default_tool_name(agent) tool_description = tool_description_override or Handoff.default_tool_description(agent) - - # Always ensure the input JSON schema is in strict mode - # If there is a need, we can make this configurable in the future input_json_schema = ensure_strict_json_schema(input_json_schema) - async def _is_enabled(ctx: RunContextWrapper[Any], agent_base: AgentBase[Any]) -> bool: - from .agent import Agent + async def _is_enabled(ctx: RunContextWrapper[Any], agent_base: "AgentBase[Any]") -> bool: + from ..agent import Agent assert callable(is_enabled), "is_enabled must be callable here" assert isinstance(agent_base, Agent), "Can't handoff to a non-Agent" result = is_enabled(ctx, agent_base) - if inspect.isawaitable(result): return await result - - return result + return bool(result) return Handoff( tool_name=tool_name, @@ -282,6 +201,21 @@ async def _is_enabled(ctx: RunContextWrapper[Any], agent_base: AgentBase[Any]) - input_json_schema=input_json_schema, on_invoke_handoff=_invoke_handoff, input_filter=input_filter, + nest_handoff_history=nest_handoff_history, agent_name=agent.name, is_enabled=_is_enabled if callable(is_enabled) else is_enabled, ) + + +__all__ = [ + "Handoff", + "HandoffHistoryMapper", + "HandoffInputData", + "HandoffInputFilter", + "default_handoff_history_mapper", + 
"get_conversation_history_wrappers", + "handoff", + "nest_handoff_history", + "reset_conversation_history_wrappers", + "set_conversation_history_wrappers", +] diff --git a/src/agents/handoffs/history.py b/src/agents/handoffs/history.py new file mode 100644 index 000000000..6be8ab4f8 --- /dev/null +++ b/src/agents/handoffs/history.py @@ -0,0 +1,236 @@ +from __future__ import annotations + +import json +from copy import deepcopy +from typing import TYPE_CHECKING, Any, cast + +from ..items import ( + HandoffCallItem, + HandoffOutputItem, + ItemHelpers, + ReasoningItem, + RunItem, + ToolCallItem, + ToolCallOutputItem, + TResponseInputItem, +) + +if TYPE_CHECKING: + from . import HandoffHistoryMapper, HandoffInputData + +__all__ = [ + "default_handoff_history_mapper", + "get_conversation_history_wrappers", + "nest_handoff_history", + "reset_conversation_history_wrappers", + "set_conversation_history_wrappers", +] + +_DEFAULT_CONVERSATION_HISTORY_START = "" +_DEFAULT_CONVERSATION_HISTORY_END = "" +_conversation_history_start = _DEFAULT_CONVERSATION_HISTORY_START +_conversation_history_end = _DEFAULT_CONVERSATION_HISTORY_END + + +def set_conversation_history_wrappers( + *, + start: str | None = None, + end: str | None = None, +) -> None: + """Override the markers that wrap the generated conversation summary. + + Pass ``None`` to leave either side unchanged. 
+ """ + + global _conversation_history_start, _conversation_history_end + if start is not None: + _conversation_history_start = start + if end is not None: + _conversation_history_end = end + + +def reset_conversation_history_wrappers() -> None: + """Restore the default ```` markers.""" + + global _conversation_history_start, _conversation_history_end + _conversation_history_start = _DEFAULT_CONVERSATION_HISTORY_START + _conversation_history_end = _DEFAULT_CONVERSATION_HISTORY_END + + +def get_conversation_history_wrappers() -> tuple[str, str]: + """Return the current start/end markers used for the nested conversation summary.""" + + return (_conversation_history_start, _conversation_history_end) + + +def nest_handoff_history( + handoff_input_data: HandoffInputData, + *, + history_mapper: HandoffHistoryMapper | None = None, +) -> HandoffInputData: + """Summarize the previous transcript for the next agent.""" + + normalized_history = _normalize_input_history(handoff_input_data.input_history) + flattened_history = _flatten_nested_history_messages(normalized_history) + pre_items_as_inputs = [ + _run_item_to_plain_input(item) for item in handoff_input_data.pre_handoff_items + ] + new_items_as_inputs = [_run_item_to_plain_input(item) for item in handoff_input_data.new_items] + transcript = flattened_history + pre_items_as_inputs + new_items_as_inputs + + mapper = history_mapper or default_handoff_history_mapper + history_items = mapper(transcript) + filtered_pre_items = tuple( + item + for item in handoff_input_data.pre_handoff_items + if _get_run_item_role(item) != "assistant" + ) + + return handoff_input_data.clone( + input_history=tuple(deepcopy(item) for item in history_items), + pre_handoff_items=filtered_pre_items, + ) + + +def default_handoff_history_mapper( + transcript: list[TResponseInputItem], +) -> list[TResponseInputItem]: + """Return a single assistant message summarizing the transcript.""" + + summary_message = _build_summary_message(transcript) + return 
[summary_message] + + +def _normalize_input_history( + input_history: str | tuple[TResponseInputItem, ...], +) -> list[TResponseInputItem]: + if isinstance(input_history, str): + return ItemHelpers.input_to_new_input_list(input_history) + return [deepcopy(item) for item in input_history] + + +def _run_item_to_plain_input(run_item: RunItem) -> TResponseInputItem: + return deepcopy(run_item.to_input_item()) + + +def _build_summary_message(transcript: list[TResponseInputItem]) -> TResponseInputItem: + transcript_copy = [deepcopy(item) for item in transcript] + if transcript_copy: + summary_lines = [ + f"{idx + 1}. {_format_transcript_item(item)}" + for idx, item in enumerate(transcript_copy) + ] + else: + summary_lines = ["(no previous turns recorded)"] + + start_marker, end_marker = get_conversation_history_wrappers() + content_lines = [start_marker, *summary_lines, end_marker] + content = "\n".join(content_lines) + assistant_message: dict[str, Any] = { + "role": "assistant", + "content": content, + } + return cast(TResponseInputItem, assistant_message) + + +def _format_transcript_item(item: TResponseInputItem) -> str: + role = item.get("role") + if isinstance(role, str): + prefix = role + name = item.get("name") + if isinstance(name, str) and name: + prefix = f"{prefix} ({name})" + content_str = _stringify_content(item.get("content")) + return f"{prefix}: {content_str}" if content_str else prefix + + item_type = item.get("type", "item") + rest = {k: v for k, v in item.items() if k != "type"} + try: + serialized = json.dumps(rest, ensure_ascii=False, default=str) + except TypeError: + serialized = str(rest) + return f"{item_type}: {serialized}" if serialized else str(item_type) + + +def _stringify_content(content: Any) -> str: + if content is None: + return "" + if isinstance(content, str): + return content + try: + return json.dumps(content, ensure_ascii=False, default=str) + except TypeError: + return str(content) + + +def _flatten_nested_history_messages( + items: 
list[TResponseInputItem], +) -> list[TResponseInputItem]: + flattened: list[TResponseInputItem] = [] + for item in items: + nested_transcript = _extract_nested_history_transcript(item) + if nested_transcript is not None: + flattened.extend(nested_transcript) + continue + flattened.append(deepcopy(item)) + return flattened + + +def _extract_nested_history_transcript( + item: TResponseInputItem, +) -> list[TResponseInputItem] | None: + content = item.get("content") + if not isinstance(content, str): + return None + start_marker, end_marker = get_conversation_history_wrappers() + start_idx = content.find(start_marker) + end_idx = content.find(end_marker) + if start_idx == -1 or end_idx == -1 or end_idx <= start_idx: + return None + start_idx += len(start_marker) + body = content[start_idx:end_idx] + lines = [line.strip() for line in body.splitlines() if line.strip()] + parsed: list[TResponseInputItem] = [] + for line in lines: + parsed_item = _parse_summary_line(line) + if parsed_item is not None: + parsed.append(parsed_item) + return parsed + + +def _parse_summary_line(line: str) -> TResponseInputItem | None: + stripped = line.strip() + if not stripped: + return None + dot_index = stripped.find(".") + if dot_index != -1 and stripped[:dot_index].isdigit(): + stripped = stripped[dot_index + 1 :].lstrip() + role_part, sep, remainder = stripped.partition(":") + if not sep: + return None + role_text = role_part.strip() + if not role_text: + return None + role, name = _split_role_and_name(role_text) + reconstructed: dict[str, Any] = {"role": role} + if name: + reconstructed["name"] = name + content = remainder.strip() + if content: + reconstructed["content"] = content + return cast(TResponseInputItem, reconstructed) + + +def _split_role_and_name(role_text: str) -> tuple[str, str | None]: + if role_text.endswith(")") and "(" in role_text: + open_idx = role_text.rfind("(") + possible_name = role_text[open_idx + 1 : -1].strip() + role_candidate = role_text[:open_idx].strip() 
+ if possible_name: + return (role_candidate or "developer", possible_name) + return (role_text or "developer", None) + + +def _get_run_item_role(run_item: RunItem) -> str | None: + role_candidate = run_item.to_input_item().get("role") + return role_candidate if isinstance(role_candidate, str) else None diff --git a/tests/test_extension_filters.py b/tests/test_extension_filters.py index c700742e0..2607788f8 100644 --- a/tests/test_extension_filters.py +++ b/tests/test_extension_filters.py @@ -4,7 +4,14 @@ from openai.types.responses import ResponseOutputMessage, ResponseOutputText from openai.types.responses.response_reasoning_item import ResponseReasoningItem -from agents import Agent, HandoffInputData, RunContextWrapper +from agents import ( + Agent, + HandoffInputData, + RunContextWrapper, + get_conversation_history_wrappers, + reset_conversation_history_wrappers, + set_conversation_history_wrappers, +) from agents.extensions.handoff_filters import nest_handoff_history, remove_all_tools from agents.items import ( HandoffOutputItem, @@ -260,8 +267,9 @@ def test_nest_handoff_history_wraps_transcript() -> None: assert summary["role"] == "assistant" summary_content = summary["content"] assert isinstance(summary_content, str) - assert "" in summary_content - assert "" in summary_content + start_marker, end_marker = get_conversation_history_wrappers() + assert start_marker in summary_content + assert end_marker in summary_content assert "Assist reply" in summary_content assert "Hello" in summary_content assert len(nested.pre_handoff_items) == 0 @@ -318,13 +326,45 @@ def test_nest_handoff_history_appends_existing_history() -> None: assert summary["role"] == "assistant" content = summary["content"] assert isinstance(content, str) - assert content.count("") == 1 - assert content.count("") == 1 + start_marker, end_marker = get_conversation_history_wrappers() + assert content.count(start_marker) == 1 + assert content.count(end_marker) == 1 assert "First reply" in content 
assert "Second reply" in content assert "Another question" in content +def test_nest_handoff_history_honors_custom_wrappers() -> None: + data = HandoffInputData( + input_history=(_get_user_input_item("Hello"),), + pre_handoff_items=(_get_message_output_run_item("First reply"),), + new_items=(_get_message_output_run_item("Second reply"),), + run_context=RunContextWrapper(context=()), + ) + + set_conversation_history_wrappers(start="<>", end="<>") + try: + nested = nest_handoff_history(data) + assert isinstance(nested.input_history, tuple) + assert len(nested.input_history) == 1 + summary = _as_message(nested.input_history[0]) + summary_content = summary["content"] + assert isinstance(summary_content, str) + assert summary_content.startswith("<>") + assert summary_content.endswith("<>") + + # Ensure the custom markers are parsed correctly when nesting again. + second_nested = nest_handoff_history(nested) + assert isinstance(second_nested.input_history, tuple) + second_summary = _as_message(second_nested.input_history[0]) + content = second_summary["content"] + assert isinstance(content, str) + assert content.count("<>") == 1 + assert content.count("<>") == 1 + finally: + reset_conversation_history_wrappers() + + def test_nest_handoff_history_supports_custom_mapper() -> None: data = HandoffInputData( input_history=(_get_user_input_item("Hello"),), diff --git a/tests/test_run_step_processing.py b/tests/test_run_step_processing.py index 27d36afa8..1474b4cfd 100644 --- a/tests/test_run_step_processing.py +++ b/tests/test_run_step_processing.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Any + import pytest from openai.types.responses import ( ResponseComputerToolCall, @@ -16,14 +18,19 @@ Computer, ComputerTool, Handoff, + HandoffInputData, ModelBehaviorError, ModelResponse, ReasoningItem, + RunConfig, RunContextWrapper, + RunItem, ToolCallItem, Usage, + handoff, ) -from agents._run_impl import RunImpl +from agents._run_impl import RunImpl, 
ToolRunHandoff +from agents.lifecycle import RunHooksBase from agents.run import AgentRunner from .test_responses import ( @@ -31,6 +38,7 @@ get_function_tool, get_function_tool_call, get_handoff_tool_call, + get_text_input_item, get_text_message, ) @@ -202,6 +210,100 @@ async def test_handoffs_parsed_correctly(): assert handoff_agent == agent_1 +@pytest.mark.asyncio +async def test_handoff_can_disable_run_level_history_nesting(monkeypatch: pytest.MonkeyPatch): + source_agent = Agent(name="source") + target_agent = Agent(name="target") + override_handoff = handoff(target_agent, nest_handoff_history=False) + tool_call = get_handoff_tool_call(target_agent) + run_handoffs = [ToolRunHandoff(handoff=override_handoff, tool_call=tool_call)] + run_config = RunConfig(nest_handoff_history=True) + context_wrapper = RunContextWrapper(context=None) + hooks = RunHooksBase() + original_input = [get_text_input_item("hello")] + pre_step_items: list[RunItem] = [] + new_step_items: list[RunItem] = [] + new_response = ModelResponse(output=[tool_call], usage=Usage(), response_id=None) + + calls: list[HandoffInputData] = [] + + def fake_nest( + handoff_input_data: HandoffInputData, + *, + history_mapper: Any, + ) -> HandoffInputData: + calls.append(handoff_input_data) + return handoff_input_data + + monkeypatch.setattr("agents._run_impl.nest_handoff_history", fake_nest) + + result = await RunImpl.execute_handoffs( + agent=source_agent, + original_input=list(original_input), + pre_step_items=pre_step_items, + new_step_items=new_step_items, + new_response=new_response, + run_handoffs=run_handoffs, + hooks=hooks, + context_wrapper=context_wrapper, + run_config=run_config, + ) + + assert calls == [] + assert result.original_input == original_input + + +@pytest.mark.asyncio +async def test_handoff_can_enable_history_nesting(monkeypatch: pytest.MonkeyPatch): + source_agent = Agent(name="source") + target_agent = Agent(name="target") + override_handoff = handoff(target_agent, 
nest_handoff_history=True) + tool_call = get_handoff_tool_call(target_agent) + run_handoffs = [ToolRunHandoff(handoff=override_handoff, tool_call=tool_call)] + run_config = RunConfig(nest_handoff_history=False) + context_wrapper = RunContextWrapper(context=None) + hooks = RunHooksBase() + original_input = [get_text_input_item("hello")] + pre_step_items: list[RunItem] = [] + new_step_items: list[RunItem] = [] + new_response = ModelResponse(output=[tool_call], usage=Usage(), response_id=None) + + def fake_nest( + handoff_input_data: HandoffInputData, + *, + history_mapper: Any, + ) -> HandoffInputData: + return handoff_input_data.clone( + input_history=( + { + "role": "assistant", + "content": "nested", + }, + ) + ) + + monkeypatch.setattr("agents._run_impl.nest_handoff_history", fake_nest) + + result = await RunImpl.execute_handoffs( + agent=source_agent, + original_input=list(original_input), + pre_step_items=pre_step_items, + new_step_items=new_step_items, + new_response=new_response, + run_handoffs=run_handoffs, + hooks=hooks, + context_wrapper=context_wrapper, + run_config=run_config, + ) + + assert result.original_input == [ + { + "role": "assistant", + "content": "nested", + } + ] + + @pytest.mark.asyncio async def test_missing_handoff_fails(): agent_1 = Agent(name="test_1") From b22eeb28c5536a64f60d14395bbcb85733971982 Mon Sep 17 00:00:00 2001 From: jhills20 Date: Fri, 7 Nov 2025 19:00:24 -0500 Subject: [PATCH 08/19] update changelog --- docs/release.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/release.md b/docs/release.md index 95a4f67a4..93161a7c8 100644 --- a/docs/release.md +++ b/docs/release.md @@ -25,6 +25,9 @@ This version doesn’t introduce any visible breaking changes, but it includes n - Added support for `RealtimeRunner` to handle [SIP protocol connections](https://platform.openai.com/docs/guides/realtime-sip) - Significantly revised the internal logic of `Runner#run_sync` for Python 3.14 compatibility +- By default handoff history 
is now packaged into a single assistant message instead of exposing the raw user/assistant turns, giving downstream agents a concise, predictable recap +- The existing single-message handoff transcript now starts with "For context, here is the conversation so far between the user and the previous agent:" before the `<CONVERSATION HISTORY>` block, so downstream agents get a clearly labeled recap +- The existing single-message handoff transcript now starts with "For context, here is the conversation so far between the user and the previous agent:" before the `<CONVERSATION HISTORY>` block, so downstream agents get a clearly labeled recap ### 0.4.0 From fff4d349d7b77b1951de8fc687bfe56421fe29cc Mon Sep 17 00:00:00 2001 From: jhills20 Date: Fri, 7 Nov 2025 19:00:50 -0500 Subject: [PATCH 09/19] update default --- src/agents/handoffs/history.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/agents/handoffs/history.py b/src/agents/handoffs/history.py index 6be8ab4f8..8469ddae4 100644 --- a/src/agents/handoffs/history.py +++ b/src/agents/handoffs/history.py @@ -124,7 +124,12 @@ def _build_summary_message(transcript: list[TResponseInputItem]) -> TResponseInp summary_lines = ["(no previous turns recorded)"] start_marker, end_marker = get_conversation_history_wrappers() - content_lines = [start_marker, *summary_lines, end_marker] + content_lines = [ + "For context, here is the conversation so far between the user and the previous agent:", + start_marker, + *summary_lines, + end_marker, + ] content = "\n".join(content_lines) assistant_message: dict[str, Any] = { "role": "assistant", "content": content, } return cast(TResponseInputItem, assistant_message) From c6e0f5038b72e7d5db9246ead800e8876a040c75 Mon Sep 17 00:00:00 2001 From: jhills20 Date: Fri, 7 Nov 2025 19:05:43 -0500 Subject: [PATCH 10/19] fix for lint --- src/agents/handoffs/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/agents/handoffs/__init__.py b/src/agents/handoffs/__init__.py index 8974d39c0..8db14659f 100644 --- a/src/agents/handoffs/__init__.py +++ 
b/src/agents/handoffs/__init__.py @@ -87,7 +87,8 @@ def handoff( tool_description_override: str | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, nest_handoff_history: bool | None = None, - is_enabled: bool | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, + is_enabled: bool + | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, ) -> Handoff[TContext, "Agent[TContext]"]: ... @@ -101,7 +102,8 @@ def handoff( tool_name_override: str | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, nest_handoff_history: bool | None = None, - is_enabled: bool | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, + is_enabled: bool + | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, ) -> Handoff[TContext, "Agent[TContext]"]: ... @@ -114,7 +116,8 @@ def handoff( tool_name_override: str | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, nest_handoff_history: bool | None = None, - is_enabled: bool | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, + is_enabled: bool + | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, ) -> Handoff[TContext, "Agent[TContext]"]: ... 
From f3c7048dd5dd09f44578b9504eaba81a5ea03793 Mon Sep 17 00:00:00 2001 From: jhills20 Date: Fri, 7 Nov 2025 19:24:17 -0500 Subject: [PATCH 11/19] fix tests --- tests/test_agent_runner.py | 6 +++--- tests/test_agent_runner_streamed.py | 12 ++++++------ tests/test_extension_filters.py | 6 +++++- tests/test_run_step_processing.py | 13 +++++++------ 4 files changed, 21 insertions(+), 16 deletions(-) diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index c4d224615..d05496e50 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -226,9 +226,9 @@ async def test_structured_output(): assert result.final_output == Foo(bar="baz") assert len(result.raw_responses) == 4, "should have four model responses" - assert len(result.to_input_list()) == 11, ( - "should have input: 2 orig inputs, function call, function call result, message, handoff, " - "handoff output, preamble message, tool call, tool call result, final output" + assert len(result.to_input_list()) == 10, ( + "should have input: conversation summary, function call, function call result, message, " + "handoff, handoff output, preamble message, tool call, tool call result, final output" ) assert result.last_agent == agent_1, "should have handed off to agent_1" diff --git a/tests/test_agent_runner_streamed.py b/tests/test_agent_runner_streamed.py index a49135fa8..222afda78 100644 --- a/tests/test_agent_runner_streamed.py +++ b/tests/test_agent_runner_streamed.py @@ -231,9 +231,9 @@ async def test_structured_output(): assert result.final_output == Foo(bar="baz") assert len(result.raw_responses) == 4, "should have four model responses" - assert len(result.to_input_list()) == 11, ( - "should have input: 2 orig inputs, function call, function call result, message, handoff, " - "handoff output, preamble message, tool call, tool call result, final output" + assert len(result.to_input_list()) == 10, ( + "should have input: conversation summary, function call, function call result, 
message, " + "handoff, handoff output, preamble message, tool call, tool call result, final output" ) assert result.last_agent == agent_1, "should have handed off to agent_1" @@ -717,9 +717,9 @@ async def test_streaming_events(): assert result.final_output == Foo(bar="baz") assert len(result.raw_responses) == 4, "should have four model responses" - assert len(result.to_input_list()) == 10, ( - "should have input: 2 orig inputs, function call, function call result, message, handoff, " - "handoff output, tool call, tool call result, final output" + assert len(result.to_input_list()) == 9, ( + "should have input: conversation summary, function call, function call result, message, " + "handoff, handoff output, tool call, tool call result, final output" ) assert result.last_agent == agent_1, "should have handed off to agent_1" diff --git a/tests/test_extension_filters.py b/tests/test_extension_filters.py index 2607788f8..86161bbb7 100644 --- a/tests/test_extension_filters.py +++ b/tests/test_extension_filters.py @@ -350,7 +350,11 @@ def test_nest_handoff_history_honors_custom_wrappers() -> None: summary = _as_message(nested.input_history[0]) summary_content = summary["content"] assert isinstance(summary_content, str) - assert summary_content.startswith("<>") + lines = summary_content.splitlines() + assert lines[0] == ( + "For context, here is the conversation so far between the user and the previous agent:" + ) + assert lines[1].startswith("<>") assert summary_content.endswith("<>") # Ensure the custom markers are parsed correctly when nesting again. 
diff --git a/tests/test_run_step_processing.py b/tests/test_run_step_processing.py index 1474b4cfd..e8bb715ad 100644 --- a/tests/test_run_step_processing.py +++ b/tests/test_run_step_processing.py @@ -1,11 +1,12 @@ from __future__ import annotations -from typing import Any +from typing import Any, cast import pytest from openai.types.responses import ( ResponseComputerToolCall, ResponseFileSearchToolCall, + ResponseFunctionToolCall, ResponseFunctionWebSearch, ) from openai.types.responses.response_computer_tool_call import ActionClick @@ -30,7 +31,7 @@ handoff, ) from agents._run_impl import RunImpl, ToolRunHandoff -from agents.lifecycle import RunHooksBase +from agents import RunHooks from agents.run import AgentRunner from .test_responses import ( @@ -215,11 +216,11 @@ async def test_handoff_can_disable_run_level_history_nesting(monkeypatch: pytest source_agent = Agent(name="source") target_agent = Agent(name="target") override_handoff = handoff(target_agent, nest_handoff_history=False) - tool_call = get_handoff_tool_call(target_agent) + tool_call = cast(ResponseFunctionToolCall, get_handoff_tool_call(target_agent)) run_handoffs = [ToolRunHandoff(handoff=override_handoff, tool_call=tool_call)] run_config = RunConfig(nest_handoff_history=True) context_wrapper = RunContextWrapper(context=None) - hooks = RunHooksBase() + hooks = RunHooks() original_input = [get_text_input_item("hello")] pre_step_items: list[RunItem] = [] new_step_items: list[RunItem] = [] @@ -258,11 +259,11 @@ async def test_handoff_can_enable_history_nesting(monkeypatch: pytest.MonkeyPatc source_agent = Agent(name="source") target_agent = Agent(name="target") override_handoff = handoff(target_agent, nest_handoff_history=True) - tool_call = get_handoff_tool_call(target_agent) + tool_call = cast(ResponseFunctionToolCall, get_handoff_tool_call(target_agent)) run_handoffs = [ToolRunHandoff(handoff=override_handoff, tool_call=tool_call)] run_config = RunConfig(nest_handoff_history=False) context_wrapper 
= RunContextWrapper(context=None) - hooks = RunHooksBase() + hooks = RunHooks() original_input = [get_text_input_item("hello")] pre_step_items: list[RunItem] = [] new_step_items: list[RunItem] = [] From 19a7cf83f82242b2462d985f3d934b89424a3fff Mon Sep 17 00:00:00 2001 From: jhills20 Date: Mon, 10 Nov 2025 21:35:39 -0500 Subject: [PATCH 12/19] lint updates --- src/agents/handoffs/__init__.py | 28 ++++++++++++++-------------- src/agents/handoffs/history.py | 5 ----- tests/test_run_step_processing.py | 2 +- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/agents/handoffs/__init__.py b/src/agents/handoffs/__init__.py index 8db14659f..b0a9d60a0 100644 --- a/src/agents/handoffs/__init__.py +++ b/src/agents/handoffs/__init__.py @@ -81,20 +81,20 @@ def default_tool_description(cls, agent: AgentBase[Any]) -> str: @overload def handoff( - agent: "Agent[TContext]", + agent: Agent[TContext], *, tool_name_override: str | None = None, tool_description_override: str | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, nest_handoff_history: bool | None = None, is_enabled: bool - | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, -) -> Handoff[TContext, "Agent[TContext]"]: ... + | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, +) -> Handoff[TContext, Agent[TContext]]: ... @overload def handoff( - agent: "Agent[TContext]", + agent: Agent[TContext], *, on_handoff: OnHandoffWithInput[THandoffInput], input_type: type[THandoffInput], @@ -103,13 +103,13 @@ def handoff( input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, nest_handoff_history: bool | None = None, is_enabled: bool - | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, -) -> Handoff[TContext, "Agent[TContext]"]: ... + | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, +) -> Handoff[TContext, Agent[TContext]]: ... 
@overload def handoff( - agent: "Agent[TContext]", + agent: Agent[TContext], *, on_handoff: OnHandoffWithoutInput, tool_description_override: str | None = None, @@ -117,12 +117,12 @@ def handoff( input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, nest_handoff_history: bool | None = None, is_enabled: bool - | Callable[[RunContextWrapper[Any], "Agent[Any]"], MaybeAwaitable[bool]] = True, -) -> Handoff[TContext, "Agent[TContext]"]: ... + | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, +) -> Handoff[TContext, Agent[TContext]]: ... def handoff( - agent: "Agent[TContext]", + agent: Agent[TContext], tool_name_override: str | None = None, tool_description_override: str | None = None, on_handoff: OnHandoffWithInput[THandoffInput] | OnHandoffWithoutInput | None = None, @@ -130,8 +130,8 @@ def handoff( input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, nest_handoff_history: bool | None = None, is_enabled: bool - | Callable[[RunContextWrapper[Any], "Agent[TContext]"], MaybeAwaitable[bool]] = True, -) -> Handoff[TContext, "Agent[TContext]"]: + | Callable[[RunContextWrapper[Any], Agent[TContext]], MaybeAwaitable[bool]] = True, +) -> Handoff[TContext, Agent[TContext]]: assert (on_handoff and input_type) or not (on_handoff and input_type), ( "You must provide either both on_handoff and input_type, or neither" ) @@ -154,7 +154,7 @@ def handoff( async def _invoke_handoff( ctx: RunContextWrapper[Any], input_json: str | None = None - ) -> "Agent[TContext]": + ) -> Agent[TContext]: if input_type is not None and type_adapter is not None: if input_json is None: _error_tracing.attach_error_to_current_span( @@ -188,7 +188,7 @@ async def _invoke_handoff( tool_description = tool_description_override or Handoff.default_tool_description(agent) input_json_schema = ensure_strict_json_schema(input_json_schema) - async def _is_enabled(ctx: RunContextWrapper[Any], agent_base: "AgentBase[Any]") -> bool: + async def 
_is_enabled(ctx: RunContextWrapper[Any], agent_base: AgentBase[Any]) -> bool: from ..agent import Agent assert callable(is_enabled), "is_enabled must be callable here" diff --git a/src/agents/handoffs/history.py b/src/agents/handoffs/history.py index 8469ddae4..dc59547fb 100644 --- a/src/agents/handoffs/history.py +++ b/src/agents/handoffs/history.py @@ -5,13 +5,8 @@ from typing import TYPE_CHECKING, Any, cast from ..items import ( - HandoffCallItem, - HandoffOutputItem, ItemHelpers, - ReasoningItem, RunItem, - ToolCallItem, - ToolCallOutputItem, TResponseInputItem, ) diff --git a/tests/test_run_step_processing.py b/tests/test_run_step_processing.py index e8bb715ad..a9ae22357 100644 --- a/tests/test_run_step_processing.py +++ b/tests/test_run_step_processing.py @@ -25,13 +25,13 @@ ReasoningItem, RunConfig, RunContextWrapper, + RunHooks, RunItem, ToolCallItem, Usage, handoff, ) from agents._run_impl import RunImpl, ToolRunHandoff -from agents import RunHooks from agents.run import AgentRunner from .test_responses import ( From 411d396ee086b67d6972975514d55ab034326df8 Mon Sep 17 00:00:00 2001 From: jhills20 Date: Tue, 11 Nov 2025 10:29:11 -0500 Subject: [PATCH 13/19] update --- src/agents/handoffs/__init__.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/agents/handoffs/__init__.py b/src/agents/handoffs/__init__.py index b0a9d60a0..01e3e9dd9 100644 --- a/src/agents/handoffs/__init__.py +++ b/src/agents/handoffs/__init__.py @@ -87,8 +87,7 @@ def handoff( tool_description_override: str | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, nest_handoff_history: bool | None = None, - is_enabled: bool - | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, + is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, ) -> Handoff[TContext, Agent[TContext]]: ... 
@@ -102,8 +101,7 @@ def handoff( tool_name_override: str | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, nest_handoff_history: bool | None = None, - is_enabled: bool - | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, + is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, ) -> Handoff[TContext, Agent[TContext]]: ... @@ -116,8 +114,7 @@ def handoff( tool_name_override: str | None = None, input_filter: Callable[[HandoffInputData], HandoffInputData] | None = None, nest_handoff_history: bool | None = None, - is_enabled: bool - | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, + is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[Any]], MaybeAwaitable[bool]] = True, ) -> Handoff[TContext, Agent[TContext]]: ... From 0276722338b3a8beaff425aac58d70fc64bf9f5a Mon Sep 17 00:00:00 2001 From: jhills20 Date: Wed, 12 Nov 2025 12:23:59 -0500 Subject: [PATCH 14/19] ruff --- src/agents/handoffs/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/agents/handoffs/__init__.py b/src/agents/handoffs/__init__.py index 01e3e9dd9..61480f5c0 100644 --- a/src/agents/handoffs/__init__.py +++ b/src/agents/handoffs/__init__.py @@ -129,9 +129,9 @@ def handoff( is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[TContext]], MaybeAwaitable[bool]] = True, ) -> Handoff[TContext, Agent[TContext]]: - assert (on_handoff and input_type) or not (on_handoff and input_type), ( - "You must provide either both on_handoff and input_type, or neither" - ) + assert (on_handoff and input_type) or not ( + on_handoff and input_type + ), "You must provide either both on_handoff and input_type, or neither" type_adapter: TypeAdapter[Any] | None if input_type is not None: assert callable(on_handoff), "on_handoff must be callable" From 8746b81da22d2a5822f10febd51b1c3bd55c85f1 Mon Sep 17 00:00:00 2001 From: jhills20 Date: Wed, 12 
Nov 2025 12:27:45 -0500 Subject: [PATCH 15/19] rufff --- src/agents/handoffs/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/agents/handoffs/__init__.py b/src/agents/handoffs/__init__.py index 61480f5c0..01e3e9dd9 100644 --- a/src/agents/handoffs/__init__.py +++ b/src/agents/handoffs/__init__.py @@ -129,9 +129,9 @@ def handoff( is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[TContext]], MaybeAwaitable[bool]] = True, ) -> Handoff[TContext, Agent[TContext]]: - assert (on_handoff and input_type) or not ( - on_handoff and input_type - ), "You must provide either both on_handoff and input_type, or neither" + assert (on_handoff and input_type) or not (on_handoff and input_type), ( + "You must provide either both on_handoff and input_type, or neither" + ) type_adapter: TypeAdapter[Any] | None if input_type is not None: assert callable(on_handoff), "on_handoff must be callable" From 3694c7e954e0c31149307dabd01714256390f2f4 Mon Sep 17 00:00:00 2001 From: jhills20 Date: Wed, 12 Nov 2025 13:32:45 -0500 Subject: [PATCH 16/19] more linting --- src/agents/handoffs/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agents/handoffs/__init__.py b/src/agents/handoffs/__init__.py index 01e3e9dd9..525637717 100644 --- a/src/agents/handoffs/__init__.py +++ b/src/agents/handoffs/__init__.py @@ -193,7 +193,7 @@ async def _is_enabled(ctx: RunContextWrapper[Any], agent_base: AgentBase[Any]) - result = is_enabled(ctx, agent_base) if inspect.isawaitable(result): return await result - return bool(result) + return result return Handoff( tool_name=tool_name, From 298ef8a05ca4f1025d5702d4f09564a6f8135890 Mon Sep 17 00:00:00 2001 From: jhills20 Date: Wed, 12 Nov 2025 13:38:46 -0500 Subject: [PATCH 17/19] fix --- src/agents/handoffs/__init__.py | 99 ++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/src/agents/handoffs/__init__.py b/src/agents/handoffs/__init__.py index 
525637717..655ecbdab 100644 --- a/src/agents/handoffs/__init__.py +++ b/src/agents/handoffs/__init__.py @@ -28,7 +28,10 @@ from ..agent import Agent, AgentBase +# The handoff input type is the type of data passed when the agent is called via a handoff. THandoffInput = TypeVar("THandoffInput", default=Any) + +# The agent type that the handoff returns. TAgent = TypeVar("TAgent", bound="AgentBase[Any]", default="Agent[Any]") OnHandoffWithInput = Callable[[RunContextWrapper[Any], THandoffInput], Any] @@ -38,31 +41,104 @@ @dataclass(frozen=True) class HandoffInputData: input_history: str | tuple[TResponseInputItem, ...] + """ + The input history before `Runner.run()` was called. + """ + pre_handoff_items: tuple[RunItem, ...] + """ + The items generated before the agent turn where the handoff was invoked. + """ + new_items: tuple[RunItem, ...] + """ + The new items generated during the current agent turn, including the item that triggered the + handoff and the tool output message representing the response from the handoff output. + """ + run_context: RunContextWrapper[Any] | None = None + """ + The run context at the time the handoff was invoked. Note that, since this property was added + later on, it is optional for backwards compatibility. + """ def clone(self, **kwargs: Any) -> HandoffInputData: + """ + Make a copy of the handoff input data, with the given arguments changed. 
For example, you + could do: + + ``` + new_handoff_input_data = handoff_input_data.clone(new_items=()) + ``` + """ + return dataclasses_replace(self, **kwargs) HandoffInputFilter: TypeAlias = Callable[[HandoffInputData], MaybeAwaitable[HandoffInputData]] +"""A function that filters the input data passed to the next agent.""" + HandoffHistoryMapper: TypeAlias = Callable[[list[TResponseInputItem]], list[TResponseInputItem]] +"""A function that maps the previous transcript to the nested summary payload.""" @dataclass class Handoff(Generic[TContext, TAgent]): + """A handoff is when an agent delegates a task to another agent. + + For example, in a customer support scenario you might have a "triage agent" that determines + which agent should handle the user's request, and sub-agents that specialize in different areas + like billing, account management, etc. + """ + tool_name: str + """The name of the tool that represents the handoff.""" + tool_description: str + """The description of the tool that represents the handoff.""" + input_json_schema: dict[str, Any] + """The JSON schema for the handoff input. Can be empty if the handoff does not take an input.""" + on_invoke_handoff: Callable[[RunContextWrapper[Any], str], Awaitable[TAgent]] + """The function that invokes the handoff. + + The parameters passed are: (1) the handoff run context, (2) the arguments from the LLM as a + JSON string (or an empty string if ``input_json_schema`` is empty). Must return an agent. + """ + agent_name: str + """The name of the agent that is being handed off to.""" + input_filter: HandoffInputFilter | None = None + """A function that filters the inputs that are passed to the next agent. + + By default, the new agent sees the entire conversation history. In some cases, you may want to + filter inputs (for example, to remove older inputs or remove tools from existing inputs). 
The + function receives the entire conversation history so far, including the input item that + triggered the handoff and a tool call output item representing the handoff tool's output. You + are free to modify the input history or new items as you see fit. The next agent that runs will + receive ``handoff_input_data.all_items``. IMPORTANT: in streaming mode, we will not stream + anything as a result of this function. The items generated before will already have been + streamed. + """ + nest_handoff_history: bool | None = None + """Override the run-level ``nest_handoff_history`` behavior for this handoff only.""" + strict_json_schema: bool = True + """Whether the input JSON schema is in strict mode. We strongly recommend setting this to True + because it increases the likelihood of correct JSON input.""" + is_enabled: bool | Callable[[RunContextWrapper[Any], AgentBase[Any]], MaybeAwaitable[bool]] = ( True ) + """Whether the handoff is enabled. + + Either a bool or a callable that takes the run context and agent and returns whether the + handoff is enabled. You can use this to dynamically enable or disable a handoff based on your + context or state. + """ def get_transfer_message(self, agent: AgentBase[Any]) -> str: return json.dumps({"assistant": agent.name}) @@ -129,6 +205,24 @@ def handoff( is_enabled: bool | Callable[[RunContextWrapper[Any], Agent[TContext]], MaybeAwaitable[bool]] = True, ) -> Handoff[TContext, Agent[TContext]]: + """Create a handoff from an agent. + + Args: + agent: The agent to handoff to, or a function that returns an agent. + tool_name_override: Optional override for the name of the tool that represents the handoff. + tool_description_override: Optional override for the description of the tool that + represents the handoff. + on_handoff: A function that runs when the handoff is invoked. + input_type: The type of the input to the handoff. If provided, the input will be validated + against this type. 
Only relevant if you pass a function that takes an input. + input_filter: A function that filters the inputs that are passed to the next agent. + nest_handoff_history: Optional override for the RunConfig-level ``nest_handoff_history`` + flag. If ``None`` we fall back to the run's configuration. + is_enabled: Whether the handoff is enabled. Can be a bool or a callable that takes the run + context and agent and returns whether the handoff is enabled. Disabled handoffs are + hidden from the LLM at runtime. + """ + assert (on_handoff and input_type) or not (on_handoff and input_type), ( "You must provide either both on_handoff and input_type, or neither" ) @@ -183,6 +277,9 @@ async def _invoke_handoff( tool_name = tool_name_override or Handoff.default_tool_name(agent) tool_description = tool_description_override or Handoff.default_tool_description(agent) + + # Always ensure the input JSON schema is in strict mode. If needed, we can make this + # configurable in the future. input_json_schema = ensure_strict_json_schema(input_json_schema) async def _is_enabled(ctx: RunContextWrapper[Any], agent_base: AgentBase[Any]) -> bool: @@ -193,7 +290,7 @@ async def _is_enabled(ctx: RunContextWrapper[Any], agent_base: AgentBase[Any]) - result = is_enabled(ctx, agent_base) if inspect.isawaitable(result): return await result - return result + return bool(result) return Handoff( tool_name=tool_name, From adf0c669c9120f4effa3a3150e1368e164369e12 Mon Sep 17 00:00:00 2001 From: James Hills <70035505+jhills20@users.noreply.github.com> Date: Wed, 12 Nov 2025 17:59:35 -0500 Subject: [PATCH 18/19] Update release.md --- docs/release.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/release.md b/docs/release.md index 93161a7c8..f65136ce5 100644 --- a/docs/release.md +++ b/docs/release.md @@ -26,8 +26,7 @@ This version doesn’t introduce any visible breaking changes, but it includes n - Added support for `RealtimeRunner` to handle [SIP protocol 
connections](https://platform.openai.com/docs/guides/realtime-sip) - Significantly revised the internal logic of `Runner#run_sync` for Python 3.14 compatibility - By default handoff history is now packaged into a single assistant message instead of exposing the raw user/assistant turns, giving downstream agents a concise, predictable recap -- The existing single-message handoff transcript now starts with "For context, here is the conversation so far between the user and the previous agent:" before the `<CONVERSATION HISTORY>` block, so downstream agents get a clearly labeled recap -- The existing single-message handoff transcript now starts with "For context, here is the conversation so far between the user and the previous agent:" before the `<CONVERSATION HISTORY>` block, so downstream agents get a clearly labeled recap +- The existing single-message handoff transcript now by default starts with "For context, here is the conversation so far between the user and the previous agent:" before the `<CONVERSATION HISTORY>` block, so downstream agents get a clearly labeled recap ### 0.4.0 From 65903192cd0ccea7fd74a7cd7d788ac888f7b4b4 Mon Sep 17 00:00:00 2001 From: James Hills <70035505+jhills20@users.noreply.github.com> Date: Mon, 17 Nov 2025 11:36:57 -0800 Subject: [PATCH 19/19] Update release.md --- docs/release.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/release.md b/docs/release.md index f65136ce5..85748fe0c 100644 --- a/docs/release.md +++ b/docs/release.md @@ -19,14 +19,17 @@ We will increment `Z` for non-breaking changes: ## Breaking change changelog +### 0.6.0 + +In this version, the default handoff history is now packaged into a single assistant message instead of exposing the raw user/assistant turns, giving downstream agents a concise, predictable recap +- The existing single-message handoff transcript now by default starts with "For context, here is the conversation so far between the user and the previous agent:" before the `<CONVERSATION HISTORY>` block, so downstream agents get a clearly labeled recap +
### 0.5.0 This version doesn’t introduce any visible breaking changes, but it includes new features and a few significant updates under the hood: - Added support for `RealtimeRunner` to handle [SIP protocol connections](https://platform.openai.com/docs/guides/realtime-sip) - Significantly revised the internal logic of `Runner#run_sync` for Python 3.14 compatibility -- By default handoff history is now packaged into a single assistant message instead of exposing the raw user/assistant turns, giving downstream agents a concise, predictable recap -- The existing single-message handoff transcript now by default starts with "For context, here is the conversation so far between the user and the previous agent:" before the `<CONVERSATION HISTORY>` block, so downstream agents get a clearly labeled recap ### 0.4.0