From 251c3ffcfeddac9b7f189e459a37348f03d5c1e5 Mon Sep 17 00:00:00 2001
From: JRMeyer
Date: Fri, 28 Nov 2025 11:53:35 +0100
Subject: [PATCH] feat: preserve logprobs from chat completions API in ModelResponse

The SDK already accepts `top_logprobs` in ModelSettings and passes it to the
API, but the logprobs returned in the response were discarded during
conversion.

This change:

1. Adds an optional `logprobs` field to the ModelResponse dataclass
2. Extracts logprobs from `choice.logprobs.content` in the chat completions
   model and includes them in the ModelResponse

This enables use cases like RLHF training, confidence scoring, and
uncertainty estimation that require access to token-level log probabilities.
---
 src/agents/items.py                         | 7 +++++++
 src/agents/models/openai_chatcompletions.py | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/src/agents/items.py b/src/agents/items.py
index 991a7f877..76be5122b 100644
--- a/src/agents/items.py
+++ b/src/agents/items.py
@@ -356,6 +356,13 @@ class ModelResponse:
     be passed to `Runner.run`.
     """
 
+    logprobs: list[Any] | None = None
+    """Token log probabilities from the model response.
+    Only populated when using the chat completions API with `top_logprobs` set in ModelSettings.
+    Each element corresponds to a token and contains the token string, log probability, and
+    optionally the top alternative tokens with their log probabilities.
+    """
+
     def to_input_items(self) -> list[TResponseInputItem]:
         """Convert the output into a list of input items suitable for passing to the model."""
         # We happen to know that the shape of the Pydantic output items are the same as the
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index 594848d3e..ac696dbda 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -129,10 +129,15 @@ async def get_response(
 
         items = Converter.message_to_output_items(message) if message is not None else []
 
+        logprobs_data = None
+        if first_choice and first_choice.logprobs and first_choice.logprobs.content:
+            logprobs_data = [lp.model_dump() for lp in first_choice.logprobs.content]
+
         return ModelResponse(
             output=items,
             usage=usage,
             response_id=None,
+            logprobs=logprobs_data,
        )
 
     async def stream_response(
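
Note for reviewers: a minimal usage sketch of the new field, not part of the patch. It assumes the public `agents` package surface (`Agent`, `Runner`, `ModelSettings`, `set_default_openai_api`) and that `Runner.run(...)` returns a result whose `raw_responses` holds the `ModelResponse` objects produced above; the exact prompt and `top_logprobs` value are illustrative.

```python
import asyncio

from agents import Agent, ModelSettings, Runner, set_default_openai_api

# logprobs are only populated on the chat completions path, so route the
# default model through that API (assumption: the Responses API path leaves
# ModelResponse.logprobs as None).
set_default_openai_api("chat_completions")

agent = Agent(
    name="assistant",
    instructions="Answer concisely.",
    # ModelSettings already accepts top_logprobs; this patch only surfaces
    # what the API returns.
    model_settings=ModelSettings(top_logprobs=3),
)


async def main() -> None:
    result = await Runner.run(agent, "What is the capital of France?")

    for response in result.raw_responses:
        if not response.logprobs:
            continue
        for entry in response.logprobs:
            # Each entry is a dict produced by model_dump() on a
            # ChatCompletionTokenLogprob, e.g. keys "token", "logprob",
            # "bytes", "top_logprobs".
            print(entry["token"], entry["logprob"])


if __name__ == "__main__":
    asyncio.run(main())
```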