diff --git a/crates/lib/src/llm/openai.rs b/crates/lib/src/llm/openai.rs
index 7056945..c59a1e1 100644
--- a/crates/lib/src/llm/openai.rs
+++ b/crates/lib/src/llm/openai.rs
@@ -278,6 +278,9 @@ impl OpenAiProvider {
             "include": [],
         });
 
+        // The ChatGPT Codex Responses backend currently rejects all token-limit
+        // request fields (`max_output_tokens`, `max_completion_tokens`, and
+        // `max_tokens`), so this path cannot forward ProviderRequest::max_tokens.
         if !request.system_prompt.is_empty() {
             body["instructions"] = serde_json::Value::String(request.system_prompt.clone());
         }
@@ -620,12 +623,13 @@ fn spawn_responses_stream(
                         let _ = tx.send(StreamEvent::ContentBlockComplete(block)).await;
                     }
                 }
-                Some("response.completed") => {
-                    if let Some(u) = parsed.get("response").and_then(|r| r.get("usage")) {
+                Some("response.completed") | Some("response.incomplete") => {
+                    let response = parsed.get("response");
+                    if let Some(u) = response.and_then(|r| r.get("usage")) {
                         usage = responses_usage(u);
                     }
-                    if response_has_function_call(parsed.get("response")) {
-                        stop_reason = Some(StopReason::ToolUse);
+                    if let Some(reason) = responses_stop_reason(response) {
+                        stop_reason = Some(reason);
                     }
                     let _ = tx
                         .send(StreamEvent::Done {
@@ -805,6 +809,26 @@ fn response_has_function_call(response: Option<&Value>) -> bool {
     })
 }
 
+fn responses_stop_reason(response: Option<&Value>) -> Option<StopReason> {
+    let response = response?;
+    if response_has_function_call(Some(response)) {
+        return Some(StopReason::ToolUse);
+    }
+    if response
+        .get("status")
+        .and_then(Value::as_str)
+        .is_some_and(|status| status == "incomplete")
+        && response
+            .get("incomplete_details")
+            .and_then(|details| details.get("reason"))
+            .and_then(Value::as_str)
+            .is_some_and(|reason| matches!(reason, "max_output_tokens" | "max_tokens"))
+    {
+        return Some(StopReason::MaxTokens);
+    }
+    None
+}
+
 fn responses_usage(usage: &Value) -> Usage {
     Usage {
         input_tokens: usage
@@ -950,6 +974,31 @@ mod tests {
         assert_eq!(body["input"][2]["call_id"], "call_1");
     }
 
+    #[test]
+    fn responses_body_omits_codex_unsupported_token_limit_fields() {
+        let provider = OpenAiProvider::new("https://example.test/v1", "test-key");
+        let request = ProviderRequest {
+            messages: vec![user_message(vec![ContentBlock::Text {
+                text: "hello".to_string(),
+            }])],
+            system_prompt: "be useful".to_string(),
+            tools: Vec::new(),
+            model: "gpt-5.4".to_string(),
+            max_tokens: 1024,
+            temperature: None,
+            enable_caching: false,
+            tool_choice: ToolChoice::None,
+            metadata: None,
+            cancel: CancellationToken::new(),
+        };
+
+        let body = provider.build_responses_body(&request);
+
+        assert!(body.get("max_output_tokens").is_none());
+        assert!(body.get("max_completion_tokens").is_none());
+        assert!(body.get("max_tokens").is_none());
+    }
+
     #[test]
     fn response_item_done_maps_function_call_to_tool_use() {
         let item = serde_json::json!({
@@ -971,6 +1020,41 @@ mod tests {
         }
     }
 
+    #[test]
+    fn responses_stop_reason_maps_function_call() {
+        let response = serde_json::json!({
+            "status": "completed",
+            "output": [
+                {
+                    "type": "function_call",
+                    "call_id": "call_7",
+                    "name": "bash",
+                    "arguments": "{}"
+                }
+            ]
+        });
+
+        assert_eq!(
+            responses_stop_reason(Some(&response)),
+            Some(StopReason::ToolUse)
+        );
+    }
+
+    #[test]
+    fn responses_stop_reason_maps_incomplete_max_output_tokens() {
+        let response = serde_json::json!({
+            "status": "incomplete",
+            "incomplete_details": {
+                "reason": "max_output_tokens"
+            }
+        });
+
+        assert_eq!(
+            responses_stop_reason(Some(&response)),
+            Some(StopReason::MaxTokens)
+        );
+    }
+
     #[test]
     fn responses_usage_maps_cached_tokens() {
         let usage = responses_usage(&serde_json::json!({