@@ -50,10 +50,13 @@ class OpenAIConfig(TypedDict, total=False):
5050 params: Model parameters (e.g., max_tokens).
5151 For a complete list of supported parameters, see
5252 https://platform.openai.com/docs/api-reference/chat/create.
53+ streaming: Optional flag to indicate whether provider streaming should be used.
54+ If omitted, defaults to True (preserves existing behaviour).
5355 """
5456
5557 model_id : str
5658 params : Optional [dict [str , Any ]]
59+ streaming : Optional [bool ]
5760
5861 def __init__ (self , client_args : Optional [dict [str , Any ]] = None , ** model_config : Unpack [OpenAIConfig ]) -> None :
5962 """Initialize provider instance.
@@ -263,7 +266,8 @@ def format_request(
263266 return {
264267 "messages" : self .format_request_messages (messages , system_prompt ),
265268 "model" : self .config ["model_id" ],
266- "stream" : True ,
269+ # Use configured streaming flag; default True to preserve previous behavior.
270+ "stream" : bool (self .get_config ().get ("streaming" , True )),
267271 "stream_options" : {"include_usage" : True },
268272 "tools" : [
269273 {
@@ -352,6 +356,68 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent:
352356 case _:
353357 raise RuntimeError (f"chunk_type=<{ event ['chunk_type' ]} | unknown type" )
354358
def _convert_non_streaming_to_streaming(self, response: Any) -> list[StreamEvent]:
    """Convert a provider non-streaming response into streaming-style events.

    This helper intentionally *does not* emit the initial message_start/content_start
    events, because the caller (``stream``) already yields them before branching on the
    streaming flag.

    Events are produced in the same order as the streaming branch of ``stream``:

    1. ``content_delta`` events for reasoning content and text (first choice only),
    2. ``content_stop`` for the text block,
    3. ``content_start`` / ``content_delta`` / ``content_stop`` for every tool call,
    4. ``message_stop`` carrying the finish reason (``"stop"`` when none is reported),
    5. ``metadata`` carrying ``response.usage`` when it is present and truthy.

    Args:
        response: The object returned by the provider for a non-streaming request.
            Only ``response.choices[0]`` and ``response.usage`` are read; every
            attribute is looked up defensively, so a response without choices still
            yields the stop events.

    Returns:
        The formatted events, in emission order.
    """
    events: list[StreamEvent] = []

    # Resolve the first choice once; everything below tolerates it being absent.
    choices = getattr(response, "choices", None)
    choice = choices[0] if choices else None
    message = getattr(choice, "message", None)

    # Message-level reasoning, mirroring the `delta.reasoning_content` handling of the
    # streaming branch so that reasoning is not lost when streaming is disabled.
    reasoning = getattr(message, "reasoning_content", None)
    if reasoning:
        events.append(
            self.format_chunk(
                {"chunk_type": "content_delta", "data_type": "reasoning_content", "data": reasoning}
            )
        )

    content = getattr(message, "content", None)
    if isinstance(content, str):
        # Skip empty strings, as the streaming branch does for empty deltas.
        if content:
            events.append(
                self.format_chunk({"chunk_type": "content_delta", "data_type": "text", "data": content})
            )
    elif isinstance(content, list):
        # List content: a sequence of block dicts and/or plain strings.
        for block in content:
            if isinstance(block, str):
                events.append(
                    self.format_chunk({"chunk_type": "content_delta", "data_type": "text", "data": block})
                )
                continue
            if not isinstance(block, dict):
                continue

            reasoning_block = block.get("reasoningContent")
            if isinstance(reasoning_block, dict):
                reasoning_text = reasoning_block.get("reasoningText")
                text = reasoning_text.get("text") if isinstance(reasoning_text, dict) else None
                if isinstance(text, str):
                    events.append(
                        self.format_chunk(
                            {"chunk_type": "content_delta", "data_type": "reasoning_content", "data": text}
                        )
                    )
                else:
                    # Best effort: a malformed reasoning block is skipped, but not silently.
                    logger.warning("skipping malformed reasoningContent block in non-streaming response")
            elif "text" in block:
                events.append(
                    self.format_chunk(
                        {"chunk_type": "content_delta", "data_type": "text", "data": block["text"]}
                    )
                )
            # Other block types are ignored for now.

    # Close the text content block (same payload as the streaming branch).
    events.append(self.format_chunk({"chunk_type": "content_stop", "data_type": "text"}))

    # Tool calls: emit the same start/delta/stop triple the streaming branch emits per
    # tool call. A non-streaming tool call arrives whole, so the single object serves as
    # both the "start" and the only "delta".
    # NOTE(review): assumes non-streaming tool-call objects expose the same attributes
    # that format_chunk reads from streaming tool-call deltas - confirm against format_chunk.
    for tool_call in getattr(message, "tool_calls", None) or []:
        events.append(self.format_chunk({"chunk_type": "content_start", "data_type": "tool", "data": tool_call}))
        events.append(self.format_chunk({"chunk_type": "content_delta", "data_type": "tool", "data": tool_call}))
        events.append(self.format_chunk({"chunk_type": "content_stop", "data_type": "tool"}))

    # Message stop: forward the finish reason, defaulting to "stop" when none is reported.
    stop_reason = getattr(choice, "finish_reason", None)
    events.append(self.format_chunk({"chunk_type": "message_stop", "data": stop_reason or "stop"}))

    # Usage metadata, if the provider returned any.
    usage = getattr(response, "usage", None)
    if usage:
        events.append(self.format_chunk({"chunk_type": "metadata", "data": usage}))

    return events
420+
355421 @override
356422 async def stream (
357423 self ,
@@ -409,50 +475,63 @@ async def stream(
409475
410476 tool_calls : dict [int , list [Any ]] = {}
411477
412- async for event in response :
413- # Defensive: skip events with empty or missing choices
414- if not getattr (event , "choices" , None ):
415- continue
416- choice = event .choices [0 ]
417-
418- if choice .delta .content :
419- yield self .format_chunk (
420- {"chunk_type" : "content_delta" , "data_type" : "text" , "data" : choice .delta .content }
421- )
422-
423- if hasattr (choice .delta , "reasoning_content" ) and choice .delta .reasoning_content :
424- yield self .format_chunk (
425- {
426- "chunk_type" : "content_delta" ,
427- "data_type" : "reasoning_content" ,
428- "data" : choice .delta .reasoning_content ,
429- }
430- )
478+ streaming = bool (self .get_config ().get ("streaming" , True ))
479+
480+ if streaming :
481+ # response is an async iterator when streaming=True
482+ async for event in response :
483+ # Defensive: skip events with empty or missing choices
484+ if not getattr (event , "choices" , None ):
485+ continue
486+ choice = event .choices [0 ]
487+
488+ if choice .delta .content :
489+ yield self .format_chunk (
490+ {"chunk_type" : "content_delta" , "data_type" : "text" , "data" : choice .delta .content }
491+ )
492+
493+ if hasattr (choice .delta , "reasoning_content" ) and choice .delta .reasoning_content :
494+ yield self .format_chunk (
495+ {
496+ "chunk_type" : "content_delta" ,
497+ "data_type" : "reasoning_content" ,
498+ "data" : choice .delta .reasoning_content ,
499+ }
500+ )
431501
432- for tool_call in choice .delta .tool_calls or []:
433- tool_calls .setdefault (tool_call .index , []).append (tool_call )
502+ for tool_call in choice .delta .tool_calls or []:
503+ tool_calls .setdefault (tool_call .index , []).append (tool_call )
434504
435- if choice .finish_reason :
436- break
505+ if choice .finish_reason :
506+ break
437507
438- yield self .format_chunk ({"chunk_type" : "content_stop" , "data_type" : "text" })
508+ yield self .format_chunk ({"chunk_type" : "content_stop" , "data_type" : "text" })
439509
440- for tool_deltas in tool_calls .values ():
441- yield self .format_chunk ({"chunk_type" : "content_start" , "data_type" : "tool" , "data" : tool_deltas [0 ]})
510+ for tool_deltas in tool_calls .values ():
511+ yield self .format_chunk (
512+ {"chunk_type" : "content_start" , "data_type" : "tool" , "data" : tool_deltas [0 ]}
513+ )
442514
443- for tool_delta in tool_deltas :
444- yield self .format_chunk ({"chunk_type" : "content_delta" , "data_type" : "tool" , "data" : tool_delta })
515+ for tool_delta in tool_deltas :
516+ yield self .format_chunk (
517+ {"chunk_type" : "content_delta" , "data_type" : "tool" , "data" : tool_delta }
518+ )
445519
446- yield self .format_chunk ({"chunk_type" : "content_stop" , "data_type" : "tool" })
520+ yield self .format_chunk ({"chunk_type" : "content_stop" , "data_type" : "tool" })
447521
448- yield self .format_chunk ({"chunk_type" : "message_stop" , "data" : choice .finish_reason })
522+ yield self .format_chunk ({"chunk_type" : "message_stop" , "data" : choice .finish_reason })
449523
450- # Skip remaining events as we don't have use for anything except the final usage payload
451- async for event in response :
452- _ = event
524+ # Skip remaining events as we don't have use for anything except the final usage payload
525+ async for event in response :
526+ _ = event
453527
454- if event .usage :
455- yield self .format_chunk ({"chunk_type" : "metadata" , "data" : event .usage })
528+ if event .usage :
529+ yield self .format_chunk ({"chunk_type" : "metadata" , "data" : event .usage })
530+ else :
531+ # Non-streaming provider response — convert to streaming-style events (excluding the initial
532+ # message_start/content_start because we already emitted them above).
533+ for ev in self ._convert_non_streaming_to_streaming (response ):
534+ yield ev
456535
457536 logger .debug ("finished streaming response from model" )
458537
0 commit comments