@@ -34,7 +34,10 @@ def get_current_time() -> str:
3434
# System prompt for the phone agent: asks for a creative opening greeting and
# short, friendly replies because the conversation happens over a phone call.
_TWILIO_AGENT_INSTRUCTIONS = (
    "You are a helpful assistant that starts every conversation with a creative greeting. "
    "Keep responses concise and friendly since this is a phone conversation."
)

# Realtime agent used for Twilio calls, exposing the weather and
# current-time tools defined earlier in this module.
agent = RealtimeAgent(
    tools=[get_weather, get_current_time],
    instructions=_TWILIO_AGENT_INSTRUCTIONS,
    name="Twilio Assistant",
)
4043
@@ -46,21 +49,39 @@ def __init__(self, twilio_websocket: WebSocket):
4649 self .session : RealtimeSession | None = None
4750 self .playback_tracker = RealtimePlaybackTracker ()
4851
49- # Audio buffering configuration (matching CLI demo)
50- self .CHUNK_LENGTH_S = 0.05 # 50ms chunks like CLI demo
51- self .SAMPLE_RATE = 8000 # Twilio uses 8kHz for g711_ulaw
52- self .BUFFER_SIZE_BYTES = int (self .SAMPLE_RATE * self .CHUNK_LENGTH_S ) # 50ms worth of audio
52+ # Audio chunking (matches CLI demo)
53+ self .CHUNK_LENGTH_S = 0.05 # 50ms chunks
54+ self .SAMPLE_RATE = 8000 # Twilio g711_ulaw at 8kHz
55+ self .BUFFER_SIZE_BYTES = int (self .SAMPLE_RATE * self .CHUNK_LENGTH_S ) # ~400 bytes per 50ms
5356
5457 self ._stream_sid : str | None = None
5558 self ._audio_buffer : bytearray = bytearray ()
5659 self ._last_buffer_send_time = time .time ()
5760
58- # Mark event tracking for playback
61+ # Playback tracking for outbound audio
5962 self ._mark_counter = 0
6063 self ._mark_data : dict [
6164 str , tuple [str , int , int ]
6265 ] = {} # mark_id -> (item_id, content_index, byte_count)
6366
67+ # ---- Deterministic startup warm-up (preferred over sleep) ----
68+ # Buffer the first N chunks before sending to OpenAI; then mark warmed.
69+ try :
70+ self .STARTUP_BUFFER_CHUNKS = max (0 , int (os .getenv ("TWILIO_STARTUP_BUFFER_CHUNKS" , "3" )))
71+ except Exception :
72+ self .STARTUP_BUFFER_CHUNKS = 3
73+
74+ self ._startup_buffer = bytearray ()
75+ self ._startup_warmed = (
76+ self .STARTUP_BUFFER_CHUNKS == 0
77+ ) # if 0, considered warmed immediately
78+
79+ # Optional delay (defaults 0.0 because buffering is preferred)
80+ try :
81+ self .STARTUP_DELAY_S = float (os .getenv ("TWILIO_STARTUP_DELAY_S" , "0.0" ))
82+ except Exception :
83+ self .STARTUP_DELAY_S = 0.0
84+
6485 async def start (self ) -> None :
6586 """Start the session."""
6687 runner = RealtimeRunner (agent )
@@ -89,6 +110,11 @@ async def start(self) -> None:
89110 await self .twilio_websocket .accept ()
90111 print ("Twilio WebSocket connection accepted" )
91112
113+ # Optional tiny delay (kept configurable; default 0.0)
114+ if self .STARTUP_DELAY_S > 0 :
115+ await asyncio .sleep (self .STARTUP_DELAY_S )
116+
117+ # Start loops after handshake
92118 self ._realtime_session_task = asyncio .create_task (self ._realtime_session_loop ())
93119 self ._message_loop_task = asyncio .create_task (self ._twilio_message_loop ())
94120 self ._buffer_flush_task = asyncio .create_task (self ._buffer_flush_loop ())
@@ -197,7 +223,7 @@ async def _handle_media_event(self, message: dict[str, Any]) -> None:
197223 # Add original µ-law to buffer for OpenAI (they expect µ-law)
198224 self ._audio_buffer .extend (ulaw_bytes )
199225
200- # Send buffered audio if we have enough data
226+ # Send buffered audio if we have enough data for one chunk
201227 if len (self ._audio_buffer ) >= self .BUFFER_SIZE_BYTES :
202228 await self ._flush_audio_buffer ()
203229
@@ -210,47 +236,55 @@ async def _handle_mark_event(self, message: dict[str, Any]) -> None:
210236 mark_data = message .get ("mark" , {})
211237 mark_id = mark_data .get ("name" , "" )
212238
213- # Look up stored data for this mark ID
214239 if mark_id in self ._mark_data :
215240 item_id , item_content_index , byte_count = self ._mark_data [mark_id ]
216-
217- # Convert byte count back to bytes for playback tracker
218- audio_bytes = b"\x00 " * byte_count # Placeholder bytes
219-
220- # Update playback tracker
241+ audio_bytes = b"\x00 " * byte_count # Placeholder bytes for tracker
221242 self .playback_tracker .on_play_bytes (item_id , item_content_index , audio_bytes )
222243 print (
223244 f"Playback tracker updated: { item_id } , index { item_content_index } , { byte_count } bytes"
224245 )
225-
226- # Clean up the stored data
227246 del self ._mark_data [mark_id ]
228247
229248 except Exception as e :
230249 print (f"Error handling mark event: { e } " )
231250
async def _flush_audio_buffer(self) -> None:
    """Forward buffered caller audio to OpenAI, holding back the first chunks.

    Until ``STARTUP_BUFFER_CHUNKS * BUFFER_SIZE_BYTES`` bytes have been
    collected, audio is accumulated in ``self._startup_buffer`` and nothing is
    sent. Once that threshold is reached the accumulated audio goes out as a
    single ``send_audio`` call and every later flush sends its data
    immediately.

    Does nothing when ``self._audio_buffer`` is empty or ``self.session`` is
    not set. Errors raised while sending are printed and swallowed; the audio
    of a failed send is dropped rather than retried, in both the warm-up and
    the steady-state path.

    All shared state (``_audio_buffer``, ``_startup_buffer``,
    ``_startup_warmed``) is updated *before* the ``await``. This method is
    invoked both from the media-event handler and from the periodic flush
    loop, so another call can run while ``send_audio`` is suspended; if the
    warm-up buffer were cleared only after the await, that second call would
    still see "not warmed" and send the same warm-up audio again.
    """
    if not self._audio_buffer or not self.session:
        return

    try:
        # Take ownership of the pending bytes before any suspension point.
        pending = bytes(self._audio_buffer)
        self._audio_buffer.clear()
        self._last_buffer_send_time = time.time()

        if self._startup_warmed:
            payload = pending
        else:
            self._startup_buffer.extend(pending)

            # Bytes required before the first send: N chunks * bytes per chunk.
            warmup_target = self.BUFFER_SIZE_BYTES * max(0, self.STARTUP_BUFFER_CHUNKS)
            if len(self._startup_buffer) < warmup_target:
                # Not enough audio yet; keep accumulating.
                return

            # Warm-up complete: snapshot and reset state first so a flush that
            # interleaves with the send below cannot re-send this audio.
            payload = bytes(self._startup_buffer)
            self._startup_buffer.clear()
            self._startup_warmed = True

        await self.session.send_audio(payload)

    except Exception as e:
        print(f"Error sending buffered audio to OpenAI: {e}")
248282
249283 async def _buffer_flush_loop (self ) -> None :
250284 """Periodically flush audio buffer to prevent stale data."""
251285 try :
252286 while True :
253- await asyncio .sleep (self .CHUNK_LENGTH_S ) # Check every 50ms
287+ await asyncio .sleep (self .CHUNK_LENGTH_S ) # check every 50ms
254288
255289 # If buffer has data and it's been too long since last send, flush it
256290 current_time = time .time ()
0 commit comments