From bf1df741e3206850a8a830c2e2eab9cd8b8741e5 Mon Sep 17 00:00:00 2001 From: Ashish Datta Date: Fri, 16 Jan 2026 10:28:01 -0500 Subject: [PATCH] fix(agents-realtime): preserve audio transcripts --- .changeset/fresh-brooms-relax.md | 5 +++ packages/agents-realtime/src/utils.ts | 39 +++++++++++++++------ packages/agents-realtime/test/utils.test.ts | 37 +++++++++++++++++++ 3 files changed, 70 insertions(+), 11 deletions(-) create mode 100644 .changeset/fresh-brooms-relax.md diff --git a/.changeset/fresh-brooms-relax.md b/.changeset/fresh-brooms-relax.md new file mode 100644 index 000000000..9285e7743 --- /dev/null +++ b/.changeset/fresh-brooms-relax.md @@ -0,0 +1,5 @@ +--- +'@openai/agents-realtime': patch +--- + +Preserve assistant audio transcripts when realtime updates omit content. diff --git a/packages/agents-realtime/src/utils.ts b/packages/agents-realtime/src/utils.ts index b3c218ace..d368e0347 100644 --- a/packages/agents-realtime/src/utils.ts +++ b/packages/agents-realtime/src/utils.ts @@ -151,7 +151,8 @@ export function removeAudioFromContent( return { ...item, content: item.content.map((entry) => { - if ((entry as any).type === 'output_audio') { + const entryType = (entry as any).type; + if (entryType === 'output_audio' || entryType === 'audio') { return { ...entry, audio: null, @@ -192,27 +193,43 @@ function preserveAssistantAudioTranscripts( return incoming; } + if (incoming.content.length === 0 && existing.content.length > 0) { + return { + ...incoming, + content: existing.content, + }; + } + const mergedContent = incoming.content.map((entry, index) => { - if (entry.type !== 'output_audio') { + const entryType = (entry as any).type; + if (entryType !== 'output_audio' && entryType !== 'audio') { return entry; } + const entryTranscript = (entry as any).transcript; const transcriptMissing = - typeof entry.transcript !== 'string' || entry.transcript.length === 0; + typeof entryTranscript !== 'string' || entryTranscript.length === 0; if (!transcriptMissing) { return entry; } const previousEntry = existing.content[index]; - if ( - previousEntry && - previousEntry.type === 'output_audio' && - typeof previousEntry.transcript === 'string' && - previousEntry.transcript.length > 0 - ) { + if (!previousEntry) { + return entry; + } + + const previousType = (previousEntry as any).type; + const previousTranscript = + (previousType === 'output_audio' || previousType === 'audio') && + typeof (previousEntry as any).transcript === 'string' && + (previousEntry as any).transcript.length > 0 + ? (previousEntry as any).transcript + : null; + + if (previousTranscript) { return { - ...entry, - transcript: previousEntry.transcript, + ...(entry as any), + transcript: previousTranscript, }; } diff --git a/packages/agents-realtime/test/utils.test.ts b/packages/agents-realtime/test/utils.test.ts index cc4c9d94c..1b774f488 100644 --- a/packages/agents-realtime/test/utils.test.ts +++ b/packages/agents-realtime/test/utils.test.ts @@ -195,6 +195,33 @@ describe('realtime utils', () => { } }); + it('preserves assistant audio transcript when incoming content is empty', () => { + const transcript = 'voice transcript'; + const history: RealtimeMessageItem[] = [ + { + itemId: '4', + type: 'message', + role: 'assistant', + status: 'completed', + content: [{ type: 'audio', transcript }], + } as RealtimeMessageItem, + ]; + + const incoming: RealtimeMessageItem = { + itemId: '4', + type: 'message', + role: 'assistant', + status: 'in_progress', + content: [], + } as RealtimeMessageItem; + + const updated = updateRealtimeHistory(history, incoming, false); + const updatedMessage = updated[0] as RealtimeMessageItem; + const content = updatedMessage.content[0] as any; + expect(content.transcript).toBe(transcript); + expect(updatedMessage.status).toBe('in_progress'); + }); + it('prefers new transcript value when provided', () => { const history: RealtimeMessageItem[] = [ { @@ -234,12 +261,22 @@ describe('realtime utils', () => { status: 'completed', content: [{ type: 'output_audio', audio: 'out', transcript: 'bye' }], }; + const assistantAudioItem: RealtimeMessageItem = { + itemId: 'a2', + type: 'message', + role: 'assistant', + status: 'completed', + content: [{ type: 'audio', audio: 'out', transcript: 'bye' }], + } as RealtimeMessageItem; expect( (removeAudioFromContent(userItem).content[0] as any).audio, ).toBeNull(); expect( (removeAudioFromContent(assistantItem).content[0] as any).audio, ).toBeNull(); + expect( + (removeAudioFromContent(assistantAudioItem).content[0] as any).audio, + ).toBeNull(); }); it('hasWebRTCSupport detects window availability', () => {