/**
 * Converts a Deepgram live-transcription message into speech data entries,
 * one per object found under `data.channel.alternatives`.
 *
 * The payload is treated as untrusted JSON: every field is narrowed before it
 * is read, so a message without `channel`/`alternatives` yields an empty array
 * instead of throwing, and alternatives without `words` get zero timestamps.
 *
 * @param language - Language tag copied verbatim onto every returned entry.
 * @param data - Parsed transcription message; only `channel.alternatives` is read.
 * @returns One entry per alternative. `speaker` is present only when the first
 *   word of the alternative carries a `speaker` value.
 */
const liveTranscriptionToSpeechData = (
  language: STTLanguages | string,
  data: { [id: string]: unknown },
): stt.SpeechData[] => {
  // Local narrowing helpers; kept inside the function so no new module-level
  // names are introduced.
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null;
  const numberOr = (value: unknown, fallback: number): number =>
    typeof value === 'number' ? value : fallback;

  const channel = data['channel'];
  const alternatives = isRecord(channel) ? channel['alternatives'] : undefined;
  if (!Array.isArray(alternatives)) {
    return [];
  }

  return alternatives.filter(isRecord).map((alt) => {
    const rawWords = alt['words'];
    const words = Array.isArray(rawWords) ? rawWords.filter(isRecord) : [];
    const firstWord = words.length > 0 ? words[0] : undefined;
    const lastWord = words.length > 0 ? words[words.length - 1] : undefined;

    // NOTE(review): only the first word's speaker is used for the whole
    // alternative — confirm whether one alternative can span several speakers.
    const speaker = firstWord?.['speaker'];
    const transcript = alt['transcript'];

    return {
      language,
      // Timestamps default to 0 when the alternative has no (usable) words.
      startTime: numberOr(firstWord?.['start'], 0),
      endTime: numberOr(lastWord?.['end'], 0),
      confidence: numberOr(alt['confidence'], 0),
      text: typeof transcript === 'string' ? transcript : '',
      // Attach `speaker` only when a value was actually reported.
      ...(speaker !== undefined ? { speaker } : {}),
    };
  });
};