From 1c6aa586f9baf0714b8eba4d32364769ac870d15 Mon Sep 17 00:00:00 2001 From: Yujong Lee Date: Sun, 17 Aug 2025 10:15:57 -0700 Subject: [PATCH] wip --- .../src/service/streaming.rs | 2 ++ crates/whisper-local/src/stream.rs | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/crates/transcribe-whisper-local/src/service/streaming.rs b/crates/transcribe-whisper-local/src/service/streaming.rs index 4bd91d6f0..c2001d23b 100644 --- a/crates/transcribe-whisper-local/src/service/streaming.rs +++ b/crates/transcribe-whisper-local/src/service/streaming.rs @@ -297,6 +297,8 @@ where Ok(chunk) => Some(hypr_whisper_local::SimpleAudioChunk { samples: chunk.samples, meta: Some(serde_json::json!({ "source": source_name })), + start_timestamp_ms: Some(chunk.start_timestamp_ms), + end_timestamp_ms: Some(chunk.end_timestamp_ms), }), }) }) diff --git a/crates/whisper-local/src/stream.rs b/crates/whisper-local/src/stream.rs index ad6b407a0..ae991df13 100644 --- a/crates/whisper-local/src/stream.rs +++ b/crates/whisper-local/src/stream.rs @@ -20,12 +20,16 @@ pub struct TranscriptionTask { pub trait AudioChunk: Send + 'static { fn samples(&self) -> &[f32]; fn meta(&self) -> Option; + fn start_timestamp_ms(&self) -> Option; + fn end_timestamp_ms(&self) -> Option; } #[derive(Default)] pub struct SimpleAudioChunk { pub samples: Vec, pub meta: Option, + pub start_timestamp_ms: Option, + pub end_timestamp_ms: Option, } impl AudioChunk for SimpleAudioChunk { @@ -36,6 +40,14 @@ impl AudioChunk for SimpleAudioChunk { fn meta(&self) -> Option { self.meta.clone() } + + fn start_timestamp_ms(&self) -> Option { + self.start_timestamp_ms + } + + fn end_timestamp_ms(&self) -> Option { + self.end_timestamp_ms + } } pub struct AudioChunkStream(pub S); @@ -116,6 +128,7 @@ where &samples, &mut this.current_segment_task, None, + (None, None), ) { Poll::Ready(result) => return Poll::Ready(result), Poll::Pending => continue, @@ -156,11 +169,14 @@ where let meta = chunk.meta(); let samples = chunk.samples(); + let timestamps = (chunk.start_timestamp_ms(), chunk.end_timestamp_ms()); + match process_transcription( &mut this.whisper, samples, &mut this.current_segment_task, meta, + timestamps, ) { Poll::Ready(result) => return Poll::Ready(result), Poll::Pending => continue, @@ -178,6 +194,7 @@ fn process_transcription<'a>( samples: &'a [f32], current_segment_task: &'a mut Option + Send>>>, meta: Option, + timestamps: (Option, Option), ) -> Poll> { if !samples.is_empty() { match whisper.transcribe(samples) { @@ -190,6 +207,11 @@ fn process_transcription<'a>( Ok(mut segments) => { for segment in &mut segments { segment.meta = meta.clone(); + + if let (Some(start_ms), Some(end_ms)) = timestamps { + segment.start = start_ms as f64 / 1000.0; + segment.end = end_ms as f64 / 1000.0; + } } *current_segment_task = Some(Box::pin(futures_util::stream::iter(segments)));