diff --git a/src/media/engine.rs b/src/media/engine.rs index 4fa4cfd..7d20da2 100644 --- a/src/media/engine.rs +++ b/src/media/engine.rs @@ -258,12 +258,10 @@ impl StreamEngine { let new_handle = SynthesisHandle::new(tx, play_id.clone(), ssrc); let tts_client = engine.create_tts_client(streaming, tts_option).await?; let sample_rate = tts_option.samplerate.unwrap_or(16000) as u32; - let leading_silence_ms = tts_option.leading_silence_ms.unwrap_or(0); let tts_track = TtsTrack::new(track_id, session_id, streaming, play_id, rx, tts_client) .with_ssrc(ssrc) .with_sample_rate(sample_rate) - .with_cancel_token(cancel_token) - .with_leading_silence(leading_silence_ms); + .with_cancel_token(cancel_token); Ok((new_handle, Box::new(tts_track) as Box)) } diff --git a/src/media/track/rtc.rs b/src/media/track/rtc.rs index ddf4443..e7009c7 100644 --- a/src/media/track/rtc.rs +++ b/src/media/track/rtc.rs @@ -432,21 +432,7 @@ impl RtcTrack { let src_codec = match CodecType::try_from(payload_type) { Ok(c) => c, Err(_) => { - // Forward unknown payload types (e.g. telephone-event/DTMF) as-is - // so the downstream DTMF detector in MediaStream can process them. - let af = AudioFrame { - track_id: track_id.clone(), - samples: crate::media::Samples::RTP { - payload_type, - payload: frame.data.to_vec(), - sequence_number: frame.sequence_number.unwrap_or(0), - }, - timestamp: crate::media::get_timestamp(), - sample_rate: 8000, - channels: 1, - ..Default::default() - }; - sender.send(af).ok(); + debug!(track_id=%track_id, "Unknown payload type {}, skipping frame", payload_type); return; } }; diff --git a/src/media/track/tts.rs b/src/media/track/tts.rs index 117e5f8..22a40ca 100644 --- a/src/media/track/tts.rs +++ b/src/media/track/tts.rs @@ -667,18 +667,6 @@ impl TtsTask { } entry.first_chunk = false; entry.ttfb = crate::media::get_timestamp() - entry.recv_time; - - // Insert leading silence before the first audio chunk to prevent - // initial syllable clipping on SIP/RTP channels where the audio - // path may not be fully established when playback starts. - if self.leading_silence_ms > 0 { - let silence_bytes = (self.sample_rate as usize * 2 * self.leading_silence_ms as usize) / 1000; - let silence = Bytes::from(vec![0u8; silence_bytes]); - self.get_emit_entry_mut(assume_seq).map(|entry| { - entry.chunks.push_back(silence); - }); - debug!("inserted {}ms leading silence ({} bytes)", self.leading_silence_ms, silence_bytes); - } } entry.total_bytes += chunk.len(); @@ -874,8 +862,6 @@ pub struct TtsTrack { graceful: Arc, min_buffer_duration: Duration, max_buffer_wait: Duration, - /// Leading silence in ms before first TTS audio (for SIP/RTP channel readiness) - leading_silence_ms: u32, } impl SynthesisHandle { @@ -923,7 +909,6 @@ impl TtsTrack { ssrc: 0, min_buffer_duration: Duration::from_millis(200), // Default 200ms max_buffer_wait: Duration::from_millis(500), // Default 500ms - leading_silence_ms: 0, } } pub fn with_ssrc(mut self, ssrc: u32) -> Self { @@ -956,11 +941,6 @@ impl TtsTrack { self } - pub fn with_leading_silence(mut self, ms: u32) -> Self { - self.leading_silence_ms = ms; - self - } - pub fn with_jitter_buffer(mut self, min: Duration, max: Duration) -> Self { self.min_buffer_duration = min; self.max_buffer_wait = max; diff --git a/src/synthesis/mod.rs b/src/synthesis/mod.rs index f7b8e01..2237274 100644 --- a/src/synthesis/mod.rs +++ b/src/synthesis/mod.rs @@ -106,12 +106,6 @@ pub struct SynthesisOption { pub extra: Option>, pub max_concurrent_tasks: Option, pub session_id: Option, - /// Leading silence duration in milliseconds before the first TTS audio chunk. - /// Useful for SIP/RTP scenarios where the audio channel may not be fully - /// established when the first chunk arrives, causing the initial syllable - /// to be clipped. Set to 200-300 for SIP calls. Default: 0 (disabled). - #[serde(alias = "leadingSilenceMs")] - pub leading_silence_ms: Option, } impl SynthesisOption { @@ -135,7 +129,6 @@ impl SynthesisOption { extra: other.extra.or(self.extra.clone()), max_concurrent_tasks: other.max_concurrent_tasks.or(self.max_concurrent_tasks), session_id: other.session_id.or(self.session_id.clone()), - leading_silence_ms: other.leading_silence_ms.or(self.leading_silence_ms), } } else { self.clone()