Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/media/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -258,12 +258,10 @@ impl StreamEngine {
let new_handle = SynthesisHandle::new(tx, play_id.clone(), ssrc);
let tts_client = engine.create_tts_client(streaming, tts_option).await?;
let sample_rate = tts_option.samplerate.unwrap_or(16000) as u32;
let leading_silence_ms = tts_option.leading_silence_ms.unwrap_or(0);
let tts_track = TtsTrack::new(track_id, session_id, streaming, play_id, rx, tts_client)
.with_ssrc(ssrc)
.with_sample_rate(sample_rate)
.with_cancel_token(cancel_token)
.with_leading_silence(leading_silence_ms);
.with_cancel_token(cancel_token);
Ok((new_handle, Box::new(tts_track) as Box<dyn Track>))
}

Expand Down
16 changes: 1 addition & 15 deletions src/media/track/rtc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,21 +432,7 @@ impl RtcTrack {
let src_codec = match CodecType::try_from(payload_type) {
Ok(c) => c,
Err(_) => {
// Forward unknown payload types (e.g. telephone-event/DTMF) as-is
// so the downstream DTMF detector in MediaStream can process them.
let af = AudioFrame {
track_id: track_id.clone(),
samples: crate::media::Samples::RTP {
payload_type,
payload: frame.data.to_vec(),
sequence_number: frame.sequence_number.unwrap_or(0),
},
timestamp: crate::media::get_timestamp(),
sample_rate: 8000,
channels: 1,
..Default::default()
};
sender.send(af).ok();
debug!(track_id=%track_id, "Unknown payload type {}, skipping frame", payload_type);
return;
}
};
Expand Down
20 changes: 0 additions & 20 deletions src/media/track/tts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -667,18 +667,6 @@ impl TtsTask {
}
entry.first_chunk = false;
entry.ttfb = crate::media::get_timestamp() - entry.recv_time;

// Insert leading silence before the first audio chunk to prevent
// initial syllable clipping on SIP/RTP channels where the audio
// path may not be fully established when playback starts.
if self.leading_silence_ms > 0 {
let silence_bytes = (self.sample_rate as usize * 2 * self.leading_silence_ms as usize) / 1000;
let silence = Bytes::from(vec![0u8; silence_bytes]);
self.get_emit_entry_mut(assume_seq).map(|entry| {
entry.chunks.push_back(silence);
});
debug!("inserted {}ms leading silence ({} bytes)", self.leading_silence_ms, silence_bytes);
}
}

entry.total_bytes += chunk.len();
Expand Down Expand Up @@ -874,8 +862,6 @@ pub struct TtsTrack {
graceful: Arc<AtomicBool>,
min_buffer_duration: Duration,
max_buffer_wait: Duration,
/// Leading silence in ms before first TTS audio (for SIP/RTP channel readiness)
leading_silence_ms: u32,
}

impl SynthesisHandle {
Expand Down Expand Up @@ -923,7 +909,6 @@ impl TtsTrack {
ssrc: 0,
min_buffer_duration: Duration::from_millis(200), // Default 200ms
max_buffer_wait: Duration::from_millis(500), // Default 500ms
leading_silence_ms: 0,
}
}
pub fn with_ssrc(mut self, ssrc: u32) -> Self {
Expand Down Expand Up @@ -956,11 +941,6 @@ impl TtsTrack {
self
}

/// Builder-style setter for the track's leading-silence duration.
///
/// Stores `ms` (milliseconds) in `leading_silence_ms` and returns the
/// track by value so the call can be chained with the other `with_*`
/// builders. The constructor initializes this field to `0`.
///
/// NOTE(review): the field is described as silence placed before the first
/// TTS audio for SIP/RTP channel readiness; how the value is handed to the
/// code that actually emits the silence is not visible here — confirm.
pub fn with_leading_silence(mut self, ms: u32) -> Self {
self.leading_silence_ms = ms;
self
}

pub fn with_jitter_buffer(mut self, min: Duration, max: Duration) -> Self {
self.min_buffer_duration = min;
self.max_buffer_wait = max;
Expand Down
7 changes: 0 additions & 7 deletions src/synthesis/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,6 @@ pub struct SynthesisOption {
pub extra: Option<HashMap<String, String>>,
pub max_concurrent_tasks: Option<usize>,
pub session_id: Option<String>,
/// Leading silence duration in milliseconds before the first TTS audio chunk.
/// Useful for SIP/RTP scenarios where the audio channel may not be fully
/// established when the first chunk arrives, causing the initial syllable
/// to be clipped. Set to 200-300 for SIP calls. Default: 0 (disabled).
#[serde(alias = "leadingSilenceMs")]
pub leading_silence_ms: Option<u32>,
}

impl SynthesisOption {
Expand All @@ -135,7 +129,6 @@ impl SynthesisOption {
extra: other.extra.or(self.extra.clone()),
max_concurrent_tasks: other.max_concurrent_tasks.or(self.max_concurrent_tasks),
session_id: other.session_id.or(self.session_id.clone()),
leading_silence_ms: other.leading_silence_ms.or(self.leading_silence_ms),
}
} else {
self.clone()
Expand Down
Loading