diff --git a/voice/engine/src/audio_ml/vad.rs b/voice/engine/src/audio_ml/vad.rs
index b757883..b16e739 100644
--- a/voice/engine/src/audio_ml/vad.rs
+++ b/voice/engine/src/audio_ml/vad.rs
@@ -14,6 +14,19 @@ pub enum VadEvent {
     SpeechEnd,
 }
 
+/// Default VAD threshold used when the bot is silent / listening.
+pub const VAD_THRESHOLD_IDLE: f32 = 0.70;
+
+/// VAD threshold used during bot playback on the Reactor path with the denoiser enabled.
+/// Audio is pre-filtered by the denoiser before reaching VAD, so 0.85 gives
+/// meaningful noise suppression without requiring the user to shout to barge in.
+pub const VAD_THRESHOLD_PLAYBACK: f32 = 0.85;
+
+/// VAD threshold used during bot playback whenever mic audio reaches VAD undenoised:
+/// the Gemini Live path, and the Reactor path when the denoiser is disabled.
+/// Higher than `VAD_THRESHOLD_PLAYBACK` because no denoiser pre-filters the input.
+pub const VAD_THRESHOLD_PLAYBACK_RAW: f32 = 0.90;
+
 #[derive(Debug, Clone)]
 pub struct VadConfig {
     pub threshold: f32,
@@ -27,7 +40,7 @@ pub struct VadConfig {
 impl Default for VadConfig {
     fn default() -> Self {
         Self {
-            threshold: 0.7,
+            threshold: VAD_THRESHOLD_IDLE,
             min_volume: 0.0035,
             silence_frames: 6,
             min_speech_frames: 6, // Increased from 3 (96ms) to 6 (192ms) to filter pops/echo
@@ -59,6 +72,17 @@ pub struct SileroVad {
 }
 
 impl SileroVad {
+    pub fn threshold(&self) -> f32 {
+        self.config.threshold
+    }
+
+    pub fn set_threshold(&mut self, threshold: f32) {
+        // Plain overwrite: no comparison happens in this setter. Callers that guard
+        // with `!=` (VadStage::set_threshold) rely on exact VAD_THRESHOLD_* constants;
+        // if a threshold is ever derived by arithmetic, use an epsilon check there.
+        self.config.threshold = threshold;
+    }
+
     pub fn new(model_path: &str, config: VadConfig) -> Self {
         let ctx = config.context_size;
         Self {
@@ -327,3 +351,75 @@ impl SileroVad {
         None
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Build a synthetic PCM-16 LE frame: every sample is `amplitude` (0.0–1.0) × 32767.
+    /// 512 samples at 16kHz = 32ms, matching FRAME_SIZE.
+    fn make_frame(amplitude: f32) -> Vec<u8> {
+        let n = FRAME_SIZE;
+        let sample = (amplitude * 32767.0) as i16;
+        let mut frame = Vec::with_capacity(n * 2);
+        for _ in 0..n {
+            frame.extend_from_slice(&sample.to_le_bytes());
+        }
+        frame
+    }
+
+    fn make_vad(threshold: f32) -> SileroVad {
+        SileroVad::new(
+            "",
+            VadConfig {
+                threshold,
+                min_volume: 0.0, // disable volume gate so tests focus on threshold
+                silence_frames: 6,
+                min_speech_frames: 1, // fire SpeechStart on the first positive frame
+                lookback_frames: 0,
+                context_size: 0,
+            },
+        )
+    }
+
+    /// prob=0.80 should fire SpeechStart at the idle threshold (0.70) but be
+    /// treated as silence at the playback threshold (0.85).
+    #[test]
+    fn threshold_controls_speech_detection() {
+        let frame = make_frame(0.1);
+
+        // At idle threshold: 0.80 >= 0.70 → speech
+        let mut vad = make_vad(VAD_THRESHOLD_IDLE);
+        let result = vad.process_with_prob(0.80, &frame);
+        assert_eq!(result, Some(VadEvent::SpeechStart));
+
+        // At playback threshold: 0.80 < 0.85 → silence, no event
+        let mut vad = make_vad(VAD_THRESHOLD_PLAYBACK);
+        let result = vad.process_with_prob(0.80, &frame);
+        assert_eq!(result, None);
+    }
+
+    /// set_threshold mid-stream updates the comparison boundary immediately.
+    #[test]
+    fn set_threshold_takes_effect_immediately() {
+        let frame = make_frame(0.1);
+        let mut vad = make_vad(VAD_THRESHOLD_IDLE);
+
+        // Prime with sub-threshold prob so is_speaking stays false
+        vad.process_with_prob(0.50, &frame);
+        assert!(!vad.is_speaking());
+
+        // Elevate to playback threshold — 0.80 should now be below the gate
+        vad.set_threshold(VAD_THRESHOLD_PLAYBACK);
+        let result = vad.process_with_prob(0.80, &frame);
+        assert_eq!(
+            result, None,
+            "prob 0.80 should be below playback threshold 0.85"
+        );
+
+        // Drop back to idle — same prob should now trigger
+        vad.set_threshold(VAD_THRESHOLD_IDLE);
+        let result = vad.process_with_prob(0.80, &frame);
+        assert_eq!(result, Some(VadEvent::SpeechStart));
+    }
+}
diff --git a/voice/engine/src/reactor/audio.rs b/voice/engine/src/reactor/audio.rs
index c0cb425..b1a529f 100644
--- a/voice/engine/src/reactor/audio.rs
+++ b/voice/engine/src/reactor/audio.rs
@@ -24,7 +24,36 @@ impl Reactor {
         //    SAFETY NOTE: The closure is sync and non-recursive; the reactor's
         //    on_vad_event is async but is called *after* process_frames completes
         //    (we collect the VAD result inside the closure, not await inside it).
+
+        // True while TTS audio is actively being streamed to the client.
+        // bot_audio_sent: set on first TTS chunk, cleared on barge-in/cancel.
+        // tts.is_active(): true between start_ws/http() and mark_finished()/cancel().
+        // Combined, this is narrower than is_pipeline_active(), which also covers LLM.
+        let is_playing = self.bot_audio_sent && self.tts.is_active();
         let mut vad_event: Option<crate::types::VadEvent> = None;
+
+        // Threshold is a packet-level decision: is_playing doesn't change within a
+        // process_frames batch, so set it once here rather than once per frame.
+        //
+        // When the denoiser is disabled, audio reaches VAD unfiltered (raw mic),
+        // so we use VAD_THRESHOLD_PLAYBACK_RAW (0.90) instead of the denoised
+        // playback threshold (0.85) to maintain equivalent echo rejection.
+        //
+        // Known behaviour: if is_playing flips true while the user is already
+        // mid-utterance, the threshold elevation can cause a premature SpeechEnd
+        // (~192 ms / 6 silence frames). This is acceptable because the bot does not
+        // normally start TTS while the user is speaking (barge-in clears TTS first);
+        // re-engagement prompts are the only realistic scenario.
+        self.vad.set_threshold(if is_playing {
+            if self.denoiser.is_enabled() {
+                crate::audio_ml::vad::VAD_THRESHOLD_PLAYBACK
+            } else {
+                crate::audio_ml::vad::VAD_THRESHOLD_PLAYBACK_RAW
+            }
+        } else {
+            crate::audio_ml::vad::VAD_THRESHOLD_IDLE
+        });
+
         self.ring_buffer.process_frames(&resampled, |frame| {
             // Denoise (inline ONNX, or passthrough if disabled).
             // denoiser.process() allocates for the model output; the frame
diff --git a/voice/engine/src/reactor/proc/denoiser.rs b/voice/engine/src/reactor/proc/denoiser.rs
index 69cdacd..14dc7e4 100644
--- a/voice/engine/src/reactor/proc/denoiser.rs
+++ b/voice/engine/src/reactor/proc/denoiser.rs
@@ -48,6 +48,11 @@ impl DenoiserStage {
         }
     }
 
+    /// Returns `true` when a denoiser model is active; `false` in passthrough mode (raw audio).
+    pub fn is_enabled(&self) -> bool {
+        self.inner.is_some()
+    }
+
     /// Process a single 16kHz PCM-16 LE audio frame.
     /// Returns cleaned audio (or the original if denoising is disabled/failed).
     pub fn process(&mut self, frame: &[u8]) -> Vec<u8> {
diff --git a/voice/engine/src/reactor/proc/vad.rs b/voice/engine/src/reactor/proc/vad.rs
index 6ae1988..9987d9b 100644
--- a/voice/engine/src/reactor/proc/vad.rs
+++ b/voice/engine/src/reactor/proc/vad.rs
@@ -14,6 +14,14 @@ pub struct VadStage {
 }
 
 impl VadStage {
+    pub fn set_threshold(&mut self, threshold: f32) {
+        // Exact `!=` is fine while callers pass only named constants (VAD_THRESHOLD_*);
+        // if a threshold is ever computed arithmetically, compare with an epsilon instead.
+        if self.inner.threshold() != threshold {
+            self.inner.set_threshold(threshold);
+        }
+    }
+
     pub fn new(model_path: &str, config: VadConfig) -> Self {
         Self {
             inner: SileroVad::new(model_path, config),
diff --git a/voice/engine/src/session.rs b/voice/engine/src/session.rs
index 3cbda1c..072e3a4 100644
--- a/voice/engine/src/session.rs
+++ b/voice/engine/src/session.rs
@@ -455,7 +455,6 @@ async fn run_native_multimodal(
     let mut hangup_target: Option<tokio::time::Instant> = None;
     let mut hangup_max_target: Option<tokio::time::Instant> = None;
 
-
     // ── Main event loop ────────────────────────────────────────────
     loop {
         tokio::select! {
@@ -496,6 +495,19 @@ async fn run_native_multimodal(
                         // Frame-align for VAD; collect audio to push async afterward.
                         let mut pending_pcm: Vec<Vec<i16>> = Vec::new();
                         let mut vad_event: Option<crate::types::VadEvent> = None;
+
+                        // Threshold is a packet-level decision: bot_speaking doesn't change
+                        // within a process_frames batch, so set it once here.
+                        // Raw (undenoised) audio goes to Gemini — raise threshold during playback
+                        // to suppress background noise from falsely triggering a local barge-in.
+                        if vad_ok {
+                            vad.set_threshold(if bot_speaking {
+                                crate::audio_ml::vad::VAD_THRESHOLD_PLAYBACK_RAW
+                            } else {
+                                crate::audio_ml::vad::VAD_THRESHOLD_IDLE
+                            });
+                        }
+
                         ring.process_frames(&resampled, |frame| {
                             if recording_enabled {
                                 tracer.emit(Event::UserAudio {