From ca921f3998820b2f502aa85b8ea344b8ebd7831e Mon Sep 17 00:00:00 2001 From: ElwinLiu Date: Sun, 12 Apr 2026 20:23:47 -0400 Subject: [PATCH] fix(audio): restore reliable feedback mute behavior Coordinate the start cue with recording startup so mute-while-recording is applied after on-demand recording actually begins. Refresh the cached feedback player when using the system default output so cue playback follows runtime device changes. --- src-tauri/src/actions.rs | 55 ++--- src-tauri/src/audio_feedback.rs | 226 +++++++++++++++++---- src-tauri/src/managers/audio.rs | 67 +++++- src-tauri/src/shortcut/handler.rs | 3 + src-tauri/src/signal_handle.rs | 9 +- src-tauri/src/transcription_coordinator.rs | 15 +- 6 files changed, 296 insertions(+), 79 deletions(-) diff --git a/src-tauri/src/actions.rs b/src-tauri/src/actions.rs index 62771f63..9cf4bcfa 100644 --- a/src-tauri/src/actions.rs +++ b/src-tauri/src/actions.rs @@ -14,7 +14,7 @@ use ferrous_opencc::{config::BuiltinConfig, OpenCC}; use log::{debug, error, info, warn}; use once_cell::sync::Lazy; use std::collections::HashMap; -use std::sync::Arc; +use std::sync::{mpsc, Arc}; use std::time::Instant; use tauri::AppHandle; use tauri::Emitter; @@ -52,6 +52,22 @@ pub trait ShortcutAction: Send + Sync { // Transcribe Action pub struct TranscribeAction; +fn play_start_feedback_and_apply_mute( + app: &AppHandle, + rm: Arc, +) -> mpsc::Sender<()> { + let (recording_started_tx, recording_started_rx) = mpsc::channel(); + let app_clone = app.clone(); + std::thread::spawn(move || { + play_feedback_sound_blocking(&app_clone, SoundType::Start); + if recording_started_rx.recv().is_ok() && rm.is_recording() { + rm.apply_mute(); + } + }); + + recording_started_tx +} + async fn maybe_convert_chinese_variant( settings: &AppSettings, transcription: &str, @@ -111,11 +127,14 @@ impl ShortcutAction for TranscribeAction { tm.initiate_model_load(); let binding_id = binding_id.to_string(); + let rm = app.state::>(); + + // Start the cue immediately on press; recording startup can continue in parallel. + let recording_started_tx = play_start_feedback_and_apply_mute(app, Arc::clone(&rm)); + change_tray_icon(app, TrayIconState::Recording); show_recording_overlay(app); - let rm = app.state::>(); - // Get the microphone mode to determine audio feedback timing let settings = get_settings(app); let is_always_on = settings.always_on_microphone; @@ -191,39 +210,23 @@ impl ShortcutAction for TranscribeAction { let mut recording_started = false; if is_always_on { - // Always-on mode: Play audio feedback immediately, then apply mute after sound finishes - debug!("Always-on mode: Playing audio feedback immediately"); - let rm_clone = Arc::clone(&rm); - let app_clone = app.clone(); - std::thread::spawn(move || { - play_feedback_sound_blocking(&app_clone, SoundType::Start); - rm_clone.apply_mute(); - }); - recording_started = rm.try_start_recording(&binding_id, stream_tap_tx); debug!("Recording started: {}", recording_started); } else { - // On-demand mode: Start recording first, then play audio feedback, then apply mute - debug!("On-demand mode: Starting recording first, then audio feedback"); + // On-demand mode: start recording immediately; feedback is already playing. + debug!("On-demand mode: Starting recording in parallel with audio feedback"); let recording_start_time = Instant::now(); let started = rm.try_start_recording(&binding_id, stream_tap_tx); if started { recording_started = true; debug!("Recording started in {:?}", recording_start_time.elapsed()); - let app_clone = app.clone(); - let rm_clone = Arc::clone(&rm); - std::thread::spawn(move || { - std::thread::sleep(std::time::Duration::from_millis(100)); - debug!("Handling delayed audio feedback/mute sequence"); - play_feedback_sound_blocking(&app_clone, SoundType::Start); - rm_clone.apply_mute(); - }); } else { debug!("Failed to start recording"); } } if recording_started { + let _ = recording_started_tx.send(()); shortcut::register_cancel_shortcut(app); } @@ -256,15 +259,15 @@ impl ShortcutAction for TranscribeAction { let streaming_state = Arc::clone(&app.state::()); let pipeline_handle = Arc::clone(&app.state::()); - change_tray_icon(app, TrayIconState::Transcribing); - show_transcribing_overlay(app); - // Unmute before playing audio feedback so the stop sound is audible rm.remove_mute(); - // Play audio feedback for recording stop + // Trigger the stop cue as early as possible on key release. play_feedback_sound(app, SoundType::Stop); + change_tray_icon(app, TrayIconState::Transcribing); + show_transcribing_overlay(app); + let binding_id = binding_id.to_string(); // Look up the post-processing prompt for this binding. diff --git a/src-tauri/src/audio_feedback.rs b/src-tauri/src/audio_feedback.rs index f5b95610..8ef3b1ce 100644 --- a/src-tauri/src/audio_feedback.rs +++ b/src-tauri/src/audio_feedback.rs @@ -2,11 +2,13 @@ use crate::settings::SoundTheme; use crate::settings::{self, AppSettings}; use cpal::traits::{DeviceTrait, HostTrait}; use log::{debug, error, warn}; -use rodio::OutputStreamBuilder; -use std::fs::File; -use std::io::BufReader; +use once_cell::sync::Lazy; +use rodio::{buffer::SamplesBuffer, OutputStream, OutputStreamBuilder, Sink}; +use std::collections::HashMap; use std::path::{Path, PathBuf}; +use std::sync::mpsc::{self, Sender}; use std::thread; +use std::time::Duration; use tauri::{AppHandle, Manager}; pub enum SoundType { @@ -14,6 +16,73 @@ pub enum SoundType { Stop, } +#[derive(Clone)] +struct SoundBuffer { + channels: u16, + sample_rate: u32, + samples: Vec, + duration: Duration, +} + +struct FeedbackPlayer { + device_key: Option, + stream: OutputStream, + sounds: HashMap, +} + +impl FeedbackPlayer { + fn new(device_key: Option) -> Result> { + Ok(Self { + stream: create_output_stream(device_key.as_deref())?, + device_key, + sounds: HashMap::new(), + }) + } + + fn sound_buffer(&mut self, path: &Path) -> Result> { + if let Some(buffer) = self.sounds.get(path) { + return Ok(buffer.clone()); + } + + let buffer = load_sound_buffer(path)?; + self.sounds.insert(path.to_path_buf(), buffer.clone()); + Ok(buffer) + } +} + +struct PlayRequest { + path: PathBuf, + selected_device: Option, + volume: f32, + completion: Option>>, +} + +static FEEDBACK_AUDIO_TX: Lazy> = Lazy::new(|| { + let (tx, rx) = mpsc::channel::(); + + thread::spawn(move || { + let mut player: Option = None; + + while let Ok(request) = rx.recv() { + let result = play_request( + &mut player, + request.path, + request.selected_device, + request.volume, + ) + .map_err(|err| err.to_string()); + + if let Some(completion) = request.completion { + let _ = completion.send(result); + } else if let Err(err) = result { + error!("Failed to play feedback sound: {err}"); + } + } + }); + + tx +}); + fn resolve_sound_path( app: &AppHandle, settings: &AppSettings, @@ -62,54 +131,83 @@ pub fn play_test_sound(app: &AppHandle, sound_type: SoundType) { } fn play_sound_async(app: &AppHandle, path: PathBuf) { - let app_handle = app.clone(); - thread::spawn(move || { - if let Err(e) = play_sound_at_path(&app_handle, path.as_path()) { - error!("Failed to play sound '{}': {}", path.display(), e); - } - }); + if let Err(err) = play_sound_at_path(app, path.as_path(), false) { + error!("Failed to play sound '{}': {}", path.display(), err); + } } fn play_sound_blocking(app: &AppHandle, path: &Path) { - if let Err(e) = play_sound_at_path(app, path) { + if let Err(e) = play_sound_at_path(app, path, true) { error!("Failed to play sound '{}': {}", path.display(), e); } } -fn play_sound_at_path(app: &AppHandle, path: &Path) -> Result<(), Box> { +fn play_sound_at_path( + app: &AppHandle, + path: &Path, + blocking: bool, +) -> Result<(), Box> { let settings = settings::get_settings(app); let volume = settings.audio_feedback_volume; - let selected_device = settings.selected_output_device.clone(); - play_audio_file(path, selected_device, volume) + let selected_device = normalize_device_name(settings.selected_output_device.clone()); + play_cached_audio(path, selected_device, volume, blocking) } -fn play_audio_file( - path: &std::path::Path, +fn play_cached_audio( + path: &Path, selected_device: Option, volume: f32, + blocking: bool, ) -> Result<(), Box> { + if blocking { + let (tx, rx) = mpsc::channel(); + FEEDBACK_AUDIO_TX.send(PlayRequest { + path: path.to_path_buf(), + selected_device, + volume, + completion: Some(tx), + })?; + let duration = rx.recv()?.map_err(std::io::Error::other)?; + thread::sleep(duration); + } else { + FEEDBACK_AUDIO_TX.send(PlayRequest { + path: path.to_path_buf(), + selected_device, + volume, + completion: None, + })?; + } + + Ok(()) +} + +fn normalize_device_name(device_name: Option) -> Option { + match device_name.as_deref() { + Some("default") | Some("Default") | None => None, + Some(name) => Some(name.to_string()), + } +} + +fn create_output_stream( + selected_device: Option<&str>, +) -> Result> { let stream_builder = if let Some(device_name) = selected_device { - if device_name == "Default" { - debug!("Using default device"); - OutputStreamBuilder::from_default_device()? - } else { - let host = crate::audio_toolkit::get_cpal_host(); - let devices = host.output_devices()?; - - let mut found_device = None; - for device in devices { - if device.name()? == device_name { - found_device = Some(device); - break; - } + let host = crate::audio_toolkit::get_cpal_host(); + let devices = host.output_devices()?; + + let mut found_device = None; + for device in devices { + if device.name()? == device_name { + found_device = Some(device); + break; } + } - match found_device { - Some(device) => OutputStreamBuilder::from_device(device)?, - None => { - warn!("Device '{}' not found, using default device", device_name); - OutputStreamBuilder::from_default_device()? - } + match found_device { + Some(device) => OutputStreamBuilder::from_device(device)?, + None => { + warn!("Device '{}' not found, using default device", device_name); + OutputStreamBuilder::from_default_device()? } } } else { @@ -117,15 +215,61 @@ fn play_audio_file( OutputStreamBuilder::from_default_device()? }; - let stream_handle = stream_builder.open_stream()?; - let mixer = stream_handle.mixer(); + Ok(stream_builder.open_stream()?) +} - let file = File::open(path)?; - let buf_reader = BufReader::new(file); +fn play_request( + player: &mut Option, + path: PathBuf, + selected_device: Option, + volume: f32, +) -> Result> { + // Reopen the stream whenever Handless is following the system default + // output so runtime device switches are picked up on the next cue. + let recreate_player = if selected_device.is_none() { + true + } else { + player + .as_ref() + .is_none_or(|current| current.device_key != selected_device) + }; + if recreate_player { + *player = Some(FeedbackPlayer::new(selected_device.clone())?); + } - let sink = rodio::play(mixer, buf_reader)?; + let player = player.as_mut().expect("feedback player initialized"); + let buffer = player.sound_buffer(&path)?; + let sink = Sink::connect_new(player.stream.mixer()); sink.set_volume(volume); - sink.sleep_until_end(); + sink.append(SamplesBuffer::new( + buffer.channels, + buffer.sample_rate, + buffer.samples, + )); + sink.detach(); + Ok(buffer.duration) +} - Ok(()) +fn load_sound_buffer(path: &Path) -> Result> { + let mut reader = hound::WavReader::open(path)?; + let spec = reader.spec(); + let samples = match spec.sample_format { + hound::SampleFormat::Float => reader.samples::().collect::, _>>()?, + hound::SampleFormat::Int => { + let max_value = ((1_i64 << (spec.bits_per_sample - 1)) - 1) as f32; + reader + .samples::() + .map(|sample| sample.map(|value| value as f32 / max_value)) + .collect::, _>>()? + } + }; + + Ok(SoundBuffer { + channels: spec.channels, + sample_rate: spec.sample_rate, + duration: Duration::from_secs_f64( + samples.len() as f64 / (spec.sample_rate as f64 * spec.channels as f64), + ), + samples, + }) } diff --git a/src-tauri/src/managers/audio.rs b/src-tauri/src/managers/audio.rs index e785d2ab..42262f84 100644 --- a/src-tauri/src/managers/audio.rs +++ b/src-tauri/src/managers/audio.rs @@ -87,12 +87,67 @@ fn set_mute(mute: bool) { #[cfg(target_os = "macos")] { - use std::process::Command; - let script = format!( - "set volume output muted {}", - if mute { "true" } else { "false" } - ); - let _ = Command::new("osascript").args(["-e", &script]).output(); + use coreaudio_sys::{ + kAudioDevicePropertyMute, kAudioHardwarePropertyDefaultOutputDevice, + kAudioObjectPropertyElementMaster, kAudioObjectPropertyScopeGlobal, + kAudioObjectPropertyScopeOutput, kAudioObjectSystemObject, AudioDeviceID, + AudioObjectGetPropertyData, AudioObjectPropertyAddress, AudioObjectSetPropertyData, + }; + use std::{ffi::c_void, mem, process::Command, ptr}; + + unsafe fn fallback_to_osascript(mute: bool) { + let script = format!( + "set volume output muted {}", + if mute { "true" } else { "false" } + ); + let _ = Command::new("osascript").args(["-e", &script]).output(); + } + + let default_output_address = AudioObjectPropertyAddress { + mSelector: kAudioHardwarePropertyDefaultOutputDevice, + mScope: kAudioObjectPropertyScopeGlobal, + mElement: kAudioObjectPropertyElementMaster, + }; + + let mut device_id: AudioDeviceID = 0; + let mut device_id_size = mem::size_of::() as u32; + let default_output_status = unsafe { + AudioObjectGetPropertyData( + kAudioObjectSystemObject, + &default_output_address, + 0, + ptr::null(), + &mut device_id_size, + &mut device_id as *mut _ as *mut c_void, + ) + }; + + if default_output_status != 0 || device_id == 0 { + unsafe { fallback_to_osascript(mute) }; + return; + } + + let mute_address = AudioObjectPropertyAddress { + mSelector: kAudioDevicePropertyMute, + mScope: kAudioObjectPropertyScopeOutput, + mElement: kAudioObjectPropertyElementMaster, + }; + + let mute_value: u32 = u32::from(mute); + let mute_status = unsafe { + AudioObjectSetPropertyData( + device_id, + &mute_address, + 0, + ptr::null(), + mem::size_of_val(&mute_value) as u32, + &mute_value as *const _ as *const c_void, + ) + }; + + if mute_status != 0 { + unsafe { fallback_to_osascript(mute) }; + } } } diff --git a/src-tauri/src/shortcut/handler.rs b/src-tauri/src/shortcut/handler.rs index 193a2518..ec4ce3a3 100644 --- a/src-tauri/src/shortcut/handler.rs +++ b/src-tauri/src/shortcut/handler.rs @@ -5,6 +5,7 @@ use log::warn; use std::sync::Arc; +use std::time::Instant; use tauri::{AppHandle, Manager}; use crate::actions::ACTION_MAP; @@ -32,6 +33,7 @@ pub fn handle_shortcut_event( hotkey_string: &str, is_pressed: bool, ) { + let event_time = Instant::now(); let settings = get_settings(app); // Transcribe bindings are handled by the coordinator. @@ -42,6 +44,7 @@ pub fn handle_shortcut_event( hotkey_string, is_pressed, settings.activation_mode, + event_time, ); } else { warn!("TranscriptionCoordinator is not initialized"); diff --git a/src-tauri/src/signal_handle.rs b/src-tauri/src/signal_handle.rs index 391c0779..bab7e5a9 100644 --- a/src-tauri/src/signal_handle.rs +++ b/src-tauri/src/signal_handle.rs @@ -1,6 +1,7 @@ use crate::settings::ActivationMode; use crate::TranscriptionCoordinator; use log::{debug, warn}; +use std::time::Instant; use tauri::{AppHandle, Manager}; #[cfg(unix)] @@ -14,7 +15,13 @@ use std::thread; /// Used by signal handlers, CLI flags, and any other external trigger. pub fn send_transcription_input(app: &AppHandle, binding_id: &str, source: &str) { if let Some(c) = app.try_state::() { - c.send_input(binding_id, source, true, ActivationMode::Toggle); + c.send_input( + binding_id, + source, + true, + ActivationMode::Toggle, + Instant::now(), + ); } else { warn!("TranscriptionCoordinator not initialized"); } diff --git a/src-tauri/src/transcription_coordinator.rs b/src-tauri/src/transcription_coordinator.rs index 6762364e..f55fe72f 100644 --- a/src-tauri/src/transcription_coordinator.rs +++ b/src-tauri/src/transcription_coordinator.rs @@ -25,6 +25,7 @@ enum Command { hotkey_string: String, is_pressed: bool, activation_mode: ActivationMode, + event_time: Instant, }, Cancel { recording_was_active: bool, @@ -71,16 +72,18 @@ impl TranscriptionCoordinator { hotkey_string, is_pressed, activation_mode, + event_time, } => { // Debounce rapid-fire press events (key repeat / double-tap). // Releases always pass through for hold-based modes. if is_pressed { - let now = Instant::now(); - if last_press.is_some_and(|t| now.duration_since(t) < DEBOUNCE) { + if last_press + .is_some_and(|t| event_time.duration_since(t) < DEBOUNCE) + { debug!("Debounced press for '{binding_id}'"); continue; } - last_press = Some(now); + last_press = Some(event_time); } match activation_mode { @@ -124,14 +127,14 @@ impl TranscriptionCoordinator { press_start = None; } else if matches!(stage, Stage::Idle) { // Start recording and record timestamp. - press_start = Some(Instant::now()); + press_start = Some(event_time); toggled = false; start(&app, &mut stage, &binding_id, &hotkey_string); } } else if matches!(&stage, Stage::Recording(id) if id == &binding_id) { if let Some(t) = press_start { - if t.elapsed() >= HOLD_THRESHOLD { + if event_time.duration_since(t) >= HOLD_THRESHOLD { // Held long enough: treat as hold → stop. stop(&app, &mut stage, &binding_id, &hotkey_string); press_start = None; @@ -194,6 +197,7 @@ impl TranscriptionCoordinator { hotkey_string: &str, is_pressed: bool, activation_mode: ActivationMode, + event_time: Instant, ) { if self .tx @@ -202,6 +206,7 @@ impl TranscriptionCoordinator { hotkey_string: hotkey_string.to_string(), is_pressed, activation_mode, + event_time, }) .is_err() {