diff --git a/src/agent/channel.rs b/src/agent/channel.rs index 7bb716a59..44f4c2c8e 100644 --- a/src/agent/channel.rs +++ b/src/agent/channel.rs @@ -2491,7 +2491,15 @@ async fn transcribe_audio_attachment( } let (provider_id, model_name) = match deps.llm_manager.resolve_model(voice_model) { - Ok(parts) => parts, + Ok(parts) => { + tracing::debug!( + provider = %parts.0, + model = %parts.1, + voice_config = %voice_model, + "resolved voice model for transcription" + ); + parts + } Err(error) => { tracing::warn!(%error, model = %voice_model, "invalid voice model route"); return UserContent::text(format!( @@ -2502,7 +2510,15 @@ }; let provider = match deps.llm_manager.get_provider(&provider_id) { - Ok(provider) => provider, + Ok(provider) => { + tracing::debug!( + provider = %provider_id, + base_url = %provider.base_url, + api_type = ?provider.api_type, + "got provider for voice transcription" + ); + provider + } Err(error) => { tracing::warn!(%error, provider = %provider_id, "voice provider not configured"); return UserContent::text(format!( @@ -2527,6 +2543,11 @@ "{}/v1/chat/completions", provider.base_url.trim_end_matches('/') ); + tracing::debug!( + endpoint = %endpoint, + model = %model_name, + "sending voice transcription request" + ); let body = serde_json::json!({ "model": model_name, "messages": [{ diff --git a/src/llm/manager.rs b/src/llm/manager.rs index 4febe761d..adc25dff5 100644 --- a/src/llm/manager.rs +++ b/src/llm/manager.rs @@ -289,12 +289,20 @@ impl LlmManager { } /// Resolve a model name to provider and model components. - /// Format: "provider/model-name" or just "model-name" (defaults to anthropic). + /// Format: "provider/model-name" or just "model-name" (defaults to openai).
pub fn resolve_model(&self, model_name: &str) -> Result<(String, String)> { if let Some((provider, model)) = model_name.split_once('/') { + tracing::debug!(provider = %provider, model = %model, "resolved model with explicit provider"); Ok((provider.to_string(), model.to_string())) } else { + // Default to openai when no provider prefix is given (anthropic does not support input_audio). + // NOTE: this changes the default for every caller of resolve_model, not only voice transcription. - Ok(("anthropic".into(), model_name.into())) + tracing::debug!( + model = %model_name, + "no provider prefix specified in model name, defaulting to 'openai'. \ + Specify as 'provider/model' (e.g., 'openai/whisper-1') to use a different provider." + ); + Ok(("openai".into(), model_name.into())) } }