From f74153d66598c5ce2bf498d1289b8fbdb7e6fd44 Mon Sep 17 00:00:00 2001 From: ElwinLiu Date: Mon, 27 Apr 2026 10:13:57 -0400 Subject: [PATCH] feat(models): add local transcription engines --- src-tauri/Cargo.lock | 143 +++++---------- src-tauri/Cargo.toml | 5 +- src-tauri/src/commands/models.rs | 26 ++- src-tauri/src/managers/model.rs | 118 ++++++++++++ src-tauri/src/managers/transcription.rs | 230 +++++++++++++++++------- src/bindings.ts | 2 +- src/components/onboarding/ModelCard.tsx | 14 +- src/i18n/locales/ar/translation.json | 16 ++ src/i18n/locales/cs/translation.json | 16 ++ src/i18n/locales/de/translation.json | 16 ++ src/i18n/locales/en/translation.json | 16 ++ src/i18n/locales/es/translation.json | 16 ++ src/i18n/locales/fr/translation.json | 16 ++ src/i18n/locales/it/translation.json | 16 ++ src/i18n/locales/ja/translation.json | 16 ++ src/i18n/locales/ko/translation.json | 16 ++ src/i18n/locales/pl/translation.json | 16 ++ src/i18n/locales/pt/translation.json | 16 ++ src/i18n/locales/ru/translation.json | 16 ++ src/i18n/locales/tr/translation.json | 16 ++ src/i18n/locales/uk/translation.json | 16 ++ src/i18n/locales/vi/translation.json | 16 ++ src/i18n/locales/zh-TW/translation.json | 16 ++ src/i18n/locales/zh/translation.json | 16 ++ src/lib/utils/modelSettings.ts | 10 +- 25 files changed, 652 insertions(+), 168 deletions(-) diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 95be057b..f5408cf7 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -426,29 +426,6 @@ dependencies = [ "virtue", ] -[[package]] -name = "bindgen" -version = "0.69.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" -dependencies = [ - "bitflags 2.10.0", - "cexpr", - "clang-sys", - "itertools", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn 2.0.108", - "which", -] - 
[[package]] name = "bindgen" version = "0.72.1" @@ -459,10 +436,12 @@ dependencies = [ "cexpr", "clang-sys", "itertools", + "log", + "prettyplease", "proc-macro2", "quote", "regex", - "rustc-hash 2.1.1", + "rustc-hash", "shlex", "syn 2.0.108", ] @@ -780,7 +759,7 @@ version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02" dependencies = [ - "smallvec 1.15.1", + "smallvec", "target-lexicon 0.12.16", ] @@ -790,7 +769,7 @@ version = "0.20.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78cef5b5a1a6827c7322ae2a636368a573006b27cfa76c7ebd53e834daeaab6a" dependencies = [ - "smallvec 1.15.1", + "smallvec", "target-lexicon 0.13.3", ] @@ -1122,7 +1101,7 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ceec7a6067e62d6f931a2baf6f3a751f4a892595bcec1461a3c94ef9949864b6" dependencies = [ - "bindgen 0.72.1", + "bindgen", ] [[package]] @@ -1213,7 +1192,7 @@ dependencies = [ "phf 0.10.1", "proc-macro2", "quote", - "smallvec 1.15.1", + "smallvec", "syn 1.0.109", ] @@ -2271,7 +2250,7 @@ dependencies = [ "libc", "once_cell", "pin-project-lite", - "smallvec 1.15.1", + "smallvec", "thiserror 1.0.69", ] @@ -2307,7 +2286,7 @@ dependencies = [ "libc", "memchr", "once_cell", - "smallvec 1.15.1", + "smallvec", "thiserror 1.0.69", ] @@ -2620,13 +2599,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] -name = "home" -version = "0.5.12" +name = "hmac-sha256" +version = "1.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" -dependencies = [ - "windows-sys 0.61.2", -] +checksum = "ec9d92d097f4749b64e8cc33d924d9f40a2d4eb91402b458014b781f5733d60f" [[package]] name = "hound" @@ -2715,7 +2691,7 @@ dependencies = [ 
"itoa", "pin-project-lite", "pin-utils", - "smallvec 1.15.1", + "smallvec", "tokio", "want", ] @@ -2833,7 +2809,7 @@ dependencies = [ "icu_normalizer_data", "icu_properties", "icu_provider", - "smallvec 1.15.1", + "smallvec", "zerovec", ] @@ -2895,7 +2871,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", - "smallvec 1.15.1", + "smallvec", "utf8_iter", ] @@ -3175,12 +3151,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "libappindicator" version = "0.9.0" @@ -3295,6 +3265,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" +[[package]] +name = "lzma-rust2" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1670343e58806300d87950e3401e820b519b9384281bbabfb15e3636689ffd69" + [[package]] name = "mac" version = "0.1.1" @@ -3514,9 +3490,9 @@ dependencies = [ [[package]] name = "ndarray" -version = "0.16.1" +version = "0.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841" +checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" dependencies = [ "matrixmultiply", "num-complex", @@ -4104,26 +4080,25 @@ dependencies = [ [[package]] name = "ort" -version = "2.0.0-rc.10" +version = "2.0.0-rc.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa7e49bd669d32d7bc2a15ec540a527e7764aec722a45467814005725bcd721" 
+checksum = "d7de3af33d24a745ffb8fab904b13478438d1cd52868e6f17735ef6e1f8bf133" dependencies = [ "ndarray", "ort-sys", - "smallvec 2.0.0-alpha.10", + "smallvec", "tracing", + "ureq", ] [[package]] name = "ort-sys" -version = "2.0.0-rc.10" +version = "2.0.0-rc.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2aba9f5c7c479925205799216e7e5d07cc1d4fa76ea8058c60a9a30f6a4e890" +checksum = "d7b497d21a8b6fbb4b5a544f8fadb77e801a09ae0add9e411d31c6f89e3c1e90" dependencies = [ - "flate2", - "pkg-config", - "sha2", - "tar", + "hmac-sha256", + "lzma-rust2", "ureq", ] @@ -4213,7 +4188,7 @@ dependencies = [ "cfg-if", "libc", "redox_syscall", - "smallvec 1.15.1", + "smallvec", "windows-link 0.2.1", ] @@ -4725,7 +4700,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.1.1", + "rustc-hash", "rustls", "socket2", "thiserror 2.0.17", @@ -4746,7 +4721,7 @@ dependencies = [ "lru-slab", "rand 0.9.2", "ring", - "rustc-hash 2.1.1", + "rustc-hash", "rustls", "rustls-pki-types", "slab", @@ -5215,7 +5190,7 @@ dependencies = [ "fallible-streaming-iterator", "hashlink", "libsqlite3-sys", - "smallvec 1.15.1", + "smallvec", "sqlite-wasm-rs", ] @@ -5255,12 +5230,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hash" version = "2.1.1" @@ -5539,7 +5508,7 @@ dependencies = [ "phf_codegen 0.8.0", "precomputed-hash", "servo_arc", - "smallvec 1.15.1", + "smallvec", ] [[package]] @@ -5793,12 +5762,6 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" -[[package]] -name = "smallvec" -version = "2.0.0-alpha.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"51d44cfb396c3caf6fbfd0ab422af02631b69ddd96d2eff0b0f0724f9024051b" - [[package]] name = "socket2" version = "0.6.1" @@ -7234,9 +7197,9 @@ dependencies = [ [[package]] name = "transcribe-rs" -version = "0.2.9" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6fe25a69f6aa6ca4a8cb1dfc201e94c4cc7acb43f69353aa9ebb6aa4d0d26a9" +checksum = "fb9d5cfb8e68a2a8fc27fb548987473221d3cc3776a331ccde46c8bd8b2eda0f" dependencies = [ "base64 0.22.1", "derive_builder", @@ -7509,8 +7472,8 @@ dependencies = [ [[package]] name = "vad-rs" -version = "0.1.5" -source = "git+https://github.com/cjpais/vad-rs#88b3a01f72f83a5d80d0e7ea9bacfc0d897fd03f" +version = "0.1.6" +source = "git+https://github.com/cjpais/vad-rs#2a412ed858695b9251f3f5a1a20d95b59fa7c498" dependencies = [ "eyre", "ndarray", @@ -7689,7 +7652,7 @@ dependencies = [ "cc", "downcast-rs", "rustix 1.1.4", - "smallvec 1.15.1", + "smallvec", "wayland-sys", ] @@ -7865,37 +7828,27 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3" -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix 0.38.44", -] - [[package]] name = "whisper-rs" -version = "0.13.2" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b6fc553156b521663bfa8e713e7ad58c7ca262d46de9998cd7f2e4de5ba0d9" +checksum = "2088172d00f936c348d6a72f488dc2660ab3f507263a195df308a3c2383229f6" dependencies = [ + "libc", "whisper-rs-sys", ] [[package]] name = "whisper-rs-sys" -version = "0.11.1" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bab42b2c319e3a1e0280137c59368072348d3277873c7588b6466a127dca58" +checksum = 
"6986c0fe081241d391f09b9a071fbcbb59720c3563628c3c829057cf69f2a56f" dependencies = [ - "bindgen 0.69.5", + "bindgen", "cfg-if", "cmake", "fs_extra", + "semver", ] [[package]] diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index cdf05877..5e0ccb88 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -72,7 +72,7 @@ rusqlite = { version = "0.38", features = ["bundled"] } tar = "0.4.44" flate2 = "1.0" uuid = { version = "1", features = ["v4"] } -transcribe-rs = { version = "0.2.9", features = ["whisper", "parakeet", "moonshine", "sense_voice"] } +transcribe-rs = { version = "0.3.8", features = ["whisper-cpp", "onnx"] } handy-keys = "0.2.2" ferrous-opencc = "0.3.0" clap = { version = "4", features = ["derive"] } @@ -92,6 +92,7 @@ tauri-plugin-single-instance = "2.4.0" tauri-plugin-updater = "2.10.0" [target.'cfg(windows)'.dependencies] +transcribe-rs = { version = "0.3.8", features = ["whisper-vulkan", "ort-directml"] } windows = { version = "0.61", features = [ "Win32_Media_Audio_Endpoints", "Win32_System_Com_StructuredStorage", @@ -106,10 +107,12 @@ coreaudio-sys = "0.2" objc2 = "0.6.3" objc2-app-kit = { version = "0.3.2", features = ["NSScreen", "NSGraphics"] } objc2-foundation = { version = "0.3.2", features = ["NSGeometry"] } +transcribe-rs = { version = "0.3.8", features = ["whisper-metal"] } [target.'cfg(target_os = "linux")'.dependencies] gtk-layer-shell = { version = "0.8", features = ["v0_6"] } gtk = "0.18" +transcribe-rs = { version = "0.3.8", features = ["whisper-vulkan"] } [patch.crates-io] tauri-runtime = { git = "https://github.com/cjpais/tauri.git", branch = "handy-2.9.1" } diff --git a/src-tauri/src/commands/models.rs b/src-tauri/src/commands/models.rs index 898e270a..ea4cf001 100644 --- a/src-tauri/src/commands/models.rs +++ b/src-tauri/src/commands/models.rs @@ -1,4 +1,4 @@ -use crate::managers::model::{ModelInfo, ModelManager}; +use crate::managers::model::{EngineType, ModelInfo, ModelManager}; use 
crate::managers::transcription::TranscriptionManager; use crate::settings::{get_settings, write_settings, SttProviderType}; use crate::stt_provider::{cloud_provider_registry, SttProviderInfo}; @@ -80,6 +80,16 @@ pub async fn set_active_model( // (emitted during load_model) read the correct selected_model let mut settings = get_settings(&app_handle); settings.selected_model = model_id.clone(); + let selected_language_supported = settings.selected_language == "auto" + || model_info.supported_languages.is_empty() + || model_info + .supported_languages + .contains(&settings.selected_language); + if !supports_auto_language(&model_info.engine_type) + && (settings.selected_language == "auto" || !selected_language_supported) + { + settings.selected_language = default_language_for_model(&model_info); + } write_settings(&app_handle, settings); // Load the model in the transcription manager @@ -90,6 +100,20 @@ pub async fn set_active_model( Ok(()) } +fn supports_auto_language(engine_type: &EngineType) -> bool { + matches!(engine_type, EngineType::Whisper | EngineType::SenseVoice) +} + +fn default_language_for_model(model_info: &ModelInfo) -> String { + model_info + .supported_languages + .iter() + .find(|language| language.as_str() == "en") + .or_else(|| model_info.supported_languages.first()) + .cloned() + .unwrap_or_else(|| "en".to_string()) +} + #[tauri::command] #[specta::specta] pub async fn get_current_model(app_handle: AppHandle) -> Result { diff --git a/src-tauri/src/managers/model.rs b/src-tauri/src/managers/model.rs index 25beaf20..ab645c5d 100644 --- a/src-tauri/src/managers/model.rs +++ b/src-tauri/src/managers/model.rs @@ -23,6 +23,9 @@ pub enum EngineType { Moonshine, MoonshineStreaming, SenseVoice, + GigaAM, + Canary, + Cohere, } #[derive(Debug, Clone, Serialize, Deserialize, Type)] @@ -398,6 +401,121 @@ impl ModelManager { }, ); + let gigaam_languages: Vec = vec!["ru"].into_iter().map(String::from).collect(); + + available_models.insert( + 
"gigaam-v3-e2e-ctc".to_string(), + ModelInfo { + id: "gigaam-v3-e2e-ctc".to_string(), + name: "GigaAM v3".to_string(), + description: "onboarding.models.gigaam-v3-e2e-ctc.description".to_string(), + filename: "giga-am-v3-int8".to_string(), + url: Some("https://blob.handy.computer/giga-am-v3-int8.tar.gz".to_string()), + size_mb: 151, + is_downloaded: false, + is_downloading: false, + partial_size: 0, + is_directory: true, + engine_type: EngineType::GigaAM, + accuracy_score: 0.85, + speed_score: 0.75, + supports_translation: false, + is_recommended: false, + supported_languages: gigaam_languages, + is_custom: false, + }, + ); + + let canary_flash_languages: Vec = vec!["en", "de", "es", "fr"] + .into_iter() + .map(String::from) + .collect(); + + available_models.insert( + "canary-180m-flash".to_string(), + ModelInfo { + id: "canary-180m-flash".to_string(), + name: "Canary 180M Flash".to_string(), + description: "onboarding.models.canary-180m-flash.description".to_string(), + filename: "canary-180m-flash".to_string(), + url: Some("https://blob.handy.computer/canary-180m-flash.tar.gz".to_string()), + size_mb: 146, + is_downloaded: false, + is_downloading: false, + partial_size: 0, + is_directory: true, + engine_type: EngineType::Canary, + accuracy_score: 0.75, + speed_score: 0.85, + supports_translation: true, + is_recommended: false, + supported_languages: canary_flash_languages, + is_custom: false, + }, + ); + + let canary_1b_languages: Vec = vec![ + "bg", "hr", "cs", "da", "nl", "en", "et", "fi", "fr", "de", "el", "hu", "it", "lv", + "lt", "mt", "pl", "pt", "ro", "sk", "sl", "es", "sv", "ru", "uk", + ] + .into_iter() + .map(String::from) + .collect(); + + available_models.insert( + "canary-1b-v2".to_string(), + ModelInfo { + id: "canary-1b-v2".to_string(), + name: "Canary 1B v2".to_string(), + description: "onboarding.models.canary-1b-v2.description".to_string(), + filename: "canary-1b-v2".to_string(), + url: 
Some("https://blob.handy.computer/canary-1b-v2.tar.gz".to_string()), + size_mb: 691, + is_downloaded: false, + is_downloading: false, + partial_size: 0, + is_directory: true, + engine_type: EngineType::Canary, + accuracy_score: 0.85, + speed_score: 0.70, + supports_translation: true, + is_recommended: false, + supported_languages: canary_1b_languages, + is_custom: false, + }, + ); + + let cohere_languages: Vec = vec![ + "en", "fr", "de", "it", "es", "pt", "el", "nl", "pl", "zh", "zh-Hans", "zh-Hant", "ja", + "ko", "vi", "ar", + ] + .into_iter() + .map(String::from) + .collect(); + + available_models.insert( + "cohere-int8".to_string(), + ModelInfo { + id: "cohere-int8".to_string(), + name: "Cohere".to_string(), + description: "onboarding.models.cohere-int8.description".to_string(), + filename: "cohere-int8".to_string(), + url: Some("https://blob.handy.computer/cohere-int8.tar.gz".to_string()), + size_mb: 1708, + is_downloaded: false, + is_downloading: false, + partial_size: 0, + is_directory: true, + engine_type: EngineType::Cohere, + accuracy_score: 0.90, + speed_score: 0.60, + supports_translation: false, + is_recommended: false, + supported_languages: cohere_languages, + is_custom: false, + }, + ); + // Auto-discover custom Whisper models (.bin files) in the models directory if let Err(e) = Self::discover_custom_whisper_models(&models_dir, &mut available_models) { warn!("Failed to discover custom models: {}", e); diff --git a/src-tauri/src/managers/transcription.rs b/src-tauri/src/managers/transcription.rs index 77ef2783..0ee41764 100644 --- a/src-tauri/src/managers/transcription.rs +++ b/src-tauri/src/managers/transcription.rs @@ -11,21 +11,17 @@ use std::thread; use std::time::{Duration, SystemTime}; use tauri::{AppHandle, Emitter}; use transcribe_rs::{ - engines::{ - moonshine::{ - ModelVariant, MoonshineEngine, MoonshineModelParams, MoonshineStreamingEngine, - StreamingModelParams, - }, - parakeet::{ - ParakeetEngine, ParakeetInferenceParams, 
ParakeetModelParams, TimestampGranularity, - }, - sense_voice::{ - Language as SenseVoiceLanguage, SenseVoiceEngine, SenseVoiceInferenceParams, - SenseVoiceModelParams, - }, - whisper::{WhisperEngine, WhisperInferenceParams}, + onnx::{ + canary::CanaryModel, + cohere::CohereModel, + gigaam::GigaAMModel, + moonshine::{MoonshineModel, MoonshineVariant, StreamingModel}, + parakeet::{ParakeetModel, ParakeetParams, TimestampGranularity}, + sense_voice::{SenseVoiceModel, SenseVoiceParams}, + Quantization, }, - TranscriptionEngine, + whisper_cpp::{WhisperEngine, WhisperInferenceParams}, + SpeechModel, TranscribeOptions, }; #[derive(Clone, Debug, Serialize)] @@ -38,10 +34,13 @@ pub struct ModelStateEvent { enum LoadedEngine { Whisper(WhisperEngine), - Parakeet(ParakeetEngine), - Moonshine(MoonshineEngine), - MoonshineStreaming(MoonshineStreamingEngine), - SenseVoice(SenseVoiceEngine), + Parakeet(ParakeetModel), + Moonshine(MoonshineModel), + MoonshineStreaming(StreamingModel), + SenseVoice(SenseVoiceModel), + GigaAM(GigaAMModel), + Canary(CanaryModel), + Cohere(CohereModel), } #[derive(Clone)] @@ -164,15 +163,6 @@ impl TranscriptionManager { { let mut engine = self.lock_engine(); - if let Some(ref mut loaded_engine) = *engine { - match loaded_engine { - LoadedEngine::Whisper(ref mut e) => e.unload_model(), - LoadedEngine::Parakeet(ref mut e) => e.unload_model(), - LoadedEngine::Moonshine(ref mut e) => e.unload_model(), - LoadedEngine::MoonshineStreaming(ref mut e) => e.unload_model(), - LoadedEngine::SenseVoice(ref mut e) => e.unload_model(), - } - } *engine = None; // Drop the engine to free memory } { @@ -270,43 +260,49 @@ impl TranscriptionManager { // Create appropriate engine based on model type let loaded_engine = match model_info.engine_type { EngineType::Whisper => { - let mut engine = WhisperEngine::new(); - engine - .load_model(&model_path) + let engine = WhisperEngine::load(&model_path) .map_err(|e| emit_load_failure("whisper", &e))?; 
LoadedEngine::Whisper(engine) } EngineType::Parakeet => { - let mut engine = ParakeetEngine::new(); - engine - .load_model_with_params(&model_path, ParakeetModelParams::int8()) + let engine = ParakeetModel::load(&model_path, &Quantization::Int8) .map_err(|e| emit_load_failure("parakeet", &e))?; LoadedEngine::Parakeet(engine) } EngineType::Moonshine => { - let mut engine = MoonshineEngine::new(); - engine - .load_model_with_params( - &model_path, - MoonshineModelParams::variant(ModelVariant::Base), - ) - .map_err(|e| emit_load_failure("moonshine", &e))?; + let engine = MoonshineModel::load( + &model_path, + MoonshineVariant::Base, + &Quantization::default(), + ) + .map_err(|e| emit_load_failure("moonshine", &e))?; LoadedEngine::Moonshine(engine) } EngineType::MoonshineStreaming => { - let mut engine = MoonshineStreamingEngine::new(); - engine - .load_model_with_params(&model_path, StreamingModelParams::default()) + let engine = StreamingModel::load(&model_path, 0, &Quantization::default()) .map_err(|e| emit_load_failure("moonshine streaming", &e))?; LoadedEngine::MoonshineStreaming(engine) } EngineType::SenseVoice => { - let mut engine = SenseVoiceEngine::new(); - engine - .load_model_with_params(&model_path, SenseVoiceModelParams::int8()) + let engine = SenseVoiceModel::load(&model_path, &Quantization::Int8) .map_err(|e| emit_load_failure("SenseVoice", &e))?; LoadedEngine::SenseVoice(engine) } + EngineType::GigaAM => { + let engine = GigaAMModel::load(&model_path, &Quantization::Int8) + .map_err(|e| emit_load_failure("gigaam", &e))?; + LoadedEngine::GigaAM(engine) + } + EngineType::Canary => { + let engine = CanaryModel::load(&model_path, &Quantization::Int8) + .map_err(|e| emit_load_failure("canary", &e))?; + LoadedEngine::Canary(engine) + } + EngineType::Cohere => { + let engine = CohereModel::load(&model_path, &Quantization::Int8) + .map_err(|e| emit_load_failure("cohere", &e))?; + LoadedEngine::Cohere(engine) + } }; // Update the current engine and model ID @@ 
-504,6 +500,71 @@ impl TranscriptionManager { } } + let model_info = self.model_manager.get_model_info(&settings.selected_model); + let supports_auto_language = model_info + .as_ref() + .map(|info| { + matches!( + info.engine_type, + EngineType::Whisper | EngineType::SenseVoice + ) + }) + .unwrap_or(true); + let fallback_language = || { + model_info + .as_ref() + .and_then(|info| { + info.supported_languages + .iter() + .find(|language| language.as_str() == "en") + .or_else(|| info.supported_languages.first()) + }) + .cloned() + .unwrap_or_else(|| "en".to_string()) + }; + + let validated_language = if settings.selected_language == "auto" { + if supports_auto_language { + "auto".to_string() + } else { + let fallback_language = fallback_language(); + warn!( + "Current model does not support automatic language detection, falling back to '{}'", + fallback_language + ); + fallback_language + } + } else { + let is_supported = model_info + .as_ref() + .map(|info| { + info.supported_languages.is_empty() + || info + .supported_languages + .contains(&settings.selected_language) + }) + .unwrap_or(true); + + if is_supported { + settings.selected_language.clone() + } else { + if supports_auto_language { + warn!( + "Language '{}' not supported by current model, falling back to auto-detect", + settings.selected_language + ); + "auto".to_string() + } else { + let fallback_language = fallback_language(); + warn!( + "Language '{}' not supported by current model, falling back to '{}'", + settings.selected_language, fallback_language + ); + fallback_language + } + } + }; + // Perform transcription with the appropriate engine. // We use catch_unwind to prevent engine panics from poisoning the mutex, // which would make the app hang indefinitely on subsequent operations. 
@@ -529,15 +590,15 @@ impl TranscriptionManager { || -> Result { match &mut engine { LoadedEngine::Whisper(whisper_engine) => { - let whisper_language = if settings.selected_language == "auto" { + let whisper_language = if validated_language == "auto" { None } else { - let normalized = if settings.selected_language == "zh-Hans" - || settings.selected_language == "zh-Hant" + let normalized = if validated_language == "zh-Hans" + || validated_language == "zh-Hant" { "zh".to_string() } else { - settings.selected_language.clone() + validated_language.clone() }; Some(normalized) }; @@ -549,46 +610,83 @@ impl TranscriptionManager { }; whisper_engine - .transcribe_samples(audio, Some(params)) + .transcribe_with(&audio, &params) .map_err(|e| anyhow::anyhow!("Whisper transcription failed: {}", e)) } LoadedEngine::Parakeet(parakeet_engine) => { - let params = ParakeetInferenceParams { - timestamp_granularity: TimestampGranularity::Segment, + let params = ParakeetParams { + timestamp_granularity: Some(TimestampGranularity::Segment), + ..Default::default() }; parakeet_engine - .transcribe_samples(audio, Some(params)) + .transcribe_with(&audio, &params) .map_err(|e| { anyhow::anyhow!("Parakeet transcription failed: {}", e) }) } LoadedEngine::Moonshine(moonshine_engine) => moonshine_engine - .transcribe_samples(audio, None) + .transcribe(&audio, &TranscribeOptions::default()) .map_err(|e| anyhow::anyhow!("Moonshine transcription failed: {}", e)), LoadedEngine::MoonshineStreaming(streaming_engine) => streaming_engine - .transcribe_samples(audio, None) + .transcribe(&audio, &TranscribeOptions::default()) .map_err(|e| { anyhow::anyhow!("Moonshine streaming transcription failed: {}", e) }), LoadedEngine::SenseVoice(sense_voice_engine) => { - let language = match settings.selected_language.as_str() { - "zh" | "zh-Hans" | "zh-Hant" => SenseVoiceLanguage::Chinese, - "en" => SenseVoiceLanguage::English, - "ja" => SenseVoiceLanguage::Japanese, - "ko" => SenseVoiceLanguage::Korean, - "yue" =>
SenseVoiceLanguage::Cantonese, - _ => SenseVoiceLanguage::Auto, + let language = match validated_language.as_str() { + "zh" | "zh-Hans" | "zh-Hant" => Some("zh".to_string()), + "en" => Some("en".to_string()), + "ja" => Some("ja".to_string()), + "ko" => Some("ko".to_string()), + "yue" => Some("yue".to_string()), + _ => None, }; - let params = SenseVoiceInferenceParams { + let params = SenseVoiceParams { language, - use_itn: true, + use_itn: Some(true), }; sense_voice_engine - .transcribe_samples(audio, Some(params)) + .transcribe_with(&audio, &params) .map_err(|e| { anyhow::anyhow!("SenseVoice transcription failed: {}", e) }) } + LoadedEngine::GigaAM(gigaam_engine) => gigaam_engine + .transcribe(&audio, &TranscribeOptions::default()) + .map_err(|e| anyhow::anyhow!("GigaAM transcription failed: {}", e)), + LoadedEngine::Canary(canary_engine) => { + let language = if validated_language == "auto" { + None + } else { + Some(validated_language.clone()) + }; + let options = TranscribeOptions { + language, + translate: settings.translate_to_english, + ..Default::default() + }; + canary_engine + .transcribe(&audio, &options) + .map_err(|e| anyhow::anyhow!("Canary transcription failed: {}", e)) + } + LoadedEngine::Cohere(cohere_engine) => { + let language = if validated_language == "auto" { + None + } else if validated_language == "zh-Hans" + || validated_language == "zh-Hant" + { + Some("zh".to_string()) + } else { + Some(validated_language.clone()) + }; + let options = TranscribeOptions { + language, + ..Default::default() + }; + cohere_engine + .transcribe(&audio, &options) + .map_err(|e| anyhow::anyhow!("Cohere transcription failed: {}", e)) + } } }, )); diff --git a/src/bindings.ts b/src/bindings.ts index 50be37d2..12daf492 100644 --- a/src/bindings.ts +++ b/src/bindings.ts @@ -961,7 +961,7 @@ export type CloudOptionType = { type: "Text" } | { type: "Number"; min: number; export type CloudProviderOption = { key: string; label: string; option_type: CloudOptionType;
description: string; default_value?: CloudOptionDefault | null } export type CustomSounds = { start: boolean; stop: boolean } export type DailySpeakingStats = { date: string; total_word_count: number; total_duration_ms: number; transcription_count: number; avg_wpm: number } -export type EngineType = "Whisper" | "Parakeet" | "Moonshine" | "MoonshineStreaming" | "SenseVoice" +export type EngineType = "Whisper" | "Parakeet" | "Moonshine" | "MoonshineStreaming" | "SenseVoice" | "GigaAM" | "Canary" | "Cohere" export type HistoryEntry = { id: number; file_name: string; timestamp: number; saved: boolean; title: string; transcription_text: string; post_processed_text: string | null; post_process_prompt: string | null } export type HistoryPage = { entries: HistoryEntry[]; total_count: number } /** diff --git a/src/components/onboarding/ModelCard.tsx b/src/components/onboarding/ModelCard.tsx index 0d3189f1..2d900eb6 100644 --- a/src/components/onboarding/ModelCard.tsx +++ b/src/components/onboarding/ModelCard.tsx @@ -55,6 +55,7 @@ interface ModelCardProps { showSettings?: boolean; supportedLanguages?: string[]; supportsTranslation?: boolean; + supportsAutoLanguage?: boolean; } const ModelCard: React.FC = ({ @@ -75,6 +76,7 @@ const ModelCard: React.FC = ({ showSettings = false, supportedLanguages, supportsTranslation = false, + supportsAutoLanguage = true, }) => { const { t } = useTranslation(); const [expanded, setExpanded] = useState(false); @@ -86,11 +88,17 @@ const ModelCard: React.FC = ({ (lang) => !supportedLanguages || supportedLanguages.length === 0 || - lang.value === "auto" || + (supportsAutoLanguage && lang.value === "auto") || supportedLanguages.includes(lang.value), ).map((lang) => ({ value: lang.value, label: lang.label })), - [supportedLanguages], + [supportedLanguages, supportsAutoLanguage], ); + const selectedLanguage = getSetting("selected_language") || "auto"; + const selectedLanguageValue = languageOptions.some( + (option) => option.value === 
selectedLanguage, + ) + ? selectedLanguage + : (languageOptions[0]?.value ?? selectedLanguage); const isFeatured = variant === "featured"; const isCloud = provider.backend.type === "Cloud"; const isLocal = provider.backend.type === "Local"; @@ -193,7 +201,7 @@ const ModelCard: React.FC = ({ {t("settings.general.language.title")} updateSetting("selected_language", val)} placeholder={t("settings.general.language.auto")} diff --git a/src/i18n/locales/ar/translation.json b/src/i18n/locales/ar/translation.json index 612ba534..3ec72d12 100644 --- a/src/i18n/locales/ar/translation.json +++ b/src/i18n/locales/ar/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "سريع جداً. الصينية، الإنجليزية، اليابانية، الكورية، الكانتونية." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { diff --git a/src/i18n/locales/cs/translation.json b/src/i18n/locales/cs/translation.json index 7eddc5c8..50f89e33 100644 --- a/src/i18n/locales/cs/translation.json +++ b/src/i18n/locales/cs/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Velmi rychlý. Čínština, angličtina, japonština, korejština, kantonština." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." 
+ }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { diff --git a/src/i18n/locales/de/translation.json b/src/i18n/locales/de/translation.json index c5e095b0..15044d55 100644 --- a/src/i18n/locales/de/translation.json +++ b/src/i18n/locales/de/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Sehr schnell. Chinesisch, Englisch, Japanisch, Koreanisch, Kantonesisch." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { diff --git a/src/i18n/locales/en/translation.json b/src/i18n/locales/en/translation.json index 294cbb01..55bb159a 100644 --- a/src/i18n/locales/en/translation.json +++ b/src/i18n/locales/en/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Very fast. Chinese, English, Japanese, Korean, Cantonese." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." 
+ }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "cloud": { diff --git a/src/i18n/locales/es/translation.json b/src/i18n/locales/es/translation.json index 3cc724b3..e76f013c 100644 --- a/src/i18n/locales/es/translation.json +++ b/src/i18n/locales/es/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Muy rápido. Chino, inglés, japonés, coreano, cantonés." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { diff --git a/src/i18n/locales/fr/translation.json b/src/i18n/locales/fr/translation.json index ca101484..fabe7929 100644 --- a/src/i18n/locales/fr/translation.json +++ b/src/i18n/locales/fr/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Très rapide. Chinois, anglais, japonais, coréen, cantonais." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." 
} }, "errors": { diff --git a/src/i18n/locales/it/translation.json b/src/i18n/locales/it/translation.json index 13ae3b1c..95f7acb3 100644 --- a/src/i18n/locales/it/translation.json +++ b/src/i18n/locales/it/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Molto veloce. Cinese, inglese, giapponese, coreano, cantonese." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { diff --git a/src/i18n/locales/ja/translation.json b/src/i18n/locales/ja/translation.json index a9d6dfcd..9f152810 100644 --- a/src/i18n/locales/ja/translation.json +++ b/src/i18n/locales/ja/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "非常に高速。中国語、英語、日本語、韓国語、広東語。" + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." 
} }, "errors": { diff --git a/src/i18n/locales/ko/translation.json b/src/i18n/locales/ko/translation.json index fc6f1eb4..01e6ebc8 100644 --- a/src/i18n/locales/ko/translation.json +++ b/src/i18n/locales/ko/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "매우 빠름. 중국어, 영어, 일본어, 한국어, 광둥어." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { diff --git a/src/i18n/locales/pl/translation.json b/src/i18n/locales/pl/translation.json index 6bf4cc5c..d445a83a 100644 --- a/src/i18n/locales/pl/translation.json +++ b/src/i18n/locales/pl/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Bardzo szybki. Chiński, angielski, japoński, koreański, kantoński." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." 
} }, "errors": { diff --git a/src/i18n/locales/pt/translation.json b/src/i18n/locales/pt/translation.json index 4e25b74a..5529bcac 100644 --- a/src/i18n/locales/pt/translation.json +++ b/src/i18n/locales/pt/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Muito rápido. Chinês, inglês, japonês, coreano, cantonês." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { diff --git a/src/i18n/locales/ru/translation.json b/src/i18n/locales/ru/translation.json index 78b350d2..2e30de26 100644 --- a/src/i18n/locales/ru/translation.json +++ b/src/i18n/locales/ru/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Очень быстрый. Китайский, английский, японский, корейский, кантонский." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." 
} }, "errors": { diff --git a/src/i18n/locales/tr/translation.json b/src/i18n/locales/tr/translation.json index 7eb75ae0..7bbd5250 100644 --- a/src/i18n/locales/tr/translation.json +++ b/src/i18n/locales/tr/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Çok hızlı. Çince, İngilizce, Japonca, Korece, Kantonca." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { diff --git a/src/i18n/locales/uk/translation.json b/src/i18n/locales/uk/translation.json index d25d2d35..844c462c 100644 --- a/src/i18n/locales/uk/translation.json +++ b/src/i18n/locales/uk/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Дуже швидкий. Китайська, англійська, японська, корейська, кантонська." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." 
} }, "errors": { diff --git a/src/i18n/locales/vi/translation.json b/src/i18n/locales/vi/translation.json index c13e8506..f5b087af 100644 --- a/src/i18n/locales/vi/translation.json +++ b/src/i18n/locales/vi/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "Rất nhanh. Tiếng Trung, tiếng Anh, tiếng Nhật, tiếng Hàn, tiếng Quảng Đông." + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { diff --git a/src/i18n/locales/zh-TW/translation.json b/src/i18n/locales/zh-TW/translation.json index fc732d40..fad0ebbc 100644 --- a/src/i18n/locales/zh-TW/translation.json +++ b/src/i18n/locales/zh-TW/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "速度極快。支援中文、英語、日語、韓語、粵語。" + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." 
} }, "errors": { diff --git a/src/i18n/locales/zh/translation.json b/src/i18n/locales/zh/translation.json index 1f175207..5138458e 100644 --- a/src/i18n/locales/zh/translation.json +++ b/src/i18n/locales/zh/translation.json @@ -77,6 +77,22 @@ "sense-voice-int8": { "name": "SenseVoice", "description": "非常快速。支持中文、英语、日语、韩语、粤语。" + }, + "gigaam-v3-e2e-ctc": { + "name": "GigaAM v3", + "description": "Russian speech recognition. Fast and accurate." + }, + "canary-180m-flash": { + "name": "Canary 180M Flash", + "description": "Very fast. English, German, Spanish, French. Supports translation." + }, + "canary-1b-v2": { + "name": "Canary 1B v2", + "description": "Accurate multilingual. 25 European languages. Supports translation." + }, + "cohere-int8": { + "name": "Cohere", + "description": "A large, slower, but very accurate multilingual model." } }, "errors": { diff --git a/src/lib/utils/modelSettings.ts b/src/lib/utils/modelSettings.ts index ceea93f9..8ecd09fa 100644 --- a/src/lib/utils/modelSettings.ts +++ b/src/lib/utils/modelSettings.ts @@ -9,11 +9,19 @@ export const getModelSettingsProps = (provider: SttProviderInfo) => { provider.backend.type === "Local" ? provider.backend.engine_type : undefined; + const supportsAutoLanguage = + provider.backend.type !== "Local" || + engineType === "Whisper" || + engineType === "SenseVoice"; const supportsLanguageSelection = - engineType === "Whisper" || engineType === "SenseVoice"; + engineType === "Whisper" || + engineType === "SenseVoice" || + engineType === "Canary" || + engineType === "Cohere"; return { showSettings: supportsLanguageSelection || provider.supports_translation, supportedLanguages: provider.supported_languages, supportsTranslation: provider.supports_translation, + supportsAutoLanguage, }; };