diff --git a/Cargo.lock b/Cargo.lock
index ffcc64c5e..1846f49df 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -621,6 +621,17 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+[[package]]
+name = "audiopus_sys"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62314a1546a2064e033665d658e88c620a62904be945f8147e6b16c3db9f8651"
+dependencies = [
+ "cmake",
+ "log",
+ "pkg-config",
+]
+
[[package]]
name = "autocfg"
version = "1.5.0"
@@ -640,7 +651,7 @@ dependencies = [
"log",
"num-rational",
"num-traits",
- "pastey 0.1.1",
+ "pastey",
"rayon",
"thiserror 2.0.18",
"v_frame",
@@ -702,10 +713,10 @@ dependencies = [
"bytes",
"form_urlencoded",
"futures-util",
- "http 1.4.0",
- "http-body 1.0.1",
+ "http",
+ "http-body",
"http-body-util",
- "hyper 1.8.1",
+ "hyper",
"hyper-util",
"itoa",
"matchit",
@@ -718,7 +729,7 @@ dependencies = [
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
- "sync_wrapper 1.0.2",
+ "sync_wrapper",
"tokio",
"tower",
"tower-layer",
@@ -734,12 +745,12 @@ checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1"
dependencies = [
"bytes",
"futures-core",
- "http 1.4.0",
- "http-body 1.0.1",
+ "http",
+ "http-body",
"http-body-util",
"mime",
"pin-project-lite",
- "sync_wrapper 1.0.2",
+ "sync_wrapper",
"tower-layer",
"tower-service",
"tracing",
@@ -751,12 +762,6 @@ version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
-[[package]]
-name = "base64"
-version = "0.21.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
-
[[package]]
name = "base64"
version = "0.22.1"
@@ -782,6 +787,26 @@ dependencies = [
"num-traits",
]
+[[package]]
+name = "bindgen"
+version = "0.71.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
+dependencies = [
+ "bitflags 2.10.0",
+ "cexpr",
+ "clang-sys",
+ "itertools 0.13.0",
+ "log",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "syn 2.0.114",
+]
+
[[package]]
name = "bit_field"
version = "0.10.3"
@@ -895,9 +920,9 @@ dependencies = [
"futures-core",
"futures-util",
"hex",
- "http 1.4.0",
+ "http",
"http-body-util",
- "hyper 1.8.1",
+ "hyper",
"hyper-named-pipe",
"hyper-util",
"hyperlocal",
@@ -1112,10 +1137,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0"
[[package]]
-name = "cesu8"
-version = "1.1.0"
+name = "cexpr"
+version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom 7.1.3",
+]
[[package]]
name = "cff-parser"
@@ -1151,7 +1179,7 @@ dependencies = [
"futures",
"futures-timer",
"pin-project-lite",
- "reqwest 0.12.28",
+ "reqwest",
"serde",
"serde_json",
"thiserror 1.0.69",
@@ -1235,6 +1263,17 @@ dependencies = [
"inout",
]
+[[package]]
+name = "clang-sys"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
[[package]]
name = "clap"
version = "4.5.58"
@@ -1305,16 +1344,6 @@ dependencies = [
"windows-sys 0.61.2",
]
-[[package]]
-name = "combine"
-version = "4.6.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd"
-dependencies = [
- "bytes",
- "memchr",
-]
-
[[package]]
name = "comfy-table"
version = "7.2.2"
@@ -1399,6 +1428,19 @@ dependencies = [
"windows-sys 0.59.0",
]
+[[package]]
+name = "console"
+version = "0.16.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4"
+dependencies = [
+ "encode_unicode",
+ "libc",
+ "once_cell",
+ "unicode-width",
+ "windows-sys 0.61.2",
+]
+
[[package]]
name = "const-oid"
version = "0.9.6"
@@ -1455,6 +1497,35 @@ dependencies = [
"unicode-segmentation",
]
+[[package]]
+name = "cookie"
+version = "0.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747"
+dependencies = [
+ "percent-encoding",
+ "time",
+ "version_check",
+]
+
+[[package]]
+name = "cookie_store"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15b2c103cf610ec6cae3da84a766285b42fd16aad564758459e6ecf128c75206"
+dependencies = [
+ "cookie",
+ "document-features",
+ "idna",
+ "indexmap 2.13.0",
+ "log",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "time",
+ "url",
+]
+
[[package]]
name = "core-foundation"
version = "0.9.4"
@@ -1620,7 +1691,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73736a89c4aff73035ba2ed2e565061954da00d4970fc9ac25dcc85a2a20d790"
dependencies = [
"dispatch2",
- "nix 0.30.1",
+ "nix",
"windows-sys 0.61.2",
]
@@ -1653,16 +1724,6 @@ dependencies = [
"darling_macro 0.21.3",
]
-[[package]]
-name = "darling"
-version = "0.23.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
-dependencies = [
- "darling_core 0.23.0",
- "darling_macro 0.23.0",
-]
-
[[package]]
name = "darling_core"
version = "0.20.11"
@@ -1691,19 +1752,6 @@ dependencies = [
"syn 2.0.114",
]
-[[package]]
-name = "darling_core"
-version = "0.23.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0"
-dependencies = [
- "ident_case",
- "proc-macro2",
- "quote",
- "strsim 0.11.1",
- "syn 2.0.114",
-]
-
[[package]]
name = "darling_macro"
version = "0.20.11"
@@ -1726,17 +1774,6 @@ dependencies = [
"syn 2.0.114",
]
-[[package]]
-name = "darling_macro"
-version = "0.23.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
-dependencies = [
- "darling_core 0.23.0",
- "quote",
- "syn 2.0.114",
-]
-
[[package]]
name = "dary_heap"
version = "0.3.8"
@@ -2537,7 +2574,7 @@ version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "658bce805d770f407bc62102fca7c2c64ceef2fbcb2b8bd19d2765ce093980de"
dependencies = [
- "console",
+ "console 0.15.11",
"shell-words",
"tempfile",
"thiserror 1.0.69",
@@ -2609,6 +2646,15 @@ dependencies = [
"const-random",
]
+[[package]]
+name = "document-features"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
+dependencies = [
+ "litrs",
+]
+
[[package]]
name = "dotenvy"
version = "0.15.7"
@@ -2849,6 +2895,12 @@ dependencies = [
"zune-inflate",
]
+[[package]]
+name = "extended"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af9673d8203fcb076b19dfd17e38b3d4ae9f44959416ea532ce72415a6020365"
+
[[package]]
name = "fast-float2"
version = "0.2.3"
@@ -2868,7 +2920,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04c269a76bfc6cea69553b7d040acb16c793119cebd97c756d21e08d0f075ff8"
dependencies = [
"anyhow",
- "hf-hub",
+ "hf-hub 0.4.3",
"image",
"ndarray",
"ort",
@@ -3402,25 +3454,6 @@ dependencies = [
"regex-syntax",
]
-[[package]]
-name = "h2"
-version = "0.3.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d"
-dependencies = [
- "bytes",
- "fnv",
- "futures-core",
- "futures-sink",
- "futures-util",
- "http 0.2.12",
- "indexmap 2.13.0",
- "slab",
- "tokio",
- "tokio-util",
- "tracing",
-]
-
[[package]]
name = "h2"
version = "0.4.13"
@@ -3432,7 +3465,7 @@ dependencies = [
"fnv",
"futures-core",
"futures-sink",
- "http 1.4.0",
+ "http",
"indexmap 2.13.0",
"slab",
"tokio",
@@ -3544,20 +3577,44 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97"
dependencies = [
"dirs",
- "http 1.4.0",
- "indicatif",
+ "http",
+ "indicatif 0.17.11",
"libc",
"log",
"native-tls",
"rand 0.9.2",
- "reqwest 0.12.28",
+ "reqwest",
"serde",
"serde_json",
"thiserror 2.0.18",
- "ureq",
+ "ureq 2.12.1",
"windows-sys 0.60.2",
]
+[[package]]
+name = "hf-hub"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "aef3982638978efa195ff11b305f51f1f22f4f0a6cabee7af79b383ebee6a213"
+dependencies = [
+ "dirs",
+ "futures",
+ "http",
+ "indicatif 0.18.4",
+ "libc",
+ "log",
+ "native-tls",
+ "num_cpus",
+ "rand 0.9.2",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "ureq 3.2.0",
+ "windows-sys 0.61.2",
+]
+
[[package]]
name = "hkdf"
version = "0.12.4"
@@ -3591,17 +3648,6 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
-[[package]]
-name = "http"
-version = "0.2.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
-dependencies = [
- "bytes",
- "fnv",
- "itoa",
-]
-
[[package]]
name = "http"
version = "1.4.0"
@@ -3612,17 +3658,6 @@ dependencies = [
"itoa",
]
-[[package]]
-name = "http-body"
-version = "0.4.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
-dependencies = [
- "bytes",
- "http 0.2.12",
- "pin-project-lite",
-]
-
[[package]]
name = "http-body"
version = "1.0.1"
@@ -3630,7 +3665,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
dependencies = [
"bytes",
- "http 1.4.0",
+ "http",
]
[[package]]
@@ -3641,8 +3676,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
dependencies = [
"bytes",
"futures-core",
- "http 1.4.0",
- "http-body 1.0.1",
+ "http",
+ "http-body",
"pin-project-lite",
]
@@ -3670,30 +3705,6 @@ version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424"
-[[package]]
-name = "hyper"
-version = "0.14.32"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7"
-dependencies = [
- "bytes",
- "futures-channel",
- "futures-core",
- "futures-util",
- "h2 0.3.27",
- "http 0.2.12",
- "http-body 0.4.6",
- "httparse",
- "httpdate",
- "itoa",
- "pin-project-lite",
- "socket2 0.5.10",
- "tokio",
- "tower-service",
- "tracing",
- "want",
-]
-
[[package]]
name = "hyper"
version = "1.8.1"
@@ -3704,9 +3715,9 @@ dependencies = [
"bytes",
"futures-channel",
"futures-core",
- "h2 0.4.13",
- "http 1.4.0",
- "http-body 1.0.1",
+ "h2",
+ "http",
+ "http-body",
"httparse",
"httpdate",
"itoa",
@@ -3724,7 +3735,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278"
dependencies = [
"hex",
- "hyper 1.8.1",
+ "hyper",
"hyper-util",
"pin-project-lite",
"tokio",
@@ -3732,28 +3743,14 @@ dependencies = [
"winapi",
]
-[[package]]
-name = "hyper-rustls"
-version = "0.24.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590"
-dependencies = [
- "futures-util",
- "http 0.2.12",
- "hyper 0.14.32",
- "rustls 0.21.12",
- "tokio",
- "tokio-rustls 0.24.1",
-]
-
[[package]]
name = "hyper-rustls"
version = "0.27.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
dependencies = [
- "http 1.4.0",
- "hyper 1.8.1",
+ "http",
+ "hyper",
"hyper-util",
"log",
"rustls 0.23.36",
@@ -3773,7 +3770,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
dependencies = [
"bytes",
"http-body-util",
- "hyper 1.8.1",
+ "hyper",
"hyper-util",
"native-tls",
"tokio",
@@ -3791,15 +3788,15 @@ dependencies = [
"bytes",
"futures-channel",
"futures-util",
- "http 1.4.0",
- "http-body 1.0.1",
- "hyper 1.8.1",
+ "http",
+ "http-body",
+ "hyper",
"ipnet",
"libc",
"percent-encoding",
"pin-project-lite",
- "socket2 0.6.2",
- "system-configuration 0.7.0",
+ "socket2",
+ "system-configuration",
"tokio",
"tower-service",
"tracing",
@@ -3814,7 +3811,7 @@ checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7"
dependencies = [
"hex",
"http-body-util",
- "hyper 1.8.1",
+ "hyper",
"hyper-util",
"pin-project-lite",
"tokio",
@@ -4121,13 +4118,26 @@ version = "0.17.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
dependencies = [
- "console",
+ "console 0.15.11",
"number_prefix",
"portable-atomic",
"unicode-width",
"web-time",
]
+[[package]]
+name = "indicatif"
+version = "0.18.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb"
+dependencies = [
+ "console 0.16.2",
+ "portable-atomic",
+ "unicode-width",
+ "unit-prefix",
+ "web-time",
+]
+
[[package]]
name = "indoc"
version = "2.0.7"
@@ -4311,28 +4321,6 @@ dependencies = [
"jiff-tzdb",
]
-[[package]]
-name = "jni"
-version = "0.21.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97"
-dependencies = [
- "cesu8",
- "cfg-if",
- "combine",
- "jni-sys",
- "log",
- "thiserror 1.0.69",
- "walkdir",
- "windows-sys 0.45.0",
-]
-
-[[package]]
-name = "jni-sys"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
-
[[package]]
name = "jobserver"
version = "0.1.34"
@@ -4853,7 +4841,7 @@ version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2acdba67f84190067532fce07b51a435dd390d7cdc1129a05003e5cb3274cf0"
dependencies = [
- "reqwest 0.12.28",
+ "reqwest",
"serde",
"serde_json",
"serde_repr",
@@ -5058,9 +5046,9 @@ dependencies = [
[[package]]
name = "libc"
-version = "0.2.180"
+version = "0.2.181"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc"
+checksum = "459427e2af2b9c839b132acb702a1c654d95e10f8c326bfc2ad11310e458b1c5"
[[package]]
name = "libfuzzer-sys"
@@ -5072,6 +5060,16 @@ dependencies = [
"cc",
]
+[[package]]
+name = "libloading"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
+dependencies = [
+ "cfg-if",
+ "windows-link 0.2.1",
+]
+
[[package]]
name = "libm"
version = "0.2.16"
@@ -5118,6 +5116,12 @@ version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
+[[package]]
+name = "litrs"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
+
[[package]]
name = "lock_api"
version = "0.4.14"
@@ -5476,7 +5480,7 @@ dependencies = [
"bytes",
"encoding_rs",
"futures-util",
- "http 1.4.0",
+ "http",
"httparse",
"memchr",
"mime",
@@ -5555,18 +5559,6 @@ dependencies = [
"libc",
]
-[[package]]
-name = "nix"
-version = "0.31.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "225e7cfe711e0ba79a68baeddb2982723e4235247aefce1482f2f16c27865b66"
-dependencies = [
- "bitflags 2.10.0",
- "cfg-if",
- "cfg_aliases",
- "libc",
-]
-
[[package]]
name = "nom"
version = "7.1.3"
@@ -5805,7 +5797,7 @@ dependencies = [
"bytes",
"chrono",
"futures",
- "http 1.4.0",
+ "http",
"humantime",
"itertools 0.14.0",
"parking_lot",
@@ -5819,6 +5811,15 @@ dependencies = [
"web-time",
]
+[[package]]
+name = "ogg"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdab8dcd8d4052eaacaf8fb07a3ccd9a6e26efadb42878a413c68fc4af1dee2b"
+dependencies = [
+ "byteorder",
+]
+
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -5948,9 +5949,9 @@ checksum = "46d7ab32b827b5b495bd90fa95a6cb65ccc293555dcc3199ae2937d2d237c8ed"
dependencies = [
"async-trait",
"bytes",
- "http 1.4.0",
+ "http",
"opentelemetry",
- "reqwest 0.12.28",
+ "reqwest",
"tracing",
]
@@ -5961,13 +5962,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656"
dependencies = [
"futures-core",
- "http 1.4.0",
+ "http",
"opentelemetry",
"opentelemetry-http",
"opentelemetry-proto",
"opentelemetry_sdk",
"prost 0.13.5",
- "reqwest 0.12.28",
+ "reqwest",
"thiserror 2.0.18",
]
@@ -6016,12 +6017,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d"
[[package]]
-name = "ordered-float"
-version = "5.1.0"
+name = "opus"
+version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d"
+checksum = "4d3809943dff6fbad5f0484449ea26bdb9cb7d8efdf26ed50d3c7f227f69eb5c"
dependencies = [
- "num-traits",
+ "audiopus_sys",
+]
+
+[[package]]
+name = "ordered-float"
+version = "5.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d"
+dependencies = [
+ "num-traits",
]
[[package]]
@@ -6055,7 +6065,7 @@ dependencies = [
"pkg-config",
"sha2",
"tar",
- "ureq",
+ "ureq 2.12.1",
]
[[package]]
@@ -6108,12 +6118,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec"
-[[package]]
-name = "pastey"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b867cad97c0791bbd3aaa6472142568c6c9e8f71937e98379f584cfb0cf35bec"
-
[[package]]
name = "path_abs"
version = "0.5.1"
@@ -6447,20 +6451,6 @@ dependencies = [
"unicode-ident",
]
-[[package]]
-name = "process-wrap"
-version = "9.0.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ccd9713fe2c91c3c85ac388b31b89de339365d2c995146e630b5e0da9d06526a"
-dependencies = [
- "futures",
- "indexmap 2.13.0",
- "nix 0.31.1",
- "tokio",
- "tracing",
- "windows",
-]
-
[[package]]
name = "profiling"
version = "1.0.17"
@@ -6633,7 +6623,7 @@ dependencies = [
"quinn-udp",
"rustc-hash",
"rustls 0.23.36",
- "socket2 0.6.2",
+ "socket2",
"thiserror 2.0.18",
"tokio",
"tracing",
@@ -6646,7 +6636,6 @@ version = "0.11.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
dependencies = [
- "aws-lc-rs",
"bytes",
"getrandom 0.3.4",
"lru-slab",
@@ -6671,7 +6660,7 @@ dependencies = [
"cfg_aliases",
"libc",
"once_cell",
- "socket2 0.6.2",
+ "socket2",
"tracing",
"windows-sys 0.60.2",
]
@@ -6993,47 +6982,6 @@ version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
-[[package]]
-name = "reqwest"
-version = "0.11.27"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62"
-dependencies = [
- "base64 0.21.7",
- "bytes",
- "encoding_rs",
- "futures-core",
- "futures-util",
- "h2 0.3.27",
- "http 0.2.12",
- "http-body 0.4.6",
- "hyper 0.14.32",
- "hyper-rustls 0.24.2",
- "ipnet",
- "js-sys",
- "log",
- "mime",
- "once_cell",
- "percent-encoding",
- "pin-project-lite",
- "rustls 0.21.12",
- "rustls-pemfile",
- "serde",
- "serde_json",
- "serde_urlencoded",
- "sync_wrapper 0.1.2",
- "system-configuration 0.5.1",
- "tokio",
- "tokio-rustls 0.24.1",
- "tower-service",
- "url",
- "wasm-bindgen",
- "wasm-bindgen-futures",
- "web-sys",
- "webpki-roots 0.25.4",
- "winreg",
-]
-
[[package]]
name = "reqwest"
version = "0.12.28"
@@ -7046,12 +6994,12 @@ dependencies = [
"futures-channel",
"futures-core",
"futures-util",
- "h2 0.4.13",
- "http 1.4.0",
- "http-body 1.0.1",
+ "h2",
+ "http",
+ "http-body",
"http-body-util",
- "hyper 1.8.1",
- "hyper-rustls 0.27.7",
+ "hyper",
+ "hyper-rustls",
"hyper-tls",
"hyper-util",
"js-sys",
@@ -7068,7 +7016,7 @@ dependencies = [
"serde",
"serde_json",
"serde_urlencoded",
- "sync_wrapper 1.0.2",
+ "sync_wrapper",
"tokio",
"tokio-native-tls",
"tokio-rustls 0.26.4",
@@ -7079,51 +7027,11 @@ dependencies = [
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
- "wasm-streams 0.4.2",
+ "wasm-streams",
"web-sys",
"webpki-roots 1.0.6",
]
-[[package]]
-name = "reqwest"
-version = "0.13.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801"
-dependencies = [
- "base64 0.22.1",
- "bytes",
- "futures-core",
- "futures-util",
- "http 1.4.0",
- "http-body 1.0.1",
- "http-body-util",
- "hyper 1.8.1",
- "hyper-rustls 0.27.7",
- "hyper-util",
- "js-sys",
- "log",
- "percent-encoding",
- "pin-project-lite",
- "quinn",
- "rustls 0.23.36",
- "rustls-pki-types",
- "rustls-platform-verifier",
- "serde",
- "serde_json",
- "sync_wrapper 1.0.2",
- "tokio",
- "tokio-rustls 0.26.4",
- "tokio-util",
- "tower",
- "tower-http",
- "tower-service",
- "url",
- "wasm-bindgen",
- "wasm-bindgen-futures",
- "wasm-streams 0.5.0",
- "web-sys",
-]
-
[[package]]
name = "rgb"
version = "0.8.52"
@@ -7148,13 +7056,13 @@ dependencies = [
"futures",
"futures-timer",
"glob",
- "http 1.4.0",
+ "http",
"mime",
"mime_guess",
"nanoid",
"ordered-float",
"pin-project-lite",
- "reqwest 0.12.28",
+ "reqwest",
"rig-derive",
"schemars 1.2.1",
"serde",
@@ -7195,46 +7103,6 @@ dependencies = [
"windows-sys 0.52.0",
]
-[[package]]
-name = "rmcp"
-version = "0.16.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc4c9c94680f75470ee8083a0667988b5d7b5beb70b9f998a8e51de7c682ce60"
-dependencies = [
- "async-trait",
- "base64 0.22.1",
- "chrono",
- "futures",
- "http 1.4.0",
- "pastey 0.2.1",
- "pin-project-lite",
- "process-wrap",
- "reqwest 0.13.2",
- "rmcp-macros",
- "schemars 1.2.1",
- "serde",
- "serde_json",
- "sse-stream",
- "thiserror 2.0.18",
- "tokio",
- "tokio-stream",
- "tokio-util",
- "tracing",
-]
-
-[[package]]
-name = "rmcp-macros"
-version = "0.16.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90c23c8f26cae4da838fbc3eadfaecf2d549d97c04b558e7bd90526a9c28b42a"
-dependencies = [
- "darling 0.23.0",
- "proc-macro2",
- "quote",
- "serde_json",
- "syn 2.0.114",
-]
-
[[package]]
name = "roaring"
version = "0.10.12"
@@ -7486,15 +7354,6 @@ dependencies = [
"security-framework 3.5.1",
]
-[[package]]
-name = "rustls-pemfile"
-version = "1.0.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c"
-dependencies = [
- "base64 0.21.7",
-]
-
[[package]]
name = "rustls-pki-types"
version = "1.14.0"
@@ -7505,33 +7364,6 @@ dependencies = [
"zeroize",
]
-[[package]]
-name = "rustls-platform-verifier"
-version = "0.6.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784"
-dependencies = [
- "core-foundation 0.10.1",
- "core-foundation-sys",
- "jni",
- "log",
- "once_cell",
- "rustls 0.23.36",
- "rustls-native-certs",
- "rustls-platform-verifier-android",
- "rustls-webpki 0.103.9",
- "security-framework 3.5.1",
- "security-framework-sys",
- "webpki-root-certs",
- "windows-sys 0.52.0",
-]
-
-[[package]]
-name = "rustls-platform-verifier-android"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f"
-
[[package]]
name = "rustls-webpki"
version = "0.101.7"
@@ -7645,7 +7477,6 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc"
dependencies = [
- "chrono",
"dyn-clone",
"ref-cast",
"schemars_derive 1.2.1",
@@ -7937,7 +7768,7 @@ dependencies = [
"mime_guess",
"parking_lot",
"percent-encoding",
- "reqwest 0.12.28",
+ "reqwest",
"rustc-hash",
"secrecy",
"serde",
@@ -8121,10 +7952,10 @@ dependencies = [
"futures-util",
"hex",
"hmac",
- "http 1.4.0",
+ "http",
"http-body-util",
- "hyper 1.8.1",
- "hyper-rustls 0.27.7",
+ "hyper",
+ "hyper-rustls",
"hyper-util",
"lazy_static",
"mime",
@@ -8176,16 +8007,6 @@ dependencies = [
"syn 2.0.114",
]
-[[package]]
-name = "socket2"
-version = "0.5.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
-dependencies = [
- "libc",
- "windows-sys 0.52.0",
-]
-
[[package]]
name = "socket2"
version = "0.6.2"
@@ -8209,7 +8030,7 @@ dependencies = [
[[package]]
name = "spacebot"
-version = "0.1.14"
+version = "0.1.12"
dependencies = [
"aes-gcm",
"anyhow",
@@ -8224,7 +8045,6 @@ dependencies = [
"chromiumoxide",
"chromiumoxide_cdp",
"chrono",
- "chrono-tz",
"clap",
"config",
"daemonize",
@@ -8232,7 +8052,7 @@ dependencies = [
"dirs",
"fastembed",
"futures",
- "hex",
+ "hf-hub 0.5.0",
"ignore",
"indoc",
"lance-index",
@@ -8241,20 +8061,21 @@ dependencies = [
"mime_guess",
"minijinja",
"notify",
+ "ogg",
"open",
"opentelemetry",
"opentelemetry-otlp",
"opentelemetry-semantic-conventions",
"opentelemetry_sdk",
+ "opus",
"pdf-extract",
"pin-project",
"prometheus",
"rand 0.9.2",
"redb",
"regex",
- "reqwest 0.12.28",
+ "reqwest",
"rig-core",
- "rmcp",
"rust-embed",
"rustls 0.23.36",
"schemars 0.8.22",
@@ -8265,6 +8086,7 @@ dependencies = [
"sha2",
"slack-morphism",
"sqlx",
+ "symphonia",
"teloxide",
"tempfile",
"thiserror 2.0.18",
@@ -8281,6 +8103,7 @@ dependencies = [
"twitch-irc",
"urlencoding",
"uuid",
+ "whisper-rs",
"zip",
]
@@ -8544,19 +8367,6 @@ dependencies = [
"uuid",
]
-[[package]]
-name = "sse-stream"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eb4dc4d33c68ec1f27d386b5610a351922656e1fdf5c05bbaad930cd1519479a"
-dependencies = [
- "bytes",
- "futures-util",
- "http-body 1.0.1",
- "http-body-util",
- "pin-project-lite",
-]
-
[[package]]
name = "stable_deref_trait"
version = "1.2.1"
@@ -8645,6 +8455,178 @@ version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
+[[package]]
+name = "symphonia"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5773a4c030a19d9bfaa090f49746ff35c75dfddfa700df7a5939d5e076a57039"
+dependencies = [
+ "lazy_static",
+ "symphonia-bundle-flac",
+ "symphonia-bundle-mp3",
+ "symphonia-codec-aac",
+ "symphonia-codec-adpcm",
+ "symphonia-codec-pcm",
+ "symphonia-codec-vorbis",
+ "symphonia-core",
+ "symphonia-format-isomp4",
+ "symphonia-format-mkv",
+ "symphonia-format-ogg",
+ "symphonia-format-riff",
+ "symphonia-metadata",
+]
+
+[[package]]
+name = "symphonia-bundle-flac"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c91565e180aea25d9b80a910c546802526ffd0072d0b8974e3ebe59b686c9976"
+dependencies = [
+ "log",
+ "symphonia-core",
+ "symphonia-metadata",
+ "symphonia-utils-xiph",
+]
+
+[[package]]
+name = "symphonia-bundle-mp3"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4872dd6bb56bf5eac799e3e957aa1981086c3e613b27e0ac23b176054f7c57ed"
+dependencies = [
+ "lazy_static",
+ "log",
+ "symphonia-core",
+ "symphonia-metadata",
+]
+
+[[package]]
+name = "symphonia-codec-aac"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c263845aa86881416849c1729a54c7f55164f8b96111dba59de46849e73a790"
+dependencies = [
+ "lazy_static",
+ "log",
+ "symphonia-core",
+]
+
+[[package]]
+name = "symphonia-codec-adpcm"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dddc50e2bbea4cfe027441eece77c46b9f319748605ab8f3443350129ddd07f"
+dependencies = [
+ "log",
+ "symphonia-core",
+]
+
+[[package]]
+name = "symphonia-codec-pcm"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e89d716c01541ad3ebe7c91ce4c8d38a7cf266a3f7b2f090b108fb0cb031d95"
+dependencies = [
+ "log",
+ "symphonia-core",
+]
+
+[[package]]
+name = "symphonia-codec-vorbis"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f025837c309cd69ffef572750b4a2257b59552c5399a5e49707cc5b1b85d1c73"
+dependencies = [
+ "log",
+ "symphonia-core",
+ "symphonia-utils-xiph",
+]
+
+[[package]]
+name = "symphonia-core"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea00cc4f79b7f6bb7ff87eddc065a1066f3a43fe1875979056672c9ef948c2af"
+dependencies = [
+ "arrayvec",
+ "bitflags 1.3.2",
+ "bytemuck",
+ "lazy_static",
+ "log",
+]
+
+[[package]]
+name = "symphonia-format-isomp4"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "243739585d11f81daf8dac8d9f3d18cc7898f6c09a259675fc364b382c30e0a5"
+dependencies = [
+ "encoding_rs",
+ "log",
+ "symphonia-core",
+ "symphonia-metadata",
+ "symphonia-utils-xiph",
+]
+
+[[package]]
+name = "symphonia-format-mkv"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "122d786d2c43a49beb6f397551b4a050d8229eaa54c7ddf9ee4b98899b8742d0"
+dependencies = [
+ "lazy_static",
+ "log",
+ "symphonia-core",
+ "symphonia-metadata",
+ "symphonia-utils-xiph",
+]
+
+[[package]]
+name = "symphonia-format-ogg"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b4955c67c1ed3aa8ae8428d04ca8397fbef6a19b2b051e73b5da8b1435639cb"
+dependencies = [
+ "log",
+ "symphonia-core",
+ "symphonia-metadata",
+ "symphonia-utils-xiph",
+]
+
+[[package]]
+name = "symphonia-format-riff"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2d7c3df0e7d94efb68401d81906eae73c02b40d5ec1a141962c592d0f11a96f"
+dependencies = [
+ "extended",
+ "log",
+ "symphonia-core",
+ "symphonia-metadata",
+]
+
+[[package]]
+name = "symphonia-metadata"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36306ff42b9ffe6e5afc99d49e121e0bd62fe79b9db7b9681d48e29fa19e6b16"
+dependencies = [
+ "encoding_rs",
+ "lazy_static",
+ "log",
+ "symphonia-core",
+]
+
+[[package]]
+name = "symphonia-utils-xiph"
+version = "0.5.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee27c85ab799a338446b68eec77abf42e1a6f1bb490656e121c6e27bfbab9f16"
+dependencies = [
+ "symphonia-core",
+ "symphonia-metadata",
+]
+
[[package]]
name = "syn"
version = "1.0.109"
@@ -8667,12 +8649,6 @@ dependencies = [
"unicode-ident",
]
-[[package]]
-name = "sync_wrapper"
-version = "0.1.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
-
[[package]]
name = "sync_wrapper"
version = "1.0.2"
@@ -8693,17 +8669,6 @@ dependencies = [
"syn 2.0.114",
]
-[[package]]
-name = "system-configuration"
-version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
-dependencies = [
- "bitflags 1.3.2",
- "core-foundation 0.9.4",
- "system-configuration-sys 0.5.0",
-]
-
[[package]]
name = "system-configuration"
version = "0.7.0"
@@ -8712,17 +8677,7 @@ checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b"
dependencies = [
"bitflags 2.10.0",
"core-foundation 0.9.4",
- "system-configuration-sys 0.6.0",
-]
-
-[[package]]
-name = "system-configuration-sys"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9"
-dependencies = [
- "core-foundation-sys",
- "libc",
+ "system-configuration-sys",
]
[[package]]
@@ -8958,7 +8913,7 @@ dependencies = [
"once_cell",
"pin-project",
"rc-box",
- "reqwest 0.12.28",
+ "reqwest",
"rgb",
"serde",
"serde_json",
@@ -9168,7 +9123,7 @@ dependencies = [
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
- "socket2 0.6.2",
+ "socket2",
"tokio-macros",
"tracing",
"windows-sys 0.61.2",
@@ -9397,8 +9352,8 @@ dependencies = [
"async-trait",
"base64 0.22.1",
"bytes",
- "http 1.4.0",
- "http-body 1.0.1",
+ "http",
+ "http-body",
"http-body-util",
"percent-encoding",
"pin-project",
@@ -9418,7 +9373,7 @@ dependencies = [
"futures-core",
"futures-util",
"pin-project-lite",
- "sync_wrapper 1.0.2",
+ "sync_wrapper",
"tokio",
"tower-layer",
"tower-service",
@@ -9436,8 +9391,8 @@ dependencies = [
"bytes",
"futures-core",
"futures-util",
- "http 1.4.0",
- "http-body 1.0.1",
+ "http",
+ "http-body",
"http-body-util",
"http-range-header",
"httpdate",
@@ -9597,7 +9552,7 @@ dependencies = [
"byteorder",
"bytes",
"data-encoding",
- "http 1.4.0",
+ "http",
"httparse",
"log",
"rand 0.8.5",
@@ -9617,7 +9572,7 @@ checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442"
dependencies = [
"bytes",
"data-encoding",
- "http 1.4.0",
+ "http",
"httparse",
"log",
"rand 0.9.2",
@@ -9640,8 +9595,6 @@ dependencies = [
"either",
"enum_dispatch",
"futures-util",
- "reqwest 0.11.27",
- "serde",
"smallvec",
"thiserror 1.0.69",
"tokio",
@@ -9789,6 +9742,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
+[[package]]
+name = "unit-prefix"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3"
+
[[package]]
name = "universal-hash"
version = "0.5.1"
@@ -9825,6 +9784,42 @@ dependencies = [
"webpki-roots 0.26.11",
]
+[[package]]
+name = "ureq"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdc97a28575b85cfedf2a7e7d3cc64b3e11bd8ac766666318003abbacc7a21fc"
+dependencies = [
+ "base64 0.22.1",
+ "cookie_store",
+ "der",
+ "flate2",
+ "log",
+ "native-tls",
+ "percent-encoding",
+ "rustls 0.23.36",
+ "rustls-pki-types",
+ "serde",
+ "serde_json",
+ "socks",
+ "ureq-proto",
+ "utf-8",
+ "webpki-root-certs",
+ "webpki-roots 1.0.6",
+]
+
+[[package]]
+name = "ureq-proto"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f"
+dependencies = [
+ "base64 0.22.1",
+ "http",
+ "httparse",
+ "log",
+]
+
[[package]]
name = "url"
version = "2.5.8"
@@ -10052,19 +10047,6 @@ dependencies = [
"web-sys",
]
-[[package]]
-name = "wasm-streams"
-version = "0.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d1ec4f6517c9e11ae630e200b2b65d193279042e28edd4a2cda233e46670bbb"
-dependencies = [
- "futures-util",
- "js-sys",
- "wasm-bindgen",
- "wasm-bindgen-futures",
- "web-sys",
-]
-
[[package]]
name = "wasmparser"
version = "0.244.0"
@@ -10099,9 +10081,9 @@ dependencies = [
[[package]]
name = "webpki-root-certs"
-version = "1.0.5"
+version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36a29fc0408b113f68cf32637857ab740edfafdf460c326cd2afaa2d84cc05dc"
+checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca"
dependencies = [
"rustls-pki-types",
]
@@ -10147,6 +10129,28 @@ dependencies = [
"winsafe",
]
+[[package]]
+name = "whisper-rs"
+version = "0.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71ea5d2401f30f51d08126a2d133fee4c1955136519d7ac6cf6f5ac0a91e6bc8"
+dependencies = [
+ "libc",
+ "whisper-rs-sys",
+]
+
+[[package]]
+name = "whisper-rs-sys"
+version = "0.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5e2a6e06e7ac7b8f53c53a5f50bb0bc823ba69b63ecd887339f807a5598bbd2"
+dependencies = [
+ "bindgen",
+ "cfg-if",
+ "cmake",
+ "fs_extra",
+]
+
[[package]]
name = "whoami"
version = "1.6.1"
@@ -10188,27 +10192,6 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
-[[package]]
-name = "windows"
-version = "0.62.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "527fadee13e0c05939a6a05d5bd6eec6cd2e3dbd648b9f8e447c6518133d8580"
-dependencies = [
- "windows-collections",
- "windows-core",
- "windows-future",
- "windows-numerics",
-]
-
-[[package]]
-name = "windows-collections"
-version = "0.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23b2d95af1a8a14a3c7367e1ed4fc9c20e0a26e79551b1454d72583c97cc6610"
-dependencies = [
- "windows-core",
-]
-
[[package]]
name = "windows-core"
version = "0.62.2"
@@ -10222,17 +10205,6 @@ dependencies = [
"windows-strings 0.5.1",
]
-[[package]]
-name = "windows-future"
-version = "0.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1d6f90251fe18a279739e78025bd6ddc52a7e22f921070ccdc67dde84c605cb"
-dependencies = [
- "windows-core",
- "windows-link 0.2.1",
- "windows-threading",
-]
-
[[package]]
name = "windows-implement"
version = "0.60.2"
@@ -10267,16 +10239,6 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
-[[package]]
-name = "windows-numerics"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e2e40844ac143cdb44aead537bbf727de9b044e107a0f1220392177d15b0f26"
-dependencies = [
- "windows-core",
- "windows-link 0.2.1",
-]
-
[[package]]
name = "windows-registry"
version = "0.5.3"
@@ -10335,15 +10297,6 @@ dependencies = [
"windows-link 0.2.1",
]
-[[package]]
-name = "windows-sys"
-version = "0.45.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
-dependencies = [
- "windows-targets 0.42.2",
-]
-
[[package]]
name = "windows-sys"
version = "0.48.0"
@@ -10389,21 +10342,6 @@ dependencies = [
"windows-link 0.2.1",
]
-[[package]]
-name = "windows-targets"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
-dependencies = [
- "windows_aarch64_gnullvm 0.42.2",
- "windows_aarch64_msvc 0.42.2",
- "windows_i686_gnu 0.42.2",
- "windows_i686_msvc 0.42.2",
- "windows_x86_64_gnu 0.42.2",
- "windows_x86_64_gnullvm 0.42.2",
- "windows_x86_64_msvc 0.42.2",
-]
-
[[package]]
name = "windows-targets"
version = "0.48.5"
@@ -10452,21 +10390,6 @@ dependencies = [
"windows_x86_64_msvc 0.53.1",
]
-[[package]]
-name = "windows-threading"
-version = "0.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37"
-dependencies = [
- "windows-link 0.2.1",
-]
-
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
-
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
@@ -10485,12 +10408,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
-
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
@@ -10509,12 +10426,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
-[[package]]
-name = "windows_i686_gnu"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
-
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
@@ -10545,12 +10456,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
-[[package]]
-name = "windows_i686_msvc"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
-
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
@@ -10569,12 +10474,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
-
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
@@ -10593,12 +10492,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
-
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
@@ -10617,12 +10510,6 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
-
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
@@ -10659,16 +10546,6 @@ dependencies = [
"memchr",
]
-[[package]]
-name = "winreg"
-version = "0.50.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
-dependencies = [
- "cfg-if",
- "windows-sys 0.48.0",
-]
-
[[package]]
name = "winsafe"
version = "0.0.19"
diff --git a/Cargo.toml b/Cargo.toml
index ba7a53a3c..5a2ef8480 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -137,11 +137,17 @@ tempfile = "3"
# Prometheus metrics (optional, behind "metrics" feature)
prometheus = { version = "0.13", optional = true }
+whisper-rs = { version = "0.15", optional = true, features = ["vulkan"] }
+hf-hub = { version = "0.5", optional = true }
+symphonia = { version = "0.5", features = ["mp3", "aac", "flac", "ogg", "wav", "isomp4"], optional = true }
+ogg = { version = "0.9", optional = true }
+opus = { version = "0.3", optional = true }
pdf-extract = "0.10.0"
open = "5.3.3"
urlencoding = "2.1.3"
[features]
+stt-whisper = ["dep:whisper-rs", "dep:hf-hub", "dep:symphonia", "dep:ogg", "dep:opus"]
metrics = ["dep:prometheus"]
[lints.clippy]
diff --git a/README.md b/README.md
index 8a7bafb1d..33894ae6d 100644
--- a/README.md
+++ b/README.md
@@ -193,6 +193,30 @@ channel = "my-provider/my-model"
Additional built-in providers include **NVIDIA**, **MiniMax**, **Moonshot AI (Kimi)**, and **Z.AI Coding Plan** — configure with `nvidia_key`, `minimax_key`, `moonshot_key`, or `zai_coding_plan_key` in `[llm]`.
+### Voice Transcription
+
+Audio attachments (voice messages, audio files) are transcribed before being passed to the channel. Set `routing.voice` to choose the backend:
+
+**Provider-based** — route through any configured LLM provider that supports audio input:
+
+```toml
+[defaults.routing]
+voice = "openai/whisper-1"
+```
+
+**Local Whisper** (`stt-whisper` feature, requires `--features stt-whisper` at build time) — run inference locally via [whisper-rs](https://codeberg.org/tazz4843/whisper-rs), no API call needed:
+
+```toml
+[defaults.routing]
+voice = "whisper-local://small"
+```
+
+The model is downloaded automatically from [`ggerganov/whisper.cpp`](https://huggingface.co/ggerganov/whisper.cpp) on first use and cached in `~/.cache/huggingface/hub`. Supported size names: `tiny`, `tiny.en`, `base`, `base.en`, `small`, `small.en`, `medium`, `medium.en`, `large` (an alias for `large-v3`), `large-v1`, `large-v2`, `large-v3`. An absolute path to a GGML model file also works; note the three slashes, e.g. `whisper-local:///path/to/ggml-small.bin`.
+
+GPU acceleration via Vulkan is enabled automatically when a compatible device is detected. The loaded model is cached for the process lifetime — restart to switch models.
+
+Ogg/Opus audio (Telegram voice messages) is decoded natively. All other formats are handled via symphonia.
+
### Skills
Extensible skill system integrated with [skills.sh](https://skills.sh):
diff --git a/prompts/en/tools/transcribe_audio_description.md.j2 b/prompts/en/tools/transcribe_audio_description.md.j2
new file mode 100644
index 000000000..b00515fba
--- /dev/null
+++ b/prompts/en/tools/transcribe_audio_description.md.j2
@@ -0,0 +1 @@
+Transcribe an audio file to text using local speech-to-text. Provide the path to the audio file. Supports ogg, opus, mp3, flac, wav, and m4a formats. Use this instead of external whisper CLI tools.
diff --git a/src/agent/channel.rs b/src/agent/channel.rs
index fd1a83d71..9f4b617f7 100644
--- a/src/agent/channel.rs
+++ b/src/agent/channel.rs
@@ -4,7 +4,7 @@ use crate::agent::branch::Branch;
use crate::agent::compactor::Compactor;
use crate::agent::status::StatusBlock;
use crate::agent::worker::Worker;
-use crate::config::ApiType;
+
use crate::conversation::{ChannelStore, ConversationLogger, ProcessRunLogger};
use crate::error::{AgentError, Result};
use crate::hooks::SpacebotHook;
@@ -1956,193 +1956,32 @@ async fn transcribe_audio_attachment(
);
let routing = deps.runtime_config.routing.load();
- let voice_model = routing.voice.trim();
- if voice_model.is_empty() {
- return UserContent::text(format!(
+ let voice_model = routing.voice.clone();
+
+ match crate::stt::transcribe_bytes(&voice_model, &bytes, &attachment.mime_type, &deps.llm_manager, http).await {
+ Ok(transcript) => UserContent::text(format!(
+ "\n{}\n",
+ attachment.filename, attachment.mime_type, transcript
+ )),
+ Err(crate::stt::SttError::NotConfigured) => UserContent::text(format!(
"[Audio attachment received but no voice model is configured in routing.voice: {}]",
attachment.filename
- ));
- }
-
- let (provider_id, model_name) = match deps.llm_manager.resolve_model(voice_model) {
- Ok(parts) => parts,
- Err(error) => {
- tracing::warn!(%error, model = %voice_model, "invalid voice model route");
- return UserContent::text(format!(
- "[Audio transcription failed for {}: invalid voice model '{}']",
- attachment.filename, voice_model
- ));
- }
- };
-
- let provider = match deps.llm_manager.get_provider(&provider_id) {
- Ok(provider) => provider,
- Err(error) => {
- tracing::warn!(%error, provider = %provider_id, "voice provider not configured");
- return UserContent::text(format!(
- "[Audio transcription failed for {}: provider '{}' is not configured]",
- attachment.filename, provider_id
- ));
- }
- };
-
- if provider.api_type == ApiType::Anthropic {
- return UserContent::text(format!(
- "[Audio transcription failed for {}: provider '{}' does not support input_audio on this endpoint]",
- attachment.filename, provider_id
- ));
- }
-
- let format = audio_format_for_attachment(attachment);
- use base64::Engine as _;
- let base64_audio = base64::engine::general_purpose::STANDARD.encode(&bytes);
-
- let endpoint = format!(
- "{}/v1/chat/completions",
- provider.base_url.trim_end_matches('/')
- );
- let body = serde_json::json!({
- "model": model_name,
- "messages": [{
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": "Transcribe this audio verbatim. Return only the transcription text."
- },
- {
- "type": "input_audio",
- "input_audio": {
- "data": base64_audio,
- "format": format,
- }
- }
- ]
- }],
- "temperature": 0
- });
-
- let response = match http
- .post(&endpoint)
- .header("authorization", format!("Bearer {}", provider.api_key))
- .header("content-type", "application/json")
- .json(&body)
- .send()
- .await
- {
- Ok(response) => response,
- Err(error) => {
- tracing::warn!(%error, model = %voice_model, "voice transcription request failed");
- return UserContent::text(format!(
- "[Audio transcription failed for {}]",
+ )),
+ Err(crate::stt::SttError::EmptyResult) => {
+ tracing::warn!(filename = %attachment.filename, "transcription returned empty text");
+ UserContent::text(format!(
+ "[Audio transcription returned empty text for {}]",
attachment.filename
- ));
+ ))
}
- };
-
- let status = response.status();
- let response_body = match response.json::().await {
- Ok(body) => body,
Err(error) => {
- tracing::warn!(%error, model = %voice_model, "invalid transcription response");
- return UserContent::text(format!(
- "[Audio transcription failed for {}]",
- attachment.filename
- ));
+ tracing::warn!(%error, filename = %attachment.filename, "audio transcription failed");
+ UserContent::text(format!(
+ "[Audio transcription failed for {}: {}]",
+ attachment.filename, error
+ ))
}
- };
-
- if !status.is_success() {
- let message = response_body["error"]["message"]
- .as_str()
- .unwrap_or("unknown error");
- tracing::warn!(
- status = %status,
- model = %voice_model,
- error = %message,
- "voice transcription provider returned error"
- );
- return UserContent::text(format!(
- "[Audio transcription failed for {}: {}]",
- attachment.filename, message
- ));
- }
-
- let transcript = extract_transcript_text(&response_body);
- if transcript.is_empty() {
- tracing::warn!(model = %voice_model, "empty transcription returned");
- return UserContent::text(format!(
- "[Audio transcription returned empty text for {}]",
- attachment.filename
- ));
- }
-
- UserContent::text(format!(
- "\n{}\n",
- attachment.filename, attachment.mime_type, transcript
- ))
-}
-
-fn audio_format_for_attachment(attachment: &crate::Attachment) -> &'static str {
- let mime = attachment.mime_type.to_lowercase();
- if mime.contains("mpeg") || mime.contains("mp3") {
- return "mp3";
- }
- if mime.contains("wav") {
- return "wav";
}
- if mime.contains("flac") {
- return "flac";
- }
- if mime.contains("aac") {
- return "aac";
- }
- if mime.contains("ogg") {
- return "ogg";
- }
- if mime.contains("mp4") || mime.contains("m4a") {
- return "m4a";
- }
-
- match attachment
- .filename
- .rsplit('.')
- .next()
- .unwrap_or_default()
- .to_lowercase()
- .as_str()
- {
- "mp3" => "mp3",
- "wav" => "wav",
- "flac" => "flac",
- "aac" => "aac",
- "m4a" | "mp4" => "m4a",
- "oga" | "ogg" => "ogg",
- _ => "ogg",
- }
-}
-
-fn extract_transcript_text(body: &serde_json::Value) -> String {
- if let Some(text) = body["choices"][0]["message"]["content"].as_str() {
- return text.trim().to_string();
- }
-
- let Some(parts) = body["choices"][0]["message"]["content"].as_array() else {
- return String::new();
- };
-
- parts
- .iter()
- .filter_map(|part| {
- if part["type"].as_str() == Some("text") {
- part["text"].as_str().map(str::trim)
- } else {
- None
- }
- })
- .filter(|text| !text.is_empty())
- .collect::>()
- .join("\n")
}
/// Download a text attachment and inline its content for the LLM.
diff --git a/src/agent/worker.rs b/src/agent/worker.rs
index ae369ad70..5b7d8fcc1 100644
--- a/src/agent/worker.rs
+++ b/src/agent/worker.rs
@@ -1,5 +1,7 @@
//! Worker: Independent task execution process.
+use std::sync::Arc;
+
use crate::agent::compactor::estimate_history_tokens;
use crate::config::BrowserConfig;
use crate::error::Result;
@@ -193,6 +195,9 @@ impl Worker {
let mcp_tools = self.deps.mcp_manager.get_tools().await;
// Create per-worker ToolServer with task tools
+ let routing = self.deps.runtime_config.routing.load();
+ let voice_model = routing.voice.clone();
+
let worker_tool_server = crate::tools::create_worker_tool_server(
self.deps.agent_id.clone(),
self.id,
@@ -204,9 +209,10 @@ impl Worker {
self.deps.runtime_config.workspace_dir.clone(),
self.deps.runtime_config.instance_dir.clone(),
mcp_tools,
+ voice_model,
+ Arc::clone(&self.deps.llm_manager),
+ self.deps.llm_manager.http_client().clone(),
);
-
- let routing = self.deps.runtime_config.routing.load();
let model_name = routing.resolve(ProcessType::Worker, None).to_string();
let model = SpacebotModel::make(&self.deps.llm_manager, &model_name)
.with_context(&*self.deps.agent_id, "worker")
diff --git a/src/lib.rs b/src/lib.rs
index 98b4eac3f..4844f0550 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -20,6 +20,7 @@ pub mod prompts;
pub mod secrets;
pub mod settings;
pub mod skills;
+pub mod stt;
#[cfg(feature = "metrics")]
pub mod telemetry;
pub mod tools;
diff --git a/src/prompts/text.rs b/src/prompts/text.rs
index d9614adae..7840882e1 100644
--- a/src/prompts/text.rs
+++ b/src/prompts/text.rs
@@ -158,6 +158,9 @@ fn lookup(lang: &str, key: &str) -> &'static str {
("en", "tools/send_message_to_another_channel") => {
include_str!("../../prompts/en/tools/send_message_description.md.j2")
}
+ ("en", "tools/transcribe_audio") => {
+ include_str!("../../prompts/en/tools/transcribe_audio_description.md.j2")
+ }
// Fallback: unknown language or key -> try English
(lang, key) if lang != "en" => {
diff --git a/src/stt.rs b/src/stt.rs
new file mode 100644
index 000000000..735fc5bdf
--- /dev/null
+++ b/src/stt.rs
@@ -0,0 +1,523 @@
+//! Speech-to-text transcription.
+//!
+//! Provides a unified `transcribe_bytes` function that dispatches to either:
+//! - The local Whisper backend (`whisper-local://`) when the `stt-whisper`
+//! feature is enabled.
+//! - An OpenAI-compatible HTTP provider (anything else) via `input_audio`.
+
+use crate::llm::manager::LlmManager;
+use crate::config::ApiType;
+
+#[cfg(feature = "stt-whisper")]
+pub use local::transcribe;
+
+/// Unified error type for all STT backends.
+///
+/// Returned by `transcribe_bytes`; the `Display` text of each variant comes
+/// from its `#[error(...)]` attribute.
+#[derive(Debug, thiserror::Error)]
+pub enum SttError {
+ /// The voice model string was empty after trimming whitespace.
+ #[error("no voice model configured in routing.voice")]
+ NotConfigured,
+ /// A `whisper-local://` model was requested but the crate was compiled
+ /// without the `stt-whisper` feature.
+ #[error("local Whisper STT is not available in this build")]
+ WhisperNotBuilt,
+ /// Failure reported by the local Whisper backend. This variant only exists
+ /// when the `stt-whisper` feature is enabled.
+ #[error("whisper error: {0}")]
+ #[cfg(feature = "stt-whisper")]
+ Whisper(#[from] local::WhisperError),
+ /// The provider lookup for the resolved provider id failed; carries that id.
+ #[error("provider '{0}' is not configured")]
+ ProviderNotConfigured(String),
+ /// The provider uses the Anthropic API type, which this module does not
+ /// send audio to; carries the provider id.
+ #[error("provider '{0}' does not support audio transcription on this endpoint")]
+ ProviderUnsupported(String),
+ /// The voice model string could not be resolved to a provider/model pair.
+ /// Fields: the offending spec, then the resolver's error text.
+ #[error("invalid voice model spec '{0}': {1}")]
+ InvalidModel(String, String),
+ /// The HTTP request could not be sent, the response body could not be
+ /// decoded, or the provider answered with a non-success status.
+ #[error("transcription request failed: {0}")]
+ Http(String),
+ /// The backend answered successfully but produced no transcript text.
+ #[error("transcription returned empty result")]
+ EmptyResult,
+}
+
+/// Transcribe raw audio bytes using the configured voice model.
+///
+/// `voice_model` is the full value from `routing.voice`, e.g.:
+/// - `"whisper-local://small"` — local Whisper
+/// - `"openai/whisper-1"` — OpenAI-compatible HTTP provider
+///
+/// `mime_type` is used to set the audio format hint for HTTP providers; the
+/// local backend ignores it.
+///
+/// # Errors
+///
+/// - [`SttError::NotConfigured`] when `voice_model` is blank.
+/// - [`SttError::WhisperNotBuilt`] when a `whisper-local://` model is requested
+///   in a build without the `stt-whisper` feature.
+/// - [`SttError::InvalidModel`], [`SttError::ProviderNotConfigured`] and
+///   [`SttError::ProviderUnsupported`] when the provider route cannot be used.
+/// - [`SttError::Http`] when the request fails, the provider returns a
+///   non-success status, or a successful response is not valid JSON.
+/// - [`SttError::EmptyResult`] when either backend yields no transcript text.
+pub async fn transcribe_bytes(
+    voice_model: &str,
+    audio: &[u8],
+    mime_type: &str,
+    llm_manager: &LlmManager,
+    http: &reqwest::Client,
+) -> Result<String, SttError> {
+    let voice_model = voice_model.trim();
+    if voice_model.is_empty() {
+        return Err(SttError::NotConfigured);
+    }
+
+    // Local Whisper backend.
+    if let Some(model_spec) = voice_model.strip_prefix("whisper-local://") {
+        #[cfg(feature = "stt-whisper")]
+        {
+            let transcript = local::transcribe(model_spec, audio)
+                .await
+                .map_err(SttError::Whisper)?;
+            // Mirror the HTTP path: silence / no recognised speech is reported
+            // as `EmptyResult` so callers can handle both backends uniformly.
+            if transcript.trim().is_empty() {
+                return Err(SttError::EmptyResult);
+            }
+            return Ok(transcript);
+        }
+        #[cfg(not(feature = "stt-whisper"))]
+        {
+            let _ = (model_spec, audio);
+            return Err(SttError::WhisperNotBuilt);
+        }
+    }
+
+    // HTTP provider path.
+    let (provider_id, model_name) = llm_manager
+        .resolve_model(voice_model)
+        .map_err(|e| SttError::InvalidModel(voice_model.to_string(), e.to_string()))?;
+
+    let provider = llm_manager
+        .get_provider(&provider_id)
+        .map_err(|_| SttError::ProviderNotConfigured(provider_id.clone()))?;
+
+    if provider.api_type == ApiType::Anthropic {
+        return Err(SttError::ProviderUnsupported(provider_id));
+    }
+
+    let format = audio_format_for_mime(mime_type);
+    use base64::Engine as _;
+    let base64_audio = base64::engine::general_purpose::STANDARD.encode(audio);
+
+    let endpoint = format!(
+        "{}/v1/chat/completions",
+        provider.base_url.trim_end_matches('/')
+    );
+    let body = serde_json::json!({
+        "model": model_name,
+        "messages": [{
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Transcribe this audio verbatim. Return only the transcription text."
+                },
+                {
+                    "type": "input_audio",
+                    "input_audio": {
+                        "data": base64_audio,
+                        "format": format,
+                    }
+                }
+            ]
+        }],
+        "temperature": 0
+    });
+
+    let response = http
+        .post(&endpoint)
+        .header("authorization", format!("Bearer {}", provider.api_key))
+        .header("content-type", "application/json")
+        .json(&body)
+        .send()
+        .await
+        .map_err(|e| SttError::Http(e.to_string()))?;
+
+    let status = response.status();
+    // Read the body as text first: an error response is not guaranteed to be
+    // JSON (e.g. a proxy's HTML error page), and the HTTP status must still be
+    // reported in that case rather than a JSON decode error.
+    let raw_body = response
+        .text()
+        .await
+        .map_err(|e| SttError::Http(e.to_string()))?;
+    let parsed = serde_json::from_str::<serde_json::Value>(&raw_body);
+
+    if !status.is_success() {
+        let message = parsed
+            .as_ref()
+            .ok()
+            .and_then(|json| json["error"]["message"].as_str())
+            .unwrap_or("unknown error");
+        return Err(SttError::Http(format!("{status}: {message}")));
+    }
+
+    let response_body = parsed.map_err(|e| SttError::Http(e.to_string()))?;
+
+    let transcript = extract_transcript_text(&response_body);
+    if transcript.is_empty() {
+        return Err(SttError::EmptyResult);
+    }
+
+    Ok(transcript)
+}
+
+/// Map a MIME type to the short audio format name used in provider requests.
+///
+/// Matching is case-insensitive and substring based, so parameters such as
+/// `; codecs=opus` do not get in the way. Anything unrecognised is reported
+/// as `"ogg"`.
+pub fn audio_format_for_mime(mime_type: &str) -> &'static str {
+    // Ordered rules: the first needle found in the lowercased MIME type wins.
+    const RULES: &[(&str, &str)] = &[
+        ("mpeg", "mp3"),
+        ("mp3", "mp3"),
+        ("wav", "wav"),
+        ("flac", "flac"),
+        ("aac", "aac"),
+        ("ogg", "ogg"),
+        ("mp4", "m4a"),
+        ("m4a", "m4a"),
+    ];
+
+    let lowered = mime_type.to_lowercase();
+    RULES
+        .iter()
+        .find(|&&(needle, _)| lowered.contains(needle))
+        .map_or("ogg", |&(_, format)| format)
+}
+
+/// Extract the transcript text from an OpenAI-compatible chat completion response.
+///
+/// Looks at `choices[0].message.content`, which may be either:
+/// - a plain string — returned trimmed, or
+/// - an array of content parts — the trimmed, non-empty `text` of every part
+///   whose `type` is `"text"` is joined with newlines.
+///
+/// Returns an empty string when the field is missing or has any other shape.
+fn extract_transcript_text(body: &serde_json::Value) -> String {
+    let content = &body["choices"][0]["message"]["content"];
+
+    if let Some(text) = content.as_str() {
+        return text.trim().to_string();
+    }
+
+    let Some(parts) = content.as_array() else {
+        return String::new();
+    };
+
+    parts
+        .iter()
+        .filter(|part| part["type"].as_str() == Some("text"))
+        .filter_map(|part| part["text"].as_str())
+        .map(str::trim)
+        .filter(|text| !text.is_empty())
+        .collect::<Vec<_>>()
+        .join("\n")
+}
+
+#[cfg(feature = "stt-whisper")]
+mod local {
+ use std::sync::OnceLock;
+
+ use hf_hub::api::sync::Api;
+ use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};
+
+ /// Known model size names and their GGML filenames on `ggerganov/whisper.cpp`.
+ ///
+ /// Each entry is `(size name, GGML file name)`. The bare `large` name maps to
+ /// the same file as `large-v3`.
+ const KNOWN_SIZES: &[(&str, &str)] = &[
+ ("tiny", "ggml-tiny.bin"),
+ ("tiny.en", "ggml-tiny.en.bin"),
+ ("base", "ggml-base.bin"),
+ ("base.en", "ggml-base.en.bin"),
+ ("small", "ggml-small.bin"),
+ ("small.en", "ggml-small.en.bin"),
+ ("medium", "ggml-medium.bin"),
+ ("medium.en", "ggml-medium.en.bin"),
+ ("large", "ggml-large-v3.bin"),
+ ("large-v1", "ggml-large-v1.bin"),
+ ("large-v2", "ggml-large-v2.bin"),
+ ("large-v3", "ggml-large-v3.bin"),
+ ];
+
+ /// Cached (model_spec, WhisperContext) — one per process.
+ ///
+ /// If the user changes `routing.voice` at runtime we just keep using the
+ /// already-loaded model; a restart is required to switch models.
+ ///
+ /// The stored string is the model spec that was passed when the context
+ /// was first loaded.
+ static CONTEXT: OnceLock<(String, WhisperContext)> = OnceLock::new();
+
+ /// Errors produced by the local Whisper backend. Every variant carries a
+ /// human-readable description of the underlying failure.
+ #[derive(Debug, thiserror::Error)]
+ pub enum WhisperError {
+ /// The model spec is an absolute path that does not exist, or is not one
+ /// of the known size names.
+ #[error("model not found and could not be downloaded: {0}")]
+ ModelNotFound(String),
+ /// Creating the hf-hub API client or fetching the model file failed.
+ #[error("hf-hub error: {0}")]
+ HfHub(String),
+ /// The Whisper context could not be created from the model file.
+ #[error("failed to load whisper model: {0}")]
+ Load(String),
+ /// A Whisper state could not be created from the loaded context.
+ #[error("failed to create whisper state: {0}")]
+ State(String),
+ /// Inference failed, or the blocking task running it could not be joined.
+ #[error("transcription failed: {0}")]
+ Transcription(String),
+ /// The input audio could not be probed or decoded.
+ #[error("audio decode error: {0}")]
+ Decode(String),
+ }
+
+    /// Transcribe raw audio bytes using the local Whisper model.
+    ///
+    /// `model_spec` is the part after `whisper-local://`:
+    /// - A known size name (`small`, `medium`, `large`, …) — downloaded from HF
+    ///   into the HF cache on first use.
+    /// - An absolute path (`/path/to/ggml-small.bin`) — loaded directly.
+    ///
+    /// Returns the transcript text; it is empty when no non-blank segment was
+    /// produced.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`WhisperError`] when the model cannot be resolved or loaded,
+    /// the audio cannot be decoded, inference fails, or the blocking task
+    /// cannot be joined (reported as [`WhisperError::Transcription`]).
+    pub async fn transcribe(model_spec: &str, audio: &[u8]) -> Result<String, WhisperError> {
+        // `spawn_blocking` needs `'static` data, so both inputs are copied.
+        let model_spec = model_spec.to_owned();
+        let audio = audio.to_vec();
+
+        // Whisper inference is CPU-bound and blocking — run on a thread pool.
+        tokio::task::spawn_blocking(move || transcribe_blocking(&model_spec, &audio))
+            .await
+            .map_err(|e| WhisperError::Transcription(e.to_string()))?
+    }
+
+    /// Synchronous transcription: load (or reuse) the model, decode the audio
+    /// and run Whisper over it.
+    ///
+    /// Segments are trimmed, blank ones are dropped, and the remainder is
+    /// joined with single spaces. Segments whose text is not valid UTF-8 are
+    /// skipped.
+    ///
+    /// # Errors
+    ///
+    /// Propagates model loading and audio decoding errors, and returns
+    /// [`WhisperError::State`] / [`WhisperError::Transcription`] when Whisper
+    /// itself fails.
+    fn transcribe_blocking(model_spec: &str, audio: &[u8]) -> Result<String, WhisperError> {
+        let ctx = get_or_load_context(model_spec)?;
+
+        let mut state = ctx
+            .create_state()
+            .map_err(|e| WhisperError::State(e.to_string()))?;
+
+        let samples = decode_to_f32(audio)?;
+
+        let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 1 });
+        // Let Whisper detect the spoken language instead of assuming one.
+        params.set_language(Some("auto"));
+        params.set_print_progress(false);
+        params.set_print_realtime(false);
+        params.set_print_timestamps(false);
+
+        state
+            .full(params, &samples)
+            .map_err(|e| WhisperError::Transcription(e.to_string()))?;
+
+        // Iterate the segment range directly; no `as usize` cast of the C
+        // integer count is needed, so a negative count cannot wrap into a
+        // huge allocation request.
+        let parts: Vec<String> = (0..state.full_n_segments())
+            .filter_map(|index| state.get_segment(index))
+            .filter_map(|segment| segment.to_str().ok().map(|text| text.trim().to_owned()))
+            .filter(|text| !text.is_empty())
+            .collect();
+
+        Ok(parts.join(" "))
+    }
+
+    /// Return the cached context, loading it first if necessary.
+    ///
+    /// The first successful load wins for the lifetime of the process. When a
+    /// later call asks for a different `model_spec`, the already-loaded model
+    /// is returned and a warning is logged so the mismatch is visible.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error from resolving the model path, or
+    /// [`WhisperError::Load`] when the model file cannot be loaded.
+    fn get_or_load_context(model_spec: &str) -> Result<&'static WhisperContext, WhisperError> {
+        if let Some((loaded_spec, ctx)) = CONTEXT.get() {
+            if loaded_spec.as_str() != model_spec {
+                tracing::warn!(
+                    loaded = %loaded_spec,
+                    requested = %model_spec,
+                    "a different Whisper model is already loaded; restart to switch models"
+                );
+            }
+            return Ok(ctx);
+        }
+
+        let model_path = resolve_model_path(model_spec)?;
+
+        tracing::info!(model_path = %model_path, "loading local Whisper model");
+
+        let params = WhisperContextParameters::default();
+        let ctx = WhisperContext::new_with_params(&model_path, params)
+            .map_err(|e| WhisperError::Load(e.to_string()))?;
+
+        // `set` only fails when another thread initialised the cell while this
+        // one was loading; in that case the other thread's context is used and
+        // ours is dropped.
+        let _ = CONTEXT.set((model_spec.to_owned(), ctx));
+
+        tracing::info!(model_path = %model_path, "Whisper model loaded and cached");
+
+        let (_, cached) = CONTEXT
+            .get()
+            .expect("CONTEXT is populated: `set` was called just above");
+        Ok(cached)
+    }
+
+    /// Resolve a model spec to an absolute path on disk, downloading via hf-hub if needed.
+    ///
+    /// - A spec that is an absolute path (including platform-specific forms
+    ///   such as `C:\models\ggml-small.bin`) is used as-is if it exists.
+    /// - Otherwise the spec must be one of the names in `KNOWN_SIZES`; the
+    ///   matching file is fetched from the `ggerganov/whisper.cpp` repository
+    ///   through hf-hub.
+    ///
+    /// # Errors
+    ///
+    /// - [`WhisperError::ModelNotFound`] when an absolute path does not exist
+    ///   or the size name is unknown.
+    /// - [`WhisperError::HfHub`] when the hf-hub client cannot be created or
+    ///   the file cannot be fetched.
+    fn resolve_model_path(spec: &str) -> Result<String, WhisperError> {
+        // Absolute path — use directly. The leading-slash check is kept so a
+        // `/…` spec is still treated as a path on every platform.
+        let candidate = std::path::Path::new(spec);
+        if spec.starts_with('/') || candidate.is_absolute() {
+            if candidate.exists() {
+                return Ok(spec.to_owned());
+            }
+            return Err(WhisperError::ModelNotFound(format!(
+                "model file not found: {spec}"
+            )));
+        }
+
+        // Known size name — fetch via hf-hub (uses HF_HOME cache, downloads if missing).
+        let Some(&(_, filename)) = KNOWN_SIZES.iter().find(|(name, _)| *name == spec) else {
+            let known = KNOWN_SIZES
+                .iter()
+                .map(|(name, _)| *name)
+                .collect::<Vec<_>>()
+                .join(", ");
+            return Err(WhisperError::ModelNotFound(format!(
+                "unknown model size '{spec}'; use one of: {known}"
+            )));
+        };
+
+        tracing::info!(model = %spec, filename = %filename, "fetching Whisper model via hf-hub");
+
+        let api = Api::new().map_err(|e| WhisperError::HfHub(e.to_string()))?;
+        let repo = api.model("ggerganov/whisper.cpp".to_owned());
+        let path = repo
+            .get(filename)
+            .map_err(|e| WhisperError::HfHub(e.to_string()))?;
+
+        Ok(path.to_string_lossy().to_string())
+    }
+
+    /// Decode arbitrary audio bytes to 16 kHz mono f32 samples for Whisper.
+    ///
+    /// Ogg/Opus (Telegram voice messages) is handled directly via the `ogg` +
+    /// `opus` crates. Everything else falls through to symphonia.
+    ///
+    /// Channel count and sample rate are taken from each decoded buffer rather
+    /// than from the container metadata, so the mix-down never indexes a
+    /// channel the buffer does not have. Packets that symphonia reports as
+    /// undecodable are skipped instead of failing the whole decode.
+    ///
+    /// # Errors
+    ///
+    /// Returns `WhisperError::Decode` when the format cannot be probed, no
+    /// audio track is present, no decoder can be created, or reading/decoding
+    /// fails with an error other than the ones handled in the loop below.
+    fn decode_to_f32(audio: &[u8]) -> Result<Vec<f32>, WhisperError> {
+        if is_ogg_opus(audio) {
+            return decode_ogg_opus(audio);
+        }
+
+        use symphonia::core::audio::{AudioBuffer, Signal as _};
+        use symphonia::core::codecs::DecoderOptions;
+        use symphonia::core::errors::Error as SymphoniaError;
+        use symphonia::core::formats::FormatOptions;
+        use symphonia::core::io::MediaSourceStream;
+        use symphonia::core::meta::MetadataOptions;
+        use symphonia::core::probe::Hint;
+
+        let cursor = std::io::Cursor::new(audio.to_vec());
+        let mss = MediaSourceStream::new(Box::new(cursor), Default::default());
+
+        let probed = symphonia::default::get_probe()
+            .format(
+                &Hint::new(),
+                mss,
+                &FormatOptions::default(),
+                &MetadataOptions::default(),
+            )
+            .map_err(|e| WhisperError::Decode(e.to_string()))?;
+
+        let mut format = probed.format;
+        let track = format
+            .tracks()
+            .iter()
+            .find(|t| t.codec_params.codec != symphonia::core::codecs::CODEC_TYPE_NULL)
+            .ok_or_else(|| WhisperError::Decode("no audio track found".into()))?
+            .clone();
+
+        let mut decoder = symphonia::default::get_codecs()
+            .make(&track.codec_params, &DecoderOptions::default())
+            .map_err(|e| WhisperError::Decode(e.to_string()))?;
+
+        let track_id = track.id;
+        // Starting value only; replaced by the rate of the decoded buffers.
+        let mut sample_rate = track.codec_params.sample_rate.unwrap_or(16000);
+
+        let mut raw_samples: Vec<f32> = Vec::new();
+
+        loop {
+            let packet = match format.next_packet() {
+                Ok(p) => p,
+                // An I/O error or a reset request both end decoding here.
+                Err(SymphoniaError::IoError(_)) | Err(SymphoniaError::ResetRequired) => break,
+                Err(e) => return Err(WhisperError::Decode(e.to_string())),
+            };
+
+            if packet.track_id() != track_id {
+                continue;
+            }
+
+            let decoded = match decoder.decode(&packet) {
+                Ok(decoded) => decoded,
+                // Invalid data in one packet: skip it and keep the rest of the audio.
+                Err(SymphoniaError::DecodeError(message)) => {
+                    tracing::warn!(error = %message, "skipping undecodable audio packet");
+                    continue;
+                }
+                Err(e) => return Err(WhisperError::Decode(e.to_string())),
+            };
+
+            let spec = decoded.spec().clone();
+            let channels = spec.channels.count();
+            if channels == 0 {
+                // Nothing to mix, and dividing by zero would produce NaN samples.
+                continue;
+            }
+            sample_rate = spec.rate;
+
+            // Convert to f32 using a sample-converting audio buffer.
+            let mut f32_buf: AudioBuffer<f32> = AudioBuffer::new(decoded.capacity() as u64, spec);
+            decoded.convert(&mut f32_buf);
+
+            // Mix down to mono by averaging the channels of each frame.
+            let frames = f32_buf.frames();
+            raw_samples.reserve(frames);
+            for frame in 0..frames {
+                let sum: f32 = (0..channels).map(|ch| f32_buf.chan(ch)[frame]).sum();
+                raw_samples.push(sum / channels as f32);
+            }
+        }
+
+        // Resample to 16 kHz if needed.
+        if sample_rate != 16000 {
+            raw_samples = resample(raw_samples, sample_rate, 16000);
+        }
+
+        Ok(raw_samples)
+    }
+
+    /// Check if the audio is an Ogg container with an Opus stream.
+    ///
+    /// Returns `true` when the buffer starts with the `OggS` capture pattern
+    /// and bytes 28..36 hold the `OpusHead` magic (first packet of the first
+    /// logical stream). Buffers shorter than 36 bytes return `false`.
+    fn is_ogg_opus(audio: &[u8]) -> bool {
+        // `get` performs the length check, so a buffer of exactly 36 bytes is
+        // accepted and shorter ones simply yield `None`.
+        audio.starts_with(b"OggS") && audio.get(28..36) == Some(&b"OpusHead"[..])
+    }
+
+    /// Decode Ogg/Opus audio to 16 kHz mono f32 samples.
+    ///
+    /// The first two packets (OpusHead and OpusTags) are treated as headers.
+    /// OpusHead supplies the channel count and the pre-skip; the pre-skip
+    /// samples are dropped from the start of the decoded audio before
+    /// resampling. Decoding stops at the first packet that cannot be read from
+    /// the container, keeping whatever was decoded up to that point.
+    ///
+    /// # Errors
+    ///
+    /// Returns `WhisperError::Decode` when the Opus decoder cannot be created
+    /// or a packet fails to decode.
+    fn decode_ogg_opus(audio: &[u8]) -> Result<Vec<f32>, WhisperError> {
+        use ogg::reading::PacketReader;
+
+        // The decoder is created at 48 kHz; the output is resampled at the end.
+        const DECODE_RATE: u32 = 48_000;
+        // Max Opus frame: 120ms at 48kHz = 5760 samples per channel.
+        const MAX_FRAME_SAMPLES: usize = 5760;
+
+        let mut reader = PacketReader::new(std::io::Cursor::new(audio));
+
+        let mut header_packets = 0;
+        let mut decoder: Option<opus::Decoder> = None;
+        let mut channels = 1usize;
+        let mut pre_skip = 0usize;
+        let mut samples: Vec<f32> = Vec::new();
+        // Scratch buffer for one decoded packet, sized once the channel count is known.
+        let mut pcm: Vec<f32> = Vec::new();
+
+        loop {
+            let packet = match reader.read_packet() {
+                Ok(Some(packet)) => packet,
+                Ok(None) => break,
+                Err(e) => {
+                    // Best effort: keep what was decoded so far, but say why we stopped.
+                    tracing::warn!(error = %e, "stopping Ogg/Opus decode at unreadable packet");
+                    break;
+                }
+            };
+
+            // Skip the OpusHead and OpusTags header packets.
+            if header_packets < 2 {
+                if header_packets == 0 {
+                    // Parse OpusHead to get channel count and pre-skip.
+                    if packet.data.len() >= 11 && packet.data.starts_with(b"OpusHead") {
+                        // Only mono and stereo decoders are created below, so the
+                        // count used for buffers and mix-down must be 1 or 2 too.
+                        channels = if packet.data[9] == 2 { 2 } else { 1 };
+                        if let Some(bytes) = packet.data.get(10..12) {
+                            pre_skip = usize::from(u16::from_le_bytes([bytes[0], bytes[1]]));
+                        }
+                    }
+                    decoder = Some(
+                        opus::Decoder::new(
+                            DECODE_RATE,
+                            if channels == 2 {
+                                opus::Channels::Stereo
+                            } else {
+                                opus::Channels::Mono
+                            },
+                        )
+                        .map_err(|e| WhisperError::Decode(e.to_string()))?,
+                    );
+                    pcm = vec![0f32; MAX_FRAME_SAMPLES * channels];
+                }
+                header_packets += 1;
+                continue;
+            }
+
+            let dec = decoder
+                .as_mut()
+                .expect("decoder is created while handling the first header packet");
+            // `n` is the number of decoded samples per channel.
+            let n = dec
+                .decode_float(&packet.data, &mut pcm, false)
+                .map_err(|e| WhisperError::Decode(e.to_string()))?;
+
+            // Mix down to mono.
+            if channels == 1 {
+                samples.extend_from_slice(&pcm[..n]);
+            } else {
+                samples.extend(
+                    pcm[..n * channels]
+                        .chunks_exact(channels)
+                        .map(|frame| frame.iter().sum::<f32>() / channels as f32),
+                );
+            }
+        }
+
+        // Discard the pre-skip samples announced in OpusHead (counted at 48 kHz,
+        // which is the rate decoded at here).
+        samples.drain(..pre_skip.min(samples.len()));
+
+        // Resample from 48 kHz to 16 kHz.
+        Ok(resample(samples, DECODE_RATE, 16000))
+    }
+
+    /// Simple linear resampler (good enough for speech; not for music).
+    ///
+    /// Converts `samples` from `from_hz` to `to_hz` by linear interpolation
+    /// between neighbouring input samples. No low-pass filtering is applied.
+    ///
+    /// * Equal rates return `samples` unchanged.
+    /// * A rate of zero on either side returns an empty vector.
+    /// * Past the last input sample the final value is held.
+    fn resample(samples: Vec<f32>, from_hz: u32, to_hz: u32) -> Vec<f32> {
+        if from_hz == to_hz {
+            return samples;
+        }
+        // A zero rate has no meaningful ratio; without this guard `from_hz == 0`
+        // would request an unbounded output length.
+        if from_hz == 0 || to_hz == 0 {
+            return Vec::new();
+        }
+
+        let ratio = f64::from(from_hz) / f64::from(to_hz);
+        let out_len = (samples.len() as f64 / ratio) as usize;
+
+        (0..out_len)
+            .map(|i| {
+                let pos = i as f64 * ratio;
+                let idx = pos as usize;
+                let frac = (pos - idx as f64) as f32;
+                let a = samples.get(idx).copied().unwrap_or(0.0);
+                // Hold the last sample at the tail instead of fading toward zero.
+                let b = samples.get(idx + 1).copied().unwrap_or(a);
+                a + frac * (b - a)
+            })
+            .collect()
+    }
+}
diff --git a/src/tools.rs b/src/tools.rs
index d4caa2c69..3e3bb3dcb 100644
--- a/src/tools.rs
+++ b/src/tools.rs
@@ -41,6 +41,7 @@ pub mod set_status;
pub mod shell;
pub mod skip;
pub mod spawn_worker;
+pub mod transcribe_audio;
pub mod web_search;
pub use branch_tool::{BranchArgs, BranchError, BranchOutput, BranchTool};
@@ -76,10 +77,14 @@ pub use set_status::{SetStatusArgs, SetStatusError, SetStatusOutput, SetStatusTo
pub use shell::{ShellArgs, ShellError, ShellOutput, ShellResult, ShellTool};
pub use skip::{SkipArgs, SkipError, SkipFlag, SkipOutput, SkipTool, new_skip_flag};
pub use spawn_worker::{SpawnWorkerArgs, SpawnWorkerError, SpawnWorkerOutput, SpawnWorkerTool};
+pub use transcribe_audio::{
+ TranscribeAudioArgs, TranscribeAudioError, TranscribeAudioOutput, TranscribeAudioTool,
+};
pub use web_search::{SearchResult, WebSearchArgs, WebSearchError, WebSearchOutput, WebSearchTool};
use crate::agent::channel::ChannelState;
use crate::config::BrowserConfig;
+use crate::llm::manager::LlmManager;
use crate::memory::MemorySearch;
use crate::{AgentId, ChannelId, OutboundResponse, ProcessEvent, WorkerId};
use rig::tool::Tool as _;
@@ -272,6 +277,9 @@ pub fn create_worker_tool_server(
workspace: PathBuf,
instance_dir: PathBuf,
mcp_tools: Vec,
+ voice_model: String,
+ llm_manager: Arc,
+ http: reqwest::Client,
) -> ToolServerHandle {
let mut server = ToolServer::new()
.tool(ShellTool::new(instance_dir.clone(), workspace.clone()))
@@ -281,6 +289,14 @@ pub fn create_worker_tool_server(
agent_id, worker_id, channel_id, event_tx,
));
+ if !voice_model.is_empty() {
+ server = server.tool(TranscribeAudioTool::new(
+ voice_model,
+ llm_manager,
+ http,
+ ));
+ }
+
if browser_config.enabled {
server = server.tool(BrowserTool::new(browser_config, screenshot_dir));
}
diff --git a/src/tools/transcribe_audio.rs b/src/tools/transcribe_audio.rs
new file mode 100644
index 000000000..f9b1194a8
--- /dev/null
+++ b/src/tools/transcribe_audio.rs
@@ -0,0 +1,117 @@
+//! Transcribe audio tool for workers.
+//!
+//! Allows workers to transcribe audio files using whatever STT backend is
+//! configured in `routing.voice` — local Whisper or an HTTP provider.
+
+use std::sync::Arc;
+
+use rig::completion::ToolDefinition;
+use rig::tool::Tool;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+use crate::llm::manager::LlmManager;
+
+/// Tool for transcribing audio files to text.
+///
+/// Holds everything `call` forwards to `crate::stt::transcribe_bytes`: the
+/// voice model spec, the shared LLM manager and an HTTP client.
+#[derive(Clone)]
+pub struct TranscribeAudioTool {
+    /// The configured voice model spec (full `routing.voice` value).
+    voice_model: String,
+    /// Shared LLM manager passed through to the STT layer.
+    llm_manager: Arc<LlmManager>,
+    /// HTTP client passed through to the STT layer.
+    http: reqwest::Client,
+}
+
+impl TranscribeAudioTool {
+    /// Create a new transcribe audio tool.
+    ///
+    /// * `voice_model` — the voice model spec, converted into an owned `String`.
+    /// * `llm_manager` — shared LLM manager handed to the STT layer on each call.
+    /// * `http` — HTTP client handed to the STT layer on each call.
+    pub fn new(
+        voice_model: impl Into<String>,
+        llm_manager: Arc<LlmManager>,
+        http: reqwest::Client,
+    ) -> Self {
+        Self {
+            voice_model: voice_model.into(),
+            llm_manager,
+            http,
+        }
+    }
+}
+
+/// Error type for transcribe audio tool.
+///
+/// Wraps a single failure message; its `Display` output is
+/// `Audio transcription failed: <message>`.
+#[derive(Debug, thiserror::Error)]
+#[error("Audio transcription failed: {0}")]
+pub struct TranscribeAudioError(String);
+
+/// Arguments for transcribe audio tool.
+// NOTE: the `///` text on this type is left as-is because the `JsonSchema`
+// derive may surface doc comments as schema descriptions — TODO confirm.
+// NOTE(review): `call` passes `path` to `tokio::fs::read` unchanged, so a
+// relative path is resolved by the OS against the process working directory;
+// presumably that is the workspace — verify against the caller.
+#[derive(Debug, Deserialize, JsonSchema)]
+pub struct TranscribeAudioArgs {
+    /// Path to the audio file to transcribe (absolute or relative to the workspace).
+    /// Supports ogg, opus, mp3, flac, wav, m4a.
+    pub path: String,
+}
+
+/// Output from transcribe audio tool.
+///
+/// Serialized as an object with a single `transcript` field.
+#[derive(Debug, Serialize)]
+pub struct TranscribeAudioOutput {
+    /// The transcribed text.
+    pub transcript: String,
+}
+
+impl Tool for TranscribeAudioTool {
+    const NAME: &'static str = "transcribe_audio";
+
+    type Error = TranscribeAudioError;
+    type Args = TranscribeAudioArgs;
+    type Output = TranscribeAudioOutput;
+
+    /// Describe the tool: its name, the description text stored under
+    /// `tools/transcribe_audio`, and a schema with one required `path` string.
+    async fn definition(&self, _prompt: String) -> ToolDefinition {
+        ToolDefinition {
+            name: Self::NAME.to_string(),
+            description: crate::prompts::text::get("tools/transcribe_audio").to_string(),
+            parameters: serde_json::json!({
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Path to the audio file to transcribe (absolute or relative to the workspace). Supports ogg, opus, mp3, flac, wav, m4a."
+                    }
+                },
+                "required": ["path"]
+            }),
+        }
+    }
+
+    /// Read the file at `args.path` and transcribe it with the configured
+    /// voice model.
+    ///
+    /// # Errors
+    ///
+    /// Returns `TranscribeAudioError` when the file cannot be read or when
+    /// `crate::stt::transcribe_bytes` fails.
+    async fn call(&self, args: Self::Args) -> Result<Self::Output, Self::Error> {
+        let audio = tokio::fs::read(&args.path)
+            .await
+            .map_err(|e| TranscribeAudioError(format!("failed to read {}: {}", args.path, e)))?;
+
+        // Infer mime type from file extension for the HTTP provider path.
+        let mime_type = mime_from_path(&args.path);
+
+        let transcript = crate::stt::transcribe_bytes(
+            &self.voice_model,
+            &audio,
+            mime_type,
+            &self.llm_manager,
+            &self.http,
+        )
+        .await
+        .map_err(|e| TranscribeAudioError(e.to_string()))?;
+
+        Ok(TranscribeAudioOutput { transcript })
+    }
+}
+
+/// Infer a MIME type string from a file path extension.
+///
+/// The extension is taken from the final path component and compared
+/// case-insensitively. Unknown or missing extensions fall back to
+/// `audio/ogg`.
+fn mime_from_path(path: &str) -> &'static str {
+    // `Path::extension` only looks at the file name, so a dot in a directory
+    // name or a dot-less file called e.g. `mp3` is not mistaken for an extension.
+    let extension = std::path::Path::new(path)
+        .extension()
+        .and_then(|ext| ext.to_str())
+        .map(str::to_ascii_lowercase);
+
+    match extension.as_deref() {
+        Some("mp3") => "audio/mpeg",
+        Some("wav") => "audio/wav",
+        Some("flac") => "audio/flac",
+        Some("aac") => "audio/aac",
+        Some("m4a" | "mp4") => "audio/mp4",
+        Some("opus") => "audio/opus",
+        _ => "audio/ogg",
+    }
+}