pragmatrix · pragmatrix · Apr 2, 2026 · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026
diff --git a/.env.example b/.env.example
@@ -11,5 +11,8 @@ ARISTECH_SECRET=
 AZURE_SUBSCRIPTION_KEY=your_azure_key
 AZURE_REGION=your_azure_region
 
+# ElevenLabs Configuration
+ELEVENLABS_API_KEY=your_elevenlabs_key
+
 # Audio Knife Configuration
 AUDIO_KNIFE_ADDRESS=127.0.0.1:8123
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -11,3 +11,4 @@
 ## Code Minimalism
 - Avoid defensive code unless there is concrete evidence it is necessary.
 - Avoid redundant logic and repeated calls; keep only the minimal behavior required for correctness.
+- Do not add tests unless explicitly requested by the user.
diff --git a/Cargo.toml b/Cargo.toml
@@ -12,6 +12,7 @@ members = [
     "filter-test",
     "services/aristech",
     "services/azure",
+    "services/elevenlabs",
     "services/google-transcribe", 
     "services/openai-dialog", 
     "services/playback",
@@ -27,6 +28,7 @@ openai-dialog = { path = "services/openai-dialog" }
 azure = { workspace = true }
 azure-speech = { workspace = true }
 aristech = { workspace = true }
+elevenlabs = { workspace = true }
 
 # basic
 
@@ -84,6 +86,7 @@ context-switch-core = { path = "core" }
 azure = { path = "services/azure" }
 playback = { path = "services/playback" }
 aristech = { path = "services/aristech" }
+elevenlabs = { path = "services/elevenlabs" }
 
 anyhow = "1.0.102"
 derive_more = { version = "2.1.1", features = ["full"] }

diff --git a/README.md b/README.md
@@ -8,6 +8,7 @@ Context Switch is a Rust-based framework for building real-time conversational a
 - Pluggable service architecture
 - Integration with:
   - Azure Speech Services (transcription, translation, synthesis)
+  - ElevenLabs realtime speech-to-text (Scribe v2 Realtime)
   - OpenAI dialog services
 - Asynchronous processing using Tokio
 
@@ -16,6 +17,7 @@ Context Switch is a Rust-based framework for building real-time conversational a
 - `core/`: Core functionality and interfaces
 - `services/`: Implementation of various service integrations
   - `azure/`: Azure Speech Services integration
+  - `elevenlabs/`: ElevenLabs speech-to-text integration
   - `google-transcribe/`: Google Speech-to-Text integration (WIP)
   - `openai-dialog/`: OpenAI conversational services integration
 - `audio-knife/`: WebSocket server that implements the [mod_audio_fork](https://github.com/questnet/freeswitch-modules/tree/questnet/mod_audio_fork) protocol for real-time audio streaming from telephony systems via [FreeSWITCH](https://signalwire.com/freeswitch). Provides a bridge between audio sources and the Context Switch framework.
@@ -61,6 +63,9 @@ cargo run --example openai-dialog
 # Run Azure transcribe example
 cargo run --example azure-transcribe
 
+# Run ElevenLabs transcribe example
+cargo run --example elevenlabs-transcribe
+
 # Run Azure synthesize example
 cargo run --example azure-synthesize
 ```
@@ -90,6 +95,9 @@ OPENAI_REALTIME_API_MODEL=gpt-4o-mini-realtime-preview
 AZURE_SUBSCRIPTION_KEY=your_azure_key
 AZURE_REGION=your_azure_region
 
+# ElevenLabs Configuration
+ELEVENLABS_API_KEY=your_elevenlabs_key
+
 # Audio Knife Configuration
 AUDIO_KNIFE_ADDRESS=127.0.0.1:8123
 ```

diff --git a/core/Cargo.toml b/core/Cargo.toml
@@ -11,4 +11,6 @@ derive_more = { workspace = true }
 
 serde = { workspace = true }
 # For function calling parameters.
-serde_json = { workspace = true }
+serde_json = { workspace = true }
+isolang = "2.4.0"
+oxilangtag = "0.1.5"
diff --git a/core/src/language.rs b/core/src/language.rs
@@ -0,0 +1,112 @@
+use std::fmt;
+
+use isolang::Language;
+use oxilangtag::LanguageTag;
+
+/// Errors produced by the language-code conversions in this module.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum LanguageCodeError {
+    /// The input could not be parsed as a BCP 47 language tag.
+    InvalidBcp47Tag {
+        /// The tag as it was passed to the conversion.
+        tag: String,
+        /// The parser's error, rendered as text.
+        message: String,
+    },
+    /// The language code could not be resolved to a language (it is unknown, or it is not
+    /// 2 or 3 bytes long).
+    UnsupportedLanguage {
+        /// The language code that failed the lookup.
+        language: String,
+    },
+}
+
+impl fmt::Display for LanguageCodeError {
+    /// Writes a single-line, human-readable description of the error.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::InvalidBcp47Tag { tag, message } => {
+                write!(f, "Invalid BCP 47 tag '{tag}': {message}")
+            }
+            Self::UnsupportedLanguage { language } => {
+                write!(f, "Unsupported language subtag '{language}'")
+            }
+        }
+    }
+}
+
+// The default `Error` methods are used as-is; no underlying source error is chained.
+impl std::error::Error for LanguageCodeError {}
+
+/// Converts a BCP 47 language tag into its ISO 639-3 language code.
+///
+/// Only the primary language subtag takes part in the conversion; script, region, variant,
+/// and extension subtags are ignored. BCP 47 tags are case-insensitive (RFC 5646, section
+/// 2.1.1), so the primary subtag is lowercased before the lookup and `EN-us` resolves the
+/// same way as `en-US`.
+///
+/// # Errors
+///
+/// - [`LanguageCodeError::InvalidBcp47Tag`] if `tag` is not a well-formed BCP 47 tag.
+/// - [`LanguageCodeError::UnsupportedLanguage`] if the primary subtag is not a known
+///   2-letter ISO 639-1 or 3-letter ISO 639-3 code. The reported subtag is lowercased.
+pub fn bcp47_to_iso639_3(tag: &str) -> Result<&'static str, LanguageCodeError> {
+    let parsed = LanguageTag::parse(tag).map_err(|error| LanguageCodeError::InvalidBcp47Tag {
+        tag: tag.to_string(),
+        message: error.to_string(),
+    })?;
+
+    // `LanguageTag::parse` keeps the caller's casing, while the `isolang` lookups are keyed
+    // by lowercase codes, so normalize before looking the language up.
+    let primary_language = parsed.primary_language().to_ascii_lowercase();
+    let language = match primary_language.len() {
+        2 => Language::from_639_1(&primary_language),
+        3 => Language::from_639_3(&primary_language),
+        _ => None,
+    };
+
+    language
+        .map(|x| x.to_639_3())
+        .ok_or(LanguageCodeError::UnsupportedLanguage {
+            language: primary_language,
+        })
+}
+
+/// Converts an ISO 639 language code into a BCP 47 language tag.
+///
+/// Accepts 2-letter ISO 639-1 and 3-letter ISO 639-3 codes. The result consists of a primary
+/// language subtag only: the 2-letter ISO 639-1 code when the language has one (for example
+/// `eng` -> `en`), otherwise its ISO 639-3 code.
+///
+/// # Errors
+///
+/// Returns [`LanguageCodeError::UnsupportedLanguage`] when `code` is not 2 or 3 bytes long or
+/// does not name a known language.
+pub fn iso639_to_bcp47(code: &str) -> Result<String, LanguageCodeError> {
+    let lookup = match code.len() {
+        2 => Language::from_639_1(code),
+        3 => Language::from_639_3(code),
+        _ => None,
+    };
+    let Some(language) = lookup else {
+        return Err(LanguageCodeError::UnsupportedLanguage {
+            language: code.to_string(),
+        });
+    };
+
+    // Prefer the shorter ISO 639-1 form whenever the language has one.
+    let subtag = match language.to_639_1() {
+        Some(two_letter) => two_letter,
+        None => language.to_639_3(),
+    };
+    Ok(subtag.to_string())
+}
+
+/// Alias of [`iso639_to_bcp47`]; despite the name, 2-letter ISO 639-1 codes are accepted too.
+pub fn iso639_3_to_bcp47(code: &str) -> Result<String, LanguageCodeError> {
+    iso639_to_bcp47(code)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Bare 2-letter primary tags map to their 3-letter ISO 639-3 codes.
+    #[test]
+    fn bcp47_to_iso639_3_for_primary_language_tags() {
+        for (tag, expected) in [("en", "eng"), ("de", "deu"), ("fr", "fra")] {
+            assert_eq!(bcp47_to_iso639_3(tag), Ok(expected));
+        }
+    }
+
+    /// Script and region subtags do not influence the result.
+    #[test]
+    fn bcp47_to_iso639_3_ignores_non_primary_subtags() {
+        for (tag, expected) in [("en-US", "eng"), ("zh-Hant-TW", "zho")] {
+            assert_eq!(bcp47_to_iso639_3(tag), Ok(expected));
+        }
+    }
+
+    /// A tag with an empty subtag (`--`) is rejected by the parser.
+    #[test]
+    fn bcp47_to_iso639_3_rejects_malformed_tags() {
+        let outcome = bcp47_to_iso639_3("en--US");
+        assert!(matches!(outcome, Err(LanguageCodeError::InvalidBcp47Tag { .. })));
+    }
+
+    /// A well-formed tag whose primary subtag has no language entry is reported as unsupported.
+    #[test]
+    fn bcp47_to_iso639_3_rejects_unsupported_primary_language() {
+        let expected = LanguageCodeError::UnsupportedLanguage {
+            language: "qaa".to_string(),
+        };
+        assert_eq!(bcp47_to_iso639_3("qaa"), Err(expected));
+    }
+}
diff --git a/core/src/lib.rs b/core/src/lib.rs
@@ -3,6 +3,7 @@ pub mod billing_collector;
 mod billing_context;
 pub mod conversation;
 mod duration;
+pub mod language;
 mod protocol;
 mod registry;
 pub mod service;

diff --git a/examples/elevenlabs-transcribe.rs b/examples/elevenlabs-transcribe.rs
@@ -0,0 +1,161 @@
+use std::{env, path::Path, time::Duration};
+
+use anyhow::{Context, Result, bail};
+use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
+use rodio::DeviceSinkBuilder;
+use tokio::{
+    select,
+    sync::mpsc::{channel, unbounded_channel},
+};
+
+use context_switch::{
+    AudioConsumer, InputModality, OutputModality, services::ElevenLabsTranscribe,
+};
+use context_switch_core::{
+    AudioFormat, AudioFrame, audio,
+    conversation::{Conversation, Input},
+    service::Service,
+};
+
+const LANGUAGE: &str = "de-DE"; // passed to the service as `Params::language` in `recognize`
+
+/// Entry point: transcribes from the microphone when started without arguments, or from the
+/// audio file whose path is given as the single argument.
+#[tokio::main]
+async fn main() -> Result<()> {
+    dotenvy::dotenv_override()?;
+    tracing_subscriber::fmt::init();
+
+    let mut arguments = env::args();
+    // The count includes the program name at index 0.
+    let argument_count = arguments.len();
+    if argument_count == 1 {
+        recognize_from_microphone().await
+    } else if argument_count == 2 {
+        let audio_path = arguments.nth(1).unwrap();
+        recognize_from_wav(Path::new(&audio_path)).await
+    } else {
+        bail!("Invalid number of arguments, expect zero or one")
+    }
+}
+
+/// Loads `file` as audio frames, requesting mono 16 kHz, queues all of them, and runs the
+/// transcription over the queued audio.
+///
+/// Fails when the file cannot be converted to frames, yields no frames, or a frame cannot be
+/// queued.
+async fn recognize_from_wav(file: &Path) -> Result<()> {
+    let wav_format = AudioFormat {
+        channels: 1,
+        sample_rate: 16_000,
+    };
+
+    let decoded_frames = playback::audio_file_to_frames(file, wav_format)?;
+    if decoded_frames.is_empty() {
+        bail!("No frames in the audio file");
+    }
+
+    // Queue the whole file up front; `recognize` drains the channel afterwards.
+    let (frame_producer, frame_consumer) = wav_format.new_channel();
+    for frame in decoded_frames {
+        frame_producer.produce(frame)?;
+    }
+
+    recognize(wav_format, frame_consumer).await
+}
+
+/// Captures audio from the default input device and transcribes it.
+///
+/// # Errors
+///
+/// Fails when no default input device or input configuration is available, when the input
+/// stream cannot be built or started, or when the transcription itself fails.
+async fn recognize_from_microphone() -> Result<()> {
+    // Keep an output sink alive so Bluetooth headsets (e.g. AirPods) can switch to a
+    // bidirectional profile. Without this, some devices report an input stream of zeros.
+    let _output_sink = match DeviceSinkBuilder::open_default_sink() {
+        Ok(sink) => {
+            println!("Opened default output sink for headset profile");
+            Some(sink)
+        }
+        Err(e) => {
+            println!("Warning: Failed to open default output sink: {e}");
+            None
+        }
+    };
+
+    let host = cpal::default_host();
+    let device = host
+        .default_input_device()
+        .context("Failed to get default input device")?;
+    // Device failures are reported through the `Result` instead of panicking, matching the
+    // handling of the missing-device case above.
+    let config = device
+        .default_input_config()
+        .context("Failed to get default input config")?;
+
+    println!("config: {config:?}");
+
+    let channels = config.channels();
+    let sample_rate = config.sample_rate();
+    let format = AudioFormat::new(channels, sample_rate);
+
+    let (producer, input_consumer) = format.new_channel();
+
+    let stream = device
+        .build_input_stream(
+            &config.into(),
+            move |data: &[f32], _: &cpal::InputCallbackInfo| {
+                let samples = audio::into_i16(data);
+
+                let frame = AudioFrame { format, samples };
+                if producer.produce(frame).is_err() {
+                    println!("Failed to send audio data");
+                }
+            },
+            move |err| {
+                eprintln!("Error occurred on stream: {err}");
+            },
+            Some(Duration::from_secs(1)),
+        )
+        .context("Failed to build input stream")?;
+
+    stream.play().context("Failed to play stream")?;
+
+    // `stream` stays in scope for the whole transcription below.
+    recognize(format, input_consumer).await
+}
+
+/// Forwards the audio frames from `input_consumer` into an ElevenLabs transcription
+/// conversation and prints every output it produces.
+///
+/// Returns once the conversation finishes or one of the channels is closed. Fails when
+/// `ELEVENLABS_API_KEY` is not set, when the conversation ends with an error, or when a frame
+/// cannot be queued for the conversation.
+async fn recognize(format: AudioFormat, mut input_consumer: AudioConsumer) -> Result<()> {
+    let api_key = env::var("ELEVENLABS_API_KEY").context("ELEVENLABS_API_KEY undefined")?;
+    let params = elevenlabs::transcribe::Params {
+        api_key,
+        model: None,
+        host: None,
+        language: Some(LANGUAGE.to_owned()),
+        include_language_detection: Some(false),
+        vad_silence_threshold_secs: None,
+        vad_threshold: None,
+        min_speech_duration_ms: None,
+        min_silence_duration_ms: None,
+        previous_text: None,
+    };
+
+    let (output_tx, mut output_rx) = unbounded_channel();
+    let (frame_tx, frame_rx) = channel(16_384);
+
+    let wiring = Conversation::new(
+        InputModality::Audio { format },
+        [OutputModality::Text, OutputModality::InterimText],
+        frame_rx,
+        output_tx,
+    );
+    let service = ElevenLabsTranscribe;
+    let mut conversation = service.conversation(params, wiring);
+
+    loop {
+        select! {
+            outcome = &mut conversation => {
+                outcome.context("Conversation stopped")?;
+                break;
+            }
+            next_frame = input_consumer.consume() => {
+                // A closed audio source ends the run.
+                let Some(frame) = next_frame else { break };
+                frame_tx.try_send(Input::Audio { frame })?;
+            }
+            next_output = output_rx.recv() => {
+                // The output channel closing ends the run as well.
+                let Some(output) = next_output else { break };
+                println!("{output:?}");
+            }
+        }
+    }
+
+    Ok(())
+}
diff --git a/services/elevenlabs/Cargo.toml b/services/elevenlabs/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "elevenlabs"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+context-switch-core = { workspace = true }
+
+anyhow = { workspace = true }
+async-trait = { workspace = true }
+base64 = { workspace = true }
+futures = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+tokio = { workspace = true, features = ["macros", "rt", "time"] }
+tokio-tungstenite = { version = "0.28.0", features = ["connect", "native-tls"] }
+tracing = { workspace = true }
+url = { workspace = true }
diff --git a/services/elevenlabs/src/lib.rs b/services/elevenlabs/src/lib.rs
@@ -0,0 +1,3 @@
+pub mod transcribe;
+
+pub use transcribe::ElevenLabsTranscribe;
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		pub mod transcribe;

		pub use transcribe::ElevenLabsTranscribe;