Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,8 @@ ARISTECH_SECRET=
AZURE_SUBSCRIPTION_KEY=your_azure_key
AZURE_REGION=your_azure_region

# ElevenLabs Configuration
ELEVENLABS_API_KEY=your_elevenlabs_api_key

# Audio Knife Configuration
AUDIO_KNIFE_ADDRESS=127.0.0.1:8123
1 change: 1 addition & 0 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
## Code Minimalism
- Avoid defensive code unless there is concrete evidence it is necessary.
- Avoid redundant logic and repeated calls; keep only the minimal behavior required for correctness.
- Do not add tests unless explicitly requested by the user.
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ members = [
"filter-test",
"services/aristech",
"services/azure",
"services/elevenlabs",
"services/google-transcribe",
"services/openai-dialog",
"services/playback",
Expand All @@ -27,6 +28,7 @@ openai-dialog = { path = "services/openai-dialog" }
azure = { workspace = true }
azure-speech = { workspace = true }
aristech = { workspace = true }
elevenlabs = { workspace = true }

# basic

Expand Down Expand Up @@ -84,6 +86,7 @@ context-switch-core = { path = "core" }
azure = { path = "services/azure" }
playback = { path = "services/playback" }
aristech = { path = "services/aristech" }
elevenlabs = { path = "services/elevenlabs" }

anyhow = "1.0.102"
derive_more = { version = "2.1.1", features = ["full"] }
Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Context Switch is a Rust-based framework for building real-time conversational a
- Pluggable service architecture
- Integration with:
- Azure Speech Services (transcription, translation, synthesis)
- ElevenLabs realtime speech-to-text (Scribe v2 Realtime)
- OpenAI dialog services
- Asynchronous processing using Tokio

Expand All @@ -16,6 +17,7 @@ Context Switch is a Rust-based framework for building real-time conversational a
- `core/`: Core functionality and interfaces
- `services/`: Implementation of various service integrations
- `azure/`: Azure Speech Services integration
- `elevenlabs/`: ElevenLabs speech-to-text integration
- `google-transcribe/`: Google Speech-to-Text integration (WIP)
- `openai-dialog/`: OpenAI conversational services integration
- `audio-knife/`: WebSocket server that implements the [mod_audio_fork](https://github.com/questnet/freeswitch-modules/tree/questnet/mod_audio_fork) protocol for real-time audio streaming from telephony systems via [FreeSWITCH](https://signalwire.com/freeswitch). Provides a bridge between audio sources and the Context Switch framework.
Expand Down Expand Up @@ -61,6 +63,9 @@ cargo run --example openai-dialog
# Run Azure transcribe example
cargo run --example azure-transcribe

# Run ElevenLabs transcribe example
cargo run --example elevenlabs-transcribe

# Run Azure synthesize example
cargo run --example azure-synthesize
```
Expand Down Expand Up @@ -90,6 +95,9 @@ OPENAI_REALTIME_API_MODEL=gpt-4o-mini-realtime-preview
AZURE_SUBSCRIPTION_KEY=your_azure_key
AZURE_REGION=your_azure_region

# ElevenLabs Configuration
ELEVENLABS_API_KEY=your_elevenlabs_key

# Audio Knife Configuration
AUDIO_KNIFE_ADDRESS=127.0.0.1:8123
```
Expand Down
4 changes: 3 additions & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ derive_more = { workspace = true }

serde = { workspace = true }
# For function calling parameters.
serde_json = { workspace = true }
serde_json = { workspace = true }
isolang = "2.4.0"
oxilangtag = "0.1.5"
112 changes: 112 additions & 0 deletions core/src/language.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
use std::fmt;

use isolang::Language;
use oxilangtag::LanguageTag;

/// Errors produced while converting between BCP 47 language tags and ISO 639 codes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LanguageCodeError {
    /// The input could not be parsed as a BCP 47 language tag.
    InvalidBcp47Tag {
        /// The tag as it was supplied by the caller.
        tag: String,
        /// Textual description of the parse failure.
        message: String,
    },
    /// The language code or primary subtag could not be mapped to a known language.
    UnsupportedLanguage {
        /// The code or subtag that failed the lookup.
        language: String,
    },
}

impl std::error::Error for LanguageCodeError {}

impl fmt::Display for LanguageCodeError {
    /// Renders a human-readable, single-line description of the error.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::UnsupportedLanguage { language } => {
                write!(formatter, "Unsupported language subtag '{language}'")
            }
            Self::InvalidBcp47Tag { tag, message } => {
                write!(formatter, "Invalid BCP 47 tag '{tag}': {message}")
            }
        }
    }
}

/// Converts a BCP 47 language tag into its ISO 639-3 language code.
///
/// The conversion uses the primary language subtag only and ignores script, region, variant,
/// and extension subtags. BCP 47 tags are case-insensitive, so the primary subtag is
/// lowercased before the lookup (`EN-US` resolves exactly like `en-US`).
///
/// # Errors
///
/// - [`LanguageCodeError::InvalidBcp47Tag`] if `tag` cannot be parsed as a BCP 47 tag.
/// - [`LanguageCodeError::UnsupportedLanguage`] if the primary language subtag is neither a
///   known 2-letter ISO 639-1 code nor a known 3-letter ISO 639-3 code. The error carries the
///   subtag as it was written in `tag`.
pub fn bcp47_to_iso639_3(tag: &str) -> Result<&'static str, LanguageCodeError> {
    let parsed = LanguageTag::parse(tag).map_err(|error| LanguageCodeError::InvalidBcp47Tag {
        tag: tag.to_string(),
        message: error.to_string(),
    })?;

    let primary_language = parsed.primary_language();
    // `LanguageTag::parse` validates the tag but does not normalize its case, whereas the
    // ISO 639 lookups below only match lowercase codes.
    let normalized = primary_language.to_ascii_lowercase();
    let language = match normalized.len() {
        2 => Language::from_639_1(&normalized),
        3 => Language::from_639_3(&normalized),
        _ => None,
    };

    language
        .map(|language| language.to_639_3())
        .ok_or_else(|| LanguageCodeError::UnsupportedLanguage {
            language: primary_language.to_string(),
        })
}

/// Converts an ISO 639 language code into a BCP 47 language tag.
///
/// Accepts ISO 639-1 (2-letter) and ISO 639-3 (3-letter) codes and returns a tag that
/// consists of a primary language subtag only. When the language has an ISO 639-1 code,
/// that 2-letter form is returned (for example `eng` -> `en`); otherwise the ISO 639-3
/// code is returned.
///
/// # Errors
///
/// Returns [`LanguageCodeError::UnsupportedLanguage`] when `code` has any other length or
/// is not a known language code.
pub fn iso639_to_bcp47(code: &str) -> Result<String, LanguageCodeError> {
    let lookup = if code.len() == 2 {
        Language::from_639_1(code)
    } else if code.len() == 3 {
        Language::from_639_3(code)
    } else {
        None
    };

    let Some(resolved) = lookup else {
        return Err(LanguageCodeError::UnsupportedLanguage {
            language: code.to_string(),
        });
    };

    // Prefer the shorter ISO 639-1 form when one exists.
    let subtag = match resolved.to_639_1() {
        Some(two_letter) => two_letter,
        None => resolved.to_639_3(),
    };

    Ok(subtag.to_string())
}

/// Converts an ISO 639-3 language code into a BCP 47 language tag.
///
/// Thin wrapper that forwards `code` unchanged to [`iso639_to_bcp47`] and returns its
/// result, including any [`LanguageCodeError`] it reports.
pub fn iso639_3_to_bcp47(code: &str) -> Result<String, LanguageCodeError> {
iso639_to_bcp47(code)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Bare primary-language tags map to their ISO 639-3 codes.
    #[test]
    fn bcp47_to_iso639_3_for_primary_language_tags() {
        for (tag, expected) in [("en", "eng"), ("de", "deu"), ("fr", "fra")] {
            assert_eq!(bcp47_to_iso639_3(tag).unwrap(), expected);
        }
    }

    /// Region and script subtags do not influence the result.
    #[test]
    fn bcp47_to_iso639_3_ignores_non_primary_subtags() {
        for (tag, expected) in [("en-US", "eng"), ("zh-Hant-TW", "zho")] {
            assert_eq!(bcp47_to_iso639_3(tag).unwrap(), expected);
        }
    }

    /// A tag with an empty subtag is reported as an invalid BCP 47 tag.
    #[test]
    fn bcp47_to_iso639_3_rejects_malformed_tags() {
        let outcome = bcp47_to_iso639_3("en--US");
        assert!(matches!(
            outcome,
            Err(LanguageCodeError::InvalidBcp47Tag { .. })
        ));
    }

    /// A well-formed tag whose primary language is unknown is reported as unsupported.
    #[test]
    fn bcp47_to_iso639_3_rejects_unsupported_primary_language() {
        let expected = LanguageCodeError::UnsupportedLanguage {
            language: "qaa".to_string(),
        };
        assert_eq!(bcp47_to_iso639_3("qaa"), Err(expected));
    }
}
1 change: 1 addition & 0 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub mod billing_collector;
mod billing_context;
pub mod conversation;
mod duration;
pub mod language;
mod protocol;
mod registry;
pub mod service;
Expand Down
161 changes: 161 additions & 0 deletions examples/elevenlabs-transcribe.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
use std::{env, path::Path, time::Duration};

use anyhow::{Context, Result, bail};
use cpal::traits::{DeviceTrait, HostTrait, StreamTrait};
use rodio::DeviceSinkBuilder;
use tokio::{
select,
sync::mpsc::{channel, unbounded_channel},
};

use context_switch::{
AudioConsumer, InputModality, OutputModality, services::ElevenLabsTranscribe,
};
use context_switch_core::{
AudioFormat, AudioFrame, audio,
conversation::{Conversation, Input},
service::Service,
};

const LANGUAGE: &str = "de-DE";

/// Entry point of the example.
///
/// Without arguments the default microphone is transcribed; with exactly one argument that
/// argument is treated as the path of an audio file to transcribe. Any other argument count
/// is an error.
#[tokio::main]
async fn main() -> Result<()> {
    // A failure to load the dotenv overrides aborts the example.
    dotenvy::dotenv_override()?;
    tracing_subscriber::fmt::init();

    let mut cli_args = env::args();
    // The count includes the program name at index 0.
    let argument_count = cli_args.len();

    if argument_count == 1 {
        recognize_from_microphone().await
    } else if argument_count == 2 {
        // Index 1 is guaranteed to exist because the length is exactly two.
        let audio_path = cli_args.nth(1).unwrap();
        recognize_from_wav(Path::new(&audio_path)).await
    } else {
        bail!("Invalid number of arguments, expect zero or one")
    }
}

/// Transcribes the audio file at `file`.
///
/// The file is decoded into mono 16 kHz frames, all frames are queued on a fresh audio
/// channel, and the consumer side of that channel is handed to [`recognize`].
///
/// # Errors
///
/// Fails if the file cannot be converted to frames, if it yields no frames, if a frame
/// cannot be queued, or if recognition itself fails.
async fn recognize_from_wav(file: &Path) -> Result<()> {
    let format = AudioFormat {
        sample_rate: 16_000,
        channels: 1,
    };

    let decoded_frames = playback::audio_file_to_frames(file, format)?;
    if decoded_frames.is_empty() {
        bail!("No frames in the audio file");
    }

    let (producer, input_consumer) = format.new_channel();
    // Queue the whole file up front; `producer` stays bound until recognition returns.
    for audio_frame in decoded_frames {
        producer.produce(audio_frame)?;
    }

    recognize(format, input_consumer).await
}

/// Transcribes audio captured from the default input device.
///
/// Opens the default input device with its default configuration, forwards every captured
/// buffer (converted to `i16` samples) into a fresh audio channel, and hands the consumer
/// side of that channel to [`recognize`].
///
/// # Errors
///
/// Fails if no default input device or input configuration is available, if the input
/// stream cannot be built or started, or if recognition itself fails. Device and stream
/// failures are reported as errors rather than panics so callers see the failing step.
async fn recognize_from_microphone() -> Result<()> {
    // Keep an output sink alive so Bluetooth headsets (e.g. AirPods) can switch to a
    // bidirectional profile. Without this, some devices report an input stream of zeros.
    let _output_sink = match DeviceSinkBuilder::open_default_sink() {
        Ok(sink) => {
            println!("Opened default output sink for headset profile");
            Some(sink)
        }
        Err(e) => {
            println!("Warning: Failed to open default output sink: {e}");
            None
        }
    };

    let host = cpal::default_host();
    let device = host
        .default_input_device()
        .context("Failed to get default input device")?;
    let config = device
        .default_input_config()
        .context("Failed to get default input config")?;

    println!("config: {config:?}");

    let channels = config.channels();
    let sample_rate = config.sample_rate();
    let format = AudioFormat::new(channels, sample_rate);

    let (producer, input_consumer) = format.new_channel();

    let stream = device
        .build_input_stream(
            &config.into(),
            move |data: &[f32], _: &cpal::InputCallbackInfo| {
                let samples = audio::into_i16(data);

                let frame = AudioFrame { format, samples };
                // The callback cannot return an error, so a failed send is only reported.
                if producer.produce(frame).is_err() {
                    println!("Failed to send audio data");
                }
            },
            move |err| {
                eprintln!("Error occurred on stream: {err}");
            },
            Some(Duration::from_secs(1)),
        )
        .context("Failed to build input stream")?;

    stream.play().context("Failed to play stream")?;

    // `stream` stays bound until `recognize` returns so capturing continues meanwhile.
    recognize(format, input_consumer).await
}

async fn recognize(format: AudioFormat, mut input_consumer: AudioConsumer) -> Result<()> {
let params = elevenlabs::transcribe::Params {
api_key: env::var("ELEVENLABS_API_KEY").context("ELEVENLABS_API_KEY undefined")?,
model: None,
host: None,
language: Some(LANGUAGE.to_owned()),
include_language_detection: Some(false),
vad_silence_threshold_secs: None,
vad_threshold: None,
min_speech_duration_ms: None,
min_silence_duration_ms: None,
previous_text: None,
};

let (output_producer, mut output_consumer) = unbounded_channel();
let (conv_input_producer, conv_input_consumer) = channel(16_384);

let service = ElevenLabsTranscribe;
let mut conversation = service.conversation(
params,
Conversation::new(
InputModality::Audio { format },
[OutputModality::Text, OutputModality::InterimText],
conv_input_consumer,
output_producer,
),
);

loop {
select! {
result = &mut conversation => {
result.context("Conversation stopped")?;
break;
}
input = input_consumer.consume() => {
if let Some(frame) = input {
conv_input_producer.try_send(Input::Audio { frame })?;
} else {
break;
}
}
output = output_consumer.recv() => {
if let Some(output) = output {
println!("{output:?}");
} else {
break;
}
}
}
}

Ok(())
}
18 changes: 18 additions & 0 deletions services/elevenlabs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[package]
name = "elevenlabs"
version = "0.1.0"
edition = "2024"

[dependencies]
context-switch-core = { workspace = true }

anyhow = { workspace = true }
async-trait = { workspace = true }
base64 = { workspace = true }
futures = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt", "time"] }
tokio-tungstenite = { version = "0.28.0", features = ["connect", "native-tls"] }
tracing = { workspace = true }
url = { workspace = true }
3 changes: 3 additions & 0 deletions services/elevenlabs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod transcribe;

pub use transcribe::ElevenLabsTranscribe;
Loading
Loading