KittenML · rahuldshetty · Feb 28, 2026 · Mar 1, 2026
diff --git a/.gitignore b/.gitignore
@@ -241,4 +241,7 @@ marimo/_lsp/
 __marimo__/
 
 # Streamlit
-.streamlit/secrets.toml
+.streamlit/secrets.toml
+
+
+kitten-tts-mini-0.8/
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "kittentts-rs/crates/espeak-ng-sys/espeak-ng"]
+	path = kittentts-rs/crates/espeak-ng-sys/espeak-ng
+	url = https://github.com/espeak-ng/espeak-ng
diff --git a/kittentts-rs/.gitignore b/kittentts-rs/.gitignore
@@ -0,0 +1,7 @@
+/target/
+Cargo.lock
+espeak-ng.dll
+*.wav
+*.onnx
+*.npz
+.DS_Store
diff --git a/kittentts-rs/Cargo.toml b/kittentts-rs/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "kittentts-rs"
+version = "0.1.0"
+edition = "2021"
+description = "Ultra-lightweight text-to-speech inference in Rust — port of KittenTTS"
+license = "MIT"
+
+[dependencies]
+ort = { version = "2.0.0-rc.11", features = ["ndarray"] }
+ndarray = "0.16"
+ndarray-npy = { version = "0.9", features = ["npz"] }
+hound = "3.5"
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+regex = "1.10"
+clap = { version = "4.4", features = ["derive"] }
+anyhow = "1.0"
+once_cell = "1.19"
+fancy-regex = "0.13"
+espeak-ng-sys = { path = "crates/espeak-ng-sys" }
diff --git a/kittentts-rs/README.md b/kittentts-rs/README.md
@@ -0,0 +1,43 @@
+# KittenTTS-RS
+
+Ultra-lightweight text-to-speech inference in Rust using ONNX Runtime and eSpeak-NG.
+
+This is a Rust port of the KittenTTS project.
+
+## Features
+
+- **Fast & Efficient**: Low-latency synthesis via ONNX Runtime (`ort`).
+- **High Quality**: Accurate pronunciation using IPA phonemes via eSpeak-NG.
+- **Self-Contained**: eSpeak-NG is vendored as a git submodule and linked statically—no system-wide eSpeak-NG installation required.
+- **Configurable**: Multiple voices, speed control, and text preprocessing.
+
+## Prerequisites
+
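+- **Rust**: [Install Rust](https://www.rust-lang.org/tools/install)
+- **Build tools**: CMake and a C/C++ compiler (to build the bundled eSpeak-NG), plus libclang (required by `bindgen`).
+- **eSpeak-NG sources**: fetch the submodule with `git submodule update --init --recursive`.
+- **ONNX Model**: A directory containing `config.json`, an `.onnx` model file, and `voices.npz`.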
+
+## Running Locally
+
+```bash
+# download model weights (onnx)
+git clone https://huggingface.co/KittenML/kitten-tts-mini-0.8
+
+cargo run -- \
+  --text "Hello, I am KittenTTS-RS!" \
+  --model-dir "../kitten-tts-mini-0.8" \
+  --voice "Luna" \
+  --output "output.wav"
+```
+
+### Options
+
+- `--text`: Text to synthesize.
+- `--model-dir`: Directory containing ONNX model files.
+- `--voice`: Voice name (e.g., "Luna", "Leo").
+- `--speed`: Speech speed (default: 1.0).
+- `--output`: Output file path (default: `output.wav`).
+- `--no-clean`: Skip text preprocessing.
+
+---
+
+*Coded with ❤️ by Antigravity.*
diff --git a/kittentts-rs/crates/espeak-ng-sys/Cargo.toml b/kittentts-rs/crates/espeak-ng-sys/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "espeak-ng-sys"
+version = "0.1.0"
+edition = "2021"
+links = "espeak-ng"
+include = [
+    "src/**/*",
+    "espeak-ng/espeak-ng-data/**/*",
+    "espeak-ng/dictsource/**/*",
+    "build.rs",
+    "wrapper.h",
+]
+
+[dependencies]
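+# NOTE(review): carried over from Piper's espeak-ng-sys (used there for path resolution);
+# src/lib.rs in this crate does not reference it — confirm whether it is still needed.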
+once_cell = "1.19"
+
+[build-dependencies]
+cmake = "0.1"
+bindgen = "0.70"
+glob = "0.3"
diff --git a/kittentts-rs/crates/espeak-ng-sys/build.rs b/kittentts-rs/crates/espeak-ng-sys/build.rs
@@ -0,0 +1,68 @@
+use std::env;
+use std::path::PathBuf;
+
+/// Build-script entry point for `espeak-ng-sys`.
+///
+/// Performs three steps:
+/// 1. builds the vendored `espeak-ng` sources as static libraries via CMake,
+/// 2. prints the `cargo:` directives telling rustc where to find and how to link them,
+/// 3. runs bindgen over `wrapper.h` and writes the result to `$OUT_DIR/bindings.rs`.
+///
+/// # Panics
+///
+/// Panics if bindgen cannot generate or write the bindings, or if `OUT_DIR` is not set.
+/// The `cmake` crate also aborts the build script when the CMake build itself fails.
+fn main() {
+    // A build script is compiled for, and runs on, the *host*; `cfg!(target_os = ...)`
+    // would therefore describe the host. Cargo exposes the real compilation target via
+    // `CARGO_CFG_TARGET_OS`, which is what must drive the linker directives below.
+    // Fall back to the host OS only if Cargo did not provide the variable.
+    let target_os =
+        env::var("CARGO_CFG_TARGET_OS").unwrap_or_else(|_| env::consts::OS.to_string());
+
+    // Static, synthesis-only build: no audio output backend, no optional synthesizers,
+    // no async mode and no upstream test suite.
+    let dst = cmake::Config::new("espeak-ng")
+        .define("BUILD_SHARED_LIBS", "OFF")
+        .define("USE_LIBPCAUDIO", "OFF")
+        .define("USE_KLATT", "OFF")
+        .define("USE_MBROLA", "OFF")
+        .define("USE_ASYNC", "OFF")
+        .define("ENABLE_TESTS", "OFF")
+        .build();
+
+    // Best guess at the CMake configuration name, derived from Cargo's profile.
+    let profile = env::var("PROFILE").unwrap_or_else(|_| "debug".to_string());
+    let cmake_config = if profile == "release" {
+        "Release"
+    } else {
+        "Debug"
+    };
+
+    // espeak-ng builds sub-libraries in specific subdirectories
+    let mut search_paths = vec![
+        dst.join("lib"),
+        dst.join("build/src/ucd-tools"),
+        dst.join("build/src/speechPlayer"),
+    ];
+
+    if target_os == "windows" {
+        // MSVC adds a config-specific subdirectory. The `cmake` crate derives the
+        // configuration from the opt-level/debug settings, so it may also be
+        // `RelWithDebInfo` or `MinSizeRel`; probe the guessed one first, then the rest.
+        // Directories that do not exist are filtered out below.
+        const CMAKE_CONFIGS: [&str; 4] = ["Debug", "Release", "RelWithDebInfo", "MinSizeRel"];
+        let configs = std::iter::once(cmake_config)
+            .chain(CMAKE_CONFIGS.into_iter().filter(|c| *c != cmake_config));
+        for config in configs {
+            search_paths.push(dst.join("build/src/ucd-tools").join(config));
+            search_paths.push(dst.join("build/src/speechPlayer").join(config));
+            // Some versions might put espeak-ng.lib in lib/<config> too
+            search_paths.push(dst.join("lib").join(config));
+        }
+    }
+
+    for path in &search_paths {
+        if path.exists() {
+            println!("cargo:rustc-link-search=native={}", path.display());
+        }
+    }
+
+    // Static libraries produced by the CMake build above.
+    println!("cargo:rustc-link-lib=static=espeak-ng");
+    println!("cargo:rustc-link-lib=static=ucd");
+    println!("cargo:rustc-link-lib=static=speechPlayer");
+
+    // Platform libraries the static archives depend on (system libs / C++ runtime).
+    match target_os.as_str() {
+        "windows" => {
+            println!("cargo:rustc-link-lib=dylib=user32");
+            println!("cargo:rustc-link-lib=dylib=shell32");
+        }
+        "linux" => {
+            println!("cargo:rustc-link-lib=dylib=stdc++");
+        }
+        "macos" => {
+            println!("cargo:rustc-link-lib=framework=Foundation");
+            println!("cargo:rustc-link-lib=dylib=c++");
+        }
+        _ => {}
+    }
+
+    // Generate the FFI declarations; `CargoCallbacks` makes Cargo re-run this script
+    // when `wrapper.h` or any header it includes changes.
+    let bindings = bindgen::Builder::default()
+        .header("wrapper.h")
+        .clang_arg("-Iespeak-ng/src/include")
+        .parse_callbacks(Box::new(bindgen::CargoCallbacks::new()))
+        .generate()
+        .expect("Unable to generate bindings");
+
+    let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
+    bindings
+        .write_to_file(out_path.join("bindings.rs"))
+        .expect("Couldn't write bindings!");
+}
diff --git a/kittentts-rs/crates/espeak-ng-sys/espeak-ng b/kittentts-rs/crates/espeak-ng-sys/espeak-ng
diff --git a/kittentts-rs/crates/espeak-ng-sys/src/lib.rs b/kittentts-rs/crates/espeak-ng-sys/src/lib.rs
@@ -0,0 +1,5 @@
+#![allow(non_upper_case_globals)]
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+
+include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
diff --git a/kittentts-rs/crates/espeak-ng-sys/wrapper.h b/kittentts-rs/crates/espeak-ng-sys/wrapper.h
@@ -0,0 +1,2 @@
+#include "espeak-ng/speak_lib.h"
+#include "espeak-ng/espeak_ng.h"
diff --git a/kittentts-rs/src/config.rs b/kittentts-rs/src/config.rs
@@ -0,0 +1,31 @@
+use serde::Deserialize;
+use std::collections::HashMap;
+use std::path::Path;
+
+/// Model configuration matching the JSON config.json format.
+///
+/// Every field without `#[serde(default)]` must be present in the JSON document,
+/// otherwise deserialisation fails.
+#[derive(Debug, Deserialize)]
+pub struct ModelConfig {
+    /// Value of the `name` key.
+    pub name: String,
+    /// Value of the `version` key.
+    pub version: String,
+    /// Value of the JSON `type` key (renamed because `type` is a Rust keyword).
+    /// [`ModelConfig::load`] accepts only `"ONNX1"` and `"ONNX2"`.
+    #[serde(rename = "type")]
+    pub model_type: String,
+    /// Value of the `model` key.
+    pub model: String,
+    /// Value of the `voices` key — presumably the voices archive file name; confirm against the caller.
+    pub voices: String,
+    /// Value of the `model_file` key — presumably the ONNX file name; confirm against the caller.
+    pub model_file: String,
+    /// Optional map of `f32` values keyed by string; empty when the key is absent.
+    #[serde(default)]
+    pub speed_priors: HashMap<String, f32>,
+    /// Optional string-to-string alias map; empty when the key is absent.
+    #[serde(default)]
+    pub voice_aliases: HashMap<String, String>,
+}
+
+impl ModelConfig {
+    /// Load configuration from a config.json file.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be read, if its contents are not valid
+    /// JSON for [`ModelConfig`], or if the `type` field is neither `"ONNX1"` nor
+    /// `"ONNX2"`. Read and parse errors carry the offending path as context while
+    /// keeping the underlying I/O or JSON error as their source.
+    pub fn load(path: &Path) -> anyhow::Result<Self> {
+        let data = std::fs::read_to_string(path).map_err(|e| {
+            anyhow::Error::new(e)
+                .context(format!("failed to read model config {}", path.display()))
+        })?;
+        let config: ModelConfig = serde_json::from_str(&data).map_err(|e| {
+            anyhow::Error::new(e)
+                .context(format!("failed to parse model config {}", path.display()))
+        })?;
+        if !matches!(config.model_type.as_str(), "ONNX1" | "ONNX2") {
+            anyhow::bail!("Unsupported model type: {}", config.model_type);
+        }
+        Ok(config)
+    }
+}
diff --git a/kittentts-rs/src/espeak.rs b/kittentts-rs/src/espeak.rs
@@ -0,0 +1,87 @@
+use espeak_ng_sys as espeak_ng;
+use std::ffi::{CStr, CString};
+use std::ptr;
+
+/// Handle to the eSpeak-NG engine.
+///
+/// Creating a value initialises the library through `espeak_Initialize`; dropping it
+/// calls `espeak_Terminate`.
+///
+/// NOTE(review): the C API is called without any instance handle, so the engine state
+/// appears to be process-wide. Nothing here prevents two live `Espeak` values, and
+/// dropping either one would terminate the engine for both — confirm that callers
+/// create only a single instance.
+pub struct Espeak {
+    // Private unit field: the type can only be constructed through `Espeak::new`.
+    _private: (),
+}
+
+impl Espeak {
+    /// `textmode` argument for `espeak_TextToPhonemes`: input text is UTF-8.
+    const TEXT_MODE_UTF8: std::os::raw::c_int = 1;
+
+    /// `phonememode` argument for `espeak_TextToPhonemes`. The original author
+    /// describes 66 as "IPA + Punctuation" — TODO confirm the exact bit meanings
+    /// against `speak_lib.h`.
+    const PHONEME_MODE: std::os::raw::c_int = 66;
+
+    /// Initialise eSpeak-NG in retrieval mode (no audio playback).
+    ///
+    /// `data_path` is forwarded to `espeak_Initialize`; `None` passes a null pointer,
+    /// which lets the library use its default data location.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `data_path` contains an interior NUL byte or if
+    /// `espeak_Initialize` reports failure (a non-positive return value).
+    pub fn new(data_path: Option<&str>) -> anyhow::Result<Self> {
+        // Propagate an interior-NUL path as an error instead of panicking.
+        let c_path = data_path.map(CString::new).transpose()?;
+
+        // `c_path` outlives the FFI call below, so the pointer stays valid for it.
+        let path_ptr = c_path.as_ref().map_or(ptr::null(), |s| s.as_ptr());
+
+        // output = Retrieval, buflength = 0, options = 0
+        // SAFETY: `path_ptr` is either null or points to a live NUL-terminated string
+        // owned by `c_path`; the remaining arguments are plain integers.
+        let sample_rate = unsafe {
+            espeak_ng::espeak_Initialize(
+                espeak_ng::espeak_AUDIO_OUTPUT_AUDIO_OUTPUT_RETRIEVAL,
+                0,
+                path_ptr,
+                0,
+            )
+        };
+
+        if sample_rate <= 0 {
+            anyhow::bail!("Failed to initialize eSpeak-NG (returned {})", sample_rate);
+        }
+
+        Ok(Espeak { _private: () })
+    }
+
+    /// Select the eSpeak voice called `name`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `name` contains an interior NUL byte or if
+    /// `espeak_SetVoiceByName` does not return `EE_OK`.
+    pub fn set_voice(&self, name: &str) -> anyhow::Result<()> {
+        let c_name = CString::new(name)?;
+        // SAFETY: `c_name` is a valid NUL-terminated string that lives across the call.
+        let result = unsafe { espeak_ng::espeak_SetVoiceByName(c_name.as_ptr()) };
+        if result == espeak_ng::espeak_ERROR_EE_OK {
+            Ok(())
+        } else {
+            anyhow::bail!("Failed to set eSpeak voice to {}", name)
+        }
+    }
+
+    /// Convert `text` to a phoneme string.
+    ///
+    /// `espeak_TextToPhonemes` is called repeatedly: each call returns the phonemes
+    /// for one portion of the input and advances the text pointer. The portions are
+    /// concatenated, with a single space inserted between two portions when neither
+    /// side already provides one.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if `text` contains an interior NUL byte or if no phonemes at
+    /// all were produced (which includes empty input).
+    pub fn text_to_phonemes(&self, text: &str) -> anyhow::Result<String> {
+        let c_text = CString::new(text)?;
+        let mut text_ptr: *const std::os::raw::c_char = c_text.as_ptr();
+        let mut all_phonemes = String::new();
+
+        // SAFETY (loop condition): `text_ptr` is checked for null first and otherwise
+        // points into `c_text`, which is alive and NUL-terminated.
+        while !text_ptr.is_null() && unsafe { *text_ptr != 0 } {
+            let initial_ptr = text_ptr;
+            let mut current_ptr = text_ptr as *const std::os::raw::c_void;
+
+            // SAFETY: `current_ptr` points at NUL-terminated UTF-8 inside `c_text`;
+            // the callee updates it to the unprocessed remainder (or null).
+            let result_ptr = unsafe {
+                espeak_ng::espeak_TextToPhonemes(
+                    &mut current_ptr,
+                    Self::TEXT_MODE_UTF8,
+                    Self::PHONEME_MODE,
+                )
+            };
+
+            if !result_ptr.is_null() {
+                // SAFETY: a non-null result is treated as a NUL-terminated C string;
+                // it is copied into `all_phonemes` before the next FFI call.
+                let c_str = unsafe { CStr::from_ptr(result_ptr) };
+                let phonemes = c_str.to_string_lossy();
+                if !all_phonemes.is_empty()
+                    && !all_phonemes.ends_with(' ')
+                    && !phonemes.starts_with(' ')
+                {
+                    all_phonemes.push(' ');
+                }
+                all_phonemes.push_str(&phonemes);
+            }
+
+            text_ptr = current_ptr as *const std::os::raw::c_char;
+
+            // Stop when the input is exhausted, or when the callee made no progress
+            // (guards against looping forever on the same position).
+            if text_ptr.is_null() || text_ptr == initial_ptr {
+                break;
+            }
+        }
+
+        if all_phonemes.is_empty() {
+            anyhow::bail!("eSpeak failed to generate phonemes");
+        }
+
+        Ok(all_phonemes)
+    }
+}
+
+impl Drop for Espeak {
+    /// Shut the engine down by calling `espeak_Terminate`; its return value is ignored.
+    fn drop(&mut self) {
+        // SAFETY: an `Espeak` value only exists after `espeak_Initialize` succeeded
+        // in `Espeak::new`, so the library has been initialised.
+        unsafe {
+            espeak_ng::espeak_Terminate();
+        }
+    }
+}
diff --git a/kittentts-rs/src/lib.rs b/kittentts-rs/src/lib.rs
@@ -0,0 +1,15 @@
+//! # kittentts-rs
+//!
+//! Ultra-lightweight text-to-speech inference in Rust.
+//!
+//! A Rust port of the [KittenTTS](https://github.com/kittenml/kittentts) Python
+//! package, using `ort` for ONNX Runtime inference and eSpeak-NG (via the bundled
+//! `espeak-ng-sys` crate) for phonemisation.
+
+pub mod config;
+pub mod espeak;
+pub mod model;
+pub mod preprocess;
+pub mod text_cleaner;
+
+// Re-export the main model type for convenience.
+pub use model::KittenTTSModel;
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		#include "espeak-ng/speak_lib.h"
		#include "espeak-ng/espeak_ng.h"