wavekat · wavekat-eason · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/.github/workflows/update-bench.yml b/.github/workflows/update-bench.yml
@@ -0,0 +1,46 @@
+# Regenerates the benchmark table in README.md whenever benchmark CSVs change on
+# main (or on manual dispatch) and commits the refreshed README back.
+name: Update benchmark table
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'bench/results/**.csv'
+  workflow_dispatch:
+
+# The job pushes a commit, so the workflow token needs write access to contents.
+permissions:
+  contents: write
+
+# Only the newest run per ref matters: a superseded run would otherwise fail
+# its `git push` as non-fast-forward once the branch has moved on.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  update-bench-table:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Regenerate benchmark table in README.md
+        run: python scripts/update_bench_table.py
+
+      - name: Commit if changed
+        run: |
+          git config user.name  "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+          # `git diff --quiet` exits 0 when README.md has no unstaged changes.
+          if git diff --quiet README.md; then
+            echo "README.md unchanged — nothing to commit."
+          else
+            git add README.md
+            git commit -m "docs: update benchmark table from bench/results"
+            git push
+          fi
diff --git a/Makefile b/Makefile
@@ -1,10 +1,20 @@
-.PHONY: help check test test-qwen3 test-all fmt clippy doc
+.PHONY: help check ci test test-qwen3 test-qwen3-cuda test-all fmt clippy doc bench-rtf bench-rtf-cuda bench-rtf-trt bench-csv bench-csv-cuda bench-csv-trt update-readme
 
 help: ## Show this help
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  \033[36m%-14s\033[0m %s\n", $$1, $$2}'
+	@grep -E '^[a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  \033[36m%-16s\033[0m %s\n", $$1, $$2}'
 
 check: fmt clippy test ## Run fmt, clippy, and test (no features)
 
+ci: ## Run the full GitHub Actions CI check locally
+	cargo fmt --all -- --check
+	cargo clippy --workspace -- -D warnings
+	cargo test --workspace
+	cargo doc --no-deps -p wavekat-tts --all-features
+	cargo test -p wavekat-tts --no-default-features --features ""
+	cargo test -p wavekat-tts --no-default-features --features "qwen3-tts"
+	cargo test -p wavekat-tts --no-default-features --features "cosyvoice"
+	cargo test -p wavekat-tts --no-default-features --features "qwen3-tts,cosyvoice"
+
 fmt: ## Check formatting
 	cargo fmt --all -- --check
 
@@ -18,8 +28,42 @@ test: ## Run tests (no features)
 test-qwen3: ## Run tests with qwen3-tts feature
 	cargo test --features qwen3-tts
 
+test-qwen3-cuda: ## Run tests with qwen3-tts + CUDA
+	cargo test --features "qwen3-tts,cuda"
+
 test-all: ## Run tests with all features
 	cargo test --all-features
 
 doc: ## Build and open docs
 	cargo doc --all-features --no-deps --open
+
+# Label passed as --hardware by every bench-csv* target so all result files
+# agree. Override per machine, e.g. `make bench-csv-cuda HARDWARE=my-box`.
+HARDWARE ?= Standard_NC4as_T4_v3
+
+bench-rtf: ## RTF benchmark on CPU (int4)
+	cargo run --release --example bench_rtf --features qwen3-tts
+
+bench-rtf-cuda: ## RTF benchmark on CUDA (int4) — for Azure T4
+	cargo run --release --example bench_rtf --features "qwen3-tts,cuda" -- --provider cuda
+
+bench-rtf-trt: ## RTF benchmark on TensorRT (int4) — for Azure T4
+	cargo run --release --example bench_rtf --features "qwen3-tts,tensorrt" -- --provider tensorrt
+
+bench-csv: ## RTF benchmark on CPU (int4), save CSV to bench/results/
+	@mkdir -p bench/results
+	cargo run --release --example bench_rtf --features qwen3-tts -- \
+		--hardware "$(HARDWARE)" --csv > bench/results/cpu-int4.csv
+
+bench-csv-cuda: ## RTF benchmark on CUDA T4 (int4), save CSV to bench/results/
+	@mkdir -p bench/results
+	cargo run --release --example bench_rtf --features "qwen3-tts,cuda" -- \
+		--provider cuda --hardware "$(HARDWARE)" --csv > bench/results/cuda-t4-int4.csv
+
+bench-csv-trt: ## RTF benchmark on TensorRT T4 (int4), save CSV to bench/results/
+	@mkdir -p bench/results
+	cargo run --release --example bench_rtf --features "qwen3-tts,tensorrt" -- \
+		--provider tensorrt --hardware "$(HARDWARE)" --csv > bench/results/trt-t4-int4.csv
+
+update-readme: ## Update README benchmark table from bench/results/*.csv
+	python3 scripts/update_bench_table.py
diff --git a/README.md b/README.md
@@ -90,14 +90,42 @@ cargo run --example synthesize --features qwen3-tts -- --precision fp32 "Hello"
 cargo run --example synthesize --features qwen3-tts -- --model-dir /path/to/model --output hello.wav "Hello"
 ```
 
+## Performance
+
+<!-- bench:start -->
+| Backend | Precision | Provider | Hardware | RTF short | RTF medium | RTF long | Date |
+|---------|-----------|----------|----------|:-----------:|:-----------:|:-----------:|------|
+| qwen3-tts | int4 | CPU | Standard_NC4as_T4_v3 | 1.98 | 2.04 | 2.34 | 2026-04-07 |
+| qwen3-tts | int4 | CUDA | Standard_NC4as_T4_v3 | **0.78** | **0.85** | 1.07 | 2026-04-07 |
+
+_RTF < 1.0 = faster-than-real-time. Lower is better._  
+_To update: run `make bench-csv-cuda` on target hardware, then commit `bench/results/`._
+<!-- bench:end -->
+
+## Try it on Google Colab
+
+No local GPU needed — run Qwen3-TTS on a free T4 in the browser:
+
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1qtc6lAk9RsAsvF1ojft0ACO2-PzFX4pi?usp=sharing)
+
 ## Feature flags
 
+### Backends
+
 | Flag | Default | Description |
 |------|---------|-------------|
 | `qwen3-tts` | off | Qwen3-TTS local ONNX inference |
 | `cosyvoice` | off | CosyVoice local ONNX inference (planned) |
 
-WAV I/O (`write_wav` / `from_wav`) is provided by `wavekat-core` via its `wav` feature flag.
+### Execution providers
+
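+Composable with any backend flag. Each flag enables the corresponding ONNX Runtime execution provider at build time; choose it at run time (e.g. `--provider cuda` in the examples). Builds without these flags run on the CPU.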
+
+| Flag | Description |
+|------|-------------|
+| `cuda` | NVIDIA CUDA GPU |
+| `tensorrt` | NVIDIA TensorRT |
+| `coreml` | Apple CoreML (macOS) |
 
 ## License
 

diff --git a/bench/results/.gitkeep b/bench/results/.gitkeep
diff --git a/bench/results/cpu-int4.csv b/bench/results/cpu-int4.csv
@@ -0,0 +1,16 @@
+backend,precision,provider,hardware,date,sample,chars,iteration,synth_secs,audio_secs,rtf
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,short,66,1,7.882000,4.040000,1.950000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,short,66,2,7.036000,3.580000,1.967000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,short,66,3,7.033000,3.560000,1.974000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,short,66,4,9.177000,4.440000,2.065000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,short,66,5,8.204000,4.210000,1.949000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,medium,243,1,46.086000,22.290000,2.068000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,medium,243,2,45.984000,22.530000,2.041000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,medium,243,3,37.957000,18.840000,2.015000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,medium,243,4,38.513000,18.890000,2.039000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,medium,243,5,41.841000,20.650000,2.026000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,long,655,1,85.839000,37.480000,2.290000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,long,655,2,122.740000,51.160000,2.399000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,long,655,3,154.986000,61.930000,2.503000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,long,655,4,27.911000,12.940000,2.157000
+qwen3-tts,int4,cpu,Standard_NC4as_T4_v3,2026-04-07,long,655,5,119.948000,50.500000,2.375000
diff --git a/bench/results/cuda-t4-int4.csv b/bench/results/cuda-t4-int4.csv
@@ -0,0 +1,16 @@
+backend,precision,provider,hardware,date,sample,chars,iteration,synth_secs,audio_secs,rtf
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,short,66,1,3.181000,4.057792,0.783924
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,short,66,2,3.790286,4.806042,0.788650
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,short,66,3,3.114891,3.978583,0.782915
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,short,66,4,3.171773,4.053667,0.782446
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,short,66,5,3.472673,4.462417,0.778205
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,medium,243,1,19.550738,22.549375,0.867019
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,medium,243,2,16.825036,19.462083,0.864503
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,medium,243,3,15.813848,18.711875,0.845124
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,medium,243,4,16.724455,19.930542,0.839137
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,medium,243,5,17.720516,20.934250,0.846484
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,long,655,1,46.378647,43.818125,1.058435
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,long,655,2,53.422495,49.133000,1.087304
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,long,655,3,46.295702,44.013667,1.051848
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,long,655,4,48.479280,45.570792,1.063824
+qwen3-tts,int4,cuda,Standard_NC4as_T4_v3,2026-04-07,long,655,5,47.073837,44.205833,1.064878
diff --git a/crates/wavekat-tts/Cargo.toml b/crates/wavekat-tts/Cargo.toml
@@ -15,6 +15,11 @@ default = []
 qwen3-tts = ["dep:ort", "dep:ndarray", "dep:tokenizers", "dep:npyz", "dep:rand", "dep:hf-hub"]
 cosyvoice = ["dep:ort", "dep:ndarray"]
 
+# Execution providers — composable with any ONNX backend feature. "ort?/x" is a weak dependency feature: it turns on x in ort only when a backend feature has already enabled ort.
+coreml   = ["ort?/coreml"]  # Apple CoreML (macOS)
+cuda     = ["ort?/cuda"]  # NVIDIA CUDA GPU
+tensorrt = ["ort?/tensorrt"]  # NVIDIA TensorRT
+
 [dependencies]
 wavekat-core = { version = "0.0.5", features = ["wav"] }
 thiserror = "2"
@@ -32,3 +37,7 @@ hf-hub = { version = "0.5", optional = true, default-features = false, features
 [[example]]
 name = "synthesize"
 required-features = ["qwen3-tts"]
+
+[[example]]
+name = "bench_rtf"
+required-features = ["qwen3-tts"]