Skip to content

Commit 715302c

Browse files
committed
chore(gallery): add vibevoice
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent b9474c0 commit 715302c

File tree

3 files changed

+106
-101
lines changed

3 files changed

+106
-101
lines changed

backend/python/vibevoice/example-config.yaml

Lines changed: 0 additions & 101 deletions
This file was deleted.

gallery/index.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,32 @@
11
---
2+
- &vibevoice
3+
url: "github:mudler/LocalAI/gallery/vibevoice.yaml@master"
4+
icon: https://github.com/microsoft/VibeVoice/raw/main/Figures/VibeVoice_logo_white.png
5+
license: mit
6+
tags:
7+
- text-to-speech
8+
- TTS
9+
name: "vibevoice"
10+
urls:
11+
- https://github.com/microsoft/VibeVoice
12+
13+
# Download voice preset files
14+
# Voice presets are downloaded to: {models_dir}/voices/streaming_model/
15+
# The voices_dir option set in gallery/vibevoice.yaml tells the backend to look in this location
16+
files:
17+
# English voices
18+
- filename: voices/streaming_model/en-Frank_man.pt
19+
uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Frank_man.pt
20+
- filename: voices/streaming_model/en-Grace_woman.pt
21+
uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Grace_woman.pt
22+
- filename: voices/streaming_model/en-Mike_man.pt
23+
uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Mike_man.pt
24+
- filename: voices/streaming_model/en-Emma_woman.pt
25+
uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Emma_woman.pt
26+
- filename: voices/streaming_model/en-Carter_man.pt
27+
uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Carter_man.pt
28+
- filename: voices/streaming_model/en-Davis_man.pt
29+
uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Davis_man.pt
230
- &qwen3vl
331
url: "github:mudler/LocalAI/gallery/qwen3.yaml@master"
432
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png

gallery/vibevoice.yaml

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
---
2+
name: localai
3+
4+
config_file: |-
5+
name: vibevoice
6+
backend: vibevoice
7+
description: |
8+
VibeVoice-Realtime is a real-time text-to-speech model that generates natural-sounding speech.
9+
This model supports voice cloning through voice preset files (.pt files).
10+
11+
parameters:
12+
model: microsoft/VibeVoice-Realtime-0.5B
13+
14+
# TTS configuration
15+
tts:
16+
# Voice selection - can be:
17+
# 1. Voice preset name (e.g., "Frank", "en-Frank_man", "Grace") - looks for .pt files in voices/streaming_model/
18+
# 2. Path to a voice preset .pt file (relative to model directory or absolute)
19+
# Available English voices: Carter, Davis, Emma, Frank, Grace, Mike
20+
voice: "Frank"
21+
# Alternative: use audio_path to specify a voice file directly
22+
# audio_path: "voices/streaming_model/en-Frank_man.pt"
23+
24+
known_usecases:
25+
- tts
26+
27+
# Backend-specific options
28+
# These are passed as "key:value" strings to the backend
29+
options:
30+
# CFG (Classifier-Free Guidance) scale for generation (default: 1.5)
31+
# Higher values can improve quality but may slow generation
32+
- "cfg_scale:1.5"
33+
# Number of inference steps for the diffusion process (default: 5)
34+
# More steps = better quality but slower. Typical range: 3-10
35+
- "inference_steps:5"
36+
# Enable sampling (default: false)
37+
# When true, uses temperature and top_p for sampling
38+
- "do_sample:false"
39+
# Temperature for sampling (only used if do_sample=true, default: 0.9)
40+
- "temperature:0.9"
41+
# Top-p (nucleus) sampling (only used if do_sample=true, default: 0.9)
42+
- "top_p:0.9"
43+
# Voices directory path
44+
# This explicitly sets where to look for voice preset files (.pt files)
45+
# Since we're downloading voices to voices/streaming_model/, we set it here
46+
#
47+
# Examples:
48+
# - Relative path (relative to models directory): "voices/streaming_model"
49+
# - Absolute path: "/custom/path/to/voices/streaming_model"
50+
# - Custom relative path: "my_custom_voices/streaming_model"
51+
#
52+
# If not specified, the backend will auto-detect from common locations:
53+
# 1. {ModelFile directory}/voices/streaming_model/
54+
# 2. {models_dir}/voices/streaming_model/
55+
# 3. Backend directory
56+
- "voices_dir:voices/streaming_model"
57+
# # Download voice preset files (disabled here: the same files are listed under `files:` in the gallery/index.yaml entry)
58+
# # Voice presets are downloaded to: {models_dir}/voices/streaming_model/
59+
# # The voices_dir option above tells the backend to look in this location
60+
# download_files:
61+
# # English voices
62+
# - filename: voices/streaming_model/en-Frank_man.pt
63+
# uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Frank_man.pt
64+
# - filename: voices/streaming_model/en-Grace_woman.pt
65+
# uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Grace_woman.pt
66+
# - filename: voices/streaming_model/en-Mike_man.pt
67+
# uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Mike_man.pt
68+
# - filename: voices/streaming_model/en-Emma_woman.pt
69+
# uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Emma_woman.pt
70+
# - filename: voices/streaming_model/en-Carter_man.pt
71+
# uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Carter_man.pt
72+
# - filename: voices/streaming_model/en-Davis_man.pt
73+
# uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/en-Davis_man.pt
74+
# # Uncomment to add more languages:
75+
# # - filename: voices/streaming_model/fr-Spk0_man.pt
76+
# # uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/fr-Spk0_man.pt
77+
# # - filename: voices/streaming_model/de-Spk0_man.pt
78+
# # uri: https://raw.githubusercontent.com/microsoft/VibeVoice/main/demo/voices/streaming_model/de-Spk0_man.pt

0 commit comments

Comments
 (0)