Skip to content

Commit 32dcb58

Browse files
authored
feat(vibevoice): add new backend (#7494)
* feat(vibevoice): add backend
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* chore: add workflow and backend index
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* chore(gallery): add vibevoice
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Use self-hosted for intel builds
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
* Pin python version for l4t
  Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---------
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent ef44ace commit 32dcb58

21 files changed

+1164
-1
lines changed

.github/workflows/backend.yml

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,19 @@ jobs:
198198
context: "./backend"
199199
ubuntu-version: '2204'
200200
# CUDA 12 builds
201+
- build-type: 'cublas'
202+
cuda-major-version: "12"
203+
cuda-minor-version: "0"
204+
platforms: 'linux/amd64'
205+
tag-latest: 'auto'
206+
tag-suffix: '-gpu-nvidia-cuda-12-vibevoice'
207+
runs-on: 'ubuntu-latest'
208+
base-image: "ubuntu:22.04"
209+
skip-drivers: 'false'
210+
backend: "vibevoice"
211+
dockerfile: "./backend/Dockerfile.python"
212+
context: "./backend"
213+
ubuntu-version: '2204'
201214
- build-type: 'cublas'
202215
cuda-major-version: "12"
203216
cuda-minor-version: "0"
@@ -407,6 +420,19 @@ jobs:
407420
dockerfile: "./backend/Dockerfile.python"
408421
context: "./backend"
409422
ubuntu-version: '2204'
423+
- build-type: 'cublas'
424+
cuda-major-version: "13"
425+
cuda-minor-version: "0"
426+
platforms: 'linux/amd64'
427+
tag-latest: 'auto'
428+
tag-suffix: '-gpu-nvidia-cuda-13-vibevoice'
429+
runs-on: 'ubuntu-latest'
430+
base-image: "ubuntu:22.04"
431+
skip-drivers: 'false'
432+
backend: "vibevoice"
433+
dockerfile: "./backend/Dockerfile.python"
434+
context: "./backend"
435+
ubuntu-version: '2204'
410436
- build-type: 'cublas'
411437
cuda-major-version: "13"
412438
cuda-minor-version: "0"
@@ -459,6 +485,19 @@ jobs:
459485
dockerfile: "./backend/Dockerfile.python"
460486
context: "./backend"
461487
ubuntu-version: '2204'
488+
- build-type: 'l4t'
489+
cuda-major-version: "13"
490+
cuda-minor-version: "0"
491+
platforms: 'linux/arm64'
492+
tag-latest: 'auto'
493+
tag-suffix: '-nvidia-l4t-cuda-13-arm64-vibevoice'
494+
runs-on: 'ubuntu-24.04-arm'
495+
base-image: "ubuntu:24.04"
496+
skip-drivers: 'false'
497+
ubuntu-version: '2404'
498+
backend: "vibevoice"
499+
dockerfile: "./backend/Dockerfile.python"
500+
context: "./backend"
462501
- build-type: 'l4t'
463502
cuda-major-version: "13"
464503
cuda-minor-version: "0"
@@ -669,6 +708,19 @@ jobs:
669708
dockerfile: "./backend/Dockerfile.python"
670709
context: "./backend"
671710
ubuntu-version: '2204'
711+
- build-type: 'hipblas'
712+
cuda-major-version: ""
713+
cuda-minor-version: ""
714+
platforms: 'linux/amd64'
715+
tag-latest: 'auto'
716+
tag-suffix: '-gpu-rocm-hipblas-vibevoice'
717+
runs-on: 'arc-runner-set'
718+
base-image: "rocm/dev-ubuntu-22.04:6.4.3"
719+
skip-drivers: 'false'
720+
backend: "vibevoice"
721+
dockerfile: "./backend/Dockerfile.python"
722+
context: "./backend"
723+
ubuntu-version: '2204'
672724
- build-type: 'hipblas'
673725
cuda-major-version: ""
674726
cuda-minor-version: ""
@@ -787,6 +839,19 @@ jobs:
787839
dockerfile: "./backend/Dockerfile.python"
788840
context: "./backend"
789841
ubuntu-version: '2204'
842+
- build-type: 'l4t'
843+
cuda-major-version: "12"
844+
cuda-minor-version: "0"
845+
platforms: 'linux/arm64'
846+
tag-latest: 'auto'
847+
tag-suffix: '-nvidia-l4t-vibevoice'
848+
runs-on: 'ubuntu-24.04-arm'
849+
base-image: "nvcr.io/nvidia/l4t-jetpack:r36.4.0"
850+
skip-drivers: 'true'
851+
backend: "vibevoice"
852+
dockerfile: "./backend/Dockerfile.python"
853+
context: "./backend"
854+
ubuntu-version: '2204'
790855
- build-type: 'l4t'
791856
cuda-major-version: "12"
792857
cuda-minor-version: "0"
@@ -827,6 +892,19 @@ jobs:
827892
dockerfile: "./backend/Dockerfile.python"
828893
context: "./backend"
829894
ubuntu-version: '2204'
895+
- build-type: 'intel'
896+
cuda-major-version: ""
897+
cuda-minor-version: ""
898+
platforms: 'linux/amd64'
899+
tag-latest: 'auto'
900+
tag-suffix: '-gpu-intel-vibevoice'
901+
runs-on: 'arc-runner-set'
902+
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
903+
skip-drivers: 'false'
904+
backend: "vibevoice"
905+
dockerfile: "./backend/Dockerfile.python"
906+
context: "./backend"
907+
ubuntu-version: '2204'
830908
- build-type: 'intel'
831909
cuda-major-version: ""
832910
cuda-minor-version: ""
@@ -1319,6 +1397,19 @@ jobs:
13191397
dockerfile: "./backend/Dockerfile.python"
13201398
context: "./backend"
13211399
ubuntu-version: '2204'
1400+
- build-type: ''
1401+
cuda-major-version: ""
1402+
cuda-minor-version: ""
1403+
platforms: 'linux/amd64,linux/arm64'
1404+
tag-latest: 'auto'
1405+
tag-suffix: '-cpu-vibevoice'
1406+
runs-on: 'ubuntu-latest'
1407+
base-image: "ubuntu:22.04"
1408+
skip-drivers: 'false'
1409+
backend: "vibevoice"
1410+
dockerfile: "./backend/Dockerfile.python"
1411+
context: "./backend"
1412+
ubuntu-version: '2204'
13221413
backend-jobs-darwin:
13231414
uses: ./.github/workflows/backend_build_darwin.yml
13241415
strategy:

Makefile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,12 +287,14 @@ prepare-test-extra: protogen-python
287287
$(MAKE) -C backend/python/diffusers
288288
$(MAKE) -C backend/python/chatterbox
289289
$(MAKE) -C backend/python/vllm
290+
$(MAKE) -C backend/python/vibevoice
290291

291292
# Run the `test` target of each listed Python backend through a recursive
# $(MAKE) -C call, after prepare-test-extra has completed.
# The backends are tested one recipe line at a time, in the order listed.
test-extra: prepare-test-extra
292293
$(MAKE) -C backend/python/transformers test
293294
$(MAKE) -C backend/python/diffusers test
294295
$(MAKE) -C backend/python/chatterbox test
295296
$(MAKE) -C backend/python/vllm test
297+
$(MAKE) -C backend/python/vibevoice test
296298

297299
DOCKER_IMAGE?=local-ai
298300
DOCKER_AIO_IMAGE?=local-ai-aio
@@ -389,6 +391,9 @@ backends/neutts: docker-build-neutts docker-save-neutts build
389391
backends/vllm: docker-build-vllm docker-save-vllm build
390392
./local-ai backends install "ocifile://$(abspath ./backend-images/vllm.tar)"
391393

394+
# Build the vibevoice backend image, save it to a tarball, build local-ai,
# then install that tarball into local-ai through an ocifile:// URI.
# NOTE(review): docker-save-vibevoice only lists backend-images as its own
# prerequisite, so "build before save" relies on the prerequisites here being
# processed left to right; under `make -j` that order is not guaranteed.
backends/vibevoice: docker-build-vibevoice docker-save-vibevoice build
395+
./local-ai backends install "ocifile://$(abspath ./backend-images/vibevoice.tar)"
396+
392397
build-darwin-python-backend: build
393398
bash ./scripts/build/python-darwin.sh
394399

@@ -445,6 +450,9 @@ docker-save-kitten-tts: backend-images
445450
docker-save-chatterbox: backend-images
446451
docker save local-ai-backend:chatterbox -o backend-images/chatterbox.tar
447452

453+
# Export the local-ai-backend:vibevoice image to backend-images/vibevoice.tar.
# This rule does not build the image itself; it only depends on backend-images,
# so the image must already exist locally (see docker-build-vibevoice).
docker-save-vibevoice: backend-images
454+
docker save local-ai-backend:vibevoice -o backend-images/vibevoice.tar
455+
448456
docker-build-neutts:
449457
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:neutts -f backend/Dockerfile.python --build-arg BACKEND=neutts ./backend
450458

@@ -523,10 +531,13 @@ docker-build-bark:
523531
docker-build-chatterbox:
524532
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:chatterbox -f backend/Dockerfile.python --build-arg BACKEND=chatterbox ./backend
525533

534+
# Build the vibevoice backend image from backend/Dockerfile.python, tagged
# local-ai-backend:vibevoice. BUILD_TYPE and BASE_IMAGE are forwarded as build
# args; the build context is ./backend.
# The docker invocation uses the same flags as the sibling docker-build-*
# rules (no forced --progress mode), so log output is consistent across backends.
docker-build-vibevoice:
535+
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:vibevoice -f backend/Dockerfile.python --build-arg BACKEND=vibevoice ./backend
536+
526537
docker-build-exllama2:
527538
docker build --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg BASE_IMAGE=$(BASE_IMAGE) -t local-ai-backend:exllama2 -f backend/Dockerfile.python --build-arg BACKEND=exllama2 .
528539

529-
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-exllama2
540+
# Aggregate target: it has no recipe of its own and simply requires every
# docker-build-<backend> target listed as a prerequisite.
docker-build-backends: docker-build-llama-cpp docker-build-rerankers docker-build-vllm docker-build-transformers docker-build-diffusers docker-build-kokoro docker-build-faster-whisper docker-build-coqui docker-build-bark docker-build-chatterbox docker-build-vibevoice docker-build-exllama2
530541

531542
########################################################
532543
### END Backends

backend/index.yaml

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,28 @@
390390
nvidia-cuda-12: "cuda12-chatterbox"
391391
nvidia-l4t-cuda-12: "nvidia-l4t-arm64-chatterbox"
392392
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-chatterbox"
393+
- &vibevoice
394+
urls:
395+
- https://github.com/microsoft/VibeVoice
396+
description: |
397+
VibeVoice-Realtime is a real-time text-to-speech model that generates natural-sounding speech.
398+
tags:
399+
- text-to-speech
400+
- TTS
401+
license: mit
402+
name: "vibevoice"
403+
alias: "vibevoice"
404+
capabilities:
405+
nvidia: "cuda12-vibevoice"
406+
intel: "intel-vibevoice"
407+
amd: "rocm-vibevoice"
408+
nvidia-l4t: "nvidia-l4t-vibevoice"
409+
default: "cpu-vibevoice"
410+
nvidia-cuda-13: "cuda13-vibevoice"
411+
nvidia-cuda-12: "cuda12-vibevoice"
412+
nvidia-l4t-cuda-12: "nvidia-l4t-vibevoice"
413+
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-vibevoice"
414+
icon: https://avatars.githubusercontent.com/u/6154722?s=200&v=4
393415
- &piper
394416
name: "piper"
395417
uri: "quay.io/go-skynet/local-ai-backends:latest-piper"
@@ -1571,3 +1593,86 @@
15711593
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-chatterbox"
15721594
mirrors:
15731595
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-chatterbox
1596+
## vibevoice
1597+
- !!merge <<: *vibevoice
1598+
name: "vibevoice-development"
1599+
capabilities:
1600+
nvidia: "cuda12-vibevoice-development"
1601+
intel: "intel-vibevoice-development"
1602+
amd: "rocm-vibevoice-development"
1603+
nvidia-l4t: "nvidia-l4t-vibevoice-development"
1604+
default: "cpu-vibevoice-development"
1605+
nvidia-cuda-13: "cuda13-vibevoice-development"
1606+
nvidia-cuda-12: "cuda12-vibevoice-development"
1607+
nvidia-l4t-cuda-12: "nvidia-l4t-vibevoice-development"
1608+
nvidia-l4t-cuda-13: "cuda13-nvidia-l4t-arm64-vibevoice-development"
1609+
- !!merge <<: *vibevoice
1610+
name: "cpu-vibevoice"
1611+
uri: "quay.io/go-skynet/local-ai-backends:latest-cpu-vibevoice"
1612+
mirrors:
1613+
- localai/localai-backends:latest-cpu-vibevoice
1614+
- !!merge <<: *vibevoice
1615+
name: "cpu-vibevoice-development"
1616+
uri: "quay.io/go-skynet/local-ai-backends:master-cpu-vibevoice"
1617+
mirrors:
1618+
- localai/localai-backends:master-cpu-vibevoice
1619+
- !!merge <<: *vibevoice
1620+
name: "cuda12-vibevoice"
1621+
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vibevoice"
1622+
mirrors:
1623+
- localai/localai-backends:latest-gpu-nvidia-cuda-12-vibevoice
1624+
- !!merge <<: *vibevoice
1625+
name: "cuda12-vibevoice-development"
1626+
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-vibevoice"
1627+
mirrors:
1628+
- localai/localai-backends:master-gpu-nvidia-cuda-12-vibevoice
1629+
- !!merge <<: *vibevoice
1630+
name: "cuda13-vibevoice"
1631+
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-13-vibevoice"
1632+
mirrors:
1633+
- localai/localai-backends:latest-gpu-nvidia-cuda-13-vibevoice
1634+
- !!merge <<: *vibevoice
1635+
name: "cuda13-vibevoice-development"
1636+
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-13-vibevoice"
1637+
mirrors:
1638+
- localai/localai-backends:master-gpu-nvidia-cuda-13-vibevoice
1639+
- !!merge <<: *vibevoice
1640+
name: "intel-vibevoice"
1641+
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-vibevoice"
1642+
mirrors:
1643+
- localai/localai-backends:latest-gpu-intel-vibevoice
1644+
- !!merge <<: *vibevoice
1645+
name: "intel-vibevoice-development"
1646+
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-vibevoice"
1647+
mirrors:
1648+
- localai/localai-backends:master-gpu-intel-vibevoice
1649+
- !!merge <<: *vibevoice
1650+
name: "rocm-vibevoice"
1651+
uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-vibevoice"
1652+
mirrors:
1653+
- localai/localai-backends:latest-gpu-rocm-hipblas-vibevoice
1654+
- !!merge <<: *vibevoice
1655+
name: "rocm-vibevoice-development"
1656+
uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-vibevoice"
1657+
mirrors:
1658+
- localai/localai-backends:master-gpu-rocm-hipblas-vibevoice
1659+
- !!merge <<: *vibevoice
1660+
name: "nvidia-l4t-vibevoice"
1661+
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-vibevoice"
1662+
mirrors:
1663+
- localai/localai-backends:latest-nvidia-l4t-vibevoice
1664+
- !!merge <<: *vibevoice
1665+
name: "nvidia-l4t-vibevoice-development"
1666+
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-vibevoice"
1667+
mirrors:
1668+
- localai/localai-backends:master-nvidia-l4t-vibevoice
1669+
- !!merge <<: *vibevoice
1670+
name: "cuda13-nvidia-l4t-arm64-vibevoice"
1671+
uri: "quay.io/go-skynet/local-ai-backends:latest-nvidia-l4t-cuda-13-arm64-vibevoice"
1672+
mirrors:
1673+
- localai/localai-backends:latest-nvidia-l4t-cuda-13-arm64-vibevoice
1674+
- !!merge <<: *vibevoice
1675+
name: "cuda13-nvidia-l4t-arm64-vibevoice-development"
1676+
uri: "quay.io/go-skynet/local-ai-backends:master-nvidia-l4t-cuda-13-arm64-vibevoice"
1677+
mirrors:
1678+
- localai/localai-backends:master-nvidia-l4t-cuda-13-arm64-vibevoice

backend/python/vibevoice/Makefile

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Set up the backend by running install.sh (path is relative to the directory
# make is invoked in). Declared phony, so the script runs on every invocation.
.PHONY: vibevoice
2+
vibevoice:
3+
bash install.sh
4+
5+
# Start the backend with run.sh. The phony `vibevoice` prerequisite means
# install.sh is executed first each time. The echo lines are progress
# messages only; `@` keeps make from printing the echo command itself.
.PHONY: run
6+
run: vibevoice
7+
@echo "Running vibevoice..."
8+
bash run.sh
9+
@echo "vibevoice run."
10+
11+
# Run the backend's tests with test.sh. The phony `vibevoice` prerequisite
# means install.sh is executed first each time. The final message is printed
# only if test.sh exits successfully, since make stops at a failing recipe line.
.PHONY: test
12+
test: vibevoice
13+
@echo "Testing vibevoice..."
14+
bash test.sh
15+
@echo "vibevoice tested."
16+
17+
# Delete the two generated Python stub files, backend_pb2_grpc.py and
# backend_pb2.py. $(RM) is make's predefined `rm -f`, so it is not an error
# if the files are already absent.
.PHONY: protogen-clean
18+
protogen-clean:
19+
$(RM) backend_pb2_grpc.py backend_pb2.py
20+
21+
# Full cleanup: remove the generated stubs (via protogen-clean), then the
# venv and __pycache__ directories.
# NOTE(review): venv is presumably created by install.sh — confirm.
.PHONY: clean
22+
clean: protogen-clean
23+
rm -rf venv __pycache__

0 commit comments

Comments
 (0)