Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ stt:
language: "en"
transformers: false
transformers_engine: "huggingface"
download_root: "cache/models/{transformers_engine if transformers else engine}/"
# download_root: null # Let Python code build the path dynamically

# Text-to-Speech configuration
tts:
engine: "piper"
model_name: "jarvis-medium.onnx"
model_path: "cache/models/{engine}/{model_name}"
# model_path: null # Let Python code build the path dynamically
cli_mode: false

# Voice Activity Detection configuration
Expand Down
28 changes: 8 additions & 20 deletions examples/STT/whisper/STT_offline_whisper_rpi5.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,41 +26,29 @@ sudo apt update && sudo apt install -y ffmpeg python3 python3-pip git portaudio1
pip3 install git+https://github.com/openai/whisper.git
pip3 install blobfile
```
To manage resource constraints on the Raspberry Pi 5, it is recommended to use smaller Whisper models. Download and cache the necessary models, vocabulary, and encoder files for offline use with the following script:

Alternatively, you can clone it:
Set REPO_ROOT to your cloned main repository path before running (e.g. /Users/USER_NAME/AI-Autonomous-Assistant).

```bash
git clone https://github.com/openai/whisper.git
cd whisper
pip3 install -e .
```
Then install the modules required to run the tests inside the whisper repository:

```bash
pip3 install jiwer scipy pytest
```

Load models, vocabulary and encoder files to be able to use whisper offline with the following script:

```bash
cd ~/AI-Autonomous-Assistant/scripts/models/audio
cd $REPO_ROOT/scripts/models/audio
python3 whisper_objects.py
```

The vocabulary, encoder and models files will be store in ($HOME_USER_DIR)/.cache/whisper
The vocabulary, encoder and model files will be stored in $REPO_ROOT/cache/models/whisper

3. SpeechRecognition:

Follow installation from <https://github.com/Uberi/speech_recognition.git>
Mainly:
Update file links in your local copy of openai_public.py which will be installed in your python folder e.g. /lib/python3.11/site-packages/tiktoken_ext/openai_public.py to point to where you downloaded the files.
Remove the URL "<https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/>" and replace it with your local copy, e.g. "($HOME_USER_DIR)/.cache/whisper/vocab.bpe" and "($HOME_USER_DIR).cache/whisper/encoder.json"
Remove the URL "<https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/>" and replace it with your local copy, e.g. "$REPO_ROOT/cache/models/whisper/vocab.bpe" and "$REPO_ROOT/cache/models/whisper/encoder.json"

```bash
def gpt2():
mergeable_ranks = data_gym_to_mergeable_bpe_ranks(
vocab_bpe_file="/$HOME/.cache/whisper/vocab.bpe",
encoder_json_file="/$HOME/.cache/whisper/encoder.json",
vocab_bpe_file="$REPO_ROOT/cache/models/whisper/vocab.bpe",
encoder_json_file="$REPO_ROOT/cache/models/whisper/encoder.json",
)
```

Expand Down Expand Up @@ -105,7 +93,7 @@ Secondly load models and data sets from:
<https://huggingface.co/datasets/distil-whisper/librispeech_long>


with the following python script that will store the model and data set in ($HOME_USER_DIR)/.cache/huggingface directory
with the following Python script, which will store the model and data set in the $REPO_ROOT/cache/models/huggingface directory
(following : https://huggingface.co/docs/huggingface_hub/guides/download)

```bash
Expand Down
35 changes: 23 additions & 12 deletions scripts/models/audio/fast_whisper_objects.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,47 @@
#!/usr/bin/env python3
"""Script to download and save Hugging Face models, tokenizers, processors,
and their associated datasets to a local backup in your user cache directory."""

import os

from huggingface_hub import snapshot_download
from models_check import model_exists

from src.utils.sysutils import detect_raspberry_pi_model

MODELS_NAMES = (
MODELS_NAMES_BASE = (
"Systran/faster-whisper-small",
"Systran/faster-whisper-small.en",
"Systran/faster-whisper-tiny",
"Systran/faster-whisper-tiny.en",
"Systran/faster-distil-whisper-small.en",
)
# Add larger models if not on Raspberry Pi
if not detect_raspberry_pi_model():
MODELS_NAMES += (
"Systran/faster-whisper-base",
"Systran/faster-whisper-base.en",
"Systran/faster-whisper-medium",
"Systran/faster-whisper-medium.en",
"Systran/faster-whisper-large-v3",
"Systran/faster-distil-whisper-large-v3",
)

MODELS_NAMES_EXTENDED = (
"Systran/faster-whisper-base",
"Systran/faster-whisper-base.en",
"Systran/faster-whisper-medium",
"Systran/faster-whisper-medium.en",
"Systran/faster-whisper-large-v3",
"Systran/faster-distil-whisper-large-v3",
)

cache_dir = os.path.join(os.path.expanduser("~"), "cache/models/huggingface")


def get_models_to_download() -> tuple:
    """Return the model identifiers to fetch for the current platform.

    Raspberry Pi hosts are limited to the small base models; every other
    platform also receives the larger extended set.
    """
    if detect_raspberry_pi_model():
        return MODELS_NAMES_BASE
    # Non-Pi machines can afford the bigger checkpoints as well.
    return MODELS_NAMES_BASE + MODELS_NAMES_EXTENDED


def run() -> None:
"""Downloads and saves Hugging Face models, tokenizers, processors,
and their associated datasets to a local backup in your user cache directory."""
for model_name in MODELS_NAMES:
models_to_download = get_models_to_download()
for model_name in models_to_download:
if model_exists(model_name, cache_dir):
print(f"Model {model_name} already exists.")
continue
Expand Down
38 changes: 38 additions & 0 deletions scripts/models/audio/load_all.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env python3
"""Script to load all models."""

import fast_whisper_objects
import load_huggingface_objects
import piper_models
import vosk_models
import whisper_objects


def main() -> None:
    """Run every model-download script, one phase at a time."""
    separator = "=================================================="
    print(separator)
    print("🚀 Starting Master Model Loading Process")
    print(separator)

    # Ordered phases as (label, loader callable) pairs.
    phases = (
        ("Whisper Models", whisper_objects.run),
        ("Fast Whisper Models", fast_whisper_objects.run),
        ("Hugging Face Objects", load_huggingface_objects.run),
        ("Vosk Models", vosk_models.run),
        ("Piper Models", piper_models.run),
    )
    for index, (label, loader) in enumerate(phases, start=1):
        print(f"\n--- Phase {index}: {label} ---")
        loader()

    print("\n" + separator)
    print("✅ All model loading tasks completed!")
    print(separator)
Comment thread
chcavignx marked this conversation as resolved.


# Allow the script to be executed directly from the command line.
if __name__ == "__main__":
    main()
32 changes: 21 additions & 11 deletions scripts/models/audio/load_huggingface_objects.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
#!/usr/bin/env python3
"""
Downloads and saves Hugging Face models, tokenizers, processors,
and their associated datasets to a local backup in your user cache directory.
"""
"""Script to download and save Hugging Face models, tokenizers, processors,
and their associated datasets to a local backup in your user cache directory."""

import os

from huggingface_hub import snapshot_download
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
from models_check import model_exists

model_names = (
Expand All @@ -32,9 +31,15 @@ def run() -> None:
print(f"Model {model_name} already exists.")
continue
print(f"Downloading and saving {model_name} to {cache_dir}")

snapshot_download(repo_id=model_name, repo_type="model", cache_dir=cache_dir)
print(f"Model saved to: {os.path.join(cache_dir, model_name)}")
try:
snapshot_download(
repo_id=model_name, repo_type="model", cache_dir=cache_dir
)
print(f"Model saved to: {os.path.join(cache_dir, model_name)}")
except RepositoryNotFoundError:
print(f"Model {model_name} not found on Hugging Face.")
except GatedRepoError:
print(f"Model {model_name} is gated and requires authentication.")
print("All huggingface models have been downloaded and saved.")

for data_set_name in data_set_names:
Expand All @@ -43,10 +48,15 @@ def run() -> None:
continue
print(f"Downloading and saving {data_set_name} to {cache_dir}")
# Load a hosted dataset
snapshot_download(
repo_id=data_set_name, repo_type="dataset", cache_dir=cache_dir
)
print(f"Data_sets saved to: {os.path.join(cache_dir, data_set_name)}")
try:
snapshot_download(
repo_id=data_set_name, repo_type="dataset", cache_dir=cache_dir
)
print(f"Data_sets saved to: {os.path.join(cache_dir, data_set_name)}")
except RepositoryNotFoundError:
print(f"Data_set {data_set_name} not found on Hugging Face.")
except GatedRepoError:
print(f"Data_set {data_set_name} is gated and requires authentication.")
print("All data_sets have been downloaded and saved.")


Expand Down
6 changes: 4 additions & 2 deletions scripts/models/audio/models_check.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#!/usr/bin/env python3
"""Script to check if a model exists in the target directory."""

from pathlib import Path


def model_exists(model_name: str, target_dir: str) -> bool:
"""
Check if a model exists in the target directory.
"""Check if a model exists in the target directory.
Handles symlinks and checks for both directories and files (e.g., .pt files).
"""
target_path = Path(target_dir).resolve()
Expand Down
7 changes: 4 additions & 3 deletions scripts/models/audio/piper_models.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# !/usr/bin/env python3
"""Script to move data from data/models/piper to cache/models/piper"""

import os

from utils import config
from utils.config import load_config, setup_python_path

config.setup_python_path()
config = config.load_config()
setup_python_path()
config = load_config()

PIPER_DIR = "models/piper"

Expand Down
5 changes: 2 additions & 3 deletions scripts/models/audio/vosk_models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python3
"""
Downloads and extracts Vosk models.
"""
"""Script to download and extract Vosk speech recognition models
to a local cache directory."""

import os
import zipfile
Expand Down
58 changes: 35 additions & 23 deletions scripts/models/audio/whisper_objects.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#!/usr/bin/env python3
"""
Downloads and saves Hugging Face models, tokenizers, processors,
and their associated datasets to a local backup in your user cache directory.
"""
"""Script to download and save Hugging Face models, tokenizers, processors,
and their associated datasets to a local backup in your user cache directory."""

import os
import urllib.error
Expand All @@ -16,7 +14,9 @@

# Define the target directory
cache_dir = os.path.join(os.path.expanduser("~"), "cache/models/whisper")
MODELS = {

# Base models for all platforms (including Raspberry Pi)
MODELS_BASE = {
"tiny.en": "https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt",
"tiny": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt",
"base.en": "https://openaipublic.azureedge.net/main/whisper/models/25a8566e1d0c1e2231d1c762132cd20e0f96a85d16145c3a00adf5d1ac670ead/base.en.pt",
Expand All @@ -25,20 +25,17 @@
"large-v3-turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt",
}

# Add larger models if not on Raspberry Pi
if not detect_raspberry_pi_model():
MODELS.update(
{
"small.en": "https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt",
"small": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt",
"medium.en": "https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt",
"medium": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
"large-v1": "https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt",
"large-v2": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt",
"large": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt",
"turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt",
}
)
# Extended models for non-Raspberry Pi platforms
MODELS_EXTENDED = {
"small.en": "https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt",
"small": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt",
"medium.en": "https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt",
"medium": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt",
"large-v1": "https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt",
"large-v2": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt",
"large": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt",
"turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt",
}

# Add GPT-2 models if needed
GPT2 = [
Expand All @@ -47,7 +44,16 @@
]


def get_models_to_download() -> dict:
    """Return the name-to-URL mapping of Whisper models for this platform.

    Raspberry Pi hosts get only the small base models; other platforms
    get the base set merged with the extended (larger) set.
    """
    if detect_raspberry_pi_model():
        # Constrained hardware: restrict downloads to the base models.
        return MODELS_BASE
    return {**MODELS_BASE, **MODELS_EXTENDED}


def download_file(url: str, target_dir: str, filename: str = None) -> None:
"""Downloads a file from a URL to a target directory."""
if not filename:
filename = url.split("/")[-1]

Expand Down Expand Up @@ -112,19 +118,25 @@ def download_file(url: str, target_dir: str, filename: str = None) -> None:
print(f"A file system error occurred: {e}")


def run():
def run() -> None:
"""Downloads and saves Whisper models, tokenizers, processors,
and their associated datasets to a local backup in your user cache directory."""
print(f"Target directory: {cache_dir}")

models_to_download = get_models_to_download()
try:
print("Using whisper library to download models...")
for model_name, model_url in MODELS.items():
for model_name, model_url in models_to_download.items():
if model_exists(model_name, cache_dir):
print(f"Model {model_name} already exists.")
continue
print(f"Downloading {model_name} via whisper.load_model...")
whisper.load_model(model_name, download_dir=cache_dir)
except RuntimeError:
for model_name, model_url in MODELS.items():
except RuntimeError as e:
print(
f"Whisper library download failed: {e}. Falling back to manual download..."
)
for model_name, model_url in models_to_download.items():
download_file(model_url, cache_dir, filename=f"{model_name}.pt")

for url in GPT2:
Expand Down
6 changes: 4 additions & 2 deletions src/audio/voice_agent_offline.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,10 @@ def __init__(self, tts_config: AgentConfig):
def _validate_piper_model(self):
    """Ensure the configured Piper TTS model file exists, then load it.

    Raises:
        FileNotFoundError: if no file exists at
            ``self.config.tts_model_path``; the message includes the
            download location for Piper voices.
    """
    # NOTE: the diff-paste span mixed the old print-based handling with
    # the new raise; this is the clean post-change behavior.
    if not os.path.exists(self.config.tts_model_path):
        raise FileNotFoundError(
            f"Piper model not found: {self.config.tts_model_path}\n"
            "Download from: https://huggingface.co/rhasspy/piper-voices"
        )
    # Create a Piper voice object from the validated model path.
    self.voice = PiperVoice.load(self.config.tts_model_path)

Expand Down
Loading