diff --git a/config.yaml b/config.yaml index 4240719..3aa0aaa 100644 --- a/config.yaml +++ b/config.yaml @@ -14,13 +14,13 @@ stt: language: "en" transformers: false transformers_engine: "huggingface" - download_root: "cache/models/{transformers_engine if transformers else engine}/" + # download_root: null # Let Python code build the path dynamically # Text-to-Speech configuration tts: engine: "piper" model_name: "jarvis-medium.onnx" - model_path: "cache/models/{engine}/{model_name}" + # model_path: null # Let Python code build the path dynamically cli_mode: false # Voice Activity Detection configuration diff --git a/examples/STT/whisper/STT_offline_whisper_rpi5.md b/examples/STT/whisper/STT_offline_whisper_rpi5.md index 3543d21..0b5db26 100644 --- a/examples/STT/whisper/STT_offline_whisper_rpi5.md +++ b/examples/STT/whisper/STT_offline_whisper_rpi5.md @@ -26,41 +26,29 @@ sudo apt update && sudo apt install -y ffmpeg python3 python3-pip git portaudio1 pip3 install git+https://github.com/openai/whisper.git pip3 install blobfile ``` +To manage resource constraints on the Raspberry Pi 5, it is recommended to use smaller Whisper models. Download and cache the necessary models, vocabulary, and encoder files for offline use with the following script: -Alternatively, you can clone it: +Set REPO_ROOT to your cloned main repository path before running (e.g. /Users/USER_NAME/AI-Autonomous-Assistant). ```bash -git clone https://github.com/openai/whisper.git -cd whisper -pip3 install -e . 
-``` -and Install modules to execute tests into whisper repository - -```bash -pip3 install jiwer scipy pytest -``` - -Load models, vocabulary and encoder files to be able to use whisper offline with the following script: - -```bash -cd ~/AI-Autonomous-Assistant/scripts/models/audio +cd $REPO_ROOT/scripts/models/audio python3 whisper_objects.py ``` -The vocabulary, encoder and models files will be store in ($HOME_USER_DIR)/.cache/whisper +The vocabulary, encoder and models files will be stored in $REPO_ROOT/cache/models/whisper 3. SpeechRecognition: Follow installation from Mainly: Update file links in your local copy of openai_public.py which will be installed in your python folder e.g. /lib/python3.11/site-packages/tiktoken_ext/openai_public.py to point to where you downloaded the files. -Remove the URL "" and replace it with your local copy, e.g. "($HOME_USER_DIR)/.cache/whisper/vocab.bpe" and "($HOME_USER_DIR).cache/whisper/encoder.json" +Remove the URL "" and replace it with your local copy, e.g. 
"$REPO_ROOT/cache/models/whisper/vocab.bpe" and "$REPO_ROOT/cache/models/whisper/encoder.json" ```bash def gpt2(): mergeable_ranks = data_gym_to_mergeable_bpe_ranks( - vocab_bpe_file="/$HOME/.cache/whisper/vocab.bpe", - encoder_json_file="/$HOME/.cache/whisper/encoder.json", + vocab_bpe_file="$REPO_ROOT/cache/models/whisper/vocab.bpe", + encoder_json_file="$REPO_ROOT/cache/models/whisper/encoder.json", ) ``` @@ -105,7 +93,7 @@ Secondly load models and data sets from: -with the following python script that will store the model and data set in ($HOME_USER_DIR)/.cache/huggingface directory +with the following python script that will store the model and data set in $REPO_ROOT/ cache/models/huggingface directory (following : https://huggingface.co/docs/huggingface_hub/guides/download) ```bash diff --git a/scripts/models/audio/fast_whisper_objects.py b/scripts/models/audio/fast_whisper_objects.py index d01cca7..c78ddc9 100644 --- a/scripts/models/audio/fast_whisper_objects.py +++ b/scripts/models/audio/fast_whisper_objects.py @@ -1,4 +1,7 @@ #!/usr/bin/env python3 +"""Script to download and save Hugging Face models, tokenizers, processors, +and their associated datasets to a local backup in your user cache directory.""" + import os from huggingface_hub import snapshot_download @@ -6,31 +9,39 @@ from src.utils.sysutils import detect_raspberry_pi_model -MODELS_NAMES = ( +MODELS_NAMES_BASE = ( "Systran/faster-whisper-small", "Systran/faster-whisper-small.en", "Systran/faster-whisper-tiny", "Systran/faster-whisper-tiny.en", "Systran/faster-distil-whisper-small.en", ) -# Add larger models if not on Raspberry Pi -if not detect_raspberry_pi_model(): - MODELS_NAMES += ( - "Systran/faster-whisper-base", - "Systran/faster-whisper-base.en", - "Systran/faster-whisper-medium", - "Systran/faster-whisper-medium.en", - "Systran/faster-whisper-large-v3", - "Systran/faster-distil-whisper-large-v3", - ) + +MODELS_NAMES_EXTENDED = ( + "Systran/faster-whisper-base", + 
"Systran/faster-whisper-base.en", + "Systran/faster-whisper-medium", + "Systran/faster-whisper-medium.en", + "Systran/faster-whisper-large-v3", + "Systran/faster-distil-whisper-large-v3", +) cache_dir = os.path.join(os.path.expanduser("~"), "cache/models/huggingface") +def get_models_to_download() -> tuple: + """Get the list of models to download based on platform.""" + # Add larger models if not on Raspberry Pi + if not detect_raspberry_pi_model(): + return MODELS_NAMES_BASE + MODELS_NAMES_EXTENDED + return MODELS_NAMES_BASE + + def run() -> None: """Downloads and saves Hugging Face models, tokenizers, processors, and their associated datasets to a local backup in your user cache directory.""" - for model_name in MODELS_NAMES: + models_to_download = get_models_to_download() + for model_name in models_to_download: if model_exists(model_name, cache_dir): print(f"Model {model_name} already exists.") continue diff --git a/scripts/models/audio/load_all.py b/scripts/models/audio/load_all.py new file mode 100644 index 0000000..1b920c0 --- /dev/null +++ b/scripts/models/audio/load_all.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +"""Script to load all models.""" + +import fast_whisper_objects +import load_huggingface_objects +import piper_models +import vosk_models +import whisper_objects + + +def main() -> None: + """Function to load all models""" + print("==================================================") + print("🚀 Starting Master Model Loading Process") + print("==================================================") + + print("\n--- Phase 1: Whisper Models ---") + whisper_objects.run() + + print("\n--- Phase 2: Fast Whisper Models ---") + fast_whisper_objects.run() + + print("\n--- Phase 3: Hugging Face Objects ---") + load_huggingface_objects.run() + + print("\n--- Phase 4: Vosk Models ---") + vosk_models.run() + + print("\n--- Phase 5: Piper Models ---") + piper_models.run() + + print("\n==================================================") + print("✅ All model 
loading tasks completed!") + print("==================================================") + + +if __name__ == "__main__": + main() diff --git a/scripts/models/audio/load_huggingface_objects.py b/scripts/models/audio/load_huggingface_objects.py index c968ed4..07116f4 100644 --- a/scripts/models/audio/load_huggingface_objects.py +++ b/scripts/models/audio/load_huggingface_objects.py @@ -1,12 +1,11 @@ #!/usr/bin/env python3 -""" -Downloads and saves Hugging Face models, tokenizers, processors, -and their associated datasets to a local backup in your user cache directory. -""" +"""Script to download and save Hugging Face models, tokenizers, processors, +and their associated datasets to a local backup in your user cache directory.""" import os from huggingface_hub import snapshot_download +from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError from models_check import model_exists model_names = ( @@ -32,9 +31,15 @@ def run() -> None: print(f"Model {model_name} already exists.") continue print(f"Downloading and saving {model_name} to {cache_dir}") - - snapshot_download(repo_id=model_name, repo_type="model", cache_dir=cache_dir) - print(f"Model saved to: {os.path.join(cache_dir, model_name)}") + try: + snapshot_download( + repo_id=model_name, repo_type="model", cache_dir=cache_dir + ) + print(f"Model saved to: {os.path.join(cache_dir, model_name)}") + except RepositoryNotFoundError: + print(f"Model {model_name} not found on Hugging Face.") + except GatedRepoError: + print(f"Model {model_name} is gated and requires authentication.") print("All huggingface models have been downloaded and saved.") for data_set_name in data_set_names: @@ -43,10 +48,15 @@ def run() -> None: continue print(f"Downloading and saving {data_set_name} to {cache_dir}") # Load a hosted dataset - snapshot_download( - repo_id=data_set_name, repo_type="dataset", cache_dir=cache_dir - ) - print(f"Data_sets saved to: {os.path.join(cache_dir, data_set_name)}") + try: + snapshot_download( + 
repo_id=data_set_name, repo_type="dataset", cache_dir=cache_dir + ) + print(f"Data_sets saved to: {os.path.join(cache_dir, data_set_name)}") + except RepositoryNotFoundError: + print(f"Data_set {data_set_name} not found on Hugging Face.") + except GatedRepoError: + print(f"Data_set {data_set_name} is gated and requires authentication.") print("All data_sets have been downloaded and saved.") diff --git a/scripts/models/audio/models_check.py b/scripts/models/audio/models_check.py index 86a4057..2e0f423 100644 --- a/scripts/models/audio/models_check.py +++ b/scripts/models/audio/models_check.py @@ -1,9 +1,11 @@ +#!/usr/bin/env python3 +"""Script to check if a model exists in the target directory.""" + from pathlib import Path def model_exists(model_name: str, target_dir: str) -> bool: - """ - Check if a model exists in the target directory. + """Check if a model exists in the target directory. Handles symlinks and checks for both directories and files (e.g., .pt files). """ target_path = Path(target_dir).resolve() diff --git a/scripts/models/audio/piper_models.py b/scripts/models/audio/piper_models.py index fc1fe54..5a7e257 100644 --- a/scripts/models/audio/piper_models.py +++ b/scripts/models/audio/piper_models.py @@ -1,11 +1,12 @@ # !/usr/bin/env python3 +"""Script to move data from data/models/piper to cache/models/piper""" import os -from utils import config +from utils.config import load_config, setup_python_path -config.setup_python_path() -config = config.load_config() +setup_python_path() +config = load_config() PIPER_DIR = "models/piper" diff --git a/scripts/models/audio/vosk_models.py b/scripts/models/audio/vosk_models.py index 31c464e..88e3ce8 100644 --- a/scripts/models/audio/vosk_models.py +++ b/scripts/models/audio/vosk_models.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -""" -Downloads and extracts Vosk models. 
-""" +"""Script to download and extract Vosk speech recognition models +to a local cache directory.""" import os import zipfile diff --git a/scripts/models/audio/whisper_objects.py b/scripts/models/audio/whisper_objects.py index ca2af77..f7349bd 100644 --- a/scripts/models/audio/whisper_objects.py +++ b/scripts/models/audio/whisper_objects.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 -""" -Downloads and saves Hugging Face models, tokenizers, processors, -and their associated datasets to a local backup in your user cache directory. -""" +"""Script to download and save Hugging Face models, tokenizers, processors, +and their associated datasets to a local backup in your user cache directory.""" import os import urllib.error @@ -16,7 +14,9 @@ # Define the target directory cache_dir = os.path.join(os.path.expanduser("~"), "cache/models/whisper") -MODELS = { + +# Base models for all platforms (including Raspberry Pi) +MODELS_BASE = { "tiny.en": "https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt", "tiny": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt", "base.en": "https://openaipublic.azureedge.net/main/whisper/models/25a8566e1d0c1e2231d1c762132cd20e0f96a85d16145c3a00adf5d1ac670ead/base.en.pt", @@ -25,20 +25,17 @@ "large-v3-turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt", } -# Add larger models if not on Raspberry Pi -if not detect_raspberry_pi_model(): - MODELS.update( - { - "small.en": "https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt", - "small": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt", - "medium.en": 
"https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt", - "medium": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt", - "large-v1": "https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt", - "large-v2": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt", - "large": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt", - "turbo": "https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt", - } - ) +# Extended models for non-Raspberry Pi platforms +MODELS_EXTENDED = { + "small.en": "https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt", + "small": "https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt", + "medium.en": "https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt", + "medium": "https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt", + "large-v1": "https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt", + "large-v2": "https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt", + "large": "https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt", + "turbo": 
"https://openaipublic.azureedge.net/main/whisper/models/aff26ae408abcba5fbf8813c21e62b0941638c5f6eebfb145be0c9839262a19a/large-v3-turbo.pt", +} # Add GPT-2 models if needed GPT2 = [ @@ -47,7 +44,16 @@ ] +def get_models_to_download() -> dict: + """Get the dictionary of models to download based on platform.""" + # Add larger models if not on Raspberry Pi + if not detect_raspberry_pi_model(): + return {**MODELS_BASE, **MODELS_EXTENDED} + return MODELS_BASE + + def download_file(url: str, target_dir: str, filename: str = None) -> None: + """Downloads a file from a URL to a target directory.""" if not filename: filename = url.split("/")[-1] @@ -112,19 +118,25 @@ def download_file(url: str, target_dir: str, filename: str = None) -> None: print(f"A file system error occurred: {e}") -def run(): +def run() -> None: + """Downloads and saves Whisper models, tokenizers, processors, + and their associated datasets to a local backup in your user cache directory.""" print(f"Target directory: {cache_dir}") + models_to_download = get_models_to_download() try: print("Using whisper library to download models...") - for model_name, model_url in MODELS.items(): + for model_name, model_url in models_to_download.items(): if model_exists(model_name, cache_dir): print(f"Model {model_name} already exists.") continue print(f"Downloading {model_name} via whisper.load_model...") whisper.load_model(model_name, download_dir=cache_dir) - except RuntimeError: - for model_name, model_url in MODELS.items(): + except RuntimeError as e: + print( + f"Whisper library download failed: {e}. Falling back to manual download..." 
+ ) + for model_name, model_url in models_to_download.items(): download_file(model_url, cache_dir, filename=f"{model_name}.pt") for url in GPT2: diff --git a/src/audio/voice_agent_offline.py b/src/audio/voice_agent_offline.py index 6e0fab7..f416113 100644 --- a/src/audio/voice_agent_offline.py +++ b/src/audio/voice_agent_offline.py @@ -535,8 +535,10 @@ def __init__(self, tts_config: AgentConfig): def _validate_piper_model(self): """Validate Piper model exists""" if not os.path.exists(self.config.tts_model_path): - print(f"⚠️ Piper model not found: {self.config.tts_model_path}") - print("Download from: https://huggingface.co/rhasspy/piper-voices") + raise FileNotFoundError( + f"Piper model not found: {self.config.tts_model_path}\n" + "Download from: https://huggingface.co/rhasspy/piper-voices" + ) # Create a Piper object self.voice = PiperVoice.load(self.config.tts_model_path)