Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions scripts/models/audio/fast_whisper_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,24 @@


def get_models_to_download() -> tuple:
    """
    Pick the Hugging Face model identifiers to fetch for the current platform.

    Returns:
        tuple: `MODELS_NAMES_BASE` alone when running on a Raspberry Pi;
        otherwise `MODELS_NAMES_BASE` followed by `MODELS_NAMES_EXTENDED`.
    """
    # Low-resource Raspberry Pi boards are limited to the base model set.
    if detect_raspberry_pi_model():
        return MODELS_NAMES_BASE
    return MODELS_NAMES_BASE + MODELS_NAMES_EXTENDED


def run() -> None:
"""Downloads and saves Hugging Face models, tokenizers, processors,
and their associated datasets to a local backup in your user cache directory."""
"""
Download the selected Hugging Face models and store them in the user's local cache.

Selects models appropriate for the current platform, skips models that are already present in the cache, downloads any missing models into the configured cache directory, and prints progress messages for each model.
"""
models_to_download = get_models_to_download()
for model_name in models_to_download:
if model_exists(model_name, cache_dir):
Expand All @@ -53,4 +61,4 @@ def run() -> None:


if __name__ == "__main__":
run()
run()
8 changes: 6 additions & 2 deletions scripts/models/audio/load_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@


def main() -> None:
"""Function to load all models"""
"""
Orchestrates loading of all audio-related models in a fixed sequence.

Prints progress banners for each phase and invokes the model-loading routines for Whisper, Fast Whisper, Hugging Face objects, Vosk, and Piper in order.
"""
print("==================================================")
print("🚀 Starting Master Model Loading Process")
print("==================================================")
Expand All @@ -35,4 +39,4 @@ def main() -> None:


if __name__ == "__main__":
main()
main()
9 changes: 6 additions & 3 deletions scripts/models/audio/load_huggingface_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@


def run() -> None:
"""Downloads and saves Hugging Face models, tokenizers, processors,
and their associated datasets to a local backup in your user cache directory."""
"""
Download and save configured Hugging Face models, tokenizers, processors, and datasets to the local user cache.

This function iterates over the module-level `model_names` and `data_set_names`, skipping entries already present in `cache_dir`. For each missing repository it attempts to download a snapshot into `cache_dir` and prints progress and completion messages. If a repository is not found or is gated, it prints a corresponding message and continues with the next item.
"""
# repo_type="model" if None is by default "model" - Not mandatory but for clarity
for model_name in model_names:
if model_exists(model_name, cache_dir):
Expand Down Expand Up @@ -61,4 +64,4 @@ def run() -> None:


if __name__ == "__main__":
run()
run()
11 changes: 8 additions & 3 deletions scripts/models/audio/models_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@


def model_exists(model_name: str, target_dir: str) -> bool:
"""Check if a model exists in the target directory.
Handles symlinks and checks for both directories and files (e.g., .pt files).
"""
Determine whether a model whose name contains the given substring exists in the target directory.

Searches the resolved target directory for any entry whose name contains model_name. Symlinks are followed and both directories and regular files (e.g., model files like `.pt`) are considered matches.

Returns:
True if a matching file or directory exists in target_dir, False otherwise.
"""
target_path = Path(target_dir).resolve()
if not target_path.exists():
Expand All @@ -18,4 +23,4 @@ def model_exists(model_name: str, target_dir: str) -> bool:
# entry.exists() follows symlinks by default
if entry.is_dir() or entry.is_file():
return True
return False
return False
27 changes: 22 additions & 5 deletions scripts/models/audio/whisper_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,29 @@


def get_models_to_download() -> dict:
    """
    Choose the Whisper model download mapping appropriate for this platform.

    Returns:
        dict: `MODELS_BASE` on Raspberry Pi systems; on all other platforms,
        `MODELS_BASE` merged with `MODELS_EXTENDED` (extended entries win on
        key collisions, matching the original merge order).
    """
    if detect_raspberry_pi_model():
        # Keep only the smaller models on constrained Raspberry Pi hardware.
        return MODELS_BASE
    merged = dict(MODELS_BASE)
    merged.update(MODELS_EXTENDED)
    return merged


def download_file(url: str, target_dir: str, filename: str = None) -> None:
"""Downloads a file from a URL to a target directory."""
"""
Download a file from a URL into a target directory, skipping or resuming as appropriate.

Checks for an existing model/file using `model_exists` and skips download if present. Ensures the target directory exists (resolving symlinks), then downloads the URL to the given filename (defaults to the URL's final path segment). If a partial file is present, attempts to resume using HTTP Range requests; if the server does not support resuming, restarts the download. Handles HTTP 416 as an already-complete file and reports network or filesystem errors via printed messages.

Parameters:
url (str): The source URL of the file to download.
target_dir (str): Directory path where the file will be saved; created if missing.
filename (str, optional): Filename to use for the saved file. Defaults to the last path segment of `url`.
"""
if not filename:
filename = url.split("/")[-1]

Expand Down Expand Up @@ -119,8 +133,11 @@ def download_file(url: str, target_dir: str, filename: str = None) -> None:


def run() -> None:
"""Downloads and saves Whisper models, tokenizers, processors,
and their associated datasets to a local backup in your user cache directory."""
"""
Download Whisper models and related tokenizer and processor files into the module cache directory.

Uses the whisper library to fetch models returned by get_models_to_download and saves them under cache_dir; if the library download fails, falls back to manually downloading model weight files. Also downloads configured GPT-2 support files into the same cache location.
"""
print(f"Target directory: {cache_dir}")

models_to_download = get_models_to_download()
Expand All @@ -146,4 +163,4 @@ def run() -> None:


if __name__ == "__main__":
run()
run()
9 changes: 7 additions & 2 deletions src/audio/voice_agent_offline.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,12 @@ def __init__(self, tts_config: AgentConfig):
self._validate_piper_model()

def _validate_piper_model(self):
"""Validate Piper model exists"""
"""
Ensure the configured Piper TTS model file exists and load it into the engine.

Raises:
FileNotFoundError: If `self.config.tts_model_path` does not point to an existing file.
"""
if not os.path.exists(self.config.tts_model_path):
raise FileNotFoundError(
f"Piper model not found: {self.config.tts_model_path}\n"
Expand Down Expand Up @@ -997,4 +1002,4 @@ def main():


if __name__ == "__main__":
main()
main()
Loading