From 2663f2edb5b0968f7ccc7e7d0f910bdf805093b9 Mon Sep 17 00:00:00 2001 From: Adrian Wan Date: Thu, 9 Oct 2025 16:27:07 +0900 Subject: [PATCH 1/3] doc: fix diarize import in example script (#1192) --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ff9d2aa5..e62426bb 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,7 @@ See more examples in other languages [here](EXAMPLES.md). ```python import whisperx import gc +from whisperx.diarize import DiarizationPipeline device = "cuda" audio_file = "audio.mp3" @@ -196,7 +197,7 @@ print(result["segments"]) # after alignment # import gc; import torch; gc.collect(); torch.cuda.empty_cache(); del model_a # 3. Assign speaker labels -diarize_model = whisperx.diarize.DiarizationPipeline(use_auth_token=YOUR_HF_TOKEN, device=device) +diarize_model = DiarizationPipeline(use_auth_token=YOUR_HF_TOKEN, device=device) # add min/max number of speakers if known diarize_segments = diarize_model(audio) From 64e307cc2975c349ecc5ffa436ff1778ba8279c4 Mon Sep 17 00:00:00 2001 From: 3manifold <22544721+3manifold@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:32:02 +0200 Subject: [PATCH 2/3] chore: remove redundant variable & improve load_model function documentation (#1197) * Remove redundant variable * Improve function documentation --- whisperx/asr.py | 3 ++- whisperx/transcribe.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/whisperx/asr.py b/whisperx/asr.py index 0b47127d..67d8a8be 100644 --- a/whisperx/asr.py +++ b/whisperx/asr.py @@ -319,7 +319,8 @@ def load_model( whisper_arch - The name of the Whisper model to load. device - The device to load the model on. compute_type - The compute type to use for the model. - vad_method - The vad method to use. vad_model has higher priority if is not None. + vad_model - The vad model to manually assign. + vad_method - The vad method to use. vad_model has a higher priority if it is not None. options - A dictionary of options to use for the model. language - The language of the model. (use English for now) model - The WhisperModel instance to use. diff --git a/whisperx/transcribe.py b/whisperx/transcribe.py index 46a77468..8eca427a 100644 --- a/whisperx/transcribe.py +++ b/whisperx/transcribe.py @@ -119,7 +119,6 @@ def transcribe_task(args: dict, parser: argparse.ArgumentParser): # Part 1: VAD & ASR Loop results = [] - tmp_results = [] # model = load_model(model_name, device=device, download_root=model_dir) model = load_model( model_name, From 027ec57aeef86051a5591ac46552111e6d718f31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Hnyk?= Date: Thu, 9 Oct 2025 09:34:54 +0200 Subject: [PATCH 3/3] doc: update cpu only example (#1164) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e62426bb..fae1e075 100644 --- a/README.md +++ b/README.md @@ -144,7 +144,7 @@ To label the transcript with speaker ID's (set number of speakers if known e.g. To run on CPU instead of GPU (and for running on Mac OS X): - whisperx path/to/audio.wav --compute_type int8 + whisperx path/to/audio.wav --compute_type int8 --device cpu ### Other languages