diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8ded08e..a1caae3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,4 +54,4 @@ jobs: - name: Test with unittest working-directory: ./tests run: | - uv run python -m unittest discover -s . -p 'test_*.py' + uv run --extra full python -m unittest discover -s . -p 'test_*.py' diff --git a/README.md b/README.md index 23bcee6..33e7861 100644 --- a/README.md +++ b/README.md @@ -77,10 +77,10 @@ into `.lrc` subtitles with LLMs such as ## Installation ⚙️ -1. Install CUDA 11.x and [cuDNN 8 for CUDA 11](https://developer.nvidia.com/cudnn) first according +1. Install [CUDA](https://developer.nvidia.com/cuda-toolkit) and [cuDNN](https://developer.nvidia.com/cudnn) according to https://opennmt.net/CTranslate2/installation.html to enable `faster-whisper`. - `faster-whisper` also needs [cuBLAS for CUDA 11](https://developer.nvidia.com/cublas) installed. + `faster-whisper` also needs [cuBLAS](https://developer.nvidia.com/cublas) installed.
For Windows Users (click to expand) @@ -103,7 +103,7 @@ into `.lrc` subtitles with LLMs such as 3. Install [ffmpeg](https://ffmpeg.org/download.html) and add `bin` directory to your `PATH`. -4. This project can be installed from PyPI: +4. Install from PyPI: ```shell pip install openlrc @@ -115,20 +115,12 @@ into `.lrc` subtitles with LLMs such as pip install git+https://github.com/zh-plus/openlrc ``` -5. Install the latest [faster-whisper](https://github.com/guillaumekln/faster-whisper) from source: - ```shell - pip install "faster-whisper @ https://github.com/SYSTRAN/faster-whisper/archive/8327d8cc647266ed66f6cd878cf97eccface7351.tar.gz" - ``` +5. **(Optional)** If you need noise suppression (`noise_suppress=True`), install the `full` extra, + which includes torch and DeepFilterNet: -6. Install [PyTorch](https://pytorch.org/get-started/locally/): - ```shell - pip install --force-reinstall torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 - ``` - -7. Fix the `typing-extensions` issue: - ```shell - pip install typing-extensions -U - ``` + ```shell + pip install "openlrc[full]" + ``` ## Lightweight Imports @@ -156,8 +148,9 @@ Heavy dependencies are loaded only when the corresponding features are first use - `lingua` is loaded when language detection helpers are used. > [!NOTE] -> Lightweight imports improve import-time behavior only. They do not change installation requirements: -> `pip install openlrc` still installs the full dependency set declared by the package. +> The base `pip install openlrc` does **not** include torch or DeepFilterNet. +> These are only installed with `pip install "openlrc[full]"` and are only needed +> for noise suppression (`noise_suppress=True`). ## Usage 🐍 @@ -213,7 +206,7 @@ if __name__ == '__main__': lrcer = LRCer(transcription=TranscriptionConfig(vad_options=vad_options)) lrcer.run('./data/test.mp3', target_lang='zh-cn') - # Enhance the audio using noise suppression (consume more time).
+ # Enhance the audio using noise suppression (requires openlrc[full], consumes more time). lrcer.run('./data/test.mp3', target_lang='zh-cn', noise_suppress=True) # Change the translation model diff --git a/openlrc/preprocess.py b/openlrc/preprocess.py index c299cad..7fca01b 100644 --- a/openlrc/preprocess.py +++ b/openlrc/preprocess.py @@ -61,8 +61,15 @@ def noise_suppression(self, audio_paths: list[Path], atten_lim_db: int = 15): if not audio_paths: return [] - import torch - from df.enhance import enhance, init_df, load_audio, save_audio + # torch and DeepFilterNet ship only with the optional `full` extra, so they are imported lazily here. + try: + import torch + from df.enhance import enhance, init_df, load_audio, save_audio + except ImportError as err: + raise ImportError( + "Noise suppression requires torch and deepfilternet. " + "Install them with: pip install openlrc[full]" + ) from err if "atten_lim_db" in self.options: atten_lim_db = self.options["atten_lim_db"] diff --git a/openlrc/utils.py b/openlrc/utils.py index 222d9a4..1784153 100644 --- a/openlrc/utils.py +++ b/openlrc/utils.py @@ -12,7 +12,6 @@ if TYPE_CHECKING: from spacy.language import Language as SpacyLanguage - import torch from openlrc.defaults import supported_languages_lingua from openlrc.logger import logger @@ -102,12 +101,15 @@ def get_audio_duration(path: str | Path) -> float: return audio.duration -def release_memory(model: torch.nn.Module) -> None: - import torch +def release_memory(model: Any) -> None: + """Empty the torch CUDA cache when `model` is a torch module; do nothing if torch is not installed.""" + try: + import torch + except ImportError: + return - # gc.collect() - torch.cuda.empty_cache() - del model + if isinstance(model, torch.nn.Module): + torch.cuda.empty_cache() def normalize(text): diff --git a/pyproject.toml b/pyproject.toml index 93fc4fb..8584d0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,15 +36,21 @@ dependencies = [ "pysbd>=0.3.4,<0.4", "faster-whisper>=1.1.1,<2", "ffmpeg-normalize>=1.27.5,<2", - "deepfilternet>=0.5.6,<0.6", "google-genai==1.11.0", "json_repair==0.25.2", "onnxruntime>=1.20.0,<1.24; python_version < '3.11'", "onnxruntime>=1.20.0,<2; python_version >= 
'3.11'", + "pip>=25.1", +] + +[project.optional-dependencies] +# Noise suppression via DeepFilterNet (requires torch). +# Install with: pip install openlrc[full] +# Only needed when using noise_suppress=True in LRCer.run() or Preprocessor.run(). +full = [ "torch>=2.6.0", - "torchvision>=0.21.0", "torchaudio>=2.0.0", - "pip>=25.1", + "deepfilternet>=0.5.6,<0.6", ] [project.urls]