Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion openlrc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Copyright (C) 2024. Hao Zheng
# All rights reserved.

from openlrc.config import TranscriptionConfig, TranslationConfig
from openlrc.models import ModelConfig, ModelProvider, list_chatbot_models
from openlrc.openlrc import LRCer, TranscriptionConfig, TranslationConfig
from openlrc.openlrc import LRCer

__all__ = ("LRCer", "TranscriptionConfig", "TranslationConfig", "ModelConfig", "list_chatbot_models", "ModelProvider")
__version__ = "1.6.1"
Expand Down
58 changes: 58 additions & 0 deletions openlrc/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright (C) 2025. Hao Zheng
# All rights reserved.

from dataclasses import dataclass
from pathlib import Path

from openlrc.models import ModelConfig


@dataclass
class TranscriptionConfig:
    """
    Configuration for the transcription stage.

    Groups all knobs that control speech-to-text: which whisper model runs,
    on what device, with what numeric precision, plus pass-through option
    dicts for the ASR, VAD, and audio-preprocessing components.

    Args:
        whisper_model: Name of whisper model. Default: ``large-v3``
        compute_type: Computation type (``default``, ``int8``, ``int8_float16``,
            ``int16``, ``float16``, ``float32``). Default: ``float16``
        device: Device for computation. Default: ``cuda``
        asr_options: Parameters for whisper model.
            ``None`` presumably falls back to the package defaults
            (``openlrc.defaults``) — confirm against the consumer.
        vad_options: Parameters for VAD model. ``None`` presumably means
            package defaults — confirm against the consumer.
        preprocess_options: Options for audio preprocessing. ``None``
            presumably means package defaults — confirm against the consumer.
    """

    whisper_model: str = "large-v3"
    compute_type: str = "float16"
    device: str = "cuda"
    asr_options: dict | None = None
    vad_options: dict | None = None
    preprocess_options: dict | None = None


@dataclass
class TranslationConfig:
    """
    Configuration for the translation stage.

    Groups all knobs that control LLM-based subtitle translation: model
    selection, spending cap, parallelism, networking, and glossary handling.

    Args:
        chatbot_model: The chatbot model to use. Can be a string like
            ``'gpt-4.1-nano'`` or ``'provider:model-name'``, or a ``ModelConfig``
            instance. Default: ``gpt-4.1-nano``
        fee_limit: Maximum fee per translation call in USD. Default: ``0.8``
        consumer_thread: Number of parallel translation threads. Default: ``4``
        proxy: Proxy for API requests. e.g. ``'http://127.0.0.1:7890'``.
            ``None`` disables proxying.
        base_url_config: Base URL dict for OpenAI & Anthropic.
            ``None`` presumably uses each provider's default endpoint —
            confirm against the consumer.
        glossary: Dictionary or path (``str`` or ``Path``) mapping source
            words to their required translations.
        retry_model: Fallback model for translation retries. ``None``
            presumably retries with the primary model — confirm against
            the consumer.
        is_force_glossary_used: Force glossary usage in context.
            Default: ``False``
    """

    chatbot_model: str | ModelConfig = "gpt-4.1-nano"
    fee_limit: float = 0.8
    consumer_thread: int = 4
    proxy: str | None = None
    base_url_config: dict | None = None
    glossary: dict | str | Path | None = None
    retry_model: str | ModelConfig | None = None
    is_force_glossary_used: bool = False
74 changes: 14 additions & 60 deletions openlrc/openlrc.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,28 @@
# Copyright (C) 2025. Hao Zheng
# All rights reserved.

from __future__ import annotations

import concurrent.futures
import json
import shutil
import traceback
import warnings
from copy import deepcopy
from dataclasses import dataclass
from pathlib import Path
from pprint import pformat
from queue import Queue
from threading import Lock
from typing import Any
from typing import TYPE_CHECKING, Any

from faster_whisper.transcribe import Segment
if TYPE_CHECKING:
from faster_whisper.transcribe import Segment

from openlrc.context import TranslateInfo
from openlrc.config import TranscriptionConfig, TranslationConfig
from openlrc.defaults import default_asr_options, default_preprocess_options, default_vad_options
from openlrc.logger import logger
from openlrc.models import ModelConfig
from openlrc.opt import SubtitleOptimizer
from openlrc.preprocess import Preprocessor
from openlrc.subtitle import BilingualSubtitle, Subtitle
from openlrc.transcribe import Transcriber
from openlrc.translate import LLMTranslator
from openlrc.utils import (
Timer,
extend_filename,
Expand All @@ -38,57 +36,6 @@
_SENTINEL = object()


@dataclass
class TranscriptionConfig:
"""
Configuration for the transcription stage.

Args:
whisper_model: Name of whisper model. Default: ``large-v3``
compute_type: Computation type (``default``, ``int8``, ``int8_float16``,
``int16``, ``float16``, ``float32``). Default: ``float16``
device: Device for computation. Default: ``cuda``
asr_options: Parameters for whisper model.
vad_options: Parameters for VAD model.
preprocess_options: Options for audio preprocessing.
"""

whisper_model: str = "large-v3"
compute_type: str = "float16"
device: str = "cuda"
asr_options: dict | None = None
vad_options: dict | None = None
preprocess_options: dict | None = None


@dataclass
class TranslationConfig:
"""
Configuration for the translation stage.

Args:
chatbot_model: The chatbot model to use. Can be a string like
``'gpt-4.1-nano'`` or ``'provider:model-name'``, or a ``ModelConfig``
instance. Default: ``gpt-4.1-nano``
fee_limit: Maximum fee per translation call in USD. Default: ``0.8``
consumer_thread: Number of parallel translation threads. Default: ``4``
proxy: Proxy for API requests. e.g. ``'http://127.0.0.1:7890'``
base_url_config: Base URL dict for OpenAI & Anthropic.
glossary: Dictionary or path mapping source words to translations.
retry_model: Fallback model for translation retries.
is_force_glossary_used: Force glossary usage in context. Default: ``False``
"""

chatbot_model: str | ModelConfig = "gpt-4.1-nano"
fee_limit: float = 0.8
consumer_thread: int = 4
proxy: str | None = None
base_url_config: dict | None = None
glossary: dict | str | Path | None = None
retry_model: str | ModelConfig | None = None
is_force_glossary_used: bool = False


class LRCer:
"""
Orchestrator for audio/video transcription and translation.
Expand Down Expand Up @@ -220,9 +167,11 @@ def __init__(
self.transcribed_paths = []

@property
def transcriber(self) -> Transcriber:
def transcriber(self):
"""Lazily initialize and return the Transcriber instance (thread-safe)."""
if self._transcriber is None:
from openlrc.transcribe import Transcriber

with self._transcriber_lock:
if self._transcriber is None:
self._transcriber = Transcriber(
Expand Down Expand Up @@ -567,6 +516,9 @@ def _translate(self, audio_name, target_lang, transcribed_opt_sub, translated_pa
This method handles the translation process, including context preparation,
actual translation, and post-processing of the translated subtitles.
"""
from openlrc.context import TranslateInfo
from openlrc.translate import LLMTranslator

context = TranslateInfo(
title=audio_name, audio_type="Movie", glossary=self.glossary, forced_glossary=self.is_force_glossary_used
)
Expand Down Expand Up @@ -810,6 +762,8 @@ def pre_process(self, paths, noise_suppress=False):
extract_audio(path)
paths[i] = audio_path

from openlrc.preprocess import Preprocessor

return Preprocessor(paths, options=self.preprocess_options).run(noise_suppress)

@staticmethod
Expand Down
5 changes: 3 additions & 2 deletions openlrc/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
from concurrent.futures import ProcessPoolExecutor
from pathlib import Path

import torch
from df.enhance import enhance, init_df, load_audio, save_audio
from ffmpeg_normalize import FFmpegNormalize
from tqdm import tqdm

Expand Down Expand Up @@ -63,6 +61,9 @@ def noise_suppression(self, audio_paths: list[Path], atten_lim_db: int = 15):
if not audio_paths:
return []

import torch
from df.enhance import enhance, init_df, load_audio, save_audio

if "atten_lim_db" in self.options:
atten_lim_db = self.options["atten_lim_db"]

Expand Down
24 changes: 20 additions & 4 deletions tests/test_preprocess.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Copyright (C) 2024. Hao Zheng
# All rights reserved.
import shutil
import sys
import types
import unittest
from pathlib import Path
from unittest.mock import Mock, patch
Expand All @@ -9,16 +11,30 @@

from openlrc.preprocess import Preprocessor

# Python 3.10's unittest.mock.patch has a bug where @patch("df.enhance.enhance")
# caches the function object `enhance` as the resolved `df.enhance`, causing
# subsequent @patch("df.enhance.save_audio") to look up `save_audio` on the
# function instead of the module (AttributeError). Fixed in Python 3.11+.
# Workaround: inject a fake df.enhance module via sys.modules and use
# patch.object() to avoid string-based path resolution entirely.
_df_enhance = types.ModuleType("df.enhance")
_df_enhance.enhance = lambda *a, **kw: None  # type: ignore[attr-defined]
_df_enhance.init_df = lambda *a, **kw: None  # type: ignore[attr-defined]
_df_enhance.load_audio = lambda *a, **kw: None  # type: ignore[attr-defined]
_df_enhance.save_audio = lambda *a, **kw: None  # type: ignore[attr-defined]
# setdefault() keeps a real `df` package if one is installed. Crucially,
# rebind ``_df_enhance`` to whatever module actually ends up in sys.modules,
# so the ``patch.object(_df_enhance, ...)`` decorators below always target
# the module that ``from df.enhance import ...`` will resolve to — with the
# old code, an installed real `df` left the patches on the unused fake.
_df_pkg = sys.modules.setdefault("df", types.ModuleType("df"))
_df_enhance = sys.modules.setdefault("df.enhance", _df_enhance)
# Link the submodule onto the parent package so `import df; df.enhance`
# works like a normal package, not only the sys.modules fast path.
if not hasattr(_df_pkg, "enhance"):
    _df_pkg.enhance = _df_enhance  # type: ignore[attr-defined]


class TestPreprocessor(unittest.TestCase):
def tearDown(self) -> None:
preprocessed_path = Path("data/preprocessed")
shutil.rmtree(preprocessed_path, ignore_errors=True)

@patch("openlrc.preprocess.enhance")
@patch("openlrc.preprocess.init_df")
@patch("openlrc.preprocess.load_audio")
@patch("openlrc.preprocess.save_audio")
@patch.object(_df_enhance, "enhance")
@patch.object(_df_enhance, "init_df")
@patch.object(_df_enhance, "load_audio")
@patch.object(_df_enhance, "save_audio")
@patch("openlrc.preprocess.release_memory")
def test_noise_suppression_returns_path_objects(
self, mock_release_memory, mock_save_audio, mock_load_audio, mock_init_df, mock_enhance
Expand Down
Loading