diff --git a/autosub/__init__.py b/autosub/__init__.py index d85e4db90..2a1004d3c 100644 --- a/autosub/__init__.py +++ b/autosub/__init__.py @@ -17,6 +17,7 @@ import wave import json import requests +import pysubs2 try: from json.decoder import JSONDecodeError except ImportError: @@ -34,7 +35,9 @@ DEFAULT_CONCURRENCY = 10 DEFAULT_SRC_LANGUAGE = 'en' DEFAULT_DST_LANGUAGE = 'en' - +MAX_EXT_REGION_LENGTH = 10000 +# Maximum length, in milliseconds, of a single speech-to-text region +# when external speech regions are supplied; longer regions are split def percentile(arr, percent): """ @@ -210,27 +213,27 @@ def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_reg threshold = percentile(energies, 0.2) - elapsed_time = 0 + region_end = 0 regions = [] region_start = None for energy in energies: is_silence = energy <= threshold - max_exceeded = region_start and elapsed_time - region_start >= max_region_size + max_exceeded = region_start and region_end - region_start >= max_region_size if (max_exceeded or is_silence) and region_start: - if elapsed_time - region_start >= min_region_size: - regions.append((region_start, elapsed_time)) + if region_end - region_start >= min_region_size: + regions.append((region_start, region_end)) region_start = None elif (not region_start) and (not is_silence): - region_start = elapsed_time - elapsed_time += chunk_duration + region_start = region_end + region_end += chunk_duration return regions -def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments +def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments,too-many-branches,too-many-statements source_path, output=None, concurrency=DEFAULT_CONCURRENCY, @@ -238,13 +241,39 @@ def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments dst_language=DEFAULT_DST_LANGUAGE, subtitle_file_format=DEFAULT_SUBTITLE_FORMAT, api_key=None, + ext_regions=None, + ext_max_length=MAX_EXT_REGION_LENGTH ): """ Given an input audio/video file, generate subtitles in the 
specified language and format. """ audio_filename, audio_rate = extract_audio(source_path) - regions = find_speech_regions(audio_filename) + if not ext_regions: + regions = find_speech_regions(audio_filename) + else: + regions = [] + for event in ext_regions.events: + if not event.is_comment: + # not a comment region + if event.duration <= ext_max_length: + regions.append((float(event.start) / 1000.0, + float(event.start + event.duration) / 1000.0)) + else: + # split too long regions + elapsed_time = event.duration + start_time = event.start + reader = wave.open(audio_filename) + audio_file_length = float(reader.getnframes()) / float(reader.getframerate()) + if float(elapsed_time) / 1000.0 > audio_file_length: + elapsed_time = math.floor(audio_file_length) * 1000 + while elapsed_time > ext_max_length: + regions.append((float(start_time) / 1000.0, + float(start_time + ext_max_length) / 1000.0)) + elapsed_time = elapsed_time - ext_max_length + start_time = start_time + ext_max_length + regions.append((float(start_time) / 1000.0, + float(start_time + elapsed_time) / 1000.0)) pool = multiprocessing.Pool(concurrency) converter = FLACConverter(source_path=audio_filename) @@ -362,6 +391,11 @@ def main(): parser.add_argument('-o', '--output', help="Output path for subtitles (by default, subtitles are saved in \ the same directory and name as the source path)") + parser.add_argument('-esr', '--external-speech-regions', + help="Path to the external speech regions, \ + which is one of the formats that pysubs2 supports \ + and overrides the default method to find speech regions", + nargs="?", metavar="path") parser.add_argument('-F', '--format', help="Destination subtitle format", default=DEFAULT_SUBTITLE_FORMAT) parser.add_argument('-S', '--src-language', help="Language spoken in source file", @@ -394,18 +428,38 @@ def main(): return 1 try: - subtitle_file_path = generate_subtitles( - source_path=args.source_path, - concurrency=args.concurrency, - 
src_language=args.src_language, - dst_language=args.dst_language, - api_key=args.api_key, - subtitle_file_format=args.format, - output=args.output, - ) - print("Subtitles file created at {}".format(subtitle_file_path)) + if args.external_speech_regions: + print("Using external speech regions.") + ext_regions = pysubs2.SSAFile.load(args.external_speech_regions) + subtitle_file_path = generate_subtitles( + source_path=args.source_path, + concurrency=args.concurrency, + src_language=args.src_language, + dst_language=args.dst_language, + api_key=args.api_key, + subtitle_file_format=args.format, + output=args.output, + ext_regions=ext_regions + ) + print("Subtitles file created at {}".format(subtitle_file_path)) + + else: + subtitle_file_path = generate_subtitles( + source_path=args.source_path, + concurrency=args.concurrency, + src_language=args.src_language, + dst_language=args.dst_language, + api_key=args.api_key, + subtitle_file_format=args.format, + output=args.output + ) + print("Subtitles file created at {}".format(subtitle_file_path)) + except KeyboardInterrupt: return 1 + except pysubs2.exceptions.Pysubs2Error: + print("Error: pysubs2.exceptions. Check your file format.") + return 1 return 0 diff --git a/setup.py b/setup.py index c9ac20c0a..7f2835653 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ install_requires=[ 'google-api-python-client>=1.4.2', 'requests>=2.3.0', + 'pysubs2>=0.2.4', 'pysrt>=1.0.1', 'progressbar2>=3.34.3', 'six>=1.11.0',