diff --git a/.gitignore b/.gitignore index 1d42b5b..a05c58b 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ dist/ # PyCharm .idea +venv # Documentation docs/_build/ \ No newline at end of file diff --git a/tests/subtitles/styles2.srt b/tests/subtitles/styles2.srt new file mode 100644 index 0000000..8e60436 --- /dev/null +++ b/tests/subtitles/styles2.srt @@ -0,0 +1,23 @@ +1 +00:00:07,120 --> 00:00:09,480 +Musique douce + +2 +00:00:09,720 --> 00:00:29,520 +--- + +3 +00:00:32,439 --> 00:00:35,320 +Some italic and normal coloured text. By the way, 2 < 3 ! + +4 +00:00:35,560 --> 00:02:25,240 +--- + +5 +00:02:25,480 --> 00:02:27,440 +-Stéphane ? Où on se gare ? + +6 +00:02:27,680 --> 00:02:29,280 +-Euh, là-bas, au chêne. \ No newline at end of file diff --git a/tests/subtitles/styles2.vtt b/tests/subtitles/styles2.vtt new file mode 100644 index 0000000..4a07608 --- /dev/null +++ b/tests/subtitles/styles2.vtt @@ -0,0 +1,58 @@ +WEBVTT + +STYLE +::cue { + font-family: Verdana, Arial, Tiresias; + line-height: 125%; +} +::cue(.white) { + color: #ffffff; +} +::cue(.lime) { + color: #00ff00; +} +::cue(.cyan) { + color: #00ffff; +} +::cue(.red) { + color: #ff0000; +} +::cue(.yellow) { + color: #ffff00; +} +::cue(.magenta) { + color: #ff00ff; +} +::cue(.blue) { + color: #0000ff; +} +::cue(.black) { + color: #000000; +} +::cue(.bg_black) { + background: rgba(0, 0, 0, 0.76); +} + +sub0 +00:00:07.120 --> 00:00:09.480 line:-1 +Musique douce + +sub1 +00:00:09.720 --> 00:00:29.520 align:left line:-1 +--- + +sub2 +00:00:32.439 --> 00:00:35.320 line:-1 +Some italic and normal coloured text. By the way, 2 < 3 ! + +sub3 +00:00:35.560 --> 00:02:25.240 align:left line:-1 +--- + +sub4 +00:02:25.480 --> 00:02:27.440 line:-1 +-Stéphane ? Où on se gare ? + +sub5 +00:02:27.680 --> 00:02:29.280 align:left line:-1 +-Euh, là-bas, au chêne. \ No newline at end of file diff --git a/tests/test_srt.py b/tests/test_srt.py index eed186d..34381b1 100644 --- a/tests/test_srt.py +++ b/tests/test_srt.py @@ -33,3 +33,18 @@ def test_convert_from_srt_to_vtt_and_back_gives_same_file(self): converted = f.read() self.assertEqual(original.strip(), converted.strip()) + + def test_convert_to_srt_with_styles(self): + copy(self._get_file('styles2.vtt'), OUTPUT_DIR) + copy(self._get_file('styles2.srt'), OUTPUT_DIR) + + vtt = webvtt.read(os.path.join(OUTPUT_DIR, 'styles2.vtt')) + vtt.save_as_srt(os.path.join(OUTPUT_DIR, 'styles2_converted.srt')) + + with open(os.path.join(OUTPUT_DIR, 'styles2.srt'), 'r', encoding='utf-8') as f: + original = f.read() + + with open(os.path.join(OUTPUT_DIR, 'styles2_converted.srt'), 'r', encoding='utf-8') as f: + converted = f.read() + + self.assertEqual(original.strip(), converted.strip()) diff --git a/webvtt/structures.py b/webvtt/structures.py index 84f376d..6673d09 100644 --- a/webvtt/structures.py +++ b/webvtt/structures.py @@ -1,7 +1,10 @@ +import html import re from .errors import MalformedCaptionError +COLOURS_PATTERN = re.compile(r'::cue\(\.([^)]+)\)\s*{.*?color:(.*?);.*?}') + TIMESTAMP_PATTERN = re.compile('(\d+)?:?(\d{2}):(\d{2})[.,](\d{3})') __all__ = ['Caption'] @@ -43,6 +46,33 @@ def __str__(self): def add_line(self, line): self.lines.append(line) + @staticmethod + def replace_color(x, tag, v): + return ("" if tag == "c" else ("<" + tag + ">")) \ + + "" \ + + html.unescape(x.group(1)) \ + + "" \ + + ("" if tag == "c" else ("")) + + def _replace_colors(self, raw_text, colours, tag): + result = raw_text + for k, v in colours.items(): + regex_string = "<" + tag + "(?:\\..*?)?\\." + str(k) + "(?:\\..*?)?>(.*?)" + if re.search(regex_string, result) is not None: + result = re.sub(regex_string, lambda x: self.replace_color(x, tag, v), result) + return result + + def to_srt_formatted(self, colours): + caption_text = self.raw_text + no_tag_found = True + for tag in ['c', 'i', 'b', 'u']: + if re.search("<" + tag + "\\..*?>.*?", caption_text) is not None: + caption_text = self._replace_colors(caption_text, colours, tag) + no_tag_found = False + if no_tag_found: + caption_text = self.text + return caption_text + def _to_seconds(self, hours, minutes, seconds, milliseconds): return hours * 3600 + minutes * 60 + seconds + milliseconds / 1000 @@ -133,3 +163,12 @@ def text(self, value): if type(value) != str: raise TypeError('The text value must be a string.') self.lines = value.split('\n') + + @property + def colours(self): + """Returns the colours as a dict""" + colours_found = COLOURS_PATTERN.findall(self.text) + colours_classes = list(map(lambda x: x[0], colours_found)) + colours_values = list(map(lambda x: x[1].replace(" ", ""), colours_found)) + colours = dict(zip(colours_classes, colours_values)) + return colours diff --git a/webvtt/webvtt.py b/webvtt/webvtt.py index adec7c9..d0e798f 100644 --- a/webvtt/webvtt.py +++ b/webvtt/webvtt.py @@ -107,7 +107,7 @@ def write(self, f, format='vtt'): if format == 'vtt': WebVTTWriter().write(self._captions, f) elif format == 'srt': - SRTWriter().write(self._captions, f) + SRTWriter().write(self._captions, self._styles, f) # elif output_format == OutputFormat.SBV: # SBVWriter().write(self._captions, f) diff --git a/webvtt/writers.py b/webvtt/writers.py index 5ec551b..2d7a2f0 100644 --- a/webvtt/writers.py +++ b/webvtt/writers.py @@ -20,12 +20,18 @@ def webvtt_content(self, captions): class SRTWriter(object): - def write(self, captions, f): + def write(self, captions, styles, f): + colours = dict() + if styles is not None: + for style in styles: + colours.update(style.colours) + for line_number, caption in enumerate(captions, start=1): f.write('{}\n'.format(line_number)) f.write('{} --> {}\n'.format(self._to_srt_timestamp(caption.start_in_seconds), self._to_srt_timestamp(caption.end_in_seconds))) - f.writelines(['{}\n'.format(l) for l in caption.lines]) + f.write('{}\n'.format(caption.to_srt_formatted(colours))) + # f.writelines(['{}\n'.format(l) for l in caption.lines]) f.write('\n') def _to_srt_timestamp(self, total_seconds):