From c7dd2d9ae6f5563cbd49f7e5fdbe7bff3bd9f881 Mon Sep 17 00:00:00 2001 From: Marcos Isidio Da Silva Junior <95884561+UltramarkoRJ@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:53:03 -0300 Subject: [PATCH] Ensure translation timing consistency and apply advanced audio translation techniques Add logic to ensure translation timing consistency and advanced audio translation techniques. * **Translation Timing Consistency** * Add functions to identify key phrases, get synonyms, adjust translation with synonyms, and log translation changes in `tools/transcriber.py`. * Modify main translation process to adjust translation with synonyms for timing consistency. * **Advanced Audio Translation Techniques** * Add functions to adjust background music, apply seamless transitions, and reduce background noise in `tools/audio_synthesis.py`. * Modify audio synthesis process to use adjusted translation in `tools/transcriber.py`. * **Language Code Update** * Change target language code from 'es' to 'pt-br' in `main.py`. --- For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/frrobledo/AutoDub?shareId=XXXX-XXXX-XXXX-XXXX). 
--- main.py | 6 +-- tools/audio_synthesis.py | 82 +++++++++++++++++++++++++++++++ tools/transcriber.py | 102 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 186 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index 9baf702..63bc88c 100644 --- a/main.py +++ b/main.py @@ -46,7 +46,7 @@ total_segments = len(segments) for segment in segments: if len(segment['text']) > 0: - translated_text = translate_deepl(segment['text'], 'es', detected_language) + translated_text = translate_deepl(segment['text'], 'pt-br', detected_language) else: translated_text = '' new_segments.append({'id': segment['id'], @@ -74,7 +74,7 @@ new_segments = pickle.load(f) # Synthesize audio - target_lang_code = "es" + target_lang_code = "pt-br" synthesized_segments_paths = synthesize_segments_with_workers( segments=new_segments, @@ -121,4 +121,4 @@ # Close log output file # sys.stdout = sys.__stdout__ - # log_output_file.close() \ No newline at end of file + # log_output_file.close() diff --git a/tools/audio_synthesis.py b/tools/audio_synthesis.py index 95834ba..fdaa4d3 100644 --- a/tools/audio_synthesis.py +++ b/tools/audio_synthesis.py @@ -454,3 +454,85 @@ def trim_silence(audio_segment, silence_thresh=-50.0, chunk_size=10): return audio_segment[start_trim:end_trim] else: return audio_segment + +def adjust_background_music(background_audio, translated_audio_segments): + """ + Adjust the background music based on the translated audio segments. + + Parameters + ---------- + background_audio : AudioSegment + The original background audio. + translated_audio_segments : list of AudioSegment + The list of translated audio segments. + + Returns + ------- + adjusted_background_audio : AudioSegment + The adjusted background audio. 
+ """ + adjusted_background_audio = background_audio + + for segment in translated_audio_segments: + # Adjust volume, tempo, or other parameters based on the segment + # Example: Reduce background music volume during speech segments + adjusted_background_audio = adjusted_background_audio.overlay(segment, gain_during_overlay=-10) + + return adjusted_background_audio + +def apply_seamless_transitions(audio_segments): + """ + Apply seamless transitions between different audio segments. + + Parameters + ---------- + audio_segments : list of AudioSegment + The list of audio segments. + + Returns + ------- + seamless_audio : AudioSegment + The audio with seamless transitions. + """ + seamless_audio = AudioSegment.empty() + + for segment in audio_segments: + # Apply fade-in and fade-out effects for seamless transitions + segment = segment.fade_in(100).fade_out(100) + seamless_audio += segment + + return seamless_audio + +def reduce_background_noise(audio_segment): + """ + Reduce background noise in the given audio segment. + + Parameters + ---------- + audio_segment : AudioSegment + The audio segment to reduce background noise. + + Returns + ------- + denoised_audio : AudioSegment + The audio segment with reduced background noise. 
+ """ + import noisereduce as nr + import numpy as np + + # Convert AudioSegment to numpy array + y = np.array(audio_segment.get_array_of_samples()).astype(np.float32) + sr = audio_segment.frame_rate + + # Apply noise reduction + denoised_y = nr.reduce_noise(y=y, sr=sr) + + # Convert back to AudioSegment + denoised_audio = AudioSegment( + denoised_y.tobytes(), + frame_rate=sr, + sample_width=audio_segment.sample_width, + channels=audio_segment.channels + ) + + return denoised_audio diff --git a/tools/transcriber.py b/tools/transcriber.py index 5ea152e..61f5019 100644 --- a/tools/transcriber.py +++ b/tools/transcriber.py @@ -6,6 +6,8 @@ import math from transformers import MBartForConditionalGeneration, MBart50TokenizerFast import torch +import requests +import json # Mapping from ISO 639-1 codes to MBART50 language codes @@ -194,6 +196,100 @@ def translate(text, source_lang_code, target_lang_code, max_chunk_length=500): return full_translation +def identify_key_phrases(text): + """ + Identify key phrases in the text that may cause timing issues. + + Parameters + ---------- + text : str + The text to analyze. + + Returns + ------- + key_phrases : list of str + The list of key phrases identified in the text. + """ + sentences = sent_tokenize(text) + key_phrases = [] + for sentence in sentences: + words = nltk.word_tokenize(sentence) + if len(words) > 5: # Arbitrary threshold for key phrases + key_phrases.append(sentence) + return key_phrases + + +def get_synonyms(phrase): + """ + Get synonyms for a given phrase using a thesaurus API. + + Parameters + ---------- + phrase : str + The phrase to find synonyms for. + + Returns + ------- + synonyms : list of str + The list of synonyms for the given phrase. 
+ """ + api_url = "https://api.datamuse.com/words" + params = { + "ml": phrase, + "max": 5 + } + response = requests.get(api_url, params=params) + if response.status_code == 200: + data = response.json() + synonyms = [item['word'] for item in data] + return synonyms + else: + print(f"Error fetching synonyms: {response.status_code}") + return [] + + +def adjust_translation_with_synonyms(text): + """ + Adjust the translation using synonyms to ensure timing consistency. + + Parameters + ---------- + text : str + The translated text to adjust. + + Returns + ------- + adjusted_text : str + The adjusted translated text. + """ + key_phrases = identify_key_phrases(text) + adjusted_text = text + for phrase in key_phrases: + synonyms = get_synonyms(phrase) + if synonyms: + adjusted_text = adjusted_text.replace(phrase, synonyms[0]) + log_translation_change(phrase, synonyms[0]) + return adjusted_text + + +def log_translation_change(original_phrase, synonym): + """ + Log the changes made to translations for tracking and consistency. + + Parameters + ---------- + original_phrase : str + The original phrase in the translation. + synonym : str + The synonym used to replace the original phrase. 
+ """ + log_content = ( + f"Original Phrase: {original_phrase}\n" + f"Synonym: {synonym}\n" + ) + log_filename = 'logs/translation_changes.txt' + with open(log_filename, 'a') as log_file: + log_file.write(log_content) if __name__ == '__main__': @@ -215,7 +311,11 @@ def translate(text, source_lang_code, target_lang_code, max_chunk_length=500): translated_text = translate_deepl(transcribed_text, target_lang_code, detected_language) print(f"Translated text: {translated_text[:500]}...") + # Adjust translation with synonyms + adjusted_translation = adjust_translation_with_synonyms(translated_text) + print(f"Adjusted Translation: {adjusted_translation[:500]}...") + # synthesize audio from audio_synthesis import synthesize_speech - synthesize_speech(translated_text[:500], audio, target_lang_code) + synthesize_speech(adjusted_translation[:500], audio, target_lang_code) print(f"Synthesized audio saved to output_audio") \ No newline at end of file