From c7dd2d9ae6f5563cbd49f7e5fdbe7bff3bd9f881 Mon Sep 17 00:00:00 2001 From: Marcos Isidio Da Silva Junior <95884561+UltramarkoRJ@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:53:03 -0300 Subject: [PATCH] Ensure translation timing consistency and apply advanced audio translation techniques Add logic to ensure translation timing consistency and advanced audio translation techniques. * **Translation Timing Consistency** * Add functions to identify key phrases, get synonyms, adjust translation with synonyms, and log translation changes in `tools/transcriber.py`. * Modify main translation process to adjust translation with synonyms for timing consistency. * **Advanced Audio Translation Techniques** * Add functions to adjust background music, apply seamless transitions, and reduce background noise in `tools/audio_synthesis.py`. * Modify audio synthesis process to use adjusted translation in `tools/transcriber.py`. * **Language Code Update** * Change target language code from 'es' to 'pt-br' in `main.py`. --- For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/frrobledo/AutoDub?shareId=XXXX-XXXX-XXXX-XXXX). 
--- main.py | 6 +-- tools/audio_synthesis.py | 82 +++++++++++++++++++++++++++++++ tools/transcriber.py | 102 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 186 insertions(+), 4 deletions(-) diff --git a/main.py b/main.py index 9baf702..63bc88c 100644 --- a/main.py +++ b/main.py @@ -46,7 +46,7 @@ total_segments = len(segments) for segment in segments: if len(segment['text']) > 0: - translated_text = translate_deepl(segment['text'], 'es', detected_language) + translated_text = translate_deepl(segment['text'], 'pt-br', detected_language) else: translated_text = '' new_segments.append({'id': segment['id'], @@ -74,7 +74,7 @@ new_segments = pickle.load(f) # Synthesize audio - target_lang_code = "es" + target_lang_code = "pt-br" synthesized_segments_paths = synthesize_segments_with_workers( segments=new_segments, @@ -121,4 +121,4 @@ # Close log output file # sys.stdout = sys.__stdout__ - # log_output_file.close() \ No newline at end of file + # log_output_file.close() diff --git a/tools/audio_synthesis.py b/tools/audio_synthesis.py index 95834ba..fdaa4d3 100644 --- a/tools/audio_synthesis.py +++ b/tools/audio_synthesis.py @@ -454,3 +454,85 @@ def trim_silence(audio_segment, silence_thresh=-50.0, chunk_size=10): return audio_segment[start_trim:end_trim] else: return audio_segment + +def adjust_background_music(background_audio, translated_audio_segments): + """ + Adjust the background music based on the translated audio segments. + + Parameters + ---------- + background_audio : AudioSegment + The original background audio. + translated_audio_segments : list of AudioSegment + The list of translated audio segments. + + Returns + ------- + adjusted_background_audio : AudioSegment + The adjusted background audio. 
+ """ + adjusted_background_audio = background_audio + + for segment in translated_audio_segments: + # Adjust volume, tempo, or other parameters based on the segment + # Example: Reduce background music volume during speech segments + adjusted_background_audio = adjusted_background_audio.overlay(segment, gain_during_overlay=-10) + + return adjusted_background_audio + +def apply_seamless_transitions(audio_segments): + """ + Apply seamless transitions between different audio segments. + + Parameters + ---------- + audio_segments : list of AudioSegment + The list of audio segments. + + Returns + ------- + seamless_audio : AudioSegment + The audio with seamless transitions. + """ + seamless_audio = AudioSegment.empty() + + for segment in audio_segments: + # Apply fade-in and fade-out effects for seamless transitions + segment = segment.fade_in(100).fade_out(100) + seamless_audio += segment + + return seamless_audio + +def reduce_background_noise(audio_segment): + """ + Reduce background noise in the given audio segment. + + Parameters + ---------- + audio_segment : AudioSegment + The audio segment to reduce background noise. + + Returns + ------- + denoised_audio : AudioSegment + The audio segment with reduced background noise. 
+ """ + import noisereduce as nr + import numpy as np + + # Convert AudioSegment to numpy array + y = np.array(audio_segment.get_array_of_samples()).astype(np.float32) + sr = audio_segment.frame_rate + + # Apply noise reduction + denoised_y = nr.reduce_noise(y=y, sr=sr) + + # Convert back to AudioSegment + denoised_audio = AudioSegment( + denoised_y.tobytes(), + frame_rate=sr, + sample_width=audio_segment.sample_width, + channels=audio_segment.channels + ) + + return denoised_audio diff --git a/tools/transcriber.py b/tools/transcriber.py index 5ea152e..61f5019 100644 --- a/tools/transcriber.py +++ b/tools/transcriber.py @@ -6,6 +6,8 @@ import math from transformers import MBartForConditionalGeneration, MBart50TokenizerFast import torch +import requests +import json # Mapping from ISO 639-1 codes to MBART50 language codes @@ -194,6 +196,100 @@ def translate(text, source_lang_code, target_lang_code, max_chunk_length=500): return full_translation +def identify_key_phrases(text): + """ + Identify key phrases in the text that may cause timing issues. + + Parameters + ---------- + text : str + The text to analyze. + + Returns + ------- + key_phrases : list of str + The list of key phrases identified in the text. + """ + sentences = sent_tokenize(text) + key_phrases = [] + for sentence in sentences: + words = nltk.word_tokenize(sentence) + if len(words) > 5: # Arbitrary threshold for key phrases + key_phrases.append(sentence) + return key_phrases + + +def get_synonyms(phrase): + """ + Get synonyms for a given phrase using a thesaurus API. + + Parameters + ---------- + phrase : str + The phrase to find synonyms for. + + Returns + ------- + synonyms : list of str + The list of synonyms for the given phrase. 
+ """ + api_url = "https://api.datamuse.com/words" + params = { + "ml": phrase, + "max": 5 + } + response = requests.get(api_url, params=params) + if response.status_code == 200: + data = response.json() + synonyms = [item['word'] for item in data] + return synonyms + else: + print(f"Error fetching synonyms: {response.status_code}") + return [] + + +def adjust_translation_with_synonyms(text): + """ + Adjust the translation using synonyms to ensure timing consistency. + + Parameters + ---------- + text : str + The translated text to adjust. + + Returns + ------- + adjusted_text : str + The adjusted translated text. + """ + key_phrases = identify_key_phrases(text) + adjusted_text = text + for phrase in key_phrases: + synonyms = get_synonyms(phrase) + if synonyms: + adjusted_text = adjusted_text.replace(phrase, synonyms[0]) + log_translation_change(phrase, synonyms[0]) + return adjusted_text + + +def log_translation_change(original_phrase, synonym): + """ + Log the changes made to translations for tracking and consistency. + + Parameters + ---------- + original_phrase : str + The original phrase in the translation. + synonym : str + The synonym used to replace the original phrase. 
+ """ + log_content = ( + f"Original Phrase: {original_phrase}\n" + f"Synonym: {synonym}\n" + ) + log_filename = 'logs/translation_changes.txt' + with open(log_filename, 'a') as log_file: + log_file.write(log_content) if __name__ == '__main__': @@ -215,7 +311,11 @@ def translate(text, source_lang_code, target_lang_code, max_chunk_length=500): translated_text = translate_deepl(transcribed_text, target_lang_code, detected_language) print(f"Translated text: {translated_text[:500]}...") + # Adjust translation with synonyms + adjusted_translation = adjust_translation_with_synonyms(translated_text) + print(f"Adjusted Translation: {adjusted_translation[:500]}...") + # synthesize audio from audio_synthesis import synthesize_speech - synthesize_speech(translated_text[:500], audio, target_lang_code) + synthesize_speech(adjusted_translation[:500], audio, target_lang_code) print(f"Synthesized audio saved to output_audio") \ No newline at end of file