Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
total_segments = len(segments)
for segment in segments:
if len(segment['text']) > 0:
translated_text = translate_deepl(segment['text'], 'es', detected_language)
translated_text = translate_deepl(segment['text'], 'pt-br', detected_language)
else:
translated_text = ''
new_segments.append({'id': segment['id'],
Expand Down Expand Up @@ -74,7 +74,7 @@
new_segments = pickle.load(f)

# Synthesize audio
target_lang_code = "es"
target_lang_code = "pt-br"

synthesized_segments_paths = synthesize_segments_with_workers(
segments=new_segments,
Expand Down Expand Up @@ -121,4 +121,4 @@

# Close log output file
# sys.stdout = sys.__stdout__
# log_output_file.close()
# log_output_file.close()
82 changes: 82 additions & 0 deletions tools/audio_synthesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,3 +454,85 @@ def trim_silence(audio_segment, silence_thresh=-50.0, chunk_size=10):
return audio_segment[start_trim:end_trim]
else:
return audio_segment

def adjust_background_music(background_audio, translated_audio_segments,
                            gain_during_overlay=-10):
    """
    Mix the translated speech onto the background audio while ducking it.

    Each translated segment is overlaid, in order, on the running result of
    the previous overlay. While a segment is being overlaid, the background
    is changed by ``gain_during_overlay`` dB.

    Parameters
    ----------
    background_audio : AudioSegment
        The original background audio.
    translated_audio_segments : list of AudioSegment
        The list of translated audio segments.
    gain_during_overlay : float, optional
        Gain in dB applied to the background while a segment is overlaid.
        Defaults to -10, the level that was previously hard-coded.

    Returns
    -------
    adjusted_background_audio : AudioSegment
        The background audio with every segment mixed in. When
        ``translated_audio_segments`` is empty, ``background_audio`` itself
        is returned.

    Notes
    -----
    No ``position`` is passed to ``overlay``, so every segment is placed at
    that method's default offset rather than at its own timestamp.
    """
    adjusted_background_audio = background_audio

    for segment in translated_audio_segments:
        # overlay() returns a new object, so the result must be re-bound on
        # every iteration for the segments to accumulate.
        adjusted_background_audio = adjusted_background_audio.overlay(
            segment, gain_during_overlay=gain_during_overlay
        )

    return adjusted_background_audio

def apply_seamless_transitions(audio_segments):
    """
    Join audio segments end to end, fading each one in and out.

    Parameters
    ----------
    audio_segments : list of AudioSegment
        The segments to join, in playback order.

    Returns
    -------
    seamless_audio : AudioSegment
        The concatenation of all segments, each with a 100 ms fade-in and a
        100 ms fade-out applied. An empty AudioSegment when the input has no
        segments.
    """
    # Soften both edges of every clip before joining them.
    faded_clips = (clip.fade_in(100).fade_out(100) for clip in audio_segments)

    # Starting the sum from an empty AudioSegment reproduces repeated `+`
    # concatenation and yields an empty result for an empty input.
    return sum(faded_clips, AudioSegment.empty())

def reduce_background_noise(audio_segment):
    """
    Reduce background noise in the given audio segment.

    The samples are converted to float32, passed through
    ``noisereduce.reduce_noise``, and converted back to the segment's
    original integer sample type before a new AudioSegment is built.

    Parameters
    ----------
    audio_segment : AudioSegment
        The audio segment to reduce background noise.

    Returns
    -------
    denoised_audio : AudioSegment
        The audio segment with reduced background noise, with the same frame
        rate, sample width and channel count as the input. A segment with no
        samples is returned unchanged.
    """
    import noisereduce as nr
    import numpy as np

    # Keep the native integer dtype so it can be restored after denoising.
    samples = np.array(audio_segment.get_array_of_samples())
    if samples.size == 0:
        # Nothing to denoise.
        return audio_segment

    sample_dtype = samples.dtype
    channels = audio_segment.channels
    sr = audio_segment.frame_rate

    y = samples.astype(np.float32)
    if channels > 1:
        # Samples are interleaved frame by frame; give the denoiser one row
        # per channel instead of a single mixed-up stream.
        y = y.reshape(-1, channels).T

    # Apply noise reduction
    denoised_y = nr.reduce_noise(y=y, sr=sr)

    if channels > 1:
        # Back to interleaved order.
        denoised_y = denoised_y.T.reshape(-1)

    # Serialising the float array directly would hand float bytes to a
    # container that declares integer samples. Round and clip (in float64,
    # so the int32 limits are exact) and cast back to the original dtype.
    limits = np.iinfo(sample_dtype)
    pcm = np.clip(
        np.rint(np.asarray(denoised_y, dtype=np.float64)),
        limits.min,
        limits.max,
    ).astype(sample_dtype)

    # Convert back to AudioSegment
    denoised_audio = AudioSegment(
        pcm.tobytes(),
        frame_rate=sr,
        sample_width=audio_segment.sample_width,
        channels=channels,
    )

    return denoised_audio
102 changes: 101 additions & 1 deletion tools/transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import math
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
import torch
import requests
import json


# Mapping from ISO 639-1 codes to MBART50 language codes
Expand Down Expand Up @@ -194,6 +196,100 @@ def translate(text, source_lang_code, target_lang_code, max_chunk_length=500):
return full_translation


def identify_key_phrases(text):
    """
    Pick out the longer sentences of a text as its "key phrases".

    Parameters
    ----------
    text : str
        The text to analyze.

    Returns
    -------
    key_phrases : list of str
        Every sentence of ``text``, in order, that tokenizes to more than
        five word tokens.
    """
    # The five-token cut-off is an arbitrary threshold for what counts as a
    # key phrase.
    return [
        candidate
        for candidate in sent_tokenize(text)
        if len(nltk.word_tokenize(candidate)) > 5
    ]


def get_synonyms(phrase):
    """
    Get synonyms for a given phrase using a thesaurus API.

    Queries the Datamuse ``/words`` endpoint with the ``ml`` parameter and
    asks for at most five results. Failures are reported on stdout and yield
    an empty list, so callers can treat "no synonyms" and "lookup failed"
    alike.

    Parameters
    ----------
    phrase : str
        The phrase to find synonyms for.

    Returns
    -------
    synonyms : list of str
        The ``word`` field of each returned entry, in response order, or an
        empty list when the request fails, the status is not 200, or the
        body is not valid JSON.
    """
    api_url = "https://api.datamuse.com/words"
    params = {
        "ml": phrase,
        "max": 5
    }

    try:
        # Without a timeout a stalled connection would block indefinitely.
        response = requests.get(api_url, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Error fetching synonyms: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching synonyms: {response.status_code}")
        return []

    try:
        data = response.json()
    except ValueError as exc:
        # A 200 response with a non-JSON body is treated like any other
        # failed lookup.
        print(f"Error fetching synonyms: {exc}")
        return []

    return [item['word'] for item in data]


def adjust_translation_with_synonyms(text):
    """
    Substitute the key phrases of a translation with suggested synonyms.

    Key phrases are found with ``identify_key_phrases``. For each one that
    ``get_synonyms`` returns a non-empty list for, every occurrence in the
    text is replaced by the first suggestion, and the substitution is
    recorded with ``log_translation_change``.

    Parameters
    ----------
    text : str
        The translated text to adjust.

    Returns
    -------
    adjusted_text : str
        The text after all substitutions. Phrases with no suggestions are
        left as they are.
    """
    result = text

    for sentence in identify_key_phrases(text):
        candidates = get_synonyms(sentence)
        if not candidates:
            # No suggestion available; keep the phrase untouched.
            continue

        replacement = candidates[0]
        result = result.replace(sentence, replacement)
        log_translation_change(sentence, replacement)

    return result


def log_translation_change(original_phrase, synonym):
    """
    Log the changes made to translations for tracking and consistency.

    Appends a two-line record to ``logs/translation_changes.txt``, relative
    to the current working directory. The ``logs`` directory is created if
    it does not exist.

    Parameters
    ----------
    original_phrase : str
        The original phrase in the translation.
    synonym : str
        The synonym used to replace the original phrase.
    """
    import os

    log_content = (
        f"Original Phrase: {original_phrase}\n"
        f"Synonym: {synonym}\n"
    )
    log_filename = 'logs/translation_changes.txt'

    # Opening in append mode does not create missing parent directories.
    os.makedirs(os.path.dirname(log_filename), exist_ok=True)

    # Translated text is routinely non-ASCII; an explicit encoding keeps the
    # write from depending on the platform default.
    with open(log_filename, 'a', encoding='utf-8') as log_file:
        log_file.write(log_content)


if __name__ == '__main__':
Expand All @@ -215,7 +311,11 @@ def translate(text, source_lang_code, target_lang_code, max_chunk_length=500):
translated_text = translate_deepl(transcribed_text, target_lang_code, detected_language)
print(f"Translated text: {translated_text[:500]}...")

# Adjust translation with synonyms
adjusted_translation = adjust_translation_with_synonyms(translated_text)
print(f"Adjusted Translation: {adjusted_translation[:500]}...")

# synthesize audio
from audio_synthesis import synthesize_speech
synthesize_speech(translated_text[:500], audio, target_lang_code)
synthesize_speech(adjusted_translation[:500], audio, target_lang_code)
print(f"Synthesized audio saved to output_audio")