Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 44 additions & 5 deletions backend/services/lip_sync_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,54 @@ def _get_audio_mel(self, audio_path: str) -> np.ndarray:
target_sr=target_sr
)

# Compute mel spectrogram using Wav2Lip's method
self._setup_wav2lip_paths()
from audio import melspectrogram

mel = melspectrogram(audio_data.astype(np.float32))
# Compute mel spectrogram using Wav2Lip's method.
# Prefer upstream helper if available, otherwise fall back to local implementation
mel = None
try:
self._setup_wav2lip_paths()
from audio import melspectrogram # type: ignore
mel = melspectrogram(audio_data.astype(np.float32))
except Exception:
mel = self._melspectrogram_fallback(audio_data.astype(np.float32))

logger.info(f"Audio mel shape: {mel.shape}")

return mel

def _melspectrogram_fallback(self, wav: np.ndarray):
    """
    Compute a log-scaled mel spectrogram without Wav2Lip's ``audio`` module.

    Used when ``audio.melspectrogram`` cannot be imported or fails. The
    waveform goes through a short-time Fourier transform (800-sample FFT
    and window, 200-sample hop, reflect padding); the magnitudes are then
    projected onto an 80-band mel filterbank spanning 55 Hz to 7600 Hz and
    returned as ``log10`` of the band values, floored at ``1e-10``.

    NOTE(review): the filterbank is built for a 16 kHz sample rate, but the
    rate of ``wav`` is not checked here -- confirm the caller resamples.
    NOTE(review): upstream Wav2Lip's ``melspectrogram`` is understood to
    also apply pre-emphasis and dB normalization; verify that this fallback
    yields values on the scale the model expects.

    Args:
        wav: Audio samples to analyse.

    Returns:
        The log10 mel spectrogram; for a 1-D waveform this has one row per
        mel band and one column per STFT frame.
    """
    import librosa

    # Analysis settings, grouped by the librosa call that consumes them.
    stft_settings = {
        "n_fft": 800,
        "hop_length": 200,
        "win_length": 800,
        "pad_mode": "reflect",
    }
    filterbank_settings = {
        "sr": 16000,
        "n_fft": stft_settings["n_fft"],
        "n_mels": 80,
        "fmin": 55,
        "fmax": 7600,
    }

    # Magnitude spectrum of the complex STFT (phase is discarded).
    magnitudes = np.abs(librosa.stft(wav, **stft_settings))

    # Project the linear-frequency bins onto the mel bands.
    filterbank = librosa.filters.mel(**filterbank_settings)
    band_values = filterbank.dot(magnitudes)

    # Floor before the logarithm so silent bands do not produce -inf.
    floored = np.maximum(band_values, 1e-10)
    return np.log10(floored)

async def generate(
self,
Expand Down