def _melspectrogram_fallback(
    self,
    wav: np.ndarray,
    *,
    sample_rate: int = 16000,
    n_fft: int = 800,
    hop_size: int = 200,
    win_size: int = 800,
    num_mels: int = 80,
    fmin: float = 55,
    fmax: float = 7600,
) -> np.ndarray:
    """
    Compute a log10 mel spectrogram locally with librosa.

    Used when Wav2Lip's audio.py is not available on the PYTHONPATH.
    The original author notes that this mirrors the setup_wav2lip.sh
    logic (that script is not visible here, so this is unverified).

    NOTE(review): upstream Wav2Lip's ``audio.melspectrogram`` also applies
    pre-emphasis, dB scaling and normalisation. This fallback returns a
    plain log10 of the mel-projected magnitudes -- confirm the model
    accepts that scale before relying on it.

    Args:
        wav: Floating-point audio samples (the caller passes float32).
        sample_rate: Sampling rate used to build the mel filter bank.
        n_fft: FFT size for the STFT and the filter bank.
        hop_size: Number of samples between successive STFT frames.
        win_size: STFT window length in samples.
        num_mels: Number of mel bands in the output.
        fmin: Lowest frequency (Hz) covered by the filter bank.
        fmax: Highest frequency (Hz) covered by the filter bank.

    Returns:
        log10 of the mel-projected STFT magnitudes, floored at 1e-10.
        For 1-D input this should have shape (num_mels, n_frames).

    Raises:
        ValueError: If ``wav`` contains no samples.
    """
    # Fail early with a clear message; otherwise the reflect padding inside
    # librosa.stft rejects an empty signal with an obscure padding error.
    if np.size(wav) == 0:
        raise ValueError("Cannot compute mel spectrogram: audio signal is empty")

    # Function-local import: librosa is only loaded when the fallback runs.
    import librosa

    mel_basis = librosa.filters.mel(
        sr=sample_rate,
        n_fft=n_fft,
        n_mels=num_mels,
        fmin=fmin,
        fmax=fmax,
    )

    stft_matrix = librosa.stft(
        wav,
        n_fft=n_fft,
        hop_length=hop_size,
        win_length=win_size,
        pad_mode="reflect",
    )

    # Project the magnitude spectrogram onto the mel filter bank.
    magnitudes = np.abs(stft_matrix)
    mel = np.dot(mel_basis, magnitudes)

    # Floor before the log so silent bins do not produce -inf.
    return np.log10(np.maximum(mel, 1e-10))