From 4e714d7e08196517becd22ad5a14c934fc315443 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 21:05:16 +0000 Subject: [PATCH 1/2] Initial plan From c2672195aadcd87d512f6fa384b72c3a1cfe7ac4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 21:10:24 +0000 Subject: [PATCH 2/2] fix: ensure lip-sync mel fallback Co-authored-by: Dynamo2k <182001016+Dynamo2k@users.noreply.github.com> --- backend/services/lip_sync_service.py | 49 +++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/backend/services/lip_sync_service.py b/backend/services/lip_sync_service.py index 6819a49..497fa6c 100644 --- a/backend/services/lip_sync_service.py +++ b/backend/services/lip_sync_service.py @@ -265,15 +265,54 @@ def _get_audio_mel(self, audio_path: str) -> np.ndarray: target_sr=target_sr ) - # Compute mel spectrogram using Wav2Lip's method - self._setup_wav2lip_paths() - from audio import melspectrogram - - mel = melspectrogram(audio_data.astype(np.float32)) + # Compute mel spectrogram using Wav2Lip's method. + # Prefer upstream helper if available, otherwise fall back to local implementation + mel = None + try: + self._setup_wav2lip_paths() + from audio import melspectrogram # type: ignore + mel = melspectrogram(audio_data.astype(np.float32)) + except Exception: + mel = self._melspectrogram_fallback(audio_data.astype(np.float32)) logger.info(f"Audio mel shape: {mel.shape}") return mel + + def _melspectrogram_fallback(self, wav: np.ndarray): + """ + Local mel-spectrogram implementation used when Wav2Lip's audio.py + is not available on the PYTHONPATH. Mirrors setup_wav2lip.sh logic. + """ + import librosa + + num_mels = 80 + n_fft = 800 + hop_size = 200 + win_size = 800 + sample_rate = 16000 + fmin = 55 + fmax = 7600 + + mel_basis = librosa.filters.mel( + sr=sample_rate, + n_fft=n_fft, + n_mels=num_mels, + fmin=fmin, + fmax=fmax + ) + + D = librosa.stft( + wav, + n_fft=n_fft, + hop_length=hop_size, + win_length=win_size, + pad_mode="reflect" + ) + + S = np.abs(D) + mel = np.dot(mel_basis, S) + return np.log10(np.maximum(mel, 1e-10)) async def generate( self,