diff --git a/audiotools/core/effects.py b/audiotools/core/effects.py index fb534cbc..f51d16e4 100644 --- a/audiotools/core/effects.py +++ b/audiotools/core/effects.py @@ -276,7 +276,7 @@ def pitch_shift(self, n_semitones: int, quick: bool = True): self.audio_data = self._to_3d(waveform) return self.to(device) - def time_stretch(self, factor: float, quick: bool = True): + def time_stretch(self, factor: float, quick: bool = True, speech: bool = True): """Time stretch the audio signal. Parameters @@ -300,6 +300,9 @@ def time_stretch(self, factor: float, quick: bool = True): if quick: effects[0].insert(1, "-q") + if speech: + effects[0].insert(1, "-s") + waveform = self._to_2d().cpu() waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor( waveform, self.sample_rate, effects, channels_first=True diff --git a/tests/core/test_effects.py b/tests/core/test_effects.py index c0d6765b..7d5d3b8a 100644 --- a/tests/core/test_effects.py +++ b/tests/core/test_effects.py @@ -178,6 +178,13 @@ def test_time_stretch(): batched = spk_batch.deepcopy().time_stretch(0.8) + #non-default case + single = spk.deepcopy().time_stretch(0.8, speech=False) + + spk_batch = AudioSignal.batch([spk.deepcopy() for _ in range(batch_size)]) + + batched = spk_batch.deepcopy().time_stretch(0.8, speech=False) + assert np.allclose(batched[0].audio_data, single[0].audio_data)