From ed434ed77c035105bc81984ff7e418281fb853ca Mon Sep 17 00:00:00 2001 From: Onur Babacan Date: Wed, 12 Jul 2023 18:30:58 +0900 Subject: [PATCH 1/2] Option to use speech-optimized SoX tempo effect called from time_stretch(), and set to True as default --- audiotools/core/effects.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/audiotools/core/effects.py b/audiotools/core/effects.py index fb534cbc..f51d16e4 100644 --- a/audiotools/core/effects.py +++ b/audiotools/core/effects.py @@ -276,7 +276,7 @@ def pitch_shift(self, n_semitones: int, quick: bool = True): self.audio_data = self._to_3d(waveform) return self.to(device) - def time_stretch(self, factor: float, quick: bool = True): + def time_stretch(self, factor: float, quick: bool = True, speech: bool = True): """Time stretch the audio signal. Parameters @@ -300,6 +300,9 @@ def time_stretch(self, factor: float, quick: bool = True): if quick: effects[0].insert(1, "-q") + if speech: + effects[0].insert(1, "-s") + waveform = self._to_2d().cpu() waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor( waveform, self.sample_rate, effects, channels_first=True From f4655136b0fb1fb2f38365fcf91ba417df8819ef Mon Sep 17 00:00:00 2001 From: Onur Babacan Date: Fri, 21 Jul 2023 10:20:48 +0900 Subject: [PATCH 2/2] added test case for previous default --- tests/core/test_effects.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/core/test_effects.py b/tests/core/test_effects.py index c0d6765b..7d5d3b8a 100644 --- a/tests/core/test_effects.py +++ b/tests/core/test_effects.py @@ -178,6 +178,13 @@ def test_time_stretch(): batched = spk_batch.deepcopy().time_stretch(0.8) + #non-default case + single = spk.deepcopy().time_stretch(0.8, speech=False) + + spk_batch = AudioSignal.batch([spk.deepcopy() for _ in range(batch_size)]) + + batched = spk_batch.deepcopy().time_stretch(0.8, speech=False) + assert np.allclose(batched[0].audio_data, single[0].audio_data)