From 7b26a680fa11978474c50e624c5ba599abd09700 Mon Sep 17 00:00:00 2001 From: iranroman Date: Thu, 24 Aug 2023 11:47:27 -0400 Subject: [PATCH 1/3] implements random pitch shift augmentation --- audio_synthesizer.py | 9 +++++++-- generation_parameters.py | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/audio_synthesizer.py b/audio_synthesizer.py index 000e857..3bd5742 100644 --- a/audio_synthesizer.py +++ b/audio_synthesizer.py @@ -31,6 +31,10 @@ def __init__( self._apply_event_gains = db_config._apply_class_gains self._db_name = params['db_name'] self._fs = params['fs'] + self._pitch_shift = params['random_pitch_shift'] + if self._pitch_shift: + self._bins_per_octave = params['bins_per_octave'] + self._n_bins_up_down = params['n_bins_up_down'] if self._apply_event_gains: self._class_gains = db_config._class_gains @@ -125,8 +129,9 @@ def synthesize_mixtures(self): eventsig, fs_db = librosa.load(filename, sr=self._fs) # here we need librosa since we are loading .mp3 else: raise Exception(f"Unknown event database: {self._db_name}") - - + if self._pitch_shift: + n_steps = np.random.choice(range(-self._n_bins_up_down,self._n_bins_up_down+1)) + eventsig = librosa.effects.pitch_shift(eventsig, sr=fs_db, n_steps=n_steps, bins_per_octave=self._bins_per_octave) if len(np.shape(eventsig)) > 1: diff --git a/generation_parameters.py b/generation_parameters.py index f851457..58a3f3c 100644 --- a/generation_parameters.py +++ b/generation_parameters.py @@ -36,6 +36,9 @@ def get_params(argv='1'): mixture_duration = 60., #in seconds event_time_per_layer = 40., #in seconds (should be less than mixture_duration) audio_format = 'both', # 'foa' (First Order Ambisonics) or 'mic' (four microphones) or 'both' + random_pitch_shift = True, + bins_per_octave = 12, + n_bins_up_down = 6, ) From ce1aeccaeb1a393fb516cf65d562c5838df0e45a Mon Sep 17 00:00:00 2001 From: iranroman Date: Thu, 24 Aug 2023 11:47:27 -0400 Subject: [PATCH 2/3] implements random pitch shift 
augmentation --- audio_synthesizer.py | 9 +++++++-- generation_parameters.py | 3 +++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/audio_synthesizer.py b/audio_synthesizer.py index 0e600d3..c979fbb 100644 --- a/audio_synthesizer.py +++ b/audio_synthesizer.py @@ -32,6 +32,10 @@ def __init__( self._apply_event_gains = db_config._apply_class_gains self._db_name = params['db_name'] self._fs = params['fs'] + self._pitch_shift = params['random_pitch_shift'] + if self._pitch_shift: + self._bins_per_octave = params['bins_per_octave'] + self._n_bins_up_down = params['n_bins_up_down'] if self._apply_event_gains: self._class_gains = db_config._class_gains @@ -97,8 +101,9 @@ def synthesize_mixtures(self): eventsig, fs_db = librosa.load(filename, sr=self._fs) # here we need librosa since we are loading .mp3 else: raise Exception(f"Unknown event database: {self._db_name}") - - + if self._pitch_shift: + n_steps = np.random.choice(range(-self._n_bins_up_down,self._n_bins_up_down+1)) + eventsig = librosa.effects.pitch_shift(eventsig, sr=fs_db, n_steps=n_steps, bins_per_octave=self._bins_per_octave) if len(np.shape(eventsig)) > 1: diff --git a/generation_parameters.py b/generation_parameters.py index 8a6f9f7..557bd70 100644 --- a/generation_parameters.py +++ b/generation_parameters.py @@ -37,6 +37,9 @@ def get_params(argv='1'): mixture_duration = 60., #in seconds event_time_per_layer = 40., #in seconds (should be less than mixture_duration) audio_format = 'both', # 'foa' (First Order Ambisonics) or 'mic' (four microphones) or 'both' + random_pitch_shift = True, + bins_per_octave = 12, + n_bins_up_down = 6, ) From 76f3c60be7c900af2bdc08f085c46e684bd8462c Mon Sep 17 00:00:00 2001 From: iranroman Date: Fri, 25 Aug 2023 01:57:53 -0400 Subject: [PATCH 3/3] changes path to save dataset --- generation_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generation_parameters.py b/generation_parameters.py index 557bd70..0c8732a 100644 --- 
a/generation_parameters.py +++ b/generation_parameters.py @@ -50,7 +50,7 @@ def get_params(argv='1'): elif argv == '2': ###### FSD50k DATA params['db_name'] = 'fsd50k' params['db_path']= '/home/iran/datasets/FSD50K' - params['mixturepath'] = '/datasets/SELD-dataset-sofa' + params['mixturepath'] = '/datasets/SELD-dataset-pitch' params['active_classes'] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] params['max_polyphony'] = 2