From b26c2f995a9759ef2583a1312b3efebc8a2177b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Fri, 1 Nov 2024 18:16:50 +0100 Subject: [PATCH 01/18] Add frames per beat option in compute_beat_sync_features --- msaf/base.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/msaf/base.py b/msaf/base.py index 1ad32d3b..e57095f2 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -184,7 +184,7 @@ def read_ann_beats(self): ) return times, frames - def compute_beat_sync_features(self, beat_frames, beat_times, pad): + def compute_beat_sync_features(self, beat_frames, beat_times, pad, frames_per_beat=1): """Make the features beat-synchronous. Parameters @@ -207,6 +207,16 @@ def compute_beat_sync_features(self, beat_frames, beat_times, pad): """ if beat_frames is None: return None, None + if frames_per_beat != 1: + new_beat_frames = np.empty(0,dtype=int) + for idx in range(len(beat_frames)-1): + this_beat_frame = beat_frames[idx] + next_beat_frame = beat_frames[idx+1] + subdivision = (next_beat_frame - this_beat_frame) + assert (frames_per_beat < subdivision) + frames_in_beat = [int(k * subdivision/frames_per_beat + this_beat_frame) for k in range(frames_per_beat)] + new_beat_frames = np.concatenate((new_beat_frames, frames_in_beat), dtype=int ) + beat_frames = new_beat_frames # Make beat synchronous beatsync_feats = librosa.util.utils.sync( From 81ff57363dfd0a156f7f02ec39e29ebb42d0f028 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Fri, 1 Nov 2024 22:43:57 +0100 Subject: [PATCH 02/18] Add (est/ann)_beatsync_features_mfpb to Features Also add writing and reading to json, and new FeatureTypes : (est/ann)_beatsync_features_mfpb. I had to refactor the compute_beatsync_features, but the changes shouldn't affect other usages. 
--- msaf/base.py | 75 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 22 deletions(-) diff --git a/msaf/base.py b/msaf/base.py index e57095f2..e458a4d1 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -29,7 +29,7 @@ # - framesync: Frame-wise synchronous. # - est_beatsync: Beat-synchronous using estimated beats with librosa # - ann_beatsync: Beat-synchronous using annotated beats from ground-truth -FeatureTypes = Enum("FeatureTypes", "framesync est_beatsync ann_beatsync") +FeatureTypes = Enum("FeatureTypes", "framesync est_beatsync ann_beatsync est_beatsync_mfpb ann_beatsync_mfpb") # All available features features_registry = {} @@ -81,6 +81,7 @@ def __init__(self, file_struct, sr, hop_length, feat_type): self.sr = sr self.hop_length = hop_length self.feat_type = feat_type + self.fpb = 3 # The number of frames per beat computed for mfpb features # The following attributes will be populated, if needed, # once the `features` getter is called @@ -89,6 +90,8 @@ def __init__(self, file_struct, sr, hop_length, feat_type): self._framesync_features = None # Frame-sync features self._est_beatsync_features = None # Estimated Beat-sync features self._ann_beatsync_features = None # Annotated Beat-sync features + self._est_beatsync_features_mfpb = None # Estimated Beat-sync features with multiple frames per beat + self._ann_beatsync_features_mfpb = None # Annotated Beat-sync features with multiple frames per beat self._audio = None # Actual audio signal self._audio_harmonic = None # Harmonic audio signal self._audio_percussive = None # Percussive audio signal @@ -184,15 +187,13 @@ def read_ann_beats(self): ) return times, frames - def compute_beat_sync_features(self, beat_frames, beat_times, pad, frames_per_beat=1): + def compute_beat_sync_features(self, beat_frames, pad, frames_per_beat=1): """Make the features beat-synchronous. Parameters ---------- beat_frames: np.array The frame indices of the beat positions. 
- beat_times: np.array - The time points of the beat positions (in seconds). pad: boolean If `True`, `beat_frames` is padded to span the full range. @@ -201,12 +202,9 @@ def compute_beat_sync_features(self, beat_frames, beat_times, pad, frames_per_be beatsync_feats: np.array The beat-synchronized features. `None` if the beat_frames was `None`. - beatsync_times: np.array - The beat-synchronized times. - `None` if the beat_frames was `None`. """ if beat_frames is None: - return None, None + return None if frames_per_beat != 1: new_beat_frames = np.empty(0,dtype=int) for idx in range(len(beat_frames)-1): @@ -223,13 +221,29 @@ def compute_beat_sync_features(self, beat_frames, beat_times, pad, frames_per_be self._framesync_features.T, beat_frames, pad=pad ).T - # Assign times (and add last time if padded) + return beatsync_feats + + def pad_beat_times(self, beatsync_feats, beat_times): + """Pad the beat times if necessary + Parameters + ---------- + beatsync_feats: np.array + The beat-synchronized features. + + beat_times: np.array + The time points of the beat positions (in seconds). 
+ + Returns + ------- + beatsync_times: np.array + The updated time points of the beat positions (in seconds) + """ beatsync_times = np.copy(beat_times) if beatsync_times.shape[0] != beatsync_feats.shape[0]: beatsync_times = np.concatenate( (beatsync_times, [self._framesync_times[-1]]) ) - return beatsync_feats, beatsync_times + return beatsync_times def read_features(self, tol=1e-3): """Reads the features from a file and stores them in the current @@ -282,6 +296,7 @@ def read_features(self, tol=1e-3): ) self._framesync_features = np.array(feats[self.get_id()]["framesync"]) self._est_beatsync_features = np.array(feats[self.get_id()]["est_beatsync"]) + self._est_beatsync_features_mfpb = np.array(feats[self.get_id()]["est_beatsync_mfpb"]) # Read annotated beats if available if "ann_beats" in feats.keys(): @@ -293,6 +308,9 @@ def read_features(self, tol=1e-3): self._ann_beatsync_features = np.array( feats[self.get_id()]["ann_beatsync"] ) + self._ann_beatsync_features_mfpb = np.array( + feats[self.get_id()]["ann_beatsync_mfpb"] + ) except KeyError: raise WrongFeaturesFormatError( "The features file %s is not correctly formatted" @@ -372,6 +390,15 @@ def write_features(self): out_json[self.get_id()][ "ann_beatsync" ] = self._ann_beatsync_features.tolist() + + out_json[self.get_id()][ + "est_beatsync_mfpb" + ] = self._est_beatsync_features_mfpb.tolist() + if self._ann_beatsync_features_mfpb is not None: + out_json[self.get_id()][ + "ann_beatsync_mfpb" + ] = self._ann_beatsync_features_mfpb.tolist() + # Save it with open(self.file_struct.features_file, "w") as f: @@ -417,18 +444,13 @@ def _compute_all_features(self): # Beat-Synchronize pad = True # Always append to the end of the features - ( - self._est_beatsync_features, - self._est_beatsync_times, - ) = self.compute_beat_sync_features( - self._est_beats_frames, self._est_beats_times, pad - ) - ( - self._ann_beatsync_features, - self._ann_beatsync_times, - ) = self.compute_beat_sync_features( - self._ann_beats_frames, 
self._ann_beats_times, pad - ) + self._est_beatsync_features = self.compute_beat_sync_features(self._est_beats_frames, pad) + self._ann_beatsync_features = self.compute_beat_sync_features(self._ann_beats_frames, pad) + self._est_beatsync_features_mfpb = self.compute_beat_sync_features(self._est_beats_frames, pad, frames_per_beat=self.fpb) + self._ann_beatsync_features_mfpb = self.compute_beat_sync_features(self._ann_beats_frames, pad, frames_per_beat=self.fpb) + self._est_beatsync_times = self.pad_beat_times(self._est_beatsync_features, self._est_beats_times) + self._ann_beatsync_times = self.pad_beat_times(self._ann_beatsync_features, self._ann_beats_times) + @property def frame_times(self): @@ -496,6 +518,15 @@ def features(self): "were found" % self.feat_type ) self._features = self._ann_beatsync_features + elif self.feat_type is FeatureTypes.est_beatsync_mfpb: + self._features = self._est_beatsync_features_mfpb + elif self.feat_type is FeatureTypes.ann_beatsync_mfpb: + if self._ann_beatsync_features_mfbp is None: + raise FeatureTypeNotFound( + "Feature type %s is not valid because no annotated beats " + "were found" % self.feat_type + ) + self._features = self._ann_beatsync_features_mfbp else: raise FeatureTypeNotFound("Feature type %s is not valid." % self.feat_type) From d9c5783ffb68e1b79e40d7a7d67373953d2f7fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Mon, 4 Nov 2024 17:48:17 +0100 Subject: [PATCH 03/18] Rename ***_beastync_features_mfpb to ***_multibeat_features --- msaf/base.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/msaf/base.py b/msaf/base.py index e458a4d1..d727e780 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -29,7 +29,7 @@ # - framesync: Frame-wise synchronous. 
# - est_beatsync: Beat-synchronous using estimated beats with librosa # - ann_beatsync: Beat-synchronous using annotated beats from ground-truth -FeatureTypes = Enum("FeatureTypes", "framesync est_beatsync ann_beatsync est_beatsync_mfpb ann_beatsync_mfpb") +FeatureTypes = Enum("FeatureTypes", "framesync est_beatsync ann_beatsync est_multibeat ann_multibeat") # All available features features_registry = {} @@ -90,8 +90,8 @@ def __init__(self, file_struct, sr, hop_length, feat_type): self._framesync_features = None # Frame-sync features self._est_beatsync_features = None # Estimated Beat-sync features self._ann_beatsync_features = None # Annotated Beat-sync features - self._est_beatsync_features_mfpb = None # Estimated Beat-sync features with multiple frames per beat - self._ann_beatsync_features_mfpb = None # Annotated Beat-sync features with multiple frames per beat + self._est_multibeat_features = None # Estimated Beat-sync features with multiple frames per beat + self._ann_mutlibeat_features = None # Annotated Beat-sync features with multiple frames per beat self._audio = None # Actual audio signal self._audio_harmonic = None # Harmonic audio signal self._audio_percussive = None # Percussive audio signal @@ -237,7 +237,11 @@ def pad_beat_times(self, beatsync_feats, beat_times): ------- beatsync_times: np.array The updated time points of the beat positions (in seconds) + or None if beatsync_feats is None """ + if beatsync_feats is None: + return None + beatsync_times = np.copy(beat_times) if beatsync_times.shape[0] != beatsync_feats.shape[0]: beatsync_times = np.concatenate( @@ -296,7 +300,7 @@ def read_features(self, tol=1e-3): ) self._framesync_features = np.array(feats[self.get_id()]["framesync"]) self._est_beatsync_features = np.array(feats[self.get_id()]["est_beatsync"]) - self._est_beatsync_features_mfpb = np.array(feats[self.get_id()]["est_beatsync_mfpb"]) + self._est_multibeat_features = np.array(feats[self.get_id()]["est_multibeat"]) # Read annotated 
beats if available if "ann_beats" in feats.keys(): @@ -308,8 +312,8 @@ def read_features(self, tol=1e-3): self._ann_beatsync_features = np.array( feats[self.get_id()]["ann_beatsync"] ) - self._ann_beatsync_features_mfpb = np.array( - feats[self.get_id()]["ann_beatsync_mfpb"] + self._ann_mutlibeat_features = np.array( + feats[self.get_id()]["ann_multibeat"] ) except KeyError: raise WrongFeaturesFormatError( @@ -392,12 +396,12 @@ def write_features(self): ] = self._ann_beatsync_features.tolist() out_json[self.get_id()][ - "est_beatsync_mfpb" - ] = self._est_beatsync_features_mfpb.tolist() - if self._ann_beatsync_features_mfpb is not None: + "est_multibeat" + ] = self._est_multibeat_features.tolist() + if self._ann_mutlibeat_features is not None: out_json[self.get_id()][ - "ann_beatsync_mfpb" - ] = self._ann_beatsync_features_mfpb.tolist() + "ann_multibeat" + ] = self._ann_mutlibeat_features.tolist() # Save it @@ -446,8 +450,8 @@ def _compute_all_features(self): pad = True # Always append to the end of the features self._est_beatsync_features = self.compute_beat_sync_features(self._est_beats_frames, pad) self._ann_beatsync_features = self.compute_beat_sync_features(self._ann_beats_frames, pad) - self._est_beatsync_features_mfpb = self.compute_beat_sync_features(self._est_beats_frames, pad, frames_per_beat=self.fpb) - self._ann_beatsync_features_mfpb = self.compute_beat_sync_features(self._ann_beats_frames, pad, frames_per_beat=self.fpb) + self._est_multibeat_features = self.compute_beat_sync_features(self._est_beats_frames, pad, frames_per_beat=self.fpb) + self._ann_mutlibeat_features = self.compute_beat_sync_features(self._ann_beats_frames, pad, frames_per_beat=self.fpb) self._est_beatsync_times = self.pad_beat_times(self._est_beatsync_features, self._est_beats_times) self._ann_beatsync_times = self.pad_beat_times(self._ann_beatsync_features, self._ann_beats_times) @@ -518,9 +522,9 @@ def features(self): "were found" % self.feat_type ) self._features = 
self._ann_beatsync_features - elif self.feat_type is FeatureTypes.est_beatsync_mfpb: - self._features = self._est_beatsync_features_mfpb - elif self.feat_type is FeatureTypes.ann_beatsync_mfpb: + elif self.feat_type is FeatureTypes.est_multibeat: + self._features = self._est_multibeat_features + elif self.feat_type is FeatureTypes.ann_multibeat: if self._ann_beatsync_features_mfbp is None: raise FeatureTypeNotFound( "Feature type %s is not valid because no annotated beats " From 84a4edd8c2a266daa38f527cbc22bd5965a55d18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Mon, 4 Nov 2024 18:49:46 +0100 Subject: [PATCH 04/18] Add option to select features for multibeat and option to change frames per beat --- msaf/base.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/msaf/base.py b/msaf/base.py index d727e780..823c4efd 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -25,10 +25,13 @@ WrongFeaturesFormatError, ) -# Three types of features at the moment: +# Five types of features at the moment: # - framesync: Frame-wise synchronous. # - est_beatsync: Beat-synchronous using estimated beats with librosa # - ann_beatsync: Beat-synchronous using annotated beats from ground-truth +# - est_mutlibeat: Multiple frames per beat-synchronous using estimated beats +# - ann_multibeat: Multiple frames per beat-synchronous using annotated beats from ground-truth + FeatureTypes = Enum("FeatureTypes", "framesync est_beatsync ann_beatsync est_multibeat ann_multibeat") # All available features @@ -61,7 +64,7 @@ class Features(metaclass=MetaFeatures): features per frames. """ - def __init__(self, file_struct, sr, hop_length, feat_type): + def __init__(self, file_struct, sr, hop_length, feat_type, frame_per_beat=3): """Init function for the base class to make sure all features have at least these parameters as attributes. 
@@ -75,13 +78,15 @@ def __init__(self, file_struct, sr, hop_length, feat_type): Hop in frames of the features to be computed. feat_type: `FeatureTypes` Enum containing the type of feature. + frame_per_beat: int > 0 + Number of frames per beat used in multibeat features """ # Set the global parameters self.file_struct = file_struct self.sr = sr self.hop_length = hop_length self.feat_type = feat_type - self.fpb = 3 # The number of frames per beat computed for mfpb features + self.frame_per_beat = frame_per_beat # The number of frames per beat computed for mfpb features # The following attributes will be populated, if needed, # once the `features` getter is called @@ -450,8 +455,8 @@ def _compute_all_features(self): pad = True # Always append to the end of the features self._est_beatsync_features = self.compute_beat_sync_features(self._est_beats_frames, pad) self._ann_beatsync_features = self.compute_beat_sync_features(self._ann_beats_frames, pad) - self._est_multibeat_features = self.compute_beat_sync_features(self._est_beats_frames, pad, frames_per_beat=self.fpb) - self._ann_mutlibeat_features = self.compute_beat_sync_features(self._ann_beats_frames, pad, frames_per_beat=self.fpb) + self._est_multibeat_features = self.compute_beat_sync_features(self._est_beats_frames, pad, frames_per_beat=self.frame_per_beat) + self._ann_mutlibeat_features = self.compute_beat_sync_features(self._ann_beats_frames, pad, frames_per_beat=self.frame_per_beat) self._est_beatsync_times = self.pad_beat_times(self._est_beatsync_features, self._est_beats_times) self._ann_beatsync_times = self.pad_beat_times(self._ann_beatsync_features, self._ann_beats_times) @@ -537,7 +542,7 @@ def features(self): return self._features @classmethod - def select_features(cls, features_id, file_struct, annot_beats, framesync): + def select_features(cls, features_id, file_struct, annot_beats, framesync, multibeat=False): """Selects the features from the given parameters. 
Parameters @@ -560,9 +565,15 @@ def select_features(cls, features_id, file_struct, annot_beats, framesync): if not annot_beats and framesync: feat_type = FeatureTypes.framesync elif annot_beats and not framesync: - feat_type = FeatureTypes.ann_beatsync + if multibeat: + feat_type = FeatureTypes.ann_multibeat + else: + feat_type = FeatureTypes.ann_beatsync elif not annot_beats and not framesync: - feat_type = FeatureTypes.est_beatsync + if multibeat: + feat_type = FeatureTypes.est_multibeat + else: + feat_type = FeatureTypes.est_beatsync else: raise FeatureTypeNotFound("Type of features not valid.") From bf7f36f368526922ef98a38929e7fde8afc818f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Mon, 4 Nov 2024 18:51:06 +0100 Subject: [PATCH 05/18] Add tests for multibeat --- tests/test_features.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/test_features.py b/tests/test_features.py index 041094d6..49888918 100644 --- a/tests/test_features.py +++ b/tests/test_features.py @@ -48,6 +48,8 @@ def run_framesync(features_class): assert "framesync" in data[features_class.get_id()].keys() assert "est_beatsync" in data[features_class.get_id()].keys() assert "ann_beatsync" in data[features_class.get_id()].keys() + assert "est_multibeat" in data[features_class.get_id()].keys() + assert "ann_multibeat" in data[features_class.get_id()].keys() read_feats = np.array(data[features_class.get_id()]["framesync"]) assert np.array_equal(feats, read_feats) @@ -312,6 +314,14 @@ def test_select_features(): assert isinstance(feature, CQT) assert feature.feat_type == FeatureTypes.ann_beatsync + feature = Features.select_features("mfcc", my_file_struct, False, False, multibeat=True) + assert isinstance(feature, MFCC) + assert feature.feat_type == FeatureTypes.est_multibeat + + feature = Features.select_features("cqt", my_file_struct, True, False, multibeat=True) + assert isinstance(feature, CQT) + assert feature.feat_type == 
FeatureTypes.ann_multibeat + def test_wrong_select_features(): with raises(FeatureTypeNotFound): From cc8e345352ce400e0d64136430f985a7fe348a2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Thu, 7 Nov 2024 17:09:20 +0100 Subject: [PATCH 06/18] Add frames per beat to global params --- msaf/base.py | 34 +++++++++++--------------- tests/features/chirp_noaudio.json | 4 ++- tests/fixtures/01_-_Come_Together.json | 7 ++++-- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/msaf/base.py b/msaf/base.py index 823c4efd..330e47b7 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -64,7 +64,7 @@ class Features(metaclass=MetaFeatures): features per frames. """ - def __init__(self, file_struct, sr, hop_length, feat_type, frame_per_beat=3): + def __init__(self, file_struct, sr, hop_length, feat_type, frames_per_beat=3): """Init function for the base class to make sure all features have at least these parameters as attributes. @@ -86,7 +86,7 @@ def __init__(self, file_struct, sr, hop_length, feat_type, frame_per_beat=3): self.sr = sr self.hop_length = hop_length self.feat_type = feat_type - self.frame_per_beat = frame_per_beat # The number of frames per beat computed for mfpb features + self.frames_per_beat = frames_per_beat # The number of frames per beat computed for mfpb features # The following attributes will be populated, if needed, # once the `features` getter is called @@ -116,6 +116,7 @@ def __init__(self, file_struct, sr, hop_length, feat_type, frame_per_beat=3): "feat_type", "hop_length", "dur", + "frames_per_beat", ] def compute_HPSS(self): @@ -279,6 +280,7 @@ def read_features(self, tol=1e-3): assert os.path.basename(self.file_struct.audio_file) == os.path.basename( feats["globals"]["audio_file"] ) + assert self.frames_per_beat == int(feats["globals"]["frames_per_beat"]) # Check for specific features params feat_params_err = FeatureParamsError( @@ -365,6 +367,7 @@ def write_features(self): "sample_rate": self.sr, "hop_length": 
self.hop_length, "audio_file": self.file_struct.audio_file, + "frames_per_beat": self.frames_per_beat, } # Beats @@ -392,23 +395,14 @@ def write_features(self): # Actual features out_json[self.get_id()]["framesync"] = self._framesync_features.tolist() - out_json[self.get_id()][ - "est_beatsync" - ] = self._est_beatsync_features.tolist() + out_json[self.get_id()]["est_beatsync"] = self._est_beatsync_features.tolist() + if self._ann_beatsync_features is not None: - out_json[self.get_id()][ - "ann_beatsync" - ] = self._ann_beatsync_features.tolist() + out_json[self.get_id()]["ann_beatsync"] = self._ann_beatsync_features.tolist() - out_json[self.get_id()][ - "est_multibeat" - ] = self._est_multibeat_features.tolist() + out_json[self.get_id()]["est_multibeat"] = self._est_multibeat_features.tolist() if self._ann_mutlibeat_features is not None: - out_json[self.get_id()][ - "ann_multibeat" - ] = self._ann_mutlibeat_features.tolist() - - + out_json[self.get_id()]["ann_multibeat"] = self._ann_mutlibeat_features.tolist() # Save it with open(self.file_struct.features_file, "w") as f: json.dump(out_json, f, indent=2) @@ -455,8 +449,8 @@ def _compute_all_features(self): pad = True # Always append to the end of the features self._est_beatsync_features = self.compute_beat_sync_features(self._est_beats_frames, pad) self._ann_beatsync_features = self.compute_beat_sync_features(self._ann_beats_frames, pad) - self._est_multibeat_features = self.compute_beat_sync_features(self._est_beats_frames, pad, frames_per_beat=self.frame_per_beat) - self._ann_mutlibeat_features = self.compute_beat_sync_features(self._ann_beats_frames, pad, frames_per_beat=self.frame_per_beat) + self._est_multibeat_features = self.compute_beat_sync_features(self._est_beats_frames, pad, frames_per_beat=self.frames_per_beat) + self._ann_mutlibeat_features = self.compute_beat_sync_features(self._ann_beats_frames, pad, frames_per_beat=self.frames_per_beat) self._est_beatsync_times = 
self.pad_beat_times(self._est_beatsync_features, self._est_beats_times) self._ann_beatsync_times = self.pad_beat_times(self._ann_beatsync_features, self._ann_beats_times) @@ -530,12 +524,12 @@ def features(self): elif self.feat_type is FeatureTypes.est_multibeat: self._features = self._est_multibeat_features elif self.feat_type is FeatureTypes.ann_multibeat: - if self._ann_beatsync_features_mfbp is None: + if self._ann_multibeat_features is None: raise FeatureTypeNotFound( "Feature type %s is not valid because no annotated beats " "were found" % self.feat_type ) - self._features = self._ann_beatsync_features_mfbp + self._features = self._ann_mutlibeat_features else: raise FeatureTypeNotFound("Feature type %s is not valid." % self.feat_type) diff --git a/tests/features/chirp_noaudio.json b/tests/features/chirp_noaudio.json index b670ca38..53b53344 100644 --- a/tests/features/chirp_noaudio.json +++ b/tests/features/chirp_noaudio.json @@ -11,12 +11,14 @@ "audio_file": "fixtures/chirp_noaudio.mp3", "dur": 10.0, "sample_rate": 22050, - "hop_length": 1024 + "hop_length": 1024, + "frames_per_beat": 3 }, "est_beats": [], "est_beatsync_times": [], "cqt": { "est_beatsync": [], + "est_multibeat": [], "params": { "filter_scale": "1.0", "ref_power": "amax", diff --git a/tests/fixtures/01_-_Come_Together.json b/tests/fixtures/01_-_Come_Together.json index ad6369d2..b21ba1c4 100644 --- a/tests/fixtures/01_-_Come_Together.json +++ b/tests/fixtures/01_-_Come_Together.json @@ -89078,7 +89078,8 @@ 0.0, 0.0 ] - ] + ], + "est_multibeat" :[] }, "mfcc": { "est_beatsync": [ @@ -100267,6 +100268,7 @@ 1.4796745923739383 ] ], + "est_multibeat" :[], "params": { "ref_power": "amax", "n_mels": "128", @@ -190088,7 +190090,8 @@ "audio_file": "/home/uri/datasets/BeatlesTUT/audio/01_-_Come_Together.wav", "dur": 260.6497959183674, "sample_rate": 22050, - "hop_length": 1024 + "hop_length": 1024, + "frames_per_beat": 3 }, "est_beats": [ 1.207437641723356, From c873611995b70c80b9017fe08db87d3e395263ab 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Fri, 8 Nov 2024 18:19:04 +0100 Subject: [PATCH 07/18] Add multibeat times and add proper padding to multibeat Had to refactor the computing of frames and times to do this. --- msaf/base.py | 119 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 85 insertions(+), 34 deletions(-) diff --git a/msaf/base.py b/msaf/base.py index 330e47b7..dc346fae 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -107,6 +107,10 @@ def __init__(self, file_struct, sr, hop_length, feat_type, frames_per_beat=3): self._ann_beatsync_times = None # Annotated beat-sync times self._ann_beats_times = None # Annotated beat times self._ann_beats_frames = None # Annotated beats in frames + self._est_multibeat_frames = None # Estimated multibeat frames + self._est_multibeat_times = None # Estimated multibeat times + self._ann_multibeat_frames = None # Annotated multibeat frames + self._ann_multibeat_times = None # Annotated multibeat times # Differentiate global params from subclass attributes. # This is a bit hacky... I accept Pull Requests ^_^ @@ -193,7 +197,7 @@ def read_ann_beats(self): ) return times, frames - def compute_beat_sync_features(self, beat_frames, pad, frames_per_beat=1): + def compute_beat_sync_features(self, beat_frames, pad): """Make the features beat-synchronous. 
Parameters @@ -211,17 +215,7 @@ def compute_beat_sync_features(self, beat_frames, pad, frames_per_beat=1): """ if beat_frames is None: return None - if frames_per_beat != 1: - new_beat_frames = np.empty(0,dtype=int) - for idx in range(len(beat_frames)-1): - this_beat_frame = beat_frames[idx] - next_beat_frame = beat_frames[idx+1] - subdivision = (next_beat_frame - this_beat_frame) - assert (frames_per_beat < subdivision) - frames_in_beat = [int(k * subdivision/frames_per_beat + this_beat_frame) for k in range(frames_per_beat)] - new_beat_frames = np.concatenate((new_beat_frames, frames_in_beat), dtype=int ) - beat_frames = new_beat_frames - + # Make beat synchronous beatsync_feats = librosa.util.utils.sync( self._framesync_features.T, beat_frames, pad=pad @@ -229,31 +223,69 @@ def compute_beat_sync_features(self, beat_frames, pad, frames_per_beat=1): return beatsync_feats - def pad_beat_times(self, beatsync_feats, beat_times): + def _compute_multibeat(self, beat_frames): + """Compute frames index evenly distributed between beats + Parameters + ---------- + beat_frames: np.array + the frames index of beats + Returns + ------- + multibeat_frames: np.array + the frames index of multibeats + """ + if beat_frames is None: + return None, None + multibeat_frames = np.empty(0,dtype=int) + for idx in range(len(beat_frames)-1): + this_beat_frame = beat_frames[idx] + next_beat_frame = beat_frames[idx+1] + subdivision = (next_beat_frame - this_beat_frame) + assert (self.frames_per_beat < subdivision) + frames_in_beat = [int(k * subdivision/self.frames_per_beat + this_beat_frame) for k in range(self.frames_per_beat)] + multibeat_frames = np.concatenate((multibeat_frames, frames_in_beat), dtype=int ) + + # Compute times of frames + multibeat_times = librosa.frames_to_time(multibeat_frames, sr=self.sr, hop_length=self.hop_length) + + return multibeat_times, multibeat_frames + + def _pad_beats(self, beat_times, beat_frames): """Pad the beat times if necessary Parameters ---------- 
- beatsync_feats: np.array - The beat-synchronized features. - beat_times: np.array The time points of the beat positions (in seconds). + beat_frames: np.array + The beat-synchronized frames. + + Returns ------- - beatsync_times: np.array + padded_beat_times: np.array The updated time points of the beat positions (in seconds) - or None if beatsync_feats is None + or None if beat_frames is None + + padded_beat_frames: np.array + The frames padded to the full range of self._framesync + or None if beat_frames is None + """ - if beatsync_feats is None: - return None - - beatsync_times = np.copy(beat_times) - if beatsync_times.shape[0] != beatsync_feats.shape[0]: - beatsync_times = np.concatenate( - (beatsync_times, [self._framesync_times[-1]]) + if beat_frames is None: + return None, None + if beat_frames.shape[0] == 0: + return beat_times, beat_frames + assert (beat_frames.shape[0] == beat_times.shape[0]) + padded_beat_frames = np.copy(beat_frames) + padded_beat_times = np.copy(beat_times) + if padded_beat_frames[-1] < self._framesync_features.T.shape[0]: + padded_beat_frames = np.append(padded_beat_frames, self._framesync_features.T.shape[0]) + padded_beat_times = np.concatenate( + (padded_beat_times, [self._framesync_times[-1]]) ) - return beatsync_times + assert (padded_beat_frames.shape[0] == padded_beat_times.shape[0]) + return padded_beat_times, padded_beat_frames def read_features(self, tol=1e-3): """Reads the features from a file and stores them in the current @@ -307,12 +339,15 @@ def read_features(self, tol=1e-3): ) self._framesync_features = np.array(feats[self.get_id()]["framesync"]) self._est_beatsync_features = np.array(feats[self.get_id()]["est_beatsync"]) + + self._est_multibeat_times = np.array(feats[self.get_id()]["est_multibeat_times"]) self._est_multibeat_features = np.array(feats[self.get_id()]["est_multibeat"]) # Read annotated beats if available if "ann_beats" in feats.keys(): self._ann_beats_times = np.array(feats["ann_beats"]) 
self._ann_beatsync_times = np.array(feats["ann_beatsync_times"]) + self._ann_multibeat_times = np.array(feats["ann_multibeat_times"]) self._ann_beats_frames = librosa.core.time_to_frames( self._ann_beats_times, sr=self.sr, hop_length=self.hop_length ) @@ -373,9 +408,11 @@ def write_features(self): # Beats out_json["est_beats"] = self._est_beats_times.tolist() out_json["est_beatsync_times"] = self._est_beatsync_times.tolist() + out_json["est_multibeat_times"] = self._est_multibeat_times.tolist() if self._ann_beats_times is not None: out_json["ann_beats"] = self._ann_beats_times.tolist() out_json["ann_beatsync_times"] = self._ann_beatsync_times.tolist() + out_json["ann_multibeat_times"] = self._ann_multibeat_times.tolist() except FeatureParamsError: # We have other features in the file, simply add these ones with open(self.file_struct.features_file) as f: @@ -441,18 +478,27 @@ def _compute_all_features(self): # Compute framesync times self._compute_framesync_times() - # Compute/Read beats + # Compute/Read beats times and frames self._est_beats_times, self._est_beats_frames = self.estimate_beats() self._ann_beats_times, self._ann_beats_frames = self.read_ann_beats() - # Beat-Synchronize - pad = True # Always append to the end of the features + # Pad beats + self._est_beatsync_times, self._est_beats_frames = self._pad_beats(self._est_beats_times, self._est_beats_frames) + self._ann_beatsync_times, self._ann_beats_frames = self._pad_beats(self._ann_beats_times, self._ann_beats_frames) + + # Compute multibeats timees and frames + self._est_multibeat_times, self._est_multibeat_frames = self._compute_multibeat(self._est_beats_frames) + self._ann_multibeat_times, self._ann_multibeat_frames = self._compute_multibeat(self._ann_beats_frames) + + # Compute frames features on beat + pad = False # We already padded the beat frames append to the end of the features self._est_beatsync_features = self.compute_beat_sync_features(self._est_beats_frames, pad) 
self._ann_beatsync_features = self.compute_beat_sync_features(self._ann_beats_frames, pad) - self._est_multibeat_features = self.compute_beat_sync_features(self._est_beats_frames, pad, frames_per_beat=self.frames_per_beat) - self._ann_mutlibeat_features = self.compute_beat_sync_features(self._ann_beats_frames, pad, frames_per_beat=self.frames_per_beat) - self._est_beatsync_times = self.pad_beat_times(self._est_beatsync_features, self._est_beats_times) - self._ann_beatsync_times = self.pad_beat_times(self._ann_beatsync_features, self._ann_beats_times) + + # Compute frames features on multibeat + self._est_multibeat_features = self.compute_beat_sync_features(self._est_multibeat_frames, pad) + self._ann_mutlibeat_features = self.compute_beat_sync_features(self._ann_multibeat_frames, pad) + @property @@ -469,7 +515,12 @@ def frame_times(self): frame_times = self._est_beatsync_times elif self.feat_type is FeatureTypes.ann_beatsync: frame_times = self._ann_beatsync_times - + elif self.feat_type is FeatureTypes.est_multibeat: + frame_times = self._est_multibeat_times + elif self.feat_type is FeatureTypes.ann_multibeat: + frame_times = self._ann_multibeat_times + else: + raise FeatureTypeNotFound("Type of features not valid.") return frame_times @property From 3f663456ad978a68b321dc776fe92da92236f775 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Fri, 8 Nov 2024 18:28:13 +0100 Subject: [PATCH 08/18] Add frames per beat parameter to all features --- msaf/features.py | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/msaf/features.py b/msaf/features.py index 69fec786..cdc5dc5e 100644 --- a/msaf/features.py +++ b/msaf/features.py @@ -41,6 +41,7 @@ def __init__( norm=config.cqt.norm, filter_scale=config.cqt.filter_scale, ref_power=config.cqt.ref_power, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. 
@@ -64,10 +65,12 @@ def __init__( ref_power: str The reference power for logarithmic scaling. See `configdefaults.py` for the possible values. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) # Init the CQT parameters self.n_bins = n_bins @@ -134,6 +137,7 @@ def __init__( n_mels=config.mel.n_mels, f_min=config.mel.f_min, f_max=config.mel.f_max, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -156,10 +160,12 @@ def __init__( Minimum frequency. f_min: int > 0 Maximal frequency. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) self.n_fft = n_fft # Init the Mel parameters @@ -213,6 +219,7 @@ def __init__( n_mels=config.mel.n_mels, f_min=config.mel.f_min, f_max=config.mel.f_max, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -235,10 +242,12 @@ def __init__( Minimum frequency. f_min: int > 0 Maximal frequency. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) self.n_fft = n_fft # Init the Mel parameters @@ -290,6 +299,7 @@ def __init__( n_mels=config.mfcc.n_mels, n_mfcc=config.mfcc.n_mfcc, ref_power=config.mfcc.ref_power, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -312,10 +322,12 @@ def __init__( Number of mel coefficients. 
ref_power: function The reference power for logarithmic scaling. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) # Init the MFCC parameters self.n_fft = n_fft @@ -372,6 +384,7 @@ def __init__( norm=config.pcp.norm, f_min=config.pcp.f_min, n_octaves=config.pcp.n_octaves, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -394,10 +407,12 @@ def __init__( Minimum frequency. n_octaves: int > 0 Number of octaves. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) # Init the PCP parameters self.n_bins = n_bins @@ -460,6 +475,7 @@ def __init__( norm=config.tonnetz.norm, f_min=config.tonnetz.f_min, n_octaves=config.tonnetz.n_octaves, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -482,10 +498,12 @@ def __init__( Minimum frequency. n_octaves: int > 0 Number of octaves. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) # Init the local parameters self.n_bins = n_bins @@ -534,6 +552,7 @@ def __init__( sr=config.sample_rate, hop_length=config.hop_size, win_length=config.tempogram.win_length, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -550,10 +569,12 @@ def __init__( Hop size in frames for the analysis. 
win_length: int > 0 The size of the window for the tempogram. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) # Init the local parameters self.win_length = win_length From 3853aa5dda9955381dbe83d0f040a88b6d29cc24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Fri, 8 Nov 2024 18:28:51 +0100 Subject: [PATCH 09/18] Add frames per beat to config --- msaf/configdefaults.py | 1 + msaf/input_output.py | 3 ++- msaf/run.py | 9 ++++++--- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/msaf/configdefaults.py b/msaf/configdefaults.py index dd67ebf5..61727a6b 100644 --- a/msaf/configdefaults.py +++ b/msaf/configdefaults.py @@ -33,6 +33,7 @@ AddConfigVar("sample_rate", "Default Sample Rate to be used.", IntParam(22050)) AddConfigVar("n_fft", "FFT size", IntParam(4096)) AddConfigVar("hop_size", "Hop length in samples", IntParam(1024)) +AddConfigVar("frames_per_beat","The number of framees computed per beat on multibeat", IntParam(3)) # Files and dirs diff --git a/msaf/input_output.py b/msaf/input_output.py index 2547fbbc..7d0609ea 100644 --- a/msaf/input_output.py +++ b/msaf/input_output.py @@ -356,13 +356,14 @@ def get_all_label_algorithms(): return algo_ids -def get_configuration(feature, annot_beats, framesync, boundaries_id, labels_id): +def get_configuration(feature, annot_beats, framesync, multibeat, boundaries_id, labels_id): """Gets the configuration dictionary from the current parameters of the algorithms to be evaluated.""" config = {} config["annot_beats"] = annot_beats config["feature"] = feature config["framesync"] = framesync + config["multibeat"] = multibeat bound_config = {} if boundaries_id != "gt": bound_config = eval(msaf.algorithms.__name__ + "." 
+ boundaries_id).config diff --git a/msaf/run.py b/msaf/run.py index ed27424e..93c164e4 100644 --- a/msaf/run.py +++ b/msaf/run.py @@ -254,7 +254,7 @@ def process_track(file_struct, boundaries_id, labels_id, config, annotator_id=0) # Get features config["features"] = Features.select_features( - config["feature"], file_struct, config["annot_beats"], config["framesync"] + config["feature"], file_struct, config["annot_beats"], config["framesync"], multibeat=config["multibeat"] ) # Get estimations @@ -286,6 +286,7 @@ def process( config=None, out_bounds="out_bounds.wav", out_sr=22050, + multibeat=False ): """Main process to segment a file or a collection of files. @@ -324,6 +325,8 @@ def process( mode, when sonify_bounds is True. out_sr : int Sampling rate for the sonified bounds. + multibeat : bool + Whether to use multibeat. Returns ------- @@ -338,7 +341,7 @@ def process( # Set up configuration based on algorithms parameters if config is None: config = io.get_configuration( - feature, annot_beats, framesync, boundaries_id, labels_id + feature, annot_beats, framesync, multibeat, boundaries_id, labels_id ) config["features"] = None @@ -356,7 +359,7 @@ def process( # Get features config["features"] = Features.select_features( - feature, file_struct, annot_beats, framesync + feature, file_struct, annot_beats, framesync, multibeat=multibeat ) # And run the algorithms From 578c41be01c6d0f36981f58c06433e0f73757270 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Fri, 8 Nov 2024 18:32:15 +0100 Subject: [PATCH 10/18] Add multibeat parameter to msaf example --- examples/run_msaf.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/run_msaf.py b/examples/run_msaf.py index 383861f4..773a9192 100755 --- a/examples/run_msaf.py +++ b/examples/run_msaf.py @@ -63,6 +63,13 @@ help="Use frame-synchronous features", default=False, ) + parser.add_argument( + "-mb", + action="store_true", + dest="multibeat", + help="Compute mulitple frames per 
beat", + default=False, + ) parser.add_argument( "-j", action="store", @@ -113,6 +120,7 @@ "annot_beats": args.annot_beats, "feature": args.feature, "framesync": args.framesync, + "multibeat": args.multibeat, "boundaries_id": args.boundaries_id, "labels_id": args.labels_id, "n_jobs": args.n_jobs, From b41452e0b5667b64d3e7d12175b5acec15292492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Fri, 8 Nov 2024 18:32:40 +0100 Subject: [PATCH 11/18] Add multibeat param to SegmenterInterface --- msaf/algorithms/interface.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/msaf/algorithms/interface.py b/msaf/algorithms/interface.py index 8758e581..ef4392b1 100644 --- a/msaf/algorithms/interface.py +++ b/msaf/algorithms/interface.py @@ -51,6 +51,7 @@ def __init__( feature="pcp", annot_beats=False, framesync=False, + multibeat=False, features=None, **config ): @@ -80,6 +81,7 @@ def __init__( self.feature_str = feature self.annot_beats = annot_beats self.framesync = framesync + self.multibeat = multibeat self.config = config self.features = features From a988d282cea26748b81eae3f4c908a3e7517a1fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Sat, 9 Nov 2024 18:05:39 +0100 Subject: [PATCH 12/18] Add multibeat option in eval process --- msaf/eval.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/msaf/eval.py b/msaf/eval.py index 7f497c33..c3867c21 100644 --- a/msaf/eval.py +++ b/msaf/eval.py @@ -337,6 +337,7 @@ def process( labels_id=msaf.config.default_label_id, annot_beats=False, framesync=False, + multibeat=False, feature="pcp", hier=False, save=False, @@ -389,7 +390,7 @@ def process( # Set up configuration based on algorithms parameters if config is None: config = io.get_configuration( - feature, annot_beats, framesync, boundaries_id, labels_id + feature, annot_beats, framesync, multibeat, boundaries_id, labels_id ) # Hierarchical segmentation From d84282de00d6ab312cfe8b95951b5b86ce166353 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Sat, 9 Nov 2024 18:06:41 +0100 Subject: [PATCH 13/18] Rework padding beats to original state The way I did it before didn't work and had no advantages --- msaf/base.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/msaf/base.py b/msaf/base.py index dc346fae..0096585c 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -251,7 +251,7 @@ def _compute_multibeat(self, beat_frames): return multibeat_times, multibeat_frames def _pad_beats(self, beat_times, beat_frames): - """Pad the beat times if necessary + """Pad the beat frames Parameters ---------- beat_times: np.array @@ -286,7 +286,29 @@ def _pad_beats(self, beat_times, beat_frames): ) assert (padded_beat_frames.shape[0] == padded_beat_times.shape[0]) return padded_beat_times, padded_beat_frames - + + def _pad_beats_times(self, beat_times, beatsync_feats): + """Pad the beat_times with the last frametimes if necessary + Parameters + ----------- + beat_times: np.array + the beats times to pad + beatsync_features: np.array + The features corresponding to the beats + Returns + --------- + beatsync_times: np.array + the padded beats times + """ + if beatsync_feats is None : + return None + beatsync_times = np.copy(beat_times) + if beatsync_times.shape[0] != beatsync_feats.shape[0]: + beatsync_times = np.concatenate( + (beatsync_times, [self._framesync_times[-1]]) + ) + return beatsync_times + def read_features(self, tol=1e-3): """Reads the features from a file and stores them in the current object. 
@@ -482,16 +504,12 @@ def _compute_all_features(self): self._est_beats_times, self._est_beats_frames = self.estimate_beats() self._ann_beats_times, self._ann_beats_frames = self.read_ann_beats() - # Pad beats - self._est_beatsync_times, self._est_beats_frames = self._pad_beats(self._est_beats_times, self._est_beats_frames) - self._ann_beatsync_times, self._ann_beats_frames = self._pad_beats(self._ann_beats_times, self._ann_beats_frames) - # Compute multibeats timees and frames self._est_multibeat_times, self._est_multibeat_frames = self._compute_multibeat(self._est_beats_frames) self._ann_multibeat_times, self._ann_multibeat_frames = self._compute_multibeat(self._ann_beats_frames) # Compute frames features on beat - pad = False # We already padded the beat frames append to the end of the features + pad = True # pad the beat frames self._est_beatsync_features = self.compute_beat_sync_features(self._est_beats_frames, pad) self._ann_beatsync_features = self.compute_beat_sync_features(self._ann_beats_frames, pad) @@ -499,7 +517,9 @@ def _compute_all_features(self): self._est_multibeat_features = self.compute_beat_sync_features(self._est_multibeat_frames, pad) self._ann_mutlibeat_features = self.compute_beat_sync_features(self._ann_multibeat_frames, pad) - + # Pad beatsync times + self._est_beatsync_times = self._pad_beats_times(self._est_beats_times, self._est_beatsync_features) + self._ann_beatsync_times = self._pad_beats_times(self._ann_beats_times, self._ann_beatsync_features) @property def frame_times(self): From 05251860e93c6dcbf3893aced65001eea4d1ad0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Sat, 9 Nov 2024 18:08:32 +0100 Subject: [PATCH 14/18] Add multibeat parameter to test_run --- tests/test_run.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_run.py b/tests/test_run.py index c405ef8b..a4c9e8dd 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -76,6 +76,7 @@ def 
test_run_algorithms(): feature = "pcp" annot_beats = False framesync = False + multibeat = False file_struct = msaf.io.FileStruct(audio_file) file_struct.features_file = msaf.config.features_tmp_file @@ -84,7 +85,7 @@ def test_run_algorithms(): for label_id in label_ids: print(f"bound_id: {bound_id},\tlabel_id: {label_id}") config = msaf.io.get_configuration( - feature, annot_beats, framesync, bound_id, label_id + feature, annot_beats, framesync, multibeat, bound_id, label_id ) config["hier"] = False config["features"] = Features.select_features( @@ -105,7 +106,7 @@ def test_run_algorithms(): def _test_run_msaf(bound_id, label_id, hier=False): print(f"bound_id: {bound_id},\tlabel_id: {label_id}") config = msaf.io.get_configuration( - feature, annot_beats, framesync, bound_id, label_id + feature, annot_beats, framesync, multibeat, bound_id, label_id ) config["hier"] = hier config["features"] = Features.select_features( @@ -175,6 +176,7 @@ def test_process_track(): config["annot_beats"] = False config["framesync"] = False config["hier"] = False + config["multibeat"] = False est_times, est_labels = msaf.run.process_track( file_struct, bounds_id, labels_id, config ) From 0afa7a2ca3ae893d8e4892e17bfae2c28ab0c943 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Sun, 10 Nov 2024 18:02:14 +0100 Subject: [PATCH 15/18] Shape multi beat to beatwise matrix --- msaf/base.py | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/msaf/base.py b/msaf/base.py index 0096585c..e3a1fb2a 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -235,7 +235,7 @@ def _compute_multibeat(self, beat_frames): the frames index of multibeats """ if beat_frames is None: - return None, None + return None multibeat_frames = np.empty(0,dtype=int) for idx in range(len(beat_frames)-1): this_beat_frame = beat_frames[idx] @@ -245,10 +245,34 @@ def _compute_multibeat(self, beat_frames): frames_in_beat = [int(k * 
subdivision/self.frames_per_beat + this_beat_frame) for k in range(self.frames_per_beat)] multibeat_frames = np.concatenate((multibeat_frames, frames_in_beat), dtype=int ) - # Compute times of frames - multibeat_times = librosa.frames_to_time(multibeat_frames, sr=self.sr, hop_length=self.hop_length) + return multibeat_frames - return multibeat_times, multibeat_frames + def _shape_beatwise(self, multibeat_features): + """Transform the multibeat_features matrix into a beatwise features matrix + Parameters + ----------- + multibeat_features: np.array + The features to transform + Returns + ---------- + beatwise_feature: np.array + The features shaped as a beatwise matrix + or None if multibeat_features is None + """ + if multibeat_features is None: + return None + if multibeat_features.shape[0] == 0: + return multibeat_features + assert(multibeat_features.shape[0]%self.frames_per_beat == 0,"The size of array must be a multiple of self.frames_per_beat") + nummber_of_beats = int(multibeat_features.shape[0]/self.frames_per_beat) + tensor = [] + for k in range(nummber_of_beats): + beat=[] + for f in range(self.frames_per_beat): + beat.append(multibeat_features[k*self.frames_per_beat + f]) + tensor.append(beat) + tensor = np.array(tensor) + return np.reshape(tensor,(tensor.shape[0], -1), order='C') def _pad_beats(self, beat_times, beat_frames): """Pad the beat frames @@ -505,8 +529,12 @@ def _compute_all_features(self): self._ann_beats_times, self._ann_beats_frames = self.read_ann_beats() # Compute multibeats timees and frames - self._est_multibeat_times, self._est_multibeat_frames = self._compute_multibeat(self._est_beats_frames) - self._ann_multibeat_times, self._ann_multibeat_frames = self._compute_multibeat(self._ann_beats_frames) + self._est_multibeat_frames = self._compute_multibeat(self._est_beats_frames) + self._ann_multibeat_frames = self._compute_multibeat(self._ann_beats_frames) + + # Multibeat times is beats time (before padding) + self._est_multibeat_times = 
np.copy(self._est_beats_times) + self._ann_multibeat_times = np.copy(self._ann_beats_times) # Compute frames features on beat pad = True # pad the beat frames @@ -517,6 +545,10 @@ def _compute_all_features(self): self._est_multibeat_features = self.compute_beat_sync_features(self._est_multibeat_frames, pad) self._ann_mutlibeat_features = self.compute_beat_sync_features(self._ann_multibeat_frames, pad) + # Transform multibeat into beatwise matrix + self._est_multibeat_features = self._shape_beatwise(self._est_multibeat_features) + self._ann_mutlibeat_features = self._shape_beatwise(self._ann_mutlibeat_features) + # Pad beatsync times self._est_beatsync_times = self._pad_beats_times(self._est_beats_times, self._est_beatsync_features) self._ann_beatsync_times = self._pad_beats_times(self._ann_beats_times, self._ann_beatsync_features) From 51fe348114e3ee06ae872ac70d6221b202c5db44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Mon, 11 Nov 2024 16:25:37 +0100 Subject: [PATCH 16/18] Add documentation for multibeat --- docs/config.rst | 7 +++++++ docs/features.rst | 18 +++++++++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/docs/config.rst b/docs/config.rst index a7f41e7b..9103ef84 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -124,6 +124,13 @@ import MSAF and print the config variable, as in: The size of the hop size, which should be smaller than the ``n_fft`` value, such that overlap is allowed. +.. attribute:: frames_per_beat + + Positive int value, default: 3 + + The number of frames kept per beat when using the multibeat features. + Must be lower than the number of frames computed between two beats. + .. 
attribute:: features_tmp_file String value, default ``'.features_msaf_tmp.json'`` diff --git a/docs/features.rst b/docs/features.rst index 543328cc..3d545342 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -24,7 +24,8 @@ The format of the ``json`` file is as follows: "audio_file": "", "dur": "", "sample_rate": "", - "hop_length": "" + "hop_length": "", + "frames_per_beat": "" }, "metadata": { "timestamp": "", @@ -47,6 +48,14 @@ The format of the ``json`` file is as follows: [ 0.0, 0.0, "..." ], "..." ], + "est_multibeat": [ + [0.0, 0.0, "..."], + "..." + ], + "ann_multibeat": [ + [0.0, 0.0, "..."], + "..." + ], "params": { "": "", "": "", @@ -55,6 +64,8 @@ The format of the ``json`` file is as follows: } "est_beatsync_times": [ 0.0, 1.0, "..." ], "ann_beatsync_times": [ 0.0, 1.0, "..." ], + "est_multibeat_times": [ 0.0, 1.0, "..." ], + "ann_multibeat_times": [ 0.0, 1.0, "..." ], "est_beats": [ 0.0, 1.0, "..." ], "ann_beats": [ 0.0, 1.0, "..." ] } @@ -67,11 +78,16 @@ A brief description for the main keys of this ``json`` file follows: * ``ann_beats``: contains the set of reference beats, in seconds (only exists if reference beats are available). * ``est_beatsync_times``: contains the set times associated with each (estimated-)beat-synchronous feature (might differ with `est_beats` in the beginning or end). * ``ann_beatsync_times``: contains the set times associated with each (annotated-)beat-synchronous feature (might differ with `ann_beats` in the beginning or end). +* ``est_beatsync_times``: contains the set times associated with each (estimated-)beat-synchronous feature when using multibeat(might differ with `est_beatsync_times` in the end). +* ``ann_beatsync_times``: contains the set times associated with each (annotated-)beat-synchronous feature when using multibeat(might differ with `ann_beatsync_times` in the r end). + * ```` (e.g., ``pcp``, ``mfcc``): contains the actual features of the given audio file. 
Inside this key the following sub-keys can be found: * ``framesync``: Actual frame-wise features. * ``est_beatsync``: Features synchronized to the estimated beats. * ``ann_beatsync``: Features synchronized to the reference beats (only exists if reference beats are available). + * ``est_multibeat``: Features synchronized to the estimated beats (`frames_per_beat` frames are computed for each beat). + * ``ann_multibeat``: Features synchronized to the reference beats (`frames_per_beat` frames are computed for each beat) (only exists if reference beats are available).. * ``params``: A set of parameters of the actual type of features. Pre-computed features for the `SPAM dataset `_ can be found `here `_. From 7050a53bbb0dc3bbe6c95a622b5027fa491f1714 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Tue, 12 Nov 2024 18:30:10 +0100 Subject: [PATCH 17/18] Add test for multibeat Also add FramesPerBeatTooHigh exceptions and remove padbeats (unused functions) --- msaf/base.py | 43 ++++-------------------------- msaf/exceptions.py | 3 +++ tests/test_multibeat.py | 59 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 38 deletions(-) create mode 100644 tests/test_multibeat.py diff --git a/msaf/base.py b/msaf/base.py index e3a1fb2a..a8c838b6 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -23,6 +23,7 @@ NoAudioFileError, NoFeaturesFileError, WrongFeaturesFormatError, + FramePerBeatTooHigh, ) # Five types of features at the moment: @@ -236,12 +237,15 @@ def _compute_multibeat(self, beat_frames): """ if beat_frames is None: return None + if beat_frames == []: + return [] multibeat_frames = np.empty(0,dtype=int) for idx in range(len(beat_frames)-1): this_beat_frame = beat_frames[idx] next_beat_frame = beat_frames[idx+1] subdivision = (next_beat_frame - this_beat_frame) - assert (self.frames_per_beat < subdivision) + if not (self.frames_per_beat < subdivision): + raise FramePerBeatTooHigh frames_in_beat = [int(k * 
subdivision/self.frames_per_beat + this_beat_frame) for k in range(self.frames_per_beat)] multibeat_frames = np.concatenate((multibeat_frames, frames_in_beat), dtype=int ) @@ -274,43 +278,6 @@ def _shape_beatwise(self, multibeat_features): tensor = np.array(tensor) return np.reshape(tensor,(tensor.shape[0], -1), order='C') - def _pad_beats(self, beat_times, beat_frames): - """Pad the beat frames - Parameters - ---------- - beat_times: np.array - The time points of the beat positions (in seconds). - - beat_frames: np.array - The beat-synchronized frames. - - - Returns - ------- - padded_beat_times: np.array - The updated time points of the beat positions (in seconds) - or None if beat_frames is None - - padded_beat_frames: np.array - The frames padded to the full range of self._framesync - or None if beat_frames is None - - """ - if beat_frames is None: - return None, None - if beat_frames.shape[0] == 0: - return beat_times, beat_frames - assert (beat_frames.shape[0] == beat_times.shape[0]) - padded_beat_frames = np.copy(beat_frames) - padded_beat_times = np.copy(beat_times) - if padded_beat_frames[-1] < self._framesync_features.T.shape[0]: - padded_beat_frames = np.append(padded_beat_frames, self._framesync_features.T.shape[0]) - padded_beat_times = np.concatenate( - (padded_beat_times, [self._framesync_times[-1]]) - ) - assert (padded_beat_frames.shape[0] == padded_beat_times.shape[0]) - return padded_beat_times, padded_beat_frames - def _pad_beats_times(self, beat_times, beatsync_feats): """Pad the beat_times with the last frametimes if necessary Parameters diff --git a/msaf/exceptions.py b/msaf/exceptions.py index 7aa87dc9..2fb0cfa5 100644 --- a/msaf/exceptions.py +++ b/msaf/exceptions.py @@ -43,3 +43,6 @@ class NoEstimationsError(MSAFError): class WrongAlgorithmID(MSAFError): """This algorithm was not found in msaf.""" + +class FramePerBeatTooHigh(MSAFError): + """Frames per beat is higher than the number of frames between two beats""" diff --git 
a/tests/test_multibeat.py b/tests/test_multibeat.py new file mode 100644 index 00000000..fd29cfc5 --- /dev/null +++ b/tests/test_multibeat.py @@ -0,0 +1,59 @@ +import json +import os +import pytest +from pytest import fixture +from enum import Enum + +import librosa +import numpy as np +from pytest import raises + +import msaf +from msaf.base import FeatureTypes +from msaf.exceptions import FramePerBeatTooHigh +from msaf.features import Features, CQT +from msaf.input_output import FileStruct + +# Move to __file__ path +os.chdir(os.path.dirname(__file__)) + +# Global vars +audio_file = os.path.join("fixtures", "chirp.mp3") +file_struct = FileStruct(audio_file) +file_struct.ref_file = os.path.join("fixtures", "chirp.jams") +msaf.utils.ensure_dir("features") +features_file = os.path.join("features", "chirp.json") +file_struct.features_file = features_file +try: + os.remove(features_file) +except OSError: + pass + +multibeat_feature = np.array([[k*2, k*2+1] for k in range(10)]) +print(multibeat_feature) + +@fixture +def feat_class(): + return CQT(file_struct, FeatureTypes.est_multibeat) + +def test_compute_multibeat(feat_class): + assert (feat_class._compute_multibeat(None) is None) + assert (feat_class._compute_multibeat([]) == []) + frame_beats = [k*100 for k in range (10)] + assert (isinstance(feat_class._compute_multibeat(frame_beats), np.ndarray)) + assert (feat_class._compute_multibeat(frame_beats).shape[0] == feat_class.frames_per_beat * (len(frame_beats) - 1)) + feat_class.frames_per_beat = 100 + with raises(FramePerBeatTooHigh): + feat_class._compute_multibeat(frame_beats) + +def test_shape_beatwise(feat_class): + assert (feat_class._shape_beatwise(None) is None) + assert (feat_class._shape_beatwise(np.array([])).shape[0] == 0) + multibeat_feature = np.array([[k*2, k*2+1] for k in range(50)]) + multibeat_shaped = feat_class._shape_beatwise(multibeat_feature) + assert(isinstance(multibeat_shaped, np.ndarray)) + assert(multibeat_shaped.shape[0] == 
len(multibeat_feature)//feat_class.frames_per_beat) + assert(multibeat_shaped.shape[1] == 2 * feat_class.frames_per_beat) + assert(np.equal(multibeat_shaped[0],np.array([0,1,2,3,4,5])).all()) + + From 37e37805aefebc327d600da8dcc1ffa7d07ad669 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Kastenbaum?= Date: Thu, 19 Dec 2024 15:52:45 +0100 Subject: [PATCH 18/18] Fix typos --- docs/features.rst | 6 +++--- msaf/base.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/features.rst b/docs/features.rst index 3d545342..cf4f3e90 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -78,8 +78,8 @@ A brief description for the main keys of this ``json`` file follows: * ``ann_beats``: contains the set of reference beats, in seconds (only exists if reference beats are available). * ``est_beatsync_times``: contains the set times associated with each (estimated-)beat-synchronous feature (might differ with `est_beats` in the beginning or end). * ``ann_beatsync_times``: contains the set times associated with each (annotated-)beat-synchronous feature (might differ with `ann_beats` in the beginning or end). -* ``est_beatsync_times``: contains the set times associated with each (estimated-)beat-synchronous feature when using multibeat(might differ with `est_beatsync_times` in the end). -* ``ann_beatsync_times``: contains the set times associated with each (annotated-)beat-synchronous feature when using multibeat(might differ with `ann_beatsync_times` in the r end). +* ``est_multibeat_times``: contains the set times associated with each (estimated-)beat-synchronous feature when using multibeat (might differ with `est_beatsync_times` in the end). +* ``ann_multibeat_times``: contains the set times associated with each (annotated-)beat-synchronous feature when using multibeat (might differ with `ann_beatsync_times` in the end). * ```` (e.g., ``pcp``, ``mfcc``): contains the actual features of the given audio file. 
Inside this key the following sub-keys can be found: @@ -87,7 +87,7 @@ A brief description for the main keys of this ``json`` file follows: * ``est_beatsync``: Features synchronized to the estimated beats. * ``ann_beatsync``: Features synchronized to the reference beats (only exists if reference beats are available). * ``est_multibeat``: Features synchronized to the estimated beats (`frames_per_beat` frames are computed for each beat). - * ``ann_multibeat``: Features synchronized to the reference beats (`frames_per_beat` frames are computed for each beat) (only exists if reference beats are available).. + * ``ann_multibeat``: Features synchronized to the reference beats (`frames_per_beat` frames are computed for each beat) (only exists if reference beats are available). * ``params``: A set of parameters of the actual type of features. Pre-computed features for the `SPAM dataset `_ can be found `here `_. diff --git a/msaf/base.py b/msaf/base.py index a8c838b6..1d87d7c7 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -620,7 +620,8 @@ def select_features(cls, features_id, file_struct, annot_beats, framesync, multi Whether to use annotated (`True`) or estimated (`False`) beats framesync: boolean Whether to use framesync (`True`) or beatsync (`False`) features - + multibeat: boolean + Whether to use multibeat (`True`) or not. Default is `False` Returns ------- features: obj