diff --git a/docs/config.rst b/docs/config.rst index a7f41e7b..9103ef84 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -124,6 +124,13 @@ import MSAF and print the config variable, as in: The size of the hop size, which should be smaller than the ``n_fft`` value, such that overlap is allowed. +.. attribute:: frames_per_beat + + Positive int value, default: 3 + + The number of frames kept per beat when using the multibeat features. + Must be lower than the number of frames computed between two beats. + .. attribute:: features_tmp_file String value, default ``'.features_msaf_tmp.json'`` diff --git a/docs/features.rst b/docs/features.rst index 543328cc..cf4f3e90 100644 --- a/docs/features.rst +++ b/docs/features.rst @@ -24,7 +24,8 @@ The format of the ``json`` file is as follows: "audio_file": "", "dur": "", "sample_rate": "", - "hop_length": "" + "hop_length": "", + "frames_per_beat": "" }, "metadata": { "timestamp": "", @@ -47,6 +48,14 @@ The format of the ``json`` file is as follows: [ 0.0, 0.0, "..." ], "..." ], + "est_multibeat": [ + [0.0, 0.0, "..."], + "..." + ], + "ann_multibeat": [ + [0.0, 0.0, "..."], + "..." + ], "params": { "": "", "": "", @@ -55,6 +64,8 @@ The format of the ``json`` file is as follows: } "est_beatsync_times": [ 0.0, 1.0, "..." ], "ann_beatsync_times": [ 0.0, 1.0, "..." ], + "est_multibeat_times": [ 0.0, 1.0, "..." ], + "ann_multibeat_times": [ 0.0, 1.0, "..." ], "est_beats": [ 0.0, 1.0, "..." ], "ann_beats": [ 0.0, 1.0, "..." ] } @@ -67,11 +78,16 @@ A brief description for the main keys of this ``json`` file follows: * ``ann_beats``: contains the set of reference beats, in seconds (only exists if reference beats are available). * ``est_beatsync_times``: contains the set times associated with each (estimated-)beat-synchronous feature (might differ with `est_beats` in the beginning or end). 
* ``ann_beatsync_times``: contains the set times associated with each (annotated-)beat-synchronous feature (might differ with `ann_beats` in the beginning or end). +* ``est_multibeat_times``: contains the set times associated with each (estimated-)beat-synchronous feature when using multibeat (might differ with `est_beatsync_times` in the end). +* ``ann_multibeat_times``: contains the set times associated with each (annotated-)beat-synchronous feature when using multibeat (might differ with `ann_beatsync_times` in the end). + * ```` (e.g., ``pcp``, ``mfcc``): contains the actual features of the given audio file. Inside this key the following sub-keys can be found: * ``framesync``: Actual frame-wise features. * ``est_beatsync``: Features synchronized to the estimated beats. * ``ann_beatsync``: Features synchronized to the reference beats (only exists if reference beats are available). + * ``est_multibeat``: Features synchronized to the estimated beats (`frames_per_beat` frames are computed for each beat). + * ``ann_multibeat``: Features synchronized to the reference beats (`frames_per_beat` frames are computed for each beat) (only exists if reference beats are available). * ``params``: A set of parameters of the actual type of features. Pre-computed features for the `SPAM dataset `_ can be found `here `_. 
diff --git a/examples/run_msaf.py b/examples/run_msaf.py index 383861f4..773a9192 100755 --- a/examples/run_msaf.py +++ b/examples/run_msaf.py @@ -63,6 +63,13 @@ help="Use frame-synchronous features", default=False, ) + parser.add_argument( + "-mb", + action="store_true", + dest="multibeat", + help="Compute mulitple frames per beat", + default=False, + ) parser.add_argument( "-j", action="store", @@ -113,6 +120,7 @@ "annot_beats": args.annot_beats, "feature": args.feature, "framesync": args.framesync, + "multibeat": args.multibeat, "boundaries_id": args.boundaries_id, "labels_id": args.labels_id, "n_jobs": args.n_jobs, diff --git a/msaf/algorithms/interface.py b/msaf/algorithms/interface.py index 8758e581..ef4392b1 100644 --- a/msaf/algorithms/interface.py +++ b/msaf/algorithms/interface.py @@ -51,6 +51,7 @@ def __init__( feature="pcp", annot_beats=False, framesync=False, + multibeat=False, features=None, **config ): @@ -80,6 +81,7 @@ def __init__( self.feature_str = feature self.annot_beats = annot_beats self.framesync = framesync + self.multibeat = multibeat self.config = config self.features = features diff --git a/msaf/base.py b/msaf/base.py index 1ad32d3b..1d87d7c7 100644 --- a/msaf/base.py +++ b/msaf/base.py @@ -23,13 +23,17 @@ NoAudioFileError, NoFeaturesFileError, WrongFeaturesFormatError, + FramePerBeatTooHigh, ) -# Three types of features at the moment: +# Five types of features at the moment: # - framesync: Frame-wise synchronous. 
# - est_beatsync: Beat-synchronous using estimated beats with librosa # - ann_beatsync: Beat-synchronous using annotated beats from ground-truth -FeatureTypes = Enum("FeatureTypes", "framesync est_beatsync ann_beatsync") +# - est_multibeat: Multiple frames per beat-synchronous using estimated beats +# - ann_multibeat: Multiple frames per beat-synchronous using annotated beats from ground-truth + +FeatureTypes = Enum("FeatureTypes", "framesync est_beatsync ann_beatsync est_multibeat ann_multibeat") # All available features features_registry = {} @@ -61,7 +65,7 @@ class Features(metaclass=MetaFeatures): features per frames. """ - def __init__(self, file_struct, sr, hop_length, feat_type): + def __init__(self, file_struct, sr, hop_length, feat_type, frames_per_beat=3): """Init function for the base class to make sure all features have at least these parameters as attributes. @@ -75,12 +79,15 @@ def __init__(self, file_struct, sr, hop_length, feat_type): Hop in frames of the features to be computed. feat_type: `FeatureTypes` Enum containing the type of feature. 
+ frames_per_beat: int > 0 + Number of frames per beat used in multibeat features """ # Set the global parameters self.file_struct = file_struct self.sr = sr self.hop_length = hop_length self.feat_type = feat_type + self.frames_per_beat = frames_per_beat # The number of frames per beat computed for multibeat features # The following attributes will be populated, if needed, # once the `features` getter is called @@ -89,6 +96,8 @@ def __init__(self, file_struct, sr, hop_length, feat_type): self._framesync_features = None # Frame-sync features self._est_beatsync_features = None # Estimated Beat-sync features self._ann_beatsync_features = None # Annotated Beat-sync features + self._est_multibeat_features = None # Estimated Beat-sync features with multiple frames per beat + self._ann_mutlibeat_features = None # Annotated Beat-sync features with multiple frames per beat self._audio = None # Actual audio signal self._audio_harmonic = None # Harmonic audio signal self._audio_percussive = None # Percussive audio signal @@ -99,6 +108,10 @@ def __init__(self, file_struct, sr, hop_length, feat_type): self._ann_beatsync_times = None # Annotated beat-sync times self._ann_beats_times = None # Annotated beat times self._ann_beats_frames = None # Annotated beats in frames + self._est_multibeat_frames = None # Estimated multibeat frames + self._est_multibeat_times = None # Estimated multibeat times + self._ann_multibeat_frames = None # Annotated multibeat frames + self._ann_multibeat_times = None # Annotated multibeat times # Differentiate global params from subclass attributes. # This is a bit hacky... 
I accept Pull Requests ^_^ @@ -108,6 +121,7 @@ def __init__(self, file_struct, sr, hop_length, feat_type): "feat_type", "hop_length", "dur", + "frames_per_beat", ] def compute_HPSS(self): @@ -184,15 +198,13 @@ def read_ann_beats(self): ) return times, frames - def compute_beat_sync_features(self, beat_frames, beat_times, pad): + def compute_beat_sync_features(self, beat_frames, pad): """Make the features beat-synchronous. Parameters ---------- beat_frames: np.array The frame indices of the beat positions. - beat_times: np.array - The time points of the beat positions (in seconds). pad: boolean If `True`, `beat_frames` is padded to span the full range. @@ -201,26 +213,93 @@ def compute_beat_sync_features(self, beat_frames, beat_times, pad): beatsync_feats: np.array The beat-synchronized features. `None` if the beat_frames was `None`. - beatsync_times: np.array - The beat-synchronized times. - `None` if the beat_frames was `None`. """ if beat_frames is None: - return None, None - + return None + # Make beat synchronous beatsync_feats = librosa.util.utils.sync( self._framesync_features.T, beat_frames, pad=pad ).T - # Assign times (and add last time if padded) + return beatsync_feats + + def _compute_multibeat(self, beat_frames): + """Compute frames index evenly distributed between beats + Parameters + ---------- + beat_frames: np.array + the frames index of beats + Returns + ------- + multibeat_frames: np.array + the frames index of multibeats + """ + if beat_frames is None: + return None + if beat_frames == []: + return [] + multibeat_frames = np.empty(0,dtype=int) + for idx in range(len(beat_frames)-1): + this_beat_frame = beat_frames[idx] + next_beat_frame = beat_frames[idx+1] + subdivision = (next_beat_frame - this_beat_frame) + if not (self.frames_per_beat < subdivision): + raise FramePerBeatTooHigh + frames_in_beat = [int(k * subdivision/self.frames_per_beat + this_beat_frame) for k in range(self.frames_per_beat)] + multibeat_frames = 
np.concatenate((multibeat_frames, frames_in_beat), dtype=int ) + + return multibeat_frames + + def _shape_beatwise(self, multibeat_features): + """Transform the multibeat_features matrix into a beatwise features matrix + Parameters + ----------- + multibeat_features: np.array + The features to transform + Returns + ---------- + beatwise_feature: np.array + The features shaped as a beatwise matrix + or None if multibeat_features is None + """ + if multibeat_features is None: + return None + if multibeat_features.shape[0] == 0: + return multibeat_features + assert(multibeat_features.shape[0]%self.frames_per_beat == 0,"The size of array must be a multiple of self.frames_per_beat") + nummber_of_beats = int(multibeat_features.shape[0]/self.frames_per_beat) + tensor = [] + for k in range(nummber_of_beats): + beat=[] + for f in range(self.frames_per_beat): + beat.append(multibeat_features[k*self.frames_per_beat + f]) + tensor.append(beat) + tensor = np.array(tensor) + return np.reshape(tensor,(tensor.shape[0], -1), order='C') + + def _pad_beats_times(self, beat_times, beatsync_feats): + """Pad the beat_times with the last frametimes if necessary + Parameters + ----------- + beat_times: np.array + the beats times to pad + beatsync_features: np.array + The features corresponding to the beats + Returns + --------- + beatsync_times: np.array + the padded beats times + """ + if beatsync_feats is None : + return None beatsync_times = np.copy(beat_times) if beatsync_times.shape[0] != beatsync_feats.shape[0]: beatsync_times = np.concatenate( (beatsync_times, [self._framesync_times[-1]]) ) - return beatsync_feats, beatsync_times - + return beatsync_times + def read_features(self, tol=1e-3): """Reads the features from a file and stores them in the current object. 
@@ -246,6 +325,7 @@ def read_features(self, tol=1e-3): assert os.path.basename(self.file_struct.audio_file) == os.path.basename( feats["globals"]["audio_file"] ) + assert self.frames_per_beat == int(feats["globals"]["frames_per_beat"]) # Check for specific features params feat_params_err = FeatureParamsError( @@ -273,16 +353,23 @@ def read_features(self, tol=1e-3): self._framesync_features = np.array(feats[self.get_id()]["framesync"]) self._est_beatsync_features = np.array(feats[self.get_id()]["est_beatsync"]) + self._est_multibeat_times = np.array(feats[self.get_id()]["est_multibeat_times"]) + self._est_multibeat_features = np.array(feats[self.get_id()]["est_multibeat"]) + # Read annotated beats if available if "ann_beats" in feats.keys(): self._ann_beats_times = np.array(feats["ann_beats"]) self._ann_beatsync_times = np.array(feats["ann_beatsync_times"]) + self._ann_multibeat_times = np.array(feats["ann_multibeat_times"]) self._ann_beats_frames = librosa.core.time_to_frames( self._ann_beats_times, sr=self.sr, hop_length=self.hop_length ) self._ann_beatsync_features = np.array( feats[self.get_id()]["ann_beatsync"] ) + self._ann_mutlibeat_features = np.array( + feats[self.get_id()]["ann_multibeat"] + ) except KeyError: raise WrongFeaturesFormatError( "The features file %s is not correctly formatted" @@ -328,14 +415,17 @@ def write_features(self): "sample_rate": self.sr, "hop_length": self.hop_length, "audio_file": self.file_struct.audio_file, + "frames_per_beat": self.frames_per_beat, } # Beats out_json["est_beats"] = self._est_beats_times.tolist() out_json["est_beatsync_times"] = self._est_beatsync_times.tolist() + out_json["est_multibeat_times"] = self._est_multibeat_times.tolist() if self._ann_beats_times is not None: out_json["ann_beats"] = self._ann_beats_times.tolist() out_json["ann_beatsync_times"] = self._ann_beatsync_times.tolist() + out_json["ann_multibeat_times"] = self._ann_multibeat_times.tolist() except FeatureParamsError: # We have other features in 
the file, simply add these ones with open(self.file_struct.features_file) as f: @@ -355,14 +445,14 @@ def write_features(self): # Actual features out_json[self.get_id()]["framesync"] = self._framesync_features.tolist() - out_json[self.get_id()][ - "est_beatsync" - ] = self._est_beatsync_features.tolist() - if self._ann_beatsync_features is not None: - out_json[self.get_id()][ - "ann_beatsync" - ] = self._ann_beatsync_features.tolist() + out_json[self.get_id()]["est_beatsync"] = self._est_beatsync_features.tolist() + if self._ann_beatsync_features is not None: + out_json[self.get_id()]["ann_beatsync"] = self._ann_beatsync_features.tolist() + + out_json[self.get_id()]["est_multibeat"] = self._est_multibeat_features.tolist() + if self._ann_mutlibeat_features is not None: + out_json[self.get_id()]["ann_multibeat"] = self._ann_mutlibeat_features.tolist() # Save it with open(self.file_struct.features_file, "w") as f: json.dump(out_json, f, indent=2) @@ -401,24 +491,34 @@ def _compute_all_features(self): # Compute framesync times self._compute_framesync_times() - # Compute/Read beats + # Compute/Read beats times and frames self._est_beats_times, self._est_beats_frames = self.estimate_beats() self._ann_beats_times, self._ann_beats_frames = self.read_ann_beats() - # Beat-Synchronize - pad = True # Always append to the end of the features - ( - self._est_beatsync_features, - self._est_beatsync_times, - ) = self.compute_beat_sync_features( - self._est_beats_frames, self._est_beats_times, pad - ) - ( - self._ann_beatsync_features, - self._ann_beatsync_times, - ) = self.compute_beat_sync_features( - self._ann_beats_frames, self._ann_beats_times, pad - ) + # Compute multibeats timees and frames + self._est_multibeat_frames = self._compute_multibeat(self._est_beats_frames) + self._ann_multibeat_frames = self._compute_multibeat(self._ann_beats_frames) + + # Multibeat times is beats time (before padding) + self._est_multibeat_times = np.copy(self._est_beats_times) + 
self._ann_multibeat_times = np.copy(self._ann_beats_times) + + # Compute frames features on beat + pad = True # pad the beat frames + self._est_beatsync_features = self.compute_beat_sync_features(self._est_beats_frames, pad) + self._ann_beatsync_features = self.compute_beat_sync_features(self._ann_beats_frames, pad) + + # Compute frames features on multibeat + self._est_multibeat_features = self.compute_beat_sync_features(self._est_multibeat_frames, pad) + self._ann_mutlibeat_features = self.compute_beat_sync_features(self._ann_multibeat_frames, pad) + + # Transform multibeat into beatwise matrix + self._est_multibeat_features = self._shape_beatwise(self._est_multibeat_features) + self._ann_mutlibeat_features = self._shape_beatwise(self._ann_mutlibeat_features) + + # Pad beatsync times + self._est_beatsync_times = self._pad_beats_times(self._est_beats_times, self._est_beatsync_features) + self._ann_beatsync_times = self._pad_beats_times(self._ann_beats_times, self._ann_beatsync_features) @property def frame_times(self): @@ -434,7 +534,12 @@ def frame_times(self): frame_times = self._est_beatsync_times elif self.feat_type is FeatureTypes.ann_beatsync: frame_times = self._ann_beatsync_times - + elif self.feat_type is FeatureTypes.est_multibeat: + frame_times = self._est_multibeat_times + elif self.feat_type is FeatureTypes.ann_multibeat: + frame_times = self._ann_multibeat_times + else: + raise FeatureTypeNotFound("Type of features not valid.") return frame_times @property @@ -486,13 +591,22 @@ def features(self): "were found" % self.feat_type ) self._features = self._ann_beatsync_features + elif self.feat_type is FeatureTypes.est_multibeat: + self._features = self._est_multibeat_features + elif self.feat_type is FeatureTypes.ann_multibeat: + if self._ann_multibeat_features is None: + raise FeatureTypeNotFound( + "Feature type %s is not valid because no annotated beats " + "were found" % self.feat_type + ) + self._features = self._ann_mutlibeat_features else: raise 
FeatureTypeNotFound("Feature type %s is not valid." % self.feat_type) return self._features @classmethod - def select_features(cls, features_id, file_struct, annot_beats, framesync): + def select_features(cls, features_id, file_struct, annot_beats, framesync, multibeat=False): """Selects the features from the given parameters. Parameters @@ -506,7 +620,8 @@ def select_features(cls, features_id, file_struct, annot_beats, framesync): Whether to use annotated (`True`) or estimated (`False`) beats framesync: boolean Whether to use framesync (`True`) or beatsync (`False`) features - + multibeat: boolean + Whether to use multibeat ('True`) or not. Default is `False` Returns ------- features: obj @@ -515,9 +630,15 @@ def select_features(cls, features_id, file_struct, annot_beats, framesync): if not annot_beats and framesync: feat_type = FeatureTypes.framesync elif annot_beats and not framesync: - feat_type = FeatureTypes.ann_beatsync + if multibeat: + feat_type = FeatureTypes.ann_multibeat + else: + feat_type = FeatureTypes.ann_beatsync elif not annot_beats and not framesync: - feat_type = FeatureTypes.est_beatsync + if multibeat: + feat_type = FeatureTypes.est_multibeat + else: + feat_type = FeatureTypes.est_beatsync else: raise FeatureTypeNotFound("Type of features not valid.") diff --git a/msaf/configdefaults.py b/msaf/configdefaults.py index dd67ebf5..61727a6b 100644 --- a/msaf/configdefaults.py +++ b/msaf/configdefaults.py @@ -33,6 +33,7 @@ AddConfigVar("sample_rate", "Default Sample Rate to be used.", IntParam(22050)) AddConfigVar("n_fft", "FFT size", IntParam(4096)) AddConfigVar("hop_size", "Hop length in samples", IntParam(1024)) +AddConfigVar("frames_per_beat","The number of framees computed per beat on multibeat", IntParam(3)) # Files and dirs diff --git a/msaf/eval.py b/msaf/eval.py index 7f497c33..c3867c21 100644 --- a/msaf/eval.py +++ b/msaf/eval.py @@ -337,6 +337,7 @@ def process( labels_id=msaf.config.default_label_id, annot_beats=False, framesync=False, + 
multibeat=False, feature="pcp", hier=False, save=False, @@ -389,7 +390,7 @@ def process( # Set up configuration based on algorithms parameters if config is None: config = io.get_configuration( - feature, annot_beats, framesync, boundaries_id, labels_id + feature, annot_beats, framesync, multibeat, boundaries_id, labels_id ) # Hierarchical segmentation diff --git a/msaf/exceptions.py b/msaf/exceptions.py index 7aa87dc9..2fb0cfa5 100644 --- a/msaf/exceptions.py +++ b/msaf/exceptions.py @@ -43,3 +43,6 @@ class NoEstimationsError(MSAFError): class WrongAlgorithmID(MSAFError): """This algorithm was not found in msaf.""" + +class FramePerBeatTooHigh(MSAFError): + """Frames per beat is higher than the number of frames between two beats""" diff --git a/msaf/features.py b/msaf/features.py index 69fec786..cdc5dc5e 100644 --- a/msaf/features.py +++ b/msaf/features.py @@ -41,6 +41,7 @@ def __init__( norm=config.cqt.norm, filter_scale=config.cqt.filter_scale, ref_power=config.cqt.ref_power, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -64,10 +65,12 @@ def __init__( ref_power: str The reference power for logarithmic scaling. See `configdefaults.py` for the possible values. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) # Init the CQT parameters self.n_bins = n_bins @@ -134,6 +137,7 @@ def __init__( n_mels=config.mel.n_mels, f_min=config.mel.f_min, f_max=config.mel.f_max, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -156,10 +160,12 @@ def __init__( Minimum frequency. f_min: int > 0 Maximal frequency. 
+ frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) self.n_fft = n_fft # Init the Mel parameters @@ -213,6 +219,7 @@ def __init__( n_mels=config.mel.n_mels, f_min=config.mel.f_min, f_max=config.mel.f_max, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -235,10 +242,12 @@ def __init__( Minimum frequency. f_min: int > 0 Maximal frequency. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) self.n_fft = n_fft # Init the Mel parameters @@ -290,6 +299,7 @@ def __init__( n_mels=config.mfcc.n_mels, n_mfcc=config.mfcc.n_mfcc, ref_power=config.mfcc.ref_power, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -312,10 +322,12 @@ def __init__( Number of mel coefficients. ref_power: function The reference power for logarithmic scaling. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) # Init the MFCC parameters self.n_fft = n_fft @@ -372,6 +384,7 @@ def __init__( norm=config.pcp.norm, f_min=config.pcp.f_min, n_octaves=config.pcp.n_octaves, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -394,10 +407,12 @@ def __init__( Minimum frequency. n_octaves: int > 0 Number of octaves. 
+ frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) # Init the PCP parameters self.n_bins = n_bins @@ -460,6 +475,7 @@ def __init__( norm=config.tonnetz.norm, f_min=config.tonnetz.f_min, n_octaves=config.tonnetz.n_octaves, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -482,10 +498,12 @@ def __init__( Minimum frequency. n_octaves: int > 0 Number of octaves. + frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) # Init the local parameters self.n_bins = n_bins @@ -534,6 +552,7 @@ def __init__( sr=config.sample_rate, hop_length=config.hop_size, win_length=config.tempogram.win_length, + frames_per_beat=config.frames_per_beat ): """Constructor of the class. @@ -550,10 +569,12 @@ def __init__( Hop size in frames for the analysis. win_length: int > 0 The size of the window for the tempogram. 
+ frames_per_beat: int + The number of frames to compute when using multibeat """ # Init the parent super().__init__( - file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type + file_struct=file_struct, sr=sr, hop_length=hop_length, feat_type=feat_type, frames_per_beat=frames_per_beat ) # Init the local parameters self.win_length = win_length diff --git a/msaf/input_output.py b/msaf/input_output.py index 2547fbbc..7d0609ea 100644 --- a/msaf/input_output.py +++ b/msaf/input_output.py @@ -356,13 +356,14 @@ def get_all_label_algorithms(): return algo_ids -def get_configuration(feature, annot_beats, framesync, boundaries_id, labels_id): +def get_configuration(feature, annot_beats, framesync, multibeat, boundaries_id, labels_id): """Gets the configuration dictionary from the current parameters of the algorithms to be evaluated.""" config = {} config["annot_beats"] = annot_beats config["feature"] = feature config["framesync"] = framesync + config["multibeat"] = multibeat bound_config = {} if boundaries_id != "gt": bound_config = eval(msaf.algorithms.__name__ + "." + boundaries_id).config diff --git a/msaf/run.py b/msaf/run.py index ed27424e..93c164e4 100644 --- a/msaf/run.py +++ b/msaf/run.py @@ -254,7 +254,7 @@ def process_track(file_struct, boundaries_id, labels_id, config, annotator_id=0) # Get features config["features"] = Features.select_features( - config["feature"], file_struct, config["annot_beats"], config["framesync"] + config["feature"], file_struct, config["annot_beats"], config["framesync"], multibeat=config["multibeat"] ) # Get estimations @@ -286,6 +286,7 @@ def process( config=None, out_bounds="out_bounds.wav", out_sr=22050, + multibeat=False ): """Main process to segment a file or a collection of files. @@ -324,6 +325,8 @@ def process( mode, when sonify_bounds is True. out_sr : int Sampling rate for the sonified bounds. + multibeat : bool + Whether to use multibeat. 
Returns ------- @@ -338,7 +341,7 @@ def process( # Set up configuration based on algorithms parameters if config is None: config = io.get_configuration( - feature, annot_beats, framesync, boundaries_id, labels_id + feature, annot_beats, framesync, multibeat, boundaries_id, labels_id ) config["features"] = None @@ -356,7 +359,7 @@ def process( # Get features config["features"] = Features.select_features( - feature, file_struct, annot_beats, framesync + feature, file_struct, annot_beats, framesync, multibeat=multibeat ) # And run the algorithms diff --git a/tests/features/chirp_noaudio.json b/tests/features/chirp_noaudio.json index b670ca38..53b53344 100644 --- a/tests/features/chirp_noaudio.json +++ b/tests/features/chirp_noaudio.json @@ -11,12 +11,14 @@ "audio_file": "fixtures/chirp_noaudio.mp3", "dur": 10.0, "sample_rate": 22050, - "hop_length": 1024 + "hop_length": 1024, + "frames_per_beat": 3 }, "est_beats": [], "est_beatsync_times": [], "cqt": { "est_beatsync": [], + "est_multibeat": [], "params": { "filter_scale": "1.0", "ref_power": "amax", diff --git a/tests/fixtures/01_-_Come_Together.json b/tests/fixtures/01_-_Come_Together.json index ad6369d2..b21ba1c4 100644 --- a/tests/fixtures/01_-_Come_Together.json +++ b/tests/fixtures/01_-_Come_Together.json @@ -89078,7 +89078,8 @@ 0.0, 0.0 ] - ] + ], + "est_multibeat" :[] }, "mfcc": { "est_beatsync": [ @@ -100267,6 +100268,7 @@ 1.4796745923739383 ] ], + "est_multibeat" :[], "params": { "ref_power": "amax", "n_mels": "128", @@ -190088,7 +190090,8 @@ "audio_file": "/home/uri/datasets/BeatlesTUT/audio/01_-_Come_Together.wav", "dur": 260.6497959183674, "sample_rate": 22050, - "hop_length": 1024 + "hop_length": 1024, + "frames_per_beat": 3 }, "est_beats": [ 1.207437641723356, diff --git a/tests/test_features.py b/tests/test_features.py index 041094d6..49888918 100644 --- a/tests/test_features.py +++ b/tests/test_features.py @@ -48,6 +48,8 @@ def run_framesync(features_class): assert "framesync" in 
data[features_class.get_id()].keys() assert "est_beatsync" in data[features_class.get_id()].keys() assert "ann_beatsync" in data[features_class.get_id()].keys() + assert "est_multibeat" in data[features_class.get_id()].keys() + assert "ann_multibeat" in data[features_class.get_id()].keys() read_feats = np.array(data[features_class.get_id()]["framesync"]) assert np.array_equal(feats, read_feats) @@ -312,6 +314,14 @@ def test_select_features(): assert isinstance(feature, CQT) assert feature.feat_type == FeatureTypes.ann_beatsync + feature = Features.select_features("mfcc", my_file_struct, False, False, multibeat=True) + assert isinstance(feature, MFCC) + assert feature.feat_type == FeatureTypes.est_multibeat + + feature = Features.select_features("cqt", my_file_struct, True, False, multibeat=True) + assert isinstance(feature, CQT) + assert feature.feat_type == FeatureTypes.ann_multibeat + def test_wrong_select_features(): with raises(FeatureTypeNotFound): diff --git a/tests/test_multibeat.py b/tests/test_multibeat.py new file mode 100644 index 00000000..fd29cfc5 --- /dev/null +++ b/tests/test_multibeat.py @@ -0,0 +1,59 @@ +import json +import os +import pytest +from pytest import fixture +from enum import Enum + +import librosa +import numpy as np +from pytest import raises + +import msaf +from msaf.base import FeatureTypes +from msaf.exceptions import FramePerBeatTooHigh +from msaf.features import Features, CQT +from msaf.input_output import FileStruct + +# Move to __file__ path +os.chdir(os.path.dirname(__file__)) + +# Global vars +audio_file = os.path.join("fixtures", "chirp.mp3") +file_struct = FileStruct(audio_file) +file_struct.ref_file = os.path.join("fixtures", "chirp.jams") +msaf.utils.ensure_dir("features") +features_file = os.path.join("features", "chirp.json") +file_struct.features_file = features_file +try: + os.remove(features_file) +except OSError: + pass + +multibeat_feature = np.array([[k*2, k*2+1] for k in range(10)]) +print(multibeat_feature) + 
+@fixture +def feat_class(): + return CQT(file_struct, FeatureTypes.est_multibeat) + +def test_compute_multibeat(feat_class): + assert (feat_class._compute_multibeat(None) is None) + assert (feat_class._compute_multibeat([]) == []) + frame_beats = [k*100 for k in range (10)] + assert (isinstance(feat_class._compute_multibeat(frame_beats), np.ndarray)) + assert (feat_class._compute_multibeat(frame_beats).shape[0] == feat_class.frames_per_beat * (len(frame_beats) - 1)) + feat_class.frames_per_beat = 100 + with raises(FramePerBeatTooHigh): + feat_class._compute_multibeat(frame_beats) + +def test_shape_beatwise(feat_class): + assert (feat_class._shape_beatwise(None) is None) + assert (feat_class._shape_beatwise(np.array([])).shape[0] == 0) + multibeat_feature = np.array([[k*2, k*2+1] for k in range(50)]) + multibeat_shaped = feat_class._shape_beatwise(multibeat_feature) + assert(isinstance(multibeat_shaped, np.ndarray)) + assert(multibeat_shaped.shape[0] == len(multibeat_feature)//feat_class.frames_per_beat) + assert(multibeat_shaped.shape[1] == 2 * feat_class.frames_per_beat) + assert(np.equal(multibeat_shaped[0],np.array([0,1,2,3,4,5])).all()) + + diff --git a/tests/test_run.py b/tests/test_run.py index c405ef8b..a4c9e8dd 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -76,6 +76,7 @@ def test_run_algorithms(): feature = "pcp" annot_beats = False framesync = False + multibeat = False file_struct = msaf.io.FileStruct(audio_file) file_struct.features_file = msaf.config.features_tmp_file @@ -84,7 +85,7 @@ def test_run_algorithms(): for label_id in label_ids: print(f"bound_id: {bound_id},\tlabel_id: {label_id}") config = msaf.io.get_configuration( - feature, annot_beats, framesync, bound_id, label_id + feature, annot_beats, framesync, multibeat, bound_id, label_id ) config["hier"] = False config["features"] = Features.select_features( @@ -105,7 +106,7 @@ def test_run_algorithms(): def _test_run_msaf(bound_id, label_id, hier=False): print(f"bound_id: 
{bound_id},\tlabel_id: {label_id}") config = msaf.io.get_configuration( - feature, annot_beats, framesync, bound_id, label_id + feature, annot_beats, framesync, multibeat, bound_id, label_id ) config["hier"] = hier config["features"] = Features.select_features( @@ -175,6 +176,7 @@ def test_process_track(): config["annot_beats"] = False config["framesync"] = False config["hier"] = False + config["multibeat"] = False est_times, est_labels = msaf.run.process_track( file_struct, bounds_id, labels_id, config )