diff --git a/matchmaker/assets/simple_mozart_k265_var1.musicxml b/matchmaker/assets/simple_mozart_k265_var1.musicxml index 56bebfc..9130440 100644 --- a/matchmaker/assets/simple_mozart_k265_var1.musicxml +++ b/matchmaker/assets/simple_mozart_k265_var1.musicxml @@ -132,7 +132,7 @@ 4 - + D 5 @@ -145,7 +145,7 @@ begin begin - + C 5 @@ -158,7 +158,7 @@ continue continue - + B 4 @@ -171,7 +171,7 @@ continue continue - + C 5 @@ -184,7 +184,7 @@ end end - + B 4 @@ -197,7 +197,7 @@ begin begin - + C 5 @@ -210,7 +210,7 @@ continue continue - + B 4 @@ -223,7 +223,7 @@ continue continue - + C 5 @@ -239,7 +239,7 @@ 8 - + C 3 @@ -250,7 +250,7 @@ up 2 - + C 4 @@ -263,7 +263,7 @@ - + A 5 @@ -276,7 +276,7 @@ begin begin - + G 5 @@ -289,7 +289,7 @@ continue continue - + F 1 @@ -304,7 +304,7 @@ continue continue - + G 5 @@ -317,7 +317,7 @@ end end - + F 1 @@ -331,7 +331,7 @@ begin begin - + G 5 @@ -344,7 +344,7 @@ continue continue - + F 1 @@ -358,7 +358,7 @@ continue continue - + G 5 @@ -374,7 +374,7 @@ 8 - + E 4 @@ -385,7 +385,7 @@ down 2 - + C 4 @@ -398,7 +398,7 @@ - + G 1 @@ -413,7 +413,7 @@ begin begin - + A 5 @@ -426,7 +426,7 @@ continue continue - + C 6 @@ -439,7 +439,7 @@ continue continue - + B 5 @@ -452,7 +452,7 @@ end end - + D 6 @@ -465,7 +465,7 @@ begin begin - + C 6 @@ -478,7 +478,7 @@ continue continue - + B 5 @@ -491,7 +491,7 @@ continue continue - + A 5 @@ -507,7 +507,7 @@ 8 - + F 4 @@ -518,7 +518,7 @@ down 2 - + C 4 @@ -531,7 +531,7 @@ - + A 5 @@ -547,7 +547,7 @@ - + G 5 @@ -564,7 +564,7 @@ - + E 6 @@ -577,7 +577,7 @@ continue continue - + D 6 @@ -590,7 +590,7 @@ end end - + C 6 @@ -603,7 +603,7 @@ begin begin - + B 5 @@ -616,7 +616,7 @@ continue continue - + A 5 @@ -629,7 +629,7 @@ continue continue - + G 5 @@ -645,7 +645,7 @@ 8 - + C 4 @@ -656,7 +656,7 @@ down 2 - + E @@ -668,7 +668,7 @@ down 2 - + 3 5 @@ -676,7 +676,7 @@ 2 - + C 1 @@ -703,7 +703,7 @@ 65 - + G 5 @@ -719,7 +719,7 @@ - + F 5 @@ -735,7 +735,7 @@ - + D 6 @@ -748,7 +748,7 @@ continue continue - + C 6 @@ -761,7 +761,7 @@ end end - + B 5 @@ -774,7 +774,7 @@ begin begin - + A 5 @@ -787,7 +787,7 @@ continue continue - + G 5 @@ -800,7 +800,7 @@ continue continue - + F 5 @@ -816,7 +816,7 @@ 8 - + D 4 @@ -827,7 +827,7 @@ down 2 - + 3 5 @@ -835,7 +835,7 @@ 2 - + B 3 @@ -848,7 +848,7 @@ - + F 5 @@ -864,7 +864,7 @@ - + E 5 @@ -880,7 +880,7 @@ - + C 6 @@ -893,7 +893,7 @@ continue continue - + B 5 @@ -906,7 +906,7 @@ end end - + A 5 @@ -919,7 +919,7 @@ begin begin - + G 5 @@ -932,7 +932,7 @@ continue continue - + F 5 @@ -945,7 +945,7 @@ continue continue - + E 5 @@ -961,7 +961,7 @@ 8 - + C 4 @@ -973,7 +973,7 @@ down 2 - + 3 5 @@ -981,7 +981,7 @@ 2 - + A 3 @@ -994,7 +994,7 @@ - + D 5 @@ -1006,7 +1006,7 @@ 1 begin - + A 5 @@ -1021,7 +1021,7 @@ - + G 5 @@ -1036,7 +1036,7 @@ - + B 4 @@ -1051,7 +1051,7 @@ 8 - + F 3 @@ -1062,7 +1062,7 @@ down 2 - + G 3 @@ -1075,7 +1075,7 @@ - + C 5 @@ -1086,7 +1086,7 @@ down 1 - + 4 1 @@ -1096,7 +1096,7 @@ 8 - + C 4 @@ -1107,7 +1107,7 @@ down 2 - + C 3 diff --git a/matchmaker/dp/oltw_arzt.py b/matchmaker/dp/oltw_arzt.py index 89d0d5a..a6749cf 100644 --- a/matchmaker/dp/oltw_arzt.py +++ b/matchmaker/dp/oltw_arzt.py @@ -98,6 +98,9 @@ def __init__( current_position: int = 0, frame_rate: int = FRAME_RATE, queue: Optional[RECVQueue] = None, + state_to_ref_time_map = None, + ref_to_state_time_map = None, + state_space = None, **kwargs, ) -> None: super().__init__(reference_features=reference_features) @@ -173,6 +176,9 @@ def __init__( "max_latency": 0, "min_latency": float("inf"), } + self.state_to_ref_time_map = state_to_ref_time_map + self.ref_to_state_time_map = ref_to_state_time_map + self.state_space = state_space #if state_space != None else np.unique(self.reference_features.note_array()["onset_beat"]) @property def warping_path(self) -> NDArray[np.int32]: @@ -289,7 +295,7 @@ def step(self, input_features: NDArray[np.float32]) -> None: if self.input_index == 0: # enforce the first time step to stay at the # initial position - self.current_position = min( + self.current_position = min( # TODO: Is this necessary? max(self.current_position, min_index), self.current_position, ) diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py index 1bd66a1..cb21790 100644 --- a/matchmaker/matchmaker.py +++ b/matchmaker/matchmaker.py @@ -7,6 +7,7 @@ import partitura from partitura.io.exportmidi import get_ppq from partitura.score import Part, merge_parts +from partitura.musicanalysis.performance_codec import get_time_maps_from_alignment from matchmaker.dp import OnlineTimeWarpingArzt, OnlineTimeWarpingDixon from matchmaker.features.audio import ( @@ -42,7 +43,7 @@ transfer_from_score_to_predicted_perf, ) from matchmaker.utils.misc import ( - adjust_tempo_for_performance_audio, + adjust_tempo_for_performance_file, generate_score_audio, get_tempo_from_score, is_audio_file, @@ -77,7 +78,10 @@ "dixon": { "window_size": 10, }, - "arzt": {}, + "arzt": { + "window_size": 5, + "start_window_size": 0.25, + "step_size" : 5,}, "audio_outerhmm": { "sample_rate": 16000, "frame_rate": 50, @@ -87,6 +91,9 @@ "arzt": { "processor": "pianoroll", "piano_range": True, + "window_size": 200, + "start_window_size": 200, + "step_size": 5, }, "dixon": { "processor": "pianoroll", @@ -166,7 +173,7 @@ def __init__( # raise ValueError(f"Invalid input_type {input_type}") self.input_type = input_type self.feature_type = feature_type - self.frame_rate = frame_rate + self.frame_rate = frame_rate if input_type == "audio" else 1 self.sample_rate = sample_rate self.hop_length = sample_rate // self.frame_rate self.score_part: Optional[Part] = None @@ -185,7 +192,7 @@ def __init__( raise ValueError(f"Invalid method. Available methods: {AVAILABLE_METHODS}") self.method = method - self.config = kwargs[input_type][self.method] + self.config = kwargs[self.input_type][self.method] self.auto_adjust_tempo = auto_adjust_tempo # Apply method-specific defaults from config (only if not explicitly provided by caller) @@ -276,7 +283,7 @@ def __init__( # setup distance function if distance_func is None: - distance_func = DEFAULT_DISTANCE_FUNCS[method] + distance_func = DEFAULT_DISTANCE_FUNCS[self.method] # setup stream device if self.input_type == "audio": @@ -299,18 +306,24 @@ def __init__( else: raise ValueError(f"Invalid input type {self.input_type}") - use_score_audio = self.input_type == "audio" and method in {"dixon", "arzt"} - self.reference_features = self.preprocess_score(use_score_audio) + self.reference_features = self.preprocess_score() if distance_func is None: distance_func = DEFAULT_DISTANCE_FUNCS[method] if method == "arzt": + state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps() self.score_follower = OnlineTimeWarpingArzt( reference_features=self.reference_features, queue=self.stream.queue, distance_func=distance_func, frame_rate=self.frame_rate, + window_size=self.config["window_size"], + start_window_size=self.config["start_window_size"], + state_to_ref_time_map=state_to_ref_time_map, + ref_to_state_time_map=ref_to_state_time_map, + step_size=self.config["step_size"], + state_space=np.unique(self.score_part.note_array()["onset_beat"]) ) elif method == "dixon": self.score_follower = OnlineTimeWarpingDixon( @@ -327,11 +340,6 @@ def __init__( has_insertions=True, piano_range=self.config["piano_range"], ) - elif method == "outerhmm" and self.input_type == "midi": - self.score_follower = OuterProductHMM( - reference_features=self.reference_features, - queue=self.stream.queue, - ) elif method == "pthmm" and self.input_type == "audio": self.score_follower = GaussianAudioPitchTempoHMM( reference_features=self.reference_features, @@ -348,7 +356,6 @@ def __init__( elif method == "pthmm" and self.input_type == "midi": self.score_follower = PitchHMM( reference_features=self.reference_features, - # observation_model=obs_model, queue=self.stream.queue, has_insertions=True, piano_range=self.config["piano_range"], @@ -361,22 +368,43 @@ def __init__( else: raise ValueError("Invalid method") - def preprocess_score(self, use_score_audio: bool = False): + def preprocess_score(self): """Preprocess score to extract reference features.""" if self.auto_adjust_tempo and self.performance_file is not None: - self.tempo = adjust_tempo_for_performance_audio( + self.tempo = adjust_tempo_for_performance_file( self.score_part, self.performance_file, self.tempo ) - if use_score_audio: - self.score_audio = generate_score_audio( - self.score_part, self.tempo, self.sample_rate - ).astype(np.float32) - reference_features = self.processor(self.score_audio) - self.processor.reset() - return reference_features + if self.method in {"arzt", "dixon"}: + self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) + self.ppart.sustain_pedal_threshold = 127 + if self.input_type == "audio": + self.score_audio = generate_score_audio( + self.score_part, self.tempo, self.sample_rate + ).astype(np.float32) + reference_features = self.processor(self.score_audio) + self.processor.reset() + return reference_features + else: + polling_period = 0.01 + reference_features = ( + partitura.utils.music.compute_pianoroll( + note_info=self.ppart, + time_unit="sec", + time_div=int(np.round(1 / polling_period)), + binary=True, + piano_range=self.config["piano_range"], + ) + .toarray() + .T + ).astype(np.float32) + return reference_features else: return self.score_part.note_array() + + def get_time_maps(self): + alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]] + return get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment) def _convert_frame_to_beat(self, current_frame: int) -> float: """ diff --git a/matchmaker/utils/misc.py b/matchmaker/utils/misc.py index b8be392..ce58269 100644 --- a/matchmaker/utils/misc.py +++ b/matchmaker/utils/misc.py @@ -13,6 +13,7 @@ from typing import Any, Dict, Iterable, List, Optional, Union import librosa +import mido import numpy as np import partitura import scipy @@ -425,25 +426,28 @@ def get_tempo_at_beat( return current_tempo -def adjust_tempo_for_performance_audio( - score: ScoreLike, performance_audio: Path, default_tempo: int = 120 +def adjust_tempo_for_performance_file( + score: ScoreLike, performance_file: Path, default_tempo: int = 120 ): """ - Adjust the tempo of the score part to match the performance audio. + Adjust the tempo of the score part to match the performance file. We round up the tempo to the nearest 20 bpm to avoid too much optimization. Parameters ---------- score : partitura.score.ScoreLike The score to adjust the tempo of. - performance_audio : Path - The performance audio file to adjust the tempo to. + performance_file : Path + The performance file to adjust the tempo to. default_tempo : int The default tempo of the score. """ score_midi = partitura.save_score_midi(score, out=None) source_length = score_midi.length - target_length = librosa.get_duration(path=str(performance_audio)) + if is_midi_file(performance_file): + target_length = mido.MidiFile(performance_file).length + else: + target_length = librosa.get_duration(path=str(performance_file)) ratio = target_length / source_length rounded_tempo = int( (default_tempo / ratio + 19) // 20 * 20