From 2aa75e5bf88a2f1a32f05865d8b5ea9f9f13e6f1 Mon Sep 17 00:00:00 2001 From: darth-alexus Date: Sat, 7 Feb 2026 12:22:51 +0100 Subject: [PATCH 1/8] create symbolic oltw-arzt tracker --- matchmaker/dp/oltw_arzt.py | 14 ++-- matchmaker/features/audio.py | 2 + matchmaker/features/midi.py | 4 + matchmaker/matchmaker.py | 155 ++++++++++++++++++++++++++--------- matchmaker/utils/misc.py | 15 ++-- 5 files changed, 138 insertions(+), 52 deletions(-) diff --git a/matchmaker/dp/oltw_arzt.py b/matchmaker/dp/oltw_arzt.py index f087017..e4fb8b9 100644 --- a/matchmaker/dp/oltw_arzt.py +++ b/matchmaker/dp/oltw_arzt.py @@ -13,7 +13,7 @@ from matchmaker.base import OnlineAlignment from matchmaker.dp.dtw_loop import oltw_arzt_loop -from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT, WINDOW_SIZE +from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT, WINDOW_SIZE, START_WINDOW_SIZE, STEP_SIZE from matchmaker.utils import ( CYTHONIZED_METRICS_W_ARGUMENTS, CYTHONIZED_METRICS_WO_ARGUMENTS, @@ -27,10 +27,6 @@ set_latency_stats, ) -STEP_SIZE: int = 5 -START_WINDOW_SIZE: Union[float, int] = 0.25 - - class OnlineTimeWarpingArzt(OnlineAlignment): """ Fast On-line Time Warping @@ -97,6 +93,9 @@ def __init__( current_position: int = 0, frame_rate: int = FRAME_RATE, queue: Optional[RECVQueue] = None, + state_to_ref_time_map = None, + ref_to_state_time_map = None, + state_space = None, **kwargs, ) -> None: super().__init__(reference_features=reference_features) @@ -172,6 +171,9 @@ def __init__( "max_latency": 0, "min_latency": float("inf"), } + self.state_to_ref_time_map = state_to_ref_time_map + self.ref_to_state_time_map = ref_to_state_time_map + self.state_space = state_space #if state_space != None else np.unique(self.reference_features.note_array()["onset_beat"]) @property def warping_path(self) -> NDArray[np.int32]: @@ -288,7 +290,7 @@ def step(self, input_features: NDArray[np.float32]) -> None: if self.input_index == 0: # enforce the first time step to stay at the # initial position - self.current_position = min( + self.current_position = min( # TODO: Is this necessary? max(self.current_position, min_index), self.current_position, ) diff --git a/matchmaker/features/audio.py b/matchmaker/features/audio.py index 25b1ff7..6ded530 100644 --- a/matchmaker/features/audio.py +++ b/matchmaker/features/audio.py @@ -22,6 +22,8 @@ FEATURES = "chroma" QUEUE_TIMEOUT = 10 WINDOW_SIZE = 5 +STEP_SIZE = 5 +START_WINDOW_SIZE = 0.25 # Type hint for Input Audio frame. InputAudioSeries = np.ndarray diff --git a/matchmaker/features/midi.py b/matchmaker/features/midi.py index bd2d946..095e31f 100644 --- a/matchmaker/features/midi.py +++ b/matchmaker/features/midi.py @@ -20,6 +20,10 @@ ) from matchmaker.utils.typing import InputMIDIFrame, NDArrayFloat +WINDOW_SIZE_MIDI = 200 +START_WINDOW_SIZE_MIDI = 200 +STEP_SIZE_MIDI = 5 + class PitchProcessor(Processor): """ diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py index ecc0aef..b69ec79 100644 --- a/matchmaker/matchmaker.py +++ b/matchmaker/matchmaker.py @@ -7,18 +7,27 @@ import partitura from partitura.io.exportmidi import get_ppq from partitura.score import Part +from partitura.musicanalysis.performance_codec import get_time_maps_from_alignment from matchmaker.dp import OnlineTimeWarpingArzt, OnlineTimeWarpingDixon from matchmaker.features.audio import ( FRAME_RATE, SAMPLE_RATE, + WINDOW_SIZE, + STEP_SIZE, ChromagramProcessor, CQTProcessor, LogSpectralEnergyProcessor, MelSpectrogramProcessor, MFCCProcessor, ) -from matchmaker.features.midi import PianoRollProcessor, PitchIOIProcessor +from matchmaker.features.midi import ( + WINDOW_SIZE_MIDI, + START_WINDOW_SIZE_MIDI, + STEP_SIZE_MIDI, + PianoRollProcessor, + PitchIOIProcessor, +) from matchmaker.io.audio import AudioStream from matchmaker.io.midi import MidiStream from matchmaker.prob.hmm import ( @@ -35,7 +44,7 @@ transfer_from_score_to_predicted_perf, ) from matchmaker.utils.misc import ( - adjust_tempo_for_performance_audio, + adjust_tempo_for_performance_file, generate_score_audio, is_audio_file, is_midi_file, @@ -48,14 +57,16 @@ "arzt": OnlineTimeWarpingArzt.DEFAULT_DISTANCE_FUNC, "dixon": OnlineTimeWarpingDixon.DEFAULT_DISTANCE_FUNC, "hmm": None, + "outerhmm": None, + "pthmm": None, } DEFAULT_METHODS = { "audio": "arzt", - "midi": "hmm", + "midi": "outerhmm", } -AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm"] +AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm", "outerhmm"] class Matchmaker(object): @@ -101,7 +112,7 @@ def __init__( ) self.input_type = input_type self.feature_type = feature_type - self.frame_rate = frame_rate + self.frame_rate = frame_rate if input_type == "audio" else 1 self.score_part: Optional[Part] = None self.distance_func = distance_func self.device_name_or_index = device_name_or_index @@ -118,14 +129,23 @@ def __init__( raise ValueError("Score file is required") try: - self.score_part = partitura.load_score_as_part(self.score_file) - + self.score_part = partitura.load_score_as_part(self.score_file) + # if score_file is an xml file, load_score_as_part() uses load_score() -> load_musicxml() which imports invisible objects (e.g. trills) by default + # load_score_part() doesn't support an 'ignore_invisible_objects' parameter yet, thus we have to bypass this issue in the following way: + # TODO: find a better solution: + unfold = True + if self.score_file.endswith('musicxml'): + self.score_part = partitura.load_musicxml(self.score_file, ignore_invisible_objects=True) + if unfold: + self.score_part = partitura.score.unfold_part_maximal(self.score_part).parts[0] + else: + self.score_part = self.score_part.parts[0] except Exception as e: raise ValueError(f"Invalid score file: {e}") # setup feature processor if self.feature_type is None: - self.feature_type = "chroma" if input_type == "audio" else "pitchclass" + self.feature_type = "chroma" if input_type == "audio" else "pitch_ioi" if self.feature_type == "chroma": self.processor = ChromagramProcessor( @@ -147,8 +167,10 @@ def __init__( self.processor = LogSpectralEnergyProcessor( sample_rate=sample_rate, ) - elif self.feature_type == "pitchclass": + elif self.feature_type == "pitch_ioi": self.processor = PitchIOIProcessor(piano_range=True) + elif self.feature_type == "pitchclass": + self.processor = PitchClassPianoRollProcessor() elif self.feature_type == "pianoroll": self.processor = PianoRollProcessor(piano_range=True) else: @@ -165,7 +187,17 @@ def __init__( raise ValueError( f"Invalid performance file. Expected MIDI file, but got {self.performance_file}" ) + + # validate method first + if method is None: + method = DEFAULT_METHODS[self.input_type] + elif method not in AVAILABLE_METHODS: + raise ValueError(f"Invalid method. Available methods: {AVAILABLE_METHODS}") + # setup distance function + if distance_func is None: + distance_func = DEFAULT_DISTANCE_FUNCS[method] + # setup stream device if self.input_type == "audio": self.stream = AudioStream( @@ -175,7 +207,14 @@ def __init__( wait=wait, target_sr=SAMPLE_RATE, ) - elif self.input_type == "midi": + elif self.input_type == "midi" and method == "outerhmm": + self.stream = MidiStream( + processor=self.processor, + port=self.device_name_or_index, + file_path=self.performance_file, + polling_period=None, + ) + elif self.input_type == "midi" and method != "outerhmm": self.stream = MidiStream( processor=self.processor, port=self.device_name_or_index, @@ -187,23 +226,21 @@ def __init__( # preprocess score (setting reference features, tempo) self.preprocess_score() - # validate method first - if method is None: - method = DEFAULT_METHODS[self.input_type] - elif method not in AVAILABLE_METHODS: - raise ValueError(f"Invalid method. Available methods: {AVAILABLE_METHODS}") - - # setup distance function - if distance_func is None: - distance_func = DEFAULT_DISTANCE_FUNCS[method] - # setup score follower if method == "arzt": + alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]] + state_to_ref_time_map, ref_to_state_time_map = get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment) self.score_follower = OnlineTimeWarpingArzt( reference_features=self.reference_features, queue=self.stream.queue, distance_func=distance_func, frame_rate=self.frame_rate, + window_size=WINDOW_SIZE if self.input_type == "audio" else WINDOW_SIZE_MIDI, + start_window_size=START_WINDOW_SIZE if self.input_type == "audio" else START_WINDOW_SIZE_MIDI, + state_to_ref_time_map=state_to_ref_time_map, + ref_to_state_time_map=ref_to_state_time_map, + step_size=STEP_SIZE if self.input_type == "audio" else STEP_SIZE_MIDI, + state_space=np.unique(self.score_part.note_array()["onset_beat"]) ) elif method == "dixon": self.score_follower = OnlineTimeWarpingDixon( @@ -235,12 +272,29 @@ def __init__( # ioi_precision=2, transition_scale=0.05, ) + elif method == "pthmm" and self.input_type == "midi": + self.score_follower = PitchHMM( + reference_features=self.reference_features, + # observation_model=obs_model, + queue=self.stream.queue, + tempo_model=tempo_model, + has_insertions=True, + piano_range=piano_range, + ) + elif method == "outerhmm" and self.input_type == "midi": + self.score_follower = OuterProductHMM( + reference_features=self.reference_features, + queue=self.stream.queue, + piano_range=piano_range, + ) + else: + raise ValueError("Invalid method") def preprocess_score(self): if self.input_type == "audio": if self.performance_file is not None: # tempo is slightly adjusted to reflect the tempo of the performance audio - self.tempo = adjust_tempo_for_performance_audio( + self.tempo = adjust_tempo_for_performance_file( self.score_part, self.performance_file, self.tempo ) @@ -252,7 +306,28 @@ def preprocess_score(self): reference_features = self.processor(self.score_audio) self.reference_features = reference_features else: - self.reference_features = self.score_part.note_array() + if self.method == "arzt": + if self.performance_file is not None: + # tempo is slightly adjusted to reflect the tempo of the performance midi + self.tempo = adjust_tempo_for_performance_file( + self.score_part, self.performance_file, self.tempo + ) + self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) + self.ppart.sustain_pedal_threshold = 127 + polling_period = 0.01 + self.reference_features = ( + partitura.utils.music.compute_pianoroll( + note_info=self.ppart, + time_unit="sec", + time_div=int(np.round(1 / polling_period)), + binary=True, + piano_range=True, + ) + .toarray() + .T + ).astype(np.float32) + else: + self.reference_features = self.score_part.note_array() def _convert_frame_to_beat(self, current_frame: int) -> float: """ @@ -398,21 +473,21 @@ def run_evaluation( f"Length of the annotation changed: {original_perf_annots_length} -> {len(perf_annots_predicted)}" ) - if debug: - save_debug_results( - self.score_file, - self.score_audio, - score_annots, - score_annots_predicted, - self.performance_file, - perf_annots, - perf_annots_predicted, - self.score_follower, - self.frame_rate, - save_dir, - run_name, - ) - + if self.input_type == 'audio': + if debug: + save_debug_results( + self.score_file, + self.score_audio if self.input_type=="audio" else None, + score_annots, + score_annots_predicted, + self.performance_file, + perf_annots, + perf_annots_predicted, + self.score_follower, + self.frame_rate, + save_dir, + run_name, + ) if in_seconds: eval_results = get_evaluation_results( perf_annots, @@ -434,9 +509,9 @@ def run_evaluation( tolerances=tolerances, in_seconds=False, ) - - latency_results = self.get_latency_stats() - eval_results.update(latency_results) + if self.input_type == 'audio': + latency_results = self.get_latency_stats() + eval_results.update(latency_results) return eval_results def run(self, verbose: bool = True, wait: bool = True): diff --git a/matchmaker/utils/misc.py b/matchmaker/utils/misc.py index 2265ead..b9ffa7c 100644 --- a/matchmaker/utils/misc.py +++ b/matchmaker/utils/misc.py @@ -191,25 +191,28 @@ def interleave_with_constant( return interleaved_array -def adjust_tempo_for_performance_audio( - score: ScoreLike, performance_audio: Path, default_tempo: int = 120 +def adjust_tempo_for_performance_file( + score: ScoreLike, performance_file: Path, default_tempo: int = 120 ): """ - Adjust the tempo of the score part to match the performance audio. + Adjust the tempo of the score part to match the performance file. We round up the tempo to the nearest 20 bpm to avoid too much optimization. Parameters ---------- score : partitura.score.ScoreLike The score to adjust the tempo of. - performance_audio : Path - The performance audio file to adjust the tempo to. + performance_file : Path + The performance file to adjust the tempo to. default_tempo : int The default tempo of the score. """ score_midi = partitura.save_score_midi(score, out=None) source_length = score_midi.length - target_length = librosa.get_duration(path=str(performance_audio)) + if is_midi_file(performance_file): + target_length = mido.MidiFile(performance_file).length + else: + target_length = librosa.get_duration(path=str(performance_file)) ratio = target_length / source_length rounded_tempo = int( (default_tempo / ratio + 19) // 20 * 20 From eee7925d26ef96ce9989ee9090c7f3ae87cac3d2 Mon Sep 17 00:00:00 2001 From: darth-alexus Date: Thu, 12 Feb 2026 18:08:50 +0100 Subject: [PATCH 2/8] incorporate kwargs parameter and restructure --- matchmaker/matchmaker.py | 73 +++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 16 deletions(-) diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py index b69ec79..a983535 100644 --- a/matchmaker/matchmaker.py +++ b/matchmaker/matchmaker.py @@ -22,9 +22,6 @@ MFCCProcessor, ) from matchmaker.features.midi import ( - WINDOW_SIZE_MIDI, - START_WINDOW_SIZE_MIDI, - STEP_SIZE_MIDI, PianoRollProcessor, PitchIOIProcessor, ) @@ -36,6 +33,8 @@ GaussianAudioPitchTempoHMM, PitchIOIHMM, ) +from matchmaker.utils.tempo_models import KalmanTempoModel + from matchmaker.utils.eval import ( TOLERANCES_IN_BEATS, TOLERANCES_IN_MILLISECONDS, @@ -68,6 +67,45 @@ AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm", "outerhmm"] +KWARGS = { + "audio": + {"arzt": + {"window_size": 5, + "start_window_size": 0.25, + "step_size" : 5, + }, + "dixon": + {"window_size": 10, + }, + }, + "midi": + {"arzt": + {"processor": "pianoroll", + "piano_range": True, + "window_size": 200, + "start_window_size": 200, + "step_size": 5, + }, + "dixon": + {"processor": "pianoroll", + "piano_range": True, + "window_size": 30, + }, + "hmm": + {"processor": "pitch_ioi", + "tempo_model": KalmanTempoModel, + "piano_range": True, + }, + "pthmm": + {"processor": "pitch_ioi", + "piano_range": True, + }, + "outerhmm": + {"processor": "pitch_ioi", + "piano_range": True, + }, + }, +} class Matchmaker(object): """ @@ -105,6 +143,8 @@ def __init__( device_name_or_index: Union[str, int] = None, sample_rate: int = SAMPLE_RATE, frame_rate: int = FRAME_RATE, + kwargs = KWARGS, + unfold_score = True, ): self.score_file = str(score_file) self.performance_file = ( @@ -123,21 +163,18 @@ def __init__( self.tempo = DEFAULT_TEMPO # bpm for quarter note self._has_run = False self.method = method + self.config = kwargs[input_type][method] # setup score file if score_file is None: raise ValueError("Score file is required") try: - self.score_part = partitura.load_score_as_part(self.score_file) - # if score_file is an xml file, load_score_as_part() uses load_score() -> load_musicxml() which imports invisible objects (e.g. trills) by default - # load_score_part() doesn't support an 'ignore_invisible_objects' parameter yet, thus we have to bypass this issue in the following way: # TODO: find a better solution: - unfold = True if self.score_file.endswith('musicxml'): - self.score_part = partitura.load_musicxml(self.score_file, ignore_invisible_objects=True) - if unfold: - self.score_part = partitura.score.unfold_part_maximal(self.score_part).parts[0] + self.score_part = partitura.load_musicxml(self.score_file, force_note_ids=True, ignore_invisible_objects=True) + if unfold_score: + self.score_part = partitura.score.unfold_part_maximal(self.score_part, ignore_leaps = False).parts[0] else: self.score_part = self.score_part.parts[0] except Exception as e: @@ -228,18 +265,17 @@ def __init__( # setup score follower if method == "arzt": - alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]] - state_to_ref_time_map, ref_to_state_time_map = get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment) + state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps() self.score_follower = OnlineTimeWarpingArzt( reference_features=self.reference_features, queue=self.stream.queue, distance_func=distance_func, frame_rate=self.frame_rate, - window_size=WINDOW_SIZE if self.input_type == "audio" else WINDOW_SIZE_MIDI, - start_window_size=START_WINDOW_SIZE if self.input_type == "audio" else START_WINDOW_SIZE_MIDI, + window_size=self.config["window_size"], + start_window_size=self.config["start_window_size"], state_to_ref_time_map=state_to_ref_time_map, ref_to_state_time_map=ref_to_state_time_map, - step_size=STEP_SIZE if self.input_type == "audio" else STEP_SIZE_MIDI, + step_size=self.config["step_size"], state_space=np.unique(self.score_part.note_array()["onset_beat"]) ) elif method == "dixon": @@ -297,7 +333,8 @@ def preprocess_score(self): self.tempo = adjust_tempo_for_performance_file( self.score_part, self.performance_file, self.tempo ) - + self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) # needed for time maps + self.ppart.sustain_pedal_threshold = 127 # generate score audio self.score_audio = generate_score_audio( self.score_part, self.tempo, SAMPLE_RATE @@ -329,6 +366,10 @@ def preprocess_score(self): else: self.reference_features = self.score_part.note_array() + def get_time_maps(self): + alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]] + return get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment) + def _convert_frame_to_beat(self, current_frame: int) -> float: """ Convert frame number to relative beat position in the score. From c4182a1a4e8aa0a983279b3619a251c6ce13ca5f Mon Sep 17 00:00:00 2001 From: darth-alexus Date: Thu, 19 Feb 2026 10:23:36 +0100 Subject: [PATCH 3/8] add missing oltw arzt kwargs --- matchmaker/matchmaker.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py index de5f79d..35c6c03 100644 --- a/matchmaker/matchmaker.py +++ b/matchmaker/matchmaker.py @@ -79,12 +79,18 @@ "dixon": { "window_size": 10, }, - "arzt": {}, + "arzt": { + "window_size": 5, + "start_window_size": 0.25, + "step_size" : 5,}, }, "midi": { "arzt": { "processor": "pianoroll", "piano_range": True, + "window_size": 200, + "start_window_size": 200, + "step_size": 5, }, "dixon": { "processor": "pianoroll", From d75bc2f3173ebb1debd54a83aad76ee91f3cf440 Mon Sep 17 00:00:00 2001 From: darth-alexus Date: Thu, 19 Feb 2026 11:01:05 +0100 Subject: [PATCH 4/8] remove redundant default midi parameters --- matchmaker/features/midi.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/matchmaker/features/midi.py b/matchmaker/features/midi.py index 095e31f..bd2d946 100644 --- a/matchmaker/features/midi.py +++ b/matchmaker/features/midi.py @@ -20,10 +20,6 @@ ) from matchmaker.utils.typing import InputMIDIFrame, NDArrayFloat -WINDOW_SIZE_MIDI = 200 -START_WINDOW_SIZE_MIDI = 200 -STEP_SIZE_MIDI = 5 - class PitchProcessor(Processor): """ From 53e18635c66677d3dbc550be4d225fd32e9045e1 Mon Sep 17 00:00:00 2001 From: darth-alexus Date: Fri, 20 Feb 2026 16:04:24 +0100 Subject: [PATCH 5/8] fix merge conflicts --- matchmaker/matchmaker.py | 339 ++++++++++++++++++------------- matchmaker/utils/misc.py | 427 ++++++++++++++++----------------------- 2 files changed, 364 insertions(+), 402 deletions(-) diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py index 35c6c03..36ac6be 100644 --- a/matchmaker/matchmaker.py +++ b/matchmaker/matchmaker.py @@ -1,12 +1,12 @@ import os import sys +from pathlib import Path from typing import Optional, Union import numpy as np - import partitura from partitura.io.exportmidi import get_ppq -from partitura.score import Part +from partitura.score import Part, merge_parts from partitura.musicanalysis.performance_codec import get_time_maps_from_alignment from matchmaker.dp import OnlineTimeWarpingArzt, OnlineTimeWarpingDixon @@ -15,6 +15,7 @@ SAMPLE_RATE, ChromagramProcessor, CQTProcessor, + CQTSpectralFluxProcessor, LogSpectralEnergyProcessor, MelSpectrogramProcessor, MFCCProcessor, @@ -31,11 +32,9 @@ GaussianAudioPitchTempoHMM, PitchHMM, PitchIOIHMM, - PitchHMM, ) from matchmaker.prob.outer_product_hmm import OuterProductHMM -from matchmaker.utils.tempo_models import KalmanTempoModel - +from matchmaker.prob.outer_product_hmm_audio import AudioOuterProductHMM from matchmaker.utils.eval import ( TOLERANCES_IN_BEATS, TOLERANCES_IN_MILLISECONDS, @@ -52,18 +51,19 @@ save_debug_results, ) from matchmaker.utils.tempo_models import KalmanTempoModel -from partitura.io.exportmidi import get_ppq -from partitura.score import Part sys.setrecursionlimit(10_000) PathLike = Union[str, bytes, os.PathLike] DEFAULT_TEMPO = 120 + + DEFAULT_DISTANCE_FUNCS = { "arzt": OnlineTimeWarpingArzt.DEFAULT_DISTANCE_FUNC, "dixon": OnlineTimeWarpingDixon.DEFAULT_DISTANCE_FUNC, "hmm": None, "outerhmm": None, + "audio_outerhmm": None, "pthmm": None, } @@ -72,8 +72,7 @@ "midi": "outerhmm", } -AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm", "outerhmm"] - +AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm", "outerhmm", "audio_outerhmm"] KWARGS = { "audio": { "dixon": { @@ -83,6 +82,10 @@ "window_size": 5, "start_window_size": 0.25, "step_size" : 5,}, + "audio_outerhmm": { + "sample_rate": 16000, + "frame_rate": 50, + }, }, "midi": { "arzt": { @@ -157,9 +160,9 @@ def __init__( sample_rate: int = SAMPLE_RATE, frame_rate: int = FRAME_RATE, tempo: Optional[float] = None, - adjust_tempo: bool = False, - kwargs = KWARGS, - unfold_score = True, + kwargs=KWARGS, + unfold_score=True, + auto_adjust_tempo: bool = False, ): self.score_file = str(score_file) self.performance_file = ( @@ -171,6 +174,8 @@ def __init__( self.input_type = input_type self.feature_type = feature_type self.frame_rate = frame_rate if input_type == "audio" else 1 + self.sample_rate = sample_rate + self.hop_length = sample_rate // self.frame_rate self.score_part: Optional[Part] = None self.distance_func = distance_func self.device_name_or_index = device_name_or_index @@ -187,75 +192,86 @@ def __init__( raise ValueError(f"Invalid method. Available methods: {AVAILABLE_METHODS}") self.method = method - self.config = kwargs[input_type][method] - self.adjust_tempo = adjust_tempo - self.config = kwargs[input_type][method] + self.config = kwargs[self.input_type][self.method] + self.auto_adjust_tempo = auto_adjust_tempo - # setup score file - if score_file is None: - raise ValueError("Score file is required") + # Apply method-specific defaults from config (only if not explicitly provided by caller) + if sample_rate == SAMPLE_RATE and "sample_rate" in self.config: + self.sample_rate = self.config["sample_rate"] + if frame_rate == FRAME_RATE and "frame_rate" in self.config: + self.frame_rate = self.config["frame_rate"] + self.hop_length = self.sample_rate // self.frame_rate + # setup score file try: - # TODO: find a better solution: - if self.score_file.endswith("musicxml"): - self.score_part = partitura.load_musicxml( + ext = Path(self.score_file).suffix.lower() + if ext in (".musicxml", ".xml", ".mxl"): + score = partitura.load_musicxml( self.score_file, ignore_invisible_objects=True ) - if unfold_score: - self.score_part = partitura.score.unfold_part_maximal( - self.score_part, ignore_leaps=False - ).parts[0] - else: - self.score_part = self.score_part.parts[0] + else: + score = partitura.load_score(self.score_file) + + if unfold_score: + score = partitura.score.unfold_part_maximal(score, ignore_leaps=False) + self.score_part = merge_parts(score.parts) except Exception as e: raise ValueError(f"Invalid score file: {e}") - # Set tempo: user-provided > score marking > default (120 BPM) - # _user_specified_tempo: if True, use uniform tempo; if False, use score tempo map + # Set tempo: user-provided > adjust_tempo (always 120) > score marking > default (120 BPM) if tempo is not None: self.tempo = float(tempo) - self._user_specified_tempo = True + elif auto_adjust_tempo: + self.tempo = DEFAULT_TEMPO else: - self._user_specified_tempo = False score_tempo = get_tempo_from_score(self.score_part, self.score_file) self.tempo = score_tempo if score_tempo is not None else DEFAULT_TEMPO # setup feature processor if self.feature_type is None: - self.feature_type = "chroma" if input_type == "audio" else "pitch_ioi" + if input_type == "audio": + self.feature_type = ( + "cqt_spectral_flux" if method == "audio_outerhmm" else "chroma" + ) + else: + self.feature_type = "pitch_ioi" if self.feature_type == "chroma": self.processor = ChromagramProcessor( - sample_rate=sample_rate, + sample_rate=self.sample_rate, + hop_length=self.hop_length, ) elif self.feature_type == "mfcc": self.processor = MFCCProcessor( - sample_rate=sample_rate, + sample_rate=self.sample_rate, ) elif self.feature_type == "cqt": self.processor = CQTProcessor( - sample_rate=sample_rate, + sample_rate=self.sample_rate, ) elif self.feature_type == "mel": self.processor = MelSpectrogramProcessor( - sample_rate=sample_rate, + sample_rate=self.sample_rate, ) elif self.feature_type == "lse": self.processor = LogSpectralEnergyProcessor( - sample_rate=sample_rate, + sample_rate=self.sample_rate, ) elif self.feature_type == "pitch_ioi": self.processor = PitchIOIProcessor(piano_range=self.config["piano_range"]) - elif self.feature_type == "pitchclass": + elif self.feature_type == "pitchclass": self.processor = PitchClassPianoRollProcessor() elif self.feature_type == "pianoroll": self.processor = PianoRollProcessor(piano_range=self.config["piano_range"]) + elif self.feature_type == "cqt_spectral_flux": + self.processor = CQTSpectralFluxProcessor( + sample_rate=self.sample_rate, + hop_length=self.hop_length, + ) else: raise ValueError(f"Invalid feature type `{self.feature_type}`") - # validate performance file and input_type if self.performance_file is not None: - # check performance file type matches input type if self.input_type == "audio" and not is_audio_file(self.performance_file): raise ValueError( f"Invalid performance file. Expected audio file, but got {self.performance_file}" @@ -264,45 +280,38 @@ def __init__( raise ValueError( f"Invalid performance file. Expected MIDI file, but got {self.performance_file}" ) - - # validate method first - if method is None: - method = DEFAULT_METHODS[self.input_type] - elif method not in AVAILABLE_METHODS: - raise ValueError(f"Invalid method. Available methods: {AVAILABLE_METHODS}") # setup distance function if distance_func is None: - distance_func = DEFAULT_DISTANCE_FUNCS[method] + distance_func = DEFAULT_DISTANCE_FUNCS[self.method] # setup stream device + if self.input_type == "audio": self.stream = AudioStream( processor=self.processor, device_name_or_index=self.device_name_or_index, file_path=self.performance_file, wait=wait, - target_sr=SAMPLE_RATE, + target_sr=self.sample_rate, + sample_rate=self.sample_rate, + hop_length=self.hop_length, ) - elif self.input_type == "midi" and method == "outerhmm": - self.stream = MidiStream( - processor=self.processor, - port=self.device_name_or_index, - file_path=self.performance_file, - polling_period=None, - ) - elif self.input_type == "midi" and method != "outerhmm": + elif self.input_type == "midi": self.stream = MidiStream( processor=self.processor, port=self.device_name_or_index, file_path=self.performance_file, + **({"polling_period": None} if method == "outerhmm" else {}), ) else: raise ValueError(f"Invalid input type {self.input_type}") - # preprocess score (setting reference features, tempo) - self.preprocess_score() + use_score_audio = self.input_type == "audio" and method in {"dixon", "arzt"} + self.reference_features = self.preprocess_score(use_score_audio) + + if distance_func is None: + distance_func = DEFAULT_DISTANCE_FUNCS[method] - # setup score follower if method == "arzt": state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps() self.score_follower = OnlineTimeWarpingArzt( @@ -332,22 +341,18 @@ def __init__( has_insertions=True, piano_range=self.config["piano_range"], ) - elif method == "hmm" and self.input_type == "audio": - # state_space = self._convert_frame_to_beat(np.arange(len(self.reference_features))) - self.score_follower = GaussianAudioPitchHMM( + elif method == "pthmm" and self.input_type == "audio": + self.score_follower = GaussianAudioPitchTempoHMM( reference_features=self.reference_features, queue=self.stream.queue, - # state_space=state_space, - # patience=50, ) - elif method == "pthmm" and self.input_type == "audio": - self.score_follower = GaussianAudioPitchTempoHMM( + elif method == "audio_outerhmm" and self.input_type == "audio": + self.score_follower = AudioOuterProductHMM( reference_features=self.reference_features, - # observation_model=obs_model, queue=self.stream.queue, - # pitch_precision=0.5, - # ioi_precision=2, - transition_scale=0.05, + tempo=self.tempo, + sample_rate=self.sample_rate, + hop_length=self.hop_length, ) elif method == "pthmm" and self.input_type == "midi": self.score_follower = PitchHMM( @@ -364,47 +369,39 @@ def __init__( else: raise ValueError("Invalid method") - def preprocess_score(self): - if self.input_type == "audio": - # Adjust tempo based on performance audio if requested - if self.adjust_tempo and self.performance_file is not None: - self.tempo = adjust_tempo_for_performance_file( - self.score_part, self.performance_file, self.tempo - ) - self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) # needed for time maps - self.ppart.sustain_pedal_threshold = 127 - # generate score audio + def preprocess_score(self, use_score_audio: bool = False): + """Preprocess score to extract reference features.""" + if self.auto_adjust_tempo and self.performance_file is not None: + self.tempo = adjust_tempo_for_performance_file( + self.score_part, self.performance_file, self.tempo + ) + + if use_score_audio: self.score_audio = generate_score_audio( - self.score_part, self.tempo, SAMPLE_RATE + self.score_part, self.tempo, self.sample_rate ).astype(np.float32) - reference_features = self.processor(self.score_audio) - self.reference_features = reference_features self.processor.reset() + return reference_features + elif self.method in {"arzt", "dixon"}: # only midi + self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) + self.ppart.sustain_pedal_threshold = 127 + polling_period = 0.01 + reference_features = ( + partitura.utils.music.compute_pianoroll( + note_info=self.ppart, + time_unit="sec", + time_div=int(np.round(1 / polling_period)), + binary=True, + piano_range=self.config["piano_range"], + ) + .toarray() + .T + ).astype(np.float32) + return reference_features else: - if self.method == "arzt": - if self.performance_file is not None: - # tempo is slightly adjusted to reflect the tempo of the performance midi - self.tempo = adjust_tempo_for_performance_file( - self.score_part, self.performance_file, self.tempo - ) - self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) - self.ppart.sustain_pedal_threshold = 127 - polling_period = 0.01 - self.reference_features = ( - partitura.utils.music.compute_pianoroll( - note_info=self.ppart, - time_unit="sec", - time_div=int(np.round(1 / polling_period)), - binary=True, - piano_range=self.config["piano_range"], - ) - .toarray() - .T - ).astype(np.float32) - else: - self.reference_features = self.score_part.note_array() - + return self.score_part.note_array() + def get_time_maps(self): alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]] return get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment) @@ -428,26 +425,58 @@ def _convert_frame_to_beat(self, current_frame: int) -> float: ) return beat_position - def build_score_annotations(self, level="beat", musical_beat: bool = False): + def build_score_annotations( + self, + level="beat", + musical_beat: bool = False, + return_type: str = "beats", # "beat" or "seconds" + ): + """ + Build score annotations in beat or second unit. + + Parameters + ---------- + level : str + Level of annotations to use: beat or note (chord onset level) + musical_beat : bool + Whether to use musical beat + return_type : {"beat", "seconds"} + Type of annotations to return: beat or seconds (time unit) + + Returns + ------- + score_annots : np.ndarray + Array of score annotations in beat or second unit + """ score_annots = [] - if level == "beat": # TODO: add bar-level, note-level + if level == "beat": if musical_beat: self.score_part.use_musical_beat() # for asap dataset note_array = np.unique(self.score_part.note_array()["onset_beat"]) start_beat = np.ceil(note_array.min()) end_beat = np.floor(note_array.max()) - self.beats = np.arange(start_beat, end_beat + 1) + score_annots_in_beat = np.arange(start_beat, end_beat + 1) + elif level == "note": + snote_array = self.score_part.note_array() + score_annots_in_beat = np.unique(snote_array["onset_beat"]) + else: + raise ValueError(f"Invalid score annotation level: {level}") - beat_timestamp = [ + if return_type == "beats": + return score_annots_in_beat + elif return_type == "seconds": + score_annots_in_seconds = [ self.score_part.inv_beat_map(beat) / self.score_part.quarter_duration_map( self.score_part.inv_beat_map(beat) ) * (60 / self.tempo) - for beat in self.beats + for beat in score_annots_in_beat ] + return np.array(score_annots_in_seconds) + else: + raise ValueError(f"Invalid return type: {return_type}") - score_annots = np.array(beat_timestamp) return score_annots def convert_timestamps_to_beats(self, timestamps): @@ -498,13 +527,13 @@ def get_latency_stats(self): def run_evaluation( self, perf_annotations: Union[PathLike, np.ndarray], - level: str = "beat", + level: str = "note", tolerances: list = TOLERANCES_IN_MILLISECONDS, musical_beat: bool = False, # beat annots are difference in some dataset debug: bool = False, save_dir: PathLike = None, run_name: str = None, - in_seconds: bool = True, # 'True' for performance-based, 'False' for score-based + domain: str = "performance", # "score" or "performance" ) -> dict: """ Evaluate the score following process @@ -520,8 +549,9 @@ def run_evaluation( Tolerances to use for evaluation (in milliseconds) debug : bool Whether to save the score and performance audio with beat annotations - axis : str - Evaluation axis, either 'score' or 'performance' + domain : str + Evaluation domain, either "score" or "performance". + "score" domain evaluates in beat unit, "performance" domain evaluates in second unit. (Default: "performance") Returns ------- @@ -536,66 +566,83 @@ def run_evaluation( perf_annots = perf_annotations else: perf_annots = np.loadtxt(fname=perf_annotations, delimiter="\t", usecols=0) - score_annots = self.build_score_annotations(level, musical_beat) - original_perf_annots_length = len(perf_annots) + + return_type = "seconds" if domain == "performance" else "beats" + score_annots = self.build_score_annotations(level, musical_beat, return_type) + + original_perf_annots_counts = len(perf_annots) min_length = min(len(score_annots), len(perf_annots)) score_annots = score_annots[:min_length] perf_annots = perf_annots[:min_length] + mode = ( + "state" + if (self.input_type == "midi" or self.method == "audio_outerhmm") + else "frame" + ) perf_annots_predicted = transfer_from_score_to_predicted_perf( - self.score_follower.warping_path, score_annots, frame_rate=self.frame_rate + self.score_follower.warping_path, + score_annots, + frame_rate=self.frame_rate, + mode=mode, ) score_annots_predicted = transfer_from_perf_to_predicted_score( - self.score_follower.warping_path, perf_annots, frame_rate=self.frame_rate + self.score_follower.warping_path, + perf_annots, + frame_rate=self.frame_rate, + mode=mode, ) score_annots = score_annots[: len(score_annots_predicted)] - if original_perf_annots_length != len(perf_annots_predicted): + if original_perf_annots_counts != len(perf_annots_predicted): print( - f"Length of the annotation changed: {original_perf_annots_length} -> {len(perf_annots_predicted)}" + f"Length of the annotation changed: {original_perf_annots_counts} -> {len(perf_annots_predicted)}" ) - if self.input_type == "audio": - if debug: - save_debug_results( - self.score_file, - self.score_audio if self.input_type == "audio" else None, - score_annots, - score_annots_predicted, - self.performance_file, - perf_annots, - perf_annots_predicted, - self.score_follower, - self.frame_rate, - save_dir, - run_name, - ) - if in_seconds: + # Evaluation metrics + if domain == "performance": eval_results = get_evaluation_results( perf_annots, perf_annots_predicted, - total_length=original_perf_annots_length, + total_counts=original_perf_annots_counts, tolerances=tolerances, ) else: - score_annots = self.beats score_annots_predicted = self.convert_timestamps_to_beats( score_annots_predicted ) if tolerances == TOLERANCES_IN_MILLISECONDS: - tolerances = TOLERANCES_IN_BEATS # switch to beats + tolerances = TOLERANCES_IN_BEATS eval_results = get_evaluation_results( score_annots, score_annots_predicted, - total_length=original_perf_annots_length, + total_counts=original_perf_annots_counts, tolerances=tolerances, in_seconds=False, ) if self.input_type == "audio": latency_results = self.get_latency_stats() eval_results.update(latency_results) + + # Debug: save warping path TSV, results JSON, and plots + if debug and save_dir is not None: + save_debug_results( + warping_path=self.score_follower.warping_path, + score_annots=score_annots, + perf_annots=perf_annots, + perf_annots_predicted=perf_annots_predicted, + eval_results=eval_results, + frame_rate=self.frame_rate, + save_dir=save_dir, + run_name=run_name or "results", + state_space=getattr(self.score_follower, "state_space", None), + ref_features=getattr(self.score_follower, "reference_features", None), + input_features=getattr(self.score_follower, "input_features", None), + distance_func=getattr(self.score_follower, "distance_func", None), + ) + return eval_results def run(self, verbose: bool = True, wait: bool = True): @@ -613,12 +660,12 @@ def run(self, verbose: bool = True, wait: bool = True): Alignment results with warping path """ with self.stream: - for current_frame in self.score_follower.run(verbose=verbose): - if self.input_type == "audio": - position_in_beat = self._convert_frame_to_beat(current_frame) + for current_position in self.score_follower.run(verbose=verbose): + if self.input_type == "audio" and self.method != "audio_outerhmm": + position_in_beat = self._convert_frame_to_beat(current_position) yield position_in_beat else: - yield float(self.score_follower.state_space[current_frame]) + yield float(self.score_follower.state_space[current_position]) self._has_run = True return self.score_follower.warping_path diff --git a/matchmaker/utils/misc.py b/matchmaker/utils/misc.py index a2d5f57..ce58269 100644 --- a/matchmaker/utils/misc.py +++ b/matchmaker/utils/misc.py @@ -6,7 +6,6 @@ import csv import numbers -import os import re import xml.etree.ElementTree as ET from pathlib import Path @@ -18,13 +17,10 @@ import numpy as np import partitura import scipy -import soundfile as sf from matplotlib import pyplot as plt from numpy.typing import NDArray from partitura.score import ScoreLike -from matchmaker.features.audio import SAMPLE_RATE - # Tempo marking to BPM mapping # Reference: https://en.wikipedia.org/wiki/Tempo#Basic_tempo_markings TEMPO_MARKING_TO_BPM = { @@ -325,6 +321,7 @@ def get_tempo_from_score( Tries multiple sources in order: 1. Partitura Tempo objects (explicit BPM) 2. MusicXML element (if score_file provided) + 3. Text tempo marking (e.g., "Allegro", "Andante") converted to approximate BPM Parameters ---------- @@ -363,113 +360,11 @@ def get_tempo_from_score( except Exception: pass - return None - - -def get_tempo_at_beat( - score_part: ScoreLike, - beat: float, - default_tempo: float = 120.0, -) -> float: - """ - Get tempo (BPM) at a specific beat position in the score. - - Uses score tempo markings if available. Falls back to default_tempo otherwise. - - Parameters - ---------- - score_part : ScoreLike - Partitura score part - beat : float - Beat position in the score - default_tempo : float - Default tempo to use if no tempo markings found - - Returns - ------- - float - Tempo in BPM at the given beat position - """ - if score_part is None: - return default_tempo - - # Collect all tempo markings with their positions - tempo_changes = [] - try: - for tempo_obj in score_part.iter_all(partitura.score.Tempo): - if hasattr(tempo_obj, "bpm") and tempo_obj.bpm is not None: - # Get beat position of tempo marking - start_time = getattr(tempo_obj, "start", None) - if start_time is not None: - tempo_beat = score_part.beat_map(start_time.t) - tempo_changes.append((tempo_beat, float(tempo_obj.bpm))) - except Exception: - pass - - if not tempo_changes: - return default_tempo - - # Sort by beat position - tempo_changes.sort(key=lambda x: x[0]) - - # Find the tempo at the given beat (last tempo marking before or at beat) - current_tempo = default_tempo - for tempo_beat, bpm in tempo_changes: - if tempo_beat <= beat: - current_tempo = bpm - else: - break - - return current_tempo - - -def get_tempo_from_score( - score_part: ScoreLike, - score_file: Optional[Union[str, Path]] = None, -) -> Optional[float]: - """ - Extract first tempo marking from score if available. - - Tries multiple sources in order: - 1. Partitura Tempo objects (explicit BPM) - 2. MusicXML element (if score_file provided) - - Parameters - ---------- - score_part : ScoreLike - Partitura score part - score_file : str or Path, optional - Path to the score file. Used as fallback to parse MusicXML directly - when partitura doesn't extract tempo. - - Returns - ------- - float or None - Tempo in BPM if found in score, None otherwise. - """ - # Try partitura Tempo objects first - if score_part is not None: - try: - for tempo_obj in score_part.iter_all(partitura.score.Tempo): - if hasattr(tempo_obj, "bpm") and tempo_obj.bpm is not None: - return float(tempo_obj.bpm) - except Exception: - pass - - # Fallback: parse MusicXML directly for + # Fallback: extract from text tempo marking (e.g., "Allegro", "Andante") if score_file is not None: - try: - import xml.etree.ElementTree as ET - - tree = ET.parse(str(score_file)) - root = tree.getroot() - - for sound_elem in root.iter("sound"): - tempo_attr = sound_elem.get("tempo") - if tempo_attr is not None: - return float(tempo_attr) - except Exception: - pass + text_tempo = extract_tempo_marking_from_musicxml(score_file) + if text_tempo is not None: + return text_tempo return None @@ -626,161 +521,181 @@ def save_nparray_to_csv(array: NDArray, save_path: str): writer.writerows(array) -def save_mixed_audio( - audio: Union[np.ndarray, str, os.PathLike], - annots: np.ndarray, - save_path: Union[str, os.PathLike], - sr: int = SAMPLE_RATE, +def plot_alignment( + warping_path: np.ndarray, + perf_annots: np.ndarray, + perf_annots_predicted: np.ndarray, + save_dir: Path, + name: str, + score_y: Optional[np.ndarray] = None, + frame_rate: float = 1.0, + state_space: Optional[np.ndarray] = None, + ref_features: Optional[np.ndarray] = None, + input_features: Optional[np.ndarray] = None, + distance_func=None, ): - if not isinstance(audio, np.ndarray): - audio, _ = librosa.load(audio, sr=sr) - - annots_audio = librosa.clicks( - times=annots, - sr=sr, - click_freq=1000, - length=len(audio), - ) - audio_mixed = audio + annots_audio - sf.write(str(save_path), audio_mixed, sr, subtype="PCM_24") - - -def plot_and_save_score_following_result( - wp, - ref_features, - input_features, - distance_func, - save_dir, - score_annots, - perf_annots, - frame_rate, - name=None, -): - xmin = 0 # performance range - xmax = None - ymin = 0 # score range - ymax = None - - xmax = xmax if xmax is not None else input_features.shape[0] - 1 - ymax = ymax if ymax is not None else ref_features.shape[0] - 1 - x_indices = range(xmin, xmax + 1) - y_indices = range(ymin, ymax + 1) - - run_name = name or "results" - save_path = save_dir / f"wp_{run_name}.tsv" - save_nparray_to_csv(wp.T, save_path.as_posix()) - - dist = scipy.spatial.distance.cdist( - ref_features[y_indices, :], - input_features[x_indices, :], - metric=distance_func, - ) # [d, wy] - plt.figure(figsize=(10, 10)) - plt.imshow( - dist, - aspect="auto", - origin="lower", - interpolation="nearest", - extent=(xmin, xmax, ymin, ymax), + """Plot warping path, GT annotations, and predicted points in one figure. + + Layers (back to front): distance matrix → warping path → predicted → GT. + """ + save_dir.mkdir(parents=True, exist_ok=True) + gt = np.asarray(perf_annots, dtype=float) + pred = np.asarray(perf_annots_predicted, dtype=float) + n = min(len(gt), len(pred)) + gt, pred = gt[:n], pred[:n] + + has_dist_matrix = ( + ref_features is not None + and input_features is not None + and distance_func is not None ) - mask_perf = (xmin <= perf_annots * frame_rate) & (perf_annots * frame_rate <= xmax) - mask_score = (ymin <= score_annots * frame_rate) & ( - score_annots * frame_rate <= ymax + + fig, ax = plt.subplots(figsize=(30, 30)) + + if has_dist_matrix: + # DTW mode: everything in frame space + dist = scipy.spatial.distance.cdist( + ref_features, + input_features, + metric=distance_func, + ) + ax.imshow( + dist, + aspect="auto", + origin="lower", + interpolation="nearest", + extent=(0, input_features.shape[0] - 1, 0, ref_features.shape[0] - 1), + ) + x_gt = gt * float(frame_rate) + x_pred = pred * float(frame_rate) + if score_y is not None: + y = np.asarray(score_y, dtype=float)[:n] * float(frame_rate) + else: + y = np.arange(n) + ylabel = "score (frames)" + wp_x = warping_path[1] + wp_y = warping_path[0] + else: + # HMM mode: x in frames, y in beats via state_space + x_gt = gt * float(frame_rate) + x_pred = pred * float(frame_rate) + if score_y is None: + y = np.arange(n) + ylabel = "annotation index" + else: + y = np.asarray(score_y, dtype=float)[:n] + ylabel = "score position (beats)" + wp_x = warping_path[1] + if state_space is not None: + wp_y = state_space[warping_path[0]] + else: + wp_y = warping_path[0] + + # 1. Warping path + if has_dist_matrix: + ax.plot( + wp_x, + wp_y, + ".", + color="white", + alpha=0.7, + markersize=15, + label="warping path", + zorder=2, + ) + else: + ax.plot( + wp_x, + wp_y, + ".", + color="lime", + alpha=0.5, + markersize=15, + label="warping path", + zorder=2, + ) + + # 2. Predicted points + ax.scatter( + x_pred, + y, + label="predicted", + s=80, + alpha=0.9, + marker="o", + color="blue", + linewidths=0, + zorder=3, ) - plt.title( - f"[{save_dir.name}/{run_name}] \n Matchmaker alignment path with ground-truth labels", - fontsize=15, + + # 3. GT annotations (front) + ax.scatter( + x_gt, + y, + label="ground truth", + s=120, + alpha=0.9, + marker="x", + color="red", + linewidths=3, + zorder=4, ) - plt.xlabel("Performance Features", fontsize=15) - plt.ylabel("Score Features", fontsize=15) - - # plot online DTW path - cropped_history = [ - (ref, target) - for (ref, target) in wp.T - if xmin <= target <= xmax and ymin <= ref <= ymax - ] - for ref, target in cropped_history: - plt.plot(target, ref, ".", color="cyan", alpha=0.5, markersize=3) - - # plot ground-truth labels - for ref, target in zip(score_annots, perf_annots): - if (xmin <= target * frame_rate <= xmax) and (ymin <= ref * frame_rate <= ymax): - plt.plot( - target * frame_rate, - ref * frame_rate, - "x", - color="r", - alpha=1, - markersize=3, - markeredgewidth=3, - ) - plt.savefig(save_dir / f"{run_name}.png") + + ax.set_xlabel("performance frame") + ax.set_ylabel(ylabel) + ax.set_title(f"[{save_dir.name}] alignment ({name})") + ax.grid(True, alpha=0.2) + ax.legend(loc="best") + fig.tight_layout() + fig.savefig(save_dir / f"{name}.png", dpi=150) + plt.close(fig) def save_debug_results( - score_file, - score_audio, - score_annots, - score_annots_predicted, - perf_file, - perf_annots, - perf_annots_predicted, - model, - frame_rate, - save_dir=None, - run_name=None, + warping_path: np.ndarray, + score_annots: np.ndarray, + perf_annots: np.ndarray, + perf_annots_predicted: np.ndarray, + eval_results: dict, + frame_rate: float, + save_dir: Path, + run_name: str = "results", + state_space: Optional[np.ndarray] = None, + ref_features: Optional[np.ndarray] = None, + input_features: Optional[np.ndarray] = None, + distance_func=None, ): - # save score audio with beat annotations - score_audio_dir = Path("./score_audio") - score_audio_dir.mkdir(parents=True, exist_ok=True) - run_name_suffix = ( - f"{Path(perf_file).stem}_{run_name}" if run_name else f"{Path(perf_file).stem}" - ) - save_mixed_audio( - score_audio, - score_annots, - save_path=score_audio_dir - / f"score_audio_{Path(score_file).parent.parent.name}_{Path(score_file).stem}_{run_name_suffix}.wav", - ) - # save performance audio with beat annotations - perf_audio_dir = Path("./performance_audio") - perf_audio_dir.mkdir(parents=True, exist_ok=True) - save_mixed_audio( - perf_file, + """Save debug outputs: warping path TSV, results JSON, and alignment plot.""" + save_dir = Path(save_dir) + save_dir.mkdir(parents=True, exist_ok=True) + + # 1. Warping path TSV + results JSON + save_nparray_to_csv(warping_path.T, (save_dir / f"wp_{run_name}.tsv").as_posix()) + import json + + with open(save_dir / f"{run_name}.json", "w") as f: + json.dump(eval_results, f, indent=4) + + # 2. Alignment plot + if state_space is not None: + score_y = state_space + else: + sx = np.asarray(score_annots, dtype=float) + score_y = ( + sx + if sx.ndim == 1 and len(sx) == len(perf_annots) and np.all(np.diff(sx) >= 0) + else None + ) + plot_alignment( + warping_path, perf_annots, - save_path=perf_audio_dir - / f"perf_audio_{Path(perf_file).parent.parent.name}_{Path(perf_file).parent.name}_{run_name_suffix}.wav", - ) - # save score audio with predicted beat annotations - score_predicted_audio_dir = Path("./score_audio_predicted") - score_predicted_audio_dir.mkdir(parents=True, exist_ok=True) - save_mixed_audio( - score_audio, - score_annots_predicted, - save_path=score_predicted_audio_dir - / f"score_audio_{Path(score_file).parent.parent.name}_{Path(score_file).parent.name}_{run_name_suffix}.wav", - ) - # save performance audio with predicted beat annotations - perf_predicted_audio_dir = Path("./performance_audio_predicted") - perf_predicted_audio_dir.mkdir(parents=True, exist_ok=True) - save_mixed_audio( - perf_file, perf_annots_predicted, - save_path=perf_predicted_audio_dir - / f"perf_audio_{Path(perf_file).parent.parent.name}_{Path(perf_file).parent.name}_{run_name_suffix}.wav", - ) - # save score following plot result - save_dir = save_dir or Path("./tests/results") - save_dir.mkdir(parents=True, exist_ok=True) - plot_and_save_score_following_result( - model.warping_path, - model.reference_features, - model.input_features, - model.distance_func, save_dir, - score_annots, - perf_annots, - frame_rate, - name=run_name, + run_name, + score_y=score_y, + frame_rate=frame_rate, + state_space=state_space, + ref_features=ref_features, + input_features=input_features, + distance_func=distance_func, ) From 49faa707b0470e63fe08fd7c2212b0a3e9b4f3ca Mon Sep 17 00:00:00 2001 From: darth-alexus Date: Fri, 20 Feb 2026 16:22:26 +0100 Subject: [PATCH 6/8] add score performance for time mapping --- matchmaker/matchmaker.py | 46 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py index 36ac6be..0aad12e 100644 --- a/matchmaker/matchmaker.py +++ b/matchmaker/matchmaker.py @@ -306,7 +306,6 @@ def __init__( else: raise ValueError(f"Invalid input type {self.input_type}") - use_score_audio = self.input_type == "audio" and method in {"dixon", "arzt"} self.reference_features = self.preprocess_score(use_score_audio) if distance_func is None: @@ -369,36 +368,37 @@ def __init__( else: raise ValueError("Invalid method") - def preprocess_score(self, use_score_audio: bool = False): + def preprocess_score(self): """Preprocess score to extract reference features.""" if self.auto_adjust_tempo and self.performance_file is not None: self.tempo = adjust_tempo_for_performance_file( self.score_part, self.performance_file, self.tempo ) - if use_score_audio: - self.score_audio = generate_score_audio( - self.score_part, self.tempo, self.sample_rate - ).astype(np.float32) - reference_features = self.processor(self.score_audio) - self.processor.reset() - return reference_features - elif self.method in {"arzt", "dixon"}: # only midi + if self.method in {"arzt", "dixon"}: self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) self.ppart.sustain_pedal_threshold = 127 - polling_period = 0.01 - reference_features = ( - partitura.utils.music.compute_pianoroll( - note_info=self.ppart, - time_unit="sec", - time_div=int(np.round(1 / polling_period)), - binary=True, - piano_range=self.config["piano_range"], - ) - .toarray() - .T - ).astype(np.float32) - return reference_features + if self.input_type == "audio": + self.score_audio = generate_score_audio( + self.score_part, self.tempo, self.sample_rate + ).astype(np.float32) + reference_features = self.processor(self.score_audio) + self.processor.reset() + return reference_features + else: + polling_period = 0.01 + reference_features = ( + partitura.utils.music.compute_pianoroll( + note_info=self.ppart, + time_unit="sec", + time_div=int(np.round(1 / polling_period)), + binary=True, + piano_range=self.config["piano_range"], + ) + .toarray() + .T + ).astype(np.float32) + return reference_features else: return self.score_part.note_array() From bf61950a4758aa94a35e61ebd2794743921d9aa0 Mon Sep 17 00:00:00 2001 From: darth-alexus Date: Fri, 20 Feb 2026 16:27:21 +0100 Subject: [PATCH 7/8] remove unused parameter --- matchmaker/matchmaker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py index 0aad12e..cb21790 100644 --- a/matchmaker/matchmaker.py +++ b/matchmaker/matchmaker.py @@ -306,7 +306,7 @@ def __init__( else: raise ValueError(f"Invalid input type {self.input_type}") - self.reference_features = self.preprocess_score(use_score_audio) + self.reference_features = self.preprocess_score() if distance_func is None: distance_func = DEFAULT_DISTANCE_FUNCS[method] From 13b6f1086ecb20c3c0c5045ad15e4bb6f7e879f4 Mon Sep 17 00:00:00 2001 From: darth-alexus Date: Fri, 20 Feb 2026 18:00:27 +0100 Subject: [PATCH 8/8] add note ids to the test score file --- .../assets/simple_mozart_k265_var1.musicxml | 148 +++++++++--------- 1 file changed, 74 insertions(+), 74 deletions(-) diff --git a/matchmaker/assets/simple_mozart_k265_var1.musicxml b/matchmaker/assets/simple_mozart_k265_var1.musicxml index 56bebfc..9130440 100644 --- a/matchmaker/assets/simple_mozart_k265_var1.musicxml +++ b/matchmaker/assets/simple_mozart_k265_var1.musicxml @@ -132,7 +132,7 @@ 4 - + D 5 @@ -145,7 +145,7 @@ begin begin - + C 5 @@ -158,7 +158,7 @@ continue continue - + B 4 @@ -171,7 +171,7 @@ continue continue - + C 5 @@ -184,7 +184,7 @@ end end - + B 4 @@ -197,7 +197,7 @@ begin begin - + C 5 @@ -210,7 +210,7 @@ continue continue - + B 4 @@ -223,7 +223,7 @@ continue continue - + C 5 @@ -239,7 +239,7 @@ 8 - + C 3 @@ -250,7 +250,7 @@ up 2 - + C 4 @@ -263,7 +263,7 @@ - + A 5 @@ -276,7 +276,7 @@ begin begin - + G 5 @@ -289,7 +289,7 @@ continue continue - + F 1 @@ -304,7 +304,7 @@ continue continue - + G 5 @@ -317,7 +317,7 @@ end end - + F 1 @@ -331,7 +331,7 @@ begin begin - + G 5 @@ -344,7 +344,7 @@ continue continue - + F 1 @@ -358,7 +358,7 @@ continue continue - + G 5 @@ -374,7 +374,7 @@ 8 - + E 4 @@ -385,7 +385,7 @@ down 2 - + C 4 @@ -398,7 +398,7 @@ - + G 1 @@ -413,7 +413,7 @@ begin begin - + A 5 @@ -426,7 +426,7 @@ continue continue - + C 6 @@ -439,7 +439,7 @@ continue continue - + B 5 @@ -452,7 +452,7 @@ end end - + D 6 @@ -465,7 +465,7 @@ begin begin - + C 6 @@ -478,7 +478,7 @@ continue continue - + B 5 @@ -491,7 +491,7 @@ continue continue - + A 5 @@ -507,7 +507,7 @@ 8 - + F 4 @@ -518,7 +518,7 @@ down 2 - + C 4 @@ -531,7 +531,7 @@ - + A 5 @@ -547,7 +547,7 @@ - + G 5 @@ -564,7 +564,7 @@ - + E 6 @@ -577,7 +577,7 @@ continue continue - + D 6 @@ -590,7 +590,7 @@ end end - + C 6 @@ -603,7 +603,7 @@ begin begin - + B 5 @@ -616,7 +616,7 @@ continue continue - + A 5 @@ -629,7 +629,7 @@ continue continue - + G 5 @@ -645,7 +645,7 @@ 8 - + C 4 @@ -656,7 +656,7 @@ down 2 - + E @@ -668,7 +668,7 @@ down 2 - + 3 5 @@ -676,7 +676,7 @@ 2 - + C 1 @@ -703,7 +703,7 @@ 65 - + G 5 @@ -719,7 +719,7 @@ - + F 5 @@ -735,7 +735,7 @@ - + D 6 @@ -748,7 +748,7 @@ continue continue - + C 6 @@ -761,7 +761,7 @@ end end - + B 5 @@ -774,7 +774,7 @@ begin begin - + A 5 @@ -787,7 +787,7 @@ continue continue - + G 5 @@ -800,7 +800,7 @@ continue continue - + F 5 @@ -816,7 +816,7 @@ 8 - + D 4 @@ -827,7 +827,7 @@ down 2 - + 3 5 @@ -835,7 +835,7 @@ 2 - + B 3 @@ -848,7 +848,7 @@ - + F 5 @@ -864,7 +864,7 @@ - + E 5 @@ -880,7 +880,7 @@ - + C 6 @@ -893,7 +893,7 @@ continue continue - + B 5 @@ -906,7 +906,7 @@ end end - + A 5 @@ -919,7 +919,7 @@ begin begin - + G 5 @@ -932,7 +932,7 @@ continue continue - + F 5 @@ -945,7 +945,7 @@ continue continue - + E 5 @@ -961,7 +961,7 @@ 8 - + C 4 @@ -973,7 +973,7 @@ down 2 - + 3 5 @@ -981,7 +981,7 @@ 2 - + A 3 @@ -994,7 +994,7 @@ - + D 5 @@ -1006,7 +1006,7 @@ 1 begin - + A 5 @@ -1021,7 +1021,7 @@ - + G 5 @@ -1036,7 +1036,7 @@ - + B 4 @@ -1051,7 +1051,7 @@ 8 - + F 3 @@ -1062,7 +1062,7 @@ down 2 - + G 3 @@ -1075,7 +1075,7 @@ - + C 5 @@ -1086,7 +1086,7 @@ down 1 - + 4 1 @@ -1096,7 +1096,7 @@ 8 - + C 4 @@ -1107,7 +1107,7 @@ down 2 - + C 3