From 2aa75e5bf88a2f1a32f05865d8b5ea9f9f13e6f1 Mon Sep 17 00:00:00 2001
From: darth-alexus <alexandermaxneuhauser@gmail.com>
Date: Sat, 7 Feb 2026 12:22:51 +0100
Subject: [PATCH 1/8] create symbolic oltw-arzt tracker

---
 matchmaker/dp/oltw_arzt.py   |  14 ++--
 matchmaker/features/audio.py |   2 +
 matchmaker/features/midi.py  |   4 +
 matchmaker/matchmaker.py     | 155 ++++++++++++++++++++++++++---------
 matchmaker/utils/misc.py     |  15 ++--
 5 files changed, 138 insertions(+), 52 deletions(-)

diff --git a/matchmaker/dp/oltw_arzt.py b/matchmaker/dp/oltw_arzt.py
index f087017..e4fb8b9 100644
--- a/matchmaker/dp/oltw_arzt.py
+++ b/matchmaker/dp/oltw_arzt.py
@@ -13,7 +13,7 @@
 
 from matchmaker.base import OnlineAlignment
 from matchmaker.dp.dtw_loop import oltw_arzt_loop
-from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT, WINDOW_SIZE
+from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT, WINDOW_SIZE, START_WINDOW_SIZE, STEP_SIZE
 from matchmaker.utils import (
     CYTHONIZED_METRICS_W_ARGUMENTS,
     CYTHONIZED_METRICS_WO_ARGUMENTS,
@@ -27,10 +27,6 @@
     set_latency_stats,
 )
 
-STEP_SIZE: int = 5
-START_WINDOW_SIZE: Union[float, int] = 0.25
-
-
 class OnlineTimeWarpingArzt(OnlineAlignment):
     """
     Fast On-line Time Warping
@@ -97,6 +93,9 @@ def __init__(
         current_position: int = 0,
         frame_rate: int = FRAME_RATE,
         queue: Optional[RECVQueue] = None,
+        state_to_ref_time_map = None,
+        ref_to_state_time_map = None,
+        state_space = None,
         **kwargs,
     ) -> None:
         super().__init__(reference_features=reference_features)
@@ -172,6 +171,9 @@ def __init__(
             "max_latency": 0,
             "min_latency": float("inf"),
         }
+        self.state_to_ref_time_map = state_to_ref_time_map
+        self.ref_to_state_time_map = ref_to_state_time_map
+        self.state_space = state_space #if state_space != None else np.unique(self.reference_features.note_array()["onset_beat"])
 
     @property
     def warping_path(self) -> NDArray[np.int32]:
@@ -288,7 +290,7 @@ def step(self, input_features: NDArray[np.float32]) -> None:
         if self.input_index == 0:
             # enforce the first time step to stay at the
             # initial position
-            self.current_position = min(
+            self.current_position = min( # TODO: Is this necessary?
                 max(self.current_position, min_index),
                 self.current_position,
             )
diff --git a/matchmaker/features/audio.py b/matchmaker/features/audio.py
index 25b1ff7..6ded530 100644
--- a/matchmaker/features/audio.py
+++ b/matchmaker/features/audio.py
@@ -22,6 +22,8 @@
 FEATURES = "chroma"
 QUEUE_TIMEOUT = 10
 WINDOW_SIZE = 5
+STEP_SIZE = 5
+START_WINDOW_SIZE = 0.25
 
 # Type hint for Input Audio frame.
 InputAudioSeries = np.ndarray
diff --git a/matchmaker/features/midi.py b/matchmaker/features/midi.py
index bd2d946..095e31f 100644
--- a/matchmaker/features/midi.py
+++ b/matchmaker/features/midi.py
@@ -20,6 +20,10 @@
 )
 from matchmaker.utils.typing import InputMIDIFrame, NDArrayFloat
 
+WINDOW_SIZE_MIDI = 200
+START_WINDOW_SIZE_MIDI = 200
+STEP_SIZE_MIDI = 5
+
 
 class PitchProcessor(Processor):
     """
diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py
index ecc0aef..b69ec79 100644
--- a/matchmaker/matchmaker.py
+++ b/matchmaker/matchmaker.py
@@ -7,18 +7,27 @@
 import partitura
 from partitura.io.exportmidi import get_ppq
 from partitura.score import Part
+from partitura.musicanalysis.performance_codec import get_time_maps_from_alignment
 
 from matchmaker.dp import OnlineTimeWarpingArzt, OnlineTimeWarpingDixon
 from matchmaker.features.audio import (
     FRAME_RATE,
     SAMPLE_RATE,
+    WINDOW_SIZE,
+    STEP_SIZE,
     ChromagramProcessor,
     CQTProcessor,
     LogSpectralEnergyProcessor,
     MelSpectrogramProcessor,
     MFCCProcessor,
 )
-from matchmaker.features.midi import PianoRollProcessor, PitchIOIProcessor
+from matchmaker.features.midi import (
+    WINDOW_SIZE_MIDI,
+    START_WINDOW_SIZE_MIDI,
+    STEP_SIZE_MIDI,
+    PianoRollProcessor, 
+    PitchIOIProcessor,
+)
 from matchmaker.io.audio import AudioStream
 from matchmaker.io.midi import MidiStream
 from matchmaker.prob.hmm import (
@@ -35,7 +44,7 @@
     transfer_from_score_to_predicted_perf,
 )
 from matchmaker.utils.misc import (
-    adjust_tempo_for_performance_audio,
+    adjust_tempo_for_performance_file,
     generate_score_audio,
     is_audio_file,
     is_midi_file,
@@ -48,14 +57,16 @@
     "arzt": OnlineTimeWarpingArzt.DEFAULT_DISTANCE_FUNC,
     "dixon": OnlineTimeWarpingDixon.DEFAULT_DISTANCE_FUNC,
     "hmm": None,
+    "outerhmm": None,
+    "pthmm": None,
 }
 
 DEFAULT_METHODS = {
     "audio": "arzt",
-    "midi": "hmm",
+    "midi": "outerhmm",
 }
 
-AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm"]
+AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm", "outerhmm"]
 
 
 class Matchmaker(object):
@@ -101,7 +112,7 @@ def __init__(
         )
         self.input_type = input_type
         self.feature_type = feature_type
-        self.frame_rate = frame_rate
+        self.frame_rate = frame_rate if input_type == "audio" else 1
         self.score_part: Optional[Part] = None
         self.distance_func = distance_func
         self.device_name_or_index = device_name_or_index
@@ -118,14 +129,23 @@ def __init__(
             raise ValueError("Score file is required")
 
         try:
-            self.score_part = partitura.load_score_as_part(self.score_file)
-
+            self.score_part = partitura.load_score_as_part(self.score_file) 
+            # if score_file is an xml file, load_score_as_part() uses load_score() -> load_musicxml() which imports invisible objects (e.g. trills) by default
+            # load_score_part() doesn't support an 'ignore_invisible_objects' parameter yet, thus we have to bypass this issue in the following way:
+            # TODO: find a better solution: 
+            unfold = True
+            if self.score_file.endswith('musicxml'):
+                self.score_part = partitura.load_musicxml(self.score_file, ignore_invisible_objects=True)
+                if unfold:
+                    self.score_part = partitura.score.unfold_part_maximal(self.score_part).parts[0]
+                else:
+                    self.score_part = self.score_part.parts[0]
         except Exception as e:
             raise ValueError(f"Invalid score file: {e}")
 
         # setup feature processor
         if self.feature_type is None:
-            self.feature_type = "chroma" if input_type == "audio" else "pitchclass"
+            self.feature_type = "chroma" if input_type == "audio" else "pitch_ioi"
 
         if self.feature_type == "chroma":
             self.processor = ChromagramProcessor(
@@ -147,8 +167,10 @@ def __init__(
             self.processor = LogSpectralEnergyProcessor(
                 sample_rate=sample_rate,
             )
-        elif self.feature_type == "pitchclass":
+        elif self.feature_type == "pitch_ioi":
             self.processor = PitchIOIProcessor(piano_range=True)
+        elif self.feature_type == "pitchclass":    
+            self.processor = PitchClassPianoRollProcessor()
         elif self.feature_type == "pianoroll":
             self.processor = PianoRollProcessor(piano_range=True)
         else:
@@ -165,7 +187,17 @@ def __init__(
                 raise ValueError(
                     f"Invalid performance file. Expected MIDI file, but got {self.performance_file}"
                 )
+            
+        # validate method first
+        if method is None:
+            method = DEFAULT_METHODS[self.input_type]
+        elif method not in AVAILABLE_METHODS:
+            raise ValueError(f"Invalid method. Available methods: {AVAILABLE_METHODS}")
 
+        # setup distance function
+        if distance_func is None:
+            distance_func = DEFAULT_DISTANCE_FUNCS[method]
+        
         # setup stream device
         if self.input_type == "audio":
             self.stream = AudioStream(
@@ -175,7 +207,14 @@ def __init__(
                 wait=wait,
                 target_sr=SAMPLE_RATE,
             )
-        elif self.input_type == "midi":
+        elif self.input_type == "midi" and method == "outerhmm":
+            self.stream = MidiStream(
+                processor=self.processor,
+                port=self.device_name_or_index,
+                file_path=self.performance_file,
+                polling_period=None,
+            )
+        elif self.input_type == "midi" and method != "outerhmm":
             self.stream = MidiStream(
                 processor=self.processor,
                 port=self.device_name_or_index,
@@ -187,23 +226,21 @@ def __init__(
         # preprocess score (setting reference features, tempo)
         self.preprocess_score()
 
-        # validate method first
-        if method is None:
-            method = DEFAULT_METHODS[self.input_type]
-        elif method not in AVAILABLE_METHODS:
-            raise ValueError(f"Invalid method. Available methods: {AVAILABLE_METHODS}")
-
-        # setup distance function
-        if distance_func is None:
-            distance_func = DEFAULT_DISTANCE_FUNCS[method]
-
         # setup score follower
         if method == "arzt":
+            alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]]
+            state_to_ref_time_map, ref_to_state_time_map = get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment)
             self.score_follower = OnlineTimeWarpingArzt(
                 reference_features=self.reference_features,
                 queue=self.stream.queue,
                 distance_func=distance_func,
                 frame_rate=self.frame_rate,
+                window_size=WINDOW_SIZE if self.input_type == "audio" else WINDOW_SIZE_MIDI,
+                start_window_size=START_WINDOW_SIZE if self.input_type == "audio" else START_WINDOW_SIZE_MIDI,
+                state_to_ref_time_map=state_to_ref_time_map,
+                ref_to_state_time_map=ref_to_state_time_map,
+                step_size=STEP_SIZE if self.input_type == "audio" else STEP_SIZE_MIDI,
+                state_space=np.unique(self.score_part.note_array()["onset_beat"])
             )
         elif method == "dixon":
             self.score_follower = OnlineTimeWarpingDixon(
@@ -235,12 +272,29 @@ def __init__(
                 # ioi_precision=2,
                 transition_scale=0.05,
             )
+        elif method == "pthmm" and self.input_type == "midi":
+            self.score_follower = PitchHMM(
+                reference_features=self.reference_features,
+                # observation_model=obs_model,
+                queue=self.stream.queue,
+                tempo_model=tempo_model,
+                has_insertions=True,
+                piano_range=piano_range,
+            )
+        elif method == "outerhmm" and self.input_type == "midi":
+            self.score_follower = OuterProductHMM(
+                reference_features=self.reference_features,
+                queue=self.stream.queue,
+                piano_range=piano_range,
+            )
+        else:
+            raise ValueError("Invalid method")
 
     def preprocess_score(self):
         if self.input_type == "audio":
             if self.performance_file is not None:
                 # tempo is slightly adjusted to reflect the tempo of the performance audio
-                self.tempo = adjust_tempo_for_performance_audio(
+                self.tempo = adjust_tempo_for_performance_file(
                     self.score_part, self.performance_file, self.tempo
                 )
 
@@ -252,7 +306,28 @@ def preprocess_score(self):
             reference_features = self.processor(self.score_audio)
             self.reference_features = reference_features
         else:
-            self.reference_features = self.score_part.note_array()
+            if self.method == "arzt":
+                if self.performance_file is not None:
+                    # tempo is slightly adjusted to reflect the tempo of the performance midi
+                    self.tempo = adjust_tempo_for_performance_file(
+                        self.score_part, self.performance_file, self.tempo
+                    )
+                self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo)
+                self.ppart.sustain_pedal_threshold = 127
+                polling_period = 0.01
+                self.reference_features = (
+                    partitura.utils.music.compute_pianoroll(
+                        note_info=self.ppart,
+                        time_unit="sec",
+                        time_div=int(np.round(1 / polling_period)),
+                        binary=True,
+                        piano_range=True,
+                    )
+                    .toarray()
+                    .T
+                ).astype(np.float32)
+            else:
+                self.reference_features = self.score_part.note_array()
 
     def _convert_frame_to_beat(self, current_frame: int) -> float:
         """
@@ -398,21 +473,21 @@ def run_evaluation(
                 f"Length of the annotation changed: {original_perf_annots_length} -> {len(perf_annots_predicted)}"
             )
 
-        if debug:
-            save_debug_results(
-                self.score_file,
-                self.score_audio,
-                score_annots,
-                score_annots_predicted,
-                self.performance_file,
-                perf_annots,
-                perf_annots_predicted,
-                self.score_follower,
-                self.frame_rate,
-                save_dir,
-                run_name,
-            )
-
+        if self.input_type == 'audio':
+            if debug:
+                save_debug_results(
+                    self.score_file,
+                    self.score_audio if self.input_type=="audio" else None,
+                    score_annots,
+                    score_annots_predicted,
+                    self.performance_file,
+                    perf_annots,
+                    perf_annots_predicted,
+                    self.score_follower,
+                    self.frame_rate,
+                    save_dir,
+                    run_name,
+                )
         if in_seconds:
             eval_results = get_evaluation_results(
                 perf_annots,
@@ -434,9 +509,9 @@ def run_evaluation(
                 tolerances=tolerances,
                 in_seconds=False,
             )
-
-        latency_results = self.get_latency_stats()
-        eval_results.update(latency_results)
+        if self.input_type == 'audio':
+            latency_results = self.get_latency_stats()
+            eval_results.update(latency_results)
         return eval_results
 
     def run(self, verbose: bool = True, wait: bool = True):
diff --git a/matchmaker/utils/misc.py b/matchmaker/utils/misc.py
index 2265ead..b9ffa7c 100644
--- a/matchmaker/utils/misc.py
+++ b/matchmaker/utils/misc.py
@@ -191,25 +191,28 @@ def interleave_with_constant(
     return interleaved_array
 
 
-def adjust_tempo_for_performance_audio(
-    score: ScoreLike, performance_audio: Path, default_tempo: int = 120
+def adjust_tempo_for_performance_file(
+    score: ScoreLike, performance_file: Path, default_tempo: int = 120
 ):
     """
-    Adjust the tempo of the score part to match the performance audio.
+    Adjust the tempo of the score part to match the performance file.
     We round up the tempo to the nearest 20 bpm to avoid too much optimization.
 
     Parameters
     ----------
     score : partitura.score.ScoreLike
         The score to adjust the tempo of.
-    performance_audio : Path
-        The performance audio file to adjust the tempo to.
+    performance_file : Path
+        The performance file to adjust the tempo to.
     default_tempo : int
         The default tempo of the score.
     """
     score_midi = partitura.save_score_midi(score, out=None)
     source_length = score_midi.length
-    target_length = librosa.get_duration(path=str(performance_audio))
+    if is_midi_file(performance_file):
+        target_length = mido.MidiFile(performance_file).length
+    else:
+        target_length = librosa.get_duration(path=str(performance_file))
     ratio = target_length / source_length
     rounded_tempo = int(
         (default_tempo / ratio + 19) // 20 * 20

From eee7925d26ef96ce9989ee9090c7f3ae87cac3d2 Mon Sep 17 00:00:00 2001
From: darth-alexus <alexandermaxneuhauser@gmail.com>
Date: Thu, 12 Feb 2026 18:08:50 +0100
Subject: [PATCH 2/8] incorporate kwargs parameter and restructure

---
 matchmaker/matchmaker.py | 73 +++++++++++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 16 deletions(-)

diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py
index b69ec79..a983535 100644
--- a/matchmaker/matchmaker.py
+++ b/matchmaker/matchmaker.py
@@ -22,9 +22,6 @@
     MFCCProcessor,
 )
 from matchmaker.features.midi import (
-    WINDOW_SIZE_MIDI,
-    START_WINDOW_SIZE_MIDI,
-    STEP_SIZE_MIDI,
     PianoRollProcessor, 
     PitchIOIProcessor,
 )
@@ -36,6 +33,8 @@
     GaussianAudioPitchTempoHMM,
     PitchIOIHMM,
 )
+from matchmaker.utils.tempo_models import KalmanTempoModel
+
 from matchmaker.utils.eval import (
     TOLERANCES_IN_BEATS,
     TOLERANCES_IN_MILLISECONDS,
@@ -68,6 +67,45 @@
 
 AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm", "outerhmm"]
 
+KWARGS = {
+    "audio":
+        {"arzt":
+            {"window_size": 5,
+             "start_window_size": 0.25,
+             "step_size" : 5,
+             },
+        "dixon":
+            {"window_size": 10,
+             },
+        },
+    "midi": 
+        {"arzt": 
+            {"processor": "pianoroll",
+             "piano_range": True,
+             "window_size": 200,
+             "start_window_size": 200,
+             "step_size": 5,
+             },
+        "dixon":
+            {"processor": "pianoroll",
+             "piano_range": True,
+             "window_size": 30,
+             },
+        "hmm": 
+            {"processor": "pitch_ioi",
+             "tempo_model": KalmanTempoModel,
+             "piano_range": True,
+             },
+        "pthmm":
+            {"processor": "pitch_ioi",
+             "piano_range": True,
+             },
+        "outerhmm":
+            {"processor": "pitch_ioi",
+             "piano_range": True,
+             },
+        },
+}
 
 class Matchmaker(object):
     """
@@ -105,6 +143,8 @@ def __init__(
         device_name_or_index: Union[str, int] = None,
         sample_rate: int = SAMPLE_RATE,
         frame_rate: int = FRAME_RATE,
+        kwargs = KWARGS,
+        unfold_score = True,
     ):
         self.score_file = str(score_file)
         self.performance_file = (
@@ -123,21 +163,18 @@ def __init__(
         self.tempo = DEFAULT_TEMPO  # bpm for quarter note
         self._has_run = False
         self.method = method
+        self.config = kwargs[input_type][method]
 
         # setup score file
         if score_file is None:
             raise ValueError("Score file is required")
 
         try:
-            self.score_part = partitura.load_score_as_part(self.score_file) 
-            # if score_file is an xml file, load_score_as_part() uses load_score() -> load_musicxml() which imports invisible objects (e.g. trills) by default
-            # load_score_part() doesn't support an 'ignore_invisible_objects' parameter yet, thus we have to bypass this issue in the following way:
             # TODO: find a better solution: 
-            unfold = True
             if self.score_file.endswith('musicxml'):
-                self.score_part = partitura.load_musicxml(self.score_file, ignore_invisible_objects=True)
-                if unfold:
-                    self.score_part = partitura.score.unfold_part_maximal(self.score_part).parts[0]
+                self.score_part = partitura.load_musicxml(self.score_file, force_note_ids=True, ignore_invisible_objects=True)
+                if unfold_score:
+                    self.score_part = partitura.score.unfold_part_maximal(self.score_part, ignore_leaps = False).parts[0]
                 else:
                     self.score_part = self.score_part.parts[0]
         except Exception as e:
@@ -228,18 +265,17 @@ def __init__(
 
         # setup score follower
         if method == "arzt":
-            alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]]
-            state_to_ref_time_map, ref_to_state_time_map = get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment)
+            state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps()
             self.score_follower = OnlineTimeWarpingArzt(
                 reference_features=self.reference_features,
                 queue=self.stream.queue,
                 distance_func=distance_func,
                 frame_rate=self.frame_rate,
-                window_size=WINDOW_SIZE if self.input_type == "audio" else WINDOW_SIZE_MIDI,
-                start_window_size=START_WINDOW_SIZE if self.input_type == "audio" else START_WINDOW_SIZE_MIDI,
+                window_size=self.config["window_size"],
+                start_window_size=self.config["start_window_size"],
                 state_to_ref_time_map=state_to_ref_time_map,
                 ref_to_state_time_map=ref_to_state_time_map,
-                step_size=STEP_SIZE if self.input_type == "audio" else STEP_SIZE_MIDI,
+                step_size=self.config["step_size"],
                 state_space=np.unique(self.score_part.note_array()["onset_beat"])
             )
         elif method == "dixon":
@@ -297,7 +333,8 @@ def preprocess_score(self):
                 self.tempo = adjust_tempo_for_performance_file(
                     self.score_part, self.performance_file, self.tempo
                 )
-
+            self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) # needed for time maps
+            self.ppart.sustain_pedal_threshold = 127
             # generate score audio
             self.score_audio = generate_score_audio(
                 self.score_part, self.tempo, SAMPLE_RATE
@@ -329,6 +366,10 @@ def preprocess_score(self):
             else:
                 self.reference_features = self.score_part.note_array()
 
+    def get_time_maps(self):
+        alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]]
+        return get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment)
+
     def _convert_frame_to_beat(self, current_frame: int) -> float:
         """
         Convert frame number to relative beat position in the score.

From c4182a1a4e8aa0a983279b3619a251c6ce13ca5f Mon Sep 17 00:00:00 2001
From: darth-alexus <alexandermaxneuhauser@gmail.com>
Date: Thu, 19 Feb 2026 10:23:36 +0100
Subject: [PATCH 3/8] add missing oltw arzt kwargs

---
 matchmaker/matchmaker.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py
index de5f79d..35c6c03 100644
--- a/matchmaker/matchmaker.py
+++ b/matchmaker/matchmaker.py
@@ -79,12 +79,18 @@
         "dixon": {
             "window_size": 10,
         },
-        "arzt": {},
+        "arzt": {
+            "window_size": 5,
+            "start_window_size": 0.25,
+            "step_size" : 5,},
     },
     "midi": {
         "arzt": {
             "processor": "pianoroll",
             "piano_range": True,
+            "window_size": 200,
+            "start_window_size": 200,
+            "step_size": 5,
         },
         "dixon": {
             "processor": "pianoroll",

From d75bc2f3173ebb1debd54a83aad76ee91f3cf440 Mon Sep 17 00:00:00 2001
From: darth-alexus <alexandermaxneuhauser@gmail.com>
Date: Thu, 19 Feb 2026 11:01:05 +0100
Subject: [PATCH 4/8] remove redundant default midi parameters

---
 matchmaker/features/midi.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/matchmaker/features/midi.py b/matchmaker/features/midi.py
index 095e31f..bd2d946 100644
--- a/matchmaker/features/midi.py
+++ b/matchmaker/features/midi.py
@@ -20,10 +20,6 @@
 )
 from matchmaker.utils.typing import InputMIDIFrame, NDArrayFloat
 
-WINDOW_SIZE_MIDI = 200
-START_WINDOW_SIZE_MIDI = 200
-STEP_SIZE_MIDI = 5
-
 
 class PitchProcessor(Processor):
     """

From 53e18635c66677d3dbc550be4d225fd32e9045e1 Mon Sep 17 00:00:00 2001
From: darth-alexus <alexandermaxneuhauser@gmail.com>
Date: Fri, 20 Feb 2026 16:04:24 +0100
Subject: [PATCH 5/8] fix merge conflicts

---
 matchmaker/matchmaker.py | 339 ++++++++++++++++++-------------
 matchmaker/utils/misc.py | 427 ++++++++++++++++-----------------------
 2 files changed, 364 insertions(+), 402 deletions(-)

diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py
index 35c6c03..36ac6be 100644
--- a/matchmaker/matchmaker.py
+++ b/matchmaker/matchmaker.py
@@ -1,12 +1,12 @@
 import os
 import sys
+from pathlib import Path
 from typing import Optional, Union
 
 import numpy as np
-
 import partitura
 from partitura.io.exportmidi import get_ppq
-from partitura.score import Part
+from partitura.score import Part, merge_parts
 from partitura.musicanalysis.performance_codec import get_time_maps_from_alignment
 
 from matchmaker.dp import OnlineTimeWarpingArzt, OnlineTimeWarpingDixon
@@ -15,6 +15,7 @@
     SAMPLE_RATE,
     ChromagramProcessor,
     CQTProcessor,
+    CQTSpectralFluxProcessor,
     LogSpectralEnergyProcessor,
     MelSpectrogramProcessor,
     MFCCProcessor,
@@ -31,11 +32,9 @@
     GaussianAudioPitchTempoHMM,
     PitchHMM,
     PitchIOIHMM,
-    PitchHMM,
 )
 from matchmaker.prob.outer_product_hmm import OuterProductHMM
-from matchmaker.utils.tempo_models import KalmanTempoModel
-
+from matchmaker.prob.outer_product_hmm_audio import AudioOuterProductHMM
 from matchmaker.utils.eval import (
     TOLERANCES_IN_BEATS,
     TOLERANCES_IN_MILLISECONDS,
@@ -52,18 +51,19 @@
     save_debug_results,
 )
 from matchmaker.utils.tempo_models import KalmanTempoModel
-from partitura.io.exportmidi import get_ppq
-from partitura.score import Part
 
 sys.setrecursionlimit(10_000)
 
 PathLike = Union[str, bytes, os.PathLike]
 DEFAULT_TEMPO = 120
+
+
 DEFAULT_DISTANCE_FUNCS = {
     "arzt": OnlineTimeWarpingArzt.DEFAULT_DISTANCE_FUNC,
     "dixon": OnlineTimeWarpingDixon.DEFAULT_DISTANCE_FUNC,
     "hmm": None,
     "outerhmm": None,
+    "audio_outerhmm": None,
     "pthmm": None,
 }
 
@@ -72,8 +72,7 @@
     "midi": "outerhmm",
 }
 
-AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm", "outerhmm"]
-
+AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm", "outerhmm", "audio_outerhmm"]
 KWARGS = {
     "audio": {
         "dixon": {
@@ -83,6 +82,10 @@
             "window_size": 5,
             "start_window_size": 0.25,
             "step_size" : 5,},
+        "audio_outerhmm": {
+            "sample_rate": 16000,
+            "frame_rate": 50,
+        },
     },
     "midi": {
         "arzt": {
@@ -157,9 +160,9 @@ def __init__(
         sample_rate: int = SAMPLE_RATE,
         frame_rate: int = FRAME_RATE,
         tempo: Optional[float] = None,
-        adjust_tempo: bool = False,
-        kwargs = KWARGS,
-        unfold_score = True,
+        kwargs=KWARGS,
+        unfold_score=True,
+        auto_adjust_tempo: bool = False,
     ):
         self.score_file = str(score_file)
         self.performance_file = (
@@ -171,6 +174,8 @@ def __init__(
         self.input_type = input_type
         self.feature_type = feature_type
         self.frame_rate = frame_rate if input_type == "audio" else 1
+        self.sample_rate = sample_rate
+        self.hop_length = sample_rate // self.frame_rate
         self.score_part: Optional[Part] = None
         self.distance_func = distance_func
         self.device_name_or_index = device_name_or_index
@@ -187,75 +192,86 @@ def __init__(
             raise ValueError(f"Invalid method. Available methods: {AVAILABLE_METHODS}")
 
         self.method = method
-        self.config = kwargs[input_type][method]
-        self.adjust_tempo = adjust_tempo
-        self.config = kwargs[input_type][method]
+        self.config = kwargs[self.input_type][self.method]
+        self.auto_adjust_tempo = auto_adjust_tempo
 
-        # setup score file
-        if score_file is None:
-            raise ValueError("Score file is required")
+        # Apply method-specific defaults from config (only if not explicitly provided by caller)
+        if sample_rate == SAMPLE_RATE and "sample_rate" in self.config:
+            self.sample_rate = self.config["sample_rate"]
+        if frame_rate == FRAME_RATE and "frame_rate" in self.config:
+            self.frame_rate = self.config["frame_rate"]
+        self.hop_length = self.sample_rate // self.frame_rate
 
+        # setup score file
         try:
-            # TODO: find a better solution:
-            if self.score_file.endswith("musicxml"):
-                self.score_part = partitura.load_musicxml(
+            ext = Path(self.score_file).suffix.lower()
+            if ext in (".musicxml", ".xml", ".mxl"):
+                score = partitura.load_musicxml(
                     self.score_file, ignore_invisible_objects=True
                 )
-                if unfold_score:
-                    self.score_part = partitura.score.unfold_part_maximal(
-                        self.score_part, ignore_leaps=False
-                    ).parts[0]
-                else:
-                    self.score_part = self.score_part.parts[0]
+            else:
+                score = partitura.load_score(self.score_file)
+
+            if unfold_score:
+                score = partitura.score.unfold_part_maximal(score, ignore_leaps=False)
+            self.score_part = merge_parts(score.parts)
         except Exception as e:
             raise ValueError(f"Invalid score file: {e}")
 
-        # Set tempo: user-provided > score marking > default (120 BPM)
-        # _user_specified_tempo: if True, use uniform tempo; if False, use score tempo map
+        # Set tempo: user-provided > adjust_tempo (always 120) > score marking > default (120 BPM)
         if tempo is not None:
             self.tempo = float(tempo)
-            self._user_specified_tempo = True
+        elif auto_adjust_tempo:
+            self.tempo = DEFAULT_TEMPO
         else:
-            self._user_specified_tempo = False
             score_tempo = get_tempo_from_score(self.score_part, self.score_file)
             self.tempo = score_tempo if score_tempo is not None else DEFAULT_TEMPO
 
         # setup feature processor
         if self.feature_type is None:
-            self.feature_type = "chroma" if input_type == "audio" else "pitch_ioi"
+            if input_type == "audio":
+                self.feature_type = (
+                    "cqt_spectral_flux" if method == "audio_outerhmm" else "chroma"
+                )
+            else:
+                self.feature_type = "pitch_ioi"
 
         if self.feature_type == "chroma":
             self.processor = ChromagramProcessor(
-                sample_rate=sample_rate,
+                sample_rate=self.sample_rate,
+                hop_length=self.hop_length,
             )
         elif self.feature_type == "mfcc":
             self.processor = MFCCProcessor(
-                sample_rate=sample_rate,
+                sample_rate=self.sample_rate,
             )
         elif self.feature_type == "cqt":
             self.processor = CQTProcessor(
-                sample_rate=sample_rate,
+                sample_rate=self.sample_rate,
             )
         elif self.feature_type == "mel":
             self.processor = MelSpectrogramProcessor(
-                sample_rate=sample_rate,
+                sample_rate=self.sample_rate,
             )
         elif self.feature_type == "lse":
             self.processor = LogSpectralEnergyProcessor(
-                sample_rate=sample_rate,
+                sample_rate=self.sample_rate,
             )
         elif self.feature_type == "pitch_ioi":
             self.processor = PitchIOIProcessor(piano_range=self.config["piano_range"])
-        elif self.feature_type == "pitchclass":    
+        elif self.feature_type == "pitchclass":
             self.processor = PitchClassPianoRollProcessor()
         elif self.feature_type == "pianoroll":
             self.processor = PianoRollProcessor(piano_range=self.config["piano_range"])
+        elif self.feature_type == "cqt_spectral_flux":
+            self.processor = CQTSpectralFluxProcessor(
+                sample_rate=self.sample_rate,
+                hop_length=self.hop_length,
+            )
         else:
             raise ValueError(f"Invalid feature type `{self.feature_type}`")
 
-        # validate performance file and input_type
         if self.performance_file is not None:
-            # check performance file type matches input type
             if self.input_type == "audio" and not is_audio_file(self.performance_file):
                 raise ValueError(
                     f"Invalid performance file. Expected audio file, but got {self.performance_file}"
@@ -264,45 +280,38 @@ def __init__(
                 raise ValueError(
                     f"Invalid performance file. Expected MIDI file, but got {self.performance_file}"
                 )
-            
-        # validate method first
-        if method is None:
-            method = DEFAULT_METHODS[self.input_type]
-        elif method not in AVAILABLE_METHODS:
-            raise ValueError(f"Invalid method. Available methods: {AVAILABLE_METHODS}")
 
         # setup distance function
         if distance_func is None:
-            distance_func = DEFAULT_DISTANCE_FUNCS[method]
+            distance_func = DEFAULT_DISTANCE_FUNCS[self.method]
         # setup stream device
+
         if self.input_type == "audio":
             self.stream = AudioStream(
                 processor=self.processor,
                 device_name_or_index=self.device_name_or_index,
                 file_path=self.performance_file,
                 wait=wait,
-                target_sr=SAMPLE_RATE,
+                target_sr=self.sample_rate,
+                sample_rate=self.sample_rate,
+                hop_length=self.hop_length,
             )
-        elif self.input_type == "midi" and method == "outerhmm":
-            self.stream = MidiStream(
-                processor=self.processor,
-                port=self.device_name_or_index,
-                file_path=self.performance_file,
-                polling_period=None,
-            )
-        elif self.input_type == "midi" and method != "outerhmm":
+        elif self.input_type == "midi":
             self.stream = MidiStream(
                 processor=self.processor,
                 port=self.device_name_or_index,
                 file_path=self.performance_file,
+                **({"polling_period": None} if method == "outerhmm" else {}),
             )
         else:
             raise ValueError(f"Invalid input type {self.input_type}")
 
-        # preprocess score (setting reference features, tempo)
-        self.preprocess_score()
+        use_score_audio = self.input_type == "audio" and method in {"dixon", "arzt"}
+        self.reference_features = self.preprocess_score(use_score_audio)
+
+        if distance_func is None:
+            distance_func = DEFAULT_DISTANCE_FUNCS[method]
 
-        # setup score follower
         if method == "arzt":
             state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps()
             self.score_follower = OnlineTimeWarpingArzt(
@@ -332,22 +341,18 @@ def __init__(
                 has_insertions=True,
                 piano_range=self.config["piano_range"],
             )
-        elif method == "hmm" and self.input_type == "audio":
-            # state_space = self._convert_frame_to_beat(np.arange(len(self.reference_features)))
-            self.score_follower = GaussianAudioPitchHMM(
+        elif method == "pthmm" and self.input_type == "audio":
+            self.score_follower = GaussianAudioPitchTempoHMM(
                 reference_features=self.reference_features,
                 queue=self.stream.queue,
-                # state_space=state_space,
-                # patience=50,
             )
-        elif method == "pthmm" and self.input_type == "audio":
-            self.score_follower = GaussianAudioPitchTempoHMM(
+        elif method == "audio_outerhmm" and self.input_type == "audio":
+            self.score_follower = AudioOuterProductHMM(
                 reference_features=self.reference_features,
-                # observation_model=obs_model,
                 queue=self.stream.queue,
-                # pitch_precision=0.5,
-                # ioi_precision=2,
-                transition_scale=0.05,
+                tempo=self.tempo,
+                sample_rate=self.sample_rate,
+                hop_length=self.hop_length,
             )
         elif method == "pthmm" and self.input_type == "midi":
             self.score_follower = PitchHMM(
@@ -364,47 +369,39 @@ def __init__(
         else:
             raise ValueError("Invalid method")
 
-    def preprocess_score(self):
-        if self.input_type == "audio":
-            # Adjust tempo based on performance audio if requested
-            if self.adjust_tempo and self.performance_file is not None:
-                self.tempo = adjust_tempo_for_performance_file(
-                    self.score_part, self.performance_file, self.tempo
-                )
-            self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) # needed for time maps
-            self.ppart.sustain_pedal_threshold = 127
-            # generate score audio
+    def preprocess_score(self, use_score_audio: bool = False):
+        """Preprocess score to extract reference features."""
+        if self.auto_adjust_tempo and self.performance_file is not None:
+            self.tempo = adjust_tempo_for_performance_file(
+                self.score_part, self.performance_file, self.tempo
+            )
+
+        if use_score_audio:
             self.score_audio = generate_score_audio(
-                self.score_part, self.tempo, SAMPLE_RATE
+                self.score_part, self.tempo, self.sample_rate
             ).astype(np.float32)
-
             reference_features = self.processor(self.score_audio)
-            self.reference_features = reference_features
             self.processor.reset()
+            return reference_features
+        elif self.method in {"arzt", "dixon"}: # only midi
+            self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo)
+            self.ppart.sustain_pedal_threshold = 127
+            polling_period = 0.01
+            reference_features = (
+                partitura.utils.music.compute_pianoroll(
+                    note_info=self.ppart,
+                    time_unit="sec",
+                    time_div=int(np.round(1 / polling_period)),
+                    binary=True,
+                    piano_range=self.config["piano_range"],
+                )
+                .toarray()
+                .T
+            ).astype(np.float32)
+            return reference_features
         else:
-            if self.method == "arzt":
-                if self.performance_file is not None:
-                    # tempo is slightly adjusted to reflect the tempo of the performance midi
-                    self.tempo = adjust_tempo_for_performance_file(
-                        self.score_part, self.performance_file, self.tempo
-                    )
-                self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo)
-                self.ppart.sustain_pedal_threshold = 127
-                polling_period = 0.01
-                self.reference_features = (
-                    partitura.utils.music.compute_pianoroll(
-                        note_info=self.ppart,
-                        time_unit="sec",
-                        time_div=int(np.round(1 / polling_period)),
-                        binary=True,
-                        piano_range=self.config["piano_range"],
-                    )
-                    .toarray()
-                    .T
-                ).astype(np.float32)
-            else:
-                self.reference_features = self.score_part.note_array()
-
+            return self.score_part.note_array()
+    
     def get_time_maps(self):
         alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]]
         return get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment)
@@ -428,26 +425,58 @@ def _convert_frame_to_beat(self, current_frame: int) -> float:
         )
         return beat_position
 
-    def build_score_annotations(self, level="beat", musical_beat: bool = False):
+    def build_score_annotations(
+        self,
+        level="beat",
+        musical_beat: bool = False,
+        return_type: str = "beats",  # "beats" or "seconds"
+    ):
+        """
+        Build score annotations in beat or second unit.
+
+        Parameters
+        ----------
+        level : str
+            Level of annotations to use: beat or note (chord onset level)
+        musical_beat : bool
+            Whether to use musical beat
+        return_type : {"beats", "seconds"}
+            Unit of the returned annotations: "beats" (score beats) or "seconds" (time unit)
+
+        Returns
+        -------
+        score_annots : np.ndarray
+            Array of score annotations in beat or second unit
+        """
         score_annots = []
-        if level == "beat":  # TODO: add bar-level, note-level
+        if level == "beat":
             if musical_beat:
                 self.score_part.use_musical_beat()  # for asap dataset
             note_array = np.unique(self.score_part.note_array()["onset_beat"])
             start_beat = np.ceil(note_array.min())
             end_beat = np.floor(note_array.max())
-            self.beats = np.arange(start_beat, end_beat + 1)
+            score_annots_in_beat = np.arange(start_beat, end_beat + 1)
+        elif level == "note":
+            snote_array = self.score_part.note_array()
+            score_annots_in_beat = np.unique(snote_array["onset_beat"])
+        else:
+            raise ValueError(f"Invalid score annotation level: {level}")
 
-            beat_timestamp = [
+        if return_type == "beats":
+            return score_annots_in_beat
+        elif return_type == "seconds":
+            score_annots_in_seconds = [
                 self.score_part.inv_beat_map(beat)
                 / self.score_part.quarter_duration_map(
                     self.score_part.inv_beat_map(beat)
                 )
                 * (60 / self.tempo)
-                for beat in self.beats
+                for beat in score_annots_in_beat
             ]
+            return np.array(score_annots_in_seconds)
+        else:
+            raise ValueError(f"Invalid return type: {return_type}")
 
-            score_annots = np.array(beat_timestamp)
         return score_annots
 
     def convert_timestamps_to_beats(self, timestamps):
@@ -498,13 +527,13 @@ def get_latency_stats(self):
     def run_evaluation(
         self,
         perf_annotations: Union[PathLike, np.ndarray],
-        level: str = "beat",
+        level: str = "note",
         tolerances: list = TOLERANCES_IN_MILLISECONDS,
         musical_beat: bool = False,  # beat annots are difference in some dataset
         debug: bool = False,
         save_dir: PathLike = None,
         run_name: str = None,
-        in_seconds: bool = True,  # 'True' for performance-based, 'False' for score-based
+        domain: str = "performance",  # "score" or "performance"
     ) -> dict:
         """
         Evaluate the score following process
@@ -520,8 +549,9 @@ def run_evaluation(
             Tolerances to use for evaluation (in milliseconds)
         debug : bool
             Whether to save the score and performance audio with beat annotations
-        axis : str
-            Evaluation axis, either 'score' or 'performance'
+        domain : str
+            Evaluation domain, either "score" or "performance".
+            The "score" domain is evaluated in beats, the "performance" domain in seconds. (Default: "performance")
 
         Returns
         -------
@@ -536,66 +566,83 @@ def run_evaluation(
             perf_annots = perf_annotations
         else:
             perf_annots = np.loadtxt(fname=perf_annotations, delimiter="\t", usecols=0)
-        score_annots = self.build_score_annotations(level, musical_beat)
-        original_perf_annots_length = len(perf_annots)
+
+        return_type = "seconds" if domain == "performance" else "beats"
+        score_annots = self.build_score_annotations(level, musical_beat, return_type)
+
+        original_perf_annots_counts = len(perf_annots)
 
         min_length = min(len(score_annots), len(perf_annots))
         score_annots = score_annots[:min_length]
         perf_annots = perf_annots[:min_length]
 
+        mode = (
+            "state"
+            if (self.input_type == "midi" or self.method == "audio_outerhmm")
+            else "frame"
+        )
         perf_annots_predicted = transfer_from_score_to_predicted_perf(
-            self.score_follower.warping_path, score_annots, frame_rate=self.frame_rate
+            self.score_follower.warping_path,
+            score_annots,
+            frame_rate=self.frame_rate,
+            mode=mode,
         )
 
         score_annots_predicted = transfer_from_perf_to_predicted_score(
-            self.score_follower.warping_path, perf_annots, frame_rate=self.frame_rate
+            self.score_follower.warping_path,
+            perf_annots,
+            frame_rate=self.frame_rate,
+            mode=mode,
         )
         score_annots = score_annots[: len(score_annots_predicted)]
 
-        if original_perf_annots_length != len(perf_annots_predicted):
+        if original_perf_annots_counts != len(perf_annots_predicted):
             print(
-                f"Length of the annotation changed: {original_perf_annots_length} -> {len(perf_annots_predicted)}"
+                f"Length of the annotation changed: {original_perf_annots_counts} -> {len(perf_annots_predicted)}"
             )
 
-        if self.input_type == "audio":
-            if debug:
-                save_debug_results(
-                    self.score_file,
-                    self.score_audio if self.input_type == "audio" else None,
-                    score_annots,
-                    score_annots_predicted,
-                    self.performance_file,
-                    perf_annots,
-                    perf_annots_predicted,
-                    self.score_follower,
-                    self.frame_rate,
-                    save_dir,
-                    run_name,
-                )
-        if in_seconds:
+        # Evaluation metrics
+        if domain == "performance":
             eval_results = get_evaluation_results(
                 perf_annots,
                 perf_annots_predicted,
-                total_length=original_perf_annots_length,
+                total_counts=original_perf_annots_counts,
                 tolerances=tolerances,
             )
         else:
-            score_annots = self.beats
             score_annots_predicted = self.convert_timestamps_to_beats(
                 score_annots_predicted
             )
             if tolerances == TOLERANCES_IN_MILLISECONDS:
-                tolerances = TOLERANCES_IN_BEATS  # switch to beats
+                tolerances = TOLERANCES_IN_BEATS
             eval_results = get_evaluation_results(
                 score_annots,
                 score_annots_predicted,
-                total_length=original_perf_annots_length,
+                total_counts=original_perf_annots_counts,
                 tolerances=tolerances,
                 in_seconds=False,
             )
         if self.input_type == "audio":
             latency_results = self.get_latency_stats()
             eval_results.update(latency_results)
+
+        # Debug: save warping path TSV, results JSON, and plots
+        if debug and save_dir is not None:
+            save_debug_results(
+                warping_path=self.score_follower.warping_path,
+                score_annots=score_annots,
+                perf_annots=perf_annots,
+                perf_annots_predicted=perf_annots_predicted,
+                eval_results=eval_results,
+                frame_rate=self.frame_rate,
+                save_dir=save_dir,
+                run_name=run_name or "results",
+                state_space=getattr(self.score_follower, "state_space", None),
+                ref_features=getattr(self.score_follower, "reference_features", None),
+                input_features=getattr(self.score_follower, "input_features", None),
+                distance_func=getattr(self.score_follower, "distance_func", None),
+            )
+
         return eval_results
 
     def run(self, verbose: bool = True, wait: bool = True):
@@ -613,12 +660,12 @@ def run(self, verbose: bool = True, wait: bool = True):
             Alignment results with warping path
         """
         with self.stream:
-            for current_frame in self.score_follower.run(verbose=verbose):
-                if self.input_type == "audio":
-                    position_in_beat = self._convert_frame_to_beat(current_frame)
+            for current_position in self.score_follower.run(verbose=verbose):
+                if self.input_type == "audio" and self.method != "audio_outerhmm":
+                    position_in_beat = self._convert_frame_to_beat(current_position)
                     yield position_in_beat
                 else:
-                    yield float(self.score_follower.state_space[current_frame])
+                    yield float(self.score_follower.state_space[current_position])
 
         self._has_run = True
         return self.score_follower.warping_path
diff --git a/matchmaker/utils/misc.py b/matchmaker/utils/misc.py
index a2d5f57..ce58269 100644
--- a/matchmaker/utils/misc.py
+++ b/matchmaker/utils/misc.py
@@ -6,7 +6,6 @@
 
 import csv
 import numbers
-import os
 import re
 import xml.etree.ElementTree as ET
 from pathlib import Path
@@ -18,13 +17,10 @@
 import numpy as np
 import partitura
 import scipy
-import soundfile as sf
 from matplotlib import pyplot as plt
 from numpy.typing import NDArray
 from partitura.score import ScoreLike
 
-from matchmaker.features.audio import SAMPLE_RATE
-
 # Tempo marking to BPM mapping
 # Reference: https://en.wikipedia.org/wiki/Tempo#Basic_tempo_markings
 TEMPO_MARKING_TO_BPM = {
@@ -325,6 +321,7 @@ def get_tempo_from_score(
     Tries multiple sources in order:
     1. Partitura Tempo objects (explicit BPM)
     2. MusicXML <sound tempo="..."/> element (if score_file provided)
+    3. Text tempo marking (e.g., "Allegro", "Andante") converted to approximate BPM
 
     Parameters
     ----------
@@ -363,113 +360,11 @@ def get_tempo_from_score(
         except Exception:
             pass
 
-    return None
-
-
-def get_tempo_at_beat(
-    score_part: ScoreLike,
-    beat: float,
-    default_tempo: float = 120.0,
-) -> float:
-    """
-    Get tempo (BPM) at a specific beat position in the score.
-
-    Uses score tempo markings if available. Falls back to default_tempo otherwise.
-
-    Parameters
-    ----------
-    score_part : ScoreLike
-        Partitura score part
-    beat : float
-        Beat position in the score
-    default_tempo : float
-        Default tempo to use if no tempo markings found
-
-    Returns
-    -------
-    float
-        Tempo in BPM at the given beat position
-    """
-    if score_part is None:
-        return default_tempo
-
-    # Collect all tempo markings with their positions
-    tempo_changes = []
-    try:
-        for tempo_obj in score_part.iter_all(partitura.score.Tempo):
-            if hasattr(tempo_obj, "bpm") and tempo_obj.bpm is not None:
-                # Get beat position of tempo marking
-                start_time = getattr(tempo_obj, "start", None)
-                if start_time is not None:
-                    tempo_beat = score_part.beat_map(start_time.t)
-                    tempo_changes.append((tempo_beat, float(tempo_obj.bpm)))
-    except Exception:
-        pass
-
-    if not tempo_changes:
-        return default_tempo
-
-    # Sort by beat position
-    tempo_changes.sort(key=lambda x: x[0])
-
-    # Find the tempo at the given beat (last tempo marking before or at beat)
-    current_tempo = default_tempo
-    for tempo_beat, bpm in tempo_changes:
-        if tempo_beat <= beat:
-            current_tempo = bpm
-        else:
-            break
-
-    return current_tempo
-
-
-def get_tempo_from_score(
-    score_part: ScoreLike,
-    score_file: Optional[Union[str, Path]] = None,
-) -> Optional[float]:
-    """
-    Extract first tempo marking from score if available.
-
-    Tries multiple sources in order:
-    1. Partitura Tempo objects (explicit BPM)
-    2. MusicXML <sound tempo="..."/> element (if score_file provided)
-
-    Parameters
-    ----------
-    score_part : ScoreLike
-        Partitura score part
-    score_file : str or Path, optional
-        Path to the score file. Used as fallback to parse MusicXML directly
-        when partitura doesn't extract tempo.
-
-    Returns
-    -------
-    float or None
-        Tempo in BPM if found in score, None otherwise.
-    """
-    # Try partitura Tempo objects first
-    if score_part is not None:
-        try:
-            for tempo_obj in score_part.iter_all(partitura.score.Tempo):
-                if hasattr(tempo_obj, "bpm") and tempo_obj.bpm is not None:
-                    return float(tempo_obj.bpm)
-        except Exception:
-            pass
-
-    # Fallback: parse MusicXML directly for <sound tempo="..."/>
+    # Fallback: extract from text tempo marking (e.g., "Allegro", "Andante")
     if score_file is not None:
-        try:
-            import xml.etree.ElementTree as ET
-
-            tree = ET.parse(str(score_file))
-            root = tree.getroot()
-
-            for sound_elem in root.iter("sound"):
-                tempo_attr = sound_elem.get("tempo")
-                if tempo_attr is not None:
-                    return float(tempo_attr)
-        except Exception:
-            pass
+        text_tempo = extract_tempo_marking_from_musicxml(score_file)
+        if text_tempo is not None:
+            return text_tempo
 
     return None
 
@@ -626,161 +521,181 @@ def save_nparray_to_csv(array: NDArray, save_path: str):
         writer.writerows(array)
 
 
-def save_mixed_audio(
-    audio: Union[np.ndarray, str, os.PathLike],
-    annots: np.ndarray,
-    save_path: Union[str, os.PathLike],
-    sr: int = SAMPLE_RATE,
+def plot_alignment(
+    warping_path: np.ndarray,
+    perf_annots: np.ndarray,
+    perf_annots_predicted: np.ndarray,
+    save_dir: Path,
+    name: str,
+    score_y: Optional[np.ndarray] = None,
+    frame_rate: float = 1.0,
+    state_space: Optional[np.ndarray] = None,
+    ref_features: Optional[np.ndarray] = None,
+    input_features: Optional[np.ndarray] = None,
+    distance_func=None,
 ):
-    if not isinstance(audio, np.ndarray):
-        audio, _ = librosa.load(audio, sr=sr)
-
-    annots_audio = librosa.clicks(
-        times=annots,
-        sr=sr,
-        click_freq=1000,
-        length=len(audio),
-    )
-    audio_mixed = audio + annots_audio
-    sf.write(str(save_path), audio_mixed, sr, subtype="PCM_24")
-
-
-def plot_and_save_score_following_result(
-    wp,
-    ref_features,
-    input_features,
-    distance_func,
-    save_dir,
-    score_annots,
-    perf_annots,
-    frame_rate,
-    name=None,
-):
-    xmin = 0  # performance range
-    xmax = None
-    ymin = 0  # score range
-    ymax = None
-
-    xmax = xmax if xmax is not None else input_features.shape[0] - 1
-    ymax = ymax if ymax is not None else ref_features.shape[0] - 1
-    x_indices = range(xmin, xmax + 1)
-    y_indices = range(ymin, ymax + 1)
-
-    run_name = name or "results"
-    save_path = save_dir / f"wp_{run_name}.tsv"
-    save_nparray_to_csv(wp.T, save_path.as_posix())
-
-    dist = scipy.spatial.distance.cdist(
-        ref_features[y_indices, :],
-        input_features[x_indices, :],
-        metric=distance_func,
-    )  # [d, wy]
-    plt.figure(figsize=(10, 10))
-    plt.imshow(
-        dist,
-        aspect="auto",
-        origin="lower",
-        interpolation="nearest",
-        extent=(xmin, xmax, ymin, ymax),
+    """Plot warping path, GT annotations, and predicted points in one figure.
+
+    Layers (back to front): distance matrix → warping path → predicted → GT.
+    """
+    save_dir.mkdir(parents=True, exist_ok=True)
+    gt = np.asarray(perf_annots, dtype=float)
+    pred = np.asarray(perf_annots_predicted, dtype=float)
+    n = min(len(gt), len(pred))
+    gt, pred = gt[:n], pred[:n]
+
+    has_dist_matrix = (
+        ref_features is not None
+        and input_features is not None
+        and distance_func is not None
     )
-    mask_perf = (xmin <= perf_annots * frame_rate) & (perf_annots * frame_rate <= xmax)
-    mask_score = (ymin <= score_annots * frame_rate) & (
-        score_annots * frame_rate <= ymax
+
+    fig, ax = plt.subplots(figsize=(30, 30))
+
+    if has_dist_matrix:
+        # DTW mode: everything in frame space
+        dist = scipy.spatial.distance.cdist(
+            ref_features,
+            input_features,
+            metric=distance_func,
+        )
+        ax.imshow(
+            dist,
+            aspect="auto",
+            origin="lower",
+            interpolation="nearest",
+            extent=(0, input_features.shape[0] - 1, 0, ref_features.shape[0] - 1),
+        )
+        x_gt = gt * float(frame_rate)
+        x_pred = pred * float(frame_rate)
+        if score_y is not None:
+            y = np.asarray(score_y, dtype=float)[:n] * float(frame_rate)
+        else:
+            y = np.arange(n)
+        ylabel = "score (frames)"
+        wp_x = warping_path[1]
+        wp_y = warping_path[0]
+    else:
+        # HMM mode: x in frames, y in beats via state_space
+        x_gt = gt * float(frame_rate)
+        x_pred = pred * float(frame_rate)
+        if score_y is None:
+            y = np.arange(n)
+            ylabel = "annotation index"
+        else:
+            y = np.asarray(score_y, dtype=float)[:n]
+            ylabel = "score position (beats)"
+        wp_x = warping_path[1]
+        if state_space is not None:
+            wp_y = state_space[warping_path[0]]
+        else:
+            wp_y = warping_path[0]
+
+    # 1. Warping path
+    if has_dist_matrix:
+        ax.plot(
+            wp_x,
+            wp_y,
+            ".",
+            color="white",
+            alpha=0.7,
+            markersize=15,
+            label="warping path",
+            zorder=2,
+        )
+    else:
+        ax.plot(
+            wp_x,
+            wp_y,
+            ".",
+            color="lime",
+            alpha=0.5,
+            markersize=15,
+            label="warping path",
+            zorder=2,
+        )
+
+    # 2. Predicted points
+    ax.scatter(
+        x_pred,
+        y,
+        label="predicted",
+        s=80,
+        alpha=0.9,
+        marker="o",
+        color="blue",
+        linewidths=0,
+        zorder=3,
     )
-    plt.title(
-        f"[{save_dir.name}/{run_name}] \n Matchmaker alignment path with ground-truth labels",
-        fontsize=15,
+
+    # 3. GT annotations (front)
+    ax.scatter(
+        x_gt,
+        y,
+        label="ground truth",
+        s=120,
+        alpha=0.9,
+        marker="x",
+        color="red",
+        linewidths=3,
+        zorder=4,
     )
-    plt.xlabel("Performance Features", fontsize=15)
-    plt.ylabel("Score Features", fontsize=15)
-
-    # plot online DTW path
-    cropped_history = [
-        (ref, target)
-        for (ref, target) in wp.T
-        if xmin <= target <= xmax and ymin <= ref <= ymax
-    ]
-    for ref, target in cropped_history:
-        plt.plot(target, ref, ".", color="cyan", alpha=0.5, markersize=3)
-
-    # plot ground-truth labels
-    for ref, target in zip(score_annots, perf_annots):
-        if (xmin <= target * frame_rate <= xmax) and (ymin <= ref * frame_rate <= ymax):
-            plt.plot(
-                target * frame_rate,
-                ref * frame_rate,
-                "x",
-                color="r",
-                alpha=1,
-                markersize=3,
-                markeredgewidth=3,
-            )
-    plt.savefig(save_dir / f"{run_name}.png")
+
+    ax.set_xlabel("performance frame")
+    ax.set_ylabel(ylabel)
+    ax.set_title(f"[{save_dir.name}] alignment ({name})")
+    ax.grid(True, alpha=0.2)
+    ax.legend(loc="best")
+    fig.tight_layout()
+    fig.savefig(save_dir / f"{name}.png", dpi=150)
+    plt.close(fig)
 
 
 def save_debug_results(
-    score_file,
-    score_audio,
-    score_annots,
-    score_annots_predicted,
-    perf_file,
-    perf_annots,
-    perf_annots_predicted,
-    model,
-    frame_rate,
-    save_dir=None,
-    run_name=None,
+    warping_path: np.ndarray,
+    score_annots: np.ndarray,
+    perf_annots: np.ndarray,
+    perf_annots_predicted: np.ndarray,
+    eval_results: dict,
+    frame_rate: float,
+    save_dir: Path,
+    run_name: str = "results",
+    state_space: Optional[np.ndarray] = None,
+    ref_features: Optional[np.ndarray] = None,
+    input_features: Optional[np.ndarray] = None,
+    distance_func=None,
 ):
-    # save score audio with beat annotations
-    score_audio_dir = Path("./score_audio")
-    score_audio_dir.mkdir(parents=True, exist_ok=True)
-    run_name_suffix = (
-        f"{Path(perf_file).stem}_{run_name}" if run_name else f"{Path(perf_file).stem}"
-    )
-    save_mixed_audio(
-        score_audio,
-        score_annots,
-        save_path=score_audio_dir
-        / f"score_audio_{Path(score_file).parent.parent.name}_{Path(score_file).stem}_{run_name_suffix}.wav",
-    )
-    # save performance audio with beat annotations
-    perf_audio_dir = Path("./performance_audio")
-    perf_audio_dir.mkdir(parents=True, exist_ok=True)
-    save_mixed_audio(
-        perf_file,
+    """Save debug outputs: warping path TSV, results JSON, and alignment plot."""
+    save_dir = Path(save_dir)
+    save_dir.mkdir(parents=True, exist_ok=True)
+
+    # 1. Warping path TSV + results JSON
+    save_nparray_to_csv(warping_path.T, (save_dir / f"wp_{run_name}.tsv").as_posix())
+    import json
+
+    with open(save_dir / f"{run_name}.json", "w") as f:
+        json.dump(eval_results, f, indent=4)
+
+    # 2. Alignment plot
+    if state_space is not None:
+        score_y = state_space
+    else:
+        sx = np.asarray(score_annots, dtype=float)
+        score_y = (
+            sx
+            if sx.ndim == 1 and len(sx) == len(perf_annots) and np.all(np.diff(sx) >= 0)
+            else None
+        )
+    plot_alignment(
+        warping_path,
         perf_annots,
-        save_path=perf_audio_dir
-        / f"perf_audio_{Path(perf_file).parent.parent.name}_{Path(perf_file).parent.name}_{run_name_suffix}.wav",
-    )
-    # save score audio with predicted beat annotations
-    score_predicted_audio_dir = Path("./score_audio_predicted")
-    score_predicted_audio_dir.mkdir(parents=True, exist_ok=True)
-    save_mixed_audio(
-        score_audio,
-        score_annots_predicted,
-        save_path=score_predicted_audio_dir
-        / f"score_audio_{Path(score_file).parent.parent.name}_{Path(score_file).parent.name}_{run_name_suffix}.wav",
-    )
-    # save performance audio with predicted beat annotations
-    perf_predicted_audio_dir = Path("./performance_audio_predicted")
-    perf_predicted_audio_dir.mkdir(parents=True, exist_ok=True)
-    save_mixed_audio(
-        perf_file,
         perf_annots_predicted,
-        save_path=perf_predicted_audio_dir
-        / f"perf_audio_{Path(perf_file).parent.parent.name}_{Path(perf_file).parent.name}_{run_name_suffix}.wav",
-    )
-    # save score following plot result
-    save_dir = save_dir or Path("./tests/results")
-    save_dir.mkdir(parents=True, exist_ok=True)
-    plot_and_save_score_following_result(
-        model.warping_path,
-        model.reference_features,
-        model.input_features,
-        model.distance_func,
         save_dir,
-        score_annots,
-        perf_annots,
-        frame_rate,
-        name=run_name,
+        run_name,
+        score_y=score_y,
+        frame_rate=frame_rate,
+        state_space=state_space,
+        ref_features=ref_features,
+        input_features=input_features,
+        distance_func=distance_func,
     )

From 49faa707b0470e63fe08fd7c2212b0a3e9b4f3ca Mon Sep 17 00:00:00 2001
From: darth-alexus <alexandermaxneuhauser@gmail.com>
Date: Fri, 20 Feb 2026 16:22:26 +0100
Subject: [PATCH 6/8] add score performance for time mapping

---
 matchmaker/matchmaker.py | 46 ++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py
index 36ac6be..0aad12e 100644
--- a/matchmaker/matchmaker.py
+++ b/matchmaker/matchmaker.py
@@ -306,7 +306,6 @@ def __init__(
         else:
             raise ValueError(f"Invalid input type {self.input_type}")
 
-        use_score_audio = self.input_type == "audio" and method in {"dixon", "arzt"}
         self.reference_features = self.preprocess_score(use_score_audio)
 
         if distance_func is None:
@@ -369,36 +368,37 @@ def __init__(
         else:
             raise ValueError("Invalid method")
 
-    def preprocess_score(self, use_score_audio: bool = False):
+    def preprocess_score(self):
         """Preprocess score to extract reference features."""
         if self.auto_adjust_tempo and self.performance_file is not None:
             self.tempo = adjust_tempo_for_performance_file(
                 self.score_part, self.performance_file, self.tempo
             )
 
-        if use_score_audio:
-            self.score_audio = generate_score_audio(
-                self.score_part, self.tempo, self.sample_rate
-            ).astype(np.float32)
-            reference_features = self.processor(self.score_audio)
-            self.processor.reset()
-            return reference_features
-        elif self.method in {"arzt", "dixon"}: # only midi
+        if self.method in {"arzt", "dixon"}:
             self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo)
             self.ppart.sustain_pedal_threshold = 127
-            polling_period = 0.01
-            reference_features = (
-                partitura.utils.music.compute_pianoroll(
-                    note_info=self.ppart,
-                    time_unit="sec",
-                    time_div=int(np.round(1 / polling_period)),
-                    binary=True,
-                    piano_range=self.config["piano_range"],
-                )
-                .toarray()
-                .T
-            ).astype(np.float32)
-            return reference_features
+            if self.input_type == "audio":
+                self.score_audio = generate_score_audio(
+                    self.score_part, self.tempo, self.sample_rate
+                ).astype(np.float32)
+                reference_features = self.processor(self.score_audio)
+                self.processor.reset()
+                return reference_features
+            else:
+                polling_period = 0.01
+                reference_features = (
+                    partitura.utils.music.compute_pianoroll(
+                        note_info=self.ppart,
+                        time_unit="sec",
+                        time_div=int(np.round(1 / polling_period)),
+                        binary=True,
+                        piano_range=self.config["piano_range"],
+                    )
+                    .toarray()
+                    .T
+                ).astype(np.float32)
+                return reference_features
         else:
             return self.score_part.note_array()
     

From bf61950a4758aa94a35e61ebd2794743921d9aa0 Mon Sep 17 00:00:00 2001
From: darth-alexus <alexandermaxneuhauser@gmail.com>
Date: Fri, 20 Feb 2026 16:27:21 +0100
Subject: [PATCH 7/8] remove unused parameter

---
 matchmaker/matchmaker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py
index 0aad12e..cb21790 100644
--- a/matchmaker/matchmaker.py
+++ b/matchmaker/matchmaker.py
@@ -306,7 +306,7 @@ def __init__(
         else:
             raise ValueError(f"Invalid input type {self.input_type}")
 
-        self.reference_features = self.preprocess_score(use_score_audio)
+        self.reference_features = self.preprocess_score()
 
         if distance_func is None:
             distance_func = DEFAULT_DISTANCE_FUNCS[method]

From 13b6f1086ecb20c3c0c5045ad15e4bb6f7e879f4 Mon Sep 17 00:00:00 2001
From: darth-alexus <alexandermaxneuhauser@gmail.com>
Date: Fri, 20 Feb 2026 18:00:27 +0100
Subject: [PATCH 8/8] add note ids to the test score file

---
 .../assets/simple_mozart_k265_var1.musicxml   | 148 +++++++++---------
 1 file changed, 74 insertions(+), 74 deletions(-)

diff --git a/matchmaker/assets/simple_mozart_k265_var1.musicxml b/matchmaker/assets/simple_mozart_k265_var1.musicxml
index 56bebfc..9130440 100644
--- a/matchmaker/assets/simple_mozart_k265_var1.musicxml
+++ b/matchmaker/assets/simple_mozart_k265_var1.musicxml
@@ -132,7 +132,7 @@
           <line>4</line>
           </clef>
         </attributes>
-      <note default-x="82.98" default-y="-10">
+      <note id="n1" default-x="82.98" default-y="-10">
         <pitch>
           <step>D</step>
           <octave>5</octave>
@@ -145,7 +145,7 @@
         <beam number="1">begin</beam>
         <beam number="2">begin</beam>
         </note>
-      <note default-x="108.93" default-y="-15">
+      <note id="n2" default-x="108.93" default-y="-15">
         <pitch>
           <step>C</step>
           <octave>5</octave>
@@ -158,7 +158,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="134.88" default-y="-20">
+      <note id="n3" default-x="134.88" default-y="-20">
         <pitch>
           <step>B</step>
           <octave>4</octave>
@@ -171,7 +171,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="160.83" default-y="-15">
+      <note id="n4" default-x="160.83" default-y="-15">
         <pitch>
           <step>C</step>
           <octave>5</octave>
@@ -184,7 +184,7 @@
         <beam number="1">end</beam>
         <beam number="2">end</beam>
         </note>
-      <note default-x="186.79" default-y="-20">
+      <note id="n5" default-x="186.79" default-y="-20">
         <pitch>
           <step>B</step>
           <octave>4</octave>
@@ -197,7 +197,7 @@
         <beam number="1">begin</beam>
         <beam number="2">begin</beam>
         </note>
-      <note default-x="212.74" default-y="-15">
+      <note id="n6" default-x="212.74" default-y="-15">
         <pitch>
           <step>C</step>
           <octave>5</octave>
@@ -210,7 +210,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="238.69" default-y="-20">
+      <note id="n7" default-x="238.69" default-y="-20">
         <pitch>
           <step>B</step>
           <octave>4</octave>
@@ -223,7 +223,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="264.65" default-y="-15">
+      <note id="n8" default-x="264.65" default-y="-15">
         <pitch>
           <step>C</step>
           <octave>5</octave>
@@ -239,7 +239,7 @@
       <backup>
         <duration>8</duration>
         </backup>
-      <note default-x="82.98" default-y="-130">
+      <note id="n9" default-x="82.98" default-y="-130">
         <pitch>
           <step>C</step>
           <octave>3</octave>
@@ -250,7 +250,7 @@
         <stem>up</stem>
         <staff>2</staff>
         </note>
-      <note default-x="186.79" default-y="-95">
+      <note id="n10" default-x="186.79" default-y="-95">
         <pitch>
           <step>C</step>
           <octave>4</octave>
@@ -263,7 +263,7 @@
         </note>
       </measure>
     <measure number="2" width="226.76">
-      <note default-x="12.5" default-y="10">
+      <note id="n11" default-x="12.5" default-y="10">
         <pitch>
           <step>A</step>
           <octave>5</octave>
@@ -276,7 +276,7 @@
         <beam number="1">begin</beam>
         <beam number="2">begin</beam>
         </note>
-      <note default-x="38.45" default-y="5">
+      <note id="n12" default-x="38.45" default-y="5">
         <pitch>
           <step>G</step>
           <octave>5</octave>
@@ -289,7 +289,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="67.2" default-y="0">
+      <note id="n13" default-x="67.2" default-y="0">
         <pitch>
           <step>F</step>
           <alter>1</alter>
@@ -304,7 +304,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="93.16" default-y="5">
+      <note id="n14" default-x="93.16" default-y="5">
         <pitch>
           <step>G</step>
           <octave>5</octave>
@@ -317,7 +317,7 @@
         <beam number="1">end</beam>
         <beam number="2">end</beam>
         </note>
-      <note default-x="119.11" default-y="0">
+      <note id="n15" default-x="119.11" default-y="0">
         <pitch>
           <step>F</step>
           <alter>1</alter>
@@ -331,7 +331,7 @@
         <beam number="1">begin</beam>
         <beam number="2">begin</beam>
         </note>
-      <note default-x="145.06" default-y="5">
+      <note id="n16" default-x="145.06" default-y="5">
         <pitch>
           <step>G</step>
           <octave>5</octave>
@@ -344,7 +344,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="171.02" default-y="0">
+      <note id="n17" default-x="171.02" default-y="0">
         <pitch>
           <step>F</step>
           <alter>1</alter>
@@ -358,7 +358,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="196.97" default-y="5">
+      <note id="n18" default-x="196.97" default-y="5">
         <pitch>
           <step>G</step>
           <octave>5</octave>
@@ -374,7 +374,7 @@
       <backup>
         <duration>8</duration>
         </backup>
-      <note default-x="12.5" default-y="-85">
+      <note id="n19" default-x="12.5" default-y="-85">
         <pitch>
           <step>E</step>
           <octave>4</octave>
@@ -385,7 +385,7 @@
         <stem>down</stem>
         <staff>2</staff>
         </note>
-      <note default-x="119.11" default-y="-95">
+      <note id="n20" default-x="119.11" default-y="-95">
         <pitch>
           <step>C</step>
           <octave>4</octave>
@@ -398,7 +398,7 @@
         </note>
       </measure>
     <measure number="3" width="230.22">
-      <note default-x="18.76" default-y="5">
+      <note id="n21" default-x="18.76" default-y="5">
         <pitch>
           <step>G</step>
           <alter>1</alter>
@@ -413,7 +413,7 @@
         <beam number="1">begin</beam>
         <beam number="2">begin</beam>
         </note>
-      <note default-x="44.71" default-y="10">
+      <note id="n22" default-x="44.71" default-y="10">
         <pitch>
           <step>A</step>
           <octave>5</octave>
@@ -426,7 +426,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="70.66" default-y="20">
+      <note id="n23" default-x="70.66" default-y="20">
         <pitch>
           <step>C</step>
           <octave>6</octave>
@@ -439,7 +439,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="96.62" default-y="15">
+      <note id="n24" default-x="96.62" default-y="15">
         <pitch>
           <step>B</step>
           <octave>5</octave>
@@ -452,7 +452,7 @@
         <beam number="1">end</beam>
         <beam number="2">end</beam>
         </note>
-      <note default-x="122.57" default-y="25">
+      <note id="n25" default-x="122.57" default-y="25">
         <pitch>
           <step>D</step>
           <octave>6</octave>
@@ -465,7 +465,7 @@
         <beam number="1">begin</beam>
         <beam number="2">begin</beam>
         </note>
-      <note default-x="148.52" default-y="20">
+      <note id="n26" default-x="148.52" default-y="20">
         <pitch>
           <step>C</step>
           <octave>6</octave>
@@ -478,7 +478,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="174.48" default-y="15">
+      <note id="n27" default-x="174.48" default-y="15">
         <pitch>
           <step>B</step>
           <octave>5</octave>
@@ -491,7 +491,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="200.43" default-y="10">
+      <note id="n28" default-x="200.43" default-y="10">
         <pitch>
           <step>A</step>
           <octave>5</octave>
@@ -507,7 +507,7 @@
       <backup>
         <duration>8</duration>
         </backup>
-      <note default-x="18.76" default-y="-80">
+      <note id="n29" default-x="18.76" default-y="-80">
         <pitch>
           <step>F</step>
           <octave>4</octave>
@@ -518,7 +518,7 @@
         <stem>down</stem>
         <staff>2</staff>
         </note>
-      <note default-x="122.57" default-y="-95">
+      <note id="n30" default-x="122.57" default-y="-95">
         <pitch>
           <step>C</step>
           <octave>4</octave>
@@ -531,7 +531,7 @@
         </note>
       </measure>
     <measure number="4" width="227.14">
-      <note default-x="12.5" default-y="10">
+      <note id="n31" default-x="12.5" default-y="10">
         <pitch>
           <step>A</step>
           <octave>5</octave>
@@ -547,7 +547,7 @@
           <slur type="start" orientation="over" placement="above" number="1"/>
           </notations>
         </note>
-      <note default-x="41.63" default-y="5">
+      <note id="n32" default-x="41.63" default-y="5">
         <pitch>
           <step>G</step>
           <octave>5</octave>
@@ -564,7 +564,7 @@
           <slur type="stop" number="1"/>
           </notations>
         </note>
-      <note default-x="67.58" default-y="30">
+      <note id="n33" default-x="67.58" default-y="30">
         <pitch>
           <step>E</step>
           <octave>6</octave>
@@ -577,7 +577,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="93.54" default-y="25">
+      <note id="n34" default-x="93.54" default-y="25">
         <pitch>
           <step>D</step>
           <octave>6</octave>
@@ -590,7 +590,7 @@
         <beam number="1">end</beam>
         <beam number="2">end</beam>
         </note>
-      <note default-x="119.49" default-y="20">
+      <note id="n35" default-x="119.49" default-y="20">
         <pitch>
           <step>C</step>
           <octave>6</octave>
@@ -603,7 +603,7 @@
         <beam number="1">begin</beam>
         <beam number="2">begin</beam>
         </note>
-      <note default-x="145.44" default-y="15">
+      <note id="n36" default-x="145.44" default-y="15">
         <pitch>
           <step>B</step>
           <octave>5</octave>
@@ -616,7 +616,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="171.4" default-y="10">
+      <note id="n37" default-x="171.4" default-y="10">
         <pitch>
           <step>A</step>
           <octave>5</octave>
@@ -629,7 +629,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="197.35" default-y="5">
+      <note id="n38" default-x="197.35" default-y="5">
         <pitch>
           <step>G</step>
           <octave>5</octave>
@@ -645,7 +645,7 @@
       <backup>
         <duration>8</duration>
         </backup>
-      <note default-x="12.5" default-y="-95">
+      <note id="n39" default-x="12.5" default-y="-95">
         <pitch>
           <step>C</step>
           <octave>4</octave>
@@ -656,7 +656,7 @@
         <stem>down</stem>
         <staff>2</staff>
         </note>
-      <note default-x="12.5" default-y="-85">
+      <note id="n40" default-x="12.5" default-y="-85">
         <chord/>
         <pitch>
           <step>E</step>
@@ -668,7 +668,7 @@
         <stem>down</stem>
         <staff>2</staff>
         </note>
-      <note default-x="119.49" default-y="-125">
+      <note id="n41" default-x="119.49" default-y="-125">
         <rest/>
         <duration>3</duration>
         <voice>5</voice>
@@ -676,7 +676,7 @@
         <dot default-x="135.53" default-y="-120"/>
         <staff>2</staff>
         </note>
-      <note default-x="197.35" default-y="-95">
+      <note id="n42" default-x="197.35" default-y="-95">
         <pitch>
           <step>C</step>
           <alter>1</alter>
@@ -703,7 +703,7 @@
           <staff-distance>65</staff-distance>
           </staff-layout>
         </print>
-      <note default-x="60.86" default-y="5">
+      <note id="n43" default-x="60.86" default-y="5">
         <pitch>
           <step>G</step>
           <octave>5</octave>
@@ -719,7 +719,7 @@
           <slur type="start" orientation="over" placement="above" number="1"/>
           </notations>
         </note>
-      <note default-x="95.12" default-y="0">
+      <note id="n44" default-x="95.12" default-y="0">
         <pitch>
           <step>F</step>
           <octave>5</octave>
@@ -735,7 +735,7 @@
           <slur type="stop" number="1"/>
           </notations>
         </note>
-      <note default-x="129.38" default-y="25">
+      <note id="n45" default-x="129.38" default-y="25">
         <pitch>
           <step>D</step>
           <octave>6</octave>
@@ -748,7 +748,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="163.63" default-y="20">
+      <note id="n46" default-x="163.63" default-y="20">
         <pitch>
           <step>C</step>
           <octave>6</octave>
@@ -761,7 +761,7 @@
         <beam number="1">end</beam>
         <beam number="2">end</beam>
         </note>
-      <note default-x="197.89" default-y="15">
+      <note id="n47" default-x="197.89" default-y="15">
         <pitch>
           <step>B</step>
           <octave>5</octave>
@@ -774,7 +774,7 @@
         <beam number="1">begin</beam>
         <beam number="2">begin</beam>
         </note>
-      <note default-x="232.15" default-y="10">
+      <note id="n48" default-x="232.15" default-y="10">
         <pitch>
           <step>A</step>
           <octave>5</octave>
@@ -787,7 +787,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="266.41" default-y="5">
+      <note id="n49" default-x="266.41" default-y="5">
         <pitch>
           <step>G</step>
           <octave>5</octave>
@@ -800,7 +800,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="300.67" default-y="0">
+      <note id="n50" default-x="300.67" default-y="0">
         <pitch>
           <step>F</step>
           <octave>5</octave>
@@ -816,7 +816,7 @@
       <backup>
         <duration>8</duration>
         </backup>
-      <note default-x="60.86" default-y="-90">
+      <note id="n51" default-x="60.86" default-y="-90">
         <pitch>
           <step>D</step>
           <octave>4</octave>
@@ -827,7 +827,7 @@
         <stem>down</stem>
         <staff>2</staff>
         </note>
-      <note default-x="197.89" default-y="-125">
+      <note id="n52" default-x="197.89" default-y="-125">
         <rest/>
         <duration>3</duration>
         <voice>5</voice>
@@ -835,7 +835,7 @@
         <dot default-x="213.93" default-y="-120"/>
         <staff>2</staff>
         </note>
-      <note default-x="300.67" default-y="-100">
+      <note id="n53" default-x="300.67" default-y="-100">
         <pitch>
           <step>B</step>
           <octave>3</octave>
@@ -848,7 +848,7 @@
         </note>
       </measure>
     <measure number="6" width="294.31">
-      <note default-x="18.44" default-y="0">
+      <note id="n54" default-x="18.44" default-y="0">
         <pitch>
           <step>F</step>
           <octave>5</octave>
@@ -864,7 +864,7 @@
           <slur type="start" orientation="over" placement="above" number="1"/>
           </notations>
         </note>
-      <note default-x="52.7" default-y="-5">
+      <note id="n55" default-x="52.7" default-y="-5">
         <pitch>
           <step>E</step>
           <octave>5</octave>
@@ -880,7 +880,7 @@
           <slur type="stop" number="1"/>
           </notations>
         </note>
-      <note default-x="86.96" default-y="20">
+      <note id="n56" default-x="86.96" default-y="20">
         <pitch>
           <step>C</step>
           <octave>6</octave>
@@ -893,7 +893,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="121.22" default-y="15">
+      <note id="n57" default-x="121.22" default-y="15">
         <pitch>
           <step>B</step>
           <octave>5</octave>
@@ -906,7 +906,7 @@
         <beam number="1">end</beam>
         <beam number="2">end</beam>
         </note>
-      <note default-x="155.48" default-y="10">
+      <note id="n58" default-x="155.48" default-y="10">
         <pitch>
           <step>A</step>
           <octave>5</octave>
@@ -919,7 +919,7 @@
         <beam number="1">begin</beam>
         <beam number="2">begin</beam>
         </note>
-      <note default-x="189.74" default-y="5">
+      <note id="n59" default-x="189.74" default-y="5">
         <pitch>
           <step>G</step>
           <octave>5</octave>
@@ -932,7 +932,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="223.99" default-y="0">
+      <note id="n60" default-x="223.99" default-y="0">
         <pitch>
           <step>F</step>
           <octave>5</octave>
@@ -945,7 +945,7 @@
         <beam number="1">continue</beam>
         <beam number="2">continue</beam>
         </note>
-      <note default-x="258.25" default-y="-5">
+      <note id="n61" default-x="258.25" default-y="-5">
         <pitch>
           <step>E</step>
           <octave>5</octave>
@@ -961,7 +961,7 @@
       <backup>
         <duration>8</duration>
         </backup>
-      <note default-x="18.44" default-y="-95">
+      <note id="n62" default-x="18.44" default-y="-95">
         <pitch>
           <step>C</step>
           <octave>4</octave>
@@ -973,7 +973,7 @@
         <stem>down</stem>
         <staff>2</staff>
         </note>
-      <note default-x="155.48" default-y="-125">
+      <note id="n63" default-x="155.48" default-y="-125">
         <rest/>
         <duration>3</duration>
         <voice>5</voice>
@@ -981,7 +981,7 @@
         <dot default-x="171.51" default-y="-120"/>
         <staff>2</staff>
         </note>
-      <note default-x="258.25" default-y="-105">
+      <note id="n64" default-x="258.25" default-y="-105">
         <pitch>
           <step>A</step>
           <octave>3</octave>
@@ -994,7 +994,7 @@
         </note>
       </measure>
     <measure number="7" width="219.86">
-      <note default-x="12.5" default-y="-10">
+      <note id="n65" default-x="12.5" default-y="-10">
         <pitch>
           <step>D</step>
           <octave>5</octave>
@@ -1006,7 +1006,7 @@
         <staff>1</staff>
         <beam number="1">begin</beam>
         </note>
-      <note default-x="63.89" default-y="10">
+      <note id="n66" default-x="63.89" default-y="10">
         <pitch>
           <step>A</step>
           <octave>5</octave>
@@ -1021,7 +1021,7 @@
           <slur type="start" orientation="over" placement="above" number="1"/>
           </notations>
         </note>
-      <note default-x="115.28" default-y="5">
+      <note id="n67" default-x="115.28" default-y="5">
         <pitch>
           <step>G</step>
           <octave>5</octave>
@@ -1036,7 +1036,7 @@
           <slur type="stop" number="1"/>
           </notations>
         </note>
-      <note default-x="166.67" default-y="-20">
+      <note id="n68" default-x="166.67" default-y="-20">
         <pitch>
           <step>B</step>
           <octave>4</octave>
@@ -1051,7 +1051,7 @@
       <backup>
         <duration>8</duration>
         </backup>
-      <note default-x="12.5" default-y="-115">
+      <note id="n69" default-x="12.5" default-y="-115">
         <pitch>
           <step>F</step>
           <octave>3</octave>
@@ -1062,7 +1062,7 @@
         <stem>down</stem>
         <staff>2</staff>
         </note>
-      <note default-x="115.28" default-y="-110">
+      <note id="n70" default-x="115.28" default-y="-110">
         <pitch>
           <step>G</step>
           <octave>3</octave>
@@ -1075,7 +1075,7 @@
         </note>
       </measure>
     <measure number="8" width="177.67">
-      <note default-x="12.5" default-y="-15">
+      <note id="n71" default-x="12.5" default-y="-15">
         <pitch>
           <step>C</step>
           <octave>5</octave>
@@ -1086,7 +1086,7 @@
         <stem>down</stem>
         <staff>1</staff>
         </note>
-      <note default-x="89.58" default-y="-20">
+      <note id="n72" default-x="89.58" default-y="-20">
         <rest/>
         <duration>4</duration>
         <voice>1</voice>
@@ -1096,7 +1096,7 @@
       <backup>
         <duration>8</duration>
         </backup>
-      <note default-x="12.5" default-y="-95">
+      <note id="n73" default-x="12.5" default-y="-95">
         <pitch>
           <step>C</step>
           <octave>4</octave>
@@ -1107,7 +1107,7 @@
         <stem>down</stem>
         <staff>2</staff>
         </note>
-      <note default-x="89.58" default-y="-130">
+      <note id="n74" default-x="89.58" default-y="-130">
         <pitch>
           <step>C</step>
           <octave>3</octave>