Fix per-frame evaluation based on eval_symbolic; raise audio_outerhmm sample_rate from 16000 to 24000

laurenceyoon · laurenceyoon · commit 34f04854aead · 2026-03-23T18:00:05.000Z
diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py
@@ -6,6 +6,7 @@
 
 import numpy as np
 import partitura
+import scipy.interpolate
 from partitura.io.exportmidi import get_ppq
 from partitura.musicanalysis.performance_codec import get_time_maps_from_alignment
 from partitura.score import Part, merge_parts
@@ -73,7 +74,7 @@
             "step_size": 3,
         },
         "audio_outerhmm": {
-            "sample_rate": 16000,
+            "sample_rate": 24000,
             "frame_rate": 25,
             "s_j": 0.0,
         },
@@ -594,81 +595,57 @@ def run_evaluation(
         # perf_annots = perf_annots[:min_length]
 
         wp = self.score_follower.warping_path
-        mode = "state" if self.input_type == "midi" else "beat"
+        score_annots_beats = self.build_score_annotations(
+            level, musical_beat, return_type="beats"
+        )
 
-        if mode == "beat":
-            # Beat mode forward needs beat positions to match against wp[0].
-            score_annots_beats = self.build_score_annotations(
-                level, musical_beat, return_type="beats"
-            )
-            perf_annots_predicted = transfer_from_score_to_predicted_perf(
-                wp,
-                score_annots_beats,
-                frame_rate=self.frame_rate,
-                mode=mode,
-            )
+        # --- Per-frame evaluation ---
+        # Build GT interpolator: score beat → perf time (seconds)
+        valid_gt = np.isfinite(perf_annots)
+        gt_interp = scipy.interpolate.interp1d(
+            score_annots_beats[valid_gt],
+            perf_annots[valid_gt],
+            bounds_error=False,
+            fill_value=np.nan,
+        )
+
+        wp_score = wp[0].astype(float)
+        wp_perf = wp[1].astype(float)
+
+        # Convert wp perf axis to seconds
+        if self.input_type == "midi":
+            # MIDI: wp_perf is IOI-accumulated from 0; shift by first note onset
+            _perf = partitura.load_performance_midi(self.performance_file)
+            midi_offset = float(_perf.note_array()["onset_sec"].min())
+            wp_perf_sec = wp_perf + midi_offset
         else:
-            perf_annots_predicted = transfer_from_score_to_predicted_perf(
-                wp,
-                score_annots,
-                frame_rate=self.frame_rate,
-                mode=mode,
-            )
+            # Audio: wp_perf is frame index
+            wp_perf_sec = wp_perf / self.frame_rate
 
-        score_annots_predicted = transfer_from_perf_to_predicted_score(
+        # For each wp entry: GT perf time for predicted beat vs actual perf time
+        gt_perf_times = gt_interp(wp_score)
+        perf_annots_predicted = transfer_from_score_to_predicted_perf(
             wp,
-            perf_annots,
+            score_annots_beats,
             frame_rate=self.frame_rate,
-            mode=mode,
+            mode="beat",
         )
-        score_annots = score_annots[: len(score_annots_predicted)]
 
-        if original_perf_annots_counts != len(perf_annots_predicted):
-            print(
-                f"Length of the annotation changed: {original_perf_annots_counts} -> {len(perf_annots_predicted)}"
-            )
-
-        # Evaluation metrics
         if domain == "performance":
             eval_results = get_evaluation_results(
-                perf_annots,
-                perf_annots_predicted,
-                total_counts=original_perf_annots_counts,
+                gt_perf_times,
+                wp_perf_sec,
+                total_counts=len(wp_score),
                 tolerances=tolerances,
-                perf_times=perf_annots,
+                perf_times=wp_perf_sec,
                 alignment_duration=self.alignment_duration,
             )
         else:
-            if mode == "beat":
-                # Beat mode reverse already returns beat positions directly.
-                # score_annots_predicted was already computed above with mode="beat",
-                # which returns beats directly from transfer_positions.
-                pass
-            elif mode == "state":
-                # State mode reverse returns state indices (via causal lookup).
-                # Map state indices directly to beats through state_space.
-                state_space = self.score_follower.state_space
-                raw_states = transfer_from_perf_to_predicted_score(
-                    wp,
-                    perf_annots,
-                    frame_rate=self.frame_rate,
-                    mode=mode,
-                    output="frames",
-                )
-                score_annots_predicted = np.array(
-                    [
-                        (
-                            float(state_space[int(s)])
-                            if not np.isnan(s) and 0 <= int(s) < len(state_space)
-                            else np.nan
-                        )
-                        for s in raw_states
-                    ]
-                )
-            else:
-                score_annots_predicted = self.convert_timestamps_to_beats(
-                    score_annots_predicted
-                )
+            # Score domain: compare predicted beats vs GT beats
+            score_annots_predicted = transfer_from_perf_to_predicted_score(
+                wp, perf_annots, frame_rate=self.frame_rate, mode="beat"
+            )
+            score_annots = score_annots[: len(score_annots_predicted)]
             if tolerances == TOLERANCES_IN_MILLISECONDS:
                 tolerances = TOLERANCES_IN_BEATS
             eval_results = get_evaluation_results(
@@ -687,7 +664,7 @@ def run_evaluation(
         # Debug: save warping path TSV, results JSON, and plots
         if debug and save_dir is not None:
             # For plot y-axis: use beats when wp[0] is in beats
-            debug_score_annots = score_annots_beats if mode == "beat" else score_annots
+            debug_score_annots = score_annots_beats
             save_debug_results(
                 warping_path=self.score_follower.warping_path,
                 score_annots=debug_score_annots,