diff --git a/.github/workflows/pypi_publish.yml b/.github/workflows/pypi_publish.yml index 5cfc5dd..e944c12 100644 --- a/.github/workflows/pypi_publish.yml +++ b/.github/workflows/pypi_publish.yml @@ -26,7 +26,7 @@ jobs: - name: Build wheels run: python -m cibuildwheel env: - CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*" + CIBW_BUILD: "cp310-* cp311-* cp312-* cp313-*" CIBW_ARCHS: "x86_64" CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_BEFORE_BUILD: | diff --git a/.github/workflows/test_pypi_publish.yml b/.github/workflows/test_pypi_publish.yml index 8328b29..cbdf340 100644 --- a/.github/workflows/test_pypi_publish.yml +++ b/.github/workflows/test_pypi_publish.yml @@ -28,7 +28,7 @@ jobs: - name: Build wheels run: python -m cibuildwheel env: - CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*" + CIBW_BUILD: "cp310-* cp311-* cp312-* cp313-*" CIBW_ARCHS: "x86_64" CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_BEFORE_BUILD: | diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 6c9bc9a..ff04d20 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -11,7 +11,7 @@ jobs: fail-fast: false matrix: platform: [ubuntu-latest, macos-latest] - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] runs-on: ${{ matrix.platform }} steps: diff --git a/README.md b/README.md index 4750e38..d5d9706 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ The full documentation for matchmaker is available online at [readthedocs.org](h ### Prerequisites -- Available Python version: 3.12 +- Available Python version: 3.11, 3.12, 3.13 - [Fluidsynth](https://www.fluidsynth.org/) - [PortAudio](http://www.portaudio.com/) diff --git a/matchmaker/dp/oltw_arzt.py b/matchmaker/dp/oltw_arzt.py index a6749cf..9f7e657 100644 --- a/matchmaker/dp/oltw_arzt.py +++ b/matchmaker/dp/oltw_arzt.py @@ -13,7 +13,8 @@ from matchmaker.base import OnlineAlignment from matchmaker.dp.dtw_loop import oltw_arzt_loop -from 
matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT +from matchmaker.features.audio import FRAME_RATE +from matchmaker.io.audio import QUEUE_TIMEOUT from matchmaker.utils import ( CYTHONIZED_METRICS_W_ARGUMENTS, CYTHONIZED_METRICS_WO_ARGUMENTS, @@ -26,10 +27,11 @@ RECVQueue, set_latency_stats, ) +from matchmaker.utils.stream import STREAM_END -STEP_SIZE: int = 5 -WINDOW_SIZE: int = 5 -START_WINDOW_SIZE: Union[float, int] = 0.25 +STEP_SIZE: int = 3 +WINDOW_SIZE: int = 10 +START_WINDOW_SIZE: Union[float, int] = 0.1 class OnlineTimeWarpingArzt(OnlineAlignment): @@ -98,9 +100,9 @@ def __init__( current_position: int = 0, frame_rate: int = FRAME_RATE, queue: Optional[RECVQueue] = None, - state_to_ref_time_map = None, - ref_to_state_time_map = None, - state_space = None, + state_to_ref_time_map=None, + ref_to_state_time_map=None, + state_space=None, **kwargs, ) -> None: super().__init__(reference_features=reference_features) @@ -154,7 +156,7 @@ def __init__( self.N_ref: int = self.reference_features.shape[0] self.frame_rate = frame_rate - self.window_size: int = window_size * self.frame_rate + self.window_size: int = int(np.round(window_size * self.frame_rate)) self.step_size: int = step_size self.start_window_size: int = int(np.round(start_window_size * frame_rate)) self.init_position: int = current_position @@ -178,12 +180,22 @@ def __init__( } self.state_to_ref_time_map = state_to_ref_time_map self.ref_to_state_time_map = ref_to_state_time_map - self.state_space = state_space #if state_space != None else np.unique(self.reference_features.note_array()["onset_beat"]) + self.state_space = state_space + self._ref_frame_to_beat: Optional[NDArray[np.float32]] = kwargs.get( + "ref_frame_to_beat", None + ) + + @property + def current_beat(self) -> float: + """Current score position in beats.""" + if self._ref_frame_to_beat is not None: + idx = min(self.current_position, len(self._ref_frame_to_beat) - 1) + return float(self._ref_frame_to_beat[idx]) + return 
float(self.current_position) @property - def warping_path(self) -> NDArray[np.int32]: - wp = (np.array(self._warping_path).T).astype(np.int32) - return wp + def warping_path(self) -> NDArray[np.float32]: + return np.array(self._warping_path).T def __call__(self, input: NDArray[np.float32]) -> int: self.step(input) @@ -211,10 +223,18 @@ def run(self, verbose: bool = True) -> Generator[int, None, NDArray[np.float32]] self.reset() if verbose: - pbar = progressbar.ProgressBar(max_value=self.N_ref, redirect_stdout=True) + pbar = progressbar.ProgressBar( + max_value=len(self.state_space), + redirect_stdout=True, + redirect_stderr=True, + ) + pbar.start() while self.is_still_following(): - features, f_time = self.queue.get(timeout=QUEUE_TIMEOUT) + item = self.queue.get(timeout=QUEUE_TIMEOUT) + if item is STREAM_END: + break + features, f_time = item self.last_queue_update = time.time() self.input_features = ( np.concatenate((self.input_features, features)) @@ -224,13 +244,13 @@ def run(self, verbose: bool = True) -> Generator[int, None, NDArray[np.float32]] self.step(features) if verbose: - pbar.update(int(self.current_position)) + pbar.update(int(np.searchsorted(self.state_space, self.current_beat))) latency = time.time() - self.last_queue_update self.latency_stats = set_latency_stats( latency, self.latency_stats, self.input_index ) - yield self.current_position + yield self.current_beat if verbose: pbar.finish() @@ -289,23 +309,17 @@ def step(self, input_features: NDArray[np.float32]) -> None: min_index=min_index, ) - # adapt current_position: do not go backwards, - # but also go a maximum of N steps forward - - if self.input_index == 0: - # enforce the first time step to stay at the - # initial position - self.current_position = min( # TODO: Is this necessary? 
- max(self.current_position, min_index), - self.current_position, - ) - else: - self.current_position = min( - max(self.current_position, min_index), - self.current_position + self.step_size, + # Clamp new position: no backwards, max step_size forward per frame + if self.input_index > 0: + self.current_position = int( + np.clip( + min_index, + self.current_position, + self.current_position + self.step_size, + ) ) - self._warping_path.append((self.current_position, self.input_index)) + self._warping_path.append((self.current_beat, self.input_index)) # update input index self.input_index += 1 diff --git a/matchmaker/dp/oltw_dixon.py b/matchmaker/dp/oltw_dixon.py index 8c55a05..11e46eb 100644 --- a/matchmaker/dp/oltw_dixon.py +++ b/matchmaker/dp/oltw_dixon.py @@ -14,8 +14,10 @@ from numpy.typing import NDArray from matchmaker.base import OnlineAlignment -from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT +from matchmaker.features.audio import FRAME_RATE +from matchmaker.io.audio import QUEUE_TIMEOUT from matchmaker.utils.misc import set_latency_stats +from matchmaker.utils.stream import STREAM_END class Direction(IntEnum): @@ -74,9 +76,9 @@ def __init__( max_run_count=MAX_RUN_COUNT, frame_per_seg=FRAME_PER_SEG, frame_rate=FRAME_RATE, - state_to_ref_time_map = None, - ref_to_state_time_map = None, - state_space = None, + state_to_ref_time_map=None, + ref_to_state_time_map=None, + state_space=None, **kwargs, ): super().__init__(reference_features=reference_features) @@ -90,6 +92,7 @@ def __init__( self.state_to_ref_time_map = state_to_ref_time_map self.ref_to_state_time_map = ref_to_state_time_map self.state_space = state_space + self._ref_frame_to_beat = kwargs.get("ref_frame_to_beat", None) self.reset() def reset(self): @@ -114,6 +117,14 @@ def reset(self): } self._initialized = False + @property + def current_beat(self) -> float: + """Current score position in beats.""" + if self._ref_frame_to_beat is not None: + idx = min(self.best_ref, 
len(self._ref_frame_to_beat) - 1) + return float(self._ref_frame_to_beat[idx]) + return float(self.best_ref) + @property def warping_path(self) -> NDArray[np.float32]: # [shape=(2, T)] return self.wp @@ -276,8 +287,8 @@ def get_expand_direction(self): return Direction.REF def save_history(self): - """Append current best alignment point to warping path.""" - new_point = np.array([[self.best_ref], [self.best_input]]) + """Append current best alignment point to warping path (beats, input_frame).""" + new_point = np.array([[self.current_beat], [self.best_input]]) self.wp = np.concatenate((self.wp, new_point), axis=1) def __call__(self, input_features: NDArray[np.float32]) -> int: @@ -363,21 +374,29 @@ def run(self, verbose=True): self.reset() if verbose: - pbar = progressbar.ProgressBar(max_value=self.N_ref, redirect_stdout=True) + pbar = progressbar.ProgressBar( + max_value=len(self.state_space), + redirect_stdout=True, + redirect_stderr=True, + ) + pbar.start() while self.is_still_following(): - input_feature, f_time = self.queue.get(timeout=QUEUE_TIMEOUT) + item = self.queue.get(timeout=QUEUE_TIMEOUT) + if item is STREAM_END: + break + input_feature, f_time = item self.last_queue_update = time.time() self.step(input_feature) if verbose: - pbar.update(int(self.current_position)) + pbar.update(int(np.searchsorted(self.state_space, self.current_beat))) latency = time.time() - self.last_queue_update self.latency_stats = set_latency_stats( latency, self.latency_stats, self.input_index ) - yield self.current_position + yield self.current_beat if verbose: pbar.finish() diff --git a/matchmaker/features/audio.py b/matchmaker/features/audio.py index 3a9c27b..f68641d 100644 --- a/matchmaker/features/audio.py +++ b/matchmaker/features/audio.py @@ -20,7 +20,6 @@ DCT_TYPE = 2 NORM = np.inf FEATURES = "chroma" -QUEUE_TIMEOUT = 1 # Type hint for Input Audio frame. 
InputAudioSeries = np.ndarray diff --git a/matchmaker/io/audio.py b/matchmaker/io/audio.py index 043ecf9..ee04156 100644 --- a/matchmaker/io/audio.py +++ b/matchmaker/io/audio.py @@ -12,16 +12,21 @@ import numpy as np import pyaudio -from matchmaker.features.audio import HOP_LENGTH, SAMPLE_RATE, ChromagramProcessor +from matchmaker.features.audio import ( + HOP_LENGTH, + SAMPLE_RATE, + ChromagramProcessor, +) from matchmaker.utils.audio import ( get_audio_devices, get_default_input_device_index, get_device_index_from_name, ) from matchmaker.utils.misc import RECVQueue, set_latency_stats -from matchmaker.utils.stream import Stream +from matchmaker.utils.stream import STREAM_END, Stream CHANNELS = 1 +QUEUE_TIMEOUT = 10 class AudioStream(Stream): @@ -53,7 +58,7 @@ def __init__( hop_length: int = HOP_LENGTH, queue: Optional[RECVQueue] = None, device_name_or_index: Optional[Union[str, int]] = None, - wait: bool = True, + wait: bool = False, target_sr: int = SAMPLE_RATE, ): if processor is None: @@ -123,9 +128,14 @@ def __init__( "min_latency": float("inf"), } self.input_index = 0 + self._preloaded_audio = None if self.mock: self.run = self.run_offline + # Pre-load and resample audio so the stream thread can start + # producing frames immediately (avoids queue-timeout race condition + # when librosa.load takes longer than QUEUE_TIMEOUT). 
+ self._preload_audio() else: self.run = self.run_online @@ -159,6 +169,8 @@ def _process_frame( # initial y target_audio = np.frombuffer(data, dtype=np.float32) self._process_feature(target_audio, time_info["input_buffer_adc_time"]) + if not self.stream_start.is_set(): + self.stream_start.set() return (data, pyaudio.paContinue) @@ -225,6 +237,13 @@ def stop_listening(self) -> None: self.audio_interface.terminate() self.listen = False + def _preload_audio(self) -> None: + """Pre-load and resample audio file so run_offline can start immediately.""" + audio_y, sr = librosa.load(self.file_path, sr=None) + if sr != self.target_sr: + audio_y = librosa.resample(y=audio_y, orig_sr=sr, target_sr=self.target_sr) + self._preloaded_audio = audio_y + def run_offline(self) -> None: """Process audio file in offline mode. @@ -240,12 +259,17 @@ def run_offline(self) -> None: self.start_listening() self.init_time = time.time() - audio_y, sr = librosa.load(self.file_path, sr=None) - if sr != self.target_sr: - audio_y = librosa.resample(y=audio_y, orig_sr=sr, target_sr=self.target_sr) - sr = self.target_sr + if self._preloaded_audio is not None: + audio_y = self._preloaded_audio + self._preloaded_audio = None # free memory + else: + audio_y, sr = librosa.load(self.file_path, sr=None) + if sr != self.target_sr: + audio_y = librosa.resample( + y=audio_y, orig_sr=sr, target_sr=self.target_sr + ) + sr = self.target_sr - time_interval = self.hop_length / float(sr) # Pad to next hop_length boundary so no trailing samples are lost remainder = len(audio_y) % self.hop_length if remainder > 0: @@ -253,6 +277,7 @@ def run_offline(self) -> None: (audio_y, np.zeros(self.hop_length - remainder, dtype=np.float32)) ) trimmed_audio = audio_y + time_interval = self.hop_length / float(sr) # Do not stop early on digital silence (all-zeros tails). 
while trimmed_audio.size > 0: self.input_index += 1 @@ -260,11 +285,15 @@ def run_offline(self) -> None: target_audio = trimmed_audio[: self.hop_length] self._process_feature(target_audio, self.last_data_received) trimmed_audio = trimmed_audio[self.hop_length :] - elapsed_time = time.time() - self.last_data_received + + if not self.stream_start.is_set(): + self.stream_start.set() if self.wait: + elapsed_time = time.time() - self.last_data_received time.sleep(max(time_interval - elapsed_time, 0)) + self.queue.put(STREAM_END) self.stop_listening() def run_online(self) -> None: diff --git a/matchmaker/io/midi.py b/matchmaker/io/midi.py index cfdca59..489fec8 100644 --- a/matchmaker/io/midi.py +++ b/matchmaker/io/midi.py @@ -139,6 +139,8 @@ def _process_frame_message( self.queue.put(((data, c_time), output)) else: self.queue.put(output) + if not self.stream_start.is_set(): + self.stream_start.set() def _process_frame_window( self, diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py index 776e081..8f772f8 100644 --- a/matchmaker/matchmaker.py +++ b/matchmaker/matchmaker.py @@ -1,13 +1,15 @@ import os import sys +import time from pathlib import Path from typing import Optional, Union import numpy as np import partitura +import scipy.interpolate from partitura.io.exportmidi import get_ppq -from partitura.score import Part, merge_parts from partitura.musicanalysis.performance_codec import get_time_maps_from_alignment +from partitura.score import Part, merge_parts from matchmaker.dp import OnlineTimeWarpingArzt, OnlineTimeWarpingDixon from matchmaker.features.audio import ( @@ -39,8 +41,7 @@ TOLERANCES_IN_BEATS, TOLERANCES_IN_MILLISECONDS, get_evaluation_results, - transfer_from_perf_to_predicted_score, - transfer_from_score_to_predicted_perf, + transfer_positions, ) from matchmaker.utils.misc import ( adjust_tempo_for_performance_file, @@ -52,39 +53,31 @@ ) from matchmaker.utils.tempo_models import KalmanTempoModel +PathLike = Union[str, bytes, os.PathLike] 
sys.setrecursionlimit(10_000) -PathLike = Union[str, bytes, os.PathLike] DEFAULT_TEMPO = 120 - - -DEFAULT_DISTANCE_FUNCS = { - "arzt": OnlineTimeWarpingArzt.DEFAULT_DISTANCE_FUNC, - "dixon": OnlineTimeWarpingDixon.DEFAULT_DISTANCE_FUNC, - "hmm": None, - "outerhmm": None, - "audio_outerhmm": None, - "pthmm": None, -} - DEFAULT_METHODS = { "audio": "arzt", "midi": "outerhmm", } - AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm", "outerhmm", "audio_outerhmm"] KWARGS = { "audio": { "dixon": { + "feature_type": "lse", "window_size": 10, }, "arzt": { - "window_size": 5, - "start_window_size": 0.25, - "step_size" : 5,}, + "window_size": 10, + "start_window_size": 0.1, + "step_size": 3, + }, "audio_outerhmm": { + "feature_type": "cqt_spectral_flux", "sample_rate": 16000, - "frame_rate": 50, + "frame_rate": 25, + "s_j": 0.0, }, }, "midi": { @@ -151,39 +144,37 @@ def __init__( self, score_file: PathLike, performance_file: Union[PathLike, None] = None, - wait: bool = True, # only for offline option. 
For debugging or fast testing, set to False - input_type: str = "audio", # 'audio' or 'midi' - feature_type: str = None, + input_type: str = "audio", method: str = None, - distance_func: Optional[str] = None, + *, + feature_type: str = None, device_name_or_index: Union[str, int] = None, + tempo: Optional[float] = None, sample_rate: int = SAMPLE_RATE, frame_rate: int = FRAME_RATE, - tempo: Optional[float] = None, - kwargs=KWARGS, - unfold_score=True, auto_adjust_tempo: bool = False, + wait: bool = False, + unfold_score=True, + kwargs=KWARGS, ): self.score_file = str(score_file) self.performance_file = ( str(performance_file) if performance_file is not None else None ) - # if input_type not in ("audio", "midi"): - # raise ValueError(f"Invalid input_type {input_type}") self.input_type = input_type self.feature_type = feature_type self.frame_rate = frame_rate if input_type == "audio" else 1 self.sample_rate = sample_rate self.hop_length = sample_rate // self.frame_rate self.score_part: Optional[Part] = None - self.distance_func = distance_func self.device_name_or_index = device_name_or_index self.processor = None self.stream = None self.score_follower = None self.reference_features = None self._has_run = False + self.alignment_duration = None # validate method first if method is None: @@ -192,7 +183,7 @@ def __init__( raise ValueError(f"Invalid method. Available methods: {AVAILABLE_METHODS}") self.method = method - self.config = kwargs[self.input_type][self.method] + self.config = dict(kwargs[self.input_type][self.method]) self.auto_adjust_tempo = auto_adjust_tempo # Apply method-specific defaults from config (only if not explicitly provided by caller) @@ -213,8 +204,22 @@ def __init__( score = partitura.load_score(self.score_file) if unfold_score: - score = partitura.score.unfold_part_maximal(score, ignore_leaps=False) - self.score_part = merge_parts(score.parts) + try: + # Ensure recursion limit is high enough for deepcopy of + # complex scores. 
External libraries (e.g. madmom) may + # lower it during processing. + _prev_limit = sys.getrecursionlimit() + sys.setrecursionlimit(max(_prev_limit, 10_000)) + unfolded = partitura.score.unfold_part_maximal( + score, ignore_leaps=False + ) + self.score_part = merge_parts(unfolded.parts) + sys.setrecursionlimit(_prev_limit) + except Exception: + sys.setrecursionlimit(max(sys.getrecursionlimit(), 10_000)) + self.score_part = merge_parts(score.parts) + else: + self.score_part = merge_parts(score.parts) except Exception as e: raise ValueError(f"Invalid score file: {e}") @@ -229,12 +234,8 @@ def __init__( # setup feature processor if self.feature_type is None: - if input_type == "audio": - self.feature_type = ( - "cqt_spectral_flux" if method == "audio_outerhmm" else "chroma" - ) - else: - self.feature_type = "pitch_ioi" + default = "chroma" if input_type == "audio" else "pitch_ioi" + self.feature_type = self.config.get("feature_type", default) if self.feature_type == "chroma": self.processor = ChromagramProcessor( @@ -244,18 +245,22 @@ def __init__( elif self.feature_type == "mfcc": self.processor = MFCCProcessor( sample_rate=self.sample_rate, + hop_length=self.hop_length, ) elif self.feature_type == "cqt": self.processor = CQTProcessor( sample_rate=self.sample_rate, + hop_length=self.hop_length, ) elif self.feature_type == "mel": self.processor = MelSpectrogramProcessor( sample_rate=self.sample_rate, + hop_length=self.hop_length, ) elif self.feature_type == "lse": self.processor = LogSpectralEnergyProcessor( sample_rate=self.sample_rate, + hop_length=self.hop_length, ) elif self.feature_type == "pitch_ioi": self.processor = PitchIOIProcessor(piano_range=self.config["piano_range"]) @@ -281,9 +286,6 @@ def __init__( f"Invalid performance file. 
Expected MIDI file, but got {self.performance_file}" ) - # setup distance function - if distance_func is None: - distance_func = DEFAULT_DISTANCE_FUNCS[self.method] # setup stream device if self.input_type == "audio": @@ -308,67 +310,69 @@ def __init__( self.reference_features = self.preprocess_score() - if distance_func is None: - distance_func = DEFAULT_DISTANCE_FUNCS[method] - if method == "arzt": - state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps() + try: + state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps() + except Exception: + state_to_ref_time_map, ref_to_state_time_map = None, None self.score_follower = OnlineTimeWarpingArzt( reference_features=self.reference_features, queue=self.stream.queue, - distance_func=distance_func, frame_rate=self.frame_rate, - window_size=self.config["window_size"], - start_window_size=self.config["start_window_size"], state_to_ref_time_map=state_to_ref_time_map, ref_to_state_time_map=ref_to_state_time_map, - step_size=self.config["step_size"], - state_space=np.unique(self.score_part.note_array()["onset_beat"]) + state_space=np.unique(self.score_part.note_array()["onset_beat"]), + ref_frame_to_beat=self._build_ref_frame_to_beat(), + **self.config, ) elif method == "dixon": - state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps() + try: + state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps() + except Exception: + state_to_ref_time_map, ref_to_state_time_map = None, None self.score_follower = OnlineTimeWarpingDixon( reference_features=self.reference_features, queue=self.stream.queue, - distance_func=distance_func, frame_rate=self.frame_rate, - window_size=self.config["window_size"], state_to_ref_time_map=state_to_ref_time_map, ref_to_state_time_map=ref_to_state_time_map, - state_space=np.unique(self.score_part.note_array()["onset_beat"]) + state_space=np.unique(self.score_part.note_array()["onset_beat"]), + ref_frame_to_beat=self._build_ref_frame_to_beat(), + 
**self.config, ) elif method == "hmm" and self.input_type == "midi": self.score_follower = PitchIOIHMM( reference_features=self.reference_features, queue=self.stream.queue, - tempo_model=self.config["tempo_model"], has_insertions=True, - piano_range=self.config["piano_range"], + **self.config, ) elif method == "pthmm" and self.input_type == "audio": self.score_follower = GaussianAudioPitchTempoHMM( reference_features=self.reference_features, queue=self.stream.queue, + **self.config, ) elif method == "audio_outerhmm" and self.input_type == "audio": self.score_follower = AudioOuterProductHMM( reference_features=self.reference_features, queue=self.stream.queue, tempo=self.tempo, - sample_rate=self.sample_rate, hop_length=self.hop_length, + **self.config, ) elif method == "pthmm" and self.input_type == "midi": self.score_follower = PitchHMM( reference_features=self.reference_features, queue=self.stream.queue, has_insertions=True, - piano_range=self.config["piano_range"], + **self.config, ) elif method == "outerhmm" and self.input_type == "midi": self.score_follower = OuterProductHMM( reference_features=self.reference_features, queue=self.stream.queue, + **self.config, ) else: raise ValueError("Invalid method") @@ -381,7 +385,9 @@ def preprocess_score(self): ) if self.method in {"arzt", "dixon"}: - self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) + self.ppart = partitura.utils.music.performance_from_part( + self.score_part, bpm=self.tempo + ) self.ppart.sustain_pedal_threshold = 127 if self.input_type == "audio": self.score_audio = generate_score_audio( @@ -406,22 +412,27 @@ def preprocess_score(self): return reference_features else: return self.score_part.note_array() - + def get_time_maps(self): - alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]] - return get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment) + sna = 
self.score_part.note_array() + pna = self.ppart.note_array() + note_ids = sna["id"] + # If note IDs are missing, use index-based IDs + if len(set(note_ids)) <= 1: + synth_ids = [f"n{i}" for i in range(len(sna))] + sna = sna.copy() + sna["id"] = synth_ids + pna = pna.copy() + pna["id"] = synth_ids[: len(pna)] + note_ids = synth_ids + alignment = [ + {"label": "match", "score_id": nid, "performance_id": nid} + for nid in note_ids + ] + return get_time_maps_from_alignment(pna, sna, alignment) def _convert_frame_to_beat(self, current_frame: int) -> float: - """ - Convert frame number to relative beat position in the score. - - Parameters - ---------- - frame_rate : int - Frame rate of the audio stream - current_frame : int - Current frame number - """ + """Convert frame number to beat position in the score.""" tick = get_ppq(self.score_part) timeline_time = (current_frame / self.frame_rate) * tick * (self.tempo / 60) beat_position = np.round( @@ -430,6 +441,13 @@ def _convert_frame_to_beat(self, current_frame: int) -> float: ) return beat_position + def _build_ref_frame_to_beat(self) -> np.ndarray: + """Precompute beat position for each reference feature frame.""" + n_ref = self.reference_features.shape[0] + return np.array( + [self._convert_frame_to_beat(i) for i in range(n_ref)], + ) + def build_score_annotations( self, level="beat", @@ -533,37 +551,45 @@ def run_evaluation( self, perf_annotations: Union[PathLike, np.ndarray], level: str = "note", - tolerances: list = TOLERANCES_IN_MILLISECONDS, - musical_beat: bool = False, # beat annots are difference in some dataset + tolerances: list = None, + musical_beat: bool = False, debug: bool = False, save_dir: PathLike = None, run_name: str = None, - domain: str = "performance", # "score" or "performance" + domain: str = "score", + plot_dist_matrix: bool = True, ) -> dict: """ - Evaluate the score following process + Evaluate the score following process. 
+ + When domain="score" (default), returns beat-based metrics as primary + and ms-based metrics under "ms" key. When domain="performance", + returns ms-based metrics only (legacy behavior). Parameters ---------- perf_annotations : PathLike or np.ndarray - Path to the performance annotations file (tab-separated), - or numpy array of annotation times in seconds. + Path to the performance annotations file or numpy array of onset times (seconds). level : str - Level of annotations to use: bar, beat or note - tolerance : list - Tolerances to use for evaluation (in milliseconds) + Annotation level: "beat" or "note" + tolerances : list or None + Tolerances for evaluation. If None, uses default for the domain. + musical_beat : bool + Whether to use musical beat debug : bool - Whether to save the score and performance audio with beat annotations + Whether to save debug outputs domain : str - Evaluation domain, either "score" or "performance". - "score" domain evaluates in beat unit, "performance" domain evaluates in second unit. (Default: "performance") + "score" (default, beat-based primary) or "performance" (ms-based, legacy) Returns ------- dict - Evaluation results with mean, median, std, skewness, kurtosis, and - accuracy for each tolerance + Evaluation results. If domain="score", includes both beat and ms metrics. 
""" + if tolerances is None: + tolerances = ( + TOLERANCES_IN_BEATS if domain == "score" else TOLERANCES_IN_MILLISECONDS + ) if not self._has_run: raise ValueError("Must call run() before evaluation") @@ -577,65 +603,101 @@ def run_evaluation( original_perf_annots_counts = len(perf_annots) - min_length = min(len(score_annots), len(perf_annots)) - score_annots = score_annots[:min_length] - perf_annots = perf_annots[:min_length] + # min_length = min(len(score_annots), len(perf_annots)) + # score_annots = score_annots[:min_length] + # perf_annots = perf_annots[:min_length] - mode = ( - "state" - if (self.input_type == "midi" or self.method == "audio_outerhmm") - else "frame" + wp = self.score_follower.warping_path + score_annots_beats = self.build_score_annotations( + level, musical_beat, return_type="beats" ) - perf_annots_predicted = transfer_from_score_to_predicted_perf( - self.score_follower.warping_path, - score_annots, - frame_rate=self.frame_rate, - mode=mode, + + # --- Per-frame evaluation --- + # Build GT interpolator: score beat → perf time (seconds) + valid_gt = np.isfinite(perf_annots) + gt_interp = scipy.interpolate.interp1d( + score_annots_beats[valid_gt], + perf_annots[valid_gt], + bounds_error=False, + fill_value=np.nan, ) - score_annots_predicted = transfer_from_perf_to_predicted_score( - self.score_follower.warping_path, - perf_annots, + wp_score = wp[0].astype(float) + wp_perf = wp[1].astype(float) + + # Convert wp perf axis to seconds + if self.input_type == "midi": + # MIDI: wp_perf is IOI-accumulated from 0; shift by first note onset + _perf = partitura.load_performance_midi(self.performance_file) + midi_offset = float(_perf.note_array()["onset_sec"].min()) + wp_perf_sec = wp_perf + midi_offset + else: + # Audio: wp_perf is frame index + wp_perf_sec = wp_perf / self.frame_rate + + # For each wp entry: GT perf time for predicted beat vs actual perf time + gt_perf_times = gt_interp(wp_score) + perf_annots_predicted = transfer_positions( + wp, + 
score_annots_beats, frame_rate=self.frame_rate, - mode=mode, + domain="performance", ) - score_annots = score_annots[: len(score_annots_predicted)] - if original_perf_annots_counts != len(perf_annots_predicted): - print( - f"Length of the annotation changed: {original_perf_annots_counts} -> {len(perf_annots_predicted)}" - ) - - # Evaluation metrics if domain == "performance": eval_results = get_evaluation_results( - perf_annots, - perf_annots_predicted, - total_counts=original_perf_annots_counts, + gt_perf_times, + wp_perf_sec, + total_counts=len(wp_score), tolerances=tolerances, ) else: - score_annots_predicted = self.convert_timestamps_to_beats( - score_annots_predicted + # Score domain: beat-based (primary) + ms-based (secondary) + score_annots_predicted = transfer_positions( + wp, perf_annots, frame_rate=self.frame_rate, domain="score" ) - if tolerances == TOLERANCES_IN_MILLISECONDS: - tolerances = TOLERANCES_IN_BEATS - eval_results = get_evaluation_results( + score_annots = score_annots[: len(score_annots_predicted)] + beat_tolerances = ( + tolerances + if tolerances != TOLERANCES_IN_MILLISECONDS + else TOLERANCES_IN_BEATS + ) + beat_results = get_evaluation_results( score_annots, score_annots_predicted, total_counts=original_perf_annots_counts, - tolerances=tolerances, + tolerances=beat_tolerances, in_seconds=False, ) + ms_results = get_evaluation_results( + gt_perf_times, + wp_perf_sec, + total_counts=len(wp_score), + tolerances=TOLERANCES_IN_MILLISECONDS, + ) + eval_results = {"beat": beat_results, "ms": ms_results} + + # Real-Time Factor (domain-independent) + if self.alignment_duration is not None: + finite_perf = perf_annots[np.isfinite(perf_annots)] + if len(finite_perf) > 0: + perf_duration = float(np.max(finite_perf) - np.min(finite_perf)) + if perf_duration > 0: + eval_results["rtf"] = float( + f"{self.alignment_duration / perf_duration:.4f}" + ) + if self.input_type == "audio": latency_results = self.get_latency_stats() 
eval_results.update(latency_results) # Debug: save warping path TSV, results JSON, and plots if debug and save_dir is not None: + # For plot y-axis: use beats when wp[0] is in beats + debug_score_annots = score_annots_beats save_debug_results( warping_path=self.score_follower.warping_path, - score_annots=score_annots, + score_annots=debug_score_annots, perf_annots=perf_annots, perf_annots_predicted=perf_annots_predicted, eval_results=eval_results, @@ -643,16 +705,36 @@ def run_evaluation( save_dir=save_dir, run_name=run_name or "results", state_space=getattr(self.score_follower, "state_space", None), - ref_features=getattr(self.score_follower, "reference_features", None), - input_features=getattr(self.score_follower, "input_features", None), - distance_func=getattr(self.score_follower, "distance_func", None), + ref_features=( + getattr(self.score_follower, "reference_features", None) + if plot_dist_matrix + else None + ), + input_features=( + getattr(self.score_follower, "input_features", None) + if plot_dist_matrix + else None + ), + distance_func=( + getattr(self.score_follower, "distance_func", None) + if plot_dist_matrix + else None + ), + ref_frame_to_beat=getattr( + self.score_follower, "_ref_frame_to_beat", None + ), ) return eval_results def run(self, verbose: bool = True, wait: bool = True): """ - Run the score following process + Run the score following process. + + Measures wall-clock time as ``alignment_duration`` (seconds), + which covers both feature extraction (producer thread) and + score following inference (main thread) running concurrently. + RTF is computed as ``alignment_duration / performance_duration``. 
Yields ------ @@ -665,12 +747,11 @@ def run(self, verbose: bool = True, wait: bool = True): Alignment results with warping path """ with self.stream: + self.stream.stream_start.wait() + t0 = time.time() for current_position in self.score_follower.run(verbose=verbose): - if self.input_type == "audio" and self.method != "audio_outerhmm": - position_in_beat = self._convert_frame_to_beat(current_position) - yield position_in_beat - else: - yield float(self.score_follower.state_space[current_position]) + yield current_position + self.alignment_duration = time.time() - t0 self._has_run = True return self.score_follower.warping_path diff --git a/matchmaker/prob/hmm.py b/matchmaker/prob/hmm.py index bfbaf4c..c6210e2 100644 --- a/matchmaker/prob/hmm.py +++ b/matchmaker/prob/hmm.py @@ -47,6 +47,7 @@ DEFAULT_GUMBEL_AUDIO_SCALE = 0.05 QUEUE_TIMEOUT = 10 + class BaseHMM(HiddenMarkovModel): """ Base class for Hidden Markov Model alignment methods. @@ -193,6 +194,7 @@ def __init__( initial_probabilities: Optional[np.ndarray] = None, has_insertions: bool = True, piano_range: bool = True, + **kwargs, ) -> None: """ Initialize the object. 
@@ -283,9 +285,9 @@ def __init__( def __call__(self, input, *args, **kwargs): frame_index = args[0] if args else None - + pitch_obs = input - + current_state = self.forward_algorithm_step( observation=pitch_obs, log_probabilities=False, @@ -339,7 +341,7 @@ def _build_hmm_modules( transition_matrix = stable_transition_matrix( n_states=len(unique_onsets_s), dist=gumbel_l, - scale=1.0,#0.5, + scale=1.0, # 0.5, inserted_states=inserted_states, ) initial_probabilities = init_dist( @@ -547,7 +549,9 @@ def gumbel_transition_matrix( # TODO check works for audio (parameter) np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale ) else: - transition_matrix[i] = gumbel_l.pdf(np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale) + transition_matrix[i] = gumbel_l.pdf( + np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale + ) # Normalize transition matrix (so that it is a proper stochastic matrix): transition_matrix /= transition_matrix.sum(1, keepdims=True) @@ -555,10 +559,11 @@ def gumbel_transition_matrix( # TODO check works for audio (parameter) # Return the computed transition matrix: return transition_matrix + def stable_transition_matrix( # TODO check works for audio (parameter) n_states: int, mp_trans_state: int = 1, - dist = gumbel_l, + dist=gumbel_l, scale: float = 0.5, inserted_states: bool = False, ) -> NDArrayFloat: @@ -606,7 +611,9 @@ def stable_transition_matrix( # TODO check works for audio (parameter) np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale ) else: - transition_matrix[i] = dist.pdf(np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale) + transition_matrix[i] = dist.pdf( + np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale + ) # Normalize transition matrix (so that it is a proper stochastic matrix): transition_matrix /= transition_matrix.sum(1, keepdims=True) @@ -614,6 +621,7 @@ def stable_transition_matrix( # TODO check works for audio (parameter) # Return the computed 
transition matrix: return transition_matrix + def init_dist( n_states: int, dist=gumbel_l, @@ -784,6 +792,7 @@ def compute_discrete_pitch_profiles( return pitch_profiles + # Old version, to be deprecated. def compute_discrete_pitch_profiles_old( chord_pitches: NDArrayFloat, @@ -1131,7 +1140,6 @@ def __init__( self.states = np.arange(len(audio_features)) def __call__(self, observation: NDArrayFloat) -> NDArrayFloat: - pitch_obs, tempo_est = observation if self.current_state is None: @@ -1152,7 +1160,6 @@ def __call__(self, observation: NDArrayFloat) -> NDArrayFloat: obs_prob = pitch_prob * tempo_prob - return obs_prob @@ -1201,7 +1208,6 @@ def __init__( self.states = np.arange(len(audio_features)) def __call__(self, observation: NDArrayFloat) -> NDArrayFloat: - pitch_obs, tempo_est = observation # ioi_idx = self.current_state if self.current_state is not None else 0 @@ -1298,6 +1304,7 @@ def __init__(self, pitch_profiles, ioi_matrix, ioi_precision): ioi_prob_args=ioi_prob_args, ) + class ACCPitchIOIObservationModel(ObservationModel): """ Computes the probabilities that an observation was emitted, i.e. the @@ -1489,6 +1496,7 @@ def __init__( initial_probabilities: Optional[np.ndarray] = None, has_insertions: bool = False, piano_range: bool = False, + **kwargs, ) -> None: """ Initialize the object. 
@@ -1656,17 +1664,17 @@ def _build_hmm_modules( self, piano_range: bool = False, inserted_states: bool = True, - observation_model = ACCPitchIOIObservationModel, - tempo_model = KalmanTempoModel, + observation_model=ACCPitchIOIObservationModel, + tempo_model=KalmanTempoModel, ): snote_array = self.reference_features - + unique_sonsets = np.unique(snote_array["onset_beat"]) unique_sonset_idxs = [ np.where(snote_array["onset_beat"] == ui)[0] for ui in unique_sonsets ] chord_pitches = [snote_array["pitch"][uix] for uix in unique_sonset_idxs] - + pitch_profiles = compute_discrete_pitch_profiles( chord_pitches=chord_pitches, piano_range=piano_range, @@ -1676,7 +1684,7 @@ def _build_hmm_modules( unique_onsets=unique_sonsets, inserted_states=inserted_states, ) - + observation_model = observation_model( pitch_profiles=pitch_profiles, ioi_matrix=ioi_matrix, @@ -1703,11 +1711,11 @@ def _build_hmm_modules( init_score_onset=unique_sonsets.min(), init_beat_period=60 / 100, ) - + transition_matrix = stable_transition_matrix( n_states=len(ioi_matrix[0]), dist=gumbel_l, - scale=1.0,#0.5, + scale=1.0, # 0.5, inserted_states=inserted_states, ) initial_probabilities = init_dist( @@ -1738,14 +1746,14 @@ def run(self, verbose: bool = True): # TODO: check self.queue.get() format. 
maybe this should actually be a tuple try: queue_input = self.queue.get(timeout=QUEUE_TIMEOUT) - #features, f_time = queue_input - #print(f'{features=}, {f_time=}') + # features, f_time = queue_input + # print(f'{features=}, {f_time=}') except: break - #TODO: try MidiStream.return_midi_messages = True + # TODO: try MidiStream.return_midi_messages = True if queue_input is not None: - #print(f'pitch_ioi: {queue_input=}') + # print(f'pitch_ioi: {queue_input=}') current_state = self.__call__(queue_input) empty_counter = 0 if current_state == prev_state: @@ -1755,7 +1763,6 @@ def run(self, verbose: bool = True): break else: same_state_counter = 0 - if verbose: pbar.update(int(current_state)) @@ -1987,6 +1994,7 @@ def __init__( initial_probabilities: Optional[np.ndarray] = None, state_space: Optional[NDArray] = None, patience: int = 200, + **kwargs, ) -> None: """ Initialize the object. @@ -2090,7 +2098,6 @@ def __init__( self.input_features = None self.distance_func = "Euclidean" - BaseHMM.__init__( self, observation_model=observation_model, diff --git a/matchmaker/prob/outer_product_hmm.py b/matchmaker/prob/outer_product_hmm.py index adb3d57..fcdde1e 100644 --- a/matchmaker/prob/outer_product_hmm.py +++ b/matchmaker/prob/outer_product_hmm.py @@ -13,7 +13,6 @@ viterbi_step_cy = None import numpy as np - from partitura.score import Part, Score, ScoreLike NDArrayFloat = NDArray[np.float32] @@ -216,6 +215,7 @@ def __init__( r: Optional[np.ndarray] = None, other_prob: float = 1e-6, patience: int = 10, + **kwargs, ) -> None: """ Outer-product Hidden Markov Model for score following. 
diff --git a/matchmaker/prob/outer_product_hmm_audio.py b/matchmaker/prob/outer_product_hmm_audio.py index e6dc3b9..24bc8df 100644 --- a/matchmaker/prob/outer_product_hmm_audio.py +++ b/matchmaker/prob/outer_product_hmm_audio.py @@ -8,32 +8,27 @@ from partitura.score import Part, Score, ScoreLike from matchmaker.base import OnlineAlignment -from matchmaker.features.audio import QUEUE_TIMEOUT +from matchmaker.io.audio import QUEUE_TIMEOUT from matchmaker.utils.misc import RECVQueue, set_latency_stats +from matchmaker.utils.stream import STREAM_END NDArrayFloat = NDArray[np.float32] NDArrayInt = NDArray[np.int32] -DEFAULT_PITCH_ERROR_PROBS = { - "correct_pitch_prob": 0.9497, - "semi_tone_error_prob": 0.0145 / 2.0, - "whole_tone_error_prob": 0.0224 / 2.0, - "octave_error_prob": 0.0047 / 2.0, - "within_one_octave_error_prob": 0.0086 / 9.0 / 2.0, -} - -# DEFAULT_TRANSITIONS = [ -# (1, 1.0), # normal (i→i+1) -# (2, 1e-50), # deletion (i→i+2), HHMMState_simple.hpp: log10(-50) -# ] +# Nakamura et al. 2016 Section IV-B experimental parameters. +# Neighbourhood transitions a^{(nbh)}_{j,i} from nakamura_data.py: +# These are the "small transition probabilities" for the banded structure. +# Paper: a_{i,i} = 0 (self-transition handled by bottom HMM a00), +# a_{i,i+2} = 1e-50 (deletion, effectively 0). +# We use the empirical values from [13] (Nakamura JNMR 2014). 
DEFAULT_TRANSITIONS = [ - (-3, 0.001), - (-2, 0.001), - (-1, 0.002), - (0, 0.01342), - (1, 0.96), - (2, 0.01), - (3, 0.002), + (-3, 0.00509), + (-2, 0.00516), + (-1, 0.00886), + (0, 0.01342), # insertion (staying at same top state) + (1, 0.94531), # normal forward progression + (2, 0.00610), # deletion (skip one note) + (3, 0.00073), ] DEFAULT_D1 = 3 @@ -43,8 +38,11 @@ _FLUX_EXIT_BOOST: float = 1.0 _OTHER_PROB: float = 1e-6 -_PAUSE_ENTRY_PROB: float = 0.01 # probability of entering pause state from sound -_PAUSE_DURATION_SEC: float = 0.5 +# Paper IV-B: +# a_{0,1}^{(i)} = 1e-100 (pause entry: almost never enter pause) +# a_{1,1}^{(i)} = 0.999 (pause self-transition: once in pause, stay) +_PAUSE_ENTRY_PROB: float = 1e-100 +_PAUSE_SELF_TRANSITION: float = 0.999 _PAUSE_EMISSION_MAX: float = 1e-3 @@ -127,11 +125,13 @@ def __init__( reference_features: np.ndarray, queue: Optional[RECVQueue] = None, transitions: Optional[List[tuple[int, float]]] = None, - pitch_error_probs: Optional[dict[str, float]] = None, patience: int = 0, tempo: float = 120.0, sample_rate: int = 16000, hop_length: int = 320, + s_j: float = 1e-5, + r_i: Optional[np.ndarray] = None, + **kwargs, ) -> None: self.reference_features = reference_features OnlineAlignment.__init__( @@ -173,16 +173,10 @@ def __init__( self.transitions = ( transitions if transitions is not None else DEFAULT_TRANSITIONS ) - self.pitch_error_probs = ( - pitch_error_probs - if pitch_error_probs is not None - else DEFAULT_PITCH_ERROR_PROBS - ) self.other_prob = _OTHER_PROB self.sample_rate = int(sample_rate) self.hop_length = int(hop_length) self.pause_entry_prob = _PAUSE_ENTRY_PROB - self.pause_duration_sec = _PAUSE_DURATION_SEC self.pause_emission_max = _PAUSE_EMISSION_MAX # Transition setup with banded structure @@ -202,7 +196,23 @@ def __init__( row_sums = self.alpha.sum(axis=1, keepdims=True) self.alpha = self.alpha / row_sums - self.current_state = 0 + # Repeat/skip factorization (Nakamura Eq.11): + # a_{j,i} = 
a^{(nbh)}_{j,i} + s_j * r_i + # s_j: probability of stopping at event j before a repeat/skip + # r_i: probability of resuming at event i after a repeat/skip + self.S = np.full(self.n_states, float(s_j), dtype=float) + if r_i is not None: + self.r = np.asarray(r_i, dtype=float) + else: + self.r = np.ones(self.n_states, dtype=float) / self.n_states + + # Renormalize alpha to account for s_j mass: + # Σ_i a_{j,i} = Σ_i a^{(nbh)}_{j,i} + s_j * Σ_i r_i = 1 + # => Σ_i a^{(nbh)}_{j,i} = 1 - s_j (since Σ_i r_i = 1) + if s_j > 0: + self.alpha = self.alpha * (1.0 - self.S[:, None]) + + self.current_state_index = 0 self._warping_path = [] self._current_chord = np.zeros(88, dtype=int) self.patience = int(patience) @@ -223,13 +233,8 @@ def __init__( tempo=tempo, frame_rate=frame_rate, ) - self.a11 = float( - np.clip( - self._pause_self_transition_prob(self.pause_duration_sec, frame_rate), - 0.0, - 1.0, - ) - ) + # Paper IV-B: a_{1,1}^{(i)} = 0.999 (pause self-transition) + self.a11 = float(_PAUSE_SELF_TRANSITION) # Pause entry prob a01 (II-E) move_prob = 1.0 - self.a00 p_pause = float(np.clip(self.pause_entry_prob, 0.0, 1.0)) @@ -243,9 +248,21 @@ def __init__( self.e0 = np.clip(1.0 - self.a00 - self.a01, 1e-10, 1.0) self.e1 = float(np.clip(1.0 - self.a11, 1e-10, 1.0)) + # Precompute alpha diagonals and sliding window indices for vectorized forward_step + self._alpha_diags = [] + for d in range(-self.D2, self.D1 + 1): + self._alpha_diags.append(np.diagonal(self.alpha, offset=-d).copy()) + self._j_starts = np.maximum(0, np.arange(self.n_states) - self.D2) + self._j_ends = np.minimum(self.n_states, np.arange(self.n_states) + self.D1 + 1) + + @property + def warping_path(self) -> np.ndarray: + return np.array(self._warping_path).T + @property - def warping_path(self) -> NDArrayInt: - return (np.array(self._warping_path).T).astype(np.int32) + def current_position(self) -> float: + """Current score position in beats.""" + return float(self.state_space[self.current_state_index]) 
@staticmethod def _pause_self_transition_prob( @@ -286,8 +303,8 @@ def _compute_chord_self_transition_probs( return np.clip(1.0 - 1.0 / d_i, 1e-6, 1.0 - 1e-6) def is_still_following(self) -> bool: - if self.current_state is not None: - return self.current_state <= self.n_states - 1 + if self.current_state_index is not None: + return self.current_state_index <= self.n_states - 1 return False def __call__(self, input, *args, **kwargs) -> Optional[int]: @@ -321,10 +338,12 @@ def __call__(self, input, *args, **kwargs) -> Optional[int]: top_scores = probs[0::2] + probs[1::2] new_top = int(np.argmax(top_scores)) - self.current_state = new_top - self._warping_path.append((self.current_state, self.input_index)) + self.current_state_index = new_top + self._warping_path.append( + (float(self.state_space[self.current_state_index]), self.input_index) + ) self.input_index += 1 - return self.current_state + return self.current_state_index def compute_obs_likelihood( self, @@ -390,14 +409,31 @@ def forward_step( prev_sound = np.asarray(prev_probs[0::2], dtype=float) prev_pause = np.asarray(prev_probs[1::2], dtype=float) - # Emission - emit_sound = self.compute_obs_likelihood(observation) - emit_pause_scalar = self._compute_pause_emission(observation) + # --- Single _preprocess_obs call + inline emission computation --- + obs = _preprocess_obs(observation) + cqt = np.maximum(obs[:88] if obs.size >= 88 else obs, 0.0) + cqt_sum = cqt.sum() + + # Sound emission (from compute_obs_likelihood) + if cqt_sum <= 0: + emit_sound = np.full(N, 1e-300, dtype=float) + else: + cqt_norm = cqt / cqt_sum + em = self.chord_harmonic_mask @ cqt_norm + emit_sound = np.maximum(np.nan_to_num(em, nan=1e-12), 1e-12) + + # Pause emission (from _compute_pause_emission) + if cqt_sum <= 0: + emit_pause_scalar = min(1.0, self.pause_emission_max) + else: + var = float(np.var(cqt / cqt_sum)) + emit_pause_scalar = min( + max(1.0 / (1.0 + 200.0 * var), 1e-300), self.pause_emission_max + ) emit_pause = np.full(N, 
emit_pause_scalar, dtype=float) # Spectral-flux-driven exit boost - obs_flat = _preprocess_obs(observation) - flux = float(obs_flat[88]) if obs_flat.size > 88 else 0.0 + flux = float(obs[88]) if obs.size > 88 else 0.0 f = flux / (flux + 1.0) # [0,1) boost = 1.0 + _FLUX_EXIT_BOOST * f e0 = np.clip(self.e0 * boost, 1e-10, 1.0 - self.a01 - 1e-10) @@ -406,18 +442,27 @@ def forward_step( # Exit masses from each top state j (Eq.(6)) exit_mass = prev_sound * e0 + prev_pause * self.e1 # (N,) - # Compute neigh_sum_i for each i (banded, Eq.(9)) - neigh_sum = np.zeros(N, dtype=float) - for i in range(N): - j_start = max(0, i - self.D2) - j_end = min(N, i + self.D1 + 1) - ssum = 0.0 - for j in range(j_start, j_end): - a = float(self.alpha[j, i]) - if a <= 0: - continue - ssum += exit_mass[j] * a - neigh_sum[i] = ssum + # --- Vectorized neighbourhood sum (replaces O(N*(D1+D2)) Python loop) --- + # Global skip term: Σ_j exit_mass[j] * S[j] (O(N), computed once) + global_skip_sum = float(np.dot(exit_mass, self.S)) + + # Local neighbourhood transition: sum over diagonals of alpha + local_nbh = np.zeros(N, dtype=float) + for k, d in enumerate(range(-self.D2, self.D1 + 1)): + diag = self._alpha_diags[k] + L = len(diag) + src = max(0, d) # source index offset in exit_mass + dst = max(0, -d) # destination index offset in local_nbh + local_nbh[dst : dst + L] += exit_mass[src : src + L] * diag + + # Local skip via cumsum sliding window: O(N) instead of O(N*D) + eS = exit_mass * self.S + cumsum_eS = np.empty(N + 1, dtype=float) + cumsum_eS[0] = 0.0 + np.cumsum(eS, out=cumsum_eS[1:]) + local_skip = cumsum_eS[self._j_ends] - cumsum_eS[self._j_starts] + + neigh_sum = local_nbh + self.r * (global_skip_sum - local_skip) # Within-top bottom transitions within_sound = prev_sound * a00 @@ -448,21 +493,27 @@ def run( same_state_counter = 0 empty_counter = 0 if verbose: - pbar = progressbar.ProgressBar(maxval=self.n_states) + pbar = progressbar.ProgressBar( + maxval=len(self.state_space), + 
redirect_stdout=True, + redirect_stderr=True, + ) pbar.start() while self.is_still_following(): - prev_state = self.current_state + prev_state = self.current_state_index try: queue_input = self.queue.get(timeout=QUEUE_TIMEOUT) except Empty: break + if queue_input is STREAM_END: + break self.last_queue_update = time.time() if queue_input is not None: - current_state = self(queue_input) + self(queue_input) empty_counter = 0 - if current_state == prev_state: + if self.current_state_index == prev_state: if self.patience > 0: if same_state_counter < self.patience: same_state_counter += 1 @@ -472,13 +523,12 @@ def run( same_state_counter = 0 if verbose: - if current_state is not None: - pbar.update(int(current_state) + 1) # states starts with 0 + pbar.update(self.current_state_index) latency = time.time() - self.last_queue_update self.latency_stats = set_latency_stats( latency, self.latency_stats, self.input_index ) - yield current_state + yield self.current_position if verbose: pbar.finish() diff --git a/matchmaker/utils/eval.py b/matchmaker/utils/eval.py index 20e5192..a88d7ac 100644 --- a/matchmaker/utils/eval.py +++ b/matchmaker/utils/eval.py @@ -1,5 +1,3 @@ -from typing import TypedDict, Union - import numpy as np import scipy @@ -11,159 +9,97 @@ def transfer_positions( wp, ref_anns, frame_rate, - reverse=False, *, - mode: str = "auto", - reducer: str = "min", - state_offset: Union[int, str] = "auto", - output: str = "seconds", + domain: str = "score", + aggregation_func=None, ): """ - Transfer the positions of the reference annotations to the target annotations using the warping path. - - This function supports two common warping-path conventions: - - - **frame mode** (classic DTW-style): wp[0] and wp[1] are frame indices for reference/target features. - - **state mode** (HMM/score-state): wp[0] contains *reference state indices* and wp[1] contains *target frame indices*. + Transfer positions between score and performance using the warping path. 
Parameters ---------- wp : np.array with shape (2, T) - array of warping path. - warping_path[0] is the index of the reference (score) feature and warping_path[1] is the index of the target(input) feature. - ref_ann : List[float] - In **frame mode**, reference annotations in seconds. - In **state mode**, a sequence whose length equals the number of reference states (e.g., score unique_onsets); - the values are not used except for determining the number of states. + Warping path. wp[0] = score beats, wp[1] = performance frame indices. + ref_anns : array-like + Query positions (seconds for domain="score", + beats for domain="performance"). frame_rate : int - frame rate of the audio. - reverse : bool - If True, swap the direction (target -> reference). - mode : {"auto", "frame", "state"} - Warping-path convention. "auto" picks "state" when wp[0] looks like small discrete state indices. - reducer : {"min", "max", "median", "mean"} - In **state mode**, how to select a single representative target frame for each state when multiple wp entries - map to the same state. - state_offset : {"auto"} or int - In **state mode**, wp[0] may start at 0 or 1 (or have a leading start-state). "auto" chooses the offset that - best matches the expected number of states. - output : {"seconds", "frames"} - Return unit. "seconds" divides frames by frame_rate; "frames" returns frame indices. + Frame rate of the audio. + domain : {"score", "performance"} + Domain of the output. + "score": perf→score lookup. Given performance times (seconds), + return predicted score positions (beats). + "performance": score→perf lookup. Given score beats, return + predicted performance times (seconds). + aggregation_func : callable or None + Function to aggregate multiple values sharing the same key + (e.g., np.max, np.min, np.mean). 
If None, defaults to: + - domain="score": last entry in temporal order (tracker's + final decision at that frame) + - domain="performance": np.min (earliest arrival at that beat, + i.e. first-crossing rule) Returns ------- - predicted_targets : np.array with shape (T,) - Predicted target positions (seconds or frames depending on output). + predicted : np.array + Predicted positions in the target domain. """ - if output not in {"seconds", "frames"}: - raise ValueError(f"Invalid output={output!r}. Use 'seconds' or 'frames'.") - - if reverse: - x, y = wp[1], wp[0] - else: - x, y = wp[0], wp[1] - - if mode not in {"auto", "frame", "state"}: - raise ValueError(f"Invalid mode={mode!r}. Use 'auto', 'frame', or 'state'.") - - # Heuristic: state paths have small discrete indices (often << target frames), - # while frame paths typically cover most reference frames (unique count is large). - if mode == "auto": - x_unique = np.unique(x) - n_ref = len(ref_anns) - looks_like_state = (x_unique.size <= max(4, 2 * n_ref)) and ( - int(np.max(x)) <= max(10, 5 * n_ref) - ) - mode = "state" if looks_like_state else "frame" - - if mode == "frame": - # Causal nearest neighbor interpolation (reference seconds -> reference frames -> target frames) - ref_anns_frame = np.round(np.asarray(ref_anns) * frame_rate) - predicted_targets = np.ones(len(ref_anns_frame), dtype=float) * np.nan - - for i, r in enumerate(ref_anns_frame): - # 1) Scan all x values less than or equal to r and find the largest x value - past_indices = np.where(x <= r)[0] - if past_indices.size > 0: - # Find indices corresponding to the largest x value - max_x_val = x[past_indices[-1]] - max_x_indices = np.where(x == max_x_val)[0] - - # 2) Among all y values mapped to this x value, select the minimum y value - corresponding_y_values = y[max_x_indices] - predicted_targets[i] = float(np.min(corresponding_y_values)) - - if output == "frames": - return predicted_targets - return np.asarray(predicted_targets) / frame_rate - - # 
mode == "state" - # Goal: for each reference state index, select representative target frame from wp. - num_states = len(ref_anns) - predicted_frames = np.ones(num_states, dtype=float) * np.nan - - x_int = np.asarray(x, dtype=int) - y_int = np.asarray(y, dtype=int) - - if reducer not in {"min", "max", "median", "mean"}: - raise ValueError( - f"Invalid reducer={reducer!r}. Use 'min', 'max', 'median', or 'mean'." - ) - - if state_offset == "auto": - # Choose offset that maximizes overlap between expected states and observed wp state indices. - observed = np.unique(x_int) - candidates = [] - for off in (0, 1, int(np.min(x_int))): - if off not in candidates: - candidates.append(off) - best_off = candidates[0] - best_overlap = -1 - for off in candidates: - expected = np.arange(off, off + num_states, dtype=int) - overlap = np.intersect1d(observed, expected).size - if overlap > best_overlap: - best_overlap = overlap - best_off = off - offset = best_off + if domain not in {"score", "performance"}: + raise ValueError(f"Invalid domain={domain!r}. Use 'score' or 'performance'.") + + wp_score = wp[0].astype(float) + wp_perf = wp[1].astype(float) + queries = np.asarray(ref_anns, dtype=float) + + def _last(arr): + return arr[-1] + + if aggregation_func is None: + aggregation_func = _last if domain == "score" else np.min + + if domain == "score": + # Perf → Score: "at perf time t, what is the tracker's score position?" + # Group by perf frame, take the last entry by default (tracker's final decision). 
+ query_frames = queries * frame_rate + + sort_idx = np.argsort(wp_perf, kind="stable") + wp_perf_sorted = wp_perf[sort_idx] + wp_score_sorted = wp_score[sort_idx] + + unique_frames, first_idx = np.unique(wp_perf_sorted, return_index=True) + reduced_scores = np.empty(len(unique_frames)) + for g in range(len(unique_frames)): + start = first_idx[g] + end = ( + first_idx[g + 1] if g + 1 < len(unique_frames) else len(wp_score_sorted) + ) + reduced_scores[g] = aggregation_func(wp_score_sorted[start:end]) + + # unique_frames is monotonic → searchsorted for last frame ≤ query + indices = np.searchsorted(unique_frames, query_frames, side="right") - 1 + predicted = np.full(len(queries), np.nan) + valid = indices >= 0 + predicted[valid] = reduced_scores[indices[valid]] + return predicted else: - offset = int(state_offset) - - for s in range(num_states): - wp_state = s + offset - idx = np.where(x_int == wp_state)[0] - if idx.size == 0: - continue - vals = y_int[idx].astype(float) - if reducer == "min": - predicted_frames[s] = float(np.min(vals)) - elif reducer == "max": - predicted_frames[s] = float(np.max(vals)) - elif reducer == "median": - predicted_frames[s] = float(np.median(vals)) - else: # mean - predicted_frames[s] = float(np.mean(vals)) - - if output == "frames": - return predicted_frames - return predicted_frames / frame_rate - - -def transfer_from_score_to_predicted_perf(wp, score_annots, frame_rate, mode="auto"): - predicted_perf_idx = transfer_positions( - wp, - score_annots, - frame_rate, - mode=mode, - ) - return predicted_perf_idx - - -def transfer_from_perf_to_predicted_score(wp, perf_annots, frame_rate, mode="auto"): - predicted_score_idx = transfer_positions( - wp, perf_annots, frame_rate, reverse=True, mode=mode - ) - return predicted_score_idx + # Score → Perf: "when did the tracker first reach beat b?" + # Group by score position, aggregate perf frame values per group. 
+ sort_idx = np.argsort(wp_score, kind="stable") + wp_score_sorted = wp_score[sort_idx] + wp_perf_sorted = wp_perf[sort_idx] + + unique_beats, first_idx = np.unique(wp_score_sorted, return_index=True) + reduced_perf = np.empty(len(unique_beats)) + for g in range(len(unique_beats)): + start = first_idx[g] + end = first_idx[g + 1] if g + 1 < len(unique_beats) else len(wp_perf_sorted) + reduced_perf[g] = aggregation_func(wp_perf_sorted[start:end]) + + indices = np.searchsorted(unique_beats, queries, side="left") + predicted = np.full(len(queries), np.nan) + valid = indices < len(unique_beats) + predicted[valid] = reduced_perf[indices[valid]] + return predicted / frame_rate def get_evaluation_results( @@ -171,23 +107,25 @@ def get_evaluation_results( predicted_annots, total_counts, tolerances=TOLERANCES_IN_MILLISECONDS, - pcr_threshold=2_000, # 2 seconds in_seconds=True, ): if in_seconds: - errors_in_delay = (gt_annots - predicted_annots) * 1000 # in milliseconds + errors_in_delay = (gt_annots - predicted_annots) * 1000 else: errors_in_delay = gt_annots - predicted_annots - filtered_errors_in_delay = errors_in_delay[np.abs(errors_in_delay) <= pcr_threshold] - filtered_abs_errors_in_delay = np.abs(filtered_errors_in_delay) + abs_errors_in_delay = np.abs(errors_in_delay) results = { - "mean": float(f"{np.nanmean(filtered_abs_errors_in_delay):.4f}"), - "median": float(f"{np.nanmedian(filtered_abs_errors_in_delay):.4f}"), - "std": float(f"{np.nanstd(filtered_abs_errors_in_delay):.4f}"), - "skewness": float(f"{scipy.stats.skew(filtered_errors_in_delay):.4f}"), - "kurtosis": float(f"{scipy.stats.kurtosis(filtered_errors_in_delay):.4f}"), + "mean": float(f"{np.nanmean(abs_errors_in_delay):.4f}"), + "median": float(f"{np.nanmedian(abs_errors_in_delay):.4f}"), + "std": float(f"{np.nanstd(abs_errors_in_delay):.4f}"), + "skewness": float( + f"{scipy.stats.skew(errors_in_delay, nan_policy='omit'):.4f}" + ), + "kurtosis": float( + f"{scipy.stats.kurtosis(errors_in_delay, 
nan_policy='omit'):.4f}" + ), } if in_seconds: @@ -201,6 +139,4 @@ def get_evaluation_results( f"{np.sum(np.abs(errors_in_delay) <= tau) / total_counts:.4f}" ) - results["pcr"] = float(f"{len(filtered_errors_in_delay) / total_counts:.4f}") - results["count"] = len(filtered_abs_errors_in_delay) return results diff --git a/matchmaker/utils/misc.py b/matchmaker/utils/misc.py index ce58269..df6c415 100644 --- a/matchmaker/utils/misc.py +++ b/matchmaker/utils/misc.py @@ -431,7 +431,7 @@ def adjust_tempo_for_performance_file( ): """ Adjust the tempo of the score part to match the performance file. - We round up the tempo to the nearest 20 bpm to avoid too much optimization. + We round the tempo to the nearest 10 bpm to avoid too much optimization. Parameters ---------- @@ -449,9 +449,7 @@ def adjust_tempo_for_performance_file( else: target_length = librosa.get_duration(path=str(performance_file)) ratio = target_length / source_length - rounded_tempo = int( - (default_tempo / ratio + 19) // 20 * 20 - ) # round up to nearest 20 + rounded_tempo = int(round(default_tempo / ratio / 10) * 10) # round to nearest 10 print( f"default tempo: {default_tempo} (score length: {source_length}) -> adjusted_tempo: {rounded_tempo} (perf length: {target_length})" ) @@ -521,6 +519,15 @@ def save_nparray_to_csv(array: NDArray, save_path: str): writer.writerows(array) +def _beats_to_frames( + beats: np.ndarray, + ref_frame_to_beat: np.ndarray, +) -> np.ndarray: + """Convert beat positions to (float) frame indices via inverse interpolation.""" + frames = np.arange(len(ref_frame_to_beat), dtype=float) + return np.interp(beats, ref_frame_to_beat, frames) + + def plot_alignment( warping_path: np.ndarray, perf_annots: np.ndarray, @@ -533,92 +540,91 @@ def plot_alignment( ref_features: Optional[np.ndarray] = None, input_features: Optional[np.ndarray] = None, distance_func=None, + ref_frame_to_beat: Optional[np.ndarray] = None, ): - """Plot warping path, GT annotations, and predicted points in one 
figure. - - Layers (back to front): distance matrix → warping path → predicted → GT. - """ + """Plot warping path, GT annotations, and predicted points.""" save_dir.mkdir(parents=True, exist_ok=True) gt = np.asarray(perf_annots, dtype=float) pred = np.asarray(perf_annots_predicted, dtype=float) n = min(len(gt), len(pred)) gt, pred = gt[:n], pred[:n] - has_dist_matrix = ( + fig, ax = plt.subplots(figsize=(30, 30)) + + # Distance matrix background + show_dist = False + if ( ref_features is not None and input_features is not None and distance_func is not None - ) + ): + try: + if isinstance(distance_func, str): + dist = scipy.spatial.distance.cdist( + ref_features, input_features, metric=distance_func + ) + else: + dist = np.array( + [ + [distance_func(r, i) for i in input_features] + for r in ref_features + ], + dtype=np.float32, + ) + ax.imshow( + dist, + aspect="auto", + origin="lower", + interpolation="nearest", + extent=(0, input_features.shape[0] - 1, 0, ref_features.shape[0] - 1), + ) + show_dist = True + except Exception: + pass - fig, ax = plt.subplots(figsize=(30, 30)) + # x-axis: performance time in frames + x_gt = gt * float(frame_rate) + wp_x = warping_path[1] - if has_dist_matrix: - # DTW mode: everything in frame space - dist = scipy.spatial.distance.cdist( - ref_features, - input_features, - metric=distance_func, - ) - ax.imshow( - dist, - aspect="auto", - origin="lower", - interpolation="nearest", - extent=(0, input_features.shape[0] - 1, 0, ref_features.shape[0] - 1), - ) - x_gt = gt * float(frame_rate) - x_pred = pred * float(frame_rate) - if score_y is not None: - y = np.asarray(score_y, dtype=float)[:n] * float(frame_rate) - else: - y = np.arange(n) - ylabel = "score (frames)" - wp_x = warping_path[1] - wp_y = warping_path[0] + # y-axis: score position (beats) + wp_in_beats = np.issubdtype(warping_path[0].dtype, np.floating) + if state_space is not None and not wp_in_beats: + wp_y = state_space[warping_path[0]] + elif show_dist and wp_in_beats and 
ref_frame_to_beat is not None: + wp_y = _beats_to_frames(warping_path[0], ref_frame_to_beat) else: - # HMM mode: x in frames, y in beats via state_space - x_gt = gt * float(frame_rate) - x_pred = pred * float(frame_rate) - if score_y is None: - y = np.arange(n) - ylabel = "annotation index" - else: - y = np.asarray(score_y, dtype=float)[:n] - ylabel = "score position (beats)" - wp_x = warping_path[1] - if state_space is not None: - wp_y = state_space[warping_path[0]] - else: - wp_y = warping_path[0] - - # 1. Warping path - if has_dist_matrix: - ax.plot( - wp_x, - wp_y, - ".", - color="white", - alpha=0.7, - markersize=15, - label="warping path", - zorder=2, - ) + wp_y = warping_path[0] + + # GT score positions (y-axis for annotation dots) + if score_y is not None: + y_gt = np.asarray(score_y, dtype=float)[:n] + if show_dist and wp_in_beats and ref_frame_to_beat is not None: + y_gt = _beats_to_frames(y_gt, ref_frame_to_beat) else: - ax.plot( - wp_x, - wp_y, - ".", - color="lime", - alpha=0.5, - markersize=15, - label="warping path", - zorder=2, - ) + y_gt = np.arange(n) - # 2. Predicted points + # Predicted score positions at GT perf times (perf→score direction) + wp_x_sorted = np.asarray(wp_x, dtype=float) + wp_y_sorted = np.asarray(wp_y, dtype=float) + if len(wp_x_sorted) > 1: + y_pred = np.interp(x_gt, wp_x_sorted, wp_y_sorted) + else: + y_pred = y_gt + + # Plot layers + ax.plot( + wp_x, + wp_y, + ".", + color="white" if show_dist else "lime", + alpha=0.7 if show_dist else 0.5, + markersize=15, + label="warping path", + zorder=2, + ) ax.scatter( - x_pred, - y, + x_gt, + y_pred, label="predicted", s=80, alpha=0.9, @@ -627,11 +633,9 @@ def plot_alignment( linewidths=0, zorder=3, ) - - # 3. 
GT annotations (front)
     ax.scatter(
         x_gt,
-        y,
+        y_gt,
         label="ground truth",
         s=120,
         alpha=0.9,
@@ -641,8 +645,26 @@ def plot_alignment(
         zorder=4,
     )
 
+    if show_dist:
+        ax.set_xlim(0, input_features.shape[0] - 1)
+        ax.set_ylim(0, ref_features.shape[0] - 1)
+
+    # Beat tick labels when projected to frame space
+    if show_dist and wp_in_beats and ref_frame_to_beat is not None:
+        finite_beats = ref_frame_to_beat[np.isfinite(ref_frame_to_beat)]
+        if len(finite_beats) > 0:
+            beat_min, beat_max = finite_beats[0], finite_beats[-1]
+        else:
+            beat_min, beat_max = 0.0, 1.0
+        n_ticks = max(2, min(12, int(beat_max - beat_min) + 1))
+        beat_ticks = np.unique(
+            np.round(np.linspace(beat_min, beat_max, n_ticks)).astype(int)
+        )
+        ax.set_yticks(_beats_to_frames(beat_ticks.astype(float), ref_frame_to_beat))
+        ax.set_yticklabels([str(b) for b in beat_ticks])
+
     ax.set_xlabel("performance frame")
-    ax.set_ylabel(ylabel)
+    ax.set_ylabel("score position (beats)")
     ax.set_title(f"[{save_dir.name}] alignment ({name})")
     ax.grid(True, alpha=0.2)
     ax.legend(loc="best")
@@ -664,28 +686,29 @@ def save_debug_results(
     ref_features: Optional[np.ndarray] = None,
     input_features: Optional[np.ndarray] = None,
     distance_func=None,
+    ref_frame_to_beat: Optional[np.ndarray] = None,
 ):
     """Save debug outputs: warping path TSV, results JSON, and alignment plot."""
     save_dir = Path(save_dir)
     save_dir.mkdir(parents=True, exist_ok=True)
 
-    # 1. Warping path TSV + results JSON
+    # 1. Warping path TSV + results JSON + GT annotations
     save_nparray_to_csv(warping_path.T, (save_dir / f"wp_{run_name}.tsv").as_posix())
+    gt_pairs = np.column_stack([score_annots, perf_annots])
+    save_nparray_to_csv(gt_pairs, (save_dir / f"gt_{run_name}.tsv").as_posix())
     import json
 
     with open(save_dir / f"{run_name}.json", "w") as f:
         json.dump(eval_results, f, indent=4)
 
     # 2. 
Alignment plot - if state_space is not None: - score_y = state_space - else: - sx = np.asarray(score_annots, dtype=float) - score_y = ( - sx - if sx.ndim == 1 and len(sx) == len(perf_annots) and np.all(np.diff(sx) >= 0) - else None - ) + # score_y = beat positions for each annotation (y-axis of the plot) + sx = np.asarray(score_annots, dtype=float) + score_y = ( + sx + if sx.ndim == 1 and len(sx) == len(perf_annots) and np.all(np.diff(sx) >= 0) + else None + ) plot_alignment( warping_path, perf_annots, @@ -698,4 +721,5 @@ def save_debug_results( ref_features=ref_features, input_features=input_features, distance_func=distance_func, + ref_frame_to_beat=ref_frame_to_beat, ) diff --git a/matchmaker/utils/stream.py b/matchmaker/utils/stream.py index ca51389..33753ea 100644 --- a/matchmaker/utils/stream.py +++ b/matchmaker/utils/stream.py @@ -6,11 +6,15 @@ from __future__ import annotations +import threading import time from threading import Thread from types import TracebackType from typing import TYPE_CHECKING, Any, Callable, Optional, Type, Union +STREAM_START = threading.Event # call STREAM_START() to create per-instance event +STREAM_END = object() # put into queue to signal end-of-stream + if TYPE_CHECKING: # pragma: no cover from matchmaker.utils.processor import Processor @@ -43,6 +47,7 @@ def __init__( self.mock = mock self.listen = False self.init_time = None + self.stream_start = STREAM_START() def start_listening(self): """ diff --git a/pyproject.toml b/pyproject.toml index 1dfd23a..005b31d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "pymatchmaker" version = "0.2.1" description = "A package for real-time music alignment" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" license = { text = "Apache 2.0" } keywords = ["music", "alignment", "midi", "audio"] authors = [ @@ -18,6 +18,10 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming 
Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Cython", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", @@ -31,10 +35,10 @@ dependencies = [ "python-rtmidi>=1.5.8", "mido>=1.3.2", "numpy>=1.26.3,<2.0", - "scipy>=1.11.4,<1.15", + "scipy>=1.11.4", "librosa>=0.10.1", "pandas>=2.0.0", - "partitura>=1.7.0", + "partitura>=1.8.0", "progressbar2>=4.2.0", "python-hiddenmarkov>=0.1.3", "pyaudio>=0.2.14", diff --git a/run_examples.py b/run_examples.py index 4b13c59..245b1b7 100644 --- a/run_examples.py +++ b/run_examples.py @@ -31,6 +31,12 @@ def main(): group = parser.add_mutually_exclusive_group() group.add_argument("--audio", action="store_true", help="Use audio input mode") group.add_argument("--midi", action="store_true", help="Use MIDI input mode") + parser.add_argument( + "--method", + type=str, + default=None, + help="Score following method (e.g., arzt, dixon, outerhmm, audio_outerhmm)", + ) args = parser.parse_args() input_mode = "midi" if args.midi else "audio" @@ -39,7 +45,10 @@ def main(): print(f"Running matchmaker with the score file ({SCORE_FILE.name})...") print("-" * 50) - method = "outerhmm" if input_mode == "midi" else "arzt" + if args.method is not None: + method = args.method + else: + method = "outerhmm" if input_mode == "midi" else "arzt" # Initialize matchmaker (simulation mode) try: @@ -54,7 +63,7 @@ def main(): return # Run real-time score following - for current_position in mm.run(wait=True): + for current_position in mm.run(): timestamp = datetime.datetime.now().strftime("%H:%M:%S.%f")[:-3] print(f"[{timestamp}] Current beat position: {current_position}") diff --git a/tests/test_io_audio.py b/tests/test_io_audio.py index a313758..f4a55cd 100644 --- a/tests/test_io_audio.py +++ 
b/tests/test_io_audio.py @@ -227,7 +227,11 @@ def test_offline_input(self, mock_stdout=None): print("current time", self.stream.current_time) self.stream.join() - outputs = list(self.stream.queue.queue) + from matchmaker.utils.stream import STREAM_END + + outputs = [ + item for item in self.stream.queue.queue if item is not STREAM_END + ] for _, ftime in outputs: self.assertTrue(isinstance(ftime, float)) diff --git a/tests/test_matchmaker.py b/tests/test_matchmaker.py index 8a7d894..8de2704 100644 --- a/tests/test_matchmaker.py +++ b/tests/test_matchmaker.py @@ -5,6 +5,8 @@ import warnings from pathlib import Path +import numpy as np + from matchmaker import Matchmaker from matchmaker.dp import OnlineTimeWarpingArzt from matchmaker.dp.oltw_dixon import OnlineTimeWarpingDixon @@ -14,6 +16,7 @@ from matchmaker.io.midi import MidiStream from matchmaker.prob.hmm import PitchIOIHMM from matchmaker.prob.outer_product_hmm import OuterProductHMM +from matchmaker.prob.outer_product_hmm_audio import AudioOuterProductHMM warnings.filterwarnings("ignore", module="partitura") warnings.filterwarnings("ignore", module="librosa") @@ -22,21 +25,17 @@ class TestMatchmaker(unittest.TestCase): def setUp(self): # Set up paths to test files - self.score_file = "./tests/resources/Bach-fugue_bwv_858.musicxml" - self.performance_file_audio = "./tests/resources/Bach-fugue_bwv_858.mp3" - self.performance_file_midi = "./tests/resources/Bach-fugue_bwv_858.mid" + self.score_file = "./matchmaker/assets/simple_mozart_k265_var1.musicxml" + self.performance_file_audio = "./matchmaker/assets/simple_mozart_k265_var1.mp3" + self.performance_file_midi = "./matchmaker/assets/simple_mozart_k265_var1.mid" self.performance_file_annotations = ( - "./tests/resources/Bach-fugue_bwv_858_note_annotations.txt" + "./matchmaker/assets/simple_mozart_k265_var1_note_annotations.txt" + ) + self.performance_file_beat_annotations = ( + "./matchmaker/assets/simple_mozart_k265_var1_beat_annotations.txt" ) 
self.test_datasets = [ - { - "name": "bach_fugue_bwv_858", - "score": "./tests/resources/Bach-fugue_bwv_858.musicxml", - "audio": "./tests/resources/Bach-fugue_bwv_858.mp3", - "midi": "./tests/resources/Bach-fugue_bwv_858.mid", - "annotations": "./tests/resources/Bach-fugue_bwv_858_note_annotations.txt", - }, { "name": "simple_mozart_k265_var1", "score": "./matchmaker/assets/simple_mozart_k265_var1.musicxml", @@ -44,6 +43,13 @@ def setUp(self): "midi": "./matchmaker/assets/simple_mozart_k265_var1.mid", "annotations": "./matchmaker/assets/simple_mozart_k265_var1_note_annotations.txt", }, + { + "name": "bach_fugue_bwv_858", + "score": "./tests/resources/Bach-fugue_bwv_858.musicxml", + "audio": "./tests/resources/Bach-fugue_bwv_858.mp3", + "midi": "./tests/resources/Bach-fugue_bwv_858.mid", + "annotations": "./tests/resources/Bach-fugue_bwv_858_note_annotations.txt", + }, ] def test_matchmaker_audio_init(self): @@ -72,6 +78,7 @@ def test_matchmaker_audio_run(self): # When & Then: running the alignment process, the yielded result should be a float values for position_in_beat in mm.run(verbose=False): self.assertIsInstance(position_in_beat, float) + break def test_matchmaker_audio_run_with_result(self): # Given: a Matchmaker instance with audio input @@ -95,12 +102,11 @@ def test_matchmaker_audio_run_with_result(self): def test_matchmaker_audio_run_with_evaluation(self): for dataset in self.test_datasets: - for method in ["arzt", "dixon"]: + for method in ["arzt", "dixon", "audio_outerhmm"]: with self.subTest(dataset=dataset["name"], method=method): mm = Matchmaker( score_file=dataset["score"], performance_file=dataset["audio"], - wait=False, input_type="audio", method=method, ) @@ -116,15 +122,15 @@ def test_matchmaker_audio_run_with_evaluation(self): current_test = f"{dataset['name']}_{method}" results = mm.run_evaluation( dataset["annotations"], - debug=True, - save_dir=Path("./tests/results"), - run_name=current_test, + debug=False, + # 
save_dir=Path("./tests/results"), + # run_name=current_test, ) print(f"[{current_test}] RESULTS: {json.dumps(results, indent=4)}") # Then: the results should at least be 0.5 for threshold in ["300ms", "500ms", "1000ms"]: - self.assertGreaterEqual(results[threshold], 0.5) + self.assertGreaterEqual(results["ms"][threshold], 0.5) def test_matchmaker_audio_run_with_evaluation_cqt(self): # Given: a Matchmaker instance with audio input @@ -134,7 +140,6 @@ def test_matchmaker_audio_run_with_evaluation_cqt(self): wait=False, input_type="audio", feature_type="cqt", - distance_func="Cosine", method="arzt", ) try: @@ -154,7 +159,7 @@ def test_matchmaker_audio_run_with_evaluation_cqt(self): # Then: the results should at least be 0.5 for threshold in ["300ms", "500ms", "1000ms"]: - self.assertGreaterEqual(results[threshold], 0.5) + self.assertGreaterEqual(results["ms"][threshold], 0.5) def test_matchmaker_audio_run_with_evaluation_in_beats(self): # Given: a Matchmaker instance with audio input @@ -172,18 +177,14 @@ def test_matchmaker_audio_run_with_evaluation_in_beats(self): mm._has_run = True results = mm.run_evaluation( - "./tests/resources/Bach-fugue_bwv_858_beat_annotations.txt", - level="beat", - debug=True, - save_dir=Path("./tests/results"), - run_name="test_matchmaker_audio_run_with_evaluation_in_beats", + self.performance_file_annotations, domain="score", ) print(f"RESULTS: {json.dumps(results, indent=4)}") # Then: the results should at least be 0.5 for threshold in ["0.3b", "0.5b", "1b"]: - self.assertGreaterEqual(results[threshold], 0.5) + self.assertGreaterEqual(results["beat"][threshold], 0.5) def test_matchmaker_audio_run_with_evaluation_before_run(self): # Given: a Matchmaker instance with audio input @@ -226,6 +227,47 @@ def test_matchmaker_audio_arzt_init(self): self.assertIsInstance(mm.stream, AudioStream) self.assertIsInstance(mm.score_follower, OnlineTimeWarpingArzt) + def test_matchmaker_audio_outerhmm_init(self): + mm = Matchmaker( + 
score_file=self.score_file, + performance_file=self.performance_file_audio, + input_type="audio", + method="audio_outerhmm", + ) + + self.assertIsInstance(mm.stream, AudioStream) + self.assertIsInstance(mm.score_follower, AudioOuterProductHMM) + + def test_matchmaker_audio_outerhmm_run(self): + mm = Matchmaker( + score_file=self.score_file, + performance_file=self.performance_file_audio, + input_type="audio", + method="audio_outerhmm", + ) + + for position_in_beat in mm.run(verbose=False): + self.assertIsInstance(position_in_beat, float) + break + + def test_matchmaker_audio_rtf(self): + for method in ["arzt", "dixon", "audio_outerhmm"]: + with self.subTest(method=method): + mm = Matchmaker( + score_file=self.score_file, + performance_file=self.performance_file_audio, + input_type="audio", + method=method, + ) + list(mm.run(verbose=False)) + + results = mm.run_evaluation( + self.performance_file_annotations, + ) + self.assertIn("rtf", results) + self.assertGreater(results["rtf"], 0) + self.assertLess(results["rtf"], 1.0) + def test_matchmaker_with_frame_rate(self): # Given: a Matchmaker instance with audio input mm = Matchmaker( @@ -233,12 +275,12 @@ def test_matchmaker_with_frame_rate(self): performance_file=self.performance_file_audio, wait=False, input_type="audio", - frame_rate=100, + frame_rate=50, ) - # Then: the frame rate should be 100 - self.assertEqual(mm.frame_rate, 100) - self.assertEqual(mm.score_follower.frame_rate, 100) + # Then: the frame rate should be 50 + self.assertEqual(mm.frame_rate, 50) + self.assertEqual(mm.score_follower.frame_rate, 50) def test_matchmaker_invalid_input_type(self): # Test Matchmaker with invalid input type @@ -293,10 +335,10 @@ def test_matchmaker_midi_run(self): ) # When & Then: running the alignment process, - # the yielded result should be a float values + # the yielded result should be numeric (int state index for MIDI) for position_in_beat in mm.run(): - self.assertIsInstance(position_in_beat, float) - if 
position_in_beat >= 130: + self.assertIsInstance(position_in_beat, (int, float, np.integer)) + if position_in_beat >= 10: break diff --git a/tests/test_oltw_arzt.py b/tests/test_oltw_arzt.py index 5bf6083..599a02b 100644 --- a/tests/test_oltw_arzt.py +++ b/tests/test_oltw_arzt.py @@ -22,7 +22,7 @@ RNG = np.random.RandomState(1984) -SCIPY_DISTANCES = [ +_ALL_SCIPY_DISTANCES = [ "braycurtis", "canberra", "chebyshev", @@ -36,13 +36,14 @@ "dice", "hamming", "jaccard", - "kulczynski1", "rogerstanimoto", "russellrao", "sokalmichener", "sokalsneath", "yule", ] +# Filter to distances available in the installed scipy version +SCIPY_DISTANCES = [d for d in _ALL_SCIPY_DISTANCES if hasattr(sp_distance, d)] class TestOnlineTimeWarpingArzt(unittest.TestCase): diff --git a/tests/test_prob_hmm.py b/tests/test_prob_hmm.py index 9335866..b790820 100644 --- a/tests/test_prob_hmm.py +++ b/tests/test_prob_hmm.py @@ -197,7 +197,7 @@ def test_init(self): "./tests/resources/Bach-fugue_bwv_858_annotations.txt" ) - self.performance = process_audio_offline( - perf_info=self.performance_file_audio, + self.performance = process_midi_offline( + perf_info=self.performance_file_midi, processor=PitchProcessor(piano_range=True), )