diff --git a/.github/workflows/pypi_publish.yml b/.github/workflows/pypi_publish.yml index 5cfc5dd..e944c12 100644 --- a/.github/workflows/pypi_publish.yml +++ b/.github/workflows/pypi_publish.yml @@ -26,7 +26,7 @@ jobs: - name: Build wheels run: python -m cibuildwheel env: - CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*" + CIBW_BUILD: "cp310-* cp311-* cp312-* cp313-*" CIBW_ARCHS: "x86_64" CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_BEFORE_BUILD: | diff --git a/.github/workflows/test_pypi_publish.yml b/.github/workflows/test_pypi_publish.yml index 8328b29..cbdf340 100644 --- a/.github/workflows/test_pypi_publish.yml +++ b/.github/workflows/test_pypi_publish.yml @@ -28,7 +28,7 @@ jobs: - name: Build wheels run: python -m cibuildwheel env: - CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*" + CIBW_BUILD: "cp310-* cp311-* cp312-* cp313-*" CIBW_ARCHS: "x86_64" CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_BEFORE_BUILD: | diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 6c9bc9a..ff04d20 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -11,7 +11,7 @@ jobs: fail-fast: false matrix: platform: [ubuntu-latest, macos-latest] - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] runs-on: ${{ matrix.platform }} steps: diff --git a/README.md b/README.md index 4750e38..e845174 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ The full documentation for matchmaker is available online at [readthedocs.org](h ### Prerequisites -- Available Python version: 3.12 +- Available Python version: 3.10, 3.11, 3.12, 3.13 - [Fluidsynth](https://www.fluidsynth.org/) - [PortAudio](http://www.portaudio.com/) @@ -121,6 +121,7 @@ from matchmaker import Matchmaker mm = Matchmaker( score_file="path/to/score", + input_type="audio", device_name_or_index="MacBookPro Microphone", ) for current_position in mm.run(): @@ -132,18 +133,18 @@ for current_position in mm.run(): The repository includes a 
ready-to-use example script that demonstrates the complete workflow: ```bash -# Run with audio input (default) -python run_examples.py +# Run with audio input (uses arzt method as default) +python run_examples.py --audio -# Run with MIDI input -python run_examples.py --midi +# Run with MIDI input and specific method +python run_examples.py --midi --method hmm ``` This script runs a complete example with score following and evaluation, saving results to the `results/` directory. ### Testing with Different Methods or Features -For testing with Audio input, you can specify the alignment method as follows: +You can specify the alignment method and feature processor as follows: ```python from matchmaker import Matchmaker @@ -151,37 +152,42 @@ from matchmaker import Matchmaker mm = Matchmaker( score_file="path/to/score", input_type="audio", - method="dixon", # or "arzt" (default) + method="arzt", # see Alignment Methods section + processor="chroma", # see Features section ) for current_position in mm.run(): print(current_position) ``` For options regarding the `method`, please refer to the [Alignment Methods](#alignment-methods) section. -For options regarding the `feature_type`, please refer to the [Features](#features) section. +For options regarding the `processor`, please refer to the [Features](#features) section. ## Alignment Methods Matchmaker currently supports the following alignment methods: -- `"dixon"`: On-line time warping algorithm by S. Dixon (2005). Currently supports audio input; MIDI support coming soon. -- `"arzt"`: On-line time warping algorithm adapted from Brazier and Widmer (2020) (based on the work by Arzt et al. (2010)). Currently supports audio input; MIDI support coming soon. -- `"hmm"`: Hidden Markov Model-based score follower by Cancino-Chacón et al. (2023), based on the state-space score followers by Duan et al. (2011) and Jiang and Raphael (2020). Currently supports MIDI input; Audio support coming soon. 
+- `"arzt"`: On-line time warping algorithm adapted from Brazier and Widmer (2020) (based on the work by Arzt et al. (2010)). Supports audio and MIDI input. +- `"dixon"`: On-line time warping algorithm by S. Dixon (2005). Supports audio and MIDI input. +- `"outerhmm"`: Outer-product HMM score follower by E. Nakamura (2014). Supports audio and MIDI input. +- `"hmm"`: Hidden Markov Model-based score follower by Cancino-Chacón et al. (2023), based on the state-space score followers by Duan et al. (2011) and Jiang and Raphael (2020). Supports MIDI input. +- `"pthmm"`: Pitch-based HMM score follower. Supports MIDI input. ## Features Matchmaker currently supports the following feature types: - For audio: - - `"chroma"`: Chroma features. Default feature type for audio input. + - `"chroma"`: Chroma features. Default for audio input. - `"mfcc"`: Mel-frequency cepstral coefficients. - - `"mel"`: Mel-Spectrogram. - - `"logspectral"`: Log-spectral features used in Dixon (2005). + - `"cqt"`: Constant-Q transform. + - `"mel"`: Mel-spectrogram. + - `"lse"`: Log-spectral energy features used in Dixon (2005). + - `"cqt_spectral_flux"`: CQT-based spectral flux used in Nakamura et al. (2014). - For MIDI: - - `pianoroll`: Piano-roll features. Default feature type for MIDI input. - - `"pitch"`: Pitch features for MIDI input. - - `"pitchclass"`: Pitch class features for MIDI input. + - `"pitch_ioi"`: Pitch and inter-onset interval features. Default for MIDI input. + - `"pianoroll"`: Piano-roll features. + - `"pitchclass"`: Pitch class features. ## Configurations @@ -189,10 +195,8 @@ Initialization parameters for the `Matchmaker` class: - `score_file` (str): Path to the score file. - `input_type` (str): Type of input data. Options: `"audio"`, `"midi"`. -- `feature_type` (str): Type of feature to use. Options: `"chroma"`, `"mfcc"`, `"cqt"`, `"spectrogram"`, `"onset"`. -- `method` (str): Alignment method to use. Options: `"dixon"`, `"arzt"`, `"hmm"`. 
-- `sample_rate` (int): Sample rate of the input audio data. -- `frame_rate` (int): Frame rate of the input audio/MIDI data. +- `method` (str): Alignment method to use. See [Alignment Methods](#alignment-methods) for available options. +- `processor` (str): Type of feature processor to use. See [Features](#features) for available options. - `device_name_or_index` (str or int): The audio/MIDI device name or index you want to use. If `None`, the default device will be used. ## Citing Matchmaker diff --git a/matchmaker/dp/__init__.py b/matchmaker/dp/__init__.py index 3903e5f..4a063d3 100644 --- a/matchmaker/dp/__init__.py +++ b/matchmaker/dp/__init__.py @@ -1,12 +1,20 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ -Top module for dynamic programming-based alignment methods -""" +Dynamic programming-based alignment methods. -from .oltw_arzt import OnlineTimeWarpingArzt -from .oltw_dixon import OnlineTimeWarpingDixon +Each algorithm has a base class and two variants: + - Frame: fixed-rate features (audio) + - Event: onset-level features (MIDI) +""" -# Alias -OLTWArzt = OnlineTimeWarpingArzt -OLTWDixon = OnlineTimeWarpingDixon +from .oltw_arzt import ( + OnlineTimeWarpingArzt, + OnlineTimeWarpingArztEvent, + OnlineTimeWarpingArztFrame, +) +from .oltw_dixon import ( + OnlineTimeWarpingDixon, + OnlineTimeWarpingDixonEvent, + OnlineTimeWarpingDixonFrame, +) diff --git a/matchmaker/dp/oltw_arzt.py b/matchmaker/dp/oltw_arzt.py index a6749cf..422692d 100644 --- a/matchmaker/dp/oltw_arzt.py +++ b/matchmaker/dp/oltw_arzt.py @@ -1,7 +1,17 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ -On-line Dynamic Time Warping +On-line Time Warping (OLTWArzt) + +OLTW with step-size constraint and adaptive window, based on: + Arzt & Widmer (2010) "Simple Tempo Models for Real-Time Music Tracking" + Arzt & Widmer (2015) "Real-Time Music Tracking Using Multiple Performances + as a Reference" + +Classes: + OnlineTimeWarpingArzt — base class (common properties, step-size clamp, run loop) + 
OnlineTimeWarpingArztFrame — frame-level variant for audio (Cython-accelerated) + OnlineTimeWarpingArztEvent — event-level variant for MIDI (onset-by-onset) """ import time @@ -9,80 +19,226 @@ import numpy as np import progressbar +import scipy.spatial.distance from numpy.typing import NDArray from matchmaker.base import OnlineAlignment from matchmaker.dp.dtw_loop import oltw_arzt_loop -from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT +from matchmaker.features.audio import FRAME_RATE +from matchmaker.io.audio import QUEUE_TIMEOUT +from matchmaker.io.queue import RECVQueue +from matchmaker.io.stream import STREAM_END from matchmaker.utils import ( CYTHONIZED_METRICS_W_ARGUMENTS, CYTHONIZED_METRICS_WO_ARGUMENTS, distances, ) from matchmaker.utils.distances import Metric, vdist -from matchmaker.utils.misc import ( +from matchmaker.utils.errors import ( MatchmakerInvalidOptionError, MatchmakerInvalidParameterTypeError, - RECVQueue, - set_latency_stats, ) +from matchmaker.utils.misc import set_latency_stats + +STEP_SIZE: int = 3 +START_WINDOW_SIZE: Union[float, int] = 0.1 +WINDOW_SIZE: int = 10 + -STEP_SIZE: int = 5 -WINDOW_SIZE: int = 5 -START_WINDOW_SIZE: Union[float, int] = 0.25 +# --------------------------------------------------------------------------- +# Base class +# --------------------------------------------------------------------------- class OnlineTimeWarpingArzt(OnlineAlignment): - """ - Fast On-line Time Warping + """Base class for Arzt-style OLTW with step-size constraint. + + Subclasses must implement ``step()`` and may override ``reset()``, + ``get_window()``, and ``_init_distance_func()``. Parameters ---------- reference_features : np.ndarray - A 2D array with dimensions (`n_timesteps`, `n_features`) containing the - features of the reference the input is going to be aligned to. - + Feature matrix for the reference (score) sequence. window_size : int - Size of the window for searching the optimal path in the cumulative - cost matrix. 
- + Search window size (interpretation depends on subclass). step_size : int - Size of the step + Maximum position advance per input step. + start_window_size : int + Window size during warmup. + queue : RECVQueue or None + Input queue for streaming. + state_space : np.ndarray or None + Score positions (beats) for each reference element. + """ + + def __init__( + self, + reference_features: NDArray[np.float32], + window_size=WINDOW_SIZE, + step_size: int = STEP_SIZE, + start_window_size=START_WINDOW_SIZE, + queue: Optional[RECVQueue] = None, + state_space=None, + **kwargs, + ) -> None: + super().__init__(reference_features=reference_features) + self.N_ref: int = self.reference_features.shape[0] + self.queue = queue or RECVQueue() + self.step_size: int = step_size + self.state_space = state_space + self._ref_frame_to_beat: Optional[NDArray] = kwargs.get( + "ref_frame_to_beat", None + ) + # Subclasses set self._window_size and self._start_window_size + # after calling super().__init__ + self.init_position: int = kwargs.get("current_position", 0) + self.last_queue_update = time.time() + self.latency_stats: Dict[str, float] = { + "total_latency": 0, + "total_frames": 0, + "max_latency": 0, + "min_latency": float("inf"), + } + # Legacy attributes (used by some callers) + self.state_to_ref_time_map = kwargs.get("state_to_ref_time_map", None) + self.ref_to_state_time_map = kwargs.get("ref_to_state_time_map", None) - distance_func : str, tuple (str, dict) or callable - Distance function for computing pairwise distances. + self.reset() - start_window_size: int - Size of the starting window size. 
+ def reset(self) -> None: + self.current_position: int = self.init_position + self.positions: List[int] = [] + self._warping_path: List = [] + self.global_cost_matrix: NDArray[np.float32] = np.full( + (self.N_ref + 1, 2), np.inf, dtype=np.float32 + ) + self.input_index: int = 0 + self.input_features = None - Attributes - ---------- - reference_features : np.ndarray - See description above. + @property + def current_beat(self) -> float: + if self._ref_frame_to_beat is not None: + idx = min(self.current_position, len(self._ref_frame_to_beat) - 1) + return float(self._ref_frame_to_beat[idx]) + if self.state_space is not None: + idx = min(self.current_position, len(self.state_space) - 1) + return float(self.state_space[idx]) + return float(self.current_position) - window_size : int - See description above. + @property + def warping_path(self) -> NDArray[np.float32]: + if self._warping_path: + return np.array(self._warping_path).T + return np.empty((2, 0)) - step_size : int - See description above. + @property + def window_index(self) -> int: + return self.current_position - input_features : list - List with the input features (updates every time there is a step). + def __call__(self, input: NDArray[np.float32]) -> int: + self.step(input) + return self.current_position - current_position : int - Index of the current position. + def is_still_following(self) -> bool: + return self.current_position < self.N_ref - 1 - warping_path : list - List of tuples containing the current position and the corresponding - index in the array of `reference_features`. + def get_window(self) -> Tuple[int, int]: + """Return (start, end) of the search window.""" + raise NotImplementedError - positions : list - List of the positions for each input. 
+ def step(self, input_features: NDArray[np.float32]) -> None: + """Process one input element and update position.""" + raise NotImplementedError + + def _clamp_position(self, min_index: int) -> None: + """Apply step-size constraint: no backwards, max step_size forward.""" + if self.input_index > 0: + self.current_position = int( + np.clip( + min_index, + self.current_position, + self.current_position + self.step_size, + ) + ) + else: + self.current_position = min_index + + def run(self, verbose: bool = True) -> Generator[float, None, NDArray]: + """Run alignment from queue. + + Yields + ------ + float + Current score position in beats. + + Returns + ------- + np.ndarray + Warping path (2, T). + """ + self.reset() + + if verbose: + n = len(self.state_space) if self.state_space is not None else self.N_ref + pbar = progressbar.ProgressBar( + max_value=n, + redirect_stdout=True, + redirect_stderr=True, + ) + pbar.start() + + while self.is_still_following(): + item = self.queue.get(timeout=QUEUE_TIMEOUT) + if item is STREAM_END: + break + if item is None: + continue + features = item[0] if isinstance(item, tuple) else item + self.last_queue_update = time.time() + + # Frame subclass accumulates input_features for plotting + if hasattr(self, "_accumulate_input") and self._accumulate_input: + self.input_features = ( + np.concatenate((self.input_features, features)) + if self.input_features is not None + else features + ) + + self.step(features) + + if verbose: + pbar.update( + int(np.searchsorted(self.state_space, self.current_beat)) + if self.state_space is not None + else min(self.current_position, self.N_ref - 1) + ) + + latency = time.time() - self.last_queue_update + self.latency_stats = set_latency_stats( + latency, self.latency_stats, self.input_index + ) + yield self.current_beat + + if verbose: + pbar.finish() + + return self.warping_path + + +# --------------------------------------------------------------------------- +# Frame-level subclass (audio) +# 
--------------------------------------------------------------------------- + + +class OnlineTimeWarpingArztFrame(OnlineTimeWarpingArzt): + """Frame-level OLTW for audio input. + + Uses Cython-accelerated ``oltw_arzt_loop`` and Matchmaker distance + functions. Window size is in seconds (converted to frames via frame_rate). """ DEFAULT_DISTANCE_FUNC: str = "Manhattan" - distance_func: Callable[[NDArray[np.float32]], NDArray[np.float32]] - vdist: Callable[[NDArray[np.float32]], NDArray[np.float32]] def __init__( self, @@ -90,24 +246,38 @@ def __init__( window_size: int = WINDOW_SIZE, step_size: int = STEP_SIZE, distance_func: Union[ - str, - Callable, - Tuple[str, Dict[str, Any]], + str, Callable, Tuple[str, Dict[str, Any]] ] = DEFAULT_DISTANCE_FUNC, - start_window_size: int = START_WINDOW_SIZE, + start_window_size: Union[float, int] = START_WINDOW_SIZE, current_position: int = 0, frame_rate: int = FRAME_RATE, queue: Optional[RECVQueue] = None, - state_to_ref_time_map = None, - ref_to_state_time_map = None, - state_space = None, + state_to_ref_time_map=None, + ref_to_state_time_map=None, + state_space=None, **kwargs, ) -> None: - super().__init__(reference_features=reference_features) + super().__init__( + reference_features=reference_features, + window_size=window_size, + step_size=step_size, + start_window_size=start_window_size, + queue=queue, + state_space=state_space, + current_position=current_position, + state_to_ref_time_map=state_to_ref_time_map, + ref_to_state_time_map=ref_to_state_time_map, + **kwargs, + ) + self.frame_rate = frame_rate + self._window_size = int(np.round(window_size * self.frame_rate)) + self._start_window_size = int(np.round(start_window_size * frame_rate)) + self._accumulate_input = True - self.input_features: List[NDArray[np.float32]] = None - self.queue = queue or RECVQueue() + # Set distance function (Cython or callable) + self._init_distance_func(distance_func) + def _init_distance_func(self, distance_func): if not 
(isinstance(distance_func, (str, tuple)) or callable(distance_func)): raise MatchmakerInvalidParameterTypeError( parameter_name="distance_func", @@ -115,7 +285,6 @@ def __init__( actual_parameter_type=type(distance_func), ) - # Set local cost function if isinstance(distance_func, str): if distance_func not in CYTHONIZED_METRICS_WO_ARGUMENTS: raise MatchmakerInvalidOptionError( @@ -123,9 +292,7 @@ def __init__( valid_options=CYTHONIZED_METRICS_WO_ARGUMENTS, value=distance_func, ) - # If distance_func is a string self.distance_func = getattr(distances, distance_func)() - elif isinstance(distance_func, tuple): if distance_func[0] not in CYTHONIZED_METRICS_W_ARGUMENTS: raise MatchmakerInvalidOptionError( @@ -133,146 +300,32 @@ def __init__( valid_options=CYTHONIZED_METRICS_W_ARGUMENTS, value=distance_func[0], ) - # distance_func is a tuple with the arguments to instantiate - # the cost self.distance_func = getattr(distances, distance_func[0])( **distance_func[1] ) - elif callable(distance_func): - # If the local cost is a callable self.distance_func = distance_func - # A callable to compute the distance between the rows of matrix and a vector if isinstance(self.distance_func, Metric): self.vdist = vdist else: - # TODO: Speed this up somehow instead of list comprehension self.vdist = lambda X, y, lcf: np.array([lcf(x, y) for x in X]).astype( np.float32 ) - self.N_ref: int = self.reference_features.shape[0] - self.frame_rate = frame_rate - self.window_size: int = window_size * self.frame_rate - self.step_size: int = step_size - self.start_window_size: int = int(np.round(start_window_size * frame_rate)) - self.init_position: int = current_position - self.current_position: int = current_position - self.positions: List[int] = [] - self._warping_path: List = [] - self.global_cost_matrix: NDArray[np.float32] = ( - np.ones((reference_features.shape[0] + 1, 2), dtype=np.float32) * np.inf - ).astype(np.float32) - self.input_index: int = 0 - self.go_backwards: bool = False - 
self.update_window_index: bool = False - self.restart: bool = False - self.is_following: bool = False - self.last_queue_update = time.time() - self.latency_stats: Dict[str, float] = { - "total_latency": 0, - "total_frames": 0, - "max_latency": 0, - "min_latency": float("inf"), - } - self.state_to_ref_time_map = state_to_ref_time_map - self.ref_to_state_time_map = ref_to_state_time_map - self.state_space = state_space #if state_space != None else np.unique(self.reference_features.note_array()["onset_beat"]) - - @property - def warping_path(self) -> NDArray[np.int32]: - wp = (np.array(self._warping_path).T).astype(np.int32) - return wp - - def __call__(self, input: NDArray[np.float32]) -> int: - self.step(input) - return self.current_position - - def run(self, verbose: bool = True) -> Generator[int, None, NDArray[np.float32]]: - """Run the online alignment process. - - Parameters - ---------- - verbose : bool, optional - Whether to show progress bar, by default True - - Yields - ------ - int - Current position in the reference sequence - - Returns - ------- - NDArray[np.float32] - The warping path as a 2D array where each column contains - (reference_position, input_position) - """ - self.reset() - - if verbose: - pbar = progressbar.ProgressBar(max_value=self.N_ref, redirect_stdout=True) - - while self.is_still_following(): - features, f_time = self.queue.get(timeout=QUEUE_TIMEOUT) - self.last_queue_update = time.time() - self.input_features = ( - np.concatenate((self.input_features, features)) - if self.input_features is not None - else features - ) - self.step(features) - - if verbose: - pbar.update(int(self.current_position)) - - latency = time.time() - self.last_queue_update - self.latency_stats = set_latency_stats( - latency, self.latency_stats, self.input_index - ) - yield self.current_position - - if verbose: - pbar.finish() - - return self.warping_path - - def is_still_following(self): - # TODO: check stopping if the follower is stuck. 
- return self.current_position <= self.N_ref - 2 - - def reset(self) -> None: - self.current_position = self.init_position - self.positions = [] - self._warping_path: List = [] - self.global_cost_matrix = ( - np.ones((self.reference_features.shape[0] + 1, 2), dtype=np.float32) - * np.inf - ) - self.input_index = 0 - self.update_window_index = False - def get_window(self) -> Tuple[int, int]: - w_size = self.window_size - if self.window_index < self.start_window_size: - w_size = self.start_window_size - window_start = max(self.window_index - w_size, 0) - window_end = min(self.window_index + w_size, self.N_ref) - return window_start, window_end - - @property - def window_index(self) -> int: - return self.current_position + w = self._window_size + if self.window_index < self._start_window_size: + w = self._start_window_size + start = max(self.window_index - w, 0) + end = min(self.window_index + w, self.N_ref) + return start, end def step(self, input_features: NDArray[np.float32]) -> None: - """ - Update the current position and the warping path. - """ min_costs = np.inf min_index = max(self.window_index - self.step_size, 0) window_start, window_end = self.get_window() - # compute local cost beforehand as it is much faster (~twice as fast) window_cost = self.vdist( self.reference_features[window_start:window_end], input_features.squeeze(), @@ -289,25 +342,139 @@ def step(self, input_features: NDArray[np.float32]) -> None: min_index=min_index, ) - # adapt current_position: do not go backwards, - # but also go a maximum of N steps forward + self._clamp_position(min_index) + self._warping_path.append((self.current_beat, self.input_index)) + self.input_index += 1 + + +# --------------------------------------------------------------------------- +# Event-level subclass (MIDI) +# --------------------------------------------------------------------------- + + +class OnlineTimeWarpingArztEvent(OnlineTimeWarpingArzt): + """Event-level OLTW for MIDI input. 
+ + Each step processes one onset event. Uses scipy for distance computation + and a pure-Python rolling DP loop. Window size is in number of events. - if self.input_index == 0: - # enforce the first time step to stay at the - # initial position - self.current_position = min( # TODO: Is this necessary? - max(self.current_position, min_index), - self.current_position, + When queue provides ``(feature, timestamp)`` tuples, the warping path + records ``(score_beat, perf_time_sec)`` instead of ``(beat, input_index)``. + """ + + def __init__( + self, + reference_features: NDArray[np.float32], + window_size: int = 30, + step_size: int = 5, + start_window_size: int = 5, + distance_func: str = "cosine", + queue: Optional[RECVQueue] = None, + state_space=None, + **kwargs, + ) -> None: + super().__init__( + reference_features=reference_features, + window_size=window_size, + step_size=step_size, + start_window_size=start_window_size, + queue=queue, + state_space=state_space, + **kwargs, + ) + self._window_size = window_size + self._start_window_size = start_window_size + self._scipy_metric = distance_func + + def get_window(self) -> Tuple[int, int]: + w = self._window_size + if self.input_index < self._start_window_size: + w = self._start_window_size + start = max(self.window_index - self.step_size, 0) + end = min(start + w, self.N_ref) + return start, end + + def step( + self, input_features: NDArray[np.float32], perf_time: float = None + ) -> None: + feat = np.asarray(input_features, dtype=np.float32).squeeze() + + window_start, window_end = self.get_window() + window_cost = scipy.spatial.distance.cdist( + self.reference_features[window_start:window_end], + feat.reshape(1, -1), + metric=self._scipy_metric, + ).flatten() + + # Rolling 2-column DP (same logic as oltw_arzt_loop) + min_costs = np.inf + min_index = max(self.window_index - self.step_size, 0) + + for idx_w, score_index in enumerate(range(window_start, window_end)): + si = score_index + 1 # 1-indexed in cost matrix 
+ + if score_index == 0 and self.input_index == 0: + self.global_cost_matrix[1, 1] = window_cost[idx_w] + min_costs = window_cost[idx_w] + min_index = 0 + continue + + local_dist = window_cost[idx_w] + d1 = self.global_cost_matrix[si - 1, 1] + local_dist + d2 = self.global_cost_matrix[si, 0] + local_dist + d3 = self.global_cost_matrix[si - 1, 0] + local_dist + best = min(d1, d2, d3) + self.global_cost_matrix[si, 1] = best + + norm_cost = best / (self.input_index + score_index + 1.0) + if norm_cost < min_costs: + min_costs = norm_cost + min_index = score_index + + # Shift columns + self.global_cost_matrix[:, 0] = self.global_cost_matrix[:, 1] + self.global_cost_matrix[:, 1] = np.inf + + self._clamp_position(min_index) + t = perf_time if perf_time is not None else self.input_index + self._warping_path.append((self.current_beat, t)) + self.input_index += 1 + + def run(self, verbose: bool = True) -> Generator[float, None, NDArray]: + """Run from queue. Extracts timestamp from (feature, time) tuples.""" + self.reset() + if verbose: + pbar = progressbar.ProgressBar( + max_value=self.N_ref, + redirect_stdout=True, + redirect_stderr=True, ) - else: - self.current_position = min( - max(self.current_position, min_index), - self.current_position + self.step_size, + pbar.start() + + while self.is_still_following(): + item = self.queue.get(timeout=QUEUE_TIMEOUT) + if item is STREAM_END: + break + if item is None: + continue + if isinstance(item, tuple): + features, perf_time = item[0], float(item[1]) + else: + features, perf_time = item, None + self.last_queue_update = time.time() + self.step(features, perf_time=perf_time) + + if verbose: + pbar.update(min(self.current_position, self.N_ref - 1)) + latency = time.time() - self.last_queue_update + self.latency_stats = set_latency_stats( + latency, self.latency_stats, self.input_index ) + yield self.current_beat - self._warping_path.append((self.current_position, self.input_index)) - # update input index - self.input_index += 1 + 
if verbose: + pbar.finish() + return self.warping_path if __name__ == "__main__": diff --git a/matchmaker/dp/oltw_dixon.py b/matchmaker/dp/oltw_dixon.py index 8c55a05..0fb1821 100644 --- a/matchmaker/dp/oltw_dixon.py +++ b/matchmaker/dp/oltw_dixon.py @@ -1,20 +1,34 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ -On-line Dynamic Time Warping (Dixon 2005) +On-line Time Warping (OLTWDixon) + +OLTW with backward-forward search, based on: + Dixon (2005) "An On-Line Time Warping Algorithm for Tracking Musical + Performances" (IJCAI) + Dixon (2005) "Live Tracking of Musical Performances Using On-Line Time + Warping" (DAFx) + +Classes: + OnlineTimeWarpingDixon — base class (common properties, run loop) + OnlineTimeWarpingDixonFrame — frame-level variant for audio (backward-forward search) + OnlineTimeWarpingDixonEvent — event-level variant for MIDI (onset-by-onset) """ import time from enum import IntEnum -from typing import Dict +from typing import Dict, Generator, Optional import numpy as np import progressbar -import scipy +import scipy.spatial.distance from numpy.typing import NDArray from matchmaker.base import OnlineAlignment -from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT +from matchmaker.features.audio import FRAME_RATE +from matchmaker.io.audio import QUEUE_TIMEOUT +from matchmaker.io.queue import RECVQueue +from matchmaker.io.stream import STREAM_END from matchmaker.utils.misc import set_latency_stats @@ -25,78 +39,154 @@ class Direction(IntEnum): MAX_RUN_COUNT: int = 3 -FRAME_PER_SEG = 1 -WINDOW_SIZE = 10 # seconds +WINDOW_SIZE = 10 # seconds for frame, events for event + + +# --------------------------------------------------------------------------- +# Base class +# --------------------------------------------------------------------------- class OnlineTimeWarpingDixon(OnlineAlignment): - """ - On-line Dynamic Time Warping (Dixon) + """Base class for Dixon-style OLTW. 
+ + Subclasses must implement ``step()``, ``reset()``, ``is_still_following()``, + and the ``current_beat`` / ``warping_path`` properties. Parameters ---------- reference_features : np.ndarray - A 2D array with dimensions (`F(n_features)`, `T(n_timesteps)`) containing the - features of the reference the input is going to be aligned to. + Feature matrix for the reference (score) sequence. + queue : RECVQueue or None + Input queue for streaming. + max_run_count : int + Max consecutive same-direction advances. + distance_func : str + Distance metric. + state_space : np.ndarray or None + Score positions (beats). + """ - distance_func : str, tuple (str, dict) or callable - Distance function for computing pairwise distances. + DEFAULT_DISTANCE_FUNC: str = "euclidean" - window_size : int - Size of the window for searching the optimal path in the cumulative cost matrix. + def __init__( + self, + reference_features, + queue=None, + max_run_count=MAX_RUN_COUNT, + distance_func=DEFAULT_DISTANCE_FUNC, + state_space=None, + **kwargs, + ): + super().__init__(reference_features=reference_features) + self.queue = queue + self.N_ref = self.reference_features.shape[0] + self.max_run_count = max_run_count + self.distance_func = ( + distance_func if isinstance(distance_func, str) else "euclidean" + ) + self.state_space = state_space + self._ref_frame_to_beat = kwargs.get("ref_frame_to_beat", None) + self.state_to_ref_time_map = kwargs.get("state_to_ref_time_map", None) + self.ref_to_state_time_map = kwargs.get("ref_to_state_time_map", None) + self.current_position = 0 + self.input_index = 0 + self.last_queue_update = time.time() + self.latency_stats: Dict[str, float] = { + "total_latency": 0, + "total_frames": 0, + "max_latency": 0, + "min_latency": float("inf"), + } - max_run_count : int - Maximum consecutive advances in one direction before the other - direction is forced. Constrains the slope to [1/max_run_count, max_run_count]. 
+ def __call__(self, input_features: NDArray[np.float32]) -> int: + self.step(input_features) + return self.current_position - frame_per_seg : int - Number of frames per segment (audio chunk). + def run(self, verbose=True) -> Generator[float, None, NDArray]: + """Run alignment from queue.""" + self.reset() - frame_rate : int - Frame rate of the audio stream. + if verbose: + n = len(self.state_space) if self.state_space is not None else self.N_ref + pbar = progressbar.ProgressBar( + max_value=n, + redirect_stdout=True, + redirect_stderr=True, + ) + pbar.start() - Attributes - ---------- - warping_path : np.ndarray [shape=(2, T)] - Warping path. ``warping_path[0]`` = reference index, - ``warping_path[1]`` = input index. - """ + while self.is_still_following(): + item = self.queue.get(timeout=QUEUE_TIMEOUT) + if item is STREAM_END: + break + if item is None: + continue + input_feature = item[0] if isinstance(item, tuple) else item + self.last_queue_update = time.time() + self.step(input_feature) - DEFAULT_DISTANCE_FUNC: str = "euclidean" - distance_func: str + if verbose: + pbar.update( + int(np.searchsorted(self.state_space, self.current_beat)) + if self.state_space is not None + else min(self.current_position, self.N_ref - 1) + ) + + latency = time.time() - self.last_queue_update + self.latency_stats = set_latency_stats( + latency, self.latency_stats, self.input_index + ) + yield self.current_beat + + if verbose: + pbar.finish() + + return self.warping_path + + +# --------------------------------------------------------------------------- +# Frame-level subclass (audio) +# --------------------------------------------------------------------------- + + +class OnlineTimeWarpingDixonFrame(OnlineTimeWarpingDixon): + """Frame-level OLTW with backward-forward search for audio input. + + Window size is in seconds (converted to frames via frame_rate). 
+ """ def __init__( self, reference_features, - queue, + queue=None, window_size=WINDOW_SIZE, - distance_func=DEFAULT_DISTANCE_FUNC, + distance_func=OnlineTimeWarpingDixon.DEFAULT_DISTANCE_FUNC, max_run_count=MAX_RUN_COUNT, - frame_per_seg=FRAME_PER_SEG, frame_rate=FRAME_RATE, - state_to_ref_time_map = None, - ref_to_state_time_map = None, - state_space = None, + state_to_ref_time_map=None, + ref_to_state_time_map=None, + state_space=None, **kwargs, ): - super().__init__(reference_features=reference_features) - self.queue = queue - self.N_ref = self.reference_features.shape[0] + super().__init__( + reference_features=reference_features, + queue=queue, + max_run_count=max_run_count, + distance_func=distance_func, + state_space=state_space, + state_to_ref_time_map=state_to_ref_time_map, + ref_to_state_time_map=ref_to_state_time_map, + **kwargs, + ) self.frame_rate = frame_rate self.w = int(window_size * self.frame_rate) - self.distance_func = distance_func.lower() - self.max_run_count = max_run_count - self.frame_per_seg = frame_per_seg - self.state_to_ref_time_map = state_to_ref_time_map - self.ref_to_state_time_map = ref_to_state_time_map - self.state_space = state_space self.reset() def reset(self): - """Reset mutable state for a new alignment run.""" self.input_features = np.empty((0, self.reference_features.shape[1])) self.acc_dist_matrix = np.full((self.w, self.w), np.inf, dtype=np.float32) - self.wp = np.array([[0, 0]]).T # [shape=(2, T)] + self.wp = np.array([[0, 0]]).T self.ref_pointer = 0 self.input_pointer = 0 self.run_count_ref = 0 @@ -106,7 +196,7 @@ def reset(self): self.current_position = 0 self.input_index = 0 self.last_queue_update = time.time() - self.latency_stats: Dict[str, float] = { + self.latency_stats = { "total_latency": 0, "total_frames": 0, "max_latency": 0, @@ -115,9 +205,21 @@ def reset(self): self._initialized = False @property - def warping_path(self) -> NDArray[np.float32]: # [shape=(2, T)] + def current_beat(self) -> float: + if 
self._ref_frame_to_beat is not None: + idx = min(self.best_ref, len(self._ref_frame_to_beat) - 1) + return float(self._ref_frame_to_beat[idx]) + return float(self.best_ref) + + @property + def warping_path(self) -> NDArray[np.float32]: return self.wp + def is_still_following(self): + return self.ref_pointer <= (self.N_ref - 1) + + # ---- internal methods ---- + def _ref_offset(self): return max(0, self.ref_pointer - self.w) @@ -131,24 +233,13 @@ def _input_win_size(self): return min(self.w, self.input_pointer) def offset(self): - """Calculate the offset for the current window position.""" return np.array([self._ref_offset(), self._input_offset()]) def _compute_cell(self, r_win, i_win): - """ - Compute DTW accumulated cost for cell (r_win, i_win) in window coordinates. - - Uses Sakoe-Chiba symmetric weighting (DAFx 2005, Eq. 3): - D[i,j] = min(D[i-1,j] + d, D[i,j-1] + d, D[i-1,j-1] + 2d) - - Matrix convention: acc_dist_matrix[ref_win, input_win] - """ abs_ref = self._ref_offset() + r_win abs_input = self._input_offset() + i_win - ref_feature = self.reference_features[abs_ref] input_feature = self.input_features[abs_input] - local_dist = scipy.spatial.distance.cdist( ref_feature.reshape(1, -1), input_feature.reshape(1, -1), @@ -166,145 +257,85 @@ def _compute_cell(self, r_win, i_win): self.acc_dist_matrix[r_win - 1, i_win] + local_dist ) else: - candidates = [ + self.acc_dist_matrix[r_win, i_win] = min( self.acc_dist_matrix[r_win - 1, i_win] + local_dist, self.acc_dist_matrix[r_win, i_win - 1] + local_dist, self.acc_dist_matrix[r_win - 1, i_win - 1] + 2 * local_dist, - ] - self.acc_dist_matrix[r_win, i_win] = min(candidates) + ) def _accept_input(self, input_features): - """Store input features and advance input pointer.""" self.input_features = np.vstack([self.input_features, input_features]) - self.input_pointer += self.frame_per_seg + self.input_pointer += 1 def _compute_input_column(self): - """Shift the window left if full, then fill the new rightmost column.""" 
if self.input_pointer > self.w: self.acc_dist_matrix[:, :-1] = self.acc_dist_matrix[:, 1:] self.acc_dist_matrix[:, -1] = np.inf - input_col = self._input_win_size() - 1 for r in range(self._ref_win_size()): self._compute_cell(r, input_col) def _advance_ref(self): - """Advance ref pointer, shift the window up if full, then fill the new bottom row.""" - self.ref_pointer += self.frame_per_seg - + self.ref_pointer += 1 if self.ref_pointer > self.w: self.acc_dist_matrix[:-1, :] = self.acc_dist_matrix[1:, :] self.acc_dist_matrix[-1, :] = np.inf - ref_row = self._ref_win_size() - 1 for c in range(self._input_win_size()): self._compute_cell(ref_row, c) def _determine_direction(self): - """Apply MaxRunCount slope constraint to the suggested direction""" - suggested_direction = self.get_expand_direction() - + suggested = self._get_expand_direction() if self.run_count_ref >= self.max_run_count: return Direction.TARGET elif self.run_count_input >= self.max_run_count: return Direction.REF - else: - return suggested_direction - - def get_expand_direction(self): - """ - Determine expansion direction (GetInc from Dixon 2005, IJCAI Fig. 1). - - Searches the L-shaped boundary of the sliding window — the last ref row - and the last input column — for the cell with minimum normalised - accumulated cost. The direction is chosen based on where that minimum - falls: - - Corner (last row AND last col) → BOTH - - In the column (not at last row) → TARGET (advance input to catch up) - - In the row (not at last col) → REF (advance ref to catch up) - - Normalization: cost / (1 + i + j), an O(1) approximation of the true - path-length divisor (DAFx 2005, Sec. 2). 
- """ + return suggested + + def _get_expand_direction(self): ref_ws = self._ref_win_size() input_ws = self._input_win_size() ref_off = self._ref_offset() input_off = self._input_offset() - - # Current row and column in window coordinates - row = ref_ws - 1 # last ref row - col = input_ws - 1 # last input column - + row = ref_ws - 1 + col = input_ws - 1 best_cost = np.inf - best_row = row - best_col = col + best_row, best_col = row, col - # Search current column (last input column) across all ref rows for r in range(ref_ws): - abs_r = ref_off + r - abs_i = input_off + col - cost = self.acc_dist_matrix[r, col] - norm_cost = cost / (1 + abs_r + abs_i) - if norm_cost < best_cost: - best_cost = norm_cost - best_row = r - best_col = col - - # Search current row (last ref row) across all input columns + cost = self.acc_dist_matrix[r, col] / (1 + ref_off + r + input_off + col) + if cost < best_cost: + best_cost = cost + best_row, best_col = r, col + for c in range(input_ws): - abs_r = ref_off + row - abs_i = input_off + c - cost = self.acc_dist_matrix[row, c] - norm_cost = cost / (1 + abs_r + abs_i) - if norm_cost < best_cost: - best_cost = norm_cost - best_row = row - best_col = c - - # Convert best window coords to absolute + cost = self.acc_dist_matrix[row, c] / (1 + ref_off + row + input_off + c) + if cost < best_cost: + best_cost = cost + best_row, best_col = row, c + self.best_ref = ref_off + best_row self.best_input = input_off + best_col - # Determine direction based on where the best point is if best_row == row and best_col == col: return Direction.BOTH elif best_row < row: - # Best is in the column (not at current row) → input needs to catch up return Direction.TARGET - else: - # Best is in the row (not at current col) → ref needs to catch up - return Direction.REF + return Direction.REF def save_history(self): - """Append current best alignment point to warping path.""" - new_point = np.array([[self.best_ref], [self.best_input]]) + new_point = 
np.array([[self.current_beat], [self.best_input]]) self.wp = np.concatenate((self.wp, new_point), axis=1) - def __call__(self, input_features: NDArray[np.float32]) -> int: - self.step(input_features) - return self.current_position - def step(self, input_features): - """Process one input frame and update alignment state. - - Handles REF-only catch-up internally so that each call - consumes exactly one input frame. - - Parameters - ---------- - input_features : NDArray[np.float32] - A single input feature frame. - """ - # First call: initialization (matches run()'s pre-loop logic) if not self._initialized: self._accept_input(input_features) - self.ref_pointer += self.frame_per_seg + self.ref_pointer += 1 self._compute_cell(0, 0) self._initialized = True self.input_index += 1 return - # Phase 1: REF-only catch-up (0 or more iterations) direction = None while self.is_still_following(): direction = self._determine_direction() @@ -319,18 +350,16 @@ def step(self, input_features): if not self.is_still_following(): return - # Phase 2: consume input (direction is TARGET or BOTH) self._accept_input(input_features) self._compute_input_column() - if direction is not Direction.TARGET: # BOTH + if direction is not Direction.TARGET: self._advance_ref() - # Update run counts if direction == Direction.TARGET: self.run_count_input += 1 self.run_count_ref = 0 - else: # BOTH + else: self.run_count_ref = 1 self.run_count_input = 0 @@ -338,48 +367,212 @@ def step(self, input_features): self.current_position = self.best_ref self.input_index += 1 - def is_still_following(self): - return self.ref_pointer <= (self.N_ref - self.frame_per_seg) - - def run(self, verbose=True): - """Run the online alignment process. 
- - Parameters - ---------- - verbose : bool, optional - Whether to show progress bar, by default True - - Yields - ------ - int - Current position in the reference sequence - - Returns - ------- - NDArray[np.float32] - The warping path as a 2D array where each column contains - (reference_position, input_position) - """ + +# --------------------------------------------------------------------------- +# Event-level subclass (MIDI) +# --------------------------------------------------------------------------- + + +class OnlineTimeWarpingDixonEvent(OnlineTimeWarpingDixon): + """Event-level OLTW for MIDI input. + + Each step processes one onset event. Uses sparse cost matrix with + path-length normalization and reference catch-up based on argmin + of normalized path cost. + + Parameters + ---------- + c : int + Search width in number of events. + """ + + def __init__( + self, + reference_features: NDArray[np.float32], + queue: Optional[RECVQueue] = None, + c: int = 30, + max_run_count: int = MAX_RUN_COUNT, + distance_func: str = "euclidean", + state_space=None, + **kwargs, + ) -> None: + super().__init__( + reference_features=reference_features, + queue=queue, + max_run_count=max_run_count, + distance_func=distance_func, + state_space=state_space, + **kwargs, + ) + self.ref = self.reference_features.astype(np.float32) + self.c = c self.reset() + def reset(self) -> None: + self._D: Dict = {} + self._L: Dict = {} + self._inputs = [] + self.t = -1 + self.j = -1 + self.run_count_input = 0 + self.run_count_ref = 0 + self.current_position = 0 + self.input_index = 0 + self._warping_path = [] + self.last_queue_update = time.time() + self.latency_stats = { + "total_latency": 0, + "total_frames": 0, + "max_latency": 0, + "min_latency": float("inf"), + } + + @property + def current_beat(self) -> float: + if self.state_space is not None: + idx = min(self.current_position, len(self.state_space) - 1) + return float(self.state_space[idx]) + return float(self.current_position) + + 
@property + def warping_path(self) -> NDArray: + return ( + np.array(self._warping_path).T if self._warping_path else np.empty((2, 0)) + ) + + def is_still_following(self) -> bool: + return self.current_position < self.N_ref - 1 + + # ---- cost matrix (sparse) ---- + + def _local_cost(self, ref_idx, input_idx): + return scipy.spatial.distance.cdist( + self.ref[ref_idx].reshape(1, -1), + self._inputs[input_idx].reshape(1, -1), + metric=self.distance_func, + )[0, 0] + + def _get_D(self, i, j): + return self._D.get((i, j), np.inf) + + def _get_L(self, i, j): + return self._L.get((i, j), 0) + + def _evaluate(self, ref_i, inp_j): + if ref_i < 0 or inp_j < 0 or ref_i >= self.N_ref: + return + d = self._local_cost(ref_i, inp_j) + if ref_i == 0 and inp_j == 0: + self._D[(ref_i, inp_j)] = d + self._L[(ref_i, inp_j)] = 1 + return + best_cost, best_len = np.inf, 0 + for prev_i, prev_j, w in [ + (ref_i - 1, inp_j, 1), + (ref_i, inp_j - 1, 1), + (ref_i - 1, inp_j - 1, 2), + ]: + prev = self._D.get((prev_i, prev_j), np.inf) + if prev < np.inf: + c = prev + w * d + if c < best_cost: + best_cost = c + best_len = self._L.get((prev_i, prev_j), 0) + 1 + if best_cost < np.inf: + self._D[(ref_i, inp_j)] = best_cost + self._L[(ref_i, inp_j)] = best_len + + def _norm_cost(self, ref_i, inp_j): + L = self._get_L(ref_i, inp_j) + return self._get_D(ref_i, inp_j) / L if L > 0 else np.inf + + def _argmin_path_cost(self): + best_cost, best_ref, best_inp = np.inf, self.j, self.t + for k in range(max(0, self.t - self.c + 1), self.t + 1): + nc = self._norm_cost(self.j, k) + if nc < best_cost: + best_cost, best_ref, best_inp = nc, self.j, k + for k in range(max(0, self.j - self.c + 1), self.j + 1): + nc = self._norm_cost(k, self.t) + if nc < best_cost: + best_cost, best_ref, best_inp = nc, k, self.t + return best_ref, best_inp + + def _compute_input_column(self): + for k in range(max(0, self.j - self.c + 1), self.j + 1): + self._evaluate(k, self.t) + + def _compute_ref_row(self): + for k in 
range(max(0, self.t - self.c + 1), self.t + 1): + self._evaluate(self.j, k) + + def step(self, input_feat: NDArray[np.float32], perf_time: float = None) -> None: + self._inputs.append(np.asarray(input_feat, dtype=np.float32)) + self.t += 1 + self._compute_input_column() + + if self.t < self.c and self.j + 1 < self.N_ref: + self.j += 1 + self._compute_ref_row() + self.run_count_input = 0 + self.run_count_ref = 0 + else: + self.run_count_input += 1 + self.run_count_ref = 0 + while self.j + 1 < self.N_ref: + best_ref, best_inp = self._argmin_path_cost() + if best_ref >= self.j and best_inp < self.t: + self.j += 1 + self._compute_ref_row() + self.run_count_ref += 1 + self.run_count_input = 0 + if self.run_count_ref >= self.max_run_count: + break + else: + break + + best_ref, _ = self._argmin_path_cost() + self.current_position = min(best_ref, self.N_ref - 1) + t = perf_time if perf_time is not None else self.input_index + self._warping_path.append((self.current_beat, t)) + self.input_index += 1 + + def run(self, verbose=True) -> Generator[float, None, NDArray]: + """Run from queue. 
Extracts timestamp from (feature, time) tuples.""" + self.reset() if verbose: - pbar = progressbar.ProgressBar(max_value=self.N_ref, redirect_stdout=True) + pbar = progressbar.ProgressBar( + max_value=self.N_ref, + redirect_stdout=True, + redirect_stderr=True, + ) + pbar.start() while self.is_still_following(): - input_feature, f_time = self.queue.get(timeout=QUEUE_TIMEOUT) + item = self.queue.get(timeout=QUEUE_TIMEOUT) + if item is STREAM_END: + break + if item is None: + continue + if isinstance(item, tuple): + features, perf_time = item[0], float(item[1]) + else: + features, perf_time = item, None self.last_queue_update = time.time() - self.step(input_feature) + self.step(features, perf_time=perf_time) if verbose: - pbar.update(int(self.current_position)) - + pbar.update(min(self.current_position, self.N_ref - 1)) latency = time.time() - self.last_queue_update self.latency_stats = set_latency_stats( latency, self.latency_stats, self.input_index ) - yield self.current_position + yield self.current_beat if verbose: pbar.finish() + return self.warping_path - return self.wp + +if __name__ == "__main__": + pass # pragma: no cover diff --git a/matchmaker/features/audio.py b/matchmaker/features/audio.py index 3a9c27b..7316a4b 100644 --- a/matchmaker/features/audio.py +++ b/matchmaker/features/audio.py @@ -9,7 +9,7 @@ import librosa import numpy as np -from matchmaker.utils.processor import Processor +from matchmaker.features.processor import Processor SAMPLE_RATE = 44100 FRAME_RATE = 30 @@ -20,7 +20,6 @@ DCT_TYPE = 2 NORM = np.inf FEATURES = "chroma" -QUEUE_TIMEOUT = 1 # Type hint for Input Audio frame. 
InputAudioSeries = np.ndarray diff --git a/matchmaker/features/midi.py b/matchmaker/features/midi.py index bd2d946..802eec1 100644 --- a/matchmaker/features/midi.py +++ b/matchmaker/features/midi.py @@ -13,13 +13,15 @@ from partitura.score import Part, Score, ScoreLike, merge_parts from partitura.utils.music import performance_from_part -from matchmaker.utils.processor import Processor +from matchmaker.features.processor import Processor from matchmaker.utils.symbolic import ( framed_midi_messages_from_performance, midi_messages_from_performance, ) from matchmaker.utils.typing import InputMIDIFrame, NDArrayFloat +CHORD_THRESHOLD: float = 0.035 # seconds; matches IOI_THRESHOLD in outer_product_hmm + class PitchProcessor(Processor): """ @@ -102,6 +104,11 @@ class PitchIOIProcessor(Processor): return_pitch_list: bool If True, it will return an array of MIDI pitch values, instead of a "piano roll" slice. + + chord_threshold : float + Maximum gap (seconds) between note-ons to be considered part of + the same chord. Notes within this gap are accumulated and flushed + together. Set to 0 to disable chord grouping. 
""" prev_time: Optional[float] @@ -111,12 +118,40 @@ def __init__( self, piano_range: bool = False, return_pitch_list: bool = False, + chord_threshold: float = CHORD_THRESHOLD, ) -> None: super().__init__() self.prev_time = None self.piano_range = piano_range self.return_pitch_list = return_pitch_list self.piano_shift = 21 if piano_range else 0 + self.chord_threshold = chord_threshold + self._pending_pitch = np.zeros(128, dtype=np.float32) + self._pending_list: List[int] = [] + self._pending_time: Optional[float] = None + + def _flush(self) -> Tuple[NDArrayFloat, float]: + if self.prev_time is None: + ioi_obs = 0.0 + else: + ioi_obs = self._pending_time - self.prev_time + self.prev_time = self._pending_time + + if self.return_pitch_list: + result = ( + np.array(self._pending_list, dtype=np.float32), + ioi_obs, + ) + else: + pitch = self._pending_pitch + if self.piano_range: + pitch = pitch[21:109] + result = (pitch.copy(), ioi_obs) + + self._pending_pitch = np.zeros(128, dtype=np.float32) + self._pending_list = [] + self._pending_time = None + return result def __call__( self, @@ -126,46 +161,37 @@ def __call__( data, f_time = frame else: data = frame - # pitch_obs = [] - pitch_obs = np.zeros( - 128, - dtype=np.float32, - ) - # TODO: Replace the for loop with list comprehension - pitch_obs_list = [] - for msg, _ in data: + result = None + for msg, m_time in data: if ( getattr(msg, "type", "other") == "note_on" and getattr(msg, "velocity", 0) > 0 ): - pitch_obs[msg.note] = 1 - pitch_obs_list.append(msg.note - self.piano_shift) + if ( + self._pending_time is not None + and (m_time - self._pending_time) > self.chord_threshold + ): + result = self._flush() - if pitch_obs.sum() > 0: - if self.prev_time is None: - # There is no IOI for the first observed note - ioi_obs = 0.0 - else: - ioi_obs = f_time - self.prev_time - self.prev_time = f_time - if self.piano_range: - pitch_obs = pitch_obs[21:109] + self._pending_pitch[msg.note] = 1 + self._pending_list.append(msg.note - 
self.piano_shift) + if self._pending_time is None: + self._pending_time = m_time - if self.return_pitch_list: - return ( - np.array( - pitch_obs_list, - dtype=np.float32, - ), - ioi_obs, - ) - return (pitch_obs, ioi_obs) - else: - return None + return result + + def flush_remaining(self) -> Optional[Tuple[NDArrayFloat, float]]: + """Flush any remaining pending notes (call at end of stream).""" + if self._pending_time is not None: + return self._flush() + return None def reset(self) -> None: - pass + self.prev_time = None + self._pending_pitch = np.zeros(128, dtype=np.float32) + self._pending_list = [] + self._pending_time = None class PianoRollProcessor(Processor): @@ -234,6 +260,84 @@ def reset(self) -> None: self.active_notes = dict() +class OnsetPianoRollProcessor(Processor): + """Binary pianoroll per chord onset, with streaming chord grouping. + + Accumulates note-on events and flushes a chord vector when the time + gap between consecutive note-ons exceeds ``chord_threshold``. + + Returns ``(pianoroll, timestamp)`` when a chord is flushed, ``None`` + while accumulating. + + Parameters + ---------- + piano_range : bool + If True, 88 keys (21-108). Otherwise 128. + chord_threshold : float + Maximum gap (seconds) between note-ons in the same chord. 
+ """ + + def __init__( + self, + piano_range: bool = True, + chord_threshold: float = CHORD_THRESHOLD, + dtype: type = np.float32, + ): + Processor.__init__(self) + self.piano_range = piano_range + self.chord_threshold = chord_threshold + self.dtype = dtype + self._dim = 88 if piano_range else 128 + self._offset = 21 if piano_range else 0 + self._pending_notes: List[int] = [] + self._pending_time: Optional[float] = None + + def _flush(self) -> Tuple[np.ndarray, float]: + vec = np.zeros(self._dim, dtype=self.dtype) + for note in self._pending_notes: + idx = note - self._offset + if 0 <= idx < self._dim: + vec[idx] = 1.0 + t = self._pending_time + self._pending_notes = [] + self._pending_time = None + return (vec, t) + + def __call__(self, frame: InputMIDIFrame) -> Optional[Tuple[np.ndarray, float]]: + if isinstance(frame, tuple): + data, f_time = frame + else: + data = frame + f_time = 0.0 + + result = None + for msg, m_time in data: + if msg.type == "note_on" and msg.velocity > 0: + # Flush previous chord if gap exceeded + if ( + self._pending_time is not None + and (m_time - self._pending_time) > self.chord_threshold + and self._pending_notes + ): + result = self._flush() + + self._pending_notes.append(msg.note) + if self._pending_time is None: + self._pending_time = m_time + + return result + + def flush_remaining(self) -> Optional[Tuple[np.ndarray, float]]: + """Flush any remaining pending notes (call at end of stream).""" + if self._pending_notes: + return self._flush() + return None + + def reset(self) -> None: + self._pending_notes = [] + self._pending_time = None + + class PitchClassPianoRollProcessor(Processor): """ A class to convert a MIDI file time slice to a piano roll representation. 
@@ -353,5 +457,88 @@ def compute_features_from_symbolic( return outputs +# --------------------------------------------------------------------------- +# Onset-level features (for event-level OLTW) +# --------------------------------------------------------------------------- + + +def onset_pianoroll( + note_array: NDArray, + onset_key: str = "onset_beat", + piano_range: bool = True, +) -> Tuple[NDArray, NDArray]: + """One binary pitch vector per unique onset. + + Parameters + ---------- + note_array : np.ndarray + Structured array with "pitch" and ``onset_key`` fields. + onset_key : str + Column name for onset times ("onset_beat" or "onset_sec"). + piano_range : bool + If True, 88 keys (MIDI 21-108). Otherwise 128. + + Returns + ------- + features : np.ndarray [N_onsets, D] + onsets : np.ndarray [N_onsets] + """ + dim = 88 if piano_range else 128 + offset = 21 if piano_range else 0 + onsets = np.unique(note_array[onset_key]) + features = np.zeros((len(onsets), dim), dtype=np.float32) + for i, t in enumerate(onsets): + for p in note_array["pitch"][note_array[onset_key] == t]: + idx = p - offset + if 0 <= idx < dim: + features[i, idx] = 1.0 + return features, onsets + + +def group_onsets( + note_array: NDArray, + threshold: float = CHORD_THRESHOLD, + piano_range: bool = True, +) -> Tuple[NDArray, NDArray]: + """Group note onsets within ``threshold`` seconds into chords. + + Parameters + ---------- + note_array : np.ndarray + Structured array with "pitch" and "onset_sec" fields. + threshold : float + Maximum time gap (seconds) to merge into one chord. + piano_range : bool + If True, 88 keys. Otherwise 128. + + Returns + ------- + features : np.ndarray [N_chords, D] + times : np.ndarray [N_chords] + Mean onset time per chord group. 
+ """ + dim = 88 if piano_range else 128 + offset = 21 if piano_range else 0 + onsets = note_array["onset_sec"] + si = np.argsort(onsets) + groups, cur = [], [si[0]] + for i in range(1, len(si)): + if onsets[si[i]] - onsets[si[i - 1]] < threshold: + cur.append(si[i]) + else: + groups.append(cur) + cur = [si[i]] + groups.append(cur) + features = np.zeros((len(groups), dim), dtype=np.float32) + times = np.zeros(len(groups)) + for i, g in enumerate(groups): + times[i] = np.mean(onsets[g]) + for idx in g: + p = note_array["pitch"][idx] - offset + if 0 <= p < dim: + features[i, p] = 1.0 + return features, times + + if __name__ == "__main__": # pragma: no cover pass diff --git a/matchmaker/utils/processor.py b/matchmaker/features/processor.py similarity index 100% rename from matchmaker/utils/processor.py rename to matchmaker/features/processor.py diff --git a/matchmaker/io/audio.py b/matchmaker/io/audio.py index 043ecf9..9fa596e 100644 --- a/matchmaker/io/audio.py +++ b/matchmaker/io/audio.py @@ -12,16 +12,22 @@ import numpy as np import pyaudio -from matchmaker.features.audio import HOP_LENGTH, SAMPLE_RATE, ChromagramProcessor +from matchmaker.features.audio import ( + HOP_LENGTH, + SAMPLE_RATE, + ChromagramProcessor, +) +from matchmaker.io.queue import RECVQueue +from matchmaker.io.stream import STREAM_END, Stream from matchmaker.utils.audio import ( get_audio_devices, get_default_input_device_index, get_device_index_from_name, ) -from matchmaker.utils.misc import RECVQueue, set_latency_stats -from matchmaker.utils.stream import Stream +from matchmaker.utils.misc import set_latency_stats CHANNELS = 1 +QUEUE_TIMEOUT = 10 class AudioStream(Stream): @@ -53,7 +59,7 @@ def __init__( hop_length: int = HOP_LENGTH, queue: Optional[RECVQueue] = None, device_name_or_index: Optional[Union[str, int]] = None, - wait: bool = True, + wait: bool = False, target_sr: int = SAMPLE_RATE, ): if processor is None: @@ -123,9 +129,14 @@ def __init__( "min_latency": float("inf"), } 
self.input_index = 0 + self._preloaded_audio = None if self.mock: self.run = self.run_offline + # Pre-load and resample audio so the stream thread can start + # producing frames immediately (avoids queue-timeout race condition + # when librosa.load takes longer than QUEUE_TIMEOUT). + self._preload_audio() else: self.run = self.run_online @@ -159,6 +170,8 @@ def _process_frame( # initial y target_audio = np.frombuffer(data, dtype=np.float32) self._process_feature(target_audio, time_info["input_buffer_adc_time"]) + if not self.stream_start.is_set(): + self.stream_start.set() return (data, pyaudio.paContinue) @@ -225,6 +238,13 @@ def stop_listening(self) -> None: self.audio_interface.terminate() self.listen = False + def _preload_audio(self) -> None: + """Pre-load and resample audio file so run_offline can start immediately.""" + audio_y, sr = librosa.load(self.file_path, sr=None) + if sr != self.target_sr: + audio_y = librosa.resample(y=audio_y, orig_sr=sr, target_sr=self.target_sr) + self._preloaded_audio = audio_y + def run_offline(self) -> None: """Process audio file in offline mode. 
@@ -240,12 +260,17 @@ def run_offline(self) -> None:
         self.start_listening()
         self.init_time = time.time()
 
-        audio_y, sr = librosa.load(self.file_path, sr=None)
-        if sr != self.target_sr:
-            audio_y = librosa.resample(y=audio_y, orig_sr=sr, target_sr=self.target_sr)
-            sr = self.target_sr
+        if self._preloaded_audio is not None:
+            audio_y, sr = self._preloaded_audio, self.target_sr
+            self._preloaded_audio = None  # free memory
+        else:
+            audio_y, sr = librosa.load(self.file_path, sr=None)
+            if sr != self.target_sr:
+                audio_y = librosa.resample(
+                    y=audio_y, orig_sr=sr, target_sr=self.target_sr
+                )
+                sr = self.target_sr
 
-        time_interval = self.hop_length / float(sr)
         # Pad to next hop_length boundary so no trailing samples are lost
         remainder = len(audio_y) % self.hop_length
         if remainder > 0:
@@ -253,6 +278,7 @@ def run_offline(self) -> None:
                 (audio_y, np.zeros(self.hop_length - remainder, dtype=np.float32))
             )
         trimmed_audio = audio_y
+        time_interval = self.hop_length / float(sr)
 
         # Do not stop early on digital silence (all-zeros tails).
while trimmed_audio.size > 0: self.input_index += 1 @@ -260,11 +286,15 @@ def run_offline(self) -> None: target_audio = trimmed_audio[: self.hop_length] self._process_feature(target_audio, self.last_data_received) trimmed_audio = trimmed_audio[self.hop_length :] - elapsed_time = time.time() - self.last_data_received + + if not self.stream_start.is_set(): + self.stream_start.set() if self.wait: + elapsed_time = time.time() - self.last_data_received time.sleep(max(time_interval - elapsed_time, 0)) + self.queue.put(STREAM_END) self.stop_listening() def run_online(self) -> None: diff --git a/matchmaker/io/midi.py b/matchmaker/io/midi.py index cfdca59..b83b5f7 100644 --- a/matchmaker/io/midi.py +++ b/matchmaker/io/midi.py @@ -12,10 +12,10 @@ from mido.ports import BaseInput as MidiInputPort from matchmaker.features.midi import PitchIOIProcessor +from matchmaker.features.processor import Processor from matchmaker.io.mediator import CeusMediator -from matchmaker.utils.misc import RECVQueue -from matchmaker.utils.processor import Processor -from matchmaker.utils.stream import Stream +from matchmaker.io.queue import RECVQueue +from matchmaker.io.stream import STREAM_END, Stream from matchmaker.utils.symbolic import ( Buffer, framed_midi_messages_from_performance, @@ -107,21 +107,19 @@ def __init__( self.midi_messages = [] self.polling_period = polling_period + if (polling_period is None) and (self.mock is False): self.is_windowed = False self.run = self.run_online_single self._process_frame = self._process_frame_message - elif (polling_period is None) and (self.mock is True): self.is_windowed = False self.run = self.run_offline_single self._process_frame = self._process_frame_message - elif (polling_period is not None) and (self.mock is False): self.is_windowed = True self.run = self.run_online_windowed self._process_frame = self._process_frame_window - elif (polling_period is not None) and (self.mock is True): self.is_windowed = True self.run = self.run_offline_windowed @@ 
-135,10 +133,16 @@ def _process_frame_message( **kwargs, ) -> None: output = self.processor(([(data, c_time)], c_time)) + + if output is None: + return + if self.return_midi_messages: self.queue.put(((data, c_time), output)) else: self.queue.put(output) + if not self.stream_start.is_set(): + self.stream_start.set() def _process_frame_window( self, @@ -149,11 +153,15 @@ def _process_frame_window( # the data is the Buffer instance output = self.processor((data.frame[:], data.time)) - # if output is not None: + if output is None: + return + if self.return_midi_messages: self.queue.put((data.frame, output)) else: self.queue.put(output) + if not self.stream_start.is_set(): + self.stream_start.set() def run_online_single(self): self.start_listening() @@ -219,8 +227,8 @@ def run_offline_single(self): midi_messages, message_times = midi_messages_from_performance( perf=self.file_path, ) - self.init_time = message_times.min() self.start_listening() + self.init_time = message_times.min() for msg, c_time in zip(midi_messages, message_times): self.add_midi_message( msg=msg, @@ -230,6 +238,12 @@ def run_offline_single(self): data=msg, c_time=c_time, ) + # Flush remaining chord from OnsetPianoRollProcessor + if hasattr(self.processor, "flush_remaining"): + last = self.processor.flush_remaining() + if last is not None: + self.queue.put(last) + self.queue.put(STREAM_END) self.stop_listening() def run_offline_windowed(self): @@ -247,6 +261,11 @@ def run_offline_windowed(self): self._process_frame_window( data=frame, ) + if hasattr(self.processor, "flush_remaining"): + last = self.processor.flush_remaining() + if last is not None: + self.queue.put(last) + self.queue.put(STREAM_END) @property def current_time(self) -> Optional[float]: diff --git a/matchmaker/io/queue.py b/matchmaker/io/queue.py new file mode 100644 index 0000000..4799016 --- /dev/null +++ b/matchmaker/io/queue.py @@ -0,0 +1,31 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +Queue utilities for matchmaker 
streaming. +""" + +from queue import Empty, Queue +from typing import Any + + +class RECVQueue(Queue): + """Queue with a recv method (like Pipe). + + Uses Queue.get with a timeout to remain interruptable via + KeyboardInterrupt. Periodically queries with a 1s timeout as a + middle ground between busy-waiting and uninterruptable blocked waiting. + """ + + def __init__(self) -> None: + Queue.__init__(self) + + def recv(self) -> Any: + """Return and remove an item from the queue.""" + while True: + try: + return self.get(timeout=1) + except Empty: # pragma: no cover + pass + + def poll(self) -> bool: + return self.empty() diff --git a/matchmaker/utils/stream.py b/matchmaker/io/stream.py similarity index 91% rename from matchmaker/utils/stream.py rename to matchmaker/io/stream.py index ca51389..115b547 100644 --- a/matchmaker/utils/stream.py +++ b/matchmaker/io/stream.py @@ -6,13 +6,17 @@ from __future__ import annotations +import threading import time from threading import Thread from types import TracebackType from typing import TYPE_CHECKING, Any, Callable, Optional, Type, Union +STREAM_START = threading.Event # call STREAM_START() to create per-instance event +STREAM_END = object() # put into queue to signal end-of-stream + if TYPE_CHECKING: # pragma: no cover - from matchmaker.utils.processor import Processor + from matchmaker.features.processor import Processor class Stream(Thread): @@ -43,6 +47,7 @@ def __init__( self.mock = mock self.listen = False self.init_time = None + self.stream_start = STREAM_START() def start_listening(self): """ diff --git a/matchmaker/matchmaker.py b/matchmaker/matchmaker.py index 776e081..de3da68 100644 --- a/matchmaker/matchmaker.py +++ b/matchmaker/matchmaker.py @@ -1,15 +1,21 @@ import os import sys +import time from pathlib import Path from typing import Optional, Union import numpy as np import partitura from partitura.io.exportmidi import get_ppq -from partitura.score import Part, merge_parts from 
partitura.musicanalysis.performance_codec import get_time_maps_from_alignment +from partitura.score import Part, merge_parts -from matchmaker.dp import OnlineTimeWarpingArzt, OnlineTimeWarpingDixon +from matchmaker.dp import ( + OnlineTimeWarpingArztEvent, + OnlineTimeWarpingArztFrame, + OnlineTimeWarpingDixonEvent, + OnlineTimeWarpingDixonFrame, +) from matchmaker.features.audio import ( FRAME_RATE, SAMPLE_RATE, @@ -21,26 +27,20 @@ MFCCProcessor, ) from matchmaker.features.midi import ( + OnsetPianoRollProcessor, PianoRollProcessor, PitchClassPianoRollProcessor, PitchIOIProcessor, + onset_pianoroll, ) from matchmaker.io.audio import AudioStream from matchmaker.io.midi import MidiStream -from matchmaker.prob.hmm import ( - GaussianAudioPitchHMM, - GaussianAudioPitchTempoHMM, - PitchHMM, - PitchIOIHMM, -) -from matchmaker.prob.outer_product_hmm import OuterProductHMM -from matchmaker.prob.outer_product_hmm_audio import AudioOuterProductHMM +from matchmaker.prob import AudioOuterProductHMM, OuterProductHMM, PitchHMM, PitchIOIHMM from matchmaker.utils.eval import ( TOLERANCES_IN_BEATS, TOLERANCES_IN_MILLISECONDS, - get_evaluation_results, - transfer_from_perf_to_predicted_score, - transfer_from_score_to_predicted_perf, + evaluate_alignment, + transfer_positions, ) from matchmaker.utils.misc import ( adjust_tempo_for_performance_file, @@ -52,53 +52,53 @@ ) from matchmaker.utils.tempo_models import KalmanTempoModel +PathLike = Union[str, bytes, os.PathLike] sys.setrecursionlimit(10_000) -PathLike = Union[str, bytes, os.PathLike] DEFAULT_TEMPO = 120 - - -DEFAULT_DISTANCE_FUNCS = { - "arzt": OnlineTimeWarpingArzt.DEFAULT_DISTANCE_FUNC, - "dixon": OnlineTimeWarpingDixon.DEFAULT_DISTANCE_FUNC, - "hmm": None, - "outerhmm": None, - "audio_outerhmm": None, - "pthmm": None, +MIDI_FRAME_RATE = 1 # dummy value for MIDI input +DEFAULT_PROCESSOR = { + "audio": "chroma", + "midi": "pitch_ioi", } - DEFAULT_METHODS = { "audio": "arzt", "midi": "outerhmm", } - -AVAILABLE_METHODS = 
["arzt", "dixon", "hmm", "pthmm", "outerhmm", "audio_outerhmm"] +AVAILABLE_METHODS = ["arzt", "dixon", "hmm", "pthmm", "outerhmm"] +OLTW_METHODS = {"arzt", "dixon"} KWARGS = { "audio": { "dixon": { + "processor": "lse", "window_size": 10, }, "arzt": { - "window_size": 5, - "start_window_size": 0.25, - "step_size" : 5,}, - "audio_outerhmm": { + "window_size": 10, + "start_window_size": 0.1, + "step_size": 3, + }, + "outerhmm": { + "processor": "cqt_spectral_flux", "sample_rate": 16000, - "frame_rate": 50, + "frame_rate": 25, + "s_j": 0.0, }, }, "midi": { "arzt": { "processor": "pianoroll", "piano_range": True, - "window_size": 200, - "start_window_size": 200, + "frame_rate": 100, + "window_size": 2, + "start_window_size": 2, "step_size": 5, }, "dixon": { "processor": "pianoroll", "piano_range": True, - "window_size": 30, + "frame_rate": 100, + "window_size": 0.3, }, "hmm": { "processor": "pitch_ioi", @@ -131,8 +131,8 @@ class Matchmaker(object): only for offline option. For debugging or fast testing, set to False input_type : str Type of input to use: audio or midi - feature_type : str - Type of feature to use + processor : str + Type of feature processor to use method : str Score following method to use device_name_or_index : Union[str, int] @@ -151,39 +151,33 @@ def __init__( self, score_file: PathLike, performance_file: Union[PathLike, None] = None, - wait: bool = True, # only for offline option. 
For debugging or fast testing, set to False - input_type: str = "audio", # 'audio' or 'midi' - feature_type: str = None, + input_type: str = "audio", method: str = None, - distance_func: Optional[str] = None, + *, + processor: str = None, device_name_or_index: Union[str, int] = None, + tempo: Optional[float] = None, sample_rate: int = SAMPLE_RATE, frame_rate: int = FRAME_RATE, - tempo: Optional[float] = None, - kwargs=KWARGS, - unfold_score=True, auto_adjust_tempo: bool = False, + wait: bool = False, + unfold_score=True, + kwargs=KWARGS, ): self.score_file = str(score_file) self.performance_file = ( str(performance_file) if performance_file is not None else None ) - # if input_type not in ("audio", "midi"): - # raise ValueError(f"Invalid input_type {input_type}") self.input_type = input_type - self.feature_type = feature_type - self.frame_rate = frame_rate if input_type == "audio" else 1 - self.sample_rate = sample_rate - self.hop_length = sample_rate // self.frame_rate self.score_part: Optional[Part] = None - self.distance_func = distance_func self.device_name_or_index = device_name_or_index self.processor = None self.stream = None self.score_follower = None self.reference_features = None self._has_run = False + self.alignment_duration = None # validate method first if method is None: @@ -192,14 +186,14 @@ def __init__( raise ValueError(f"Invalid method. 
Available methods: {AVAILABLE_METHODS}") self.method = method - self.config = kwargs[self.input_type][self.method] + self.config = dict(kwargs.get(self.input_type, {}).get(self.method, {})) self.auto_adjust_tempo = auto_adjust_tempo - # Apply method-specific defaults from config (only if not explicitly provided by caller) - if sample_rate == SAMPLE_RATE and "sample_rate" in self.config: - self.sample_rate = self.config["sample_rate"] - if frame_rate == FRAME_RATE and "frame_rate" in self.config: - self.frame_rate = self.config["frame_rate"] + # Resolve sample_rate and frame_rate: config overrides defaults + self.sample_rate = self.config.pop("sample_rate", sample_rate) + self.frame_rate = self.config.pop( + "frame_rate", frame_rate if input_type == "audio" else MIDI_FRAME_RATE + ) self.hop_length = self.sample_rate // self.frame_rate # setup score file @@ -213,8 +207,22 @@ def __init__( score = partitura.load_score(self.score_file) if unfold_score: - score = partitura.score.unfold_part_maximal(score, ignore_leaps=False) - self.score_part = merge_parts(score.parts) + try: + # Ensure recursion limit is high enough for deepcopy of + # complex scores. External libraries (e.g. madmom) may + # lower it during processing. 
+ _prev_limit = sys.getrecursionlimit() + sys.setrecursionlimit(max(_prev_limit, 10_000)) + unfolded = partitura.score.unfold_part_maximal( + score, ignore_leaps=False + ) + self.score_part = merge_parts(unfolded.parts) + sys.setrecursionlimit(_prev_limit) + except Exception: + sys.setrecursionlimit(max(sys.getrecursionlimit(), 10_000)) + self.score_part = merge_parts(score.parts) + else: + self.score_part = merge_parts(score.parts) except Exception as e: raise ValueError(f"Invalid score file: {e}") @@ -227,49 +235,10 @@ def __init__( score_tempo = get_tempo_from_score(self.score_part, self.score_file) self.tempo = score_tempo if score_tempo is not None else DEFAULT_TEMPO - # setup feature processor - if self.feature_type is None: - if input_type == "audio": - self.feature_type = ( - "cqt_spectral_flux" if method == "audio_outerhmm" else "chroma" - ) - else: - self.feature_type = "pitch_ioi" - - if self.feature_type == "chroma": - self.processor = ChromagramProcessor( - sample_rate=self.sample_rate, - hop_length=self.hop_length, - ) - elif self.feature_type == "mfcc": - self.processor = MFCCProcessor( - sample_rate=self.sample_rate, - ) - elif self.feature_type == "cqt": - self.processor = CQTProcessor( - sample_rate=self.sample_rate, - ) - elif self.feature_type == "mel": - self.processor = MelSpectrogramProcessor( - sample_rate=self.sample_rate, - ) - elif self.feature_type == "lse": - self.processor = LogSpectralEnergyProcessor( - sample_rate=self.sample_rate, - ) - elif self.feature_type == "pitch_ioi": - self.processor = PitchIOIProcessor(piano_range=self.config["piano_range"]) - elif self.feature_type == "pitchclass": - self.processor = PitchClassPianoRollProcessor() - elif self.feature_type == "pianoroll": - self.processor = PianoRollProcessor(piano_range=self.config["piano_range"]) - elif self.feature_type == "cqt_spectral_flux": - self.processor = CQTSpectralFluxProcessor( - sample_rate=self.sample_rate, - hop_length=self.hop_length, - ) - else: - raise 
ValueError(f"Invalid feature type `{self.feature_type}`") + processor_type = processor or self.config.pop( + "processor", DEFAULT_PROCESSOR[input_type] + ) + self.processor = self._build_processor(method, processor_type) if self.performance_file is not None: if self.input_type == "audio" and not is_audio_file(self.performance_file): @@ -281,13 +250,41 @@ def __init__( f"Invalid performance file. Expected MIDI file, but got {self.performance_file}" ) - # setup distance function - if distance_func is None: - distance_func = DEFAULT_DISTANCE_FUNCS[self.method] - # setup stream device + self.stream = self._build_stream(method, wait) + self.reference_features = self.preprocess_score() + self.score_follower = self._build_score_follower(method) + + def _build_processor(self, method, processor_type): + audio_kw = dict(sample_rate=self.sample_rate, hop_length=self.hop_length) + + AUDIO_PROCESSORS = { + "chroma": lambda: ChromagramProcessor(**audio_kw), + "mfcc": lambda: MFCCProcessor(**audio_kw), + "cqt": lambda: CQTProcessor(**audio_kw), + "mel": lambda: MelSpectrogramProcessor(**audio_kw), + "lse": lambda: LogSpectralEnergyProcessor(**audio_kw), + "cqt_spectral_flux": lambda: CQTSpectralFluxProcessor(**audio_kw), + } + MIDI_PROCESSORS = { + "pitch_ioi": lambda: PitchIOIProcessor( + piano_range=self.config["piano_range"], + return_pitch_list=(method == "hmm"), + ), + "pitchclass": lambda: PitchClassPianoRollProcessor(), + "pianoroll": lambda: PianoRollProcessor( + piano_range=self.config["piano_range"], + ), + } + if processor_type in AUDIO_PROCESSORS: + return AUDIO_PROCESSORS[processor_type]() + elif processor_type in MIDI_PROCESSORS: + return MIDI_PROCESSORS[processor_type]() + raise ValueError(f"Invalid feature type '{processor_type}'") + + def _build_stream(self, method, wait): if self.input_type == "audio": - self.stream = AudioStream( + return AudioStream( processor=self.processor, device_name_or_index=self.device_name_or_index, file_path=self.performance_file, @@ 
-297,131 +294,167 @@ def __init__( hop_length=self.hop_length, ) elif self.input_type == "midi": - self.stream = MidiStream( - processor=self.processor, + if method in OLTW_METHODS: + processor = OnsetPianoRollProcessor( + piano_range=self.config.get("piano_range", True), + ) + else: + processor = self.processor + return MidiStream( + processor=processor, port=self.device_name_or_index, file_path=self.performance_file, - **({"polling_period": None} if method == "outerhmm" else {}), + polling_period=None, ) - else: - raise ValueError(f"Invalid input type {self.input_type}") + raise ValueError(f"Invalid input type '{self.input_type}'") - self.reference_features = self.preprocess_score() + def _build_score_follower(self, method): + if self.input_type == "audio": + return self._build_audio_follower(method) + elif self.input_type == "midi": + return self._build_symbolic_follower(method) + raise ValueError(f"Invalid input_type '{self.input_type}'") - if distance_func is None: - distance_func = DEFAULT_DISTANCE_FUNCS[method] + def _build_audio_follower(self, method): + ref = self.reference_features + queue = self.stream.queue + state_space = np.unique(self.score_part.note_array()["onset_beat"]) - if method == "arzt": - state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps() - self.score_follower = OnlineTimeWarpingArzt( - reference_features=self.reference_features, - queue=self.stream.queue, - distance_func=distance_func, - frame_rate=self.frame_rate, - window_size=self.config["window_size"], - start_window_size=self.config["start_window_size"], - state_to_ref_time_map=state_to_ref_time_map, - ref_to_state_time_map=ref_to_state_time_map, - step_size=self.config["step_size"], - state_space=np.unique(self.score_part.note_array()["onset_beat"]) + if method in OLTW_METHODS: + self.ppart = partitura.utils.music.performance_from_part( + self.score_part, bpm=self.tempo ) - elif method == "dixon": - state_to_ref_time_map, ref_to_state_time_map = self.get_time_maps() - 
self.score_follower = OnlineTimeWarpingDixon( - reference_features=self.reference_features, - queue=self.stream.queue, - distance_func=distance_func, - frame_rate=self.frame_rate, - window_size=self.config["window_size"], - state_to_ref_time_map=state_to_ref_time_map, - ref_to_state_time_map=ref_to_state_time_map, - state_space=np.unique(self.score_part.note_array()["onset_beat"]) - ) - elif method == "hmm" and self.input_type == "midi": - self.score_follower = PitchIOIHMM( - reference_features=self.reference_features, - queue=self.stream.queue, - tempo_model=self.config["tempo_model"], - has_insertions=True, - piano_range=self.config["piano_range"], + self.ppart.sustain_pedal_threshold = 127 + try: + stm, rtm = self.get_time_maps() + except Exception: + stm, rtm = None, None + cls = ( + OnlineTimeWarpingArztFrame + if method == "arzt" + else OnlineTimeWarpingDixonFrame ) - elif method == "pthmm" and self.input_type == "audio": - self.score_follower = GaussianAudioPitchTempoHMM( - reference_features=self.reference_features, - queue=self.stream.queue, + return cls( + reference_features=ref, + queue=queue, + state_space=state_space, + frame_rate=self.frame_rate, + state_to_ref_time_map=stm, + ref_to_state_time_map=rtm, + ref_frame_to_beat=self._build_ref_frame_to_beat(), + **self.config, ) - elif method == "audio_outerhmm" and self.input_type == "audio": - self.score_follower = AudioOuterProductHMM( - reference_features=self.reference_features, - queue=self.stream.queue, + elif method == "outerhmm": + return AudioOuterProductHMM( + reference_features=ref, + queue=queue, tempo=self.tempo, - sample_rate=self.sample_rate, hop_length=self.hop_length, + **self.config, + ) + raise ValueError(f"No audio follower for method '{method}'") + + def _build_symbolic_follower(self, method): + ref = self.reference_features + queue = self.stream.queue + + if method in OLTW_METHODS: + # Convert note_array to onset pianoroll for event-level OLTW + onset_ref, state_space = 
onset_pianoroll( + ref, + onset_key="onset_beat", + piano_range=self.config.get("piano_range", True), + ) + # Filter out frame-level config keys + skip = { + "window_size", + "start_window_size", + "frame_rate", + "processor", + "piano_range", + } + config = {k: v for k, v in self.config.items() if k not in skip} + cls = ( + OnlineTimeWarpingArztEvent + if method == "arzt" + else OnlineTimeWarpingDixonEvent ) - elif method == "pthmm" and self.input_type == "midi": - self.score_follower = PitchHMM( - reference_features=self.reference_features, - queue=self.stream.queue, + return cls( + reference_features=onset_ref, + queue=queue, + state_space=state_space, + **config, + ) + elif method == "hmm": + return PitchIOIHMM( + reference_features=ref, + queue=queue, has_insertions=True, - piano_range=self.config["piano_range"], + **self.config, + ) + elif method == "pthmm": + return PitchHMM( + reference_features=ref, + queue=queue, + has_insertions=True, + **self.config, ) - elif method == "outerhmm" and self.input_type == "midi": - self.score_follower = OuterProductHMM( - reference_features=self.reference_features, - queue=self.stream.queue, + elif method == "outerhmm": + return OuterProductHMM( + reference_features=ref, + queue=queue, + **self.config, ) + raise ValueError(f"No MIDI follower for method '{method}'") + + def _wp_perf_to_seconds(self, wp_perf): + """Convert warping path performance axis to absolute seconds.""" + if self.input_type == "audio": + return wp_perf / self.frame_rate + elif self.method in OLTW_METHODS: + return wp_perf # OnsetPianoRollProcessor provides absolute timestamps else: - raise ValueError("Invalid method") + # HMM: IOI-accumulated from 0; shift by first note onset + _perf = partitura.load_performance_midi(self.performance_file) + return wp_perf + float(_perf.note_array()["onset_sec"].min()) def preprocess_score(self): - """Preprocess score to extract reference features.""" + """Extract reference features from the score.""" if 
self.auto_adjust_tempo and self.performance_file is not None: self.tempo = adjust_tempo_for_performance_file( self.score_part, self.performance_file, self.tempo ) - if self.method in {"arzt", "dixon"}: - self.ppart = partitura.utils.music.performance_from_part(self.score_part, bpm=self.tempo) - self.ppart.sustain_pedal_threshold = 127 - if self.input_type == "audio": - self.score_audio = generate_score_audio( - self.score_part, self.tempo, self.sample_rate - ).astype(np.float32) - reference_features = self.processor(self.score_audio) - self.processor.reset() - return reference_features - else: - polling_period = 0.01 - reference_features = ( - partitura.utils.music.compute_pianoroll( - note_info=self.ppart, - time_unit="sec", - time_div=int(np.round(1 / polling_period)), - binary=True, - piano_range=self.config["piano_range"], - ) - .toarray() - .T - ).astype(np.float32) - return reference_features - else: - return self.score_part.note_array() - + if self.input_type == "audio" and self.method in OLTW_METHODS: + score_audio = generate_score_audio( + self.score_part, self.tempo, self.sample_rate + ).astype(np.float32) + features = self.processor(score_audio) + self.processor.reset() + return features + + return self.score_part.note_array() + def get_time_maps(self): - alignment = [{"label" : "match", "score_id" : nid, "performance_id": nid} for nid in self.score_part.note_array()["id"]] - return get_time_maps_from_alignment(self.ppart.note_array(), self.score_part.note_array(), alignment) + sna = self.score_part.note_array() + pna = self.ppart.note_array() + note_ids = sna["id"] + # If note IDs are missing, use index-based IDs + if len(set(note_ids)) <= 1: + synth_ids = [f"n{i}" for i in range(len(sna))] + sna = sna.copy() + sna["id"] = synth_ids + pna = pna.copy() + pna["id"] = synth_ids[: len(pna)] + note_ids = synth_ids + alignment = [ + {"label": "match", "score_id": nid, "performance_id": nid} + for nid in note_ids + ] + return get_time_maps_from_alignment(pna, 
sna, alignment) def _convert_frame_to_beat(self, current_frame: int) -> float: - """ - Convert frame number to relative beat position in the score. - - Parameters - ---------- - frame_rate : int - Frame rate of the audio stream - current_frame : int - Current frame number - """ + """Convert frame number to beat position in the score.""" tick = get_ppq(self.score_part) timeline_time = (current_frame / self.frame_rate) * tick * (self.tempo / 60) beat_position = np.round( @@ -430,6 +463,13 @@ def _convert_frame_to_beat(self, current_frame: int) -> float: ) return beat_position + def _build_ref_frame_to_beat(self) -> np.ndarray: + """Precompute beat position for each reference feature frame.""" + n_ref = self.reference_features.shape[0] + return np.array( + [self._convert_frame_to_beat(i) for i in range(n_ref)], + ) + def build_score_annotations( self, level="beat", @@ -484,34 +524,6 @@ def build_score_annotations( return score_annots - def convert_timestamps_to_beats(self, timestamps): - """ - Convert an array of timestamps (in seconds) to beat positions. 
- - Parameters - ---------- - timestamps : array-like - Array of timestamps in seconds - - Returns - ------- - beats : np.ndarray - Array of beat positions corresponding to the input timestamps - """ - beats = [] - tick = get_ppq(self.score_part) - - for timestamp in timestamps: - timeline_time = timestamp * tick * (self.tempo / 60) - - beat_position = np.round( - self.score_part.beat_map(timeline_time), - decimals=2, - ) - beats.append(beat_position) - - return np.array(beats) - def get_latency_stats(self): feature_stats = self.stream.latency_stats inference_stats = self.score_follower.latency_stats @@ -533,37 +545,45 @@ def run_evaluation( self, perf_annotations: Union[PathLike, np.ndarray], level: str = "note", - tolerances: list = TOLERANCES_IN_MILLISECONDS, - musical_beat: bool = False, # beat annots are difference in some dataset + tolerances: list = None, + musical_beat: bool = False, debug: bool = False, save_dir: PathLike = None, run_name: str = None, - domain: str = "performance", # "score" or "performance" + domain: str = "score", + plot_dist_matrix: bool = True, ) -> dict: """ - Evaluate the score following process + Evaluate the score following process. + + When domain="score" (default), returns beat-based metrics as primary + and ms-based metrics under "ms" key. When domain="performance", + returns ms-based metrics only (legacy behavior). Parameters ---------- perf_annotations : PathLike or np.ndarray - Path to the performance annotations file (tab-separated), - or numpy array of annotation times in seconds. + Path to the performance annotations file or numpy array of onset times (seconds). level : str - Level of annotations to use: bar, beat or note - tolerance : list - Tolerances to use for evaluation (in milliseconds) + Annotation level: "beat" or "note" + tolerances : list or None + Tolerances for evaluation. If None, uses default for the domain. 
+ musical_beat : bool + Whether to use musical beat debug : bool - Whether to save the score and performance audio with beat annotations + Whether to save debug outputs domain : str - Evaluation domain, either "score" or "performance". - "score" domain evaluates in beat unit, "performance" domain evaluates in second unit. (Default: "performance") + "score" (default, beat-based primary) or "performance" (ms-based, legacy) Returns ------- dict - Evaluation results with mean, median, std, skewness, kurtosis, and - accuracy for each tolerance + Evaluation results. If domain="score", includes both beat and ms metrics. """ + if tolerances is None: + tolerances = ( + TOLERANCES_IN_BEATS if domain == "score" else TOLERANCES_IN_MILLISECONDS + ) if not self._has_run: raise ValueError("Must call run() before evaluation") @@ -572,87 +592,77 @@ def run_evaluation( else: perf_annots = np.loadtxt(fname=perf_annotations, delimiter="\t", usecols=0) - return_type = "seconds" if domain == "performance" else "beats" - score_annots = self.build_score_annotations(level, musical_beat, return_type) - - original_perf_annots_counts = len(perf_annots) + wp = self.score_follower.warping_path + wp_score = wp[0].astype(float) + wp_perf_sec = self._wp_perf_to_seconds(wp[1].astype(float)) - min_length = min(len(score_annots), len(perf_annots)) - score_annots = score_annots[:min_length] - perf_annots = perf_annots[:min_length] - - mode = ( - "state" - if (self.input_type == "midi" or self.method == "audio_outerhmm") - else "frame" - ) - perf_annots_predicted = transfer_from_score_to_predicted_perf( - self.score_follower.warping_path, - score_annots, - frame_rate=self.frame_rate, - mode=mode, + score_annots_beats = self.build_score_annotations( + level, musical_beat, return_type="beats" ) + min_length = min(len(score_annots_beats), len(perf_annots)) + score_annots_beats = score_annots_beats[:min_length] + perf_annots = perf_annots[:min_length] - score_annots_predicted = 
transfer_from_perf_to_predicted_score( - self.score_follower.warping_path, + eval_results = evaluate_alignment( + wp_score, + wp_perf_sec, + score_annots_beats, perf_annots, - frame_rate=self.frame_rate, - mode=mode, + beat_tolerances=tolerances if domain == "score" else TOLERANCES_IN_BEATS, + ms_tolerances=TOLERANCES_IN_MILLISECONDS, ) - score_annots = score_annots[: len(score_annots_predicted)] - if original_perf_annots_counts != len(perf_annots_predicted): - print( - f"Length of the annotation changed: {original_perf_annots_counts} -> {len(perf_annots_predicted)}" - ) + # Real-Time Factor (domain-independent) + if self.alignment_duration is not None: + finite_perf = perf_annots[np.isfinite(perf_annots)] + if len(finite_perf) > 0: + perf_duration = float(np.max(finite_perf) - np.min(finite_perf)) + if perf_duration > 0: + eval_results["rtf"] = float( + f"{self.alignment_duration / perf_duration:.4f}" + ) - # Evaluation metrics - if domain == "performance": - eval_results = get_evaluation_results( - perf_annots, - perf_annots_predicted, - total_counts=original_perf_annots_counts, - tolerances=tolerances, - ) - else: - score_annots_predicted = self.convert_timestamps_to_beats( - score_annots_predicted - ) - if tolerances == TOLERANCES_IN_MILLISECONDS: - tolerances = TOLERANCES_IN_BEATS - eval_results = get_evaluation_results( - score_annots, - score_annots_predicted, - total_counts=original_perf_annots_counts, - tolerances=tolerances, - in_seconds=False, - ) if self.input_type == "audio": latency_results = self.get_latency_stats() eval_results.update(latency_results) - # Debug: save warping path TSV, results JSON, and plots if debug and save_dir is not None: + wp_sec = np.array([wp_score, wp_perf_sec]) + sf = self.score_follower save_debug_results( - warping_path=self.score_follower.warping_path, - score_annots=score_annots, + warping_path=wp_sec, + score_annots=score_annots_beats, perf_annots=perf_annots, - perf_annots_predicted=perf_annots_predicted, + 
perf_annots_predicted=transfer_positions( + wp_sec, + score_annots_beats, + frame_rate=1, + domain="performance", + ), eval_results=eval_results, frame_rate=self.frame_rate, save_dir=save_dir, run_name=run_name or "results", - state_space=getattr(self.score_follower, "state_space", None), - ref_features=getattr(self.score_follower, "reference_features", None), - input_features=getattr(self.score_follower, "input_features", None), - distance_func=getattr(self.score_follower, "distance_func", None), + state_space=getattr(sf, "state_space", None), + ref_features=( + getattr(sf, "reference_features", None) + if plot_dist_matrix + else None + ), + input_features=( + getattr(sf, "input_features", None) if plot_dist_matrix else None + ), + distance_func=( + getattr(sf, "distance_func", None) if plot_dist_matrix else None + ), + ref_frame_to_beat=getattr(sf, "_ref_frame_to_beat", None), ) return eval_results - def run(self, verbose: bool = True, wait: bool = True): + def run(self, verbose: bool = True): """ - Run the score following process + Run the score following process. 
Yields ------ @@ -661,16 +671,15 @@ def run(self, verbose: bool = True, wait: bool = True): Returns ------- - list - Alignment results with warping path + np.ndarray + Warping path (2, T) """ with self.stream: + self.stream.stream_start.wait() + t0 = time.time() for current_position in self.score_follower.run(verbose=verbose): - if self.input_type == "audio" and self.method != "audio_outerhmm": - position_in_beat = self._convert_frame_to_beat(current_position) - yield position_in_beat - else: - yield float(self.score_follower.state_space[current_position]) + yield current_position + self.alignment_duration = time.time() - t0 self._has_run = True return self.score_follower.warping_path diff --git a/matchmaker/prob/__init__.py b/matchmaker/prob/__init__.py index c5ab843..122fa75 100644 --- a/matchmaker/prob/__init__.py +++ b/matchmaker/prob/__init__.py @@ -1,7 +1,9 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ -Probabilistic methods for music alignment +Probabilistic methods for music alignment. """ -from .hmm import PitchIOIHMM +from .hmm import PitchHMM, PitchIOIHMM +from .outer_product_hmm import OuterProductHMM +from .outer_product_hmm_audio import AudioOuterProductHMM diff --git a/matchmaker/prob/hmm.py b/matchmaker/prob/hmm.py index bfbaf4c..562bf5c 100644 --- a/matchmaker/prob/hmm.py +++ b/matchmaker/prob/hmm.py @@ -22,9 +22,10 @@ from scipy.stats import gumbel_l, norm from matchmaker.base import OnlineAlignment +from matchmaker.io.queue import RECVQueue +from matchmaker.io.stream import STREAM_END +from matchmaker.utils.errors import MatchmakerMissingParameterError from matchmaker.utils.misc import ( - MatchmakerMissingParameterError, - RECVQueue, get_window_indices, interleave_with_constant, set_latency_stats, @@ -47,6 +48,7 @@ DEFAULT_GUMBEL_AUDIO_SCALE = 0.05 QUEUE_TIMEOUT = 10 + class BaseHMM(HiddenMarkovModel): """ Base class for Hidden Markov Model alignment methods. 
@@ -109,39 +111,37 @@ def __init__( } @property - def warping_path(self) -> NDArrayInt: - return (np.array(self._warping_path).T).astype(np.int32) + def warping_path(self): + return np.array(self._warping_path).T - def __call__(self, input: NDArrayFloat) -> float: + def __call__(self, input: NDArrayFloat, *args, **kwargs) -> float: current_state = self.forward_algorithm_step( observation=input, log_probabilities=False, ) - self._warping_path.append((current_state, self.input_index)) + try: + beat = float(self.state_space[current_state]) + except (ValueError, TypeError): + beat = current_state + self._warping_path.append((beat, self.input_index)) self.input_index += 1 self.current_state = current_state return current_state - def run(self) -> Generator[int, None, NDArrayInt]: + def run(self, verbose: bool = True) -> Generator[int, None, NDArrayInt]: if self.queue is not None: - prev_state = self.current_state - same_state_counter = 0 while self.is_still_following(): - target_feature = self.queue.get() - - current_state = self(target_feature) + item = self.queue.get() + if item is STREAM_END: + break + if item is None: + continue - if current_state == prev_state: - if same_state_counter < self.patience: - same_state_counter += 1 - else: - break - else: - same_state_counter = 0 + current_state = self(item) - yield current_state + yield float(self.state_space[current_state]) return self.warping_path @@ -193,6 +193,7 @@ def __init__( initial_probabilities: Optional[np.ndarray] = None, has_insertions: bool = True, piano_range: bool = True, + **kwargs, ) -> None: """ Initialize the object. @@ -227,6 +228,7 @@ def __init__( reference_features=reference_features, ) self.reference_features = reference_features + self.perf_onset = None ( observation_model, transition_matrix, @@ -282,17 +284,25 @@ def __init__( ) def __call__(self, input, *args, **kwargs): - frame_index = args[0] if args else None - - pitch_obs = input - + # PitchIOIProcessor returns (pitch_array, ioi). 
Use pitch for + # observation, ioi for time tracking. + if isinstance(input, tuple): + pitch_obs, ioi_obs = input + if self.perf_onset is None: + self.perf_onset = 0 + else: + self.perf_onset += ioi_obs + else: + pitch_obs = input + current_state = self.forward_algorithm_step( observation=pitch_obs, log_probabilities=False, ) - self._warping_path.append((current_state, self.input_index)) - self.input_index = self.input_index + 1 if frame_index is None else frame_index - + beat = float(self.state_space[current_state]) + perf_time = getattr(self, "perf_onset", self.input_index) or self.input_index + self._warping_path.append((beat, perf_time)) + self.input_index += 1 self.current_state = current_state return self.current_state @@ -339,7 +349,7 @@ def _build_hmm_modules( transition_matrix = stable_transition_matrix( n_states=len(unique_onsets_s), dist=gumbel_l, - scale=1.0,#0.5, + scale=1.0, # 0.5, inserted_states=inserted_states, ) initial_probabilities = init_dist( @@ -547,7 +557,9 @@ def gumbel_transition_matrix( # TODO check works for audio (parameter) np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale ) else: - transition_matrix[i] = gumbel_l.pdf(np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale) + transition_matrix[i] = gumbel_l.pdf( + np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale + ) # Normalize transition matrix (so that it is a proper stochastic matrix): transition_matrix /= transition_matrix.sum(1, keepdims=True) @@ -555,10 +567,11 @@ def gumbel_transition_matrix( # TODO check works for audio (parameter) # Return the computed transition matrix: return transition_matrix + def stable_transition_matrix( # TODO check works for audio (parameter) n_states: int, mp_trans_state: int = 1, - dist = gumbel_l, + dist=gumbel_l, scale: float = 0.5, inserted_states: bool = False, ) -> NDArrayFloat: @@ -606,7 +619,9 @@ def stable_transition_matrix( # TODO check works for audio (parameter) np.arange(n_states), loc=i + 
mp_trans_state * 2 - 1, scale=scale ) else: - transition_matrix[i] = dist.pdf(np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale) + transition_matrix[i] = dist.pdf( + np.arange(n_states), loc=i + mp_trans_state * 2 - 1, scale=scale + ) # Normalize transition matrix (so that it is a proper stochastic matrix): transition_matrix /= transition_matrix.sum(1, keepdims=True) @@ -614,6 +629,7 @@ def stable_transition_matrix( # TODO check works for audio (parameter) # Return the computed transition matrix: return transition_matrix + def init_dist( n_states: int, dist=gumbel_l, @@ -784,6 +800,7 @@ def compute_discrete_pitch_profiles( return pitch_profiles + # Old version, to be deprecated. def compute_discrete_pitch_profiles_old( chord_pitches: NDArrayFloat, @@ -1131,7 +1148,6 @@ def __init__( self.states = np.arange(len(audio_features)) def __call__(self, observation: NDArrayFloat) -> NDArrayFloat: - pitch_obs, tempo_est = observation if self.current_state is None: @@ -1152,7 +1168,6 @@ def __call__(self, observation: NDArrayFloat) -> NDArrayFloat: obs_prob = pitch_prob * tempo_prob - return obs_prob @@ -1201,7 +1216,6 @@ def __init__( self.states = np.arange(len(audio_features)) def __call__(self, observation: NDArrayFloat) -> NDArrayFloat: - pitch_obs, tempo_est = observation # ioi_idx = self.current_state if self.current_state is not None else 0 @@ -1298,6 +1312,7 @@ def __init__(self, pitch_profiles, ioi_matrix, ioi_precision): ioi_prob_args=ioi_prob_args, ) + class ACCPitchIOIObservationModel(ObservationModel): """ Computes the probabilities that an observation was emitted, i.e. the @@ -1489,6 +1504,7 @@ def __init__( initial_probabilities: Optional[np.ndarray] = None, has_insertions: bool = False, piano_range: bool = False, + **kwargs, ) -> None: """ Initialize the object. 
@@ -1597,6 +1613,7 @@ def __init__( tempo_model=tempo_model, has_insertions=has_insertions, queue=queue, + **kwargs, ) def __call__(self, input, *args, **kwargs): @@ -1615,7 +1632,8 @@ def __call__(self, input, *args, **kwargs): ), log_probabilities=False, ) - self._warping_path.append((current_state, self.input_index)) + score_beat = float(self.state_space[current_state]) + self._warping_path.append((score_beat, self.perf_onset)) self.input_index = self.input_index + 1 if frame_index is None else frame_index if self.current_state is None: @@ -1656,17 +1674,17 @@ def _build_hmm_modules( self, piano_range: bool = False, inserted_states: bool = True, - observation_model = ACCPitchIOIObservationModel, - tempo_model = KalmanTempoModel, + observation_model=ACCPitchIOIObservationModel, + tempo_model=KalmanTempoModel, ): snote_array = self.reference_features - + unique_sonsets = np.unique(snote_array["onset_beat"]) unique_sonset_idxs = [ np.where(snote_array["onset_beat"] == ui)[0] for ui in unique_sonsets ] chord_pitches = [snote_array["pitch"][uix] for uix in unique_sonset_idxs] - + pitch_profiles = compute_discrete_pitch_profiles( chord_pitches=chord_pitches, piano_range=piano_range, @@ -1676,7 +1694,7 @@ def _build_hmm_modules( unique_onsets=unique_sonsets, inserted_states=inserted_states, ) - + observation_model = observation_model( pitch_profiles=pitch_profiles, ioi_matrix=ioi_matrix, @@ -1703,11 +1721,11 @@ def _build_hmm_modules( init_score_onset=unique_sonsets.min(), init_beat_period=60 / 100, ) - + transition_matrix = stable_transition_matrix( n_states=len(ioi_matrix[0]), dist=gumbel_l, - scale=1.0,#0.5, + scale=1.0, # 0.5, inserted_states=inserted_states, ) initial_probabilities = init_dist( @@ -1724,45 +1742,20 @@ def _build_hmm_modules( ) def run(self, verbose: bool = True): - same_state_counter = 0 - empty_counter = 0 - verbose = False - if verbose: - pbar = progressbar.ProgressBar( - maxval=self.n_states, # redirect_stdout=True - ) - pbar.start() - while 
self.is_still_following(): - prev_state = self.current_state - # TODO: check self.queue.get() format. maybe this should actually be a tuple try: - queue_input = self.queue.get(timeout=QUEUE_TIMEOUT) - #features, f_time = queue_input - #print(f'{features=}, {f_time=}') + item = self.queue.get(timeout=QUEUE_TIMEOUT) except: break - #TODO: try MidiStream.return_midi_messages = True + if item is STREAM_END: + break + if item is None: + continue - if queue_input is not None: - #print(f'pitch_ioi: {queue_input=}') - current_state = self.__call__(queue_input) - empty_counter = 0 - if current_state == prev_state: - if same_state_counter < self.patience: - same_state_counter += 1 - else: - break - else: - same_state_counter = 0 - + current_state = self.__call__(item) - if verbose: - pbar.update(int(current_state)) - yield current_state + yield float(self.state_space[current_state]) - if verbose: - pbar.finish() return self.warping_path @@ -1938,6 +1931,8 @@ def run(self, verbose: bool = True): while self.is_still_following(): prev_state = self.current_state queue_input = self.queue.get(timeout=QUEUE_TIMEOUT) + if queue_input is STREAM_END: + break features, f_time = queue_input self.last_queue_update = time.time() self.input_features = ( @@ -1961,7 +1956,7 @@ def run(self, verbose: bool = True): ) if verbose: pbar.update(int(current_state)) - yield current_state + yield float(self.state_space[current_state]) if verbose: pbar.finish() @@ -1987,6 +1982,7 @@ def __init__( initial_probabilities: Optional[np.ndarray] = None, state_space: Optional[NDArray] = None, patience: int = 200, + **kwargs, ) -> None: """ Initialize the object. 
@@ -2090,7 +2086,6 @@ def __init__( self.input_features = None self.distance_func = "Euclidean" - BaseHMM.__init__( self, observation_model=observation_model, @@ -2153,6 +2148,8 @@ def run(self, verbose: bool = True): while self.is_still_following(): prev_state = self.current_state queue_input = self.queue.get(timeout=QUEUE_TIMEOUT) + if queue_input is STREAM_END: + break features, f_time = queue_input self.last_queue_update = time.time() self.input_features = ( @@ -2176,7 +2173,7 @@ def run(self, verbose: bool = True): ) if verbose: pbar.update(int(current_state)) - yield current_state + yield float(self.state_space[current_state]) if verbose: pbar.finish() diff --git a/matchmaker/prob/outer_product_hmm.py b/matchmaker/prob/outer_product_hmm.py index adb3d57..680aa70 100644 --- a/matchmaker/prob/outer_product_hmm.py +++ b/matchmaker/prob/outer_product_hmm.py @@ -4,7 +4,8 @@ from numpy.typing import NDArray from matchmaker.base import OnlineAlignment -from matchmaker.utils.misc import RECVQueue +from matchmaker.io.queue import RECVQueue +from matchmaker.io.stream import STREAM_END try: # import the compiled function (name depends on your .pyx) @@ -13,7 +14,6 @@ viterbi_step_cy = None import numpy as np - from partitura.score import Part, Score, ScoreLike NDArrayFloat = NDArray[np.float32] @@ -216,6 +216,7 @@ def __init__( r: Optional[np.ndarray] = None, other_prob: float = 1e-6, patience: int = 10, + **kwargs, ) -> None: """ Outer-product Hidden Markov Model for score following. 
@@ -285,15 +286,18 @@ def __init__( ) self.current_state = 0 + self.input_index = 0 + self.perf_time = 0.0 self._warping_path = [] self._current_chord = np.zeros(88, dtype=int) self.patience = patience - self.state_probabilities = np.ones(self.n_states) / self.n_states + self.state_probabilities = np.zeros(self.n_states) + self.state_probabilities[0] = 1.0 self.is_first_observation = True @property - def warping_path(self) -> NDArrayInt: - return (np.array(self._warping_path).T).astype(np.int32) + def warping_path(self) -> NDArray: + return np.array(self._warping_path).T def is_still_following(self) -> bool: if self.current_state is not None: @@ -305,17 +309,22 @@ def __call__( self, input: tuple[np.ndarray, float], *args, **kwargs ) -> Optional[int]: pitch_obs, ioi = input + self.perf_time += ioi if ioi < IOI_THRESHOLD: self._current_chord = np.maximum(self._current_chord, pitch_obs) - return self.current_state + # Return None to signal "chord continuation" (no state advance); + # run() uses this to skip the patience counter. + return None else: self._current_chord = pitch_obs self.state_probabilities = self.viterbi_step( self.state_probabilities, self._current_chord ) self.current_state = np.argmax(self.state_probabilities) - self._warping_path.append(self.current_state) + score_beat = float(self.state_space[self.current_state]) + self._warping_path.append((score_beat, self.perf_time)) + self.input_index += 1 return self.current_state @@ -337,8 +346,10 @@ def compute_obs_likelihood( b[i] = likelihood of observing `observation` at state i. 
""" - b = self.b_table[:, 21:109] * observation - return b + log_b = np.log(np.maximum(self.b_table[:, 21:109], 1e-300)) # (N, 88) + log_em = log_b @ observation # (N,): log-product over active pitches + log_em -= log_em.max() # shift for numerical stability + return np.exp(log_em) # (N,) # Viterbi update def viterbi_step( @@ -392,8 +403,8 @@ def viterbi_step( local_max = val skip_contrib = self.r[i] * global_skip_max - new_probs[i] = sum( - b[i] * (skip_contrib if skip_contrib >= local_max else local_max) + new_probs[i] = b[i] * ( + skip_contrib if skip_contrib >= local_max else local_max ) if np.sum(new_probs) > 0: new_probs /= np.sum(new_probs) @@ -417,8 +428,13 @@ def run( prev_state = self.current_state queue_input = self.queue.get() + if queue_input is STREAM_END: + break if queue_input is not None: current_state = self(queue_input) + if current_state is None: + # Chord continuation: no state advance, skip patience. + continue empty_counter = 0 if current_state == prev_state: if same_state_counter < self.patience: @@ -429,10 +445,8 @@ def run( same_state_counter = 0 if verbose: - # current_state may be None (no state yet); guard the update - if current_state is not None: - pbar.update(int(current_state)) - yield current_state + pbar.update(int(current_state)) + yield float(self.state_space[current_state]) if verbose: pbar.finish() diff --git a/matchmaker/prob/outer_product_hmm_audio.py b/matchmaker/prob/outer_product_hmm_audio.py index e6dc3b9..25d12dc 100644 --- a/matchmaker/prob/outer_product_hmm_audio.py +++ b/matchmaker/prob/outer_product_hmm_audio.py @@ -8,32 +8,28 @@ from partitura.score import Part, Score, ScoreLike from matchmaker.base import OnlineAlignment -from matchmaker.features.audio import QUEUE_TIMEOUT -from matchmaker.utils.misc import RECVQueue, set_latency_stats +from matchmaker.io.audio import QUEUE_TIMEOUT +from matchmaker.io.queue import RECVQueue +from matchmaker.io.stream import STREAM_END +from matchmaker.utils.misc import 
set_latency_stats NDArrayFloat = NDArray[np.float32] NDArrayInt = NDArray[np.int32] -DEFAULT_PITCH_ERROR_PROBS = { - "correct_pitch_prob": 0.9497, - "semi_tone_error_prob": 0.0145 / 2.0, - "whole_tone_error_prob": 0.0224 / 2.0, - "octave_error_prob": 0.0047 / 2.0, - "within_one_octave_error_prob": 0.0086 / 9.0 / 2.0, -} - -# DEFAULT_TRANSITIONS = [ -# (1, 1.0), # normal (i→i+1) -# (2, 1e-50), # deletion (i→i+2), HHMMState_simple.hpp: log10(-50) -# ] +# Nakamura et al. 2016 Section IV-B experimental parameters. +# Neighbourhood transitions a^{(nbh)}_{j,i} from nakamura_data.py: +# These are the "small transition probabilities" for the banded structure. +# Paper: a_{i,i} = 0 (self-transition handled by bottom HMM a00), +# a_{i,i+2} = 1e-50 (deletion, effectively 0). +# We use the empirical values from [13] (Nakamura JNMR 2014). DEFAULT_TRANSITIONS = [ - (-3, 0.001), - (-2, 0.001), - (-1, 0.002), - (0, 0.01342), - (1, 0.96), - (2, 0.01), - (3, 0.002), + (-3, 0.00509), + (-2, 0.00516), + (-1, 0.00886), + (0, 0.01342), # insertion (staying at same top state) + (1, 0.94531), # normal forward progression + (2, 0.00610), # deletion (skip one note) + (3, 0.00073), ] DEFAULT_D1 = 3 @@ -43,8 +39,11 @@ _FLUX_EXIT_BOOST: float = 1.0 _OTHER_PROB: float = 1e-6 -_PAUSE_ENTRY_PROB: float = 0.01 # probability of entering pause state from sound -_PAUSE_DURATION_SEC: float = 0.5 +# Paper IV-B: +# a_{0,1}^{(i)} = 1e-100 (pause entry: almost never enter pause) +# a_{1,1}^{(i)} = 0.999 (pause self-transition: once in pause, stay) +_PAUSE_ENTRY_PROB: float = 1e-100 +_PAUSE_SELF_TRANSITION: float = 0.999 _PAUSE_EMISSION_MAX: float = 1e-3 @@ -127,11 +126,13 @@ def __init__( reference_features: np.ndarray, queue: Optional[RECVQueue] = None, transitions: Optional[List[tuple[int, float]]] = None, - pitch_error_probs: Optional[dict[str, float]] = None, patience: int = 0, tempo: float = 120.0, sample_rate: int = 16000, hop_length: int = 320, + s_j: float = 1e-5, + r_i: Optional[np.ndarray] = 
None, + **kwargs, ) -> None: self.reference_features = reference_features OnlineAlignment.__init__( @@ -173,16 +174,10 @@ def __init__( self.transitions = ( transitions if transitions is not None else DEFAULT_TRANSITIONS ) - self.pitch_error_probs = ( - pitch_error_probs - if pitch_error_probs is not None - else DEFAULT_PITCH_ERROR_PROBS - ) self.other_prob = _OTHER_PROB self.sample_rate = int(sample_rate) self.hop_length = int(hop_length) self.pause_entry_prob = _PAUSE_ENTRY_PROB - self.pause_duration_sec = _PAUSE_DURATION_SEC self.pause_emission_max = _PAUSE_EMISSION_MAX # Transition setup with banded structure @@ -202,7 +197,23 @@ def __init__( row_sums = self.alpha.sum(axis=1, keepdims=True) self.alpha = self.alpha / row_sums - self.current_state = 0 + # Repeat/skip factorization (Nakamura Eq.11): + # a_{j,i} = a^{(nbh)}_{j,i} + s_j * r_i + # s_j: probability of stopping at event j before a repeat/skip + # r_i: probability of resuming at event i after a repeat/skip + self.S = np.full(self.n_states, float(s_j), dtype=float) + if r_i is not None: + self.r = np.asarray(r_i, dtype=float) + else: + self.r = np.ones(self.n_states, dtype=float) / self.n_states + + # Renormalize alpha to account for s_j mass: + # Σ_i a_{j,i} = Σ_i a^{(nbh)}_{j,i} + s_j * Σ_i r_i = 1 + # => Σ_i a^{(nbh)}_{j,i} = 1 - s_j (since Σ_i r_i = 1) + if s_j > 0: + self.alpha = self.alpha * (1.0 - self.S[:, None]) + + self.current_state_index = 0 self._warping_path = [] self._current_chord = np.zeros(88, dtype=int) self.patience = int(patience) @@ -223,13 +234,8 @@ def __init__( tempo=tempo, frame_rate=frame_rate, ) - self.a11 = float( - np.clip( - self._pause_self_transition_prob(self.pause_duration_sec, frame_rate), - 0.0, - 1.0, - ) - ) + # Paper IV-B: a_{1,1}^{(i)} = 0.999 (pause self-transition) + self.a11 = float(_PAUSE_SELF_TRANSITION) # Pause entry prob a01 (II-E) move_prob = 1.0 - self.a00 p_pause = float(np.clip(self.pause_entry_prob, 0.0, 1.0)) @@ -243,9 +249,21 @@ def __init__( 
self.e0 = np.clip(1.0 - self.a00 - self.a01, 1e-10, 1.0) self.e1 = float(np.clip(1.0 - self.a11, 1e-10, 1.0)) + # Precompute alpha diagonals and sliding window indices for vectorized forward_step + self._alpha_diags = [] + for d in range(-self.D2, self.D1 + 1): + self._alpha_diags.append(np.diagonal(self.alpha, offset=-d).copy()) + self._j_starts = np.maximum(0, np.arange(self.n_states) - self.D2) + self._j_ends = np.minimum(self.n_states, np.arange(self.n_states) + self.D1 + 1) + + @property + def warping_path(self) -> np.ndarray: + return np.array(self._warping_path).T + @property - def warping_path(self) -> NDArrayInt: - return (np.array(self._warping_path).T).astype(np.int32) + def current_position(self) -> float: + """Current score position in beats.""" + return float(self.state_space[self.current_state_index]) @staticmethod def _pause_self_transition_prob( @@ -286,8 +304,8 @@ def _compute_chord_self_transition_probs( return np.clip(1.0 - 1.0 / d_i, 1e-6, 1.0 - 1e-6) def is_still_following(self) -> bool: - if self.current_state is not None: - return self.current_state <= self.n_states - 1 + if self.current_state_index is not None: + return self.current_state_index < self.n_states - 1 return False def __call__(self, input, *args, **kwargs) -> Optional[int]: @@ -321,10 +339,12 @@ def __call__(self, input, *args, **kwargs) -> Optional[int]: top_scores = probs[0::2] + probs[1::2] new_top = int(np.argmax(top_scores)) - self.current_state = new_top - self._warping_path.append((self.current_state, self.input_index)) + self.current_state_index = new_top + self._warping_path.append( + (float(self.state_space[self.current_state_index]), self.input_index) + ) self.input_index += 1 - return self.current_state + return self.current_state_index def compute_obs_likelihood( self, @@ -390,14 +410,31 @@ def forward_step( prev_sound = np.asarray(prev_probs[0::2], dtype=float) prev_pause = np.asarray(prev_probs[1::2], dtype=float) - # Emission - emit_sound = 
self.compute_obs_likelihood(observation) - emit_pause_scalar = self._compute_pause_emission(observation) + # --- Single _preprocess_obs call + inline emission computation --- + obs = _preprocess_obs(observation) + cqt = np.maximum(obs[:88] if obs.size >= 88 else obs, 0.0) + cqt_sum = cqt.sum() + + # Sound emission (from compute_obs_likelihood) + if cqt_sum <= 0: + emit_sound = np.full(N, 1e-300, dtype=float) + else: + cqt_norm = cqt / cqt_sum + em = self.chord_harmonic_mask @ cqt_norm + emit_sound = np.maximum(np.nan_to_num(em, nan=1e-12), 1e-12) + + # Pause emission (from _compute_pause_emission) + if cqt_sum <= 0: + emit_pause_scalar = min(1.0, self.pause_emission_max) + else: + var = float(np.var(cqt / cqt_sum)) + emit_pause_scalar = min( + max(1.0 / (1.0 + 200.0 * var), 1e-300), self.pause_emission_max + ) emit_pause = np.full(N, emit_pause_scalar, dtype=float) # Spectral-flux-driven exit boost - obs_flat = _preprocess_obs(observation) - flux = float(obs_flat[88]) if obs_flat.size > 88 else 0.0 + flux = float(obs[88]) if obs.size > 88 else 0.0 f = flux / (flux + 1.0) # [0,1) boost = 1.0 + _FLUX_EXIT_BOOST * f e0 = np.clip(self.e0 * boost, 1e-10, 1.0 - self.a01 - 1e-10) @@ -406,18 +443,27 @@ def forward_step( # Exit masses from each top state j (Eq.(6)) exit_mass = prev_sound * e0 + prev_pause * self.e1 # (N,) - # Compute neigh_sum_i for each i (banded, Eq.(9)) - neigh_sum = np.zeros(N, dtype=float) - for i in range(N): - j_start = max(0, i - self.D2) - j_end = min(N, i + self.D1 + 1) - ssum = 0.0 - for j in range(j_start, j_end): - a = float(self.alpha[j, i]) - if a <= 0: - continue - ssum += exit_mass[j] * a - neigh_sum[i] = ssum + # --- Vectorized neighbourhood sum (replaces O(N*(D1+D2)) Python loop) --- + # Global skip term: Σ_j exit_mass[j] * S[j] (O(N), computed once) + global_skip_sum = float(np.dot(exit_mass, self.S)) + + # Local neighbourhood transition: sum over diagonals of alpha + local_nbh = np.zeros(N, dtype=float) + for k, d in 
enumerate(range(-self.D2, self.D1 + 1)): + diag = self._alpha_diags[k] + L = len(diag) + src = max(0, d) # source index offset in exit_mass + dst = max(0, -d) # destination index offset in local_nbh + local_nbh[dst : dst + L] += exit_mass[src : src + L] * diag + + # Local skip via cumsum sliding window: O(N) instead of O(N*D) + eS = exit_mass * self.S + cumsum_eS = np.empty(N + 1, dtype=float) + cumsum_eS[0] = 0.0 + np.cumsum(eS, out=cumsum_eS[1:]) + local_skip = cumsum_eS[self._j_ends] - cumsum_eS[self._j_starts] + + neigh_sum = local_nbh + self.r * (global_skip_sum - local_skip) # Within-top bottom transitions within_sound = prev_sound * a00 @@ -448,21 +494,27 @@ def run( same_state_counter = 0 empty_counter = 0 if verbose: - pbar = progressbar.ProgressBar(maxval=self.n_states) + pbar = progressbar.ProgressBar( + maxval=len(self.state_space), + redirect_stdout=True, + redirect_stderr=True, + ) pbar.start() while self.is_still_following(): - prev_state = self.current_state + prev_state = self.current_state_index try: queue_input = self.queue.get(timeout=QUEUE_TIMEOUT) except Empty: break + if queue_input is STREAM_END: + break self.last_queue_update = time.time() if queue_input is not None: - current_state = self(queue_input) + self(queue_input) empty_counter = 0 - if current_state == prev_state: + if self.current_state_index == prev_state: if self.patience > 0: if same_state_counter < self.patience: same_state_counter += 1 @@ -472,13 +524,12 @@ def run( same_state_counter = 0 if verbose: - if current_state is not None: - pbar.update(int(current_state) + 1) # states starts with 0 + pbar.update(self.current_state_index + 1) latency = time.time() - self.last_queue_update self.latency_stats = set_latency_stats( latency, self.latency_stats, self.input_index ) - yield current_state + yield self.current_position if verbose: pbar.finish() diff --git a/matchmaker/utils/errors.py b/matchmaker/utils/errors.py new file mode 100644 index 0000000..2cf055d --- /dev/null +++ 
b/matchmaker/utils/errors.py @@ -0,0 +1,46 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +Custom exception classes for matchmaker. +""" + +from typing import Iterable, List, Union + + +class MatchmakerInvalidParameterTypeError(Exception): + """Error for flagging an invalid parameter type.""" + + def __init__( + self, + parameter_name: str, + required_parameter_type: Union[type, Iterable[type]], + actual_parameter_type: type, + *args, + ) -> None: + if isinstance(required_parameter_type, Iterable): + rqpt = ", ".join([f"{pt}" for pt in required_parameter_type]) + else: + rqpt = required_parameter_type + message = f"`{parameter_name}` was expected to be {rqpt}, but is {actual_parameter_type}" + super().__init__(message, *args) + + +class MatchmakerInvalidOptionError(Exception): + """Error for invalid option.""" + + def __init__(self, parameter_name, valid_options, value, *args) -> None: + rqop = ", ".join([f"{op}" for op in valid_options]) + message = f"`{parameter_name}` was expected to be in {rqop}, but is {value}" + super().__init__(message, *args) + + +class MatchmakerMissingParameterError(Exception): + """Error for flagging a missing parameter.""" + + def __init__(self, parameter_name: Union[str, List[str]], *args) -> None: + if isinstance(parameter_name, Iterable) and not isinstance(parameter_name, str): + message = ", ".join([f"`{pn}`" for pn in parameter_name]) + message = f"{message} were not given" + else: + message = f"`{parameter_name}` was not given." 
+ super().__init__(message, *args) diff --git a/matchmaker/utils/eval.py b/matchmaker/utils/eval.py index 20e5192..5d0c313 100644 --- a/matchmaker/utils/eval.py +++ b/matchmaker/utils/eval.py @@ -1,5 +1,3 @@ -from typing import TypedDict, Union - import numpy as np import scipy @@ -11,159 +9,97 @@ def transfer_positions( wp, ref_anns, frame_rate, - reverse=False, *, - mode: str = "auto", - reducer: str = "min", - state_offset: Union[int, str] = "auto", - output: str = "seconds", + domain: str = "score", + aggregation_func=None, ): """ - Transfer the positions of the reference annotations to the target annotations using the warping path. - - This function supports two common warping-path conventions: - - - **frame mode** (classic DTW-style): wp[0] and wp[1] are frame indices for reference/target features. - - **state mode** (HMM/score-state): wp[0] contains *reference state indices* and wp[1] contains *target frame indices*. + Transfer positions between score and performance using the warping path. Parameters ---------- wp : np.array with shape (2, T) - array of warping path. - warping_path[0] is the index of the reference (score) feature and warping_path[1] is the index of the target(input) feature. - ref_ann : List[float] - In **frame mode**, reference annotations in seconds. - In **state mode**, a sequence whose length equals the number of reference states (e.g., score unique_onsets); - the values are not used except for determining the number of states. + Warping path. wp[0] = score beats, wp[1] = performance frame indices. + ref_anns : array-like + Query positions (seconds for domain="score", + beats for domain="performance"). frame_rate : int - frame rate of the audio. - reverse : bool - If True, swap the direction (target -> reference). - mode : {"auto", "frame", "state"} - Warping-path convention. "auto" picks "state" when wp[0] looks like small discrete state indices. 
- reducer : {"min", "max", "median", "mean"} - In **state mode**, how to select a single representative target frame for each state when multiple wp entries - map to the same state. - state_offset : {"auto"} or int - In **state mode**, wp[0] may start at 0 or 1 (or have a leading start-state). "auto" chooses the offset that - best matches the expected number of states. - output : {"seconds", "frames"} - Return unit. "seconds" divides frames by frame_rate; "frames" returns frame indices. + Frame rate of the audio. + domain : {"score", "performance"} + Domain of the output. + "score": perf→score lookup. Given performance times (seconds), + return predicted score positions (beats). + "performance": score→perf lookup. Given score beats, return + predicted performance times (seconds). + aggregation_func : callable or None + Function to aggregate multiple values sharing the same key + (e.g., np.max, np.min, np.mean). If None, defaults to: + - domain="score": last entry in temporal order (tracker's + final decision at that frame) + - domain="performance": np.min (earliest arrival at that beat, + i.e. first-crossing rule) Returns ------- - predicted_targets : np.array with shape (T,) - Predicted target positions (seconds or frames depending on output). + predicted : np.array + Predicted positions in the target domain. """ - if output not in {"seconds", "frames"}: - raise ValueError(f"Invalid output={output!r}. Use 'seconds' or 'frames'.") - - if reverse: - x, y = wp[1], wp[0] - else: - x, y = wp[0], wp[1] - - if mode not in {"auto", "frame", "state"}: - raise ValueError(f"Invalid mode={mode!r}. Use 'auto', 'frame', or 'state'.") - - # Heuristic: state paths have small discrete indices (often << target frames), - # while frame paths typically cover most reference frames (unique count is large). 
- if mode == "auto": - x_unique = np.unique(x) - n_ref = len(ref_anns) - looks_like_state = (x_unique.size <= max(4, 2 * n_ref)) and ( - int(np.max(x)) <= max(10, 5 * n_ref) - ) - mode = "state" if looks_like_state else "frame" - - if mode == "frame": - # Causal nearest neighbor interpolation (reference seconds -> reference frames -> target frames) - ref_anns_frame = np.round(np.asarray(ref_anns) * frame_rate) - predicted_targets = np.ones(len(ref_anns_frame), dtype=float) * np.nan - - for i, r in enumerate(ref_anns_frame): - # 1) Scan all x values less than or equal to r and find the largest x value - past_indices = np.where(x <= r)[0] - if past_indices.size > 0: - # Find indices corresponding to the largest x value - max_x_val = x[past_indices[-1]] - max_x_indices = np.where(x == max_x_val)[0] - - # 2) Among all y values mapped to this x value, select the minimum y value - corresponding_y_values = y[max_x_indices] - predicted_targets[i] = float(np.min(corresponding_y_values)) - - if output == "frames": - return predicted_targets - return np.asarray(predicted_targets) / frame_rate - - # mode == "state" - # Goal: for each reference state index, select representative target frame from wp. - num_states = len(ref_anns) - predicted_frames = np.ones(num_states, dtype=float) * np.nan - - x_int = np.asarray(x, dtype=int) - y_int = np.asarray(y, dtype=int) - - if reducer not in {"min", "max", "median", "mean"}: - raise ValueError( - f"Invalid reducer={reducer!r}. Use 'min', 'max', 'median', or 'mean'." - ) - - if state_offset == "auto": - # Choose offset that maximizes overlap between expected states and observed wp state indices. 
- observed = np.unique(x_int) - candidates = [] - for off in (0, 1, int(np.min(x_int))): - if off not in candidates: - candidates.append(off) - best_off = candidates[0] - best_overlap = -1 - for off in candidates: - expected = np.arange(off, off + num_states, dtype=int) - overlap = np.intersect1d(observed, expected).size - if overlap > best_overlap: - best_overlap = overlap - best_off = off - offset = best_off + if domain not in {"score", "performance"}: + raise ValueError(f"Invalid domain={domain!r}. Use 'score' or 'performance'.") + + wp_score = wp[0].astype(float) + wp_perf = wp[1].astype(float) + queries = np.asarray(ref_anns, dtype=float) + + def _last(arr): + return arr[-1] + + if aggregation_func is None: + aggregation_func = _last if domain == "score" else np.min + + if domain == "score": + # Perf → Score: "at perf time t, what is the tracker's score position?" + # Group by perf frame, take the last entry by default (tracker's final decision). + query_frames = queries * frame_rate + + sort_idx = np.argsort(wp_perf, kind="stable") + wp_perf_sorted = wp_perf[sort_idx] + wp_score_sorted = wp_score[sort_idx] + + unique_frames, first_idx = np.unique(wp_perf_sorted, return_index=True) + reduced_scores = np.empty(len(unique_frames)) + for g in range(len(unique_frames)): + start = first_idx[g] + end = ( + first_idx[g + 1] if g + 1 < len(unique_frames) else len(wp_score_sorted) + ) + reduced_scores[g] = aggregation_func(wp_score_sorted[start:end]) + + # unique_frames is monotonic → searchsorted for last frame ≤ query + indices = np.searchsorted(unique_frames, query_frames, side="right") - 1 + predicted = np.full(len(queries), np.nan) + valid = indices >= 0 + predicted[valid] = reduced_scores[indices[valid]] + return predicted else: - offset = int(state_offset) - - for s in range(num_states): - wp_state = s + offset - idx = np.where(x_int == wp_state)[0] - if idx.size == 0: - continue - vals = y_int[idx].astype(float) - if reducer == "min": - predicted_frames[s] = 
float(np.min(vals)) - elif reducer == "max": - predicted_frames[s] = float(np.max(vals)) - elif reducer == "median": - predicted_frames[s] = float(np.median(vals)) - else: # mean - predicted_frames[s] = float(np.mean(vals)) - - if output == "frames": - return predicted_frames - return predicted_frames / frame_rate - - -def transfer_from_score_to_predicted_perf(wp, score_annots, frame_rate, mode="auto"): - predicted_perf_idx = transfer_positions( - wp, - score_annots, - frame_rate, - mode=mode, - ) - return predicted_perf_idx - - -def transfer_from_perf_to_predicted_score(wp, perf_annots, frame_rate, mode="auto"): - predicted_score_idx = transfer_positions( - wp, perf_annots, frame_rate, reverse=True, mode=mode - ) - return predicted_score_idx + # Score → Perf: "when did the tracker first reach beat b?" + # Group by score position, aggregate perf frame values per group. + sort_idx = np.argsort(wp_score, kind="stable") + wp_score_sorted = wp_score[sort_idx] + wp_perf_sorted = wp_perf[sort_idx] + + unique_beats, first_idx = np.unique(wp_score_sorted, return_index=True) + reduced_perf = np.empty(len(unique_beats)) + for g in range(len(unique_beats)): + start = first_idx[g] + end = first_idx[g + 1] if g + 1 < len(unique_beats) else len(wp_perf_sorted) + reduced_perf[g] = aggregation_func(wp_perf_sorted[start:end]) + + indices = np.searchsorted(unique_beats, queries, side="left") + predicted = np.full(len(queries), np.nan) + valid = indices < len(unique_beats) + predicted[valid] = reduced_perf[indices[valid]] + return predicted / frame_rate def get_evaluation_results( @@ -171,23 +107,25 @@ def get_evaluation_results( predicted_annots, total_counts, tolerances=TOLERANCES_IN_MILLISECONDS, - pcr_threshold=2_000, # 2 seconds in_seconds=True, ): if in_seconds: - errors_in_delay = (gt_annots - predicted_annots) * 1000 # in milliseconds + errors_in_delay = (gt_annots - predicted_annots) * 1000 else: errors_in_delay = gt_annots - predicted_annots - filtered_errors_in_delay = 
errors_in_delay[np.abs(errors_in_delay) <= pcr_threshold] - filtered_abs_errors_in_delay = np.abs(filtered_errors_in_delay) + abs_errors_in_delay = np.abs(errors_in_delay) results = { - "mean": float(f"{np.nanmean(filtered_abs_errors_in_delay):.4f}"), - "median": float(f"{np.nanmedian(filtered_abs_errors_in_delay):.4f}"), - "std": float(f"{np.nanstd(filtered_abs_errors_in_delay):.4f}"), - "skewness": float(f"{scipy.stats.skew(filtered_errors_in_delay):.4f}"), - "kurtosis": float(f"{scipy.stats.kurtosis(filtered_errors_in_delay):.4f}"), + "mean": float(f"{np.nanmean(abs_errors_in_delay):.4f}"), + "median": float(f"{np.nanmedian(abs_errors_in_delay):.4f}"), + "std": float(f"{np.nanstd(abs_errors_in_delay):.4f}"), + "skewness": float( + f"{scipy.stats.skew(errors_in_delay, nan_policy='omit'):.4f}" + ), + "kurtosis": float( + f"{scipy.stats.kurtosis(errors_in_delay, nan_policy='omit'):.4f}" + ), } if in_seconds: @@ -201,6 +139,72 @@ def get_evaluation_results( f"{np.sum(np.abs(errors_in_delay) <= tau) / total_counts:.4f}" ) - results["pcr"] = float(f"{len(filtered_errors_in_delay) / total_counts:.4f}") - results["count"] = len(filtered_abs_errors_in_delay) return results + + +def evaluate_alignment( + wp_score, + wp_perf_sec, + gt_score_beats, + gt_perf_sec, + beat_tolerances=TOLERANCES_IN_BEATS, + ms_tolerances=TOLERANCES_IN_MILLISECONDS, +): + """Evaluate alignment quality in both beat and ms domains. + + Parameters + ---------- + wp_score : np.ndarray + Warping path score axis (beats). + wp_perf_sec : np.ndarray + Warping path performance axis (seconds). + gt_score_beats : np.ndarray + Ground truth score positions (beats). + gt_perf_sec : np.ndarray + Ground truth performance times (seconds). + beat_tolerances : list + Tolerance thresholds for beat error. + ms_tolerances : list + Tolerance thresholds for ms error. 
+ + Returns + ------- + dict + {"beat": {...}, "ms": {...}} + """ + # GT interpolator: score beat → perf time + valid = np.isfinite(gt_perf_sec) & np.isfinite(gt_score_beats) + gt_interp = scipy.interpolate.interp1d( + gt_score_beats[valid], + gt_perf_sec[valid], + bounds_error=False, + fill_value=np.nan, + ) + + # Beat metrics: perf→score prediction + wp_sec = np.array([wp_score, wp_perf_sec]) + score_predicted = transfer_positions( + wp_sec, + gt_perf_sec, + frame_rate=1, + domain="score", + ) + n_valid = min(len(gt_score_beats), len(score_predicted)) + beat_results = get_evaluation_results( + gt_score_beats[:n_valid], + score_predicted[:n_valid], + total_counts=len(gt_score_beats), + tolerances=beat_tolerances, + in_seconds=False, + ) + + # Ms metrics: score→perf prediction + gt_perf_for_wp = gt_interp(wp_score) + ms_results = get_evaluation_results( + gt_perf_for_wp, + wp_perf_sec, + total_counts=len(wp_score), + tolerances=ms_tolerances, + ) + + return {"beat": beat_results, "ms": ms_results} diff --git a/matchmaker/utils/misc.py b/matchmaker/utils/misc.py index ce58269..5bb2ad2 100644 --- a/matchmaker/utils/misc.py +++ b/matchmaker/utils/misc.py @@ -9,8 +9,7 @@ import re import xml.etree.ElementTree as ET from pathlib import Path -from queue import Empty, Queue -from typing import Any, Dict, Iterable, List, Optional, Union +from typing import Dict, Optional, Union import librosa import mido @@ -146,53 +145,6 @@ def extract_tempo_marking_from_musicxml( return None -class MatchmakerInvalidParameterTypeError(Exception): - """ - Error for flagging an invalid parameter type. 
- """ - - def __init__( - self, - parameter_name: str, - required_parameter_type: Union[type, Iterable[type]], - actual_parameter_type: type, - *args, - ) -> None: - if isinstance(required_parameter_type, Iterable): - rqpt = ", ".join([f"{pt}" for pt in required_parameter_type]) - else: - rqpt = required_parameter_type - message = f"`{parameter_name}` was expected to be {rqpt}, but is {actual_parameter_type}" - - super().__init__(message, *args) - - -class MatchmakerInvalidOptionError(Exception): - """ - Error for invalid option. - """ - - def __init__(self, parameter_name, valid_options, value, *args) -> None: - rqop = ", ".join([f"{op}" for op in valid_options]) - message = f"`{parameter_name}` was expected to be in {rqop}, but is {value}" - - super().__init__(message, *args) - - -class MatchmakerMissingParameterError(Exception): - """ - Error for flagging a missing parameter - """ - - def __init__(self, parameter_name: Union[str, List[str]], *args) -> None: - if isinstance(parameter_name, Iterable) and not isinstance(parameter_name, str): - message = ", ".join([f"`{pn}`" for pn in parameter_name]) - message = f"{message} were not given" - else: - message = f"`{parameter_name}` was not given." - super().__init__(message, *args) - - def ensure_rng( seed: Union[numbers.Integral, np.random.RandomState], ) -> np.random.RandomState: @@ -224,33 +176,6 @@ def ensure_rng( ) -class RECVQueue(Queue): - """ - Queue with a recv method (like Pipe) - - This class uses python's Queue.get with a timeout makes it interruptable via KeyboardInterrupt - and even for the future where that is possibly out-dated, the interrupt can happen after each timeout - so periodically query the queue with a timeout of 1s each attempt, finding a middleground - between busy-waiting and uninterruptable blocked waiting - """ - - def __init__(self) -> None: - Queue.__init__(self) - - def recv(self) -> Any: - """ - Return and remove an item from the queue. 
- """ - while True: - try: - return self.get(timeout=1) - except Empty: # pragma: no cover - pass - - def poll(self) -> bool: - return self.empty() - - def get_window_indices(indices: np.ndarray, context: int) -> np.ndarray: # Create a range array from -context to context (inclusive) range_array = np.arange(-context, context + 1) @@ -431,7 +356,7 @@ def adjust_tempo_for_performance_file( ): """ Adjust the tempo of the score part to match the performance file. - We round up the tempo to the nearest 20 bpm to avoid too much optimization. + We round the tempo to the nearest 10 bpm to avoid too much optimization. Parameters ---------- @@ -449,9 +374,7 @@ def adjust_tempo_for_performance_file( else: target_length = librosa.get_duration(path=str(performance_file)) ratio = target_length / source_length - rounded_tempo = int( - (default_tempo / ratio + 19) // 20 * 20 - ) # round up to nearest 20 + rounded_tempo = int(round(default_tempo / ratio / 10) * 10) # round to nearest 10 print( f"default tempo: {default_tempo} (score length: {source_length}) -> adjusted_tempo: {rounded_tempo} (perf length: {target_length})" ) @@ -521,6 +444,15 @@ def save_nparray_to_csv(array: NDArray, save_path: str): writer.writerows(array) +def _beats_to_frames( + beats: np.ndarray, + ref_frame_to_beat: np.ndarray, +) -> np.ndarray: + """Convert beat positions to (float) frame indices via inverse interpolation.""" + frames = np.arange(len(ref_frame_to_beat), dtype=float) + return np.interp(beats, ref_frame_to_beat, frames) + + def plot_alignment( warping_path: np.ndarray, perf_annots: np.ndarray, @@ -533,92 +465,93 @@ def plot_alignment( ref_features: Optional[np.ndarray] = None, input_features: Optional[np.ndarray] = None, distance_func=None, + ref_frame_to_beat: Optional[np.ndarray] = None, ): - """Plot warping path, GT annotations, and predicted points in one figure. - - Layers (back to front): distance matrix → warping path → predicted → GT. 
- """ + """Plot warping path, GT annotations, and predicted points.""" save_dir.mkdir(parents=True, exist_ok=True) gt = np.asarray(perf_annots, dtype=float) pred = np.asarray(perf_annots_predicted, dtype=float) n = min(len(gt), len(pred)) gt, pred = gt[:n], pred[:n] - has_dist_matrix = ( + fig, ax = plt.subplots(figsize=(30, 30)) + + # Distance matrix background + show_dist = False + if ( ref_features is not None and input_features is not None and distance_func is not None - ) + ): + try: + if isinstance(distance_func, str): + dist = scipy.spatial.distance.cdist( + ref_features, input_features, metric=distance_func + ) + else: + dist = np.array( + [ + [distance_func(r, i) for i in input_features] + for r in ref_features + ], + dtype=np.float32, + ) + n_input = input_features.shape[0] + n_ref = ref_features.shape[0] + ax.imshow( + dist, + aspect="auto", + origin="lower", + interpolation="nearest", + extent=(0, n_input - 1, 0, n_ref - 1), + ) + show_dist = True + except Exception: + pass - fig, ax = plt.subplots(figsize=(30, 30)) + # x-axis: performance time in frames + x_gt = gt * float(frame_rate) + wp_x = warping_path[1] * float(frame_rate) - if has_dist_matrix: - # DTW mode: everything in frame space - dist = scipy.spatial.distance.cdist( - ref_features, - input_features, - metric=distance_func, - ) - ax.imshow( - dist, - aspect="auto", - origin="lower", - interpolation="nearest", - extent=(0, input_features.shape[0] - 1, 0, ref_features.shape[0] - 1), - ) - x_gt = gt * float(frame_rate) - x_pred = pred * float(frame_rate) - if score_y is not None: - y = np.asarray(score_y, dtype=float)[:n] * float(frame_rate) - else: - y = np.arange(n) - ylabel = "score (frames)" - wp_x = warping_path[1] - wp_y = warping_path[0] + # y-axis: score position (beats) + wp_in_beats = np.issubdtype(warping_path[0].dtype, np.floating) + if state_space is not None and not wp_in_beats: + wp_y = state_space[warping_path[0]] + elif show_dist and wp_in_beats and ref_frame_to_beat is not 
None: + wp_y = _beats_to_frames(warping_path[0], ref_frame_to_beat) else: - # HMM mode: x in frames, y in beats via state_space - x_gt = gt * float(frame_rate) - x_pred = pred * float(frame_rate) - if score_y is None: - y = np.arange(n) - ylabel = "annotation index" - else: - y = np.asarray(score_y, dtype=float)[:n] - ylabel = "score position (beats)" - wp_x = warping_path[1] - if state_space is not None: - wp_y = state_space[warping_path[0]] - else: - wp_y = warping_path[0] - - # 1. Warping path - if has_dist_matrix: - ax.plot( - wp_x, - wp_y, - ".", - color="white", - alpha=0.7, - markersize=15, - label="warping path", - zorder=2, - ) + wp_y = warping_path[0] + + # GT score positions (y-axis for annotation dots) + if score_y is not None: + y_gt = np.asarray(score_y, dtype=float)[:n] + if show_dist and wp_in_beats and ref_frame_to_beat is not None: + y_gt = _beats_to_frames(y_gt, ref_frame_to_beat) else: - ax.plot( - wp_x, - wp_y, - ".", - color="lime", - alpha=0.5, - markersize=15, - label="warping path", - zorder=2, - ) + y_gt = np.arange(n) - # 2. Predicted points + # Predicted score positions at GT perf times (perf→score direction) + wp_x_sorted = np.asarray(wp_x, dtype=float) + wp_y_sorted = np.asarray(wp_y, dtype=float) + if len(wp_x_sorted) > 1: + y_pred = np.interp(x_gt, wp_x_sorted, wp_y_sorted) + else: + y_pred = y_gt + + # Plot layers + ax.plot( + wp_x, + wp_y, + ".", + color="white" if show_dist else "lime", + alpha=0.7 if show_dist else 0.5, + markersize=15, + label="warping path", + zorder=2, + ) ax.scatter( - x_pred, - y, + x_gt, + y_pred, label="predicted", s=80, alpha=0.9, @@ -627,11 +560,9 @@ def plot_alignment( linewidths=0, zorder=3, ) - - # 3. 
GT annotations (front) ax.scatter( x_gt, - y, + y_gt, label="ground truth", s=120, alpha=0.9, @@ -641,8 +572,26 @@ def plot_alignment( zorder=4, ) + if show_dist: + ax.set_xlim(0, input_features.shape[0] - 1) + ax.set_ylim(0, ref_features.shape[0] - 1) + + # Beat tick labels when projected to frame space + if show_dist and wp_in_beats and ref_frame_to_beat is not None: + finite_beats = ref_frame_to_beat[np.isfinite(ref_frame_to_beat)] + beat_min, beat_max = ( + finite_beats[0], + finite_beats[-1] if len(finite_beats) > 0 else (0, 1), + ) + n_ticks = max(2, min(12, int(beat_max - beat_min) + 1)) + beat_ticks = np.unique( + np.round(np.linspace(beat_min, beat_max, n_ticks)).astype(int) + ) + ax.set_yticks(_beats_to_frames(beat_ticks.astype(float), ref_frame_to_beat)) + ax.set_yticklabels([str(b) for b in beat_ticks]) + ax.set_xlabel("performance frame") - ax.set_ylabel(ylabel) + ax.set_ylabel("score position (beats)") ax.set_title(f"[{save_dir.name}] alignment ({name})") ax.grid(True, alpha=0.2) ax.legend(loc="best") @@ -664,28 +613,29 @@ def save_debug_results( ref_features: Optional[np.ndarray] = None, input_features: Optional[np.ndarray] = None, distance_func=None, + ref_frame_to_beat: Optional[np.ndarray] = None, ): """Save debug outputs: warping path TSV, results JSON, and alignment plot.""" save_dir = Path(save_dir) save_dir.mkdir(parents=True, exist_ok=True) - # 1. Warping path TSV + results JSON + # 1. Warping path TSV + results JSON + GT annotations save_nparray_to_csv(warping_path.T, (save_dir / f"wp_{run_name}.tsv").as_posix()) + gt_pairs = np.column_stack([score_annots, perf_annots]) + save_nparray_to_csv(gt_pairs, (save_dir / f"gt_{run_name}.tsv").as_posix()) import json with open(save_dir / f"{run_name}.json", "w") as f: json.dump(eval_results, f, indent=4) # 2. 
Alignment plot - if state_space is not None: - score_y = state_space - else: - sx = np.asarray(score_annots, dtype=float) - score_y = ( - sx - if sx.ndim == 1 and len(sx) == len(perf_annots) and np.all(np.diff(sx) >= 0) - else None - ) + # score_y = beat positions for each annotation (y-axis of the plot) + sx = np.asarray(score_annots, dtype=float) + score_y = ( + sx + if sx.ndim == 1 and len(sx) == len(perf_annots) and np.all(np.diff(sx) >= 0) + else None + ) plot_alignment( warping_path, perf_annots, @@ -698,4 +648,5 @@ def save_debug_results( ref_features=ref_features, input_features=input_features, distance_func=distance_func, + ref_frame_to_beat=ref_frame_to_beat, ) diff --git a/pyproject.toml b/pyproject.toml index 1dfd23a..005b31d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "pymatchmaker" version = "0.2.1" description = "A package for real-time music alignment" readme = "README.md" -requires-python = ">=3.9" +requires-python = ">=3.10" license = { text = "Apache 2.0" } keywords = ["music", "alignment", "midi", "audio"] authors = [ @@ -18,6 +18,10 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Cython", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", @@ -31,10 +35,10 @@ dependencies = [ "python-rtmidi>=1.5.8", "mido>=1.3.2", "numpy>=1.26.3,<2.0", - "scipy>=1.11.4,<1.15", + "scipy>=1.11.4", "librosa>=0.10.1", "pandas>=2.0.0", - "partitura>=1.7.0", + "partitura>=1.8.0", "progressbar2>=4.2.0", "python-hiddenmarkov>=0.1.3", "pyaudio>=0.2.14", diff --git a/run_examples.py b/run_examples.py index 4b13c59..245b1b7 100644 --- a/run_examples.py +++ b/run_examples.py @@ 
-31,6 +31,12 @@ def main(): group = parser.add_mutually_exclusive_group() group.add_argument("--audio", action="store_true", help="Use audio input mode") group.add_argument("--midi", action="store_true", help="Use MIDI input mode") + parser.add_argument( + "--method", + type=str, + default=None, + help="Score following method (e.g., arzt, dixon, outerhmm, audio_outerhmm)", + ) args = parser.parse_args() input_mode = "midi" if args.midi else "audio" @@ -39,7 +45,10 @@ def main(): print(f"Running matchmaker with the score file ({SCORE_FILE.name})...") print("-" * 50) - method = "outerhmm" if input_mode == "midi" else "arzt" + if args.method is not None: + method = args.method + else: + method = "outerhmm" if input_mode == "midi" else "arzt" # Initialize matchmaker (simulation mode) try: @@ -54,7 +63,7 @@ def main(): return # Run real-time score following - for current_position in mm.run(wait=True): + for current_position in mm.run(): timestamp = datetime.datetime.now().strftime("%H:%M:%S.%f")[:-3] print(f"[{timestamp}] Current beat position: {current_position}") diff --git a/tests/test_io_audio.py b/tests/test_io_audio.py index a313758..2fdedb9 100644 --- a/tests/test_io_audio.py +++ b/tests/test_io_audio.py @@ -19,9 +19,9 @@ MelSpectrogramProcessor, MFCCProcessor, ) +from matchmaker.features.processor import DummyProcessor from matchmaker.io.audio import AudioStream from matchmaker.utils.audio import check_input_audio_devices, get_audio_devices -from matchmaker.utils.processor import DummyProcessor from tests.utils import generate_sine_wave HAS_AUDIO_INPUT = check_input_audio_devices() @@ -227,7 +227,11 @@ def test_offline_input(self, mock_stdout=None): print("current time", self.stream.current_time) self.stream.join() - outputs = list(self.stream.queue.queue) + from matchmaker.io.stream import STREAM_END + + outputs = [ + item for item in self.stream.queue.queue if item is not STREAM_END + ] for _, ftime in outputs: self.assertTrue(isinstance(ftime, float)) diff 
--git a/tests/test_matchmaker.py b/tests/test_matchmaker.py index 8a7d894..b032326 100644 --- a/tests/test_matchmaker.py +++ b/tests/test_matchmaker.py @@ -5,6 +5,8 @@ import warnings from pathlib import Path +import numpy as np + from matchmaker import Matchmaker from matchmaker.dp import OnlineTimeWarpingArzt from matchmaker.dp.oltw_dixon import OnlineTimeWarpingDixon @@ -14,6 +16,7 @@ from matchmaker.io.midi import MidiStream from matchmaker.prob.hmm import PitchIOIHMM from matchmaker.prob.outer_product_hmm import OuterProductHMM +from matchmaker.prob.outer_product_hmm_audio import AudioOuterProductHMM warnings.filterwarnings("ignore", module="partitura") warnings.filterwarnings("ignore", module="librosa") @@ -22,21 +25,17 @@ class TestMatchmaker(unittest.TestCase): def setUp(self): # Set up paths to test files - self.score_file = "./tests/resources/Bach-fugue_bwv_858.musicxml" - self.performance_file_audio = "./tests/resources/Bach-fugue_bwv_858.mp3" - self.performance_file_midi = "./tests/resources/Bach-fugue_bwv_858.mid" + self.score_file = "./matchmaker/assets/simple_mozart_k265_var1.musicxml" + self.performance_file_audio = "./matchmaker/assets/simple_mozart_k265_var1.mp3" + self.performance_file_midi = "./matchmaker/assets/simple_mozart_k265_var1.mid" self.performance_file_annotations = ( - "./tests/resources/Bach-fugue_bwv_858_note_annotations.txt" + "./matchmaker/assets/simple_mozart_k265_var1_note_annotations.txt" + ) + self.performance_file_beat_annotations = ( + "./matchmaker/assets/simple_mozart_k265_var1_beat_annotations.txt" ) self.test_datasets = [ - { - "name": "bach_fugue_bwv_858", - "score": "./tests/resources/Bach-fugue_bwv_858.musicxml", - "audio": "./tests/resources/Bach-fugue_bwv_858.mp3", - "midi": "./tests/resources/Bach-fugue_bwv_858.mid", - "annotations": "./tests/resources/Bach-fugue_bwv_858_note_annotations.txt", - }, { "name": "simple_mozart_k265_var1", "score": "./matchmaker/assets/simple_mozart_k265_var1.musicxml", @@ -44,6 +43,13 
@@ def setUp(self): "midi": "./matchmaker/assets/simple_mozart_k265_var1.mid", "annotations": "./matchmaker/assets/simple_mozart_k265_var1_note_annotations.txt", }, + { + "name": "bach_fugue_bwv_858", + "score": "./tests/resources/Bach-fugue_bwv_858.musicxml", + "audio": "./tests/resources/Bach-fugue_bwv_858.mp3", + "midi": "./tests/resources/Bach-fugue_bwv_858.mid", + "annotations": "./tests/resources/Bach-fugue_bwv_858_note_annotations.txt", + }, ] def test_matchmaker_audio_init(self): @@ -72,6 +78,7 @@ def test_matchmaker_audio_run(self): # When & Then: running the alignment process, the yielded result should be a float values for position_in_beat in mm.run(verbose=False): self.assertIsInstance(position_in_beat, float) + break def test_matchmaker_audio_run_with_result(self): # Given: a Matchmaker instance with audio input @@ -95,12 +102,11 @@ def test_matchmaker_audio_run_with_result(self): def test_matchmaker_audio_run_with_evaluation(self): for dataset in self.test_datasets: - for method in ["arzt", "dixon"]: + for method in ["arzt", "dixon", "outerhmm"]: with self.subTest(dataset=dataset["name"], method=method): mm = Matchmaker( score_file=dataset["score"], performance_file=dataset["audio"], - wait=False, input_type="audio", method=method, ) @@ -116,15 +122,15 @@ def test_matchmaker_audio_run_with_evaluation(self): current_test = f"{dataset['name']}_{method}" results = mm.run_evaluation( dataset["annotations"], - debug=True, - save_dir=Path("./tests/results"), - run_name=current_test, + debug=False, + # save_dir=Path("./tests/results"), + # run_name=current_test, ) print(f"[{current_test}] RESULTS: {json.dumps(results, indent=4)}") # Then: the results should at least be 0.5 for threshold in ["300ms", "500ms", "1000ms"]: - self.assertGreaterEqual(results[threshold], 0.5) + self.assertGreaterEqual(results["ms"][threshold], 0.5) def test_matchmaker_audio_run_with_evaluation_cqt(self): # Given: a Matchmaker instance with audio input @@ -133,8 +139,7 @@ def 
test_matchmaker_audio_run_with_evaluation_cqt(self): performance_file=self.performance_file_audio, wait=False, input_type="audio", - feature_type="cqt", - distance_func="Cosine", + processor="cqt", method="arzt", ) try: @@ -154,7 +159,7 @@ def test_matchmaker_audio_run_with_evaluation_cqt(self): # Then: the results should at least be 0.5 for threshold in ["300ms", "500ms", "1000ms"]: - self.assertGreaterEqual(results[threshold], 0.5) + self.assertGreaterEqual(results["ms"][threshold], 0.5) def test_matchmaker_audio_run_with_evaluation_in_beats(self): # Given: a Matchmaker instance with audio input @@ -172,18 +177,14 @@ def test_matchmaker_audio_run_with_evaluation_in_beats(self): mm._has_run = True results = mm.run_evaluation( - "./tests/resources/Bach-fugue_bwv_858_beat_annotations.txt", - level="beat", - debug=True, - save_dir=Path("./tests/results"), - run_name="test_matchmaker_audio_run_with_evaluation_in_beats", + self.performance_file_annotations, domain="score", ) print(f"RESULTS: {json.dumps(results, indent=4)}") # Then: the results should at least be 0.5 for threshold in ["0.3b", "0.5b", "1b"]: - self.assertGreaterEqual(results[threshold], 0.5) + self.assertGreaterEqual(results["beat"][threshold], 0.5) def test_matchmaker_audio_run_with_evaluation_before_run(self): # Given: a Matchmaker instance with audio input @@ -226,6 +227,47 @@ def test_matchmaker_audio_arzt_init(self): self.assertIsInstance(mm.stream, AudioStream) self.assertIsInstance(mm.score_follower, OnlineTimeWarpingArzt) + def test_matchmaker_audio_outerhmm_init(self): + mm = Matchmaker( + score_file=self.score_file, + performance_file=self.performance_file_audio, + input_type="audio", + method="outerhmm", + ) + + self.assertIsInstance(mm.stream, AudioStream) + self.assertIsInstance(mm.score_follower, AudioOuterProductHMM) + + def test_matchmaker_audio_outerhmm_run(self): + mm = Matchmaker( + score_file=self.score_file, + performance_file=self.performance_file_audio, + input_type="audio", + 
method="outerhmm", + ) + + for position_in_beat in mm.run(verbose=False): + self.assertIsInstance(position_in_beat, float) + break + + def test_matchmaker_audio_rtf(self): + for method in ["arzt", "dixon", "outerhmm"]: + with self.subTest(method=method): + mm = Matchmaker( + score_file=self.score_file, + performance_file=self.performance_file_audio, + input_type="audio", + method=method, + ) + list(mm.run(verbose=False)) + + results = mm.run_evaluation( + self.performance_file_annotations, + ) + self.assertIn("rtf", results) + self.assertGreater(results["rtf"], 0) + self.assertLess(results["rtf"], 1.0) + def test_matchmaker_with_frame_rate(self): # Given: a Matchmaker instance with audio input mm = Matchmaker( @@ -233,12 +275,12 @@ def test_matchmaker_with_frame_rate(self): performance_file=self.performance_file_audio, wait=False, input_type="audio", - frame_rate=100, + frame_rate=50, ) - # Then: the frame rate should be 100 - self.assertEqual(mm.frame_rate, 100) - self.assertEqual(mm.score_follower.frame_rate, 100) + # Then: the frame rate should be 50 + self.assertEqual(mm.frame_rate, 50) + self.assertEqual(mm.score_follower.frame_rate, 50) def test_matchmaker_invalid_input_type(self): # Test Matchmaker with invalid input type @@ -293,12 +335,54 @@ def test_matchmaker_midi_run(self): ) # When & Then: running the alignment process, - # the yielded result should be a float values + # the yielded result should be numeric (int state index for MIDI) for position_in_beat in mm.run(): - self.assertIsInstance(position_in_beat, float) - if position_in_beat >= 130: + self.assertIsInstance(position_in_beat, (int, float, np.integer)) + if position_in_beat >= 10: break + def test_matchmaker_midi_run_with_evaluation(self): + """Test all MIDI methods: run + evaluate on test datasets.""" + for dataset in self.test_datasets: + for method in ["hmm", "pthmm", "outerhmm", "arzt", "dixon"]: + with self.subTest(dataset=dataset["name"], method=method): + mm = Matchmaker( + 
score_file=dataset["score"], + performance_file=dataset["midi"], + input_type="midi", + method=method, + ) + + try: + positions = list(mm.run()) + except queue.Empty as e: + print(f"Error: {type(e)}, {e}") + traceback.print_exc() + mm._has_run = True + + # All methods should produce positions + self.assertGreater(len(positions), 0) + + # WP should be valid + wp = mm.score_follower.warping_path + self.assertEqual(wp.shape[0], 2) + self.assertGreater(wp.shape[1], 0) + + # Evaluate all methods + results = mm.run_evaluation( + dataset["annotations"], + debug=False, + ) + current_test = f"{dataset['name']}_{method}_midi" + print( + f"[{current_test}] beat_0.5b={results['beat']['0.5b']:.3f}, ms_300ms={results['ms']['300ms']:.3f}" + ) + + for threshold in ["0.5b", "1b"]: + self.assertGreaterEqual(results["beat"][threshold], 0.3) + for threshold in ["300ms", "500ms", "1000ms"]: + self.assertGreaterEqual(results["ms"][threshold], 0.3) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_oltw_arzt.py b/tests/test_oltw_arzt.py index 5bf6083..7633e5c 100644 --- a/tests/test_oltw_arzt.py +++ b/tests/test_oltw_arzt.py @@ -9,12 +9,12 @@ import numpy as np from scipy.spatial import distance as sp_distance -from matchmaker.dp.oltw_arzt import OnlineTimeWarpingArzt +from matchmaker.dp.oltw_arzt import OnlineTimeWarpingArztFrame as OnlineTimeWarpingArzt from matchmaker.utils import ( CYTHONIZED_METRICS_W_ARGUMENTS, CYTHONIZED_METRICS_WO_ARGUMENTS, ) -from matchmaker.utils.misc import ( +from matchmaker.utils.errors import ( MatchmakerInvalidOptionError, MatchmakerInvalidParameterTypeError, ) @@ -22,7 +22,7 @@ RNG = np.random.RandomState(1984) -SCIPY_DISTANCES = [ +_ALL_SCIPY_DISTANCES = [ "braycurtis", "canberra", "chebyshev", @@ -36,13 +36,14 @@ "dice", "hamming", "jaccard", - "kulczynski1", "rogerstanimoto", "russellrao", "sokalmichener", "sokalsneath", "yule", ] +# Filter to distances available in the installed scipy version +SCIPY_DISTANCES = [d for d in 
_ALL_SCIPY_DISTANCES if hasattr(sp_distance, d)] class TestOnlineTimeWarpingArzt(unittest.TestCase): diff --git a/tests/test_prob_hmm.py b/tests/test_prob_hmm.py index 9335866..b790820 100644 --- a/tests/test_prob_hmm.py +++ b/tests/test_prob_hmm.py @@ -197,7 +197,7 @@ def test_init(self): "./tests/resources/Bach-fugue_bwv_858_annotations.txt" ) - self.performance = process_audio_offline( - perf_info=self.performance_file_audio, + self.performance = process_midi_offline( + perf_info=self.performance_file_midi, processor=PitchProcessor(piano_range=True), ) diff --git a/tests/test_utils_misc.py b/tests/test_utils_misc.py index 497c0f5..deb76ee 100644 --- a/tests/test_utils_misc.py +++ b/tests/test_utils_misc.py @@ -4,13 +4,13 @@ import numpy as np -from matchmaker.utils.misc import ( +from matchmaker.io.queue import RECVQueue +from matchmaker.utils.errors import ( MatchmakerInvalidOptionError, MatchmakerInvalidParameterTypeError, MatchmakerMissingParameterError, - RECVQueue, - ensure_rng, ) +from matchmaker.utils.misc import ensure_rng class TestMatchmakerExceptions(unittest.TestCase): diff --git a/tests/test_utils_processor.py b/tests/test_utils_processor.py index a08c727..a11b9f3 100644 --- a/tests/test_utils_processor.py +++ b/tests/test_utils_processor.py @@ -8,7 +8,7 @@ import numpy as np -from matchmaker.utils.processor import DummyProcessor, Processor, ProcessorWrapper +from matchmaker.features.processor import DummyProcessor, Processor, ProcessorWrapper RNG = np.random.RandomState(1984) diff --git a/tests/test_utils_stream.py b/tests/test_utils_stream.py index 9bdf2a7..5ebbd08 100644 --- a/tests/test_utils_stream.py +++ b/tests/test_utils_stream.py @@ -9,8 +9,8 @@ import numpy as np -from matchmaker.utils.processor import DummyProcessor -from matchmaker.utils.stream import Stream +from matchmaker.features.processor import DummyProcessor +from matchmaker.io.stream import Stream RNG = np.random.RandomState(1984) diff --git a/tests/utils.py b/tests/utils.py 
index 80f1dca..a7f4226 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -18,7 +18,8 @@ from matchmaker.io.audio import HOP_LENGTH, SAMPLE_RATE, AudioStream from matchmaker.io.midi import POLLING_PERIOD, MidiStream -from matchmaker.utils.misc import RECVQueue +from matchmaker.io.queue import RECVQueue +from matchmaker.io.stream import STREAM_END # Random number generator RNG = np.random.RandomState(1984) @@ -289,7 +290,7 @@ def process_midi_offline( ) as stream: pass - outputs = list(queue.queue) + outputs = [item for item in queue.queue if item is not STREAM_END] return outputs