Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pypi_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- name: Build wheels
run: python -m cibuildwheel
env:
CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
CIBW_BUILD: "cp310-* cp311-* cp312-* cp313-*"
CIBW_ARCHS: "x86_64"
CIBW_ARCHS_MACOS: "x86_64 arm64"
CIBW_BEFORE_BUILD: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_pypi_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
- name: Build wheels
run: python -m cibuildwheel
env:
CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
CIBW_BUILD: "cp310-* cp311-* cp312-* cp313-*"
CIBW_ARCHS: "x86_64"
CIBW_ARCHS_MACOS: "x86_64 arm64"
CIBW_BEFORE_BUILD: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
fail-fast: false
matrix:
platform: [ubuntu-latest, macos-latest]
python-version: ["3.9", "3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12", "3.13"]

runs-on: ${{ matrix.platform }}
steps:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ The full documentation for matchmaker is available online at [readthedocs.org](h

### Prerequisites

- Available Python version: 3.12
- Supported Python versions: 3.10, 3.11, 3.12, 3.13
- [Fluidsynth](https://www.fluidsynth.org/)
- [PortAudio](http://www.portaudio.com/)

Expand Down
76 changes: 45 additions & 31 deletions matchmaker/dp/oltw_arzt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@

from matchmaker.base import OnlineAlignment
from matchmaker.dp.dtw_loop import oltw_arzt_loop
from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT
from matchmaker.features.audio import FRAME_RATE
from matchmaker.io.audio import QUEUE_TIMEOUT
from matchmaker.utils import (
CYTHONIZED_METRICS_W_ARGUMENTS,
CYTHONIZED_METRICS_WO_ARGUMENTS,
Expand All @@ -26,10 +27,11 @@
RECVQueue,
set_latency_stats,
)
from matchmaker.utils.stream import STREAM_END

STEP_SIZE: int = 5
WINDOW_SIZE: int = 5
START_WINDOW_SIZE: Union[float, int] = 0.25
STEP_SIZE: int = 3
WINDOW_SIZE: int = 10
START_WINDOW_SIZE: Union[float, int] = 0.1


class OnlineTimeWarpingArzt(OnlineAlignment):
Expand Down Expand Up @@ -98,9 +100,9 @@ def __init__(
current_position: int = 0,
frame_rate: int = FRAME_RATE,
queue: Optional[RECVQueue] = None,
state_to_ref_time_map = None,
ref_to_state_time_map = None,
state_space = None,
state_to_ref_time_map=None,
ref_to_state_time_map=None,
state_space=None,
**kwargs,
) -> None:
super().__init__(reference_features=reference_features)
Expand Down Expand Up @@ -154,7 +156,7 @@ def __init__(

self.N_ref: int = self.reference_features.shape[0]
self.frame_rate = frame_rate
self.window_size: int = window_size * self.frame_rate
self.window_size: int = int(np.round(window_size * self.frame_rate))
self.step_size: int = step_size
self.start_window_size: int = int(np.round(start_window_size * frame_rate))
self.init_position: int = current_position
Expand All @@ -178,12 +180,22 @@ def __init__(
}
self.state_to_ref_time_map = state_to_ref_time_map
self.ref_to_state_time_map = ref_to_state_time_map
self.state_space = state_space #if state_space != None else np.unique(self.reference_features.note_array()["onset_beat"])
self.state_space = state_space
self._ref_frame_to_beat: Optional[NDArray[np.float32]] = kwargs.get(
"ref_frame_to_beat", None
)

@property
def current_beat(self) -> float:
    """Return the current score position expressed in beats.

    If a frame-to-beat lookup table was supplied, the current reference
    position (capped at the table's last index) is translated through it.
    Without a table, the raw reference position is returned as a float.
    """
    frame_to_beat = self._ref_frame_to_beat
    if frame_to_beat is None:
        # No lookup table available: fall back to the frame index itself.
        return float(self.current_position)

    # Cap the index so a position past the end maps to the last entry.
    last_index = len(frame_to_beat) - 1
    frame = min(self.current_position, last_index)
    return float(frame_to_beat[frame])

@property
def warping_path(self) -> NDArray[np.int32]:
wp = (np.array(self._warping_path).T).astype(np.int32)
return wp
def warping_path(self) -> NDArray[np.float32]:
return np.array(self._warping_path).T

def __call__(self, input: NDArray[np.float32]) -> int:
self.step(input)
Expand Down Expand Up @@ -211,10 +223,18 @@ def run(self, verbose: bool = True) -> Generator[int, None, NDArray[np.float32]]
self.reset()

if verbose:
pbar = progressbar.ProgressBar(max_value=self.N_ref, redirect_stdout=True)
pbar = progressbar.ProgressBar(
max_value=len(self.state_space),
redirect_stdout=True,
redirect_stderr=True,
)
pbar.start()

while self.is_still_following():
features, f_time = self.queue.get(timeout=QUEUE_TIMEOUT)
item = self.queue.get(timeout=QUEUE_TIMEOUT)
if item is STREAM_END:
break
features, f_time = item
self.last_queue_update = time.time()
self.input_features = (
np.concatenate((self.input_features, features))
Expand All @@ -224,13 +244,13 @@ def run(self, verbose: bool = True) -> Generator[int, None, NDArray[np.float32]]
self.step(features)

if verbose:
pbar.update(int(self.current_position))
pbar.update(int(np.searchsorted(self.state_space, self.current_beat)))

latency = time.time() - self.last_queue_update
self.latency_stats = set_latency_stats(
latency, self.latency_stats, self.input_index
)
yield self.current_position
yield self.current_beat

if verbose:
pbar.finish()
Expand Down Expand Up @@ -289,23 +309,17 @@ def step(self, input_features: NDArray[np.float32]) -> None:
min_index=min_index,
)

# adapt current_position: do not go backwards,
# but also go a maximum of N steps forward

if self.input_index == 0:
# enforce the first time step to stay at the
# initial position
self.current_position = min( # TODO: Is this necessary?
max(self.current_position, min_index),
self.current_position,
)
else:
self.current_position = min(
max(self.current_position, min_index),
self.current_position + self.step_size,
# Clamp new position: no backwards, max step_size forward per frame
if self.input_index > 0:
self.current_position = int(
np.clip(
min_index,
self.current_position,
self.current_position + self.step_size,
)
)

self._warping_path.append((self.current_position, self.input_index))
self._warping_path.append((self.current_beat, self.input_index))
# update input index
self.input_index += 1

Expand Down
39 changes: 29 additions & 10 deletions matchmaker/dp/oltw_dixon.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
from numpy.typing import NDArray

from matchmaker.base import OnlineAlignment
from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT
from matchmaker.features.audio import FRAME_RATE
from matchmaker.io.audio import QUEUE_TIMEOUT
from matchmaker.utils.misc import set_latency_stats
from matchmaker.utils.stream import STREAM_END


class Direction(IntEnum):
Expand Down Expand Up @@ -74,9 +76,9 @@ def __init__(
max_run_count=MAX_RUN_COUNT,
frame_per_seg=FRAME_PER_SEG,
frame_rate=FRAME_RATE,
state_to_ref_time_map = None,
ref_to_state_time_map = None,
state_space = None,
state_to_ref_time_map=None,
ref_to_state_time_map=None,
state_space=None,
**kwargs,
):
super().__init__(reference_features=reference_features)
Expand All @@ -90,6 +92,7 @@ def __init__(
self.state_to_ref_time_map = state_to_ref_time_map
self.ref_to_state_time_map = ref_to_state_time_map
self.state_space = state_space
self._ref_frame_to_beat = kwargs.get("ref_frame_to_beat", None)
self.reset()

def reset(self):
Expand All @@ -114,6 +117,14 @@ def reset(self):
}
self._initialized = False

@property
def current_beat(self) -> float:
    """Return the current score position expressed in beats.

    The best reference index is looked up in the frame-to-beat table when
    one is present (capped at the table's last index); otherwise the best
    reference index itself is returned as a float.
    """
    beat_lookup = self._ref_frame_to_beat
    if beat_lookup is None:
        # No lookup table available: report the reference index directly.
        return float(self.best_ref)

    # Cap the index so it never runs past the end of the table.
    capped_ref = min(self.best_ref, len(beat_lookup) - 1)
    return float(beat_lookup[capped_ref])

@property
def warping_path(self) -> NDArray[np.float32]: # [shape=(2, T)]
return self.wp
Expand Down Expand Up @@ -276,8 +287,8 @@ def get_expand_direction(self):
return Direction.REF

def save_history(self):
"""Append current best alignment point to warping path."""
new_point = np.array([[self.best_ref], [self.best_input]])
"""Append current best alignment point to warping path (beats, input_frame)."""
new_point = np.array([[self.current_beat], [self.best_input]])
self.wp = np.concatenate((self.wp, new_point), axis=1)

def __call__(self, input_features: NDArray[np.float32]) -> int:
Expand Down Expand Up @@ -363,21 +374,29 @@ def run(self, verbose=True):
self.reset()

if verbose:
pbar = progressbar.ProgressBar(max_value=self.N_ref, redirect_stdout=True)
pbar = progressbar.ProgressBar(
max_value=len(self.state_space),
redirect_stdout=True,
redirect_stderr=True,
)
pbar.start()

while self.is_still_following():
input_feature, f_time = self.queue.get(timeout=QUEUE_TIMEOUT)
item = self.queue.get(timeout=QUEUE_TIMEOUT)
if item is STREAM_END:
break
input_feature, f_time = item
self.last_queue_update = time.time()
self.step(input_feature)

if verbose:
pbar.update(int(self.current_position))
pbar.update(int(np.searchsorted(self.state_space, self.current_beat)))

latency = time.time() - self.last_queue_update
self.latency_stats = set_latency_stats(
latency, self.latency_stats, self.input_index
)
yield self.current_position
yield self.current_beat

if verbose:
pbar.finish()
Expand Down
1 change: 0 additions & 1 deletion matchmaker/features/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
DCT_TYPE = 2
NORM = np.inf
FEATURES = "chroma"
QUEUE_TIMEOUT = 1

# Type hint for Input Audio frame.
InputAudioSeries = np.ndarray
Expand Down
47 changes: 38 additions & 9 deletions matchmaker/io/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,21 @@
import numpy as np
import pyaudio

from matchmaker.features.audio import HOP_LENGTH, SAMPLE_RATE, ChromagramProcessor
from matchmaker.features.audio import (
HOP_LENGTH,
SAMPLE_RATE,
ChromagramProcessor,
)
from matchmaker.utils.audio import (
get_audio_devices,
get_default_input_device_index,
get_device_index_from_name,
)
from matchmaker.utils.misc import RECVQueue, set_latency_stats
from matchmaker.utils.stream import Stream
from matchmaker.utils.stream import STREAM_END, Stream

CHANNELS = 1
QUEUE_TIMEOUT = 10


class AudioStream(Stream):
Expand Down Expand Up @@ -53,7 +58,7 @@ def __init__(
hop_length: int = HOP_LENGTH,
queue: Optional[RECVQueue] = None,
device_name_or_index: Optional[Union[str, int]] = None,
wait: bool = True,
wait: bool = False,
target_sr: int = SAMPLE_RATE,
):
if processor is None:
Expand Down Expand Up @@ -123,9 +128,14 @@ def __init__(
"min_latency": float("inf"),
}
self.input_index = 0
self._preloaded_audio = None

if self.mock:
self.run = self.run_offline
# Pre-load and resample audio so the stream thread can start
# producing frames immediately (avoids queue-timeout race condition
# when librosa.load takes longer than QUEUE_TIMEOUT).
self._preload_audio()
else:
self.run = self.run_online

Expand Down Expand Up @@ -159,6 +169,8 @@ def _process_frame(
# initial y
target_audio = np.frombuffer(data, dtype=np.float32)
self._process_feature(target_audio, time_info["input_buffer_adc_time"])
if not self.stream_start.is_set():
self.stream_start.set()

return (data, pyaudio.paContinue)

Expand Down Expand Up @@ -225,6 +237,13 @@ def stop_listening(self) -> None:
self.audio_interface.terminate()
self.listen = False

def _preload_audio(self) -> None:
    """Pre-load and resample the audio file so run_offline can start immediately.

    The file at ``self.file_path`` is read at its native sample rate; if
    that rate differs from ``self.target_sr`` the signal is resampled. The
    result is stored in ``self._preloaded_audio``.
    """
    samples, native_sr = librosa.load(self.file_path, sr=None)
    needs_resampling = native_sr != self.target_sr
    if needs_resampling:
        samples = librosa.resample(
            y=samples,
            orig_sr=native_sr,
            target_sr=self.target_sr,
        )
    self._preloaded_audio = samples

def run_offline(self) -> None:
"""Process audio file in offline mode.

Expand All @@ -240,31 +259,41 @@ def run_offline(self) -> None:
self.start_listening()
self.init_time = time.time()

audio_y, sr = librosa.load(self.file_path, sr=None)
if sr != self.target_sr:
audio_y = librosa.resample(y=audio_y, orig_sr=sr, target_sr=self.target_sr)
sr = self.target_sr
if self._preloaded_audio is not None:
audio_y = self._preloaded_audio
self._preloaded_audio = None # free memory
else:
audio_y, sr = librosa.load(self.file_path, sr=None)
if sr != self.target_sr:
audio_y = librosa.resample(
y=audio_y, orig_sr=sr, target_sr=self.target_sr
)
sr = self.target_sr

time_interval = self.hop_length / float(sr)
# Pad to next hop_length boundary so no trailing samples are lost
remainder = len(audio_y) % self.hop_length
if remainder > 0:
audio_y = np.concatenate(
(audio_y, np.zeros(self.hop_length - remainder, dtype=np.float32))
)
trimmed_audio = audio_y
time_interval = self.hop_length / float(sr)
# Do not stop early on digital silence (all-zeros tails).
while trimmed_audio.size > 0:
self.input_index += 1
self.last_data_received = time.time()
target_audio = trimmed_audio[: self.hop_length]
self._process_feature(target_audio, self.last_data_received)
trimmed_audio = trimmed_audio[self.hop_length :]
elapsed_time = time.time() - self.last_data_received

if not self.stream_start.is_set():
self.stream_start.set()

if self.wait:
elapsed_time = time.time() - self.last_data_received
time.sleep(max(time_interval - elapsed_time, 0))

self.queue.put(STREAM_END)
self.stop_listening()

def run_online(self) -> None:
Expand Down
Loading
Loading