Skip to content

Commit c978afa

Browse files
committed
code fix for audio trackers benchmark
1 parent 6614a54 commit c978afa

22 files changed

Lines changed: 727 additions & 504 deletions

.github/workflows/pypi_publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
- name: Build wheels
2727
run: python -m cibuildwheel
2828
env:
29-
CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
29+
CIBW_BUILD: "cp311-* cp312-* cp313-*"
3030
CIBW_ARCHS: "x86_64"
3131
CIBW_ARCHS_MACOS: "x86_64 arm64"
3232
CIBW_BEFORE_BUILD: |

.github/workflows/test_pypi_publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ jobs:
2828
- name: Build wheels
2929
run: python -m cibuildwheel
3030
env:
31-
CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
31+
CIBW_BUILD: "cp311-* cp312-* cp313-*"
3232
CIBW_ARCHS: "x86_64"
3333
CIBW_ARCHS_MACOS: "x86_64 arm64"
3434
CIBW_BEFORE_BUILD: |

.github/workflows/unittest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
fail-fast: false
1212
matrix:
1313
platform: [ubuntu-latest, macos-latest]
14-
python-version: ["3.9", "3.10", "3.11", "3.12"]
14+
python-version: ["3.11", "3.12", "3.13"]
1515

1616
runs-on: ${{ matrix.platform }}
1717
steps:

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ The full documentation for matchmaker is available online at [readthedocs.org](h
1414

1515
### Prerequisites
1616

17-
- Available Python version: 3.12
17+
- Supported Python versions: 3.11, 3.12, 3.13
1818
- [Fluidsynth](https://www.fluidsynth.org/)
1919
- [PortAudio](http://www.portaudio.com/)
2020

matchmaker/dp/oltw_arzt.py

Lines changed: 44 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313

1414
from matchmaker.base import OnlineAlignment
1515
from matchmaker.dp.dtw_loop import oltw_arzt_loop
16-
from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT
16+
from matchmaker.features.audio import FRAME_RATE
17+
from matchmaker.io.audio import QUEUE_TIMEOUT
1718
from matchmaker.utils import (
1819
CYTHONIZED_METRICS_W_ARGUMENTS,
1920
CYTHONIZED_METRICS_WO_ARGUMENTS,
@@ -26,10 +27,11 @@
2627
RECVQueue,
2728
set_latency_stats,
2829
)
30+
from matchmaker.utils.stream import STREAM_END
2931

30-
STEP_SIZE: int = 5
31-
WINDOW_SIZE: int = 5
32-
START_WINDOW_SIZE: Union[float, int] = 0.25
32+
STEP_SIZE: int = 3
33+
WINDOW_SIZE: int = 10
34+
START_WINDOW_SIZE: Union[float, int] = 0.1
3335

3436

3537
class OnlineTimeWarpingArzt(OnlineAlignment):
@@ -98,9 +100,9 @@ def __init__(
98100
current_position: int = 0,
99101
frame_rate: int = FRAME_RATE,
100102
queue: Optional[RECVQueue] = None,
101-
state_to_ref_time_map = None,
102-
ref_to_state_time_map = None,
103-
state_space = None,
103+
state_to_ref_time_map=None,
104+
ref_to_state_time_map=None,
105+
state_space=None,
104106
**kwargs,
105107
) -> None:
106108
super().__init__(reference_features=reference_features)
@@ -178,12 +180,22 @@ def __init__(
178180
}
179181
self.state_to_ref_time_map = state_to_ref_time_map
180182
self.ref_to_state_time_map = ref_to_state_time_map
181-
self.state_space = state_space #if state_space != None else np.unique(self.reference_features.note_array()["onset_beat"])
183+
self.state_space = state_space
184+
self._ref_frame_to_beat: Optional[NDArray[np.float32]] = kwargs.get(
185+
"ref_frame_to_beat", None
186+
)
187+
188+
@property
def current_beat(self) -> float:
    """Current score position in beats.

    Returns
    -------
    float
        The entry of the ``ref_frame_to_beat`` lookup at
        ``current_position``, with the index clamped to the valid range of
        the lookup. If no lookup was given, or the lookup is empty, the
        frame index ``current_position`` itself is returned as a float.
    """
    frame_to_beat = self._ref_frame_to_beat
    # An empty lookup would turn ``len(...) - 1`` into -1 and index an empty
    # array, so treat it like a missing lookup.
    if frame_to_beat is None or len(frame_to_beat) == 0:
        return float(self.current_position)
    # Clamp on both sides: a negative index must not wrap to the last entry.
    idx = max(0, min(int(self.current_position), len(frame_to_beat) - 1))
    return float(frame_to_beat[idx])
182195

183196
@property
184-
def warping_path(self) -> NDArray[np.int32]:
185-
wp = (np.array(self._warping_path).T).astype(np.int32)
186-
return wp
197+
def warping_path(self) -> NDArray[np.float32]:
    """Return the accumulated warping path as a ``(2, T)`` array.

    Each entry appended to ``self._warping_path`` is a
    ``(score position, input index)`` pair; row 0 of the result holds the
    score positions and row 1 the input indices, one column per entry.
    """
    # ``reshape(-1, 2)`` is a no-op for a populated path and keeps the
    # (2, T) layout (as (2, 0)) when nothing has been appended yet.
    return np.array(self._warping_path).reshape(-1, 2).T
187199

188200
def __call__(self, input: NDArray[np.float32]) -> int:
189201
self.step(input)
@@ -211,10 +223,18 @@ def run(self, verbose: bool = True) -> Generator[int, None, NDArray[np.float32]]
211223
self.reset()
212224

213225
if verbose:
214-
pbar = progressbar.ProgressBar(max_value=self.N_ref, redirect_stdout=True)
226+
pbar = progressbar.ProgressBar(
227+
max_value=len(self.state_space),
228+
redirect_stdout=True,
229+
redirect_stderr=True,
230+
)
231+
pbar.start()
215232

216233
while self.is_still_following():
217-
features, f_time = self.queue.get(timeout=QUEUE_TIMEOUT)
234+
item = self.queue.get(timeout=QUEUE_TIMEOUT)
235+
if item is STREAM_END:
236+
break
237+
features, f_time = item
218238
self.last_queue_update = time.time()
219239
self.input_features = (
220240
np.concatenate((self.input_features, features))
@@ -224,13 +244,13 @@ def run(self, verbose: bool = True) -> Generator[int, None, NDArray[np.float32]]
224244
self.step(features)
225245

226246
if verbose:
227-
pbar.update(int(self.current_position))
247+
pbar.update(int(np.searchsorted(self.state_space, self.current_beat)))
228248

229249
latency = time.time() - self.last_queue_update
230250
self.latency_stats = set_latency_stats(
231251
latency, self.latency_stats, self.input_index
232252
)
233-
yield self.current_position
253+
yield self.current_beat
234254

235255
if verbose:
236256
pbar.finish()
@@ -289,23 +309,17 @@ def step(self, input_features: NDArray[np.float32]) -> None:
289309
min_index=min_index,
290310
)
291311

292-
# adapt current_position: do not go backwards,
293-
# but also go a maximum of N steps forward
294-
295-
if self.input_index == 0:
296-
# enforce the first time step to stay at the
297-
# initial position
298-
self.current_position = min( # TODO: Is this necessary?
299-
max(self.current_position, min_index),
300-
self.current_position,
301-
)
302-
else:
303-
self.current_position = min(
304-
max(self.current_position, min_index),
305-
self.current_position + self.step_size,
312+
# Clamp new position: no backwards, max step_size forward per frame
313+
if self.input_index > 0:
314+
self.current_position = int(
315+
np.clip(
316+
min_index,
317+
self.current_position,
318+
self.current_position + self.step_size,
319+
)
306320
)
307321

308-
self._warping_path.append((self.current_position, self.input_index))
322+
self._warping_path.append((self.current_beat, self.input_index))
309323
# update input index
310324
self.input_index += 1
311325

matchmaker/dp/oltw_dixon.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
from numpy.typing import NDArray
1515

1616
from matchmaker.base import OnlineAlignment
17-
from matchmaker.features.audio import FRAME_RATE, QUEUE_TIMEOUT
17+
from matchmaker.features.audio import FRAME_RATE
18+
from matchmaker.io.audio import QUEUE_TIMEOUT
1819
from matchmaker.utils.misc import set_latency_stats
20+
from matchmaker.utils.stream import STREAM_END
1921

2022

2123
class Direction(IntEnum):
@@ -74,9 +76,9 @@ def __init__(
7476
max_run_count=MAX_RUN_COUNT,
7577
frame_per_seg=FRAME_PER_SEG,
7678
frame_rate=FRAME_RATE,
77-
state_to_ref_time_map = None,
78-
ref_to_state_time_map = None,
79-
state_space = None,
79+
state_to_ref_time_map=None,
80+
ref_to_state_time_map=None,
81+
state_space=None,
8082
**kwargs,
8183
):
8284
super().__init__(reference_features=reference_features)
@@ -90,6 +92,7 @@ def __init__(
9092
self.state_to_ref_time_map = state_to_ref_time_map
9193
self.ref_to_state_time_map = ref_to_state_time_map
9294
self.state_space = state_space
95+
self._ref_frame_to_beat = kwargs.get("ref_frame_to_beat", None)
9396
self.reset()
9497

9598
def reset(self):
@@ -114,6 +117,14 @@ def reset(self):
114117
}
115118
self._initialized = False
116119

120+
@property
def current_beat(self) -> float:
    """Current score position in beats.

    Returns
    -------
    float
        The entry of the ``ref_frame_to_beat`` lookup at ``best_ref``, with
        the index clamped to the valid range of the lookup. If no lookup
        was given, or the lookup is empty, ``best_ref`` itself is returned
        as a float.
    """
    frame_to_beat = self._ref_frame_to_beat
    # An empty lookup would turn ``len(...) - 1`` into -1 and index an empty
    # array, so treat it like a missing lookup.
    if frame_to_beat is None or len(frame_to_beat) == 0:
        return float(self.best_ref)
    # Clamp on both sides: a negative index must not wrap to the last entry.
    idx = max(0, min(int(self.best_ref), len(frame_to_beat) - 1))
    return float(frame_to_beat[idx])
127+
117128
@property
118129
def warping_path(self) -> NDArray[np.float32]: # [shape=(2, T)]
119130
return self.wp
@@ -276,8 +287,8 @@ def get_expand_direction(self):
276287
return Direction.REF
277288

278289
def save_history(self):
279-
"""Append current best alignment point to warping path."""
280-
new_point = np.array([[self.best_ref], [self.best_input]])
290+
"""Append current best alignment point to warping path (beats, input_frame)."""
291+
new_point = np.array([[self.current_beat], [self.best_input]])
281292
self.wp = np.concatenate((self.wp, new_point), axis=1)
282293

283294
def __call__(self, input_features: NDArray[np.float32]) -> int:
@@ -363,21 +374,29 @@ def run(self, verbose=True):
363374
self.reset()
364375

365376
if verbose:
366-
pbar = progressbar.ProgressBar(max_value=self.N_ref, redirect_stdout=True)
377+
pbar = progressbar.ProgressBar(
378+
max_value=len(self.state_space),
379+
redirect_stdout=True,
380+
redirect_stderr=True,
381+
)
382+
pbar.start()
367383

368384
while self.is_still_following():
369-
input_feature, f_time = self.queue.get(timeout=QUEUE_TIMEOUT)
385+
item = self.queue.get(timeout=QUEUE_TIMEOUT)
386+
if item is STREAM_END:
387+
break
388+
input_feature, f_time = item
370389
self.last_queue_update = time.time()
371390
self.step(input_feature)
372391

373392
if verbose:
374-
pbar.update(int(self.current_position))
393+
pbar.update(int(np.searchsorted(self.state_space, self.current_beat)))
375394

376395
latency = time.time() - self.last_queue_update
377396
self.latency_stats = set_latency_stats(
378397
latency, self.latency_stats, self.input_index
379398
)
380-
yield self.current_position
399+
yield self.current_beat
381400

382401
if verbose:
383402
pbar.finish()

matchmaker/features/audio.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
DCT_TYPE = 2
2121
NORM = np.inf
2222
FEATURES = "chroma"
23-
QUEUE_TIMEOUT = 1
2423

2524
# Type hint for Input Audio frame.
2625
InputAudioSeries = np.ndarray

matchmaker/io/audio.py

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,21 @@
1212
import numpy as np
1313
import pyaudio
1414

15-
from matchmaker.features.audio import HOP_LENGTH, SAMPLE_RATE, ChromagramProcessor
15+
from matchmaker.features.audio import (
16+
HOP_LENGTH,
17+
SAMPLE_RATE,
18+
ChromagramProcessor,
19+
)
1620
from matchmaker.utils.audio import (
1721
get_audio_devices,
1822
get_default_input_device_index,
1923
get_device_index_from_name,
2024
)
2125
from matchmaker.utils.misc import RECVQueue, set_latency_stats
22-
from matchmaker.utils.stream import Stream
26+
from matchmaker.utils.stream import STREAM_END, Stream
2327

2428
CHANNELS = 1
29+
QUEUE_TIMEOUT = 10
2530

2631

2732
class AudioStream(Stream):
@@ -53,7 +58,7 @@ def __init__(
5358
hop_length: int = HOP_LENGTH,
5459
queue: Optional[RECVQueue] = None,
5560
device_name_or_index: Optional[Union[str, int]] = None,
56-
wait: bool = True,
61+
wait: bool = False,
5762
target_sr: int = SAMPLE_RATE,
5863
):
5964
if processor is None:
@@ -123,9 +128,14 @@ def __init__(
123128
"min_latency": float("inf"),
124129
}
125130
self.input_index = 0
131+
self._preloaded_audio = None
126132

127133
if self.mock:
128134
self.run = self.run_offline
135+
# Pre-load and resample audio so the stream thread can start
136+
# producing frames immediately (avoids queue-timeout race condition
137+
# when librosa.load takes longer than QUEUE_TIMEOUT).
138+
self._preload_audio()
129139
else:
130140
self.run = self.run_online
131141

@@ -159,6 +169,8 @@ def _process_frame(
159169
# initial y
160170
target_audio = np.frombuffer(data, dtype=np.float32)
161171
self._process_feature(target_audio, time_info["input_buffer_adc_time"])
172+
if not self.stream_start.is_set():
173+
self.stream_start.set()
162174

163175
return (data, pyaudio.paContinue)
164176

@@ -225,6 +237,13 @@ def stop_listening(self) -> None:
225237
self.audio_interface.terminate()
226238
self.listen = False
227239

240+
def _preload_audio(self) -> None:
    """Load the audio file up front and cache it on the instance.

    The file at ``self.file_path`` is read at its native sampling rate;
    if that rate differs from ``self.target_sr`` the signal is resampled
    to ``self.target_sr``. The resulting samples are stored in
    ``self._preloaded_audio``.
    """
    samples, native_sr = librosa.load(self.file_path, sr=None)
    rate_matches = native_sr == self.target_sr
    self._preloaded_audio = (
        samples
        if rate_matches
        else librosa.resample(
            y=samples, orig_sr=native_sr, target_sr=self.target_sr
        )
    )
246+
228247
def run_offline(self) -> None:
229248
"""Process audio file in offline mode.
230249
@@ -240,31 +259,41 @@ def run_offline(self) -> None:
240259
self.start_listening()
241260
self.init_time = time.time()
242261

243-
audio_y, sr = librosa.load(self.file_path, sr=None)
244-
if sr != self.target_sr:
245-
audio_y = librosa.resample(y=audio_y, orig_sr=sr, target_sr=self.target_sr)
246-
sr = self.target_sr
262+
if self._preloaded_audio is not None:
263+
audio_y = self._preloaded_audio
264+
self._preloaded_audio = None # free memory
265+
else:
266+
audio_y, sr = librosa.load(self.file_path, sr=None)
267+
if sr != self.target_sr:
268+
audio_y = librosa.resample(
269+
y=audio_y, orig_sr=sr, target_sr=self.target_sr
270+
)
271+
sr = self.target_sr
247272

248-
time_interval = self.hop_length / float(sr)
249273
# Pad to next hop_length boundary so no trailing samples are lost
250274
remainder = len(audio_y) % self.hop_length
251275
if remainder > 0:
252276
audio_y = np.concatenate(
253277
(audio_y, np.zeros(self.hop_length - remainder, dtype=np.float32))
254278
)
255279
trimmed_audio = audio_y
280+
time_interval = self.hop_length / float(sr)
256281
# Do not stop early on digital silence (all-zeros tails).
257282
while trimmed_audio.size > 0:
258283
self.input_index += 1
259284
self.last_data_received = time.time()
260285
target_audio = trimmed_audio[: self.hop_length]
261286
self._process_feature(target_audio, self.last_data_received)
262287
trimmed_audio = trimmed_audio[self.hop_length :]
263-
elapsed_time = time.time() - self.last_data_received
288+
289+
if not self.stream_start.is_set():
290+
self.stream_start.set()
264291

265292
if self.wait:
293+
elapsed_time = time.time() - self.last_data_received
266294
time.sleep(max(time_interval - elapsed_time, 0))
267295

296+
self.queue.put(STREAM_END)
268297
self.stop_listening()
269298

270299
def run_online(self) -> None:

matchmaker/io/midi.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ def _process_frame_message(
139139
self.queue.put(((data, c_time), output))
140140
else:
141141
self.queue.put(output)
142+
if not self.stream_start.is_set():
143+
self.stream_start.set()
142144

143145
def _process_frame_window(
144146
self,

0 commit comments

Comments
 (0)