diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock
index 666f71b..9e1d61e 100644
--- a/src-tauri/Cargo.lock
+++ b/src-tauri/Cargo.lock
@@ -271,6 +271,43 @@ version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
 
+[[package]]
+name = "audio-core"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f93ebbf82d06013f4c41fe71303feb980cddd78496d904d06be627972de51a24"
+
+[[package]]
+name = "audioadapter"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91f87b70b051c5866680ad79f6743a42ccab264c009d1a71f4d33a3872ae60c8"
+dependencies = [
+ "audio-core",
+ "num-traits",
+]
+
+[[package]]
+name = "audioadapter-buffers"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9097d67933fb083d382ce980430afdb758aada60846010aee6be068c06cef0ca"
+dependencies = [
+ "audioadapter",
+ "audioadapter-sample",
+ "num-traits",
+]
+
+[[package]]
+name = "audioadapter-sample"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34ab94f2bc04a14e1f49ee5f222f66460e8a1b51627bdfedf34eed394d747938"
+dependencies = [
+ "audio-core",
+ "num-traits",
+]
+
 [[package]]
 name = "autocfg"
 version = "1.5.0"
@@ -4070,14 +4107,18 @@ checksum = "4ade083ccbb4bf536df69d1f6432cc23deb7acccff86b183f3923a6fd56a1153"
 
 [[package]]
 name = "rubato"
-version = "0.15.0"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5d18b486e7d29a408ef3f825bc1327d8f87af091c987ca2f5b734625940e234"
+checksum = "ce96ead1a91f7895704a9f08ea5947dfc8bd7c1f2936a22295b655ec67e5c6ef"
 dependencies = [
+ "audioadapter",
+ "audioadapter-buffers",
  "num-complex",
  "num-integer",
  "num-traits",
  "realfft",
+ "visibility",
+ "windowfunctions",
 ]
 
 [[package]]
@@ -6251,6 +6292,17 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 
+[[package]]
+name = "visibility"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d674d135b4a8c1d7e813e2f8d1c9a58308aee4a680323066025e53132218bd91"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.117",
+]
+
 [[package]]
 name = "vswhom"
 version = "0.1.0"
@@ -6656,6 +6708,15 @@ dependencies = [
  "windows-version",
 ]
 
+[[package]]
+name = "windowfunctions"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90628d739333b7c5d2ee0b70210b97b8cddc38440c682c96fd9e2c24c2db5f3a"
+dependencies = [
+ "num-traits",
+]
+
 [[package]]
 name = "windows"
 version = "0.44.0"
diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml
index 8cf1a97..07b80d0 100644
--- a/src-tauri/Cargo.toml
+++ b/src-tauri/Cargo.toml
@@ -78,7 +78,7 @@ symphonia = { version = "0.5", default-features = false, features = [
     "pcm",
 ] }
 cpal = "0.17"
-rubato = "0.15"
+rubato = "2.0"
 rtrb = "0.3"
 crossbeam-channel = "0.5"
 
diff --git a/src-tauri/src/audio/resampler.rs b/src-tauri/src/audio/resampler.rs
index 60d6916..9b69c54 100644
--- a/src-tauri/src/audio/resampler.rs
+++ b/src-tauri/src/audio/resampler.rs
@@ -1,40 +1,47 @@
-//! Thin wrapper around `rubato::FftFixedIn` that handles the
-//! deinterleave → resample → reinterleave dance.
+//! Thin wrapper around rubato's FFT resampler with `FixedSync::Input`
+//! semantics: every `process_into_buffer` call must consume exactly
+//! `inner.input_frames_next()` interleaved frames.
 //!
-//! Symphonia gives us interleaved f32 samples. Rubato works on planar
-//! `Vec<Vec<f32>>` (one inner vec per channel). We convert on the fly.
+//! Both symphonia (decoder) and cpal (output) operate on interleaved
+//! `f32`, so we feed and read interleaved buffers via the
+//! [`InterleavedSlice`] adapter — no deinterleave/reinterleave hop.
 //!
 //! When the source and destination sample rates already match, a
-//! [`Resampler::passthrough`] variant skips all allocation and just
+//! [`Resampler::Passthrough`] variant skips all allocation and just
 //! forwards the input slice unchanged — important for the common case
-//! where the user's MP3s already match the device rate.
+//! where the user's tracks already match the device rate.
 
-use rubato::{FftFixedIn, Resampler as _};
+use rubato::audioadapter_buffers::direct::InterleavedSlice;
+use rubato::{Fft, FixedSync, Resampler as _};
 
 use crate::error::{AppError, AppResult};
 
-/// FFT chunk size, in input frames per `process()` call. 1024 is the
-/// rubato documented sweet spot for FftFixedIn; smaller = lower latency
-/// but more FFT overhead, larger = better frequency resolution but more
-/// memory.
+/// Desired FFT chunk size in input frames per call. Rubato may round
+/// to the nearest GCD-aligned value of the (in_rate, out_rate) pair;
+/// the actual size is queried via `input_frames_next()` after
+/// construction. 1024 is the rubato-recommended sweet spot — smaller
+/// trades latency for FFT overhead, larger trades memory for
+/// frequency resolution.
 const CHUNK_SIZE: usize = 1024;
 
-/// FFT sub-chunk count. 2 is the typical value from rubato's docs.
+/// FFT sub-chunk count. Higher values reduce processing delay at the
+/// cost of softer anti-aliasing; 2 is the value rubato's docs use as
+/// a baseline.
 const SUB_CHUNKS: usize = 2;
 
 pub enum Resampler {
     Passthrough,
     Fft {
-        inner: FftFixedIn<f32>,
+        inner: Fft<f32>,
         channels: usize,
-        // Planar scratch buffers kept around so each `process` call
-        // reuses the allocation. Inner Vecs are re-sized to CHUNK_SIZE
-        // on construction.
-        in_buf: Vec<Vec<f32>>,
-        /// Accumulator of not-yet-processed frames per channel —
-        /// rubato requires exactly CHUNK_SIZE frames per call, so we
-        /// buffer partial packets across decoder iterations.
-        pending: Vec<Vec<f32>>,
+        /// Reusable interleaved input buffer sized to one rubato chunk.
+        in_scratch: Vec<f32>,
+        /// Reusable interleaved output buffer sized to the resampler's
+        /// max output per call.
+        out_scratch: Vec<f32>,
+        /// Interleaved samples not yet handed to rubato. Drains in
+        /// `input_frames_next()` increments on every `process` call.
+        pending: Vec<f32>,
     },
 }
 
@@ -44,29 +51,34 @@ impl Resampler {
             return Ok(Self::Passthrough);
         }
 
-        let inner = FftFixedIn::<f32>::new(
+        let inner = Fft::<f32>::new(
             src_rate as usize,
             dst_rate as usize,
             CHUNK_SIZE,
             SUB_CHUNKS,
             channels,
+            FixedSync::Input,
         )
         .map_err(|e| AppError::Audio(format!("rubato init: {e}")))?;
 
-        let in_buf = vec![vec![0.0_f32; CHUNK_SIZE]; channels];
-        let pending = vec![Vec::with_capacity(CHUNK_SIZE * 2); channels];
+        let frames_in = inner.input_frames_next();
+        let frames_out_max = inner.output_frames_max();
+        let in_scratch = vec![0.0_f32; frames_in * channels];
+        let out_scratch = vec![0.0_f32; frames_out_max * channels];
+        let pending = Vec::with_capacity(frames_in * channels * 2);
 
         Ok(Self::Fft {
             inner,
             channels,
-            in_buf,
+            in_scratch,
+            out_scratch,
             pending,
         })
     }
 
-    /// Process an interleaved `f32` input buffer and append resampled
-    /// interleaved output into `out`. Returns `Ok(())` on success; on
-    /// rubato errors returns [`AppError::Audio`].
+    /// Process an interleaved `f32` input buffer and append the
+    /// resampled interleaved output into `out`. Returns `Ok(())` on
+    /// success; on rubato errors returns [`AppError::Audio`].
     ///
     /// When [`Self::Passthrough`], the input is appended verbatim.
     pub fn process(&mut self, input: &[f32], out: &mut Vec<f32>) -> AppResult<()> {
@@ -78,60 +90,116 @@ impl Resampler {
             Self::Fft {
                 inner,
                 channels,
-                in_buf,
+                in_scratch,
+                out_scratch,
                 pending,
             } => {
                 let chans = *channels;
-                debug_assert!(input.len() % chans == 0, "interleaved input not aligned to channel count");
-                let frames = input.len() / chans;
-
-                // Deinterleave incoming frames into the per-channel
-                // pending buffers.
-                for ch in 0..chans {
-                    pending[ch].reserve(frames);
-                    for f in 0..frames {
-                        pending[ch].push(input[f * chans + ch]);
-                    }
-                }
+                debug_assert!(
+                    input.len() % chans == 0,
+                    "interleaved input not aligned to channel count"
+                );
 
-                // Drain as many full CHUNK_SIZE blocks as pending holds.
-                while pending[0].len() >= CHUNK_SIZE {
-                    for ch in 0..chans {
-                        // Copy one chunk out of pending into the scratch
-                        // in_buf, then remove those frames from pending.
-                        in_buf[ch].clear();
-                        in_buf[ch].extend_from_slice(&pending[ch][..CHUNK_SIZE]);
-                        pending[ch].drain(..CHUNK_SIZE);
+                pending.extend_from_slice(input);
+
+                loop {
+                    let frames_in = inner.input_frames_next();
+                    let in_samples = frames_in * chans;
+                    if pending.len() < in_samples {
+                        break;
                     }
+                    in_scratch[..in_samples].copy_from_slice(&pending[..in_samples]);
+                    pending.drain(..in_samples);
 
-                    let resampled = inner
-                        .process(in_buf, None)
-                        .map_err(|e| AppError::Audio(format!("rubato process: {e}")))?;
-
-                    // Re-interleave into `out`. rubato gives us the same
-                    // channel count in the output.
-                    let out_frames = resampled[0].len();
-                    out.reserve(out_frames * chans);
-                    for f in 0..out_frames {
-                        for ch in 0..chans {
-                            out.push(resampled[ch][f]);
-                        }
+                    // `output_frames_max` may shift between calls when
+                    // FixedSync::Input pulls multiple sub-chunks from
+                    // the saved-frames backlog; resize on the rare
+                    // occasion it grows so the adapter always fits.
+                    let frames_out_max = inner.output_frames_max();
+                    if out_scratch.len() < frames_out_max * chans {
+                        out_scratch.resize(frames_out_max * chans, 0.0);
                     }
+
+                    let n_out = {
+                        let in_buf = InterleavedSlice::new(
+                            &in_scratch[..in_samples],
+                            chans,
+                            frames_in,
+                        )
+                        .map_err(|e| AppError::Audio(format!("rubato in adapter: {e}")))?;
+                        let mut out_buf = InterleavedSlice::new_mut(
+                            &mut out_scratch[..frames_out_max * chans],
+                            chans,
+                            frames_out_max,
+                        )
+                        .map_err(|e| AppError::Audio(format!("rubato out adapter: {e}")))?;
+                        let (_n_in, n_out) = inner
+                            .process_into_buffer(&in_buf, &mut out_buf, None)
+                            .map_err(|e| AppError::Audio(format!("rubato process: {e}")))?;
+                        n_out
+                    };
+
+                    out.extend_from_slice(&out_scratch[..n_out * chans]);
                 }
                 Ok(())
             }
         }
     }
 
-    /// Flush any frames still buffered in the rubato state. MVP: we
-    /// just drop them, which truncates the tail by at most
-    /// `CHUNK_SIZE - 1` frames. Gapless playback would need a proper
-    /// `process_partial_into_buffer` call here.
+    /// Drop any frames still buffered in the pending queue. MVP: we
+    /// just discard them, which truncates the tail by at most one
+    /// rubato chunk. Gapless playback would need
+    /// `process_into_buffer(..., partial_len = Some(remaining))` here
+    /// so rubato pads with silence instead of swallowing the tail.
     pub fn flush(&mut self) {
         if let Self::Fft { pending, .. } = self {
-            for chan in pending.iter_mut() {
-                chan.clear();
-            }
+            pending.clear();
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn passthrough_returns_input_unchanged() {
+        let mut r = Resampler::new(48_000, 48_000, 2).expect("ctor");
+        let input = vec![0.1_f32, 0.2, 0.3, 0.4, 0.5, 0.6];
+        let mut out = Vec::new();
+        r.process(&input, &mut out).expect("process");
+        assert_eq!(out, input);
+    }
+
+    #[test]
+    fn fft_48k_to_44_1k_produces_proportional_output() {
+        let channels = 2;
+        let src = 48_000_u32;
+        let dst = 44_100_u32;
+        let mut r = Resampler::new(src, dst, channels).expect("ctor");
+
+        // Feed ~1s of stereo silence in chunks of 4096 frames.
+        let total_frames = src as usize;
+        let mut out = Vec::new();
+        let chunk_frames = 4096;
+        let mut fed = 0;
+        while fed < total_frames {
+            let take = chunk_frames.min(total_frames - fed);
+            let buf = vec![0.0_f32; take * channels];
+            r.process(&buf, &mut out).expect("process");
+            fed += take;
+        }
+
+        // Output should be roughly `total_frames * dst / src` interleaved
+        // frames. Tolerance covers the trailing partial rubato chunk we
+        // haven't drained yet.
+        let expected_frames = total_frames * dst as usize / src as usize;
+        let produced_frames = out.len() / channels;
+        let diff = (produced_frames as i64 - expected_frames as i64).abs();
+        assert!(
+            diff < 2_048,
+            "expected ~{expected_frames} frames, got {produced_frames} (diff {diff})"
+        );
+        assert_eq!(out.len() % channels, 0, "output not aligned to channels");
+    }
+}