diff --git a/daw-backend/src/audio/pool.rs b/daw-backend/src/audio/pool.rs
index 2b88df8..42903a5 100644
--- a/daw-backend/src/audio/pool.rs
+++ b/daw-backend/src/audio/pool.rs
@@ -1,17 +1,53 @@
 use std::path::PathBuf;
+use std::f32::consts::PI;
 
-/// Cubic Hermite interpolation for smooth resampling
-/// p0, p1, p2, p3 are four consecutive samples
-/// x is the fractional position between p1 and p2 (0.0 to 1.0)
+/// Windowed sinc interpolation for high-quality time stretching
+/// This is stateless and can handle arbitrary fractional positions
 #[inline]
-fn hermite_interpolate(p0: f32, p1: f32, p2: f32, p3: f32, x: f32) -> f32 {
-    // Hermite basis functions for smooth interpolation
-    let c0 = p1;
-    let c1 = 0.5 * (p2 - p0);
-    let c2 = p0 - 2.5 * p1 + 2.0 * p2 - 0.5 * p3;
-    let c3 = 0.5 * (p3 - p0) + 1.5 * (p1 - p2);
+fn sinc(x: f32) -> f32 {
+    if x.abs() < 1e-5 {
+        1.0
+    } else {
+        let px = PI * x;
+        px.sin() / px
+    }
+}
 
-    ((c3 * x + c2) * x + c1) * x + c0
+/// Blackman window function
+#[inline]
+fn blackman_window(x: f32, width: f32) -> f32 {
+    if x.abs() > width {
+        0.0
+    } else {
+        let a0 = 0.42;
+        let a1 = 0.5;
+        let a2 = 0.08;
+        // Map x from [-width, width] to [0, 1] for proper Blackman window evaluation
+        let n = (x / width + 1.0) / 2.0;
+        a0 - a1 * (2.0 * PI * n).cos() + a2 * (4.0 * PI * n).cos()
+    }
+}
+
+/// High-quality windowed sinc interpolation
+/// Uses a 32-tap windowed sinc kernel for smooth, artifact-free interpolation
+/// frac: fractional position to interpolate at (0.0 to 1.0)
+/// samples: array of samples centered around the target position
+#[inline]
+fn windowed_sinc_interpolate(samples: &[f32], frac: f32) -> f32 {
+    let mut result = 0.0;
+    let kernel_size = samples.len();
+    let half_kernel = (kernel_size / 2) as f32;
+
+    for i in 0..kernel_size {
+        // Distance from the interpolation point: samples[kernel_size / 2]
+        // sits at offset 0, and we interpolate at offset frac
+        let x = frac + half_kernel - (i as f32);
+        let sinc_val = sinc(x);
+        let window_val = blackman_window(x, half_kernel);
+        result += samples[i] * sinc_val * window_val;
+    }
+
+    result
 }
 
 /// Audio file stored in the pool
@@ -73,7 +109,7 @@ impl AudioPool {
         self.files.len()
     }
 
-    /// Render audio from a file in the pool with sample rate and channel conversion
+    /// Render audio from a file in the pool with high-quality windowed sinc interpolation
     /// start_time_seconds: position in the audio file to start reading from (in seconds)
    /// Returns the number of samples actually rendered
     pub fn render_from_file(
@@ -85,123 +121,119 @@ impl AudioPool {
         engine_sample_rate: u32,
         engine_channels: u32,
     ) -> usize {
-        if let Some(audio_file) = self.files.get(pool_index) {
-            // Calculate starting frame position in the source file (frame = one sample per channel)
-            let src_start_frame = start_time_seconds * audio_file.sample_rate as f64;
+        let Some(audio_file) = self.files.get(pool_index) else {
+            return 0;
+        };
 
-            // Calculate sample rate conversion ratio (frames)
-            let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
+        let src_channels = audio_file.channels as usize;
+        let dst_channels = engine_channels as usize;
+        let output_frames = output.len() / dst_channels;
 
-            let src_channels = audio_file.channels;
-            let dst_channels = engine_channels;
+        // Calculate starting position in source with fractional precision
+        let src_start_position = start_time_seconds * audio_file.sample_rate as f64;
 
-            // Render frame by frame
-            let output_frames = output.len() / dst_channels as usize;
-            let mut rendered_frames = 0;
+        // Sample rate conversion ratio
+        let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
 
-            for frame_idx in 0..output_frames {
-                // Calculate the corresponding frame in the source file
-                let src_frame_pos = src_start_frame + (frame_idx as f64 * rate_ratio);
-                let src_frame_idx = src_frame_pos as usize;
+        // Kernel size for windowed sinc (32 taps = high quality, good performance)
+        const KERNEL_SIZE: usize = 32;
+        const HALF_KERNEL: usize = KERNEL_SIZE / 2;
 
-                // Check bounds
-                if src_frame_idx >= audio_file.frames as usize {
-                    break;
-                }
+        let mut rendered_frames = 0;
 
-                // Calculate source sample index (interleaved)
-                let src_sample_idx = src_frame_idx * src_channels as usize;
+        // Render frame by frame with windowed sinc interpolation
+        for output_frame in 0..output_frames {
+            // Calculate exact fractional position in source
+            let src_position = src_start_position + (output_frame as f64 * rate_ratio);
+            let src_frame = src_position.floor() as i32;
+            let frac = (src_position - src_frame as f64) as f32;
 
-                // Check bounds for interpolation
-                if src_sample_idx + src_channels as usize > audio_file.data.len() {
-                    break;
-                }
-
-                // Cubic Hermite interpolation for high-quality time stretching
-                let frac = (src_frame_pos - src_frame_idx as f64) as f32;
-
-                // We need 4 points for cubic interpolation: p0, p1, p2, p3
-                // where we interpolate between p1 and p2
-                let p1_frame = src_frame_idx;
-                let p0_frame = if p1_frame > 0 { p1_frame - 1 } else { p1_frame };
-                let p2_frame = p1_frame + 1;
-                let p3_frame = p1_frame + 2;
-
-                let p0_idx = p0_frame * src_channels as usize;
-                let p1_idx = p1_frame * src_channels as usize;
-                let p2_idx = p2_frame * src_channels as usize;
-                let p3_idx = p3_frame * src_channels as usize;
-
-                let can_interpolate = p3_idx + src_channels as usize <= audio_file.data.len();
-
-                // Read and convert channels
-                for dst_ch in 0..dst_channels {
-                    let sample = if src_channels == dst_channels {
-                        // Same number of channels - direct mapping
-                        let ch = dst_ch as usize;
-                        if can_interpolate && frac > 0.0 {
-                            let p0 = audio_file.data[p0_idx + ch];
-                            let p1 = audio_file.data[p1_idx + ch];
-                            let p2 = audio_file.data[p2_idx + ch];
-                            let p3 = audio_file.data[p3_idx + ch];
-                            hermite_interpolate(p0, p1, p2, p3, frac)
-                        } else {
-                            audio_file.data[p1_idx + ch]
-                        }
-                    } else if src_channels == 1 && dst_channels > 1 {
-                        // Mono to multi-channel - duplicate to all channels
-                        if can_interpolate && frac > 0.0 {
-                            let p0 = audio_file.data[p0_idx];
-                            let p1 = audio_file.data[p1_idx];
-                            let p2 = audio_file.data[p2_idx];
-                            let p3 = audio_file.data[p3_idx];
-                            hermite_interpolate(p0, p1, p2, p3, frac)
-                        } else {
-                            audio_file.data[p1_idx]
-                        }
-                    } else if src_channels > 1 && dst_channels == 1 {
-                        // Multi-channel to mono - average all source channels
-                        let mut sum = 0.0f32;
-                        for src_ch in 0..src_channels {
-                            let ch = src_ch as usize;
-                            let s = if can_interpolate && frac > 0.0 {
-                                let p0 = audio_file.data[p0_idx + ch];
-                                let p1 = audio_file.data[p1_idx + ch];
-                                let p2 = audio_file.data[p2_idx + ch];
-                                let p3 = audio_file.data[p3_idx + ch];
-                                hermite_interpolate(p0, p1, p2, p3, frac)
-                            } else {
-                                audio_file.data[p1_idx + ch]
-                            };
-                            sum += s;
-                        }
-                        sum / src_channels as f32
-                    } else {
-                        // Mismatched channels - use modulo for simple mapping
-                        let src_ch = (dst_ch % src_channels) as usize;
-                        if can_interpolate && frac > 0.0 {
-                            let p0 = audio_file.data[p0_idx + src_ch];
-                            let p1 = audio_file.data[p1_idx + src_ch];
-                            let p2 = audio_file.data[p2_idx + src_ch];
-                            let p3 = audio_file.data[p3_idx + src_ch];
-                            hermite_interpolate(p0, p1, p2, p3, frac)
-                        } else {
-                            audio_file.data[p1_idx + src_ch]
-                        }
-                    };
-
-                    // Mix into output with gain
-                    let output_idx = frame_idx * dst_channels as usize + dst_ch as usize;
-                    output[output_idx] += sample * gain;
-                }
-
-                rendered_frames += 1;
+            // Check if we've gone past the end of the audio file
+            if src_frame < 0 || src_frame as usize >= audio_file.frames as usize {
+                break;
             }
 
-            rendered_frames * dst_channels as usize
-        } else {
-            0
+            // Interpolate each channel
+            for dst_ch in 0..dst_channels {
+                let sample = if src_channels == dst_channels {
+                    // Direct channel mapping
+                    let ch_offset = dst_ch;
+
+                    // Extract channel samples for interpolation
+                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                        let idx = src_frame + i;
+                        if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                            let sample_idx = (idx as usize) * src_channels + ch_offset;
+                            channel_samples.push(audio_file.data[sample_idx]);
+                        } else {
+                            channel_samples.push(0.0);
+                        }
+                    }
+
+                    windowed_sinc_interpolate(&channel_samples, frac)
+
+                } else if src_channels == 1 && dst_channels > 1 {
+                    // Mono to multi-channel - duplicate to all channels
+                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                        let idx = src_frame + i;
+                        if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                            channel_samples.push(audio_file.data[idx as usize]);
+                        } else {
+                            channel_samples.push(0.0);
+                        }
+                    }
+
+                    windowed_sinc_interpolate(&channel_samples, frac)
+
+                } else if src_channels > 1 && dst_channels == 1 {
+                    // Multi-channel to mono - average all source channels
+                    let mut sum = 0.0;
+
+                    for src_ch in 0..src_channels {
+                        let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                        for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                            let idx = src_frame + i;
+                            if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                                let sample_idx = (idx as usize) * src_channels + src_ch;
+                                channel_samples.push(audio_file.data[sample_idx]);
+                            } else {
+                                channel_samples.push(0.0);
+                            }
+                        }
+                        sum += windowed_sinc_interpolate(&channel_samples, frac);
+                    }
+
+                    sum / src_channels as f32
+
+                } else {
+                    // Mismatched channels - use modulo mapping
+                    let src_ch = dst_ch % src_channels;
+
+                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                        let idx = src_frame + i;
+                        if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                            let sample_idx = (idx as usize) * src_channels + src_ch;
+                            channel_samples.push(audio_file.data[sample_idx]);
+                        } else {
+                            channel_samples.push(0.0);
+                        }
+                    }
+
+                    windowed_sinc_interpolate(&channel_samples, frac)
+                };
+
+                // Mix into output with gain
+                let output_idx = output_frame * dst_channels + dst_ch;
+                output[output_idx] += sample * gain;
+            }
+
+            rendered_frames += 1;
         }
+
+        rendered_frames * dst_channels
     }
 }
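A property of the new kernel worth pinning down in a test: at `frac == 0.0` the interpolation point coincides with the center sample, where `sinc(0) == 1` and `sinc(n)` is (numerically almost) zero at every other integer `n`, so `windowed_sinc_interpolate` should return the center sample nearly exactly. A minimal sketch of such a check (module name and tolerance are illustrative, not part of the patch):

```rust
#[cfg(test)]
mod sinc_interpolation_tests {
    use super::*;

    #[test]
    fn integral_positions_pass_through() {
        // 32 samples of an arbitrary signal; index 16 is the kernel center.
        let samples: Vec<f32> = (0..32).map(|i| (i as f32 * 0.37).sin()).collect();
        let center = samples[16];

        // With frac == 0 every tap except the center lands on an integer,
        // where the windowed sinc is (numerically almost) zero.
        let out = windowed_sinc_interpolate(&samples, 0.0);
        assert!((out - center).abs() < 1e-3, "got {out}, expected {center}");
    }
}
```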
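One property the patch does not guarantee is exact unity DC gain: a truncated, windowed sinc does not sum to exactly 1.0 at every `frac`, so a constant input can pick up a small amplitude ripple as `frac` sweeps. If that ripple ever matters in practice, normalizing by the tap sum is a common fix. A sketch of that variant, built on the same `sinc` and `blackman_window` helpers (the function name is hypothetical, not part of the patch):

```rust
/// Like windowed_sinc_interpolate, but normalizes by the tap sum so a
/// constant (DC) input comes back with unity gain at any frac.
fn windowed_sinc_interpolate_normalized(samples: &[f32], frac: f32) -> f32 {
    let half_kernel = (samples.len() / 2) as f32;
    let mut acc = 0.0;
    let mut tap_sum = 0.0;

    for (i, &s) in samples.iter().enumerate() {
        let x = frac + half_kernel - i as f32;
        let tap = sinc(x) * blackman_window(x, half_kernel);
        acc += s * tap;
        tap_sum += tap;
    }

    // Guard against a degenerate (near-zero) tap sum.
    if tap_sum.abs() > 1e-6 { acc / tap_sum } else { acc }
}
```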
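A possible follow-up on the render path: each output sample currently allocates a fresh `Vec` per channel inside the inner loop, which is generally avoided on the audio thread. Since `KERNEL_SIZE` is a compile-time constant, the gather can fill a caller-provided stack array instead. A sketch of one way to factor that out (the helper name is hypothetical; `data` is the interleaved sample buffer):

```rust
const KERNEL_SIZE: usize = 32;

/// Copies KERNEL_SIZE samples of one channel, centered on src_frame,
/// into a stack buffer, zero-padding past either end of the file.
fn gather_channel(
    data: &[f32],
    frames: usize,
    src_channels: usize,
    channel: usize,
    src_frame: i32,
    out: &mut [f32; KERNEL_SIZE],
) {
    let half = (KERNEL_SIZE / 2) as i32;
    for (slot, i) in (-half..half).enumerate() {
        let idx = src_frame + i;
        out[slot] = if idx >= 0 && (idx as usize) < frames {
            data[(idx as usize) * src_channels + channel]
        } else {
            0.0
        };
    }
}
```

Each branch of `render_from_file` could then reuse a single `let mut buf = [0.0f32; KERNEL_SIZE];` declared outside the frame loop and pass `&buf` to `windowed_sinc_interpolate`.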