better time stretching
This commit is contained in:
parent
f9e2d36f3a
commit
d4fb8b721a
|
|
@ -1,17 +1,53 @@
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use std::f32::consts::PI;
|
||||||
|
|
||||||
/// Normalized sinc function, `sin(pi*x) / (pi*x)`.
///
/// The ideal low-pass interpolation kernel used for high-quality time
/// stretching. Stateless, so it can be evaluated at arbitrary fractional
/// positions.
#[inline]
fn sinc(x: f32) -> f32 {
    // Guard the removable singularity at x == 0, where the limit is 1.
    if x.abs() < 1e-5 {
        return 1.0;
    }
    let scaled = PI * x;
    scaled.sin() / scaled
}
|
/// Blackman window function.
///
/// Tapers the sinc kernel so the truncated interpolation filter does not
/// ring. Returns 0.0 for any `x` outside `[-width, width]`; inside, the
/// window peaks at 1.0 for x == 0 and falls to ~0 at the edges.
#[inline]
fn blackman_window(x: f32, width: f32) -> f32 {
    if x.abs() > width {
        return 0.0;
    }
    // Remap x from [-width, width] onto [0, 1], the window's natural domain.
    let n = (x / width + 1.0) / 2.0;
    // Classic Blackman coefficients: a0 = 0.42, a1 = 0.5, a2 = 0.08.
    let (a0, a1, a2) = (0.42, 0.5, 0.08);
    a0 - a1 * (2.0 * PI * n).cos() + a2 * (4.0 * PI * n).cos()
}
|
||||||
|
|
||||||
|
/// High-quality windowed sinc interpolation
|
||||||
|
/// Uses a 32-tap windowed sinc kernel for smooth, artifact-free interpolation
|
||||||
|
/// frac: fractional position to interpolate at (0.0 to 1.0)
|
||||||
|
/// samples: array of samples centered around the target position
|
||||||
|
#[inline]
|
||||||
|
fn windowed_sinc_interpolate(samples: &[f32], frac: f32) -> f32 {
|
||||||
|
let mut result = 0.0;
|
||||||
|
let kernel_size = samples.len();
|
||||||
|
let half_kernel = (kernel_size / 2) as f32;
|
||||||
|
|
||||||
|
for i in 0..kernel_size {
|
||||||
|
// Distance from interpolation point
|
||||||
|
// samples[half_kernel] is at position 0, we want to interpolate at position frac
|
||||||
|
let x = frac + half_kernel - (i as f32);
|
||||||
|
let sinc_val = sinc(x);
|
||||||
|
let window_val = blackman_window(x, half_kernel);
|
||||||
|
result += samples[i] * sinc_val * window_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Audio file stored in the pool
|
/// Audio file stored in the pool
|
||||||
|
|
@ -73,7 +109,7 @@ impl AudioPool {
|
||||||
self.files.len()
|
self.files.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Render audio from a file in the pool with sample rate and channel conversion
|
/// Render audio from a file in the pool with high-quality windowed sinc interpolation
|
||||||
/// start_time_seconds: position in the audio file to start reading from (in seconds)
|
/// start_time_seconds: position in the audio file to start reading from (in seconds)
|
||||||
/// Returns the number of samples actually rendered
|
/// Returns the number of samples actually rendered
|
||||||
pub fn render_from_file(
|
pub fn render_from_file(
|
||||||
|
|
@ -85,123 +121,119 @@ impl AudioPool {
|
||||||
engine_sample_rate: u32,
|
engine_sample_rate: u32,
|
||||||
engine_channels: u32,
|
engine_channels: u32,
|
||||||
) -> usize {
|
) -> usize {
|
||||||
if let Some(audio_file) = self.files.get(pool_index) {
|
let Some(audio_file) = self.files.get(pool_index) else {
|
||||||
// Calculate starting frame position in the source file (frame = one sample per channel)
|
return 0;
|
||||||
let src_start_frame = start_time_seconds * audio_file.sample_rate as f64;
|
};
|
||||||
|
|
||||||
// Calculate sample rate conversion ratio (frames)
|
let src_channels = audio_file.channels as usize;
|
||||||
let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
|
let dst_channels = engine_channels as usize;
|
||||||
|
let output_frames = output.len() / dst_channels;
|
||||||
|
|
||||||
let src_channels = audio_file.channels;
|
// Calculate starting position in source with fractional precision
|
||||||
let dst_channels = engine_channels;
|
let src_start_position = start_time_seconds * audio_file.sample_rate as f64;
|
||||||
|
|
||||||
// Render frame by frame
|
// Sample rate conversion ratio
|
||||||
let output_frames = output.len() / dst_channels as usize;
|
let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
|
||||||
let mut rendered_frames = 0;
|
|
||||||
|
|
||||||
for frame_idx in 0..output_frames {
|
// Kernel size for windowed sinc (32 taps = high quality, good performance)
|
||||||
// Calculate the corresponding frame in the source file
|
const KERNEL_SIZE: usize = 32;
|
||||||
let src_frame_pos = src_start_frame + (frame_idx as f64 * rate_ratio);
|
const HALF_KERNEL: usize = KERNEL_SIZE / 2;
|
||||||
let src_frame_idx = src_frame_pos as usize;
|
|
||||||
|
|
||||||
// Check bounds
|
let mut rendered_frames = 0;
|
||||||
if src_frame_idx >= audio_file.frames as usize {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate source sample index (interleaved)
|
// Render frame by frame with windowed sinc interpolation
|
||||||
let src_sample_idx = src_frame_idx * src_channels as usize;
|
for output_frame in 0..output_frames {
|
||||||
|
// Calculate exact fractional position in source
|
||||||
|
let src_position = src_start_position + (output_frame as f64 * rate_ratio);
|
||||||
|
let src_frame = src_position.floor() as i32;
|
||||||
|
let frac = (src_position - src_frame as f64) as f32;
|
||||||
|
|
||||||
// Check bounds for interpolation
|
// Check if we've gone past the end of the audio file
|
||||||
if src_sample_idx + src_channels as usize > audio_file.data.len() {
|
if src_frame < 0 || src_frame as usize >= audio_file.frames as usize {
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
|
|
||||||
// Cubic Hermite interpolation for high-quality time stretching
|
|
||||||
let frac = (src_frame_pos - src_frame_idx as f64) as f32;
|
|
||||||
|
|
||||||
// We need 4 points for cubic interpolation: p0, p1, p2, p3
|
|
||||||
// where we interpolate between p1 and p2
|
|
||||||
let p1_frame = src_frame_idx;
|
|
||||||
let p0_frame = if p1_frame > 0 { p1_frame - 1 } else { p1_frame };
|
|
||||||
let p2_frame = p1_frame + 1;
|
|
||||||
let p3_frame = p1_frame + 2;
|
|
||||||
|
|
||||||
let p0_idx = p0_frame * src_channels as usize;
|
|
||||||
let p1_idx = p1_frame * src_channels as usize;
|
|
||||||
let p2_idx = p2_frame * src_channels as usize;
|
|
||||||
let p3_idx = p3_frame * src_channels as usize;
|
|
||||||
|
|
||||||
let can_interpolate = p3_idx + src_channels as usize <= audio_file.data.len();
|
|
||||||
|
|
||||||
// Read and convert channels
|
|
||||||
for dst_ch in 0..dst_channels {
|
|
||||||
let sample = if src_channels == dst_channels {
|
|
||||||
// Same number of channels - direct mapping
|
|
||||||
let ch = dst_ch as usize;
|
|
||||||
if can_interpolate && frac > 0.0 {
|
|
||||||
let p0 = audio_file.data[p0_idx + ch];
|
|
||||||
let p1 = audio_file.data[p1_idx + ch];
|
|
||||||
let p2 = audio_file.data[p2_idx + ch];
|
|
||||||
let p3 = audio_file.data[p3_idx + ch];
|
|
||||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
|
||||||
} else {
|
|
||||||
audio_file.data[p1_idx + ch]
|
|
||||||
}
|
|
||||||
} else if src_channels == 1 && dst_channels > 1 {
|
|
||||||
// Mono to multi-channel - duplicate to all channels
|
|
||||||
if can_interpolate && frac > 0.0 {
|
|
||||||
let p0 = audio_file.data[p0_idx];
|
|
||||||
let p1 = audio_file.data[p1_idx];
|
|
||||||
let p2 = audio_file.data[p2_idx];
|
|
||||||
let p3 = audio_file.data[p3_idx];
|
|
||||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
|
||||||
} else {
|
|
||||||
audio_file.data[p1_idx]
|
|
||||||
}
|
|
||||||
} else if src_channels > 1 && dst_channels == 1 {
|
|
||||||
// Multi-channel to mono - average all source channels
|
|
||||||
let mut sum = 0.0f32;
|
|
||||||
for src_ch in 0..src_channels {
|
|
||||||
let ch = src_ch as usize;
|
|
||||||
let s = if can_interpolate && frac > 0.0 {
|
|
||||||
let p0 = audio_file.data[p0_idx + ch];
|
|
||||||
let p1 = audio_file.data[p1_idx + ch];
|
|
||||||
let p2 = audio_file.data[p2_idx + ch];
|
|
||||||
let p3 = audio_file.data[p3_idx + ch];
|
|
||||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
|
||||||
} else {
|
|
||||||
audio_file.data[p1_idx + ch]
|
|
||||||
};
|
|
||||||
sum += s;
|
|
||||||
}
|
|
||||||
sum / src_channels as f32
|
|
||||||
} else {
|
|
||||||
// Mismatched channels - use modulo for simple mapping
|
|
||||||
let src_ch = (dst_ch % src_channels) as usize;
|
|
||||||
if can_interpolate && frac > 0.0 {
|
|
||||||
let p0 = audio_file.data[p0_idx + src_ch];
|
|
||||||
let p1 = audio_file.data[p1_idx + src_ch];
|
|
||||||
let p2 = audio_file.data[p2_idx + src_ch];
|
|
||||||
let p3 = audio_file.data[p3_idx + src_ch];
|
|
||||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
|
||||||
} else {
|
|
||||||
audio_file.data[p1_idx + src_ch]
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Mix into output with gain
|
|
||||||
let output_idx = frame_idx * dst_channels as usize + dst_ch as usize;
|
|
||||||
output[output_idx] += sample * gain;
|
|
||||||
}
|
|
||||||
|
|
||||||
rendered_frames += 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rendered_frames * dst_channels as usize
|
// Interpolate each channel
|
||||||
} else {
|
for dst_ch in 0..dst_channels {
|
||||||
0
|
let sample = if src_channels == dst_channels {
|
||||||
|
// Direct channel mapping
|
||||||
|
let ch_offset = dst_ch;
|
||||||
|
|
||||||
|
// Extract channel samples for interpolation
|
||||||
|
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||||
|
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||||
|
let idx = src_frame + i;
|
||||||
|
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||||
|
let sample_idx = (idx as usize) * src_channels + ch_offset;
|
||||||
|
channel_samples.push(audio_file.data[sample_idx]);
|
||||||
|
} else {
|
||||||
|
channel_samples.push(0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
windowed_sinc_interpolate(&channel_samples, frac)
|
||||||
|
|
||||||
|
} else if src_channels == 1 && dst_channels > 1 {
|
||||||
|
// Mono to stereo - duplicate
|
||||||
|
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||||
|
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||||
|
let idx = src_frame + i;
|
||||||
|
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||||
|
channel_samples.push(audio_file.data[idx as usize]);
|
||||||
|
} else {
|
||||||
|
channel_samples.push(0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
windowed_sinc_interpolate(&channel_samples, frac)
|
||||||
|
|
||||||
|
} else if src_channels > 1 && dst_channels == 1 {
|
||||||
|
// Multi-channel to mono - average all source channels
|
||||||
|
let mut sum = 0.0;
|
||||||
|
|
||||||
|
for src_ch in 0..src_channels {
|
||||||
|
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||||
|
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||||
|
let idx = src_frame + i;
|
||||||
|
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||||
|
let sample_idx = (idx as usize) * src_channels + src_ch;
|
||||||
|
channel_samples.push(audio_file.data[sample_idx]);
|
||||||
|
} else {
|
||||||
|
channel_samples.push(0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sum += windowed_sinc_interpolate(&channel_samples, frac);
|
||||||
|
}
|
||||||
|
|
||||||
|
sum / src_channels as f32
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// Mismatched channels - use modulo mapping
|
||||||
|
let src_ch = dst_ch % src_channels;
|
||||||
|
|
||||||
|
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||||
|
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||||
|
let idx = src_frame + i;
|
||||||
|
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||||
|
let sample_idx = (idx as usize) * src_channels + src_ch;
|
||||||
|
channel_samples.push(audio_file.data[sample_idx]);
|
||||||
|
} else {
|
||||||
|
channel_samples.push(0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
windowed_sinc_interpolate(&channel_samples, frac)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Mix into output with gain
|
||||||
|
let output_idx = output_frame * dst_channels + dst_ch;
|
||||||
|
output[output_idx] += sample * gain;
|
||||||
|
}
|
||||||
|
|
||||||
|
rendered_frames += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rendered_frames * dst_channels
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue