better time stretching

Skyler Lehmkuhl 2025-10-18 23:28:20 -04:00
parent f9e2d36f3a
commit d4fb8b721a
1 changed file with 150 additions and 118 deletions


@@ -1,17 +1,53 @@
 use std::path::PathBuf;
+use std::f32::consts::PI;
 
-/// Cubic Hermite interpolation for smooth resampling
-/// p0, p1, p2, p3 are four consecutive samples
-/// x is the fractional position between p1 and p2 (0.0 to 1.0)
+/// Windowed sinc interpolation for high-quality time stretching
+/// This is stateless and can handle arbitrary fractional positions
 #[inline]
-fn hermite_interpolate(p0: f32, p1: f32, p2: f32, p3: f32, x: f32) -> f32 {
-    // Hermite basis functions for smooth interpolation
-    let c0 = p1;
-    let c1 = 0.5 * (p2 - p0);
-    let c2 = p0 - 2.5 * p1 + 2.0 * p2 - 0.5 * p3;
-    let c3 = 0.5 * (p3 - p0) + 1.5 * (p1 - p2);
-    ((c3 * x + c2) * x + c1) * x + c0
+fn sinc(x: f32) -> f32 {
+    if x.abs() < 1e-5 {
+        1.0
+    } else {
+        let px = PI * x;
+        px.sin() / px
+    }
 }
+
+/// Blackman window function
+#[inline]
+fn blackman_window(x: f32, width: f32) -> f32 {
+    if x.abs() > width {
+        0.0
+    } else {
+        let a0 = 0.42;
+        let a1 = 0.5;
+        let a2 = 0.08;
+        // Map x from [-width, width] to [0, 1] for proper Blackman window evaluation
+        let n = (x / width + 1.0) / 2.0;
+        a0 - a1 * (2.0 * PI * n).cos() + a2 * (4.0 * PI * n).cos()
+    }
+}
+
+/// High-quality windowed sinc interpolation
+/// Uses a 32-tap windowed sinc kernel for smooth, artifact-free interpolation
+/// frac: fractional position to interpolate at (0.0 to 1.0)
+/// samples: array of samples centered around the target position
+#[inline]
+fn windowed_sinc_interpolate(samples: &[f32], frac: f32) -> f32 {
+    let mut result = 0.0;
+    let kernel_size = samples.len();
+    let half_kernel = (kernel_size / 2) as f32;
+    for i in 0..kernel_size {
+        // Distance from interpolation point
+        // samples[kernel_size / 2] is at position 0, we want to interpolate at position frac
+        let x = frac + half_kernel - (i as f32);
+        let sinc_val = sinc(x);
+        let window_val = blackman_window(x, half_kernel);
+        result += samples[i] * sinc_val * window_val;
+    }
+    result
+}
 
 /// Audio file stored in the pool
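
The new kernel is easy to sanity-check numerically. Below is a minimal sketch, not part of the commit, assuming `sinc`, `blackman_window`, and `windowed_sinc_interpolate` are in scope as defined above: with `frac = 0.0` every tap except the center one lands on a nonzero integer, where `sinc` is numerically zero, so the center sample should come back essentially unchanged; and because the kernel is not explicitly normalized, a constant signal interpolated at `frac = 0.5` should come out near 1.0 rather than exactly 1.0.

#[cfg(test)]
mod windowed_sinc_tests {
    use super::*;

    #[test]
    fn frac_zero_returns_center_sample() {
        // With frac = 0.0, x = half_kernel - i is a nonzero integer for
        // every tap except i = 16, so only the center sample contributes.
        let samples: Vec<f32> = (0..32).map(|i| (i as f32 * 0.37).sin()).collect();
        let result = windowed_sinc_interpolate(&samples, 0.0);
        assert!((result - samples[16]).abs() < 1e-4);
    }

    #[test]
    fn dc_gain_is_close_to_unity() {
        // The kernel is not normalized, so the DC gain at frac = 0.5 is
        // only approximately 1.0; allow a small tolerance.
        let samples = [1.0f32; 32];
        let result = windowed_sinc_interpolate(&samples, 0.5);
        assert!((result - 1.0).abs() < 0.05);
    }
}

If exact unity gain matters, dividing `result` by the sum of `sinc_val * window_val` over all taps would normalize the kernel at the cost of a few extra operations per sample.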
@@ -73,7 +109,7 @@ impl AudioPool {
         self.files.len()
     }
 
-    /// Render audio from a file in the pool with sample rate and channel conversion
+    /// Render audio from a file in the pool with high-quality windowed sinc interpolation
     /// start_time_seconds: position in the audio file to start reading from (in seconds)
     /// Returns the number of samples actually rendered
     pub fn render_from_file(
@@ -85,123 +121,119 @@ impl AudioPool {
         engine_sample_rate: u32,
         engine_channels: u32,
     ) -> usize {
-        if let Some(audio_file) = self.files.get(pool_index) {
-            // Calculate starting frame position in the source file (frame = one sample per channel)
-            let src_start_frame = start_time_seconds * audio_file.sample_rate as f64;
-
-            // Calculate sample rate conversion ratio (frames)
-            let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
-
-            let src_channels = audio_file.channels;
-            let dst_channels = engine_channels;
-
-            // Render frame by frame
-            let output_frames = output.len() / dst_channels as usize;
-            let mut rendered_frames = 0;
-
-            for frame_idx in 0..output_frames {
-                // Calculate the corresponding frame in the source file
-                let src_frame_pos = src_start_frame + (frame_idx as f64 * rate_ratio);
-                let src_frame_idx = src_frame_pos as usize;
-
-                // Check bounds
-                if src_frame_idx >= audio_file.frames as usize {
-                    break;
-                }
-
-                // Calculate source sample index (interleaved)
-                let src_sample_idx = src_frame_idx * src_channels as usize;
-
-                // Check bounds for interpolation
-                if src_sample_idx + src_channels as usize > audio_file.data.len() {
-                    break;
-                }
-
-                // Cubic Hermite interpolation for high-quality time stretching
-                let frac = (src_frame_pos - src_frame_idx as f64) as f32;
-                // We need 4 points for cubic interpolation: p0, p1, p2, p3
-                // where we interpolate between p1 and p2
-                let p1_frame = src_frame_idx;
-                let p0_frame = if p1_frame > 0 { p1_frame - 1 } else { p1_frame };
-                let p2_frame = p1_frame + 1;
-                let p3_frame = p1_frame + 2;
-                let p0_idx = p0_frame * src_channels as usize;
-                let p1_idx = p1_frame * src_channels as usize;
-                let p2_idx = p2_frame * src_channels as usize;
-                let p3_idx = p3_frame * src_channels as usize;
-                let can_interpolate = p3_idx + src_channels as usize <= audio_file.data.len();
-
-                // Read and convert channels
-                for dst_ch in 0..dst_channels {
-                    let sample = if src_channels == dst_channels {
-                        // Same number of channels - direct mapping
-                        let ch = dst_ch as usize;
-                        if can_interpolate && frac > 0.0 {
-                            let p0 = audio_file.data[p0_idx + ch];
-                            let p1 = audio_file.data[p1_idx + ch];
-                            let p2 = audio_file.data[p2_idx + ch];
-                            let p3 = audio_file.data[p3_idx + ch];
-                            hermite_interpolate(p0, p1, p2, p3, frac)
-                        } else {
-                            audio_file.data[p1_idx + ch]
-                        }
-                    } else if src_channels == 1 && dst_channels > 1 {
-                        // Mono to multi-channel - duplicate to all channels
-                        if can_interpolate && frac > 0.0 {
-                            let p0 = audio_file.data[p0_idx];
-                            let p1 = audio_file.data[p1_idx];
-                            let p2 = audio_file.data[p2_idx];
-                            let p3 = audio_file.data[p3_idx];
-                            hermite_interpolate(p0, p1, p2, p3, frac)
-                        } else {
-                            audio_file.data[p1_idx]
-                        }
-                    } else if src_channels > 1 && dst_channels == 1 {
-                        // Multi-channel to mono - average all source channels
-                        let mut sum = 0.0f32;
-                        for src_ch in 0..src_channels {
-                            let ch = src_ch as usize;
-                            let s = if can_interpolate && frac > 0.0 {
-                                let p0 = audio_file.data[p0_idx + ch];
-                                let p1 = audio_file.data[p1_idx + ch];
-                                let p2 = audio_file.data[p2_idx + ch];
-                                let p3 = audio_file.data[p3_idx + ch];
-                                hermite_interpolate(p0, p1, p2, p3, frac)
-                            } else {
-                                audio_file.data[p1_idx + ch]
-                            };
-                            sum += s;
-                        }
-                        sum / src_channels as f32
-                    } else {
-                        // Mismatched channels - use modulo for simple mapping
-                        let src_ch = (dst_ch % src_channels) as usize;
-                        if can_interpolate && frac > 0.0 {
-                            let p0 = audio_file.data[p0_idx + src_ch];
-                            let p1 = audio_file.data[p1_idx + src_ch];
-                            let p2 = audio_file.data[p2_idx + src_ch];
-                            let p3 = audio_file.data[p3_idx + src_ch];
-                            hermite_interpolate(p0, p1, p2, p3, frac)
-                        } else {
-                            audio_file.data[p1_idx + src_ch]
-                        }
-                    };
-
-                    // Mix into output with gain
-                    let output_idx = frame_idx * dst_channels as usize + dst_ch as usize;
-                    output[output_idx] += sample * gain;
-                }
-                rendered_frames += 1;
-            }
-            rendered_frames * dst_channels as usize
-        } else {
-            0
-        }
+        let Some(audio_file) = self.files.get(pool_index) else {
+            return 0;
+        };
+
+        let src_channels = audio_file.channels as usize;
+        let dst_channels = engine_channels as usize;
+        let output_frames = output.len() / dst_channels;
+
+        // Calculate starting position in source with fractional precision
+        let src_start_position = start_time_seconds * audio_file.sample_rate as f64;
+
+        // Sample rate conversion ratio
+        let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
+
+        // Kernel size for windowed sinc (32 taps = high quality, good performance)
+        const KERNEL_SIZE: usize = 32;
+        const HALF_KERNEL: usize = KERNEL_SIZE / 2;
+
+        let mut rendered_frames = 0;
+
+        // Render frame by frame with windowed sinc interpolation
+        for output_frame in 0..output_frames {
+            // Calculate exact fractional position in source
+            let src_position = src_start_position + (output_frame as f64 * rate_ratio);
+            let src_frame = src_position.floor() as i32;
+            let frac = (src_position - src_frame as f64) as f32;
+
+            // Check if we've gone past the end of the audio file
+            if src_frame < 0 || src_frame as usize >= audio_file.frames as usize {
+                break;
+            }
+
+            // Interpolate each channel
+            for dst_ch in 0..dst_channels {
+                let sample = if src_channels == dst_channels {
+                    // Direct channel mapping
+                    let ch_offset = dst_ch;
+                    // Extract channel samples for interpolation
+                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                        let idx = src_frame + i;
+                        if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                            let sample_idx = (idx as usize) * src_channels + ch_offset;
+                            channel_samples.push(audio_file.data[sample_idx]);
+                        } else {
+                            channel_samples.push(0.0);
+                        }
+                    }
+                    windowed_sinc_interpolate(&channel_samples, frac)
+                } else if src_channels == 1 && dst_channels > 1 {
+                    // Mono to multi-channel - duplicate to all channels
+                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                        let idx = src_frame + i;
+                        if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                            channel_samples.push(audio_file.data[idx as usize]);
+                        } else {
+                            channel_samples.push(0.0);
+                        }
+                    }
+                    windowed_sinc_interpolate(&channel_samples, frac)
+                } else if src_channels > 1 && dst_channels == 1 {
+                    // Multi-channel to mono - average all source channels
+                    let mut sum = 0.0;
+                    for src_ch in 0..src_channels {
+                        let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                        for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                            let idx = src_frame + i;
+                            if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                                let sample_idx = (idx as usize) * src_channels + src_ch;
+                                channel_samples.push(audio_file.data[sample_idx]);
+                            } else {
+                                channel_samples.push(0.0);
+                            }
+                        }
+                        sum += windowed_sinc_interpolate(&channel_samples, frac);
+                    }
+                    sum / src_channels as f32
+                } else {
+                    // Mismatched channels - use modulo mapping
+                    let src_ch = dst_ch % src_channels;
+                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                        let idx = src_frame + i;
+                        if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                            let sample_idx = (idx as usize) * src_channels + src_ch;
+                            channel_samples.push(audio_file.data[sample_idx]);
+                        } else {
+                            channel_samples.push(0.0);
+                        }
+                    }
+                    windowed_sinc_interpolate(&channel_samples, frac)
+                };
+
+                // Mix into output with gain
+                let output_idx = output_frame * dst_channels + dst_ch;
+                output[output_idx] += sample * gain;
+            }
+            rendered_frames += 1;
+        }
+        rendered_frames * dst_channels
     }
 }
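
For context, here is a hypothetical call site. The diff only shows the tail of the `render_from_file` signature, so the parameter order ahead of `engine_sample_rate`, and the `fill_block` helper itself, are assumptions based on how the body uses `pool_index`, `start_time_seconds`, `output`, and `gain`:

// Hypothetical caller; the parameter order before engine_sample_rate is assumed.
fn fill_block(pool: &AudioPool) {
    let engine_sample_rate = 48_000u32;
    let engine_channels = 2u32;

    // One block of interleaved stereo output: 512 frames * 2 channels.
    let mut output = vec![0.0f32; 512 * 2];

    // Mix file 0 into the block starting 1.25 s into the source. The renderer
    // steps through the source at rate_ratio frames per output frame, so a
    // 44.1 kHz file plays at natural speed on a 48 kHz engine, interpolated
    // at each fractional position.
    let rendered_samples = pool.render_from_file(
        0,            // pool_index
        1.25,         // start_time_seconds
        &mut output,
        0.8,          // gain, mixed additively into `output`
        engine_sample_rate,
        engine_channels,
    );
    assert!(rendered_samples <= output.len());
}

Note that the renderer accumulates into `output` rather than overwriting it, so a caller that wants a clean block must zero it before rendering.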