better time stretching

Skyler Lehmkuhl 2025-10-18 23:28:20 -04:00
parent f9e2d36f3a
commit d4fb8b721a
1 changed file with 150 additions and 118 deletions


@@ -1,17 +1,53 @@
 use std::path::PathBuf;
+use std::f32::consts::PI;
 
-/// Cubic Hermite interpolation for smooth resampling
-/// p0, p1, p2, p3 are four consecutive samples
-/// x is the fractional position between p1 and p2 (0.0 to 1.0)
+/// Windowed sinc interpolation for high-quality time stretching
+/// This is stateless and can handle arbitrary fractional positions
 #[inline]
-fn hermite_interpolate(p0: f32, p1: f32, p2: f32, p3: f32, x: f32) -> f32 {
-    // Hermite basis functions for smooth interpolation
-    let c0 = p1;
-    let c1 = 0.5 * (p2 - p0);
-    let c2 = p0 - 2.5 * p1 + 2.0 * p2 - 0.5 * p3;
-    let c3 = 0.5 * (p3 - p0) + 1.5 * (p1 - p2);
-    ((c3 * x + c2) * x + c1) * x + c0
+fn sinc(x: f32) -> f32 {
+    if x.abs() < 1e-5 {
+        1.0
+    } else {
+        let px = PI * x;
+        px.sin() / px
+    }
 }
+
+/// Blackman window function
+#[inline]
+fn blackman_window(x: f32, width: f32) -> f32 {
+    if x.abs() > width {
+        0.0
+    } else {
+        let a0 = 0.42;
+        let a1 = 0.5;
+        let a2 = 0.08;
+        // Map x from [-width, width] to [0, 1] for proper Blackman window evaluation
+        let n = (x / width + 1.0) / 2.0;
+        a0 - a1 * (2.0 * PI * n).cos() + a2 * (4.0 * PI * n).cos()
+    }
+}
+
+/// High-quality windowed sinc interpolation
+/// Uses a 32-tap windowed sinc kernel for smooth, artifact-free interpolation
+/// frac: fractional position to interpolate at (0.0 to 1.0)
+/// samples: array of samples centered around the target position
+#[inline]
+fn windowed_sinc_interpolate(samples: &[f32], frac: f32) -> f32 {
+    let mut result = 0.0;
+    let kernel_size = samples.len();
+    let half_kernel = (kernel_size / 2) as f32;
+    for i in 0..kernel_size {
+        // Distance from interpolation point
+        // samples[kernel_size / 2] is at position 0, we want to interpolate at position frac
+        let x = frac + half_kernel - (i as f32);
+        let sinc_val = sinc(x);
+        let window_val = blackman_window(x, half_kernel);
+        result += samples[i] * sinc_val * window_val;
+    }
+    result
+}
 
 /// Audio file stored in the pool
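
The new kernel is easy to sanity-check numerically. Below is a minimal sketch, not part of the commit, assuming `sinc`, `blackman_window`, and `windowed_sinc_interpolate` are in scope as defined above: with `frac = 0.0` every tap except the center one lands on a nonzero integer, where `sinc` is numerically zero, so the center sample should come back essentially unchanged; and because the kernel is not explicitly normalized, a constant signal interpolated at `frac = 0.5` should come out near 1.0 rather than exactly 1.0.

#[cfg(test)]
mod windowed_sinc_tests {
    use super::*;

    #[test]
    fn frac_zero_returns_center_sample() {
        // With frac = 0.0, x = half_kernel - i is a nonzero integer for
        // every tap except i = 16, so only the center sample contributes.
        let samples: Vec<f32> = (0..32).map(|i| (i as f32 * 0.37).sin()).collect();
        let result = windowed_sinc_interpolate(&samples, 0.0);
        assert!((result - samples[16]).abs() < 1e-4);
    }

    #[test]
    fn dc_gain_is_close_to_unity() {
        // The kernel is not normalized, so the DC gain at frac = 0.5 is
        // only approximately 1.0; allow a small tolerance.
        let samples = [1.0f32; 32];
        let result = windowed_sinc_interpolate(&samples, 0.5);
        assert!((result - 1.0).abs() < 0.05);
    }
}

If exact unity gain matters, dividing `result` by the sum of `sinc_val * window_val` over all taps would normalize the kernel at the cost of a few extra operations per sample.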
@@ -73,7 +109,7 @@ impl AudioPool {
         self.files.len()
     }
 
-    /// Render audio from a file in the pool with sample rate and channel conversion
+    /// Render audio from a file in the pool with high-quality windowed sinc interpolation
     /// start_time_seconds: position in the audio file to start reading from (in seconds)
     /// Returns the number of samples actually rendered
     pub fn render_from_file(
@@ -85,123 +121,119 @@ impl AudioPool {
         engine_sample_rate: u32,
         engine_channels: u32,
     ) -> usize {
-        if let Some(audio_file) = self.files.get(pool_index) {
-            // Calculate starting frame position in the source file (frame = one sample per channel)
-            let src_start_frame = start_time_seconds * audio_file.sample_rate as f64;
-
-            // Calculate sample rate conversion ratio (frames)
-            let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
-
-            let src_channels = audio_file.channels;
-            let dst_channels = engine_channels;
-
-            // Render frame by frame
-            let output_frames = output.len() / dst_channels as usize;
-            let mut rendered_frames = 0;
-
-            for frame_idx in 0..output_frames {
-                // Calculate the corresponding frame in the source file
-                let src_frame_pos = src_start_frame + (frame_idx as f64 * rate_ratio);
-                let src_frame_idx = src_frame_pos as usize;
-
-                // Check bounds
-                if src_frame_idx >= audio_file.frames as usize {
-                    break;
-                }
-
-                // Calculate source sample index (interleaved)
-                let src_sample_idx = src_frame_idx * src_channels as usize;
-
-                // Check bounds for interpolation
-                if src_sample_idx + src_channels as usize > audio_file.data.len() {
-                    break;
-                }
-
-                // Cubic Hermite interpolation for high-quality time stretching
-                let frac = (src_frame_pos - src_frame_idx as f64) as f32;
-                // We need 4 points for cubic interpolation: p0, p1, p2, p3
-                // where we interpolate between p1 and p2
-                let p1_frame = src_frame_idx;
-                let p0_frame = if p1_frame > 0 { p1_frame - 1 } else { p1_frame };
-                let p2_frame = p1_frame + 1;
-                let p3_frame = p1_frame + 2;
-                let p0_idx = p0_frame * src_channels as usize;
-                let p1_idx = p1_frame * src_channels as usize;
-                let p2_idx = p2_frame * src_channels as usize;
-                let p3_idx = p3_frame * src_channels as usize;
-                let can_interpolate = p3_idx + src_channels as usize <= audio_file.data.len();
-
-                // Read and convert channels
-                for dst_ch in 0..dst_channels {
-                    let sample = if src_channels == dst_channels {
-                        // Same number of channels - direct mapping
-                        let ch = dst_ch as usize;
-                        if can_interpolate && frac > 0.0 {
-                            let p0 = audio_file.data[p0_idx + ch];
-                            let p1 = audio_file.data[p1_idx + ch];
-                            let p2 = audio_file.data[p2_idx + ch];
-                            let p3 = audio_file.data[p3_idx + ch];
-                            hermite_interpolate(p0, p1, p2, p3, frac)
-                        } else {
-                            audio_file.data[p1_idx + ch]
-                        }
-                    } else if src_channels == 1 && dst_channels > 1 {
-                        // Mono to multi-channel - duplicate to all channels
-                        if can_interpolate && frac > 0.0 {
-                            let p0 = audio_file.data[p0_idx];
-                            let p1 = audio_file.data[p1_idx];
-                            let p2 = audio_file.data[p2_idx];
-                            let p3 = audio_file.data[p3_idx];
-                            hermite_interpolate(p0, p1, p2, p3, frac)
-                        } else {
-                            audio_file.data[p1_idx]
-                        }
-                    } else if src_channels > 1 && dst_channels == 1 {
-                        // Multi-channel to mono - average all source channels
-                        let mut sum = 0.0f32;
-                        for src_ch in 0..src_channels {
-                            let ch = src_ch as usize;
-                            let s = if can_interpolate && frac > 0.0 {
-                                let p0 = audio_file.data[p0_idx + ch];
-                                let p1 = audio_file.data[p1_idx + ch];
-                                let p2 = audio_file.data[p2_idx + ch];
-                                let p3 = audio_file.data[p3_idx + ch];
-                                hermite_interpolate(p0, p1, p2, p3, frac)
-                            } else {
-                                audio_file.data[p1_idx + ch]
-                            };
-                            sum += s;
-                        }
-                        sum / src_channels as f32
-                    } else {
-                        // Mismatched channels - use modulo for simple mapping
-                        let src_ch = (dst_ch % src_channels) as usize;
-                        if can_interpolate && frac > 0.0 {
-                            let p0 = audio_file.data[p0_idx + src_ch];
-                            let p1 = audio_file.data[p1_idx + src_ch];
-                            let p2 = audio_file.data[p2_idx + src_ch];
-                            let p3 = audio_file.data[p3_idx + src_ch];
-                            hermite_interpolate(p0, p1, p2, p3, frac)
-                        } else {
-                            audio_file.data[p1_idx + src_ch]
-                        }
-                    };
-
-                    // Mix into output with gain
-                    let output_idx = frame_idx * dst_channels as usize + dst_ch as usize;
-                    output[output_idx] += sample * gain;
-                }
-                rendered_frames += 1;
-            }
-            rendered_frames * dst_channels as usize
-        } else {
-            0
-        }
+        let Some(audio_file) = self.files.get(pool_index) else {
+            return 0;
+        };
+
+        let src_channels = audio_file.channels as usize;
+        let dst_channels = engine_channels as usize;
+        let output_frames = output.len() / dst_channels;
+
+        // Calculate starting position in source with fractional precision
+        let src_start_position = start_time_seconds * audio_file.sample_rate as f64;
+
+        // Sample rate conversion ratio
+        let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
+
+        // Kernel size for windowed sinc (32 taps = high quality, good performance)
+        const KERNEL_SIZE: usize = 32;
+        const HALF_KERNEL: usize = KERNEL_SIZE / 2;
+
+        let mut rendered_frames = 0;
+
+        // Render frame by frame with windowed sinc interpolation
+        for output_frame in 0..output_frames {
+            // Calculate exact fractional position in source
+            let src_position = src_start_position + (output_frame as f64 * rate_ratio);
+            let src_frame = src_position.floor() as i32;
+            let frac = (src_position - src_frame as f64) as f32;
+
+            // Check if we've gone past the end of the audio file
+            if src_frame < 0 || src_frame as usize >= audio_file.frames as usize {
+                break;
+            }
+
+            // Interpolate each channel
+            for dst_ch in 0..dst_channels {
+                let sample = if src_channels == dst_channels {
+                    // Direct channel mapping
+                    let ch_offset = dst_ch;
+                    // Extract channel samples for interpolation
+                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                        let idx = src_frame + i;
+                        if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                            let sample_idx = (idx as usize) * src_channels + ch_offset;
+                            channel_samples.push(audio_file.data[sample_idx]);
+                        } else {
+                            channel_samples.push(0.0);
+                        }
+                    }
+                    windowed_sinc_interpolate(&channel_samples, frac)
+                } else if src_channels == 1 && dst_channels > 1 {
+                    // Mono to multi-channel - duplicate to all channels
+                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                        let idx = src_frame + i;
+                        if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                            channel_samples.push(audio_file.data[idx as usize]);
+                        } else {
+                            channel_samples.push(0.0);
+                        }
+                    }
+                    windowed_sinc_interpolate(&channel_samples, frac)
+                } else if src_channels > 1 && dst_channels == 1 {
+                    // Multi-channel to mono - average all source channels
+                    let mut sum = 0.0;
+                    for src_ch in 0..src_channels {
+                        let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                        for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                            let idx = src_frame + i;
+                            if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                                let sample_idx = (idx as usize) * src_channels + src_ch;
+                                channel_samples.push(audio_file.data[sample_idx]);
+                            } else {
+                                channel_samples.push(0.0);
+                            }
+                        }
+                        sum += windowed_sinc_interpolate(&channel_samples, frac);
+                    }
+                    sum / src_channels as f32
+                } else {
+                    // Mismatched channels - use modulo mapping
+                    let src_ch = dst_ch % src_channels;
+                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
+                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                        let idx = src_frame + i;
+                        if idx >= 0 && (idx as usize) < audio_file.frames as usize {
+                            let sample_idx = (idx as usize) * src_channels + src_ch;
+                            channel_samples.push(audio_file.data[sample_idx]);
+                        } else {
+                            channel_samples.push(0.0);
+                        }
+                    }
+                    windowed_sinc_interpolate(&channel_samples, frac)
+                };
+
+                // Mix into output with gain
+                let output_idx = output_frame * dst_channels + dst_ch;
+                output[output_idx] += sample * gain;
+            }
+            rendered_frames += 1;
+        }
+        rendered_frames * dst_channels
     }
 }
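
For context, here is a hypothetical call site. The diff only shows the tail of the `render_from_file` signature, so the parameter order ahead of `engine_sample_rate`, and the `fill_block` helper itself, are assumptions based on how the body uses `pool_index`, `start_time_seconds`, `output`, and `gain`:

// Hypothetical caller; the parameter order before engine_sample_rate is assumed.
fn fill_block(pool: &AudioPool) {
    let engine_sample_rate = 48_000u32;
    let engine_channels = 2u32;

    // One block of interleaved stereo output: 512 frames * 2 channels.
    let mut output = vec![0.0f32; 512 * 2];

    // Mix file 0 into the block starting 1.25 s into the source. The renderer
    // steps through the source at rate_ratio frames per output frame, so a
    // 44.1 kHz file plays at natural speed on a 48 kHz engine, interpolated
    // at each fractional position.
    let rendered_samples = pool.render_from_file(
        0,            // pool_index
        1.25,         // start_time_seconds
        &mut output,
        0.8,          // gain, mixed additively into `output`
        engine_sample_rate,
        engine_channels,
    );
    assert!(rendered_samples <= output.len());
}

Note that the renderer accumulates into `output` rather than overwriting it, so a caller that wants a clean block must zero it before rendering.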