better time stretching
This commit is contained in:
parent
f9e2d36f3a
commit
d4fb8b721a
|
|
@ -1,17 +1,53 @@
|
|||
use std::path::PathBuf;
|
||||
use std::f32::consts::PI;
|
||||
|
||||
/// Cubic Hermite interpolation for smooth resampling
|
||||
/// p0, p1, p2, p3 are four consecutive samples
|
||||
/// x is the fractional position between p1 and p2 (0.0 to 1.0)
|
||||
/// Windowed sinc interpolation for high-quality time stretching
|
||||
/// This is stateless and can handle arbitrary fractional positions
|
||||
#[inline]
|
||||
fn hermite_interpolate(p0: f32, p1: f32, p2: f32, p3: f32, x: f32) -> f32 {
|
||||
// Hermite basis functions for smooth interpolation
|
||||
let c0 = p1;
|
||||
let c1 = 0.5 * (p2 - p0);
|
||||
let c2 = p0 - 2.5 * p1 + 2.0 * p2 - 0.5 * p3;
|
||||
let c3 = 0.5 * (p3 - p0) + 1.5 * (p1 - p2);
|
||||
/// Normalized sinc: sin(pi * x) / (pi * x).
/// Returns 1.0 near x == 0, where the ratio has a removable singularity.
fn sinc(x: f32) -> f32 {
    // Guard the singularity: for |x| below this threshold the ratio is
    // numerically indistinguishable from 1.0.
    if x.abs() >= 1e-5 {
        let t = PI * x;
        return t.sin() / t;
    }
    1.0
}
|
||||
|
||||
((c3 * x + c2) * x + c1) * x + c0
|
||||
/// Blackman window function.
///
/// `x` is the offset from the window centre; `width` is the half-width of
/// the support. Outside `[-width, width]` the window is zero.
#[inline]
fn blackman_window(x: f32, width: f32) -> f32 {
    // Classic three-term Blackman coefficients.
    const A0: f32 = 0.42;
    const A1: f32 = 0.5;
    const A2: f32 = 0.08;

    if x.abs() > width {
        return 0.0;
    }
    // Map x from [-width, width] to [0, 1] so the standard Blackman
    // formula can be evaluated over one period.
    let n = (x / width + 1.0) / 2.0;
    A0 - A1 * (2.0 * PI * n).cos() + A2 * (4.0 * PI * n).cos()
}
|
||||
|
||||
/// High-quality windowed sinc interpolation
|
||||
/// Uses a 32-tap windowed sinc kernel for smooth, artifact-free interpolation
|
||||
/// frac: fractional position to interpolate at (0.0 to 1.0)
|
||||
/// samples: array of samples centered around the target position
|
||||
#[inline]
|
||||
fn windowed_sinc_interpolate(samples: &[f32], frac: f32) -> f32 {
|
||||
let mut result = 0.0;
|
||||
let kernel_size = samples.len();
|
||||
let half_kernel = (kernel_size / 2) as f32;
|
||||
|
||||
for i in 0..kernel_size {
|
||||
// Distance from interpolation point
|
||||
// samples[half_kernel] is at position 0, we want to interpolate at position frac
|
||||
let x = frac + half_kernel - (i as f32);
|
||||
let sinc_val = sinc(x);
|
||||
let window_val = blackman_window(x, half_kernel);
|
||||
result += samples[i] * sinc_val * window_val;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Audio file stored in the pool
|
||||
|
|
@ -73,7 +109,7 @@ impl AudioPool {
|
|||
self.files.len()
|
||||
}
|
||||
|
||||
/// Render audio from a file in the pool with sample rate and channel conversion
|
||||
/// Render audio from a file in the pool with high-quality windowed sinc interpolation
|
||||
/// start_time_seconds: position in the audio file to start reading from (in seconds)
|
||||
/// Returns the number of samples actually rendered
|
||||
pub fn render_from_file(
|
||||
|
|
@ -85,123 +121,119 @@ impl AudioPool {
|
|||
engine_sample_rate: u32,
|
||||
engine_channels: u32,
|
||||
) -> usize {
|
||||
if let Some(audio_file) = self.files.get(pool_index) {
|
||||
// Calculate starting frame position in the source file (frame = one sample per channel)
|
||||
let src_start_frame = start_time_seconds * audio_file.sample_rate as f64;
|
||||
let Some(audio_file) = self.files.get(pool_index) else {
|
||||
return 0;
|
||||
};
|
||||
|
||||
// Calculate sample rate conversion ratio (frames)
|
||||
let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
|
||||
let src_channels = audio_file.channels as usize;
|
||||
let dst_channels = engine_channels as usize;
|
||||
let output_frames = output.len() / dst_channels;
|
||||
|
||||
let src_channels = audio_file.channels;
|
||||
let dst_channels = engine_channels;
|
||||
// Calculate starting position in source with fractional precision
|
||||
let src_start_position = start_time_seconds * audio_file.sample_rate as f64;
|
||||
|
||||
// Render frame by frame
|
||||
let output_frames = output.len() / dst_channels as usize;
|
||||
let mut rendered_frames = 0;
|
||||
// Sample rate conversion ratio
|
||||
let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
|
||||
|
||||
for frame_idx in 0..output_frames {
|
||||
// Calculate the corresponding frame in the source file
|
||||
let src_frame_pos = src_start_frame + (frame_idx as f64 * rate_ratio);
|
||||
let src_frame_idx = src_frame_pos as usize;
|
||||
// Kernel size for windowed sinc (32 taps = high quality, good performance)
|
||||
const KERNEL_SIZE: usize = 32;
|
||||
const HALF_KERNEL: usize = KERNEL_SIZE / 2;
|
||||
|
||||
// Check bounds
|
||||
if src_frame_idx >= audio_file.frames as usize {
|
||||
break;
|
||||
}
|
||||
let mut rendered_frames = 0;
|
||||
|
||||
// Calculate source sample index (interleaved)
|
||||
let src_sample_idx = src_frame_idx * src_channels as usize;
|
||||
// Render frame by frame with windowed sinc interpolation
|
||||
for output_frame in 0..output_frames {
|
||||
// Calculate exact fractional position in source
|
||||
let src_position = src_start_position + (output_frame as f64 * rate_ratio);
|
||||
let src_frame = src_position.floor() as i32;
|
||||
let frac = (src_position - src_frame as f64) as f32;
|
||||
|
||||
// Check bounds for interpolation
|
||||
if src_sample_idx + src_channels as usize > audio_file.data.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
// Cubic Hermite interpolation for high-quality time stretching
|
||||
let frac = (src_frame_pos - src_frame_idx as f64) as f32;
|
||||
|
||||
// We need 4 points for cubic interpolation: p0, p1, p2, p3
|
||||
// where we interpolate between p1 and p2
|
||||
let p1_frame = src_frame_idx;
|
||||
let p0_frame = if p1_frame > 0 { p1_frame - 1 } else { p1_frame };
|
||||
let p2_frame = p1_frame + 1;
|
||||
let p3_frame = p1_frame + 2;
|
||||
|
||||
let p0_idx = p0_frame * src_channels as usize;
|
||||
let p1_idx = p1_frame * src_channels as usize;
|
||||
let p2_idx = p2_frame * src_channels as usize;
|
||||
let p3_idx = p3_frame * src_channels as usize;
|
||||
|
||||
let can_interpolate = p3_idx + src_channels as usize <= audio_file.data.len();
|
||||
|
||||
// Read and convert channels
|
||||
for dst_ch in 0..dst_channels {
|
||||
let sample = if src_channels == dst_channels {
|
||||
// Same number of channels - direct mapping
|
||||
let ch = dst_ch as usize;
|
||||
if can_interpolate && frac > 0.0 {
|
||||
let p0 = audio_file.data[p0_idx + ch];
|
||||
let p1 = audio_file.data[p1_idx + ch];
|
||||
let p2 = audio_file.data[p2_idx + ch];
|
||||
let p3 = audio_file.data[p3_idx + ch];
|
||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
||||
} else {
|
||||
audio_file.data[p1_idx + ch]
|
||||
}
|
||||
} else if src_channels == 1 && dst_channels > 1 {
|
||||
// Mono to multi-channel - duplicate to all channels
|
||||
if can_interpolate && frac > 0.0 {
|
||||
let p0 = audio_file.data[p0_idx];
|
||||
let p1 = audio_file.data[p1_idx];
|
||||
let p2 = audio_file.data[p2_idx];
|
||||
let p3 = audio_file.data[p3_idx];
|
||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
||||
} else {
|
||||
audio_file.data[p1_idx]
|
||||
}
|
||||
} else if src_channels > 1 && dst_channels == 1 {
|
||||
// Multi-channel to mono - average all source channels
|
||||
let mut sum = 0.0f32;
|
||||
for src_ch in 0..src_channels {
|
||||
let ch = src_ch as usize;
|
||||
let s = if can_interpolate && frac > 0.0 {
|
||||
let p0 = audio_file.data[p0_idx + ch];
|
||||
let p1 = audio_file.data[p1_idx + ch];
|
||||
let p2 = audio_file.data[p2_idx + ch];
|
||||
let p3 = audio_file.data[p3_idx + ch];
|
||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
||||
} else {
|
||||
audio_file.data[p1_idx + ch]
|
||||
};
|
||||
sum += s;
|
||||
}
|
||||
sum / src_channels as f32
|
||||
} else {
|
||||
// Mismatched channels - use modulo for simple mapping
|
||||
let src_ch = (dst_ch % src_channels) as usize;
|
||||
if can_interpolate && frac > 0.0 {
|
||||
let p0 = audio_file.data[p0_idx + src_ch];
|
||||
let p1 = audio_file.data[p1_idx + src_ch];
|
||||
let p2 = audio_file.data[p2_idx + src_ch];
|
||||
let p3 = audio_file.data[p3_idx + src_ch];
|
||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
||||
} else {
|
||||
audio_file.data[p1_idx + src_ch]
|
||||
}
|
||||
};
|
||||
|
||||
// Mix into output with gain
|
||||
let output_idx = frame_idx * dst_channels as usize + dst_ch as usize;
|
||||
output[output_idx] += sample * gain;
|
||||
}
|
||||
|
||||
rendered_frames += 1;
|
||||
// Check if we've gone past the end of the audio file
|
||||
if src_frame < 0 || src_frame as usize >= audio_file.frames as usize {
|
||||
break;
|
||||
}
|
||||
|
||||
rendered_frames * dst_channels as usize
|
||||
} else {
|
||||
0
|
||||
// Interpolate each channel
|
||||
for dst_ch in 0..dst_channels {
|
||||
let sample = if src_channels == dst_channels {
|
||||
// Direct channel mapping
|
||||
let ch_offset = dst_ch;
|
||||
|
||||
// Extract channel samples for interpolation
|
||||
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||
let idx = src_frame + i;
|
||||
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||
let sample_idx = (idx as usize) * src_channels + ch_offset;
|
||||
channel_samples.push(audio_file.data[sample_idx]);
|
||||
} else {
|
||||
channel_samples.push(0.0);
|
||||
}
|
||||
}
|
||||
|
||||
windowed_sinc_interpolate(&channel_samples, frac)
|
||||
|
||||
} else if src_channels == 1 && dst_channels > 1 {
|
||||
// Mono to stereo - duplicate
|
||||
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||
let idx = src_frame + i;
|
||||
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||
channel_samples.push(audio_file.data[idx as usize]);
|
||||
} else {
|
||||
channel_samples.push(0.0);
|
||||
}
|
||||
}
|
||||
|
||||
windowed_sinc_interpolate(&channel_samples, frac)
|
||||
|
||||
} else if src_channels > 1 && dst_channels == 1 {
|
||||
// Multi-channel to mono - average all source channels
|
||||
let mut sum = 0.0;
|
||||
|
||||
for src_ch in 0..src_channels {
|
||||
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||
let idx = src_frame + i;
|
||||
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||
let sample_idx = (idx as usize) * src_channels + src_ch;
|
||||
channel_samples.push(audio_file.data[sample_idx]);
|
||||
} else {
|
||||
channel_samples.push(0.0);
|
||||
}
|
||||
}
|
||||
sum += windowed_sinc_interpolate(&channel_samples, frac);
|
||||
}
|
||||
|
||||
sum / src_channels as f32
|
||||
|
||||
} else {
|
||||
// Mismatched channels - use modulo mapping
|
||||
let src_ch = dst_ch % src_channels;
|
||||
|
||||
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||
let idx = src_frame + i;
|
||||
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||
let sample_idx = (idx as usize) * src_channels + src_ch;
|
||||
channel_samples.push(audio_file.data[sample_idx]);
|
||||
} else {
|
||||
channel_samples.push(0.0);
|
||||
}
|
||||
}
|
||||
|
||||
windowed_sinc_interpolate(&channel_samples, frac)
|
||||
};
|
||||
|
||||
// Mix into output with gain
|
||||
let output_idx = output_frame * dst_channels + dst_ch;
|
||||
output[output_idx] += sample * gain;
|
||||
}
|
||||
|
||||
rendered_frames += 1;
|
||||
}
|
||||
|
||||
rendered_frames * dst_channels
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue