better time stretching
This commit is contained in:
parent
f9e2d36f3a
commit
d4fb8b721a
|
|
@ -1,17 +1,53 @@
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use std::f32::consts::PI;
|
||||||
|
|
||||||
/// Normalized sinc function, `sin(pi*x) / (pi*x)`.
///
/// The ideal low-pass interpolation kernel used for high-quality time
/// stretching. Stateless, so it can be evaluated at arbitrary fractional
/// positions.
#[inline]
fn sinc(x: f32) -> f32 {
    // Guard the removable singularity at x == 0, where the limit is 1.
    if x.abs() < 1e-5 {
        return 1.0;
    }
    let scaled = PI * x;
    scaled.sin() / scaled
}
|
/// Blackman window function.
///
/// Tapers the sinc kernel so the truncated interpolation filter does not
/// ring. Returns 0.0 for any `x` outside `[-width, width]`; inside, the
/// window peaks at 1.0 for x == 0 and falls to ~0 at the edges.
#[inline]
fn blackman_window(x: f32, width: f32) -> f32 {
    if x.abs() > width {
        return 0.0;
    }
    // Remap x from [-width, width] onto [0, 1], the window's natural domain.
    let n = (x / width + 1.0) / 2.0;
    // Classic Blackman coefficients: a0 = 0.42, a1 = 0.5, a2 = 0.08.
    let (a0, a1, a2) = (0.42, 0.5, 0.08);
    a0 - a1 * (2.0 * PI * n).cos() + a2 * (4.0 * PI * n).cos()
}
|
||||||
|
|
||||||
|
/// High-quality windowed sinc interpolation
|
||||||
|
/// Uses a 32-tap windowed sinc kernel for smooth, artifact-free interpolation
|
||||||
|
/// frac: fractional position to interpolate at (0.0 to 1.0)
|
||||||
|
/// samples: array of samples centered around the target position
|
||||||
|
#[inline]
|
||||||
|
fn windowed_sinc_interpolate(samples: &[f32], frac: f32) -> f32 {
|
||||||
|
let mut result = 0.0;
|
||||||
|
let kernel_size = samples.len();
|
||||||
|
let half_kernel = (kernel_size / 2) as f32;
|
||||||
|
|
||||||
|
for i in 0..kernel_size {
|
||||||
|
// Distance from interpolation point
|
||||||
|
// samples[half_kernel] is at position 0, we want to interpolate at position frac
|
||||||
|
let x = frac + half_kernel - (i as f32);
|
||||||
|
let sinc_val = sinc(x);
|
||||||
|
let window_val = blackman_window(x, half_kernel);
|
||||||
|
result += samples[i] * sinc_val * window_val;
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Audio file stored in the pool
|
/// Audio file stored in the pool
|
||||||
|
|
@ -73,7 +109,7 @@ impl AudioPool {
|
||||||
self.files.len()
|
self.files.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Render audio from a file in the pool with sample rate and channel conversion
|
/// Render audio from a file in the pool with high-quality windowed sinc interpolation
|
||||||
/// start_time_seconds: position in the audio file to start reading from (in seconds)
|
/// start_time_seconds: position in the audio file to start reading from (in seconds)
|
||||||
/// Returns the number of samples actually rendered
|
/// Returns the number of samples actually rendered
|
||||||
pub fn render_from_file(
|
pub fn render_from_file(
|
||||||
|
|
@ -85,123 +121,119 @@ impl AudioPool {
|
||||||
engine_sample_rate: u32,
|
engine_sample_rate: u32,
|
||||||
engine_channels: u32,
|
engine_channels: u32,
|
||||||
) -> usize {
|
) -> usize {
|
||||||
if let Some(audio_file) = self.files.get(pool_index) {
|
let Some(audio_file) = self.files.get(pool_index) else {
|
||||||
// Calculate starting frame position in the source file (frame = one sample per channel)
|
return 0;
|
||||||
let src_start_frame = start_time_seconds * audio_file.sample_rate as f64;
|
};
|
||||||
|
|
||||||
// Calculate sample rate conversion ratio (frames)
|
let src_channels = audio_file.channels as usize;
|
||||||
let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
|
let dst_channels = engine_channels as usize;
|
||||||
|
let output_frames = output.len() / dst_channels;
|
||||||
|
|
||||||
let src_channels = audio_file.channels;
|
// Calculate starting position in source with fractional precision
|
||||||
let dst_channels = engine_channels;
|
let src_start_position = start_time_seconds * audio_file.sample_rate as f64;
|
||||||
|
|
||||||
// Render frame by frame
|
// Sample rate conversion ratio
|
||||||
let output_frames = output.len() / dst_channels as usize;
|
let rate_ratio = audio_file.sample_rate as f64 / engine_sample_rate as f64;
|
||||||
let mut rendered_frames = 0;
|
|
||||||
|
|
||||||
for frame_idx in 0..output_frames {
|
// Kernel size for windowed sinc (32 taps = high quality, good performance)
|
||||||
// Calculate the corresponding frame in the source file
|
const KERNEL_SIZE: usize = 32;
|
||||||
let src_frame_pos = src_start_frame + (frame_idx as f64 * rate_ratio);
|
const HALF_KERNEL: usize = KERNEL_SIZE / 2;
|
||||||
let src_frame_idx = src_frame_pos as usize;
|
|
||||||
|
|
||||||
// Check bounds
|
let mut rendered_frames = 0;
|
||||||
if src_frame_idx >= audio_file.frames as usize {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate source sample index (interleaved)
|
// Render frame by frame with windowed sinc interpolation
|
||||||
let src_sample_idx = src_frame_idx * src_channels as usize;
|
for output_frame in 0..output_frames {
|
||||||
|
// Calculate exact fractional position in source
|
||||||
|
let src_position = src_start_position + (output_frame as f64 * rate_ratio);
|
||||||
|
let src_frame = src_position.floor() as i32;
|
||||||
|
let frac = (src_position - src_frame as f64) as f32;
|
||||||
|
|
||||||
// Check bounds for interpolation
|
// Check if we've gone past the end of the audio file
|
||||||
if src_sample_idx + src_channels as usize > audio_file.data.len() {
|
if src_frame < 0 || src_frame as usize >= audio_file.frames as usize {
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
|
|
||||||
// Cubic Hermite interpolation for high-quality time stretching
|
|
||||||
let frac = (src_frame_pos - src_frame_idx as f64) as f32;
|
|
||||||
|
|
||||||
// We need 4 points for cubic interpolation: p0, p1, p2, p3
|
|
||||||
// where we interpolate between p1 and p2
|
|
||||||
let p1_frame = src_frame_idx;
|
|
||||||
let p0_frame = if p1_frame > 0 { p1_frame - 1 } else { p1_frame };
|
|
||||||
let p2_frame = p1_frame + 1;
|
|
||||||
let p3_frame = p1_frame + 2;
|
|
||||||
|
|
||||||
let p0_idx = p0_frame * src_channels as usize;
|
|
||||||
let p1_idx = p1_frame * src_channels as usize;
|
|
||||||
let p2_idx = p2_frame * src_channels as usize;
|
|
||||||
let p3_idx = p3_frame * src_channels as usize;
|
|
||||||
|
|
||||||
let can_interpolate = p3_idx + src_channels as usize <= audio_file.data.len();
|
|
||||||
|
|
||||||
// Read and convert channels
|
|
||||||
for dst_ch in 0..dst_channels {
|
|
||||||
let sample = if src_channels == dst_channels {
|
|
||||||
// Same number of channels - direct mapping
|
|
||||||
let ch = dst_ch as usize;
|
|
||||||
if can_interpolate && frac > 0.0 {
|
|
||||||
let p0 = audio_file.data[p0_idx + ch];
|
|
||||||
let p1 = audio_file.data[p1_idx + ch];
|
|
||||||
let p2 = audio_file.data[p2_idx + ch];
|
|
||||||
let p3 = audio_file.data[p3_idx + ch];
|
|
||||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
|
||||||
} else {
|
|
||||||
audio_file.data[p1_idx + ch]
|
|
||||||
}
|
|
||||||
} else if src_channels == 1 && dst_channels > 1 {
|
|
||||||
// Mono to multi-channel - duplicate to all channels
|
|
||||||
if can_interpolate && frac > 0.0 {
|
|
||||||
let p0 = audio_file.data[p0_idx];
|
|
||||||
let p1 = audio_file.data[p1_idx];
|
|
||||||
let p2 = audio_file.data[p2_idx];
|
|
||||||
let p3 = audio_file.data[p3_idx];
|
|
||||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
|
||||||
} else {
|
|
||||||
audio_file.data[p1_idx]
|
|
||||||
}
|
|
||||||
} else if src_channels > 1 && dst_channels == 1 {
|
|
||||||
// Multi-channel to mono - average all source channels
|
|
||||||
let mut sum = 0.0f32;
|
|
||||||
for src_ch in 0..src_channels {
|
|
||||||
let ch = src_ch as usize;
|
|
||||||
let s = if can_interpolate && frac > 0.0 {
|
|
||||||
let p0 = audio_file.data[p0_idx + ch];
|
|
||||||
let p1 = audio_file.data[p1_idx + ch];
|
|
||||||
let p2 = audio_file.data[p2_idx + ch];
|
|
||||||
let p3 = audio_file.data[p3_idx + ch];
|
|
||||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
|
||||||
} else {
|
|
||||||
audio_file.data[p1_idx + ch]
|
|
||||||
};
|
|
||||||
sum += s;
|
|
||||||
}
|
|
||||||
sum / src_channels as f32
|
|
||||||
} else {
|
|
||||||
// Mismatched channels - use modulo for simple mapping
|
|
||||||
let src_ch = (dst_ch % src_channels) as usize;
|
|
||||||
if can_interpolate && frac > 0.0 {
|
|
||||||
let p0 = audio_file.data[p0_idx + src_ch];
|
|
||||||
let p1 = audio_file.data[p1_idx + src_ch];
|
|
||||||
let p2 = audio_file.data[p2_idx + src_ch];
|
|
||||||
let p3 = audio_file.data[p3_idx + src_ch];
|
|
||||||
hermite_interpolate(p0, p1, p2, p3, frac)
|
|
||||||
} else {
|
|
||||||
audio_file.data[p1_idx + src_ch]
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Mix into output with gain
|
|
||||||
let output_idx = frame_idx * dst_channels as usize + dst_ch as usize;
|
|
||||||
output[output_idx] += sample * gain;
|
|
||||||
}
|
|
||||||
|
|
||||||
rendered_frames += 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rendered_frames * dst_channels as usize
|
// Interpolate each channel
|
||||||
} else {
|
for dst_ch in 0..dst_channels {
|
||||||
0
|
let sample = if src_channels == dst_channels {
|
||||||
|
// Direct channel mapping
|
||||||
|
let ch_offset = dst_ch;
|
||||||
|
|
||||||
|
// Extract channel samples for interpolation
|
||||||
|
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||||
|
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||||
|
let idx = src_frame + i;
|
||||||
|
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||||
|
let sample_idx = (idx as usize) * src_channels + ch_offset;
|
||||||
|
channel_samples.push(audio_file.data[sample_idx]);
|
||||||
|
} else {
|
||||||
|
channel_samples.push(0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
windowed_sinc_interpolate(&channel_samples, frac)
|
||||||
|
|
||||||
|
} else if src_channels == 1 && dst_channels > 1 {
|
||||||
|
// Mono to stereo - duplicate
|
||||||
|
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||||
|
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||||
|
let idx = src_frame + i;
|
||||||
|
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||||
|
channel_samples.push(audio_file.data[idx as usize]);
|
||||||
|
} else {
|
||||||
|
channel_samples.push(0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
windowed_sinc_interpolate(&channel_samples, frac)
|
||||||
|
|
||||||
|
} else if src_channels > 1 && dst_channels == 1 {
|
||||||
|
// Multi-channel to mono - average all source channels
|
||||||
|
let mut sum = 0.0;
|
||||||
|
|
||||||
|
for src_ch in 0..src_channels {
|
||||||
|
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||||
|
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||||
|
let idx = src_frame + i;
|
||||||
|
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||||
|
let sample_idx = (idx as usize) * src_channels + src_ch;
|
||||||
|
channel_samples.push(audio_file.data[sample_idx]);
|
||||||
|
} else {
|
||||||
|
channel_samples.push(0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sum += windowed_sinc_interpolate(&channel_samples, frac);
|
||||||
|
}
|
||||||
|
|
||||||
|
sum / src_channels as f32
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// Mismatched channels - use modulo mapping
|
||||||
|
let src_ch = dst_ch % src_channels;
|
||||||
|
|
||||||
|
let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
|
||||||
|
for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
|
||||||
|
let idx = src_frame + i;
|
||||||
|
if idx >= 0 && (idx as usize) < audio_file.frames as usize {
|
||||||
|
let sample_idx = (idx as usize) * src_channels + src_ch;
|
||||||
|
channel_samples.push(audio_file.data[sample_idx]);
|
||||||
|
} else {
|
||||||
|
channel_samples.push(0.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
windowed_sinc_interpolate(&channel_samples, frac)
|
||||||
|
};
|
||||||
|
|
||||||
|
// Mix into output with gain
|
||||||
|
let output_idx = output_frame * dst_channels + dst_ch;
|
||||||
|
output[output_idx] += sample * gain;
|
||||||
|
}
|
||||||
|
|
||||||
|
rendered_frames += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rendered_frames * dst_channels
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue