From 8e38c0c5a11f2006aca7c0af58d13cfa6543ad66 Mon Sep 17 00:00:00 2001
From: Skyler Lehmkuhl <skycooler@gmail.com>
Date: Wed, 11 Feb 2026 16:15:16 -0500
Subject: [PATCH] Fix audio overruns

---
 daw-backend/src/audio/engine.rs           | 97 ++++++++++++++++++++---
 daw-backend/src/audio/node_graph/graph.rs | 62 ++++++++++-----
 daw-backend/src/audio/pool.rs             | 34 +++-----
 daw-backend/src/audio/project.rs          | 15 ++--
 daw-backend/src/audio/track.rs            | 17 +++-
 daw-backend/src/lib.rs                    | 41 ++++++----
 6 files changed, 189 insertions(+), 77 deletions(-)
diff --git a/daw-backend/src/audio/engine.rs b/daw-backend/src/audio/engine.rs
index cbe9e32..0ad8b64 100644
--- a/daw-backend/src/audio/engine.rs
+++ b/daw-backend/src/audio/engine.rs
@@ -63,6 +63,18 @@ pub struct Engine {
 
     // Metronome for click track
     metronome: Metronome,
+
+    // Reusable buffer for recording input samples (avoids a fresh allocation per callback; can still grow past its initial capacity)
+    recording_sample_buffer: Vec<f32>,
+
+    // Callback timing diagnostics (enabled by DAW_AUDIO_DEBUG=1)
+    debug_audio: bool,
+    callback_count: u64,
+    timing_worst_total_us: u64,
+    timing_worst_commands_us: u64,
+    timing_worst_render_us: u64,
+    timing_sum_total_us: u64,
+    timing_overrun_count: u64,
 }
 
 impl Engine {
@@ -110,6 +122,14 @@ impl Engine {
             midi_recording_state: None,
             midi_input_manager: None,
             metronome: Metronome::new(sample_rate),
+            recording_sample_buffer: Vec::with_capacity(4096),
+            debug_audio: std::env::var("DAW_AUDIO_DEBUG").map_or(false, |v| v == "1"),
+            callback_count: 0,
+            timing_worst_total_us: 0,
+            timing_worst_commands_us: 0,
+            timing_worst_render_us: 0,
+            timing_sum_total_us: 0,
+            timing_overrun_count: 0,
         }
     }
 
@@ -209,6 +229,8 @@ impl Engine {
 
     /// Process audio callback - called from the audio thread
     pub fn process(&mut self, output: &mut [f32]) {
+        let t_start = if self.debug_audio { Some(std::time::Instant::now()) } else { None };
+
         // Process all pending commands
         while let Ok(cmd) = self.command_rx.pop() {
             self.handle_command(cmd);
@@ -236,12 +258,16 @@ impl Engine {
 
         // Forward chunk generation events from background threads
         while let Ok(event) = self.chunk_generation_rx.try_recv() {
-            if let AudioEvent::WaveformChunksReady { pool_index, detail_level, ref chunks } = event {
-                println!("📬 [AUDIO THREAD] Received {} chunks for pool {} level {}, forwarding to UI", chunks.len(), pool_index, detail_level);
+            if self.debug_audio {
+                if let AudioEvent::WaveformChunksReady { pool_index, detail_level, ref chunks } = event {
+                    eprintln!("[AUDIO THREAD] Received {} chunks for pool {} level {}, forwarding to UI", chunks.len(), pool_index, detail_level);
+                }
             }
             let _ = self.event_tx.push(event);
         }
 
+        let t_commands = if self.debug_audio { Some(std::time::Instant::now()) } else { None };
+
         if self.playing {
             // Ensure mix buffer is sized correctly
             if self.mix_buffer.len() != output.len() {
@@ -323,15 +349,24 @@ impl Engine {
         // Process recording if active (independent of playback state)
         if let Some(recording) = &mut self.recording_state {
             if let Some(input_rx) = &mut self.input_rx {
-                // Pull samples from input ringbuffer
-                let mut samples = Vec::new();
+                // Phase 1: Discard stale samples by popping without storing
+                // (fast — no Vec push, no add_samples overhead)
+                while recording.samples_to_skip > 0 {
+                    match input_rx.pop() {
+                        Ok(_) => recording.samples_to_skip -= 1,
+                        Err(_) => break,
+                    }
+                }
+
+                // Phase 2: Pull fresh samples for actual recording
+                self.recording_sample_buffer.clear();
                 while let Ok(sample) = input_rx.pop() {
-                    samples.push(sample);
+                    self.recording_sample_buffer.push(sample);
                 }
 
                 // Add samples to recording
-                if !samples.is_empty() {
-                    match recording.add_samples(&samples) {
+                if !self.recording_sample_buffer.is_empty() {
+                    match recording.add_samples(&self.recording_sample_buffer) {
                         Ok(_flushed) => {
                             // Update clip duration every callback for sample-accurate timing
                             let duration = recording.duration();
@@ -348,7 +383,7 @@ impl Engine {
                             }
 
                             // Send progress event periodically (every ~0.1 seconds)
-                            self.recording_progress_counter += samples.len();
+                            self.recording_progress_counter += self.recording_sample_buffer.len();
                             if self.recording_progress_counter >= (self.sample_rate as usize / 10) {
                                 let _ = self.event_tx.push(AudioEvent::RecordingProgress(clip_id, duration));
                                 self.recording_progress_counter = 0;
@@ -366,6 +401,42 @@ impl Engine {
                 }
             }
         }
+
+        // Timing diagnostics (DAW_AUDIO_DEBUG=1)
+        if let (true, Some(t_start), Some(t_commands)) = (self.debug_audio, t_start, t_commands) {
+            let t_end = std::time::Instant::now();
+            let total_us = t_end.duration_since(t_start).as_micros() as u64;
+            let commands_us = t_commands.duration_since(t_start).as_micros() as u64;
+            let render_us = total_us.saturating_sub(commands_us);
+
+            self.callback_count += 1;
+            self.timing_sum_total_us += total_us;
+            if total_us > self.timing_worst_total_us { self.timing_worst_total_us = total_us; }
+            if commands_us > self.timing_worst_commands_us { self.timing_worst_commands_us = commands_us; }
+            if render_us > self.timing_worst_render_us { self.timing_worst_render_us = render_us; }
+
+            let frames = output.len() as u64 / self.channels as u64;
+            let deadline_us = frames * 1_000_000 / self.sample_rate as u64;
+
+            if total_us > deadline_us {
+                self.timing_overrun_count += 1;
+                eprintln!(
+                    "[AUDIO TIMING] OVERRUN #{}: total={} us (deadline={} us) | cmds={} us, render={} us | buf={} frames",
+                    self.timing_overrun_count, total_us, deadline_us, commands_us, render_us, frames
+                );
+            }
+
+            if self.callback_count % 860 == 0 {
+                let avg_us = self.timing_sum_total_us / self.callback_count;
+                eprintln!(
+                    "[AUDIO TIMING] avg={} us, worst: total={} us, cmds={} us, render={} us | overruns={}/{} ({:.1}%) | deadline={} us",
+                    avg_us, self.timing_worst_total_us, self.timing_worst_commands_us, self.timing_worst_render_us,
+                    self.timing_overrun_count, self.callback_count,
+                    self.timing_overrun_count as f64 / self.callback_count as f64 * 100.0,
+                    deadline_us
+                );
+            }
+        }
     }
 
     /// Handle a command from the UI thread
@@ -2023,9 +2094,9 @@ impl Engine {
                         flush_interval_seconds,
                     );
 
-                    // Check how many samples are currently in the input buffer and mark them for skipping
+                    // Count stale samples so we can skip them incrementally
                     let samples_in_buffer = if let Some(input_rx) = &self.input_rx {
-                        input_rx.slots()  // Number of samples currently in the buffer
+                        input_rx.slots()
                     } else {
                         0
                     };
@@ -2033,11 +2104,11 @@ impl Engine {
                     self.recording_state = Some(recording_state);
                     self.recording_progress_counter = 0; // Reset progress counter
 
-                    // Set the number of samples to skip on the recording state
+                    // Set samples to skip (drained incrementally across callbacks)
                     if let Some(recording) = &mut self.recording_state {
                         recording.samples_to_skip = samples_in_buffer;
-                        if samples_in_buffer > 0 {
-                            eprintln!("Will skip {} stale samples from input buffer", samples_in_buffer);
+                        if self.debug_audio && samples_in_buffer > 0 {
+                            eprintln!("[AUDIO DEBUG] Will skip {} stale samples from input buffer", samples_in_buffer);
                         }
                     }
 
diff --git a/daw-backend/src/audio/node_graph/graph.rs b/daw-backend/src/audio/node_graph/graph.rs
index 9f2f0f6..bd24e5c 100644
--- a/daw-backend/src/audio/node_graph/graph.rs
+++ b/daw-backend/src/audio/node_graph/graph.rs
@@ -95,6 +95,9 @@ pub struct AudioGraph {
 
     /// Current playback time (for automation nodes)
     playback_time: f64,
+
+    /// Cached topological sort order (invalidated on graph mutation)
+    topo_cache: Option<Vec<NodeIndex>>,
 }
 
 impl AudioGraph {
@@ -113,12 +116,14 @@ impl AudioGraph {
             midi_input_buffers: (0..16).map(|_| Vec::with_capacity(128)).collect(),
             node_positions: std::collections::HashMap::new(),
             playback_time: 0.0,
+            topo_cache: None,
         }
     }
 
     /// Add a node to the graph
     pub fn add_node(&mut self, node: Box<dyn AudioNode>) -> NodeIndex {
         let graph_node = GraphNode::new(node, self.buffer_size);
+        self.topo_cache = None;
         self.graph.add_node(graph_node)
     }
 
@@ -158,6 +163,7 @@ impl AudioGraph {
 
         // Add the edge
         self.graph.add_edge(from, to, Connection { from_port, to_port });
+        self.topo_cache = None;
 
         Ok(())
     }
@@ -175,6 +181,7 @@ impl AudioGraph {
             let conn = &self.graph[edge_idx];
             if conn.from_port == from_port && conn.to_port == to_port {
                 self.graph.remove_edge(edge_idx);
+                self.topo_cache = None;
             }
         }
     }
@@ -182,6 +189,7 @@ impl AudioGraph {
     /// Remove a node from the graph
     pub fn remove_node(&mut self, node: NodeIndex) {
         self.graph.remove_node(node);
+        self.topo_cache = None;
 
         // Update MIDI targets
         self.midi_targets.retain(|&idx| idx != node);
@@ -372,15 +380,21 @@ impl AudioGraph {
             }
         }
 
-        // Topological sort for processing order
-        let topo = petgraph::algo::toposort(&self.graph, None)
-            .unwrap_or_else(|_| {
-                // If there's a cycle (shouldn't happen due to validation), just process in index order
-                self.graph.node_indices().collect()
-            });
+        // Topological sort for processing order (cached, recomputed only on graph mutation)
+        if self.topo_cache.is_none() {
+            self.topo_cache = Some(
+                petgraph::algo::toposort(&self.graph, None)
+                    .unwrap_or_else(|_| {
+                        // If there's a cycle (shouldn't happen due to validation), just process in index order
+                        self.graph.node_indices().collect()
+                    })
+            );
+        }
+        let topo_len = self.topo_cache.as_ref().unwrap().len();
 
         // Process nodes in topological order
-        for node_idx in topo {
+        for topo_i in 0..topo_len {
+            let node_idx = self.topo_cache.as_ref().unwrap()[topo_i];
             // Get input port information
             let inputs = self.graph[node_idx].node.inputs();
             let num_audio_cv_inputs = inputs.iter().filter(|p| p.signal_type != SignalType::Midi).count();
@@ -409,25 +423,33 @@ impl AudioGraph {
                 }
             }
 
-            // Collect inputs from connected nodes
-            let incoming = self.graph.edges_directed(node_idx, Direction::Incoming).collect::<Vec<_>>();
+            // Collect edge info into a fixed-size stack array to avoid heap allocation; incoming edges beyond MAX_EDGES are dropped
+            // (need to collect because we borrow graph immutably for source node data)
+            const MAX_EDGES: usize = 32;
+            let mut edge_info: [(NodeIndex, usize, usize); MAX_EDGES] = [(NodeIndex::new(0), 0, 0); MAX_EDGES];
+            let mut edge_count = 0;
+            for edge in self.graph.edges_directed(node_idx, Direction::Incoming) {
+                if edge_count < MAX_EDGES {
+                    edge_info[edge_count] = (edge.source(), edge.weight().from_port, edge.weight().to_port);
+                    edge_count += 1;
+                }
+            }
 
-            for edge in incoming {
-                let source_idx = edge.source();
-                let conn = edge.weight();
+            for ei in 0..edge_count {
+                let (source_idx, from_port, to_port) = edge_info[ei];
                 let source_node = &self.graph[source_idx];
 
                 // Determine source port type
-                if conn.from_port < source_node.node.outputs().len() {
-                    let source_port_type = source_node.node.outputs()[conn.from_port].signal_type;
+                if from_port < source_node.node.outputs().len() {
+                    let source_port_type = source_node.node.outputs()[from_port].signal_type;
 
                     match source_port_type {
                         SignalType::Audio | SignalType::CV => {
                             // Copy audio/CV data
-                            if conn.to_port < num_audio_cv_inputs && conn.from_port < source_node.output_buffers.len() {
-                                let source_buffer = &source_node.output_buffers[conn.from_port];
-                                if conn.to_port < self.input_buffers.len() {
-                                    for (dst, src) in self.input_buffers[conn.to_port].iter_mut().zip(source_buffer.iter()) {
+                            if to_port < num_audio_cv_inputs && from_port < source_node.output_buffers.len() {
+                                let source_buffer = &source_node.output_buffers[from_port];
+                                if to_port < self.input_buffers.len() {
+                                    for (dst, src) in self.input_buffers[to_port].iter_mut().zip(source_buffer.iter()) {
                                         // If dst is NaN (unconnected), replace it; otherwise add (for mixing)
                                         if dst.is_nan() {
                                             *dst = *src;
@@ -442,12 +464,12 @@ impl AudioGraph {
                             // Copy MIDI events
                             // Map from global port index to MIDI-only port index
                             let midi_port_idx = inputs.iter()
-                                .take(conn.to_port + 1)
+                                .take(to_port + 1)
                                 .filter(|p| p.signal_type == SignalType::Midi)
                                 .count() - 1;
 
                             let source_midi_idx = source_node.node.outputs().iter()
-                                .take(conn.from_port + 1)
+                                .take(from_port + 1)
                                 .filter(|p| p.signal_type == SignalType::Midi)
                                 .count() - 1;
 
diff --git a/daw-backend/src/audio/pool.rs b/daw-backend/src/audio/pool.rs
index f0ad688..2aff9da 100644
--- a/daw-backend/src/audio/pool.rs
+++ b/daw-backend/src/audio/pool.rs
@@ -296,15 +296,13 @@ impl AudioClipPool {
                     // Direct channel mapping
                     let ch_offset = dst_ch;
 
-                    // Extract channel samples for interpolation
-                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
-                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                    // Extract channel samples for interpolation (stack-allocated)
+                    let mut channel_samples = [0.0f32; KERNEL_SIZE];
+                    for (j, i) in (-(HALF_KERNEL as i32)..(HALF_KERNEL as i32)).enumerate() {
                         let idx = src_frame + i;
                         if idx >= 0 && (idx as usize) < audio_file.frames as usize {
                             let sample_idx = (idx as usize) * src_channels + ch_offset;
-                            channel_samples.push(audio_file.data[sample_idx]);
-                        } else {
-                            channel_samples.push(0.0);
+                            channel_samples[j] = audio_file.data[sample_idx];
                         }
                     }
 
@@ -312,13 +310,11 @@ impl AudioClipPool {
 
                 } else if src_channels == 1 && dst_channels > 1 {
                     // Mono to stereo - duplicate
-                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
-                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                    let mut channel_samples = [0.0f32; KERNEL_SIZE];
+                    for (j, i) in (-(HALF_KERNEL as i32)..(HALF_KERNEL as i32)).enumerate() {
                         let idx = src_frame + i;
                         if idx >= 0 && (idx as usize) < audio_file.frames as usize {
-                            channel_samples.push(audio_file.data[idx as usize]);
-                        } else {
-                            channel_samples.push(0.0);
+                            channel_samples[j] = audio_file.data[idx as usize];
                         }
                     }
 
@@ -329,14 +325,12 @@ impl AudioClipPool {
                     let mut sum = 0.0;
 
                     for src_ch in 0..src_channels {
-                        let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
-                        for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                        let mut channel_samples = [0.0f32; KERNEL_SIZE];
+                        for (j, i) in (-(HALF_KERNEL as i32)..(HALF_KERNEL as i32)).enumerate() {
                             let idx = src_frame + i;
                             if idx >= 0 && (idx as usize) < audio_file.frames as usize {
                                 let sample_idx = (idx as usize) * src_channels + src_ch;
-                                channel_samples.push(audio_file.data[sample_idx]);
-                            } else {
-                                channel_samples.push(0.0);
+                                channel_samples[j] = audio_file.data[sample_idx];
                             }
                         }
                         sum += windowed_sinc_interpolate(&channel_samples, frac);
@@ -348,14 +342,12 @@ impl AudioClipPool {
                     // Mismatched channels - use modulo mapping
                     let src_ch = dst_ch % src_channels;
 
-                    let mut channel_samples = Vec::with_capacity(KERNEL_SIZE);
-                    for i in -(HALF_KERNEL as i32)..(HALF_KERNEL as i32) {
+                    let mut channel_samples = [0.0f32; KERNEL_SIZE];
+                    for (j, i) in (-(HALF_KERNEL as i32)..(HALF_KERNEL as i32)).enumerate() {
                         let idx = src_frame + i;
                         if idx >= 0 && (idx as usize) < audio_file.frames as usize {
                             let sample_idx = (idx as usize) * src_channels + src_ch;
-                            channel_samples.push(audio_file.data[sample_idx]);
-                        } else {
-                            channel_samples.push(0.0);
+                            channel_samples[j] = audio_file.data[sample_idx];
                         }
                     }
 
diff --git a/daw-backend/src/audio/project.rs b/daw-backend/src/audio/project.rs
index 1b2174f..4b21c08 100644
--- a/daw-backend/src/audio/project.rs
+++ b/daw-backend/src/audio/project.rs
@@ -367,8 +367,9 @@ impl Project {
             output.len(),
         );
 
-        // Render each root track
-        for &track_id in &self.root_tracks.clone() {
+        // Render each root track (index-based to avoid clone)
+        for i in 0..self.root_tracks.len() {
+            let track_id = self.root_tracks[i];
             self.render_track(
                 track_id,
                 output,
@@ -439,8 +440,8 @@ impl Project {
                 track.render(output, midi_pool, ctx.playhead_seconds, ctx.sample_rate, ctx.channels);
             }
             Some(TrackNode::Group(group)) => {
-                // Get children IDs, check if this group is soloed, and transform context
-                let children: Vec<TrackId> = group.children.clone();
+                // Read group properties and transform context (index-based child iteration to avoid clone)
+                let num_children = group.children.len();
                 let this_group_is_soloed = group.solo;
                 let child_ctx = group.transform_context(ctx);
 
@@ -452,7 +453,11 @@ impl Project {
                 // Recursively render all children into the group buffer
                 // If this group is soloed (or parent was soloed), children inherit that state
                 let children_parent_soloed = parent_is_soloed || this_group_is_soloed;
-                for &child_id in &children {
+                for i in 0..num_children {
+                    let child_id = match self.tracks.get(&track_id) {
+                        Some(TrackNode::Group(g)) => g.children[i],
+                        _ => break,
+                    };
                     self.render_track(
                         child_id,
                         &mut group_buffer,
diff --git a/daw-backend/src/audio/track.rs b/daw-backend/src/audio/track.rs
index 501ad80..1ab2278 100644
--- a/daw-backend/src/audio/track.rs
+++ b/daw-backend/src/audio/track.rs
@@ -578,6 +578,10 @@ pub struct AudioTrack {
     /// Runtime effects processing graph (rebuilt from preset on load)
     #[serde(skip, default = "default_audio_graph")]
     pub effects_graph: AudioGraph,
+
+    /// Pre-allocated buffer for clip rendering (avoids heap allocation per callback)
+    #[serde(skip, default)]
+    clip_render_buffer: Vec<f32>,
 }
 
 impl Clone for AudioTrack {
@@ -593,6 +597,7 @@ impl Clone for AudioTrack {
             next_automation_id: self.next_automation_id,
             effects_graph_preset: self.effects_graph_preset.clone(),
             effects_graph: default_audio_graph(), // Create fresh graph, not cloned
+            clip_render_buffer: Vec::new(),
         }
     }
 }
@@ -635,6 +640,7 @@ impl AudioTrack {
             next_automation_id: 0,
             effects_graph_preset: None,
             effects_graph,
+            clip_render_buffer: Vec::new(),
         }
     }
 
@@ -755,11 +761,13 @@ impl AudioTrack {
         let buffer_duration_seconds = output.len() as f64 / (sample_rate as f64 * channels as f64);
         let buffer_end_seconds = playhead_seconds + buffer_duration_seconds;
 
-        // Create a temporary buffer for clip rendering
-        let mut clip_buffer = vec![0.0f32; output.len()];
+        // Move clip_render_buffer out of self (put back after clip rendering) to avoid a borrow conflict with &self methods
+        let mut clip_buffer = std::mem::take(&mut self.clip_render_buffer);
+        clip_buffer.resize(output.len(), 0.0);
+        clip_buffer.fill(0.0);
         let mut rendered = 0;
 
-        // Render all active clip instances into the temporary buffer
+        // Render all active clip instances into the buffer
         for clip in &self.clips {
             // Check if clip overlaps with current buffer time range
             if clip.external_start < buffer_end_seconds && clip.external_end() > playhead_seconds {
@@ -787,6 +795,9 @@ impl AudioTrack {
             }
         }
 
+        // Put the buffer back for reuse next callback
+        self.clip_render_buffer = clip_buffer;
+
         // Process through the effects graph (this will write to output buffer)
         self.effects_graph.process(output, &[], playhead_seconds);
 
diff --git a/daw-backend/src/lib.rs b/daw-backend/src/lib.rs
index 08ba403..008a35f 100644
--- a/daw-backend/src/lib.rs
+++ b/daw-backend/src/lib.rs
@@ -48,6 +48,13 @@ impl AudioSystem {
     /// * `event_emitter` - Optional event emitter for pushing events to external systems
     /// * `buffer_size` - Audio buffer size in frames (128, 256, 512, 1024, etc.)
     ///                   Smaller = lower latency but higher CPU usage. Default: 256
+    ///
+    /// # Environment Variables
+    /// * `DAW_AUDIO_DEBUG=1` - Enable audio callback timing diagnostics. Logs:
+    ///   - Device and config info at startup
+    ///   - First 10 callback buffer sizes (to detect ALSA buffer variance)
+    ///   - Per-overrun timing breakdown (command vs render time)
+    ///   - Periodic timing summaries every 860 callbacks, ~5s at 256 frames / 44.1 kHz (avg/worst/overrun rate)
     pub fn new(
         event_emitter: Option<std::sync::Arc<dyn EventEmitter>>,
         buffer_size: u32,
@@ -62,6 +69,12 @@ impl AudioSystem {
         let default_output_config = output_device.default_output_config().map_err(|e| e.to_string())?;
         let sample_rate = default_output_config.sample_rate().0;
         let channels = default_output_config.channels() as u32;
+        let debug_audio = std::env::var("DAW_AUDIO_DEBUG").map_or(false, |v| v == "1");
+        if debug_audio {
+            eprintln!("[AUDIO DEBUG] Device: {:?}", output_device.name());
+            eprintln!("[AUDIO DEBUG] Default config: {:?}", default_output_config);
+            eprintln!("[AUDIO DEBUG] Default buffer size: {:?}", default_output_config.buffer_size());
+        }
 
         // Create queues
         let (command_tx, command_rx) = rtrb::RingBuffer::new(512); // Larger buffer for MIDI + UI commands
@@ -102,29 +115,27 @@ impl AudioSystem {
 
         let mut output_buffer = vec![0.0f32; 16384];
 
-        // Log audio configuration
-        println!("Audio Output Configuration:");
-        println!("  Sample Rate: {} Hz", output_config.sample_rate.0);
-        println!("  Channels: {}", output_config.channels);
-        println!("  Buffer Size: {:?}", output_config.buffer_size);
-
-        // Calculate expected latency
-        if let cpal::BufferSize::Fixed(size) = output_config.buffer_size {
-            let latency_ms = (size as f64 / output_config.sample_rate.0 as f64) * 1000.0;
-            println!("  Expected Latency: {:.2} ms", latency_ms);
+        if debug_audio {
+            eprintln!("[AUDIO DEBUG] Output config: sr={} Hz, ch={}, buf={:?}",
+                output_config.sample_rate.0, output_config.channels, output_config.buffer_size);
+            if let cpal::BufferSize::Fixed(size) = output_config.buffer_size {
+                let latency_ms = (size as f64 / output_config.sample_rate.0 as f64) * 1000.0;
+                eprintln!("[AUDIO DEBUG] Expected latency: {:.2} ms", latency_ms);
+            }
         }
 
-        let mut first_callback = true;
+        let mut callback_log_count: u32 = 0;
+        let cb_debug = debug_audio;
         let output_stream = output_device
             .build_output_stream(
                 &output_config,
                 move |data: &mut [f32], _: &cpal::OutputCallbackInfo| {
-                    if first_callback {
+                    if cb_debug && callback_log_count < 10 {
                         let frames = data.len() / output_config.channels as usize;
                         let latency_ms = (frames as f64 / output_config.sample_rate.0 as f64) * 1000.0;
-                        println!("Audio callback buffer size: {} samples ({} frames, {:.2} ms latency)",
-                                 data.len(), frames, latency_ms);
-                        first_callback = false;
+                        eprintln!("[AUDIO CB #{}] {} samples ({} frames, {:.2} ms)",
+                                 callback_log_count, data.len(), frames, latency_ms);
+                        callback_log_count += 1;
                     }
                     let buf = &mut output_buffer[..data.len()];
                     buf.fill(0.0);