From 408343094a0f00988592ad349378d8ebac0a786f Mon Sep 17 00:00:00 2001 From: Skyler Lehmkuhl Date: Sat, 14 Feb 2026 23:58:20 -0500 Subject: [PATCH] Stream audio to spectrograph shader too --- daw-backend/src/audio/engine.rs | 42 +++---- daw-backend/src/command/types.rs | 12 +- daw-backend/src/io/audio_file.rs | 116 ++++++++++++++++++ lightningbeam-ui/Cargo.toml | 4 - .../lightningbeam-editor/src/cqt_gpu.rs | 76 ++++++++---- .../lightningbeam-editor/src/main.rs | 65 +++++++--- .../src/panes/piano_roll.rs | 9 +- .../src/panes/shaders/cqt_compute.wgsl | 4 +- .../src/panes/shaders/cqt_render.wgsl | 7 +- .../src/panes/shaders/waveform.wgsl | 5 +- .../src/panes/timeline.rs | 2 +- .../lightningbeam-editor/src/waveform_gpu.rs | 116 +++++++++++++++--- 12 files changed, 366 insertions(+), 92 deletions(-) diff --git a/daw-backend/src/audio/engine.rs b/daw-backend/src/audio/engine.rs index 94f1b13..0bcd3f2 100644 --- a/daw-backend/src/audio/engine.rs +++ b/daw-backend/src/audio/engine.rs @@ -272,24 +272,25 @@ impl Engine { // Forward chunk generation events from background threads while let Ok(event) = self.chunk_generation_rx.try_recv() { match event { - AudioEvent::WaveformDecodeComplete { pool_index, samples } => { - // Update pool entry with decoded waveform samples + AudioEvent::WaveformDecodeComplete { pool_index, samples, decoded_frames: df, total_frames: _tf } => { + // Update pool entry and forward samples directly to UI if let Some(file) = self.audio_pool.get_file_mut(pool_index) { - let total = file.frames; + let sr = file.sample_rate; + let ch = file.channels; if let crate::audio::pool::AudioStorage::Compressed { ref mut decoded_for_waveform, ref mut decoded_frames, .. 
} = file.storage { - eprintln!("[ENGINE] Waveform decode complete for pool {}: {} samples", pool_index, samples.len()); - *decoded_for_waveform = samples; - *decoded_frames = total; + *decoded_for_waveform = samples.clone(); + *decoded_frames = df; } - // Notify frontend that waveform data is ready + // Send samples inline — UI won't need to query back let _ = self.event_tx.push(AudioEvent::AudioDecodeProgress { pool_index, - decoded_frames: total, - total_frames: total, + samples, + sample_rate: sr, + channels: ch, }); } } @@ -1789,26 +1790,25 @@ impl Engine { }); } - // Spawn background thread to decode full file for waveform display + // Spawn background thread to decode file progressively for waveform display let bg_tx = self.chunk_generation_tx.clone(); let bg_path = path.to_path_buf(); + let bg_total_frames = total_frames; let _ = std::thread::Builder::new() .name(format!("waveform-decode-{}", idx)) .spawn(move || { - eprintln!("[WAVEFORM DECODE] Starting full decode of {:?}", bg_path); - match crate::io::AudioFile::load(&bg_path) { - Ok(loaded) => { - eprintln!("[WAVEFORM DECODE] Complete: {} frames, {} channels", - loaded.frames, loaded.channels); + crate::io::AudioFile::decode_progressive( + &bg_path, + bg_total_frames, + |audio_data, decoded_frames, total| { let _ = bg_tx.send(AudioEvent::WaveformDecodeComplete { pool_index: idx, - samples: loaded.data, + samples: audio_data.to_vec(), + decoded_frames, + total_frames: total, }); - } - Err(e) => { - eprintln!("[WAVEFORM DECODE] Failed to decode {:?}: {}", bg_path, e); - } - } + }, + ); }); idx } diff --git a/daw-backend/src/command/types.rs b/daw-backend/src/command/types.rs index 38e080e..d776679 100644 --- a/daw-backend/src/command/types.rs +++ b/daw-backend/src/command/types.rs @@ -274,18 +274,22 @@ pub enum AudioEvent { }, /// Progressive decode progress for a compressed audio file's waveform data. - /// The UI can use this to update waveform display incrementally. 
+ /// Carries the samples inline so the UI doesn't need to query back. AudioDecodeProgress { pool_index: usize, - decoded_frames: u64, - total_frames: u64, + samples: Vec<f32>, + sample_rate: u32, + channels: u32, }, - /// Background waveform decode completed for a compressed audio file. + /// Background waveform decode progress/completion for a compressed audio file. /// Internal event — consumed by the engine to update the pool, not forwarded to UI. + /// `decoded_frames` < `total_frames` means partial; equal means complete. WaveformDecodeComplete { pool_index: usize, samples: Vec<f32>, + decoded_frames: u64, + total_frames: u64, }, } diff --git a/daw-backend/src/io/audio_file.rs b/daw-backend/src/io/audio_file.rs index 6a2d841..2aadfec 100644 --- a/daw-backend/src/io/audio_file.rs +++ b/daw-backend/src/io/audio_file.rs @@ -338,6 +338,122 @@ impl AudioFile { }) } + /// Decode a compressed audio file progressively, calling `on_progress` with + /// partial data snapshots so the UI can display waveforms as they decode. + /// Sends updates roughly every 2 seconds of decoded audio.
+ pub fn decode_progressive<P: AsRef<Path>, F>(path: P, total_frames: u64, on_progress: F) + where + F: Fn(&[f32], u64, u64), + { + let path = path.as_ref(); + + let file = match std::fs::File::open(path) { + Ok(f) => f, + Err(e) => { + eprintln!("[WAVEFORM DECODE] Failed to open {:?}: {}", path, e); + return; + } + }; + + let mss = MediaSourceStream::new(Box::new(file), Default::default()); + + let mut hint = Hint::new(); + if let Some(extension) = path.extension() { + if let Some(ext_str) = extension.to_str() { + hint.with_extension(ext_str); + } + } + + let probed = match symphonia::default::get_probe() + .format(&hint, mss, &FormatOptions::default(), &MetadataOptions::default()) + { + Ok(p) => p, + Err(e) => { + eprintln!("[WAVEFORM DECODE] Failed to probe {:?}: {}", path, e); + return; + } + }; + + let mut format = probed.format; + + let track = match format.tracks().iter() + .find(|t| t.codec_params.codec != symphonia::core::codecs::CODEC_TYPE_NULL) + { + Some(t) => t, + None => { + eprintln!("[WAVEFORM DECODE] No audio tracks in {:?}", path); + return; + } + }; + + let track_id = track.id; + let channels = track.codec_params.channels + .map(|c| c.count() as u32) + .unwrap_or(2); + let sample_rate = track.codec_params.sample_rate.unwrap_or(44100); + + let mut decoder = match symphonia::default::get_codecs() + .make(&track.codec_params, &DecoderOptions::default()) + { + Ok(d) => d, + Err(e) => { + eprintln!("[WAVEFORM DECODE] Failed to create decoder for {:?}: {}", path, e); + return; + } + }; + + let mut audio_data = Vec::new(); + let mut sample_buf = None; + // Send a progress update roughly every 2 seconds of audio + // Send first update quickly (0.25s), then every 2s of audio + let initial_interval = (sample_rate as usize * channels as usize) / 4; + let steady_interval = (sample_rate as usize * channels as usize) * 2; + let mut sent_first = false; + let mut last_update_len = 0usize; + + loop { + let packet = match format.next_packet() { + Ok(packet) => packet, +
Err(Error::IoError(e)) if e.kind() == std::io::ErrorKind::UnexpectedEof => break, + Err(Error::ResetRequired) => break, + Err(_) => break, + }; + + if packet.track_id() != track_id { + continue; + } + + match decoder.decode(&packet) { + Ok(decoded) => { + if sample_buf.is_none() { + let spec = *decoded.spec(); + let duration = decoded.capacity() as u64; + sample_buf = Some(SampleBuffer::<f32>::new(duration, spec)); + } + if let Some(ref mut buf) = sample_buf { + buf.copy_interleaved_ref(decoded); + audio_data.extend_from_slice(buf.samples()); + } + + // Send progressive update (fast initial, then periodic) + let interval = if sent_first { steady_interval } else { initial_interval }; + if audio_data.len() - last_update_len >= interval { + let decoded_frames = audio_data.len() as u64 / channels as u64; + on_progress(&audio_data, decoded_frames, total_frames); + last_update_len = audio_data.len(); + sent_first = true; + } + } + Err(Error::DecodeError(_)) => continue, + Err(_) => break, + } + } + + // Final update with all data + let decoded_frames = audio_data.len() as u64 / channels as u64; + on_progress(&audio_data, decoded_frames, decoded_frames.max(total_frames)); + } + /// Calculate the duration of the audio file in seconds pub fn duration(&self) -> f64 { self.frames as f64 / self.sample_rate as f64 diff --git a/lightningbeam-ui/Cargo.toml b/lightningbeam-ui/Cargo.toml index 75d2aa7..d01ee5a 100644 --- a/lightningbeam-ui/Cargo.toml +++ b/lightningbeam-ui/Cargo.toml @@ -59,8 +59,6 @@ opt-level = 2 opt-level = 2 [profile.dev.package.symphonia-bundle-flac] opt-level = 2 -[profile.dev.package.symphonia-format-wav] -opt-level = 2 [profile.dev.package.symphonia-format-ogg] opt-level = 2 [profile.dev.package.symphonia-codec-vorbis] @@ -71,5 +69,3 @@ opt-level = 2 opt-level = 2 [profile.dev.package.cpal] opt-level = 2 -[profile.dev.package.rubato] -opt-level = 2 diff --git a/lightningbeam-ui/lightningbeam-editor/src/cqt_gpu.rs
b/lightningbeam-ui/lightningbeam-editor/src/cqt_gpu.rs index 427558a..1409290 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/cqt_gpu.rs +++ b/lightningbeam-ui/lightningbeam-editor/src/cqt_gpu.rs @@ -21,7 +21,7 @@ const BINS_PER_OCTAVE: u32 = 24; const FREQ_BINS: u32 = 174; // ceil(log2(4186.0 / 27.5) * 24) = ceil(173.95) const HOP_SIZE: u32 = 512; const CACHE_CAPACITY: u32 = 4096; -const MAX_COLS_PER_FRAME: u32 = 256; +const MAX_COLS_PER_FRAME: u32 = 128; const F_MIN: f64 = 27.5; // A0 = MIDI 21 const WAVEFORM_TEX_WIDTH: u32 = 2048; @@ -49,7 +49,7 @@ struct CqtComputeParams { tex_width: u32, total_frames: u32, sample_rate: f32, - _pad0: u32, + column_stride: u32, _pad1: u32, _pad2: u32, } @@ -78,7 +78,8 @@ pub struct CqtRenderParams { pub cache_start_column: f32, // 4 @ 76 pub cache_valid_start: f32, // 4 @ 80 pub cache_valid_end: f32, // 4 @ 84 - pub _pad: [f32; 2], // 8 @ 88, total 96 + pub column_stride: f32, // 4 @ 88 + pub _pad: f32, // 4 @ 92, total 96 } /// Per-pool-index cache entry with ring buffer and GPU resources. @@ -111,6 +112,7 @@ struct CqtCacheEntry { // Metadata sample_rate: u32, + current_stride: u32, } /// Global GPU resources for CQT (stored in egui_wgpu::CallbackResources). @@ -132,6 +134,8 @@ pub struct CqtCallback { /// Visible column range (global CQT column indices) pub visible_col_start: i64, pub visible_col_end: i64, + /// Column stride: 1 = full resolution, N = compute every Nth column + pub stride: u32, } /// Precompute CQT bin parameters for a given sample rate. @@ -341,7 +345,14 @@ impl CqtGpuResources { total_frames: u64, sample_rate: u32, ) { - if self.entries.contains_key(&pool_index) { + // If entry exists, check if waveform data has grown (progressive decode) + if let Some(entry) = self.entries.get_mut(&pool_index) { + if entry.waveform_total_frames != total_frames { + // Waveform texture updated in-place with more data. 
+ // The texture view is still valid (no destroy/recreate), + // so just update total_frames to allow computing new columns. + entry.waveform_total_frames = total_frames; + } return; } @@ -458,6 +469,7 @@ impl CqtGpuResources { render_bind_group, render_uniform_buffer, sample_rate, + current_stride: 1, }, ); } @@ -473,18 +485,20 @@ fn dispatch_cqt_compute( entry: &CqtCacheEntry, start_col: i64, end_col: i64, + stride: u32, ) -> Vec { - let num_cols = (end_col - start_col) as u32; - if num_cols == 0 { + // Number of cache slots needed (each slot covers `stride` global columns) + let num_cols = ((end_col - start_col) as u32 / stride).max(1); + if end_col <= start_col { return Vec::new(); } // Clamp to max per frame let num_cols = num_cols.min(MAX_COLS_PER_FRAME); - // Calculate ring buffer write offset + // Calculate ring buffer write offset (in cache slots, not global columns) let cache_write_offset = - ((start_col - entry.cache_start_column) as u32) % entry.cache_capacity; + (((start_col - entry.cache_start_column) / stride as i64) as u32) % entry.cache_capacity; let params = CqtComputeParams { hop_size: HOP_SIZE, @@ -496,7 +510,7 @@ fn dispatch_cqt_compute( tex_width: WAVEFORM_TEX_WIDTH, total_frames: entry.waveform_total_frames as u32, sample_rate: entry.sample_rate as f32, - _pad0: 0, + column_stride: stride, _pad1: 0, _pad2: 0, }; @@ -569,9 +583,26 @@ impl egui_wgpu::CallbackTrait for CqtCallback { ); // Determine which columns need computing + let stride = self.stride.max(1) as i64; let vis_start = self.visible_col_start.max(0); let max_col = (total_frames as i64) / HOP_SIZE as i64; - let vis_end = self.visible_col_end.min(max_col); + let vis_end_raw = self.visible_col_end.min(max_col); + // Clamp visible range to cache capacity (in global columns, accounting for stride) + let vis_end = vis_end_raw.min(vis_start + CACHE_CAPACITY as i64 * stride); + + // If stride changed, invalidate cache + { + let entry = cqt_gpu.entries.get_mut(&self.pool_index).unwrap(); + 
if entry.current_stride != self.stride { + entry.current_stride = self.stride; + entry.cache_start_column = vis_start; + entry.cache_valid_start = vis_start; + entry.cache_valid_end = vis_start; + } + } + + // Stride-aware max columns per frame (in global column units) + let max_cols_global = MAX_COLS_PER_FRAME as i64 * stride; // Read current cache state, compute what's needed, then update state. // We split borrows carefully: read entry state, compute, then write back. @@ -590,17 +621,18 @@ impl egui_wgpu::CallbackTrait for CqtCallback { && vis_start < cache_valid_end && vis_end > cache_valid_end { - // Scrolling right + // Scrolling right — align to stride boundary let actual_end = - cache_valid_end + (vis_end - cache_valid_end).min(MAX_COLS_PER_FRAME as i64); + cache_valid_end + (vis_end - cache_valid_end).min(max_cols_global); cmds = dispatch_cqt_compute( device, queue, &cqt_gpu.compute_pipeline, entry, - cache_valid_end, actual_end, + cache_valid_end, actual_end, self.stride, ); let entry = cqt_gpu.entries.get_mut(&self.pool_index).unwrap(); entry.cache_valid_end = actual_end; - if entry.cache_valid_end - entry.cache_valid_start > entry.cache_capacity as i64 { - entry.cache_valid_start = entry.cache_valid_end - entry.cache_capacity as i64; + let cache_cap_global = entry.cache_capacity as i64 * stride; + if entry.cache_valid_end - entry.cache_valid_start > cache_cap_global { + entry.cache_valid_start = entry.cache_valid_end - cache_cap_global; entry.cache_start_column = entry.cache_valid_start; } } else if vis_end <= cache_valid_end @@ -609,16 +641,17 @@ impl egui_wgpu::CallbackTrait for CqtCallback { { // Scrolling left let actual_start = - cache_valid_start - (cache_valid_start - vis_start).min(MAX_COLS_PER_FRAME as i64); + cache_valid_start - (cache_valid_start - vis_start).min(max_cols_global); cmds = dispatch_cqt_compute( device, queue, &cqt_gpu.compute_pipeline, entry, - actual_start, cache_valid_start, + actual_start, cache_valid_start, self.stride, ); 
let entry = cqt_gpu.entries.get_mut(&self.pool_index).unwrap(); entry.cache_valid_start = actual_start; entry.cache_start_column = actual_start; - if entry.cache_valid_end - entry.cache_valid_start > entry.cache_capacity as i64 { - entry.cache_valid_end = entry.cache_valid_start + entry.cache_capacity as i64; + let cache_cap_global = entry.cache_capacity as i64 * stride; + if entry.cache_valid_end - entry.cache_valid_start > cache_cap_global { + entry.cache_valid_end = entry.cache_valid_start + cache_cap_global; } } else { // No overlap or first compute — reset cache @@ -627,11 +660,11 @@ impl egui_wgpu::CallbackTrait for CqtCallback { entry.cache_valid_start = vis_start; entry.cache_valid_end = vis_start; - let compute_end = vis_start + (vis_end - vis_start).min(MAX_COLS_PER_FRAME as i64); + let compute_end = vis_start + (vis_end - vis_start).min(max_cols_global); let entry = cqt_gpu.entries.get(&self.pool_index).unwrap(); cmds = dispatch_cqt_compute( device, queue, &cqt_gpu.compute_pipeline, entry, - vis_start, compute_end, + vis_start, compute_end, self.stride, ); let entry = cqt_gpu.entries.get_mut(&self.pool_index).unwrap(); entry.cache_valid_end = compute_end; @@ -645,6 +678,7 @@ impl egui_wgpu::CallbackTrait for CqtCallback { params.cache_valid_start = entry.cache_valid_start as f32; params.cache_valid_end = entry.cache_valid_end as f32; params.cache_capacity = entry.cache_capacity as f32; + params.column_stride = self.stride as f32; queue.write_buffer( &entry.render_uniform_buffer, diff --git a/lightningbeam-ui/lightningbeam-editor/src/main.rs b/lightningbeam-ui/lightningbeam-editor/src/main.rs index d82cfed..e45b47d 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/main.rs +++ b/lightningbeam-ui/lightningbeam-editor/src/main.rs @@ -2353,8 +2353,6 @@ impl EditorApp { } /// Import an audio file via daw-backend (async — non-blocking) - /// - /// Reads only metadata from the file (sub-millisecond), then sends the path /// to the engine for async import. 
The engine memory-maps WAV files or sets /// up stream decoding for compressed formats. An `AudioFileReady` event is /// emitted when the file is playback-ready; the event handler populates the @@ -2749,10 +2747,37 @@ impl EditorApp { // Get the newly created layer ID (it's the last child in the document) let doc = self.action_executor.document(); if let Some(last_layer) = doc.root.children.last() { - target_layer_id = Some(last_layer.id()); + let layer_id = last_layer.id(); + target_layer_id = Some(layer_id); // Update active layer to the new layer self.active_layer_id = target_layer_id; + + // Create a backend audio/MIDI track and add the mapping + if let Some(ref controller_arc) = self.audio_controller { + let mut controller = controller_arc.lock().unwrap(); + match asset_info.clip_type { + panes::DragClipType::AudioSampled => { + match controller.create_audio_track_sync(layer_name.clone()) { + Ok(track_id) => { + self.layer_to_track_map.insert(layer_id, track_id); + self.track_to_layer_map.insert(track_id, layer_id); + } + Err(e) => eprintln!("Failed to create audio track for auto-place: {}", e), + } + } + panes::DragClipType::AudioMidi => { + match controller.create_midi_track_sync(layer_name.clone()) { + Ok(track_id) => { + self.layer_to_track_map.insert(layer_id, track_id); + self.track_to_layer_map.insert(track_id, layer_id); + } + Err(e) => eprintln!("Failed to create MIDI track for auto-place: {}", e), + } + } + _ => {} // Other types don't need backend tracks + } + } } } @@ -3613,22 +3638,11 @@ impl eframe::App for EditorApp { // via AudioDecodeProgress events. 
ctx.request_repaint(); } - AudioEvent::AudioDecodeProgress { pool_index, decoded_frames, total_frames } => { - // Waveform decode complete — fetch samples for GPU waveform - if decoded_frames == total_frames { - if let Some(ref controller_arc) = self.audio_controller { - let mut controller = controller_arc.lock().unwrap(); - match controller.get_pool_audio_samples(pool_index) { - Ok((samples, sr, ch)) => { - println!("Waveform decode complete for pool {}: {} samples", pool_index, samples.len()); - self.raw_audio_cache.insert(pool_index, (samples, sr, ch)); - self.waveform_gpu_dirty.insert(pool_index); - } - Err(e) => eprintln!("Failed to fetch decoded audio for pool {}: {}", pool_index, e), - } - } - ctx.request_repaint(); - } + AudioEvent::AudioDecodeProgress { pool_index, samples, sample_rate, channels } => { + // Samples arrive inline — no query needed + self.raw_audio_cache.insert(pool_index, (samples, sample_rate, channels)); + self.waveform_gpu_dirty.insert(pool_index); + ctx.request_repaint(); } _ => {} // Ignore other events for now } @@ -4057,6 +4071,19 @@ impl eframe::App for EditorApp { self.split_clips_at_playhead(); } + // Space bar toggles play/pause (only when no text input is focused) + if !wants_keyboard && ctx.input(|i| i.key_pressed(egui::Key::Space)) { + self.is_playing = !self.is_playing; + if let Some(ref controller_arc) = self.audio_controller { + let mut controller = controller_arc.lock().unwrap(); + if self.is_playing { + controller.play(); + } else { + controller.pause(); + } + } + } + ctx.input(|i| { // Check menu shortcuts that use modifiers (Cmd+S, etc.) 
- allow even when typing // But skip shortcuts without modifiers when keyboard input is claimed (e.g., virtual piano) diff --git a/lightningbeam-ui/lightningbeam-editor/src/panes/piano_roll.rs b/lightningbeam-ui/lightningbeam-editor/src/panes/piano_roll.rs index 13dbe1b..68a7c59 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/panes/piano_roll.rs +++ b/lightningbeam-ui/lightningbeam-editor/src/panes/piano_roll.rs @@ -1296,6 +1296,11 @@ impl PianoRollPane { let vis_col_start = (vis_audio_start * sample_rate as f64 / 512.0).floor() as i64; let vis_col_end = (vis_audio_end * sample_rate as f64 / 512.0).ceil() as i64 + 1; + // Calculate stride: how many CQT columns per pixel + // When zoomed out, multiple CQT columns map to one pixel — compute every Nth + let cols_per_pixel = sample_rate as f32 / (512.0 * self.pixels_per_second); + let cqt_stride = (cols_per_pixel.ceil() as u32).max(1); + let callback = crate::cqt_gpu::CqtCallback { pool_index, params: crate::cqt_gpu::CqtRenderParams { @@ -1318,12 +1323,14 @@ impl PianoRollPane { cache_start_column: 0.0, cache_valid_start: 0.0, cache_valid_end: 0.0, - _pad: [0.0; 2], + column_stride: 0.0, // filled by prepare() + _pad: 0.0, }, target_format: shared.target_format, sample_rate, visible_col_start: vis_col_start, visible_col_end: vis_col_end, + stride: cqt_stride, }; ui.painter().add(egui_wgpu::Callback::new_paint_callback( diff --git a/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/cqt_compute.wgsl b/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/cqt_compute.wgsl index fd80e4c..a257c8f 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/cqt_compute.wgsl +++ b/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/cqt_compute.wgsl @@ -18,7 +18,7 @@ struct CqtParams { tex_width: u32, // waveform texture width (2048) total_frames: u32, // total audio frames in waveform texture sample_rate: f32, - _pad0: u32, + column_stride: u32, _pad1: u32, _pad2: u32, } @@ -46,7 +46,7 @@ fn 
main(@builtin(global_invocation_id) gid: vec3<u32>) { return; } - let global_col = params.column_start + col_rel; + let global_col = params.column_start + col_rel * params.column_stride; let sample_start = global_col * params.hop_size; let info = bins[bin_k]; diff --git a/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/cqt_render.wgsl b/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/cqt_render.wgsl index 02266a4..908019f 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/cqt_render.wgsl +++ b/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/cqt_render.wgsl @@ -27,7 +27,8 @@ struct Params { cache_start_column: f32, // 4 @ 76 cache_valid_start: f32, // 4 @ 80 cache_valid_end: f32, // 4 @ 84 - _pad: vec2<f32>, // 8 @ 88, total 96 + column_stride: f32, // 4 @ 88 + _pad: f32, // 4 @ 92, total 96 } @group(0) @binding(0) var cache_tex: texture_2d<f32>; @@ -141,8 +142,8 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> { discard; } - // Map global column to ring buffer position - let ring_pos = global_col - params.cache_start_column; + // Map global column to ring buffer position (accounting for stride) + let ring_pos = (global_col - params.cache_start_column) / params.column_stride; let cache_x = ring_pos % params.cache_capacity; // Sample cache texture with bilinear filtering diff --git a/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/waveform.wgsl b/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/waveform.wgsl index f5d4616..d3d740c 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/waveform.wgsl +++ b/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/waveform.wgsl @@ -63,8 +63,9 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> { } // Fragment X position → audio time - let timeline_time = params.viewport_start_time + (frag_x - params.clip_rect.x) / params.pixels_per_second; - let audio_time = timeline_time - params.clip_start_time + params.trim_start; + // clip_start_time is the screen X of the
(unclamped) clip left edge. + // (frag_x - clip_start_time) / pps gives the time offset from the clip's start. + let audio_time = (frag_x - params.clip_start_time) / params.pixels_per_second + params.trim_start; // Audio time → frame index let frame_f = audio_time * params.sample_rate - params.segment_start_frame; diff --git a/lightningbeam-ui/lightningbeam-editor/src/panes/timeline.rs b/lightningbeam-ui/lightningbeam-editor/src/panes/timeline.rs index 684660d..c081c64 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/panes/timeline.rs +++ b/lightningbeam-ui/lightningbeam-editor/src/panes/timeline.rs @@ -1255,7 +1255,7 @@ impl TimelinePane { pixels_per_second: self.pixels_per_second as f32, audio_duration: audio_file_duration as f32, sample_rate: *sr as f32, - clip_start_time: instance_start as f32, + clip_start_time: clip_screen_start, trim_start: preview_trim_start as f32, tex_width: crate::waveform_gpu::tex_width() as f32, total_frames: total_frames as f32, diff --git a/lightningbeam-ui/lightningbeam-editor/src/waveform_gpu.rs b/lightningbeam-ui/lightningbeam-editor/src/waveform_gpu.rs index 3d62975..82d492b 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/waveform_gpu.rs +++ b/lightningbeam-ui/lightningbeam-editor/src/waveform_gpu.rs @@ -43,8 +43,10 @@ pub struct WaveformGpuEntry { pub uniform_buffers: Vec, /// Frames covered by each texture segment pub frames_per_segment: u32, - /// Total frame count + /// Total frame count of data currently in the texture pub total_frames: u64, + /// Allocated texture height (may be larger than needed for current total_frames) + pub tex_height: u32, /// Sample rate pub sample_rate: u32, /// Number of channels in source audio @@ -271,14 +273,100 @@ impl WaveformGpuResources { sample_rate: u32, channels: u32, ) -> Vec { - // Remove old entry if exists - self.entries.remove(&pool_index); - - let total_frames = samples.len() / channels.max(1) as usize; - if total_frames == 0 { + let new_total_frames = samples.len() / 
channels.max(1) as usize; + if new_total_frames == 0 { return Vec::new(); } + // If entry exists and texture is large enough, do an incremental update + let incremental = if let Some(entry) = self.entries.get(&pool_index) { + let new_tex_height = (new_total_frames as u32 + TEX_WIDTH - 1) / TEX_WIDTH; + if new_tex_height <= entry.tex_height && new_total_frames > entry.total_frames as usize { + Some((entry.total_frames as usize, entry.tex_height)) + } else if new_total_frames <= entry.total_frames as usize { + return Vec::new(); // No new data + } else { + None // Texture too small, need full recreate + } + } else { + None // No entry yet + }; + + if let Some((old_frames, tex_height)) = incremental { + // Write only the NEW rows into the existing texture + let start_row = old_frames as u32 / TEX_WIDTH; + let end_row = (new_total_frames as u32 + TEX_WIDTH - 1) / TEX_WIDTH; + let rows_to_write = end_row - start_row; + + let row_texel_count = (TEX_WIDTH * rows_to_write) as usize; + let mut row_data: Vec<half::f16> = vec![half::f16::ZERO; row_texel_count * 4]; + + let row_start_frame = start_row as usize * TEX_WIDTH as usize; + for frame in 0..(rows_to_write as usize * TEX_WIDTH as usize) { + let global_frame = row_start_frame + frame; + if global_frame >= new_total_frames { + break; + } + let sample_offset = global_frame * channels as usize; + let left = if sample_offset < samples.len() { + samples[sample_offset] + } else { + 0.0 + }; + let right = if channels >= 2 && sample_offset + 1 < samples.len() { + samples[sample_offset + 1] + } else { + left + }; + let texel_offset = frame * 4; + row_data[texel_offset] = half::f16::from_f32(left); + row_data[texel_offset + 1] = half::f16::from_f32(left); + row_data[texel_offset + 2] = half::f16::from_f32(right); + row_data[texel_offset + 3] = half::f16::from_f32(right); + } + + let entry = self.entries.get(&pool_index).unwrap(); + queue.write_texture( + wgpu::TexelCopyTextureInfo { + texture: &entry.textures[0], + mip_level: 0, + origin:
wgpu::Origin3d { x: 0, y: start_row, z: 0 }, + aspect: wgpu::TextureAspect::All, + }, + bytemuck::cast_slice(&row_data), + wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(TEX_WIDTH * 8), + rows_per_image: Some(rows_to_write), + }, + wgpu::Extent3d { + width: TEX_WIDTH, + height: rows_to_write, + depth_or_array_layers: 1, + }, + ); + + // Regenerate mipmaps + let mip_count = compute_mip_count(TEX_WIDTH, tex_height); + let cmds = self.generate_mipmaps( + device, + &entry.textures[0], + TEX_WIDTH, + tex_height, + mip_count, + new_total_frames as u32, + ); + + // Update total_frames after borrow of entry is done + self.entries.get_mut(&pool_index).unwrap().total_frames = new_total_frames as u64; + return cmds; + } + + // Full create (first upload or texture needs to grow) + self.entries.remove(&pool_index); + + let total_frames = new_total_frames; + let max_frames_per_segment = (TEX_WIDTH as u64) * (device.limits().max_texture_dimension_2d as u64); let segment_count = @@ -323,7 +411,6 @@ impl WaveformGpuResources { }); // Pack raw samples into Rgba16Float data for mip 0 - // R=left_min=left_sample, G=left_max=left_sample, B=right_min, A=right_max let texel_count = (TEX_WIDTH * tex_height) as usize; let mut mip0_data: Vec<half::f16> = vec![half::f16::ZERO; texel_count * 4]; @@ -339,14 +426,14 @@ impl WaveformGpuResources { let right = if channels >= 2 && sample_offset + 1 < samples.len() { samples[sample_offset + 1] } else { - left // Mono: duplicate left to right + left }; let texel_offset = frame * 4; - mip0_data[texel_offset] = half::f16::from_f32(left); // R = left_min - mip0_data[texel_offset + 1] = half::f16::from_f32(left); // G = left_max - mip0_data[texel_offset + 2] = half::f16::from_f32(right); // B = right_min - mip0_data[texel_offset + 3] = half::f16::from_f32(right); // A = right_max + mip0_data[texel_offset] = half::f16::from_f32(left); + mip0_data[texel_offset + 1] = half::f16::from_f32(left); + mip0_data[texel_offset + 2] = 
half::f16::from_f32(right); + mip0_data[texel_offset + 3] = half::f16::from_f32(right); } // Upload mip 0 @@ -360,7 +447,7 @@ impl WaveformGpuResources { bytemuck::cast_slice(&mip0_data), wgpu::TexelCopyBufferLayout { offset: 0, - bytes_per_row: Some(TEX_WIDTH * 8), // 4 channels × 2 bytes (f16) + bytes_per_row: Some(TEX_WIDTH * 8), rows_per_image: Some(tex_height), }, wgpu::Extent3d { @@ -387,7 +474,7 @@ impl WaveformGpuResources { ..Default::default() }); - // Create uniform buffer placeholder (will be filled per-draw in paint) + // Create uniform buffer placeholder let uniform_buffer = device.create_buffer(&wgpu::BufferDescriptor { label: Some(&format!("waveform_{}_seg{}_uniforms", pool_index, seg)), size: std::mem::size_of::() as u64, @@ -430,6 +517,7 @@ impl WaveformGpuResources { uniform_buffers, frames_per_segment, total_frames: total_frames as u64, + tex_height: (total_frames as u32 + TEX_WIDTH - 1) / TEX_WIDTH, sample_rate, channels, },