From 3c5a24e0b65e9a90013f2f7818d7f9b63a8ac5ac Mon Sep 17 00:00:00 2001 From: Skyler Lehmkuhl Date: Thu, 6 Nov 2025 06:04:39 -0500 Subject: [PATCH] video backend --- daw-backend/src/audio/engine.rs | 22 + daw-backend/src/command/types.rs | 2 + src-tauri/Cargo.lock | 97 +++- src-tauri/Cargo.toml | 4 + src-tauri/examples/video_inspect.rs | 104 ++++ src-tauri/src/audio.rs | 34 +- src-tauri/src/lib.rs | 7 + src-tauri/src/video.rs | 503 +++++++++++++++++++ src/actions/index.js | 157 +++++- src/main.js | 156 +++++- src/models/graphics-object.js | 43 +- src/models/layer.js | 267 +++++++++- src/state.js | 1 + src/timeline.js | 5 +- src/widgets.js | 736 ++++++++++++++++++++++++---- 15 files changed, 2011 insertions(+), 127 deletions(-) create mode 100644 src-tauri/examples/video_inspect.rs create mode 100644 src-tauri/src/video.rs diff --git a/daw-backend/src/audio/engine.rs b/daw-backend/src/audio/engine.rs index 1d42291..2133750 100644 --- a/daw-backend/src/audio/engine.rs +++ b/daw-backend/src/audio/engine.rs @@ -384,6 +384,24 @@ impl Engine { _ => {} } } + Command::TrimClip(track_id, clip_id, new_start_time, new_duration, new_offset) => { + match self.project.get_track_mut(track_id) { + Some(crate::audio::track::TrackNode::Audio(track)) => { + if let Some(clip) = track.clips.iter_mut().find(|c| c.id == clip_id) { + clip.start_time = new_start_time; + clip.duration = new_duration; + clip.offset = new_offset; + } + } + Some(crate::audio::track::TrackNode::Midi(track)) => { + if let Some(clip) = track.clips.iter_mut().find(|c| c.id == clip_id) { + clip.start_time = new_start_time; + clip.duration = new_duration; + } + } + _ => {} + } + } Command::CreateMetatrack(name) => { let track_id = self.project.add_group_track(name.clone(), None); // Notify UI about the new metatrack @@ -1893,6 +1911,10 @@ impl EngineController { let _ = self.command_tx.push(Command::MoveClip(track_id, clip_id, new_start_time)); } + pub fn trim_clip(&mut self, track_id: TrackId, clip_id: ClipId, new_start_time: f64, new_duration: f64, new_offset: f64) { + let _ = self.command_tx.push(Command::TrimClip(track_id, clip_id, new_start_time, new_duration, new_offset)); + } + /// Send a generic command to the audio thread pub fn send_command(&mut self, command: Command) { let _ = self.command_tx.push(command); diff --git a/daw-backend/src/command/types.rs b/daw-backend/src/command/types.rs index 099bdad..7846c98 100644 --- a/daw-backend/src/command/types.rs +++ b/daw-backend/src/command/types.rs @@ -29,6 +29,8 @@ pub enum Command { // Clip management commands /// Move a clip to a new timeline position MoveClip(TrackId, ClipId, f64), + /// Trim a clip (track_id, clip_id, new_start_time, new_duration, new_offset) + TrimClip(TrackId, ClipId, f64, f64, f64), // Metatrack management commands /// Create a new metatrack with a name diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index 4952019..ed43b02 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -247,6 +247,24 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bindgen" +version = "0.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" +dependencies = [ + "bitflags 2.8.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "proc-macro2", + "quote", + "regex", + "rustc-hash 1.1.0", + "shlex", + "syn 2.0.96", +] + [[package]] name = 
"bindgen" version = "0.72.1" @@ -260,7 +278,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 2.1.1", "shlex", "syn 2.0.96", ] @@ -732,7 +750,7 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ceec7a6067e62d6f931a2baf6f3a751f4a892595bcec1461a3c94ef9949864b6" dependencies = [ - "bindgen", + "bindgen 0.72.1", ] [[package]] @@ -1372,6 +1390,31 @@ dependencies = [ "log", ] +[[package]] +name = "ffmpeg-next" +version = "7.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da02698288e0275e442a47fc12ca26d50daf0d48b15398ba5906f20ac2e2a9f9" +dependencies = [ + "bitflags 2.8.0", + "ffmpeg-sys-next", + "libc", +] + +[[package]] +name = "ffmpeg-sys-next" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9e9c75ebd4463de9d8998fb134ba26347fe5faee62fabf0a4b4d41bd500b4ad" +dependencies = [ + "bindgen 0.70.1", + "cc", + "libc", + "num_cpus", + "pkg-config", + "vcpkg", +] + [[package]] name = "field-offset" version = "0.3.6" @@ -1866,6 +1909,12 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hex" version = "0.4.3" @@ -2417,7 +2466,9 @@ dependencies = [ "chrono", "cpal", "daw-backend", + "ffmpeg-next", "log", + "lru", "rtrb", "serde", "serde_json", @@ -2764,6 +2815,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "num_enum" version = "0.7.3" @@ -3835,6 +3896,12 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -3960,10 +4027,11 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.217" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] @@ -3979,10 +4047,19 @@ dependencies = [ ] [[package]] -name = "serde_derive" -version = "1.0.217" +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -5491,6 +5568,12 @@ version = "1.10.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3ef4c4aa54d5d05a279399bfa921ec387b7aba77caf7a682ae8d86785b8fdad2" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "version-compare" version = "0.2.0" diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index d7199da..9e1b91c 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -36,6 +36,10 @@ daw-backend = { path = "../daw-backend" } cpal = "0.15" rtrb = "0.3" +# Video decoding +ffmpeg-next = "7.0" +lru = "0.12" + [profile.dev] opt-level = 1 # Enable basic optimizations in debug mode for audio decoding performance diff --git a/src-tauri/examples/video_inspect.rs b/src-tauri/examples/video_inspect.rs new file mode 100644 index 0000000..9950fd3 --- /dev/null +++ b/src-tauri/examples/video_inspect.rs @@ -0,0 +1,104 @@ +extern crate ffmpeg_next as ffmpeg; + +use std::env; + +fn main() { + ffmpeg::init().unwrap(); + + let args: Vec = env::args().collect(); + if args.len() < 2 { + eprintln!("Usage: {} ", args[0]); + std::process::exit(1); + } + + let path = &args[1]; + let input = ffmpeg::format::input(path).expect("Failed to open video"); + + println!("=== VIDEO FILE INFORMATION ==="); + println!("File: {}", path); + println!("Format: {}", input.format().name()); + println!("Duration: {:.2}s", input.duration() as f64 / f64::from(ffmpeg::ffi::AV_TIME_BASE)); + println!(); + + let video_stream = input.streams() + .best(ffmpeg::media::Type::Video) + .expect("No video stream found"); + + let stream_index = video_stream.index(); + let time_base = f64::from(video_stream.time_base()); + let duration = video_stream.duration() as f64 * time_base; + let fps = f64::from(video_stream.avg_frame_rate()); + + println!("=== VIDEO STREAM ==="); + println!("Stream index: {}", stream_index); + println!("Time base: {} ({:.10})", video_stream.time_base(), time_base); + println!("Duration: {:.2}s", duration); + println!("FPS: {:.2}", fps); + println!("Frames: {}", video_stream.frames()); + + let context = ffmpeg::codec::context::Context::from_parameters(video_stream.parameters()) + .expect("Failed to create context"); + let decoder = context.decoder().video().expect("Failed to create decoder"); + + println!("Codec: {:?}", decoder.id()); + println!("Resolution: {}x{}", decoder.width(), decoder.height()); + println!("Pixel format: {:?}", decoder.format()); + println!(); + + println!("=== SCANNING FRAMES ==="); + println!("Timestamp (ts) | Time (s) | Key | Type"); + println!("---------------|----------|-----|-----"); + + let mut input = ffmpeg::format::input(path).expect("Failed to reopen video"); + let context = ffmpeg::codec::context::Context::from_parameters( + input.streams().best(ffmpeg::media::Type::Video).unwrap().parameters() + ).expect("Failed to create context"); + let mut decoder = context.decoder().video().expect("Failed to create decoder"); + + let mut frame_count = 0; + let mut keyframe_count = 0; + + for (stream, packet) in input.packets() { + if stream.index() == stream_index { + let packet_pts = packet.pts().unwrap_or(0); + let packet_time = packet_pts as f64 * time_base; + let is_key = packet.is_key(); + + if is_key { + keyframe_count += 1; + } + + // Print first 50 packets and all keyframes + if frame_count < 50 || is_key { + println!("{:14} | {:8.2} | {:3} | {:?}", + packet_pts, + packet_time, + if is_key { "KEY" } else { " " }, + if is_key { "I-frame" 
} else { "P/B-frame" } + ); + } + + decoder.send_packet(&packet).ok(); + let mut frame = ffmpeg::util::frame::Video::empty(); + while decoder.receive_frame(&mut frame).is_ok() { + frame_count += 1; + } + } + } + + // Flush decoder + decoder.send_eof().ok(); + let mut frame = ffmpeg::util::frame::Video::empty(); + while decoder.receive_frame(&mut frame).is_ok() { + frame_count += 1; + } + + println!(); + println!("=== SUMMARY ==="); + println!("Total frames decoded: {}", frame_count); + println!("Total keyframes: {}", keyframe_count); + if keyframe_count > 0 { + println!("Average keyframe interval: {:.2} frames", frame_count as f64 / keyframe_count as f64); + println!("Average keyframe interval: {:.2}s", duration / keyframe_count as f64); + } +} diff --git a/src-tauri/src/audio.rs b/src-tauri/src/audio.rs index 9578d37..c34d49a 100644 --- a/src-tauri/src/audio.rs +++ b/src-tauri/src/audio.rs @@ -30,15 +30,15 @@ pub struct MidiFileMetadata { } pub struct AudioState { - controller: Option, - sample_rate: u32, - channels: u32, - buffer_size: u32, - next_track_id: u32, - next_pool_index: usize, - next_graph_node_id: u32, + pub(crate) controller: Option, + pub(crate) sample_rate: u32, + pub(crate) channels: u32, + pub(crate) buffer_size: u32, + pub(crate) next_track_id: u32, + pub(crate) next_pool_index: usize, + pub(crate) next_graph_node_id: u32, // Track next node ID for each VoiceAllocator template (VoiceAllocator backend ID -> next template node ID) - template_node_counters: HashMap, + pub(crate) template_node_counters: HashMap, } impl Default for AudioState { @@ -367,6 +367,24 @@ pub async fn audio_move_clip( } } +#[tauri::command] +pub async fn audio_trim_clip( + state: tauri::State<'_, Arc>>, + track_id: u32, + clip_id: u32, + new_start_time: f64, + new_duration: f64, + new_offset: f64, +) -> Result<(), String> { + let mut audio_state = state.lock().unwrap(); + if let Some(controller) = &mut audio_state.controller { + controller.trim_clip(track_id, clip_id, new_start_time, new_duration, new_offset); + Ok(()) + } else { + Err("Audio not initialized".to_string()) + } +} + #[tauri::command] pub async fn audio_start_recording( state: tauri::State<'_, Arc>>, diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 29902ff..6aca3dd 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -7,6 +7,7 @@ use chrono::Local; use tauri::{AppHandle, Manager, Url, WebviewUrl, WebviewWindowBuilder}; mod audio; +mod video; #[derive(Default)] @@ -130,6 +131,7 @@ pub fn run() { tauri::Builder::default() .manage(Mutex::new(AppState::default())) .manage(Arc::new(Mutex::new(audio::AudioState::default()))) + .manage(Arc::new(Mutex::new(video::VideoState::default()))) .setup(|app| { #[cfg(any(windows, target_os = "linux"))] // Windows/Linux needs different handling from macOS { @@ -204,6 +206,7 @@ pub fn run() { audio::audio_load_file, audio::audio_add_clip, audio::audio_move_clip, + audio::audio_trim_clip, audio::audio_start_recording, audio::audio_stop_recording, audio::audio_pause_recording, @@ -250,6 +253,10 @@ pub fn run() { audio::audio_resolve_missing_file, audio::audio_serialize_track_graph, audio::audio_load_track_graph, + video::video_load_file, + video::video_get_frame, + video::video_set_cache_size, + video::video_get_pool_info, ]) // .manage(window_counter) .build(tauri::generate_context!()) diff --git a/src-tauri/src/video.rs b/src-tauri/src/video.rs new file mode 100644 index 0000000..38feeb7 --- /dev/null +++ b/src-tauri/src/video.rs @@ -0,0 +1,503 @@ +use std::sync::{Arc, Mutex}; 
+use std::num::NonZeroUsize; +use ffmpeg_next as ffmpeg; +use lru::LruCache; +use daw_backend::WaveformPeak; + +#[derive(serde::Serialize, Clone)] +pub struct VideoFileMetadata { + pub pool_index: usize, + pub width: u32, + pub height: u32, + pub fps: f64, + pub duration: f64, + pub has_audio: bool, + pub audio_pool_index: Option, + pub audio_duration: Option, + pub audio_sample_rate: Option, + pub audio_channels: Option, + pub audio_waveform: Option>, +} + +struct VideoDecoder { + path: String, + width: u32, // Original video width + height: u32, // Original video height + output_width: u32, // Scaled output width + output_height: u32, // Scaled output height + fps: f64, + duration: f64, + time_base: f64, + stream_index: usize, + frame_cache: LruCache>, // timestamp -> RGBA data + input: Option, + decoder: Option, + last_decoded_ts: i64, // Track the last decoded frame timestamp +} + +impl VideoDecoder { + fn new(path: String, cache_size: usize, max_width: Option, max_height: Option) -> Result { + ffmpeg::init().map_err(|e| e.to_string())?; + + let input = ffmpeg::format::input(&path) + .map_err(|e| format!("Failed to open video: {}", e))?; + + let video_stream = input.streams() + .best(ffmpeg::media::Type::Video) + .ok_or("No video stream found")?; + + let stream_index = video_stream.index(); + + let context_decoder = ffmpeg::codec::context::Context::from_parameters( + video_stream.parameters() + ).map_err(|e| e.to_string())?; + + let decoder = context_decoder.decoder().video() + .map_err(|e| e.to_string())?; + + let width = decoder.width(); + let height = decoder.height(); + let time_base = f64::from(video_stream.time_base()); + + // Calculate output dimensions (scale down if larger than max) + let (output_width, output_height) = if let (Some(max_w), Some(max_h)) = (max_width, max_height) { + // Calculate scale to fit within max dimensions while preserving aspect ratio + let scale = (max_w as f32 / width as f32).min(max_h as f32 / height as f32).min(1.0); + ((width as f32 * scale) as u32, (height as f32 * scale) as u32) + } else { + (width, height) + }; + + // Try to get duration from stream, fallback to container + let duration = if video_stream.duration() > 0 { + video_stream.duration() as f64 * time_base + } else if input.duration() > 0 { + input.duration() as f64 / f64::from(ffmpeg::ffi::AV_TIME_BASE) + } else { + // If no duration available, estimate from frame count and fps + let fps = f64::from(video_stream.avg_frame_rate()); + if video_stream.frames() > 0 && fps > 0.0 { + video_stream.frames() as f64 / fps + } else { + 0.0 // Unknown duration + } + }; + + let fps = f64::from(video_stream.avg_frame_rate()); + + Ok(Self { + path, + width, + height, + output_width, + output_height, + fps, + duration, + time_base, + stream_index, + frame_cache: LruCache::new( + NonZeroUsize::new(cache_size).unwrap() + ), + input: None, + decoder: None, + last_decoded_ts: -1, + }) + } + + fn get_frame(&mut self, timestamp: f64) -> Result, String> { + use std::time::Instant; + let t_start = Instant::now(); + + // Convert timestamp to frame timestamp + let frame_ts = (timestamp / self.time_base) as i64; + + // Check cache + if let Some(cached_frame) = self.frame_cache.get(&frame_ts) { + eprintln!("[Video Timing] Cache hit for ts={:.3}s ({}ms)", timestamp, t_start.elapsed().as_millis()); + return Ok(cached_frame.clone()); + } + + let t_after_cache = Instant::now(); + + // Determine if we need to seek + // Seek if: no decoder open, going backwards, or jumping forward more than 2 seconds + let need_seek = 
self.decoder.is_none() + || frame_ts < self.last_decoded_ts + || frame_ts > self.last_decoded_ts + (2.0 / self.time_base) as i64; + + if need_seek { + let t_seek_start = Instant::now(); + + // Reopen input + let mut input = ffmpeg::format::input(&self.path) + .map_err(|e| format!("Failed to reopen video: {}", e))?; + + // Seek to timestamp + input.seek(frame_ts, ..frame_ts) + .map_err(|e| format!("Seek failed: {}", e))?; + + let context_decoder = ffmpeg::codec::context::Context::from_parameters( + input.streams().best(ffmpeg::media::Type::Video).unwrap().parameters() + ).map_err(|e| e.to_string())?; + + let decoder = context_decoder.decoder().video() + .map_err(|e| e.to_string())?; + + self.input = Some(input); + self.decoder = Some(decoder); + self.last_decoded_ts = -1; // Reset since we seeked + + eprintln!("[Video Timing] Seek took {}ms", t_seek_start.elapsed().as_millis()); + } + + let input = self.input.as_mut().unwrap(); + let decoder = self.decoder.as_mut().unwrap(); + + // Decode frames until we find the one closest to our target timestamp + let mut best_frame_data: Option> = None; + let mut best_frame_ts: Option = None; + let t_decode_start = Instant::now(); + let mut decode_count = 0; + let mut scale_time_ms = 0u128; + + for (stream, packet) in input.packets() { + if stream.index() == self.stream_index { + decoder.send_packet(&packet) + .map_err(|e| e.to_string())?; + + let mut frame = ffmpeg::util::frame::Video::empty(); + while decoder.receive_frame(&mut frame).is_ok() { + decode_count += 1; + let current_frame_ts = frame.timestamp().unwrap_or(0); + self.last_decoded_ts = current_frame_ts; // Update last decoded position + + // Check if this frame is closer to our target than the previous best + let is_better = match best_frame_ts { + None => true, + Some(best_ts) => { + (current_frame_ts - frame_ts).abs() < (best_ts - frame_ts).abs() + } + }; + + if is_better { + let t_scale_start = Instant::now(); + + // Convert to RGBA and scale to output size + let mut scaler = ffmpeg::software::scaling::context::Context::get( + frame.format(), + frame.width(), + frame.height(), + ffmpeg::format::Pixel::RGBA, + self.output_width, + self.output_height, + ffmpeg::software::scaling::flag::Flags::BILINEAR, + ).map_err(|e| e.to_string())?; + + let mut rgb_frame = ffmpeg::util::frame::Video::empty(); + scaler.run(&frame, &mut rgb_frame) + .map_err(|e| e.to_string())?; + + // Remove stride padding to create tightly packed RGBA data + let width = self.output_width as usize; + let height = self.output_height as usize; + let stride = rgb_frame.stride(0); + let row_size = width * 4; // RGBA = 4 bytes per pixel + let source_data = rgb_frame.data(0); + + let mut packed_data = Vec::with_capacity(row_size * height); + for y in 0..height { + let row_start = y * stride; + let row_end = row_start + row_size; + packed_data.extend_from_slice(&source_data[row_start..row_end]); + } + + scale_time_ms += t_scale_start.elapsed().as_millis(); + best_frame_data = Some(packed_data); + best_frame_ts = Some(current_frame_ts); + } + + // If we've reached or passed the target timestamp, we can stop + if current_frame_ts >= frame_ts { + // Found our frame, cache and return it + if let Some(data) = best_frame_data { + let total_time = t_start.elapsed().as_millis(); + let decode_time = t_decode_start.elapsed().as_millis(); + eprintln!("[Video Timing] ts={:.3}s | Decoded {} frames in {}ms | Scale: {}ms | Total: {}ms", + timestamp, decode_count, decode_time, scale_time_ms, total_time); + self.frame_cache.put(frame_ts, 
data.clone()); + return Ok(data); + } + break; + } + } + } + } + + eprintln!("[Video Decoder] ERROR: Failed to decode frame for timestamp {}", timestamp); + Err("Failed to decode frame".to_string()) + } +} + +pub struct VideoState { + pool: Vec>>, + next_pool_index: usize, + cache_size: usize, +} + +impl Default for VideoState { + fn default() -> Self { + Self { + pool: Vec::new(), + next_pool_index: 0, + cache_size: 20, // Default cache size + } + } +} + +#[tauri::command] +pub async fn video_load_file( + video_state: tauri::State<'_, Arc>>, + audio_state: tauri::State<'_, Arc>>, + path: String, +) -> Result { + ffmpeg::init().map_err(|e| e.to_string())?; + + // Open input to check for audio stream + let mut input = ffmpeg::format::input(&path) + .map_err(|e| format!("Failed to open video: {}", e))?; + + let audio_stream_opt = input.streams() + .best(ffmpeg::media::Type::Audio); + + let has_audio = audio_stream_opt.is_some(); + + // Extract audio if present + let (audio_pool_index, audio_duration, audio_sample_rate, audio_channels, audio_waveform) = if has_audio { + let audio_stream = audio_stream_opt.unwrap(); + let audio_index = audio_stream.index(); + + // Get audio properties + let context_decoder = ffmpeg::codec::context::Context::from_parameters( + audio_stream.parameters() + ).map_err(|e| e.to_string())?; + + let mut audio_decoder = context_decoder.decoder().audio() + .map_err(|e| e.to_string())?; + + let sample_rate = audio_decoder.rate(); + let channels = audio_decoder.channels() as u32; + + // Decode all audio frames + let mut audio_samples: Vec = Vec::new(); + + for (stream, packet) in input.packets() { + if stream.index() == audio_index { + audio_decoder.send_packet(&packet) + .map_err(|e| e.to_string())?; + + let mut audio_frame = ffmpeg::util::frame::Audio::empty(); + while audio_decoder.receive_frame(&mut audio_frame).is_ok() { + // Convert audio to f32 planar format + let format = audio_frame.format(); + let frame_channels = audio_frame.channels() as usize; + + // Create resampler to convert to f32 planar + let mut resampler = ffmpeg::software::resampling::context::Context::get( + format, + audio_frame.channel_layout(), + sample_rate, + ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed), + audio_frame.channel_layout(), + sample_rate, + ).map_err(|e| e.to_string())?; + + let mut resampled_frame = ffmpeg::util::frame::Audio::empty(); + resampler.run(&audio_frame, &mut resampled_frame) + .map_err(|e| e.to_string())?; + + // Extract f32 samples (interleaved format) + let data_ptr = resampled_frame.data(0).as_ptr() as *const f32; + let total_samples = resampled_frame.samples() * frame_channels; + let samples_slice = unsafe { + std::slice::from_raw_parts(data_ptr, total_samples) + }; + + audio_samples.extend_from_slice(samples_slice); + } + } + } + + // Flush audio decoder + audio_decoder.send_eof().map_err(|e| e.to_string())?; + let mut audio_frame = ffmpeg::util::frame::Audio::empty(); + while audio_decoder.receive_frame(&mut audio_frame).is_ok() { + let format = audio_frame.format(); + let frame_channels = audio_frame.channels() as usize; + + let mut resampler = ffmpeg::software::resampling::context::Context::get( + format, + audio_frame.channel_layout(), + sample_rate, + ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed), + audio_frame.channel_layout(), + sample_rate, + ).map_err(|e| e.to_string())?; + + let mut resampled_frame = ffmpeg::util::frame::Audio::empty(); + resampler.run(&audio_frame, &mut resampled_frame) + .map_err(|e| 
e.to_string())?; + + let data_ptr = resampled_frame.data(0).as_ptr() as *const f32; + let total_samples = resampled_frame.samples() * frame_channels; + let samples_slice = unsafe { + std::slice::from_raw_parts(data_ptr, total_samples) + }; + + audio_samples.extend_from_slice(samples_slice); + } + + // Calculate audio duration + let total_samples_per_channel = audio_samples.len() / channels as usize; + let audio_duration = total_samples_per_channel as f64 / sample_rate as f64; + + // Generate waveform + let target_peaks = ((audio_duration * 300.0) as usize).clamp(1000, 20000); + let waveform = generate_waveform(&audio_samples, channels, target_peaks); + + // Send audio to DAW backend + let mut audio_state_guard = audio_state.lock().unwrap(); + let audio_pool_index = audio_state_guard.next_pool_index; + audio_state_guard.next_pool_index += 1; + + if let Some(controller) = &mut audio_state_guard.controller { + controller.add_audio_file( + path.clone(), + audio_samples, + channels, + sample_rate, + ); + } + drop(audio_state_guard); + + (Some(audio_pool_index), Some(audio_duration), Some(sample_rate), Some(channels), Some(waveform)) + } else { + (None, None, None, None, None) + }; + + // Create video decoder with max dimensions for playback (800x600) + // This scales down high-res videos to reduce data transfer + let mut video_state_guard = video_state.lock().unwrap(); + let pool_index = video_state_guard.next_pool_index; + video_state_guard.next_pool_index += 1; + + let decoder = VideoDecoder::new(path, video_state_guard.cache_size, Some(800), Some(600))?; + + let metadata = VideoFileMetadata { + pool_index, + width: decoder.output_width, // Return scaled dimensions to JS + height: decoder.output_height, + fps: decoder.fps, + duration: decoder.duration, + has_audio, + audio_pool_index, + audio_duration, + audio_sample_rate, + audio_channels, + audio_waveform, + }; + + video_state_guard.pool.push(Arc::new(Mutex::new(decoder))); + + Ok(metadata) +} + +fn generate_waveform(audio_data: &[f32], channels: u32, target_peaks: usize) -> Vec { + let total_samples = audio_data.len(); + let samples_per_channel = total_samples / channels as usize; + let samples_per_peak = (samples_per_channel / target_peaks).max(1); + + let mut waveform = Vec::new(); + + for peak_idx in 0..target_peaks { + let start_sample = peak_idx * samples_per_peak; + let end_sample = ((peak_idx + 1) * samples_per_peak).min(samples_per_channel); + + if start_sample >= samples_per_channel { + break; + } + + let mut min_val = 0.0f32; + let mut max_val = 0.0f32; + + for sample_idx in start_sample..end_sample { + // Average across channels + let mut channel_sum = 0.0f32; + for ch in 0..channels as usize { + let idx = sample_idx * channels as usize + ch; + if idx < total_samples { + channel_sum += audio_data[idx]; + } + } + let avg_sample = channel_sum / channels as f32; + + min_val = min_val.min(avg_sample); + max_val = max_val.max(avg_sample); + } + + waveform.push(WaveformPeak { + min: min_val, + max: max_val, + }); + } + + waveform +} + +// Use a custom serializer wrapper for efficient binary transfer +#[derive(serde::Serialize)] +struct BinaryFrame(#[serde(with = "serde_bytes")] Vec); + +#[tauri::command] +pub async fn video_get_frame( + state: tauri::State<'_, Arc>>, + pool_index: usize, + timestamp: f64, +) -> Result, String> { + let video_state = state.lock().unwrap(); + + let decoder = video_state.pool.get(pool_index) + .ok_or("Invalid pool index")? 
+ .clone(); + + drop(video_state); + + let mut decoder = decoder.lock().unwrap(); + decoder.get_frame(timestamp) +} + +#[tauri::command] +pub async fn video_set_cache_size( + state: tauri::State<'_, Arc>>, + cache_size: usize, +) -> Result<(), String> { + let mut video_state = state.lock().unwrap(); + video_state.cache_size = cache_size; + Ok(()) +} + +#[tauri::command] +pub async fn video_get_pool_info( + state: tauri::State<'_, Arc>>, + pool_index: usize, +) -> Result<(u32, u32, f64), String> { + let video_state = state.lock().unwrap(); + let decoder = video_state.pool.get(pool_index) + .ok_or("Invalid pool index")? + .lock().unwrap(); + + Ok(( + decoder.output_width, // Return scaled dimensions + decoder.output_height, + decoder.fps + )) +} diff --git a/src/actions/index.js b/src/actions/index.js index d1d7000..ab5f79d 100644 --- a/src/actions/index.js +++ b/src/actions/index.js @@ -12,7 +12,7 @@ import { Frame } from '../models/animation.js'; import { GraphicsObject } from '../models/graphics-object.js'; -import { Layer, AudioTrack } from '../models/layer.js'; +import { VectorLayer, AudioTrack, VideoLayer } from '../models/layer.js'; import { arraysAreEqual, lerp, @@ -161,6 +161,7 @@ let redoStack = null; let updateMenu = null; let updateLayers = null; let updateUI = null; +let updateVideoFrames = null; let updateInfopanel = null; let invoke = null; let config = null; @@ -186,6 +187,7 @@ export function initializeActions(deps) { updateMenu = deps.updateMenu; updateLayers = deps.updateLayers; updateUI = deps.updateUI; + updateVideoFrames = deps.updateVideoFrames; updateInfopanel = deps.updateInfopanel; invoke = deps.invoke; config = deps.config; @@ -587,6 +589,147 @@ export const actions = { } }, }, + addVideo: { + create: (filePath, object, videoname) => { + redoStack.length = 0; + let action = { + filePath: filePath, + videoname: videoname, + layeruuid: uuidv4(), + object: object.idx, + }; + undoStack.push({ name: "addVideo", action: action }); + actions.addVideo.execute(action); + updateMenu(); + }, + execute: async (action) => { + // Create new VideoLayer + let newVideoLayer = new VideoLayer(action.layeruuid, action.videoname); + let object = pointerList[action.object]; + + // Add layer to object + object.layers.push(newVideoLayer); + + // Update UI + updateLayers(); + if (context.timelineWidget) { + context.timelineWidget.requestRedraw(); + } + + // Load video asynchronously + try { + const metadata = await invoke('video_load_file', { + path: action.filePath + }); + + // Add clip to video layer + await newVideoLayer.addClip( + metadata.pool_index, + 0, // startTime + metadata.duration, + 0, // offset + action.videoname, + metadata.duration // sourceDuration + ); + + // If video has audio, create linked AudioTrack + if (metadata.has_audio && metadata.audio_pool_index !== null) { + const audioTrackUuid = uuidv4(); + const audioTrackName = `${action.videoname} (Audio)`; + const newAudioTrack = new AudioTrack(audioTrackUuid, audioTrackName); + + // Initialize track in backend + await newAudioTrack.initializeTrack(); + + // Add audio clip using the extracted audio + const audioClipId = newAudioTrack.clips.length; + await invoke('audio_add_clip', { + trackId: newAudioTrack.audioTrackId, + poolIndex: metadata.audio_pool_index, + startTime: 0, + duration: metadata.audio_duration, + offset: 0 + }); + + const audioClip = { + clipId: audioClipId, + poolIndex: metadata.audio_pool_index, + name: audioTrackName, + startTime: 0, + duration: metadata.audio_duration, + offset: 0, + waveform: 
metadata.audio_waveform, + sourceDuration: metadata.audio_duration + }; + newAudioTrack.clips.push(audioClip); + + // Link the clips to each other + const videoClip = newVideoLayer.clips[0]; // The video clip we just added + if (videoClip) { + videoClip.linkedAudioClip = audioClip; + audioClip.linkedVideoClip = videoClip; + } + + // Also keep track-level references for convenience + newVideoLayer.linkedAudioTrack = newAudioTrack; + newAudioTrack.linkedVideoLayer = newVideoLayer; + + // Add audio track to object + object.audioTracks.push(newAudioTrack); + + // Store reference for rollback + action.audioTrackUuid = audioTrackUuid; + + console.log(`Video audio extracted: ${metadata.audio_duration}s, ${metadata.audio_sample_rate}Hz, ${metadata.audio_channels}ch`); + } + + // Update UI with real clip data + updateLayers(); + if (context.timelineWidget) { + context.timelineWidget.requestRedraw(); + } + + // Make this the active layer + if (context.activeObject) { + context.activeObject.activeLayer = newVideoLayer; + updateLayers(); + } + + // Fetch first frame + if (updateVideoFrames) { + await updateVideoFrames(context.activeObject.currentTime || 0); + } + + // Trigger redraw to show the first frame + updateUI(); + + console.log(`Video loaded: ${action.videoname}, ${metadata.width}x${metadata.height}, ${metadata.duration}s`); + } catch (error) { + console.error('Failed to load video:', error); + } + }, + rollback: (action) => { + let object = pointerList[action.object]; + let layer = pointerList[action.layeruuid]; + object.layers.splice(object.layers.indexOf(layer), 1); + + // Remove linked audio track if it was created + if (action.audioTrackUuid) { + let audioTrack = pointerList[action.audioTrackUuid]; + if (audioTrack) { + const index = object.audioTracks.indexOf(audioTrack); + if (index !== -1) { + object.audioTracks.splice(index, 1); + } + } + } + + updateLayers(); + if (context.timelineWidget) { + context.timelineWidget.requestRedraw(); + } + }, + }, addMIDI: { create: (filePath, object, midiname) => { redoStack.length = 0; @@ -832,8 +975,8 @@ export const actions = { }, execute: (action) => { let object = pointerList[action.object]; - let layer = new Layer(action.uuid); - layer.name = `Layer ${object.layers.length + 1}`; + let layer = new VectorLayer(action.uuid); + layer.name = `VectorLayer ${object.layers.length + 1}`; object.layers.push(layer); object.currentLayer = object.layers.indexOf(layer); updateLayers(); @@ -854,7 +997,7 @@ export const actions = { redoStack.length = 0; // Don't allow deleting the only layer if (context.activeObject.layers.length == 1) return; - if (!(layer instanceof Layer)) { + if (!(layer instanceof VectorLayer)) { layer = context.activeObject.activeLayer; } let action = { @@ -929,8 +1072,8 @@ export const actions = { let object = GraphicsObject.fromJSON(action.object); activeObject.addObject(object); break; - case "Layer": - let layer = Layer.fromJSON(action.object); + case "VectorLayer": + let layer = VectorLayer.fromJSON(action.object); activeObject.addLayer(layer); } updateUI(); @@ -943,7 +1086,7 @@ export const actions = { let object = pointerList[action.object.idx]; activeObject.removeChild(object); break; - case "Layer": + case "VectorLayer": let layer = pointerList[action.object.idx]; activeObject.removeLayer(layer); } diff --git a/src/main.js b/src/main.js index f87a3d9..e47be2d 100644 --- a/src/main.js +++ b/src/main.js @@ -95,6 +95,7 @@ import { import { VectorLayer, AudioTrack, + VideoLayer, initializeLayerDependencies } from "./models/layer.js"; 
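Editor's note: addVideo links the extracted audio to its video clip in both directions (videoClip.linkedAudioClip / audioClip.linkedVideoClip), and the clip-edge drag state later in widgets.js snapshots initialLinkedVideoOffset, so trims are evidently meant to propagate across that link. A sketch of one plausible propagation step, assuming the video side is frontend-only state (video clips are plain objects on the VideoLayer; only audio clips live in the DAW backend); syncLinkedVideoClip is a hypothetical helper, not part of this patch.

// Hypothetical: mirror an audio-clip trim onto its linked video clip.
function syncLinkedVideoClip(audioClip) {
  const videoClip = audioClip.linkedVideoClip;
  if (!videoClip) return;

  // Video clips are drawn straight from these fields (layer.js computes
  // videoTimestamp = clip.offset + (currentTime - clip.startTime)),
  // so no backend call is needed for the video side.
  videoClip.startTime = audioClip.startTime;
  videoClip.duration = audioClip.duration;
  videoClip.offset = audioClip.offset;
}

After syncing, a call to updateVideoFrames(context.activeObject.currentTime) would re-fetch the frame at the new offset before the next redraw.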
import { @@ -135,6 +136,7 @@ const { getVersion } = window.__TAURI__.app; // Supported file extensions const imageExtensions = ["png", "gif", "avif", "jpg", "jpeg"]; const audioExtensions = ["mp3", "wav", "aiff", "ogg", "flac"]; +const videoExtensions = ["mp4", "mov", "avi", "mkv", "webm", "m4v"]; const midiExtensions = ["mid", "midi"]; const beamExtensions = ["beam"]; @@ -343,6 +345,7 @@ let mouseEvent; window.context = context; window.actions = actions; window.addKeyframeAtPlayhead = addKeyframeAtPlayhead; +window.updateVideoFrames = null; // Will be set after function is defined function uuidv4() { return "10000000-1000-4000-8000-100000000000".replace(/[018]/g, (c) => @@ -1009,6 +1012,38 @@ function playbackLoop() { } } +// Update video frames for all VideoLayers in the scene +async function updateVideoFrames(currentTime) { + // Recursively find all VideoLayers in the scene + function findVideoLayers(obj) { + const videoLayers = []; + if (obj.layers) { + for (let layer of obj.layers) { + if (layer.type === 'video') { + videoLayers.push(layer); + } + } + } + // Recursively check children (GraphicsObjects can contain other GraphicsObjects) + if (obj.children) { + for (let child of obj.children) { + videoLayers.push(...findVideoLayers(child)); + } + } + return videoLayers; + } + + const videoLayers = findVideoLayers(context.activeObject); + + // Update all video layers in parallel + await Promise.all(videoLayers.map(layer => layer.updateFrame(currentTime))); + + // Note: No updateUI() call here - renderUI() will draw after awaiting this function +} + +// Expose updateVideoFrames globally +window.updateVideoFrames = updateVideoFrames; + // Single-step forward by one frame/second function advance() { if (context.timelineWidget?.timelineState?.timeFormat === "frames") { @@ -1025,6 +1060,9 @@ function advance() { // Sync DAW backend invoke('audio_seek', { seconds: context.activeObject.currentTime }); + // Update video frames + updateVideoFrames(context.activeObject.currentTime); + updateLayers(); updateMenu(); updateUI(); @@ -1108,6 +1146,10 @@ async function handleAudioEvent(event) { if (context.timelineWidget?.timelineState) { context.timelineWidget.timelineState.currentTime = quantizedTime; } + + // Update video frames + updateVideoFrames(quantizedTime); + // Update time display if (context.updateTimeDisplay) { context.updateTimeDisplay(); @@ -2329,6 +2371,10 @@ async function importFile() { name: "Audio files", extensions: audioExtensions, }, + { + name: "Video files", + extensions: videoExtensions, + }, { name: "MIDI files", extensions: midiExtensions, @@ -2384,10 +2430,12 @@ async function importFile() { let usedFilterIndex = 0; if (audioExtensions.includes(ext)) { usedFilterIndex = 1; // Audio + } else if (videoExtensions.includes(ext)) { + usedFilterIndex = 2; // Video } else if (midiExtensions.includes(ext)) { - usedFilterIndex = 2; // MIDI + usedFilterIndex = 3; // MIDI } else if (beamExtensions.includes(ext)) { - usedFilterIndex = 3; // Lightningbeam + usedFilterIndex = 4; // Lightningbeam } else { usedFilterIndex = 0; // Image (default) } @@ -2454,6 +2502,9 @@ async function importFile() { } else if (audioExtensions.includes(ext)) { // Handle audio files - pass file path directly to backend actions.addAudio.create(path, context.activeObject, filename); + } else if (videoExtensions.includes(ext)) { + // Handle video files + actions.addVideo.create(path, context.activeObject, filename); } else if (midiExtensions.includes(ext)) { // Handle MIDI files actions.addMIDI.create(path, 
context.activeObject, filename); @@ -4804,6 +4855,12 @@ function timelineV2() { timelineWidget.lastDragEvent = e; timelineWidget.handleMouseEvent("mousemove", x, y); + + // Update cursor based on widget's cursor property + if (timelineWidget.cursor) { + canvas.style.cursor = timelineWidget.cursor; + } + updateCanvasSize(); // Redraw after interaction }); @@ -5520,7 +5577,12 @@ function updateUI() { context.updateUI = updateUI; context.updateMenu = updateMenu; -function renderUI() { +async function renderUI() { + // Update video frames BEFORE drawing + if (context.activeObject) { + await updateVideoFrames(context.activeObject.currentTime); + } + for (let canvas of canvases) { let ctx = canvas.getContext("2d"); ctx.resetTransform(); @@ -6548,6 +6610,11 @@ async function renderMenu() { action: actions.addLayer.create, accelerator: getShortcut("addLayer"), }, + { + text: "Add Video Layer", + enabled: true, + action: addVideoLayer, + }, { text: "Add Audio Track", enabled: true, @@ -10787,10 +10854,33 @@ function getMimeType(filePath) { } -function renderAll() { +let renderInProgress = false; +let rafScheduled = false; + +// FPS tracking +let lastFpsLogTime = 0; +let frameCount = 0; +let fpsHistory = []; + +async function renderAll() { + rafScheduled = false; + + // Skip if a render is already in progress (prevent stacking async calls) + if (renderInProgress) { + // Schedule another attempt if not already scheduled + if (!rafScheduled) { + rafScheduled = true; + requestAnimationFrame(renderAll); + } + return; + } + + renderInProgress = true; + const renderStartTime = performance.now(); + try { if (uiDirty) { - renderUI(); + await renderUI(); uiDirty = false; } if (layersDirty) { @@ -10823,7 +10913,33 @@ function renderAll() { repeatCount = 2; } } finally { - requestAnimationFrame(renderAll); + renderInProgress = false; + + // FPS logging (only when playing) + if (playing) { + frameCount++; + const now = performance.now(); + const renderTime = now - renderStartTime; + + if (now - lastFpsLogTime >= 1000) { + const fps = frameCount / ((now - lastFpsLogTime) / 1000); + fpsHistory.push({ fps, renderTime }); + console.log(`[FPS] ${fps.toFixed(1)} fps | Render time: ${renderTime.toFixed(1)}ms`); + frameCount = 0; + lastFpsLogTime = now; + + // Keep only last 10 samples + if (fpsHistory.length > 10) { + fpsHistory.shift(); + } + } + } + + // Schedule next frame if not already scheduled + if (!rafScheduled) { + rafScheduled = true; + requestAnimationFrame(renderAll); + } } } @@ -10834,6 +10950,7 @@ initializeActions({ updateMenu, updateLayers, updateUI, + updateVideoFrames, updateInfopanel, invoke, config @@ -10923,6 +11040,33 @@ async function addEmptyMIDITrack() { } } +async function addVideoLayer() { + console.log('[addVideoLayer] Creating new video layer'); + const layerName = `Video ${context.activeObject.layers.filter(l => l.type === 'video').length + 1}`; + const layerUuid = uuidv4(); + + try { + // Create new VideoLayer + const newVideoLayer = new VideoLayer(layerUuid, layerName); + + // Add layer to active object + context.activeObject.layers.push(newVideoLayer); + + // Select the newly created layer + context.activeObject.activeLayer = newVideoLayer; + + // Update UI + updateLayers(); + if (context.timelineWidget) { + context.timelineWidget.requestRedraw(); + } + + console.log('Empty video layer created:', layerName); + } catch (error) { + console.error('Failed to create video layer:', error); + } +} + // MIDI Command Wrappers // Note: getAvailableInstruments() removed - now using node-based 
instruments diff --git a/src/models/graphics-object.js b/src/models/graphics-object.js index 07adc4d..f17538a 100644 --- a/src/models/graphics-object.js +++ b/src/models/graphics-object.js @@ -1,7 +1,7 @@ // GraphicsObject model: Main container for layers and animation import { context, config, pointerList, startProps } from '../state.js'; -import { VectorLayer, AudioTrack } from './layer.js'; +import { VectorLayer, AudioTrack, VideoLayer } from './layer.js'; import { TempShape } from './shapes.js'; import { AnimationCurve, Keyframe } from './animation.js'; import { Widget } from '../widgets.js'; @@ -45,8 +45,20 @@ class GraphicsObject extends Widget { this.name = this.idx; this.currentFrameNum = 0; // LEGACY: kept for backwards compatibility - this.currentTime = 0; // New: continuous time for AnimationData curves + this._currentTime = 0; // Internal storage for currentTime this.currentLayer = 0; + + // Make currentTime a getter/setter property + Object.defineProperty(this, 'currentTime', { + get: function() { + return this._currentTime; + }, + set: function(value) { + this._currentTime = value; + }, + enumerable: true, + configurable: true + }); this._activeAudioTrack = null; // Reference to active audio track (if any) // Initialize children and audioTracks based on initialChildType @@ -56,6 +68,9 @@ class GraphicsObject extends Widget { if (initialChildType === 'layer') { this.children = [new VectorLayer(uuid + "-L1", this)]; this.currentLayer = 0; // Set first layer as active + } else if (initialChildType === 'video') { + this.children = [new VideoLayer(uuid + "-V1", "Video 1")]; + this.currentLayer = 0; // Set first video layer as active } else if (initialChildType === 'midi') { const midiTrack = new AudioTrack(uuid + "-M1", "MIDI 1", 'midi'); this.audioTracks.push(midiTrack); @@ -103,7 +118,12 @@ class GraphicsObject extends Widget { graphicsObject.parent = pointerList[json.parent] } for (let layer of json.layers) { - graphicsObject.layers.push(VectorLayer.fromJSON(layer, graphicsObject)); + if (layer.type === 'VideoLayer') { + graphicsObject.layers.push(VideoLayer.fromJSON(layer)); + } else { + // Default to VectorLayer + graphicsObject.layers.push(VectorLayer.fromJSON(layer, graphicsObject)); + } } // Handle audioTracks (may not exist in older files) if (json.audioTracks) { @@ -177,9 +197,20 @@ class GraphicsObject extends Widget { // Check visual layers for (let layer of this.layers) { + // Check animation data duration if (layer.animationData && layer.animationData.duration > maxDuration) { maxDuration = layer.animationData.duration; } + + // Check video layer clips (VideoLayer has clips like AudioTrack) + if (layer.type === 'video' && layer.clips) { + for (let clip of layer.clips) { + const clipEnd = clip.startTime + clip.duration; + if (clipEnd > maxDuration) { + maxDuration = clipEnd; + } + } + } } // Check audio tracks @@ -300,6 +331,12 @@ class GraphicsObject extends Widget { for (let layer of this.layers) { if (context.activeObject == this && !layer.visible) continue; + // Handle VideoLayer differently - call its draw method + if (layer.type === 'video') { + layer.draw(context); + continue; + } + // Draw activeShape (shape being drawn in progress) for active layer only if (layer === context.activeLayer && layer.activeShape) { let cxt = {...context}; diff --git a/src/models/layer.js b/src/models/layer.js index 55a66de..b6a9ef3 100644 --- a/src/models/layer.js +++ b/src/models/layer.js @@ -1245,4 +1245,269 @@ class AudioTrack { } } -export { VectorLayer, AudioTrack }; +class 
VideoLayer extends Widget { + constructor(uuid, name) { + super(0, 0); + if (!uuid) { + this.idx = uuidv4(); + } else { + this.idx = uuid; + } + this.name = name || "Video"; + this.type = 'video'; + this.visible = true; + this.audible = true; + this.animationData = new AnimationData(this); + + // Empty arrays for layer compatibility + Object.defineProperty(this, 'shapes', { + value: Object.freeze([]), + writable: false, + enumerable: true, + configurable: false + }); + Object.defineProperty(this, 'children', { + value: Object.freeze([]), + writable: false, + enumerable: true, + configurable: false + }); + + // Video clips on this layer + // { clipId, poolIndex, name, startTime, duration, offset, width, height } + this.clips = []; + + // Associated audio track (if video has audio) + this.linkedAudioTrack = null; // Reference to AudioTrack + + // Timeline display + this.collapsed = false; + this.curvesMode = 'segment'; + this.curvesHeight = 150; + + pointerList[this.idx] = this; + } + + async addClip(poolIndex, startTime, duration, offset = 0.0, name = '', sourceDuration = null) { + const poolInfo = await invoke('video_get_pool_info', { poolIndex }); + // poolInfo is [width, height, fps] tuple from Rust + const [width, height, fps] = poolInfo; + + this.clips.push({ + clipId: this.clips.length, + poolIndex, + name: name || `Video ${this.clips.length + 1}`, + startTime, + duration, + offset, + width, + height, + sourceDuration: sourceDuration || duration // Store original file duration + }); + + console.log(`Video clip added: ${name}, ${width}x${height}, duration: ${duration}s`); + } + + // Pre-fetch frames for current time (call before draw) + async updateFrame(currentTime) { + // Prevent concurrent calls - if already updating, skip + if (this.updateInProgress) { + return; + } + this.updateInProgress = true; + + try { + for (let clip of this.clips) { + // Check if clip is active at current time + if (currentTime < clip.startTime || + currentTime >= clip.startTime + clip.duration) { + clip.currentFrame = null; + continue; + } + + // Calculate video timestamp from clip time + const clipTime = currentTime - clip.startTime; + const videoTimestamp = clip.offset + clipTime; + + // Only fetch if timestamp changed + if (clip.lastFetchedTimestamp === videoTimestamp && clip.currentFrame) { + continue; + } + + // Skip if already fetching this frame + if (clip.fetchInProgress === videoTimestamp) { + continue; + } + + clip.fetchInProgress = videoTimestamp; + clip.lastFetchedTimestamp = videoTimestamp; + + try { + // Request frame from Rust backend + const t_start = performance.now(); + let frameData = await invoke('video_get_frame', { + poolIndex: clip.poolIndex, + timestamp: videoTimestamp + }); + const t_after_ipc = performance.now(); + + // Handle different formats that Tauri might return + // ByteBuf from Rust can come as Uint8Array or Array depending on serialization + if (!(frameData instanceof Uint8Array)) { + frameData = new Uint8Array(frameData); + } + + // Validate frame data size + const expectedSize = clip.width * clip.height * 4; // RGBA = 4 bytes per pixel + + if (frameData.length !== expectedSize) { + throw new Error(`Invalid frame data size: got ${frameData.length}, expected ${expectedSize}`); + } + + // Convert to ImageData + const t_before_conversion = performance.now(); + const imageData = new ImageData( + new Uint8ClampedArray(frameData), + clip.width, + clip.height + ); + const t_after_conversion = performance.now(); + + // Create or reuse temp canvas + if (!clip.frameCanvas) { + 
clip.frameCanvas = document.createElement('canvas'); + clip.frameCanvas.width = clip.width; + clip.frameCanvas.height = clip.height; + } + + const tempCtx = clip.frameCanvas.getContext('2d'); + const t_before_putimage = performance.now(); + tempCtx.putImageData(imageData, 0, 0); + const t_after_putimage = performance.now(); + + clip.currentFrame = clip.frameCanvas; + + // Log detailed timing breakdown + const total_time = t_after_putimage - t_start; + const ipc_time = t_after_ipc - t_start; + const conversion_time = t_after_conversion - t_before_conversion; + const putimage_time = t_after_putimage - t_before_putimage; + + console.log(`[JS Video Timing] ts=${videoTimestamp.toFixed(3)}s | Total: ${total_time.toFixed(1)}ms | IPC: ${ipc_time.toFixed(1)}ms (${(ipc_time/total_time*100).toFixed(0)}%) | Convert: ${conversion_time.toFixed(1)}ms | PutImage: ${putimage_time.toFixed(1)}ms | Size: ${(frameData.length/1024/1024).toFixed(2)}MB`); + } catch (error) { + console.error('Failed to get video frame:', error); + clip.currentFrame = null; + } finally { + clip.fetchInProgress = null; + } + } + } finally { + this.updateInProgress = false; + } + } + + // Draw cached frames (synchronous) + draw(cxt, currentTime) { + if (!this.visible) { + return; + } + + const ctx = cxt.ctx || cxt; + + // Use currentTime from context if not provided + if (currentTime === undefined) { + currentTime = cxt.activeObject?.currentTime || 0; + } + + for (let clip of this.clips) { + // Check if clip is active at current time + if (currentTime < clip.startTime || + currentTime >= clip.startTime + clip.duration) { + continue; + } + + // Draw cached frame if available + if (clip.currentFrame) { + try { + // Scale to fit canvas while maintaining aspect ratio + const canvasWidth = config.fileWidth; + const canvasHeight = config.fileHeight; + const scale = Math.min( + canvasWidth / clip.width, + canvasHeight / clip.height + ); + const scaledWidth = clip.width * scale; + const scaledHeight = clip.height * scale; + const x = (canvasWidth - scaledWidth) / 2; + const y = (canvasHeight - scaledHeight) / 2; + + ctx.drawImage(clip.currentFrame, x, y, scaledWidth, scaledHeight); + } catch (error) { + console.error('Failed to draw video frame:', error); + } + } else { + // Draw placeholder if frame not loaded yet + ctx.save(); + ctx.fillStyle = '#333333'; + ctx.fillRect(0, 0, config.fileWidth, config.fileHeight); + ctx.fillStyle = '#ffffff'; + ctx.font = '24px sans-serif'; + ctx.textAlign = 'center'; + ctx.textBaseline = 'middle'; + ctx.fillText('Loading...', config.fileWidth / 2, config.fileHeight / 2); + ctx.restore(); + } + } + } + + static fromJSON(json) { + const videoLayer = new VideoLayer(json.idx, json.name); + + if (json.animationData) { + videoLayer.animationData = AnimationData.fromJSON(json.animationData, videoLayer); + } + + if (json.clips) { + videoLayer.clips = json.clips; + } + + if (json.linkedAudioTrack) { + // Will be resolved after all objects are loaded + videoLayer.linkedAudioTrack = json.linkedAudioTrack; + } + + videoLayer.visible = json.visible; + videoLayer.audible = json.audible; + + return videoLayer; + } + + toJSON(randomizeUuid = false) { + return { + type: "VideoLayer", + idx: randomizeUuid ? uuidv4() : this.idx, + name: randomizeUuid ? 
this.name + " copy" : this.name, + visible: this.visible, + audible: this.audible, + animationData: this.animationData.toJSON(), + clips: this.clips, + linkedAudioTrack: this.linkedAudioTrack?.idx + }; + } + + copy(idx) { + const json = this.toJSON(true); + json.idx = idx.slice(0, 8) + this.idx.slice(8); + return VideoLayer.fromJSON(json); + } + + // Compatibility methods for layer interface + bbox() { + return { + x: { min: 0, max: config.fileWidth }, + y: { min: 0, max: config.fileHeight } + }; + } +} + +export { VectorLayer, AudioTrack, VideoLayer }; diff --git a/src/state.js b/src/state.js index f287cc2..674f842 100644 --- a/src/state.js +++ b/src/state.js @@ -85,6 +85,7 @@ export let config = { reopenLastSession: false, lastImportFilterIndex: 0, // Index of last used filter in import dialog (0=Image, 1=Audio, 2=Lightningbeam) audioBufferSize: 256, // Audio buffer size in frames (128, 256, 512, 1024, etc. - requires restart) + minClipDuration: 0.1, // Minimum clip duration in seconds when trimming // Layout settings currentLayout: "animation", // Current active layout key defaultLayout: "animation", // Default layout for new files diff --git a/src/timeline.js b/src/timeline.js index 1b81101..81eae63 100644 --- a/src/timeline.js +++ b/src/timeline.js @@ -571,9 +571,12 @@ class TrackHierarchy { // Iterate through layers (GraphicsObject.children are Layers) for (let layer of graphicsObject.children) { + // Determine layer type - check if it's a VideoLayer + const layerType = layer.type === 'video' ? 'video' : 'layer' + // Add layer track const layerTrack = { - type: 'layer', + type: layerType, object: layer, name: layer.name || 'Layer', indent: 0, diff --git a/src/widgets.js b/src/widgets.js index ae2afeb..5640a4e 100644 --- a/src/widgets.js +++ b/src/widgets.js @@ -947,6 +947,112 @@ class TimelineWindowV2 extends Widget { /** * Draw track backgrounds in timeline area (Phase 2) */ + // Create a cached pattern for the timeline grid + createTimelinePattern(trackHeight) { + const cacheKey = `${this.timelineState.timeFormat}_${this.timelineState.pixelsPerSecond}_${this.timelineState.framerate}_${this.timelineState.bpm}_${trackHeight}` + + // Return cached pattern if available + if (this.cachedPattern && this.cachedPatternKey === cacheKey) { + return this.cachedPattern + } + + let patternWidth, patternHeight = trackHeight + + if (this.timelineState.timeFormat === 'frames') { + // Pattern for 5 frames + const frameDuration = 1 / this.timelineState.framerate + const frameWidth = frameDuration * this.timelineState.pixelsPerSecond + patternWidth = frameWidth * 5 + } else if (this.timelineState.timeFormat === 'measures') { + // Pattern for one measure + const beatsPerSecond = this.timelineState.bpm / 60 + const beatsPerMeasure = this.timelineState.timeSignature.numerator + const beatWidth = (1 / beatsPerSecond) * this.timelineState.pixelsPerSecond + patternWidth = beatWidth * beatsPerMeasure + } else { + // Pattern for seconds - use 10 second intervals + patternWidth = this.timelineState.pixelsPerSecond * 10 + } + + // Create pattern canvas + const patternCanvas = document.createElement('canvas') + patternCanvas.width = Math.ceil(patternWidth) + patternCanvas.height = patternHeight + const pctx = patternCanvas.getContext('2d') + + // Fill background + pctx.fillStyle = shade + pctx.fillRect(0, 0, patternWidth, patternHeight) + + if (this.timelineState.timeFormat === 'frames') { + const frameDuration = 1 / this.timelineState.framerate + const frameWidth = frameDuration * 
this.timelineState.pixelsPerSecond + + for (let i = 0; i < 5; i++) { + const x = i * frameWidth + if (i === 0) { + // First frame in pattern (every 5th): shade it + pctx.fillStyle = shadow + pctx.fillRect(x, 0, frameWidth, patternHeight) + } else { + // Regular frame: draw edge line + pctx.strokeStyle = shadow + pctx.lineWidth = 1 + pctx.beginPath() + pctx.moveTo(x, 0) + pctx.lineTo(x, patternHeight) + pctx.stroke() + } + } + } else if (this.timelineState.timeFormat === 'measures') { + const beatsPerSecond = this.timelineState.bpm / 60 + const beatsPerMeasure = this.timelineState.timeSignature.numerator + const beatWidth = (1 / beatsPerSecond) * this.timelineState.pixelsPerSecond + + for (let i = 0; i < beatsPerMeasure; i++) { + const x = i * beatWidth + const isMeasureBoundary = i === 0 + const isEvenBeat = (i % 2) === 0 + + pctx.save() + if (isMeasureBoundary) { + pctx.globalAlpha = 1.0 + } else if (isEvenBeat) { + pctx.globalAlpha = 0.5 + } else { + pctx.globalAlpha = 0.25 + } + + pctx.strokeStyle = shadow + pctx.lineWidth = 1 + pctx.beginPath() + pctx.moveTo(x, 0) + pctx.lineTo(x, patternHeight) + pctx.stroke() + pctx.restore() + } + } else { + // Seconds mode: draw lines every second for 10 seconds + const secondWidth = this.timelineState.pixelsPerSecond + + for (let i = 0; i < 10; i++) { + const x = i * secondWidth + pctx.strokeStyle = shadow + pctx.lineWidth = 1 + pctx.beginPath() + pctx.moveTo(x, 0) + pctx.lineTo(x, patternHeight) + pctx.stroke() + } + } + + // Cache the pattern + this.cachedPatternKey = cacheKey + this.cachedPattern = pctx.createPattern(patternCanvas, 'repeat') + + return this.cachedPattern + } + drawTracks(ctx) { ctx.save() ctx.translate(this.trackHeaderWidth, this.ruler.height) // Start after headers, below ruler @@ -966,96 +1072,18 @@ class TimelineWindowV2 extends Widget { const y = this.trackHierarchy.getTrackY(i) const trackHeight = this.trackHierarchy.getTrackHeight(track) - // Draw track background (same color for all tracks) - ctx.fillStyle = shade - ctx.fillRect(0, y, trackAreaWidth, trackHeight) + // Create and apply pattern for this track + const pattern = this.createTimelinePattern(trackHeight) - // Draw interval markings + // Calculate pattern offset based on viewport start time const visibleStartTime = this.timelineState.viewportStartTime - const visibleEndTime = visibleStartTime + (trackAreaWidth / this.timelineState.pixelsPerSecond) + const patternOffsetX = -this.timelineState.timeToPixel(visibleStartTime) - if (this.timelineState.timeFormat === 'frames') { - // Frames mode: mark every frame edge, with every 5th frame shaded - const frameDuration = 1 / this.timelineState.framerate - const startFrame = Math.floor(visibleStartTime / frameDuration) - const endFrame = Math.ceil(visibleEndTime / frameDuration) - - for (let frame = startFrame; frame <= endFrame; frame++) { - const time = frame * frameDuration - const x = this.timelineState.timeToPixel(time) - const nextX = this.timelineState.timeToPixel((frame + 1) * frameDuration) - - if (x >= 0 && x <= trackAreaWidth) { - if (frame % 5 === 0) { - // Every 5th frame: shade the entire frame width - ctx.fillStyle = shadow - ctx.fillRect(x, y, nextX - x, trackHeight) - } else { - // Regular frame: draw edge line - ctx.strokeStyle = shadow - ctx.lineWidth = 1 - ctx.beginPath() - ctx.moveTo(x, y) - ctx.lineTo(x, y + trackHeight) - ctx.stroke() - } - } - } - } else if (this.timelineState.timeFormat === 'measures') { - // Measures mode: draw beats with varying opacity - const beatsPerSecond = 
@@ -966,96 +1072,18 @@ class TimelineWindowV2 extends Widget {
       const y = this.trackHierarchy.getTrackY(i)
       const trackHeight = this.trackHierarchy.getTrackHeight(track)
 
-      // Draw track background (same color for all tracks)
-      ctx.fillStyle = shade
-      ctx.fillRect(0, y, trackAreaWidth, trackHeight)
+      // Create and apply pattern for this track
+      const pattern = this.createTimelinePattern(trackHeight)
 
-      // Draw interval markings
+      // Calculate pattern offset based on viewport start time
       const visibleStartTime = this.timelineState.viewportStartTime
-      const visibleEndTime = visibleStartTime + (trackAreaWidth / this.timelineState.pixelsPerSecond)
+      const patternOffsetX = -this.timelineState.timeToPixel(visibleStartTime)
 
-      if (this.timelineState.timeFormat === 'frames') {
-        // Frames mode: mark every frame edge, with every 5th frame shaded
-        const frameDuration = 1 / this.timelineState.framerate
-        const startFrame = Math.floor(visibleStartTime / frameDuration)
-        const endFrame = Math.ceil(visibleEndTime / frameDuration)
-
-        for (let frame = startFrame; frame <= endFrame; frame++) {
-          const time = frame * frameDuration
-          const x = this.timelineState.timeToPixel(time)
-          const nextX = this.timelineState.timeToPixel((frame + 1) * frameDuration)
-
-          if (x >= 0 && x <= trackAreaWidth) {
-            if (frame % 5 === 0) {
-              // Every 5th frame: shade the entire frame width
-              ctx.fillStyle = shadow
-              ctx.fillRect(x, y, nextX - x, trackHeight)
-            } else {
-              // Regular frame: draw edge line
-              ctx.strokeStyle = shadow
-              ctx.lineWidth = 1
-              ctx.beginPath()
-              ctx.moveTo(x, y)
-              ctx.lineTo(x, y + trackHeight)
-              ctx.stroke()
-            }
-          }
-        }
-      } else if (this.timelineState.timeFormat === 'measures') {
-        // Measures mode: draw beats with varying opacity
-        const beatsPerSecond = this.timelineState.bpm / 60
-        const beatsPerMeasure = this.timelineState.timeSignature.numerator
-        const startBeat = Math.floor(visibleStartTime * beatsPerSecond)
-        const endBeat = Math.ceil(visibleEndTime * beatsPerSecond)
-
-        for (let beat = startBeat; beat <= endBeat; beat++) {
-          const time = beat / beatsPerSecond
-          const x = this.timelineState.timeToPixel(time)
-
-          if (x >= 0 && x <= trackAreaWidth) {
-            // Determine position within the measure
-            const beatInMeasure = beat % beatsPerMeasure
-            const isMeasureBoundary = beatInMeasure === 0
-            const isEvenBeatInMeasure = (beatInMeasure % 2) === 0
-
-            // Set opacity based on position
-            ctx.save()
-            if (isMeasureBoundary) {
-              ctx.globalAlpha = 1.0 // Full opacity for measure boundaries
-            } else if (isEvenBeatInMeasure) {
-              ctx.globalAlpha = 0.5 // Half opacity for even beats
-            } else {
-              ctx.globalAlpha = 0.25 // Quarter opacity for odd beats
-            }
-
-            ctx.strokeStyle = shadow
-            ctx.lineWidth = 1
-            ctx.beginPath()
-            ctx.moveTo(x, y)
-            ctx.lineTo(x, y + trackHeight)
-            ctx.stroke()
-            ctx.restore()
-          }
-        }
-      } else {
-        // Seconds mode: mark every second edge
-        const startSecond = Math.floor(visibleStartTime)
-        const endSecond = Math.ceil(visibleEndTime)
-
-        ctx.strokeStyle = shadow
-        ctx.lineWidth = 1
-
-        for (let second = startSecond; second <= endSecond; second++) {
-          const x = this.timelineState.timeToPixel(second)
-
-          if (x >= 0 && x <= trackAreaWidth) {
-            ctx.beginPath()
-            ctx.moveTo(x, y)
-            ctx.lineTo(x, y + trackHeight)
-            ctx.stroke()
-          }
-        }
-      }
+      ctx.save()
+      ctx.translate(patternOffsetX, y)
+      ctx.fillStyle = pattern
+      ctx.fillRect(-patternOffsetX, 0, trackAreaWidth, trackHeight)
+      ctx.restore()
 
       // Draw track border
       ctx.strokeStyle = shadow
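Aside (not part of the patch): tracing the measures branch of createTimelinePattern with illustrative numbers shows how the tile width that this fill repeats is derived:

```js
// Worked example, numbers chosen for illustration: 120 BPM in 4/4 at 80 px/s.
const bpm = 120, pixelsPerSecond = 80, beatsPerMeasure = 4
const beatsPerSecond = bpm / 60                          // 2 beats per second
const beatWidth = (1 / beatsPerSecond) * pixelsPerSecond // 40 px per beat
const patternWidth = beatWidth * beatsPerMeasure         // 160 px tile = one measure
console.log(beatWidth, patternWidth)                     // 40 160
```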
@@ -1425,19 +1453,25 @@ class TimelineWindowV2 extends Widget {
             const waveformHeight = trackHeight - 14 // Leave padding at top/bottom
             const waveformData = clip.waveform
 
-            // Calculate how many pixels each waveform peak represents
-            const pixelsPerPeak = clipWidth / waveformData.length
+            // Calculate the full source audio duration and pixels per peak based on that
+            const sourceDuration = clip.sourceDuration || clip.duration
+            const pixelsPerSecond = this.timelineState.pixelsPerSecond
+            const fullSourceWidth = sourceDuration * pixelsPerSecond
+            const pixelsPerPeak = fullSourceWidth / waveformData.length
 
-            // Calculate the range of visible peaks
-            const firstVisiblePeak = Math.max(0, Math.floor((visibleStart - startX) / pixelsPerPeak))
-            const lastVisiblePeak = Math.min(waveformData.length - 1, Math.ceil((visibleEnd - startX) / pixelsPerPeak))
+            // Calculate which peak corresponds to the clip's offset (trimmed left edge)
+            const offsetPeakIndex = Math.floor((clip.offset / sourceDuration) * waveformData.length)
+
+            // Calculate the range of visible peaks, accounting for offset
+            const firstVisiblePeak = Math.max(offsetPeakIndex, Math.floor((visibleStart - startX) / pixelsPerPeak) + offsetPeakIndex)
+            const lastVisiblePeak = Math.min(waveformData.length - 1, Math.ceil((visibleEnd - startX) / pixelsPerPeak) + offsetPeakIndex)
 
             // Draw waveform as a filled path
             ctx.beginPath()
 
             // Trace along the max values (left to right)
             for (let i = firstVisiblePeak; i <= lastVisiblePeak; i++) {
-              const peakX = startX + (i * pixelsPerPeak)
+              const peakX = startX + ((i - offsetPeakIndex) * pixelsPerPeak)
               const peak = waveformData[i]
               const maxY = centerY + (peak.max * waveformHeight * 0.5)
 
@@ -1450,7 +1484,7 @@ class TimelineWindowV2 extends Widget {
 
             // Trace back along the min values (right to left)
             for (let i = lastVisiblePeak; i >= firstVisiblePeak; i--) {
-              const peakX = startX + (i * pixelsPerPeak)
+              const peakX = startX + ((i - offsetPeakIndex) * pixelsPerPeak)
               const peak = waveformData[i]
               const minY = centerY + (peak.min * waveformHeight * 0.5)
               ctx.lineTo(peakX, minY)
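Aside (not part of the patch): a worked example of the peak/offset arithmetic above, with illustrative numbers, showing why a trimmed clip starts drawing partway into the waveform array:

```js
// A 10 s source at 100 px/s with 1000 precomputed peaks, trimmed so the clip
// starts 2 s into the source (clip.offset = 2).
const sourceDuration = 10, pixelsPerSecond = 100, peakCount = 1000, offset = 2
const fullSourceWidth = sourceDuration * pixelsPerSecond                   // 1000 px
const pixelsPerPeak = fullSourceWidth / peakCount                          // 1 px per peak
const offsetPeakIndex = Math.floor((offset / sourceDuration) * peakCount)  // 200
// Peak i lands at startX + (i - offsetPeakIndex) * pixelsPerPeak, so peak 200
// sits exactly on the clip's left edge and earlier peaks are never drawn.
console.log(pixelsPerPeak, offsetPeakIndex) // 1 200
```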
@@ -1462,6 +1496,58 @@ class TimelineWindowV2 extends Widget {
             }
           }
         }
+      } else if (track.type === 'video') {
+        // Draw video clips for VideoLayer
+        const videoLayer = track.object
+        const y = this.trackHierarchy.getTrackY(i)
+        const trackHeight = this.trackHierarchy.trackHeight // Use base height for clips
+
+        // Draw each clip
+        for (let clip of videoLayer.clips) {
+          const startX = this.timelineState.timeToPixel(clip.startTime)
+          const endX = this.timelineState.timeToPixel(clip.startTime + clip.duration)
+          const clipWidth = endX - startX
+
+          // Video clips use purple/magenta color
+          const clipColor = '#9b59b6' // Purple for video clips
+
+          // Draw clip rectangle
+          ctx.fillStyle = clipColor
+          ctx.fillRect(
+            startX,
+            y + 5,
+            clipWidth,
+            trackHeight - 10
+          )
+
+          // Draw border
+          ctx.strokeStyle = shadow
+          ctx.lineWidth = 1
+          ctx.strokeRect(
+            startX,
+            y + 5,
+            clipWidth,
+            trackHeight - 10
+          )
+
+          // Draw clip name if there's enough space
+          const minWidthForLabel = 40
+          if (clipWidth >= minWidthForLabel) {
+            ctx.fillStyle = labelColor
+            ctx.font = '11px sans-serif'
+            ctx.textAlign = 'left'
+            ctx.textBaseline = 'middle'
+
+            // Clip text to clip bounds
+            ctx.save()
+            ctx.beginPath()
+            ctx.rect(startX + 2, y + 5, clipWidth - 4, trackHeight - 10)
+            ctx.clip()
+
+            ctx.fillText(clip.name, startX + 4, y + trackHeight / 2)
+            ctx.restore()
+          }
+        }
       }
     }
 
@@ -2101,6 +2187,39 @@ class TimelineWindowV2 extends Widget {
         return true
       }
 
+      // Check if clicking on audio clip edge to start trimming
+      const audioEdgeInfo = this.getAudioClipEdgeAtPoint(track, adjustedX, adjustedY)
+      if (audioEdgeInfo) {
+        // Skip if right-clicking (button 2)
+        if (this.lastClickEvent?.button === 2) {
+          return false
+        }
+
+        // Select the track
+        this.selectTrack(track)
+
+        // Start audio clip edge dragging
+        this.draggingAudioClipEdge = {
+          track: track,
+          edge: audioEdgeInfo.edge,
+          clip: audioEdgeInfo.clip,
+          clipIndex: audioEdgeInfo.clipIndex,
+          audioTrack: audioEdgeInfo.audioTrack,
+          initialClipStart: audioEdgeInfo.clip.startTime,
+          initialClipDuration: audioEdgeInfo.clip.duration,
+          initialClipOffset: audioEdgeInfo.clip.offset,
+          initialLinkedVideoOffset: audioEdgeInfo.clip.linkedVideoClip?.offset || 0
+        }
+
+        // Enable global mouse events for dragging
+        this._globalEvents.add("mousemove")
+        this._globalEvents.add("mouseup")
+
+        console.log('Started dragging audio clip', audioEdgeInfo.edge, 'edge')
+        if (this.requestRedraw) this.requestRedraw()
+        return true
+      }
+
       // Check if clicking on audio clip to start dragging
       const audioClipInfo = this.getAudioClipAtPoint(track, adjustedX, adjustedY)
       if (audioClipInfo) {
@@ -2132,6 +2251,70 @@ class TimelineWindowV2 extends Widget {
         return true
       }
 
+      // Check if clicking on video clip edge to start trimming
+      const videoEdgeInfo = this.getVideoClipEdgeAtPoint(track, adjustedX, adjustedY)
+      if (videoEdgeInfo) {
+        // Skip if right-clicking (button 2)
+        if (this.lastClickEvent?.button === 2) {
+          return false
+        }
+
+        // Select the track
+        this.selectTrack(track)
+
+        // Start video clip edge dragging
+        this.draggingVideoClipEdge = {
+          track: track,
+          edge: videoEdgeInfo.edge,
+          clip: videoEdgeInfo.clip,
+          clipIndex: videoEdgeInfo.clipIndex,
+          videoLayer: videoEdgeInfo.videoLayer,
+          initialClipStart: videoEdgeInfo.clip.startTime,
+          initialClipDuration: videoEdgeInfo.clip.duration,
+          initialClipOffset: videoEdgeInfo.clip.offset,
+          initialLinkedAudioOffset: videoEdgeInfo.clip.linkedAudioClip?.offset || 0
+        }
+
+        // Enable global mouse events for dragging
+        this._globalEvents.add("mousemove")
+        this._globalEvents.add("mouseup")
+
+        console.log('Started dragging video clip', videoEdgeInfo.edge, 'edge')
+        if (this.requestRedraw) this.requestRedraw()
+        return true
+      }
+
+      // Check if clicking on video clip to start dragging
+      const videoClipInfo = this.getVideoClipAtPoint(track, adjustedX, adjustedY)
+      if (videoClipInfo) {
+        // Skip drag if right-clicking (button 2)
+        if (this.lastClickEvent?.button === 2) {
+          return false
+        }
+
+        // Select the track
+        this.selectTrack(track)
+
+        // Start video clip dragging
+        const clickTime = this.timelineState.pixelToTime(adjustedX)
+        this.draggingVideoClip = {
+          track: track,
+          clip: videoClipInfo.clip,
+          clipIndex: videoClipInfo.clipIndex,
+          videoLayer: videoClipInfo.videoLayer,
+          initialMouseTime: clickTime,
+          initialClipStartTime: videoClipInfo.clip.startTime
+        }
+
+        // Enable global mouse events for dragging
+        this._globalEvents.add("mousemove")
+        this._globalEvents.add("mouseup")
+
+        console.log('Started dragging video clip at time', videoClipInfo.clip.startTime)
+        if (this.requestRedraw) this.requestRedraw()
+        return true
+      }
+
       // Phase 6: Check if clicking on segment to start dragging
       const segmentInfo = this.getSegmentAtPoint(track, adjustedX, adjustedY)
       if (segmentInfo) {
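Aside (not part of the patch): the mousedown handlers above all follow the same drag-state shape, capturing the initial clip position once and recomputing it from the accumulated mouse delta on every move, so the drag never compounds rounding error. A simplified, self-contained sketch of that pattern (the `beginDrag`/`updateDrag` helpers are hypothetical):

```js
// Capture initial state on mousedown; derive the new position from it on mousemove.
function beginDrag(clip, mouseTime) {
  return { clip, initialMouseTime: mouseTime, initialStartTime: clip.startTime }
}
function updateDrag(drag, mouseTime) {
  const delta = mouseTime - drag.initialMouseTime
  drag.clip.startTime = Math.max(0, drag.initialStartTime + delta) // never negative
}

const drag = beginDrag({ startTime: 4.0 }, 5.0)
updateDrag(drag, 4.25)            // mouse moved 0.75 s to the left
console.log(drag.clip.startTime)  // 3.25
```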
@@ -2657,6 +2840,115 @@ class TimelineWindowV2 extends Widget {
     return null
   }
 
+  getAudioClipEdgeAtPoint(track, x, y) {
+    const clipInfo = this.getAudioClipAtPoint(track, x, y)
+    if (!clipInfo) return null
+
+    const clickTime = this.timelineState.pixelToTime(x)
+    const edgeThreshold = 8 / this.timelineState.pixelsPerSecond // 8 pixels in time units
+
+    const clipStart = clipInfo.clip.startTime
+    const clipEnd = clipInfo.clip.startTime + clipInfo.clip.duration
+
+    // Check if near left edge
+    if (Math.abs(clickTime - clipStart) <= edgeThreshold) {
+      return {
+        edge: 'left',
+        clip: clipInfo.clip,
+        clipIndex: clipInfo.clipIndex,
+        audioTrack: clipInfo.audioTrack,
+        clipStart: clipStart,
+        clipEnd: clipEnd
+      }
+    }
+
+    // Check if near right edge
+    if (Math.abs(clickTime - clipEnd) <= edgeThreshold) {
+      return {
+        edge: 'right',
+        clip: clipInfo.clip,
+        clipIndex: clipInfo.clipIndex,
+        audioTrack: clipInfo.audioTrack,
+        clipStart: clipStart,
+        clipEnd: clipEnd
+      }
+    }
+
+    return null
+  }
+
+  getVideoClipAtPoint(track, x, y) {
+    if (track.type !== 'video') return null
+
+    const trackIndex = this.trackHierarchy.tracks.indexOf(track)
+    if (trackIndex === -1) return null
+
+    const trackY = this.trackHierarchy.getTrackY(trackIndex)
+    const trackHeight = this.trackHierarchy.trackHeight
+    const clipTop = trackY + 5
+    const clipBottom = trackY + trackHeight - 5
+
+    // Check if y is within clip bounds
+    if (y < clipTop || y > clipBottom) return null
+
+    const clickTime = this.timelineState.pixelToTime(x)
+    const videoLayer = track.object
+
+    // Check each clip
+    for (let i = 0; i < videoLayer.clips.length; i++) {
+      const clip = videoLayer.clips[i]
+      const clipStart = clip.startTime
+      const clipEnd = clip.startTime + clip.duration
+
+      if (clickTime >= clipStart && clickTime <= clipEnd) {
+        return {
+          clip: clip,
+          clipIndex: i,
+          videoLayer: videoLayer
+        }
+      }
+    }
+
+    return null
+  }
+
+  getVideoClipEdgeAtPoint(track, x, y) {
+    const clipInfo = this.getVideoClipAtPoint(track, x, y)
+    if (!clipInfo) return null
+
+    const clickTime = this.timelineState.pixelToTime(x)
+    const edgeThreshold = 8 / this.timelineState.pixelsPerSecond // 8 pixels in time units
+
+    const clipStart = clipInfo.clip.startTime
+    const clipEnd = clipInfo.clip.startTime + clipInfo.clip.duration
+
+    // Check if near left edge
+    if (Math.abs(clickTime - clipStart) <= edgeThreshold) {
+      return {
+        edge: 'left',
+        clip: clipInfo.clip,
+        clipIndex: clipInfo.clipIndex,
+        videoLayer: clipInfo.videoLayer,
+        clipStart: clipStart,
+        clipEnd: clipEnd
+      }
+    }
+
+    // Check if near right edge
+    if (Math.abs(clickTime - clipEnd) <= edgeThreshold) {
+      return {
+        edge: 'right',
+        clip: clipInfo.clip,
+        clipIndex: clipInfo.clipIndex,
+        videoLayer: clipInfo.videoLayer,
+        clipStart: clipStart,
+        clipEnd: clipEnd
+      }
+    }
+
+    return null
+  }
+
   /**
    * Get segment edge at a point (Phase 6)
    * Returns {edge: 'left'|'right', startTime, endTime, keyframe, animationData, curveName} if near an edge
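Aside (not part of the patch): the edge hit test above converts a fixed 8 px grab zone into seconds so it can be compared against clip boundaries in time space. A worked example with illustrative numbers:

```js
// At 100 px/s, the 8 px grab zone is 0.08 s wide on either side of an edge.
const pixelsPerSecond = 100
const edgeThreshold = 8 / pixelsPerSecond        // 0.08 s
const clipStart = 2.0, clickTime = 2.05
console.log(Math.abs(clickTime - clipStart) <= edgeThreshold) // true -> 'left' edge hit
```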
@@ -3496,6 +3788,54 @@ class TimelineWindowV2 extends Widget {
       return true
     }
 
+    // Handle audio clip edge dragging (trimming)
+    if (this.draggingAudioClipEdge) {
+      const adjustedX = x - this.trackHeaderWidth
+      const newTime = this.timelineState.pixelToTime(adjustedX)
+      const minClipDuration = this.context.config.minClipDuration
+
+      if (this.draggingAudioClipEdge.edge === 'left') {
+        // Dragging left edge - adjust startTime and offset
+        const initialEnd = this.draggingAudioClipEdge.initialClipStart + this.draggingAudioClipEdge.initialClipDuration
+        const maxStartTime = initialEnd - minClipDuration
+        const newStartTime = Math.max(0, Math.min(newTime, maxStartTime))
+        const startTimeDelta = newStartTime - this.draggingAudioClipEdge.initialClipStart
+
+        this.draggingAudioClipEdge.clip.startTime = newStartTime
+        this.draggingAudioClipEdge.clip.offset = this.draggingAudioClipEdge.initialClipOffset + startTimeDelta
+        this.draggingAudioClipEdge.clip.duration = this.draggingAudioClipEdge.initialClipDuration - startTimeDelta
+
+        // Also trim linked video clip if it exists
+        if (this.draggingAudioClipEdge.clip.linkedVideoClip) {
+          const videoClip = this.draggingAudioClipEdge.clip.linkedVideoClip
+          videoClip.startTime = newStartTime
+          videoClip.offset = (this.draggingAudioClipEdge.initialLinkedVideoOffset || 0) + startTimeDelta
+          videoClip.duration = this.draggingAudioClipEdge.initialClipDuration - startTimeDelta
+        }
+      } else {
+        // Dragging right edge - adjust duration
+        const minEndTime = this.draggingAudioClipEdge.initialClipStart + minClipDuration
+        const newEndTime = Math.max(minEndTime, newTime)
+        let newDuration = newEndTime - this.draggingAudioClipEdge.clip.startTime
+
+        // Constrain duration to not exceed source file duration minus offset
+        const maxAvailableDuration = this.draggingAudioClipEdge.clip.sourceDuration - this.draggingAudioClipEdge.clip.offset
+        newDuration = Math.min(newDuration, maxAvailableDuration)
+
+        this.draggingAudioClipEdge.clip.duration = newDuration
+
+        // Also trim linked video clip if it exists
+        if (this.draggingAudioClipEdge.clip.linkedVideoClip) {
+          const linkedMaxDuration = this.draggingAudioClipEdge.clip.linkedVideoClip.sourceDuration - this.draggingAudioClipEdge.clip.linkedVideoClip.offset
+          this.draggingAudioClipEdge.clip.linkedVideoClip.duration = Math.min(newDuration, linkedMaxDuration)
+        }
+      }
+
+      // Trigger timeline redraw
+      if (this.requestRedraw) this.requestRedraw()
+      return true
+    }
+
     // Handle audio clip dragging
     if (this.draggingAudioClip) {
       // Adjust coordinates to timeline area
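Aside (not part of the patch): a minimal pure-function sketch of the left-edge trim rule applied above, assuming a clip shaped like `{startTime, duration, offset}` and the new `minClipDuration` setting. The in-place code mutates the clip during the drag; this version just shows the invariant that the clip's end stays fixed while start and offset move together:

```js
// Left-edge trim: start/offset shift by the same delta, duration shrinks,
// and the new start is clamped so at least minClipDuration seconds remain.
function trimLeft(clip, newTime, minClipDuration = 0.1) {
  const initialEnd = clip.startTime + clip.duration
  const newStart = Math.max(0, Math.min(newTime, initialEnd - minClipDuration))
  const delta = newStart - clip.startTime
  return {
    ...clip,
    startTime: newStart,
    offset: clip.offset + delta,     // skip further into the source material
    duration: clip.duration - delta, // end time is unchanged
  }
}

console.log(trimLeft({ startTime: 1.0, duration: 4.0, offset: 0.5 }, 2.0))
// -> { startTime: 2, duration: 3, offset: 1.5 }  (clip still ends at 5.0 s)
```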
@@ -3510,6 +3850,83 @@ class TimelineWindowV2 extends Widget {
       // Update clip's start time (ensure it doesn't go negative)
       this.draggingAudioClip.clip.startTime = Math.max(0, this.draggingAudioClip.initialClipStartTime + timeDelta)
 
+      // Also move linked video clip if it exists
+      if (this.draggingAudioClip.clip.linkedVideoClip) {
+        this.draggingAudioClip.clip.linkedVideoClip.startTime = this.draggingAudioClip.clip.startTime
+      }
+
       // Trigger timeline redraw
       if (this.requestRedraw) this.requestRedraw()
       return true
     }
 
+    // Handle video clip edge dragging (trimming)
+    if (this.draggingVideoClipEdge) {
+      const adjustedX = x - this.trackHeaderWidth
+      const newTime = this.timelineState.pixelToTime(adjustedX)
+      const minClipDuration = this.context.config.minClipDuration
+
+      if (this.draggingVideoClipEdge.edge === 'left') {
+        // Dragging left edge - adjust startTime and offset
+        const initialEnd = this.draggingVideoClipEdge.initialClipStart + this.draggingVideoClipEdge.initialClipDuration
+        const maxStartTime = initialEnd - minClipDuration
+        const newStartTime = Math.max(0, Math.min(newTime, maxStartTime))
+        const startTimeDelta = newStartTime - this.draggingVideoClipEdge.initialClipStart
+
+        this.draggingVideoClipEdge.clip.startTime = newStartTime
+        this.draggingVideoClipEdge.clip.offset = this.draggingVideoClipEdge.initialClipOffset + startTimeDelta
+        this.draggingVideoClipEdge.clip.duration = this.draggingVideoClipEdge.initialClipDuration - startTimeDelta
+
+        // Also trim linked audio clip if it exists
+        if (this.draggingVideoClipEdge.clip.linkedAudioClip) {
+          const audioClip = this.draggingVideoClipEdge.clip.linkedAudioClip
+          audioClip.startTime = newStartTime
+          audioClip.offset = (this.draggingVideoClipEdge.initialLinkedAudioOffset || 0) + startTimeDelta
+          audioClip.duration = this.draggingVideoClipEdge.initialClipDuration - startTimeDelta
+        }
+      } else {
+        // Dragging right edge - adjust duration
+        const minEndTime = this.draggingVideoClipEdge.initialClipStart + minClipDuration
+        const newEndTime = Math.max(minEndTime, newTime)
+        let newDuration = newEndTime - this.draggingVideoClipEdge.clip.startTime
+
+        // Constrain duration to not exceed source file duration minus offset
+        const maxAvailableDuration = this.draggingVideoClipEdge.clip.sourceDuration - this.draggingVideoClipEdge.clip.offset
+        newDuration = Math.min(newDuration, maxAvailableDuration)
+
+        this.draggingVideoClipEdge.clip.duration = newDuration
+
+        // Also trim linked audio clip if it exists
+        if (this.draggingVideoClipEdge.clip.linkedAudioClip) {
+          const linkedMaxDuration = this.draggingVideoClipEdge.clip.linkedAudioClip.sourceDuration - this.draggingVideoClipEdge.clip.linkedAudioClip.offset
+          this.draggingVideoClipEdge.clip.linkedAudioClip.duration = Math.min(newDuration, linkedMaxDuration)
+        }
+      }
+
+      // Trigger timeline redraw
+      if (this.requestRedraw) this.requestRedraw()
+      return true
+    }
+
+    // Handle video clip dragging
+    if (this.draggingVideoClip) {
+      // Adjust coordinates to timeline area
+      const adjustedX = x - this.trackHeaderWidth
+
+      // Convert mouse position to time
+      const newTime = this.timelineState.pixelToTime(adjustedX)
+
+      // Calculate time delta
+      const timeDelta = newTime - this.draggingVideoClip.initialMouseTime
+
+      // Update clip's start time (ensure it doesn't go negative)
+      this.draggingVideoClip.clip.startTime = Math.max(0, this.draggingVideoClip.initialClipStartTime + timeDelta)
+
+      // Also move linked audio clip if it exists
+      if (this.draggingVideoClip.clip.linkedAudioClip) {
+        this.draggingVideoClip.clip.linkedAudioClip.startTime = this.draggingVideoClip.clip.startTime
+      }
+
      // Trigger timeline redraw
      if (this.requestRedraw) this.requestRedraw()
      return true
@@ -3569,6 +3986,39 @@ class TimelineWindowV2 extends Widget {
     return true
   }
 
+    // Update cursor based on hover position (when not dragging)
+    if (!this.draggingAudioClip && !this.draggingVideoClip &&
+        !this.draggingAudioClipEdge && !this.draggingVideoClipEdge &&
+        !this.draggingKeyframe && !this.draggingPlayhead && !this.draggingSegment) {
+      const trackY = y - this.ruler.height
+      if (trackY >= 0 && x >= this.trackHeaderWidth) {
+        const adjustedY = trackY - this.trackScrollOffset
+        const adjustedX = x - this.trackHeaderWidth
+        const track = this.trackHierarchy.getTrackAtY(adjustedY)
+
+        if (track) {
+          // Check for audio clip edge
+          if (track.type === 'audio') {
+            const audioEdgeInfo = this.getAudioClipEdgeAtPoint(track, adjustedX, adjustedY)
+            if (audioEdgeInfo) {
+              this.cursor = audioEdgeInfo.edge === 'left' ? 'w-resize' : 'e-resize'
+              return false
+            }
+          }
+          // Check for video clip edge
+          else if (track.type === 'video') {
+            const videoEdgeInfo = this.getVideoClipEdgeAtPoint(track, adjustedX, adjustedY)
+            if (videoEdgeInfo) {
+              this.cursor = videoEdgeInfo.edge === 'left' ? 'w-resize' : 'e-resize'
+              return false
+            }
+          }
+        }
+      }
+      // Reset cursor if not over an edge
+      this.cursor = 'default'
+    }
+
     return false
   }
@@ -3631,6 +4081,67 @@ class TimelineWindowV2 extends Widget {
       return true
     }
 
+    // Complete audio clip edge dragging (trimming)
+    if (this.draggingAudioClipEdge) {
+      console.log('Finished trimming audio clip edge')
+
+      // Update backend with new clip trim
+      invoke('audio_trim_clip', {
+        trackId: this.draggingAudioClipEdge.audioTrack.audioTrackId,
+        clipId: this.draggingAudioClipEdge.clip.clipId,
+        newStartTime: this.draggingAudioClipEdge.clip.startTime,
+        newDuration: this.draggingAudioClipEdge.clip.duration,
+        newOffset: this.draggingAudioClipEdge.clip.offset
+      }).catch(error => {
+        console.error('Failed to trim audio clip in backend:', error)
+      })
+
+      // Also update linked video clip if it exists
+      if (this.draggingAudioClipEdge.clip.linkedVideoClip) {
+        console.log('Linked video clip also trimmed')
+      }
+
+      // Clean up dragging state
+      this.draggingAudioClipEdge = null
+      this._globalEvents.delete("mousemove")
+      this._globalEvents.delete("mouseup")
+
+      // Final redraw
+      if (this.requestRedraw) this.requestRedraw()
+      return true
+    }
+
+    // Complete video clip edge dragging (trimming)
+    if (this.draggingVideoClipEdge) {
+      console.log('Finished trimming video clip edge')
+
+      // Update linked audio clip in backend if it exists
+      if (this.draggingVideoClipEdge.clip.linkedAudioClip) {
+        const linkedAudioClip = this.draggingVideoClipEdge.clip.linkedAudioClip
+        const audioTrack = this.draggingVideoClipEdge.videoLayer.linkedAudioTrack
+        if (audioTrack) {
+          invoke('audio_trim_clip', {
+            trackId: audioTrack.audioTrackId,
+            clipId: linkedAudioClip.clipId,
+            newStartTime: linkedAudioClip.startTime,
+            newDuration: linkedAudioClip.duration,
+            newOffset: linkedAudioClip.offset
+          }).catch(error => {
+            console.error('Failed to trim linked audio clip in backend:', error)
+          })
+        }
+      }
+
+      // Clean up dragging state
+      this.draggingVideoClipEdge = null
+      this._globalEvents.delete("mousemove")
+      this._globalEvents.delete("mouseup")
+
+      // Final redraw
+      if (this.requestRedraw) this.requestRedraw()
+      return true
+    }
+
     // Complete audio clip dragging
     if (this.draggingAudioClip) {
       console.log('Finished dragging audio clip')
@@ -3644,6 +4155,12 @@ class TimelineWindowV2 extends Widget {
         console.error('Failed to move clip in backend:', error)
       })
 
+      // Also update linked video clip in backend if it exists
+      if (this.draggingAudioClip.clip.linkedVideoClip) {
+        // Video clips don't have a backend move command yet, so just log for now
+        console.log('Linked video clip also moved to time', this.draggingAudioClip.clip.startTime)
+      }
+
      // Clean up dragging state
      this.draggingAudioClip = null
      this._globalEvents.delete("mousemove")
@@ -3654,6 +4171,37 @@ class TimelineWindowV2 extends Widget {
      return true
    }
 
+    // Complete video clip dragging
+    if (this.draggingVideoClip) {
+      console.log('Finished dragging video clip')
+
+      // Video clips don't have a backend position yet (they're just visual)
+      // But we need to update the linked audio clip in the backend
+      if (this.draggingVideoClip.clip.linkedAudioClip) {
+        const linkedAudioClip = this.draggingVideoClip.clip.linkedAudioClip
+        // Find the audio track that contains this clip
+        const audioTrack = this.draggingVideoClip.videoLayer.linkedAudioTrack
+        if (audioTrack) {
+          invoke('audio_move_clip', {
+            trackId: audioTrack.audioTrackId,
+            clipId: linkedAudioClip.clipId,
+            newStartTime: linkedAudioClip.startTime
+          }).catch(error => {
+            console.error('Failed to move linked audio clip in backend:', error)
+          })
+        }
+      }
+
+      // Clean up dragging state
+      this.draggingVideoClip = null
+      this._globalEvents.delete("mousemove")
+      this._globalEvents.delete("mouseup")
+
+      // Final redraw
+      if (this.requestRedraw) this.requestRedraw()
+      return true
+    }
+
     // Phase 6: Complete segment dragging
     if (this.draggingSegment) {
       console.log('Finished dragging segment')