Video import

Skyler Lehmkuhl 2025-12-02 13:39:55 -05:00
parent c2f8969432
commit d453571c9b
12 changed files with 1292 additions and 52 deletions

View File

@ -23,6 +23,10 @@ uuid = { version = "1.0", features = ["v4", "serde"] }
# Audio backend
daw-backend = { path = "../../daw-backend" }
# Video decoding
ffmpeg-next = "7.0"
lru = "0.12"
# File I/O
zip = "0.6"
chrono = "0.4"

View File

@ -110,7 +110,17 @@ impl Action for AddClipInstanceAction {
}
fn execute_backend(&mut self, backend: &mut BackendContext, document: &Document) -> Result<(), String> {
// Only sync audio clips to the backend
// Only sync audio/MIDI clips to the backend
// Check if this is an audio layer first
let layer = document
.get_layer(&self.layer_id)
.ok_or_else(|| format!("Layer {} not found", self.layer_id))?;
// Only process audio layers - vector and video layers don't need backend sync
if !matches!(layer, AnyLayer::Audio(_)) {
return Ok(());
}
// Look up the clip from the document
let clip = document
.get_audio_clip(&self.clip_instance.clip_id)

View File

@ -247,6 +247,11 @@ pub struct VideoClip {
/// Frame rate (from video metadata)
pub frame_rate: f64,
/// Optional linked audio clip (extracted from video file)
/// When set, the audio clip should be moved/trimmed in sync with this video clip
#[serde(default, skip_serializing_if = "Option::is_none")]
pub linked_audio_clip_id: Option<Uuid>,
}
impl VideoClip {
@ -267,6 +272,7 @@ impl VideoClip {
height,
duration,
frame_rate,
linked_audio_clip_id: None,
}
}
}
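The linked-audio contract above is stated only in prose in this commit. Below is a minimal, self-contained sketch of the intended sync behavior; the `Instance` struct and `move_with_linked_audio` are illustrative stand-ins, not this codebase's `ClipInstance` API.

```rust
// Illustrative model only: stands in for ClipInstance + linked_audio_clip_id.
#[derive(Debug)]
struct Instance {
    timeline_start: f64,
    linked_audio: Option<usize>, // index of the linked audio instance, if any
}

fn move_with_linked_audio(instances: &mut [Instance], video_idx: usize, new_start: f64) {
    let delta = new_start - instances[video_idx].timeline_start;
    instances[video_idx].timeline_start = new_start;
    if let Some(audio_idx) = instances[video_idx].linked_audio {
        // Keep the extracted audio aligned with its video.
        instances[audio_idx].timeline_start += delta;
    }
}

fn main() {
    let mut clips = vec![
        Instance { timeline_start: 2.0, linked_audio: Some(1) }, // video
        Instance { timeline_start: 2.0, linked_audio: None },    // its extracted audio
    ];
    move_with_linked_audio(&mut clips, 0, 5.0);
    assert_eq!(clips[1].timeline_start, 5.0); // audio followed the video
}
```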

View File

@ -14,6 +14,7 @@ pub mod layer_tree;
pub mod clip;
pub mod document;
pub mod renderer;
pub mod video;
pub mod action;
pub mod actions;
pub mod selection;

View File

@ -77,19 +77,30 @@ fn decode_image_asset(asset: &ImageAsset) -> Option<Image> {
}
/// Render a document to a Vello scene
pub fn render_document(document: &Document, scene: &mut Scene, image_cache: &mut ImageCache) {
render_document_with_transform(document, scene, Affine::IDENTITY, image_cache);
pub fn render_document(
document: &Document,
scene: &mut Scene,
image_cache: &mut ImageCache,
video_manager: &std::sync::Arc<std::sync::Mutex<crate::video::VideoManager>>,
) {
render_document_with_transform(document, scene, Affine::IDENTITY, image_cache, video_manager);
}
/// Render a document to a Vello scene with a base transform
/// The base transform is composed with all object transforms (useful for camera zoom/pan)
pub fn render_document_with_transform(document: &Document, scene: &mut Scene, base_transform: Affine, image_cache: &mut ImageCache) {
pub fn render_document_with_transform(
document: &Document,
scene: &mut Scene,
base_transform: Affine,
image_cache: &mut ImageCache,
video_manager: &std::sync::Arc<std::sync::Mutex<crate::video::VideoManager>>,
) {
// 1. Draw background
render_background(document, scene, base_transform);
// 2. Recursively render the root graphics object at current time
let time = document.current_time;
render_graphics_object(document, time, scene, base_transform, image_cache);
render_graphics_object(document, time, scene, base_transform, image_cache, video_manager);
}
/// Draw the document background
@ -109,7 +120,14 @@ fn render_background(document: &Document, scene: &mut Scene, base_transform: Aff
}
/// Recursively render the root graphics object and its children
fn render_graphics_object(document: &Document, time: f64, scene: &mut Scene, base_transform: Affine, image_cache: &mut ImageCache) {
fn render_graphics_object(
document: &Document,
time: f64,
scene: &mut Scene,
base_transform: Affine,
image_cache: &mut ImageCache,
video_manager: &std::sync::Arc<std::sync::Mutex<crate::video::VideoManager>>,
) {
// Check if any layers are soloed
let any_soloed = document.visible_layers().any(|layer| layer.soloed());
@ -121,24 +139,36 @@ fn render_graphics_object(document: &Document, time: f64, scene: &mut Scene, bas
if any_soloed {
// Only render soloed layers when solo is active
if layer.soloed() {
render_layer(document, time, layer, scene, base_transform, 1.0, image_cache);
render_layer(document, time, layer, scene, base_transform, 1.0, image_cache, video_manager);
}
} else {
// Render all visible layers when no solo is active
render_layer(document, time, layer, scene, base_transform, 1.0, image_cache);
render_layer(document, time, layer, scene, base_transform, 1.0, image_cache, video_manager);
}
}
}
/// Render a single layer
fn render_layer(document: &Document, time: f64, layer: &AnyLayer, scene: &mut Scene, base_transform: Affine, parent_opacity: f64, image_cache: &mut ImageCache) {
fn render_layer(
document: &Document,
time: f64,
layer: &AnyLayer,
scene: &mut Scene,
base_transform: Affine,
parent_opacity: f64,
image_cache: &mut ImageCache,
video_manager: &std::sync::Arc<std::sync::Mutex<crate::video::VideoManager>>,
) {
match layer {
AnyLayer::Vector(vector_layer) => render_vector_layer(document, time, vector_layer, scene, base_transform, parent_opacity, image_cache),
AnyLayer::Vector(vector_layer) => {
render_vector_layer(document, time, vector_layer, scene, base_transform, parent_opacity, image_cache, video_manager)
}
AnyLayer::Audio(_) => {
// Audio layers don't render visually
}
AnyLayer::Video(_) => {
// Video rendering not yet implemented
AnyLayer::Video(video_layer) => {
let mut video_mgr = video_manager.lock().unwrap();
render_video_layer(document, time, video_layer, scene, base_transform, parent_opacity, &mut video_mgr);
}
}
}
@ -153,6 +183,7 @@ fn render_clip_instance(
base_transform: Affine,
animation_data: &crate::animation::AnimationData,
image_cache: &mut ImageCache,
video_manager: &std::sync::Arc<std::sync::Mutex<crate::video::VideoManager>>,
) {
// Try to find the clip in the document's clip libraries
// Only VectorClips are handled here; video clips are rendered per-layer by render_video_layer, and audio clips have no visual rendering
@ -280,19 +311,192 @@ fn render_clip_instance(
if !layer_node.data.visible() {
continue;
}
render_layer(document, clip_time, &layer_node.data, scene, instance_transform, clip_opacity, image_cache);
render_layer(document, clip_time, &layer_node.data, scene, instance_transform, clip_opacity, image_cache, video_manager);
}
}
/// Render a video layer with all its clip instances
fn render_video_layer(
document: &Document,
time: f64,
layer: &crate::layer::VideoLayer,
scene: &mut Scene,
base_transform: Affine,
parent_opacity: f64,
video_manager: &mut crate::video::VideoManager,
) {
use crate::animation::TransformProperty;
// Cascade opacity: parent_opacity × layer.opacity
let layer_opacity = parent_opacity * layer.layer.opacity;
// Render each video clip instance
for clip_instance in &layer.clip_instances {
// Get the video clip from the document
let Some(video_clip) = document.video_clips.get(&clip_instance.clip_id) else {
continue; // Clip not found
};
// Remap timeline time to clip's internal time
let Some(clip_time) = clip_instance.remap_time(time, video_clip.duration) else {
continue; // Clip instance not active at this time
};
// Get video frame from VideoManager
let Some(frame) = video_manager.get_frame(&clip_instance.clip_id, clip_time) else {
continue; // Frame not available
};
// Evaluate animated transform properties
let transform = &clip_instance.transform;
let x = layer.layer.animation_data.eval(
&crate::animation::AnimationTarget::Object {
id: clip_instance.id,
property: TransformProperty::X,
},
time,
transform.x,
);
let y = layer.layer.animation_data.eval(
&crate::animation::AnimationTarget::Object {
id: clip_instance.id,
property: TransformProperty::Y,
},
time,
transform.y,
);
let rotation = layer.layer.animation_data.eval(
&crate::animation::AnimationTarget::Object {
id: clip_instance.id,
property: TransformProperty::Rotation,
},
time,
transform.rotation,
);
let scale_x = layer.layer.animation_data.eval(
&crate::animation::AnimationTarget::Object {
id: clip_instance.id,
property: TransformProperty::ScaleX,
},
time,
transform.scale_x,
);
let scale_y = layer.layer.animation_data.eval(
&crate::animation::AnimationTarget::Object {
id: clip_instance.id,
property: TransformProperty::ScaleY,
},
time,
transform.scale_y,
);
let skew_x = layer.layer.animation_data.eval(
&crate::animation::AnimationTarget::Object {
id: clip_instance.id,
property: TransformProperty::SkewX,
},
time,
transform.skew_x,
);
let skew_y = layer.layer.animation_data.eval(
&crate::animation::AnimationTarget::Object {
id: clip_instance.id,
property: TransformProperty::SkewY,
},
time,
transform.skew_y,
);
// Build skew transform (applied around center)
let center_x = video_clip.width / 2.0;
let center_y = video_clip.height / 2.0;
let skew_transform = if skew_x != 0.0 || skew_y != 0.0 {
let skew_x_affine = if skew_x != 0.0 {
let tan_skew = skew_x.to_radians().tan();
Affine::new([1.0, 0.0, tan_skew, 1.0, 0.0, 0.0])
} else {
Affine::IDENTITY
};
let skew_y_affine = if skew_y != 0.0 {
let tan_skew = skew_y.to_radians().tan();
Affine::new([1.0, tan_skew, 0.0, 1.0, 0.0, 0.0])
} else {
Affine::IDENTITY
};
// Skew around center
Affine::translate((center_x, center_y))
* skew_x_affine
* skew_y_affine
* Affine::translate((-center_x, -center_y))
} else {
Affine::IDENTITY
};
let clip_transform = Affine::translate((x, y))
* Affine::rotate(rotation.to_radians())
* Affine::scale_non_uniform(scale_x, scale_y)
* skew_transform;
let instance_transform = base_transform * clip_transform;
// Evaluate animated opacity
let opacity = layer.layer.animation_data.eval(
&crate::animation::AnimationTarget::Object {
id: clip_instance.id,
property: TransformProperty::Opacity,
},
time,
clip_instance.opacity,
);
// Cascade opacity: layer_opacity × animated opacity
let final_opacity = (layer_opacity * opacity) as f32;
// Create peniko Image from video frame data (zero-copy via Arc clone)
// Coerce Arc<Vec<u8>> to Arc<dyn AsRef<[u8]> + Send + Sync>
let blob_data: Arc<dyn AsRef<[u8]> + Send + Sync> = frame.rgba_data.clone();
let image = Image::new(
vello::peniko::Blob::new(blob_data),
vello::peniko::ImageFormat::Rgba8,
frame.width,
frame.height,
);
// Apply opacity
let image_with_alpha = image.with_alpha(final_opacity);
// Create rectangle path for the video frame
let video_rect = Rect::new(0.0, 0.0, video_clip.width, video_clip.height);
// Render video frame as image fill
scene.fill(
Fill::NonZero,
instance_transform,
&image_with_alpha,
None,
&video_rect,
);
}
}
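`remap_time` is called above but its body is not part of this diff. Here is a plausible standalone sketch of the mapping it performs, assuming `timeline_start` and `trim_start` fields on the instance (a `trim_start` field does appear in the timeline pane changes later in this commit).

```rust
// Sketch only; the real ClipInstance::remap_time is not shown in this diff.
fn remap_time(timeline_start: f64, trim_start: f64, time: f64, clip_duration: f64) -> Option<f64> {
    let local = time - timeline_start; // seconds into this instance on the timeline
    if local < 0.0 {
        return None; // playhead is before the clip begins
    }
    let clip_time = trim_start + local; // seconds into the source video
    if clip_time < clip_duration {
        Some(clip_time)
    } else {
        None // playhead is past the trimmed content
    }
}

fn main() {
    // Instance placed at t = 2.0s, trimmed 0.5s into the source:
    // a playhead at 3.0s reads source time 1.5s.
    assert_eq!(remap_time(2.0, 0.5, 3.0, 10.0), Some(1.5));
}
```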
/// Render a vector layer with all its clip instances and shape instances
fn render_vector_layer(document: &Document, time: f64, layer: &VectorLayer, scene: &mut Scene, base_transform: Affine, parent_opacity: f64, image_cache: &mut ImageCache) {
fn render_vector_layer(
document: &Document,
time: f64,
layer: &VectorLayer,
scene: &mut Scene,
base_transform: Affine,
parent_opacity: f64,
image_cache: &mut ImageCache,
video_manager: &std::sync::Arc<std::sync::Mutex<crate::video::VideoManager>>,
) {
// Cascade opacity: parent_opacity × layer.opacity
let layer_opacity = parent_opacity * layer.layer.opacity;
// Render clip instances first (they appear under shape instances)
for clip_instance in &layer.clip_instances {
render_clip_instance(document, time, clip_instance, layer_opacity, scene, base_transform, &layer.layer.animation_data, image_cache);
render_clip_instance(document, time, clip_instance, layer_opacity, scene, base_transform, &layer.layer.animation_data, image_cache, video_manager);
}
// Render each shape instance in the layer

View File

@ -0,0 +1,739 @@
//! Video decoding and management for Lightningbeam
//!
//! This module provides FFmpeg-based video decoding with LRU frame caching
//! for efficient video playback and preview.
use std::sync::{Arc, Mutex};
use std::num::NonZeroUsize;
use std::collections::HashMap;
use ffmpeg_next as ffmpeg;
use lru::LruCache;
use uuid::Uuid;
/// Metadata about a video file
#[derive(Debug, Clone)]
pub struct VideoMetadata {
pub width: u32,
pub height: u32,
pub fps: f64,
pub duration: f64,
pub has_audio: bool,
}
/// Video decoder with LRU frame caching
struct VideoDecoder {
path: String,
width: u32, // Original video width
height: u32, // Original video height
output_width: u32, // Scaled output width
output_height: u32, // Scaled output height
fps: f64,
duration: f64,
time_base: f64,
stream_index: usize,
frame_cache: LruCache<i64, Vec<u8>>, // timestamp -> RGBA data
input: Option<ffmpeg::format::context::Input>,
decoder: Option<ffmpeg::decoder::Video>,
last_decoded_ts: i64, // Track the last decoded frame timestamp
keyframe_positions: Vec<i64>, // Index of keyframe timestamps for fast seeking
}
impl VideoDecoder {
/// Create a new video decoder
///
/// `max_width` and `max_height` specify the maximum output dimensions.
/// Video will be scaled down if larger, preserving aspect ratio.
fn new(path: String, cache_size: usize, max_width: Option<u32>, max_height: Option<u32>) -> Result<Self, String> {
ffmpeg::init().map_err(|e| e.to_string())?;
let input = ffmpeg::format::input(&path)
.map_err(|e| format!("Failed to open video: {}", e))?;
let video_stream = input.streams()
.best(ffmpeg::media::Type::Video)
.ok_or("No video stream found")?;
let stream_index = video_stream.index();
let context_decoder = ffmpeg::codec::context::Context::from_parameters(
video_stream.parameters()
).map_err(|e| e.to_string())?;
let decoder = context_decoder.decoder().video()
.map_err(|e| e.to_string())?;
let width = decoder.width();
let height = decoder.height();
let time_base = f64::from(video_stream.time_base());
// Calculate output dimensions (scale down if larger than max)
let (output_width, output_height) = if let (Some(max_w), Some(max_h)) = (max_width, max_height) {
// Calculate scale to fit within max dimensions while preserving aspect ratio
let scale = (max_w as f32 / width as f32).min(max_h as f32 / height as f32).min(1.0);
((width as f32 * scale) as u32, (height as f32 * scale) as u32)
} else {
(width, height)
};
// Try to get duration from stream, fallback to container
let duration = if video_stream.duration() > 0 {
video_stream.duration() as f64 * time_base
} else if input.duration() > 0 {
input.duration() as f64 / f64::from(ffmpeg::ffi::AV_TIME_BASE)
} else {
// If no duration available, estimate from frame count and fps
let fps = f64::from(video_stream.avg_frame_rate());
if video_stream.frames() > 0 && fps > 0.0 {
video_stream.frames() as f64 / fps
} else {
0.0 // Unknown duration
}
};
let fps = f64::from(video_stream.avg_frame_rate());
// Build keyframe index for fast seeking
// This scans the video once to find all keyframe positions
eprintln!("[Video Decoder] Building keyframe index for {}", path);
let keyframe_positions = Self::build_keyframe_index(&path, stream_index)?;
eprintln!("[Video Decoder] Found {} keyframes", keyframe_positions.len());
Ok(Self {
path,
width,
height,
output_width,
output_height,
fps,
duration,
time_base,
stream_index,
frame_cache: LruCache::new(
NonZeroUsize::new(cache_size).unwrap()
),
input: None,
decoder: None,
last_decoded_ts: -1,
keyframe_positions,
})
}
/// Build an index of all keyframe positions in the video
/// This enables fast seeking by knowing exactly where keyframes are
fn build_keyframe_index(path: &str, stream_index: usize) -> Result<Vec<i64>, String> {
let mut input = ffmpeg::format::input(path)
.map_err(|e| format!("Failed to open video for indexing: {}", e))?;
let mut keyframes = Vec::new();
// Scan through all packets to find keyframes
for (stream, packet) in input.packets() {
if stream.index() == stream_index {
// Check if this packet is a keyframe
if packet.is_key() {
if let Some(pts) = packet.pts() {
keyframes.push(pts);
}
}
}
}
// Ensure keyframes are sorted (they should be already)
keyframes.sort_unstable();
Ok(keyframes)
}
/// Find the nearest keyframe at or before the target timestamp
/// Returns the keyframe timestamp, or 0 if target is before first keyframe
fn find_nearest_keyframe_before(&self, target_ts: i64) -> i64 {
// Binary search to find the largest keyframe <= target_ts
match self.keyframe_positions.binary_search(&target_ts) {
Ok(idx) => self.keyframe_positions[idx], // Exact match
Err(0) => 0, // Target is before first keyframe, seek to start
Err(idx) => self.keyframe_positions[idx - 1], // Use previous keyframe
}
}
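A standalone rendition of the lookup above with concrete numbers, to make the three `binary_search` arms concrete:

```rust
// Same logic as find_nearest_keyframe_before, lifted out for a worked example.
fn nearest_keyframe_before(keyframes: &[i64], target_ts: i64) -> i64 {
    match keyframes.binary_search(&target_ts) {
        Ok(idx) => keyframes[idx],      // exact keyframe hit
        Err(0) => 0,                    // before the first keyframe
        Err(idx) => keyframes[idx - 1], // previous keyframe
    }
}

fn main() {
    let keyframes = [0i64, 250, 500];
    assert_eq!(nearest_keyframe_before(&keyframes, 300), 250); // between keyframes
    assert_eq!(nearest_keyframe_before(&keyframes, 250), 250); // exact match
    assert_eq!(nearest_keyframe_before(&keyframes, -10), 0);   // before the first
}
```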
/// Get a decoded frame at the specified timestamp
fn get_frame(&mut self, timestamp: f64) -> Result<Vec<u8>, String> {
use std::time::Instant;
let t_start = Instant::now();
// Round timestamp to nearest frame boundary to improve cache hits
// This ensures that timestamps like 1.0001s and 0.9999s both map to frame 1.0s
let frame_duration = 1.0 / self.fps;
let rounded_timestamp = (timestamp / frame_duration).round() * frame_duration;
// Convert timestamp to frame timestamp
let frame_ts = (rounded_timestamp / self.time_base) as i64;
// Check cache
if let Some(cached_frame) = self.frame_cache.get(&frame_ts) {
eprintln!("[Video Timing] Cache hit for ts={:.3}s ({}ms)", timestamp, t_start.elapsed().as_millis());
return Ok(cached_frame.clone());
}
// Determine if we need to seek
// Seek if: no decoder open, going backwards, or jumping forward more than 2 seconds
let need_seek = self.decoder.is_none()
|| frame_ts < self.last_decoded_ts
|| frame_ts > self.last_decoded_ts + (2.0 / self.time_base) as i64;
if need_seek {
let t_seek_start = Instant::now();
// Find the nearest keyframe at or before our target using the index
// This is the exact keyframe position, so we can seek directly to it
let keyframe_ts_stream = self.find_nearest_keyframe_before(frame_ts);
// Convert from stream timebase to AV_TIME_BASE (microseconds) for container-level seek
// input.seek() with stream=-1 expects AV_TIME_BASE units, not stream units
let keyframe_seconds = keyframe_ts_stream as f64 * self.time_base;
let keyframe_ts_av = (keyframe_seconds * 1_000_000.0) as i64; // AV_TIME_BASE = 1000000
eprintln!("[Video Seek] Target: {} | Keyframe(stream): {} | Keyframe(AV): {} | Index size: {}",
frame_ts, keyframe_ts_stream, keyframe_ts_av, self.keyframe_positions.len());
// Reopen input
let mut input = ffmpeg::format::input(&self.path)
.map_err(|e| format!("Failed to reopen video: {}", e))?;
// Seek directly to the keyframe with a 1-unit window
// Can't use keyframe_ts..keyframe_ts (empty) or ..= (not supported)
input.seek(keyframe_ts_av, keyframe_ts_av..(keyframe_ts_av + 1))
.map_err(|e| format!("Seek failed: {}", e))?;
eprintln!("[Video Timing] Seek call took {}ms", t_seek_start.elapsed().as_millis());
let context_decoder = ffmpeg::codec::context::Context::from_parameters(
input.streams().best(ffmpeg::media::Type::Video).unwrap().parameters()
).map_err(|e| e.to_string())?;
let decoder = context_decoder.decoder().video()
.map_err(|e| e.to_string())?;
self.input = Some(input);
self.decoder = Some(decoder);
// Set last_decoded_ts to just before the seek target so forward playback works
// Without this, every frame would trigger a new seek
self.last_decoded_ts = frame_ts - 1;
}
let input = self.input.as_mut().unwrap();
let decoder = self.decoder.as_mut().unwrap();
// Decode frames until we find the one closest to our target timestamp
let mut best_frame_data: Option<Vec<u8>> = None;
let mut best_frame_ts: Option<i64> = None;
let t_decode_start = Instant::now();
let mut decode_count = 0;
let mut scale_time_ms = 0u128;
for (stream, packet) in input.packets() {
if stream.index() == self.stream_index {
decoder.send_packet(&packet)
.map_err(|e| e.to_string())?;
let mut frame = ffmpeg::util::frame::Video::empty();
while decoder.receive_frame(&mut frame).is_ok() {
decode_count += 1;
let current_frame_ts = frame.timestamp().unwrap_or(0);
self.last_decoded_ts = current_frame_ts; // Update last decoded position
// Check if this frame is closer to our target than the previous best
let is_better = match best_frame_ts {
None => true,
Some(best_ts) => {
(current_frame_ts - frame_ts).abs() < (best_ts - frame_ts).abs()
}
};
if is_better {
let t_scale_start = Instant::now();
// Convert to RGBA and scale to output size
let mut scaler = ffmpeg::software::scaling::context::Context::get(
frame.format(),
frame.width(),
frame.height(),
ffmpeg::format::Pixel::RGBA,
self.output_width,
self.output_height,
ffmpeg::software::scaling::flag::Flags::BILINEAR,
).map_err(|e| e.to_string())?;
let mut rgb_frame = ffmpeg::util::frame::Video::empty();
scaler.run(&frame, &mut rgb_frame)
.map_err(|e| e.to_string())?;
// Remove stride padding to create tightly packed RGBA data
let width = self.output_width as usize;
let height = self.output_height as usize;
let stride = rgb_frame.stride(0);
let row_size = width * 4; // RGBA = 4 bytes per pixel
let source_data = rgb_frame.data(0);
let mut packed_data = Vec::with_capacity(row_size * height);
for y in 0..height {
let row_start = y * stride;
let row_end = row_start + row_size;
packed_data.extend_from_slice(&source_data[row_start..row_end]);
}
scale_time_ms += t_scale_start.elapsed().as_millis();
best_frame_data = Some(packed_data);
best_frame_ts = Some(current_frame_ts);
}
// If we've reached or passed the target timestamp, we can stop
if current_frame_ts >= frame_ts {
// Found our frame, cache and return it
if let Some(data) = best_frame_data {
let total_time = t_start.elapsed().as_millis();
let decode_time = t_decode_start.elapsed().as_millis();
eprintln!("[Video Timing] ts={:.3}s | Decoded {} frames in {}ms | Scale: {}ms | Total: {}ms",
timestamp, decode_count, decode_time, scale_time_ms, total_time);
self.frame_cache.put(frame_ts, data.clone());
return Ok(data);
}
break;
}
}
}
}
eprintln!("[Video Decoder] ERROR: Failed to decode frame for timestamp {}", timestamp);
Err("Failed to decode frame".to_string())
}
}
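The cache-hit rounding at the top of `get_frame`, isolated with concrete numbers: at 25 fps, timestamps on either side of a frame boundary snap to the same stream-units key and therefore share one cache entry. The `time_base` value below is just an example.

```rust
// Mirrors the rounding in get_frame: snap to the nearest frame boundary,
// then convert to stream time-base units for the cache key.
fn frame_cache_key(timestamp: f64, fps: f64, time_base: f64) -> i64 {
    let frame_duration = 1.0 / fps;
    let rounded = (timestamp / frame_duration).round() * frame_duration;
    (rounded / time_base) as i64
}

fn main() {
    let (fps, tb) = (25.0, 1.0 / 12800.0); // example stream time base
    let a = frame_cache_key(0.9999, fps, tb);
    let b = frame_cache_key(1.0001, fps, tb);
    assert_eq!(a, b); // both hover around 1.0s and hit the same cached frame
    println!("shared cache key: {a} stream units (~1.0s)");
}
```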
/// Probe video file for metadata without creating a full decoder
pub fn probe_video(path: &str) -> Result<VideoMetadata, String> {
ffmpeg::init().map_err(|e| e.to_string())?;
let input = ffmpeg::format::input(path)
.map_err(|e| format!("Failed to open video: {}", e))?;
let video_stream = input.streams()
.best(ffmpeg::media::Type::Video)
.ok_or("No video stream found")?;
let context_decoder = ffmpeg::codec::context::Context::from_parameters(
video_stream.parameters()
).map_err(|e| e.to_string())?;
let decoder = context_decoder.decoder().video()
.map_err(|e| e.to_string())?;
let width = decoder.width();
let height = decoder.height();
let time_base = f64::from(video_stream.time_base());
// Try to get duration from stream, fallback to container
let duration = if video_stream.duration() > 0 {
video_stream.duration() as f64 * time_base
} else if input.duration() > 0 {
input.duration() as f64 / f64::from(ffmpeg::ffi::AV_TIME_BASE)
} else {
// If no duration available, estimate from frame count and fps
let fps = f64::from(video_stream.avg_frame_rate());
if video_stream.frames() > 0 && fps > 0.0 {
video_stream.frames() as f64 / fps
} else {
0.0 // Unknown duration
}
};
let fps = f64::from(video_stream.avg_frame_rate());
// Check for audio stream
let has_audio = input.streams()
.best(ffmpeg::media::Type::Audio)
.is_some();
Ok(VideoMetadata {
width,
height,
fps,
duration,
has_audio,
})
}
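A minimal usage sketch for `probe_video`, which returns metadata without building a decoder or keyframe index; the path is a placeholder.

```rust
fn main() {
    // Placeholder path; probe_video is the public entry point added above.
    match lightningbeam_core::video::probe_video("/path/to/clip.mp4") {
        Ok(meta) => println!(
            "{}x{} @ {:.2} fps, {:.2}s, audio: {}",
            meta.width, meta.height, meta.fps, meta.duration, meta.has_audio
        ),
        Err(e) => eprintln!("probe failed: {e}"),
    }
}
```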
/// A single decoded video frame with RGBA data
#[derive(Debug, Clone)]
pub struct VideoFrame {
pub width: u32,
pub height: u32,
pub rgba_data: Arc<Vec<u8>>,
pub timestamp: f64,
}
/// Manages video decoders and frame caching for multiple video clips
pub struct VideoManager {
/// Pool of video decoders, one per clip
decoders: HashMap<Uuid, Arc<Mutex<VideoDecoder>>>,
/// Frame cache: (clip_id, timestamp_ms) -> frame
/// Stores raw RGBA data for zero-copy rendering
frame_cache: HashMap<(Uuid, i64), Arc<VideoFrame>>,
/// Thumbnail cache: clip_id -> Vec of (timestamp, rgba_data)
/// Low-resolution (64px width) thumbnails for scrubbing
thumbnail_cache: HashMap<Uuid, Vec<(f64, Arc<Vec<u8>>)>>,
/// Maximum number of frames to cache per decoder
cache_size: usize,
}
impl VideoManager {
/// Create a new video manager with default cache size
pub fn new() -> Self {
Self::with_cache_size(20)
}
/// Create a new video manager with specified cache size
pub fn with_cache_size(cache_size: usize) -> Self {
Self {
decoders: HashMap::new(),
frame_cache: HashMap::new(),
thumbnail_cache: HashMap::new(),
cache_size,
}
}
/// Load a video file and create a decoder for it
///
/// `target_width` and `target_height` specify the maximum dimensions
/// for decoded frames. Video will be scaled down if larger.
pub fn load_video(
&mut self,
clip_id: Uuid,
path: String,
target_width: u32,
target_height: u32,
) -> Result<VideoMetadata, String> {
// First probe the video for metadata
let metadata = probe_video(&path)?;
// Create decoder with target dimensions
let decoder = VideoDecoder::new(
path,
self.cache_size,
Some(target_width),
Some(target_height),
)?;
// Store decoder in pool
self.decoders.insert(clip_id, Arc::new(Mutex::new(decoder)));
Ok(metadata)
}
/// Get a decoded frame for a specific clip at a specific timestamp
///
/// Returns None if the clip is not loaded or decoding fails.
/// Frames are cached for performance.
pub fn get_frame(&mut self, clip_id: &Uuid, timestamp: f64) -> Option<Arc<VideoFrame>> {
// Convert timestamp to milliseconds for cache key
let timestamp_ms = (timestamp * 1000.0) as i64;
let cache_key = (*clip_id, timestamp_ms);
// Check frame cache first
if let Some(cached_frame) = self.frame_cache.get(&cache_key) {
return Some(Arc::clone(cached_frame));
}
// Get decoder for this clip
let decoder_arc = self.decoders.get(clip_id)?;
let mut decoder = decoder_arc.lock().ok()?;
// Decode the frame
let rgba_data = decoder.get_frame(timestamp).ok()?;
let width = decoder.output_width;
let height = decoder.output_height;
// Create VideoFrame and cache it
let frame = Arc::new(VideoFrame {
width,
height,
rgba_data: Arc::new(rgba_data),
timestamp,
});
self.frame_cache.insert(cache_key, Arc::clone(&frame));
Some(frame)
}
/// Generate thumbnails for a video clip
///
/// Thumbnails are generated every 5 seconds at 64px width.
/// This should be called in a background thread to avoid blocking.
pub fn generate_thumbnails(&mut self, clip_id: &Uuid, duration: f64) -> Result<(), String> {
let decoder_arc = self.decoders.get(clip_id)
.ok_or("Clip not loaded")?
.clone();
let mut decoder = decoder_arc.lock()
.map_err(|e| format!("Failed to lock decoder: {}", e))?;
let mut thumbnails = Vec::new();
let interval = 5.0; // Generate thumbnail every 5 seconds
let mut t = 0.0;
while t < duration {
// Decode frame at this timestamp
if let Ok(rgba_data) = decoder.get_frame(t) {
// Decoded frames are already scaled to the decoder's output dimensions,
// but thumbnails need to be 64px wide, so scale down further
let current_width = decoder.output_width;
let current_height = decoder.output_height;
// Calculate thumbnail dimensions (64px width, maintain aspect ratio)
let thumb_width = 64u32;
let aspect_ratio = current_height as f32 / current_width as f32;
let thumb_height = (thumb_width as f32 * aspect_ratio) as u32;
// Simple nearest-neighbor downsampling for thumbnails
let thumb_data = downsample_rgba(
&rgba_data,
current_width,
current_height,
thumb_width,
thumb_height,
);
thumbnails.push((t, Arc::new(thumb_data)));
}
t += interval;
}
// Store thumbnails in cache
self.thumbnail_cache.insert(*clip_id, thumbnails);
Ok(())
}
/// Get the thumbnail closest to the specified timestamp
///
/// Returns None if no thumbnails have been generated for this clip.
pub fn get_thumbnail_at(&self, clip_id: &Uuid, timestamp: f64) -> Option<(u32, u32, Arc<Vec<u8>>)> {
let thumbnails = self.thumbnail_cache.get(clip_id)?;
if thumbnails.is_empty() {
return None;
}
// Binary search for closest thumbnail
let idx = thumbnails.binary_search_by(|(t, _)| {
t.partial_cmp(&timestamp).unwrap_or(std::cmp::Ordering::Equal)
}).unwrap_or_else(|idx| {
// If exact match not found, pick the closest
if idx == 0 {
0
} else if idx >= thumbnails.len() {
thumbnails.len() - 1
} else {
// Compare distance to previous and next
let prev_dist = (thumbnails[idx - 1].0 - timestamp).abs();
let next_dist = (thumbnails[idx].0 - timestamp).abs();
if prev_dist < next_dist {
idx - 1
} else {
idx
}
}
});
let (_, rgba_data) = &thumbnails[idx];
// Return (width, height, data)
// Thumbnails are always 64px width
let thumb_width = 64;
let thumb_height = (rgba_data.len() / (thumb_width * 4)) as u32;
Some((thumb_width as u32, thumb_height, Arc::clone(rgba_data)))
}
/// Remove a video clip and its cached data
pub fn unload_video(&mut self, clip_id: &Uuid) {
self.decoders.remove(clip_id);
// Remove all cached frames for this clip
self.frame_cache.retain(|(id, _), _| id != clip_id);
// Remove thumbnails
self.thumbnail_cache.remove(clip_id);
}
/// Clear all frame caches (useful for memory management)
pub fn clear_frame_cache(&mut self) {
self.frame_cache.clear();
}
}
impl Default for VideoManager {
fn default() -> Self {
Self::new()
}
}
/// Simple nearest-neighbor downsampling for RGBA images
fn downsample_rgba(
src: &[u8],
src_width: u32,
src_height: u32,
dst_width: u32,
dst_height: u32,
) -> Vec<u8> {
let mut dst = Vec::with_capacity((dst_width * dst_height * 4) as usize);
let x_ratio = src_width as f32 / dst_width as f32;
let y_ratio = src_height as f32 / dst_height as f32;
for y in 0..dst_height {
for x in 0..dst_width {
let src_x = (x as f32 * x_ratio) as u32;
let src_y = (y as f32 * y_ratio) as u32;
let src_idx = ((src_y * src_width + src_x) * 4) as usize;
// Copy RGBA bytes
dst.push(src[src_idx]); // R
dst.push(src[src_idx + 1]); // G
dst.push(src[src_idx + 2]); // B
dst.push(src[src_idx + 3]); // A
}
}
dst
}
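A quick test-style check of the nearest-neighbor downsampler (it calls the private function, so it would live in this module): shrinking 2x2 to 1x1 keeps the top-left pixel, since both ratios are 2.0 and the sampled source coordinates truncate to (0, 0).

```rust
fn main() {
    let src = [
        255, 0, 0, 255,     0, 255, 0, 255,     // row 0: red, green
        0, 0, 255, 255,     255, 255, 255, 255, // row 1: blue, white
    ];
    let dst = downsample_rgba(&src, 2, 2, 1, 1);
    assert_eq!(dst, vec![255u8, 0, 0, 255]); // top-left red pixel survives
}
```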
/// Extracted audio data from a video file
#[derive(Debug, Clone)]
pub struct ExtractedAudio {
pub samples: Vec<f32>,
pub channels: u32,
pub sample_rate: u32,
pub duration: f64,
}
/// Extract audio from a video file
///
/// This function performs the slow FFmpeg decoding without holding any locks.
/// The caller can then quickly add the audio to the DAW backend in a background thread.
///
/// Returns None if the video has no audio stream.
pub fn extract_audio_from_video(path: &str) -> Result<Option<ExtractedAudio>, String> {
ffmpeg::init().map_err(|e| e.to_string())?;
// Open video file
let mut input = ffmpeg::format::input(path)
.map_err(|e| format!("Failed to open video: {}", e))?;
// Find audio stream
let audio_stream_opt = input.streams()
.best(ffmpeg::media::Type::Audio);
// Return None if no audio stream
if audio_stream_opt.is_none() {
return Ok(None);
}
let audio_stream = audio_stream_opt.unwrap();
let audio_index = audio_stream.index();
// Get audio properties
let context_decoder = ffmpeg::codec::context::Context::from_parameters(
audio_stream.parameters()
).map_err(|e| e.to_string())?;
let mut audio_decoder = context_decoder.decoder().audio()
.map_err(|e| e.to_string())?;
let sample_rate = audio_decoder.rate();
let channels = audio_decoder.channels() as u32;
// Decode all audio frames
let mut audio_samples: Vec<f32> = Vec::new();
for (stream, packet) in input.packets() {
if stream.index() == audio_index {
audio_decoder.send_packet(&packet)
.map_err(|e| e.to_string())?;
let mut audio_frame = ffmpeg::util::frame::Audio::empty();
while audio_decoder.receive_frame(&mut audio_frame).is_ok() {
// Convert audio to f32 packed format
let format = audio_frame.format();
let frame_channels = audio_frame.channels() as usize;
// Create resampler to convert to f32 packed
let mut resampler = ffmpeg::software::resampling::context::Context::get(
format,
audio_frame.channel_layout(),
sample_rate,
ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed),
audio_frame.channel_layout(),
sample_rate,
).map_err(|e| e.to_string())?;
let mut resampled_frame = ffmpeg::util::frame::Audio::empty();
resampler.run(&audio_frame, &mut resampled_frame)
.map_err(|e| e.to_string())?;
// Extract f32 samples (interleaved format)
let data_ptr = resampled_frame.data(0).as_ptr() as *const f32;
let total_samples = resampled_frame.samples() * frame_channels;
let samples_slice = unsafe {
std::slice::from_raw_parts(data_ptr, total_samples)
};
audio_samples.extend_from_slice(samples_slice);
}
}
}
// Flush audio decoder
audio_decoder.send_eof().map_err(|e| e.to_string())?;
let mut audio_frame = ffmpeg::util::frame::Audio::empty();
while audio_decoder.receive_frame(&mut audio_frame).is_ok() {
let format = audio_frame.format();
let frame_channels = audio_frame.channels() as usize;
let mut resampler = ffmpeg::software::resampling::context::Context::get(
format,
audio_frame.channel_layout(),
sample_rate,
ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed),
audio_frame.channel_layout(),
sample_rate,
).map_err(|e| e.to_string())?;
let mut resampled_frame = ffmpeg::util::frame::Audio::empty();
resampler.run(&audio_frame, &mut resampled_frame)
.map_err(|e| e.to_string())?;
let data_ptr = resampled_frame.data(0).as_ptr() as *const f32;
let total_samples = resampled_frame.samples() * frame_channels;
let samples_slice = unsafe {
std::slice::from_raw_parts(data_ptr, total_samples)
};
audio_samples.extend_from_slice(samples_slice);
}
// Calculate duration
let total_samples_per_channel = audio_samples.len() / channels as usize;
let duration = total_samples_per_channel as f64 / sample_rate as f64;
Ok(Some(ExtractedAudio {
samples: audio_samples,
channels,
sample_rate,
duration,
}))
}
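A hedged usage sketch for `extract_audio_from_video`: run the slow decode off the UI thread, then hand the interleaved samples to the DAW backend. The commented-out `add_samples` call is a stand-in, not a real daw-backend API.

```rust
fn import_audio_track(path: String) {
    std::thread::spawn(move || {
        match lightningbeam_core::video::extract_audio_from_video(&path) {
            Ok(Some(audio)) => {
                println!(
                    "decoded {} samples, {} ch @ {} Hz ({:.2}s)",
                    audio.samples.len(), audio.channels, audio.sample_rate, audio.duration
                );
                // backend.add_samples(audio.samples, audio.channels, audio.sample_rate);
            }
            Ok(None) => println!("video has no audio stream"),
            Err(e) => eprintln!("audio extraction failed: {e}"),
        }
    });
}
```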

View File

@ -4,6 +4,7 @@ use lightningbeam_core::layout::{LayoutDefinition, LayoutNode};
use lightningbeam_core::pane::PaneType;
use lightningbeam_core::tool::Tool;
use std::collections::HashMap;
use std::sync::Arc;
use clap::Parser;
use uuid::Uuid;
@ -457,6 +458,8 @@ struct EditorApp {
audio_event_rx: Option<rtrb::Consumer<daw_backend::AudioEvent>>, // Audio event receiver
audio_sample_rate: u32, // Audio sample rate
audio_channels: u32, // Audio channel count
// Video decoding and management
video_manager: std::sync::Arc<std::sync::Mutex<lightningbeam_core::video::VideoManager>>, // Shared video manager
// Track ID mapping (Document layer UUIDs <-> daw-backend TrackIds)
layer_to_track_map: HashMap<Uuid, daw_backend::TrackId>,
track_to_layer_map: HashMap<daw_backend::TrackId, Uuid>,
@ -607,6 +610,9 @@ impl EditorApp {
audio_event_rx,
audio_sample_rate,
audio_channels,
video_manager: std::sync::Arc::new(std::sync::Mutex::new(
lightningbeam_core::video::VideoManager::new()
)),
layer_to_track_map: HashMap::new(),
track_to_layer_map: HashMap::new(),
playback_time: 0.0, // Start at beginning
@ -1072,7 +1078,20 @@ impl EditorApp {
}
MenuAction::AddVideoLayer => {
println!("Menu: Add Video Layer");
// TODO: Implement add video layer
// Create a new video layer with a default name
let layer_number = self.action_executor.document().root.children.len() + 1;
let layer_name = format!("Video {}", layer_number);
let new_layer = lightningbeam_core::layer::AnyLayer::Video(
lightningbeam_core::layer::VideoLayer::new(&layer_name)
);
// Add the layer to the document
self.action_executor.document_mut().root.add_child(new_layer.clone());
// Set it as the active layer
if let Some(last_layer) = self.action_executor.document().root.children.last() {
self.active_layer_id = Some(last_layer.id());
}
}
MenuAction::AddAudioTrack => {
// Create a new sampled audio layer with a default name
@ -1663,26 +1682,77 @@ impl EditorApp {
/// Import a video file: probe its metadata, load it into the VideoManager, and spawn background thumbnail generation
fn import_video(&mut self, path: &std::path::Path) {
use lightningbeam_core::clip::VideoClip;
use lightningbeam_core::video::probe_video;
let name = path.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("Untitled Video")
.to_string();
// TODO: Use video decoder to get actual dimensions/duration
// For now, create a placeholder with default values
let clip = VideoClip::new(
let path_str = path.to_string_lossy().to_string();
// Probe video for metadata
let metadata = match probe_video(&path_str) {
Ok(meta) => meta,
Err(e) => {
eprintln!("Failed to probe video '{}': {}", name, e);
return;
}
};
// Create video clip with real metadata
let mut clip = VideoClip::new(
&name,
path.to_string_lossy().to_string(),
1920.0, // Default width (TODO: probe video)
1080.0, // Default height (TODO: probe video)
0.0, // Duration unknown (TODO: probe video)
30.0, // Default frame rate (TODO: probe video)
path_str.clone(),
metadata.width as f64,
metadata.height as f64,
metadata.duration,
metadata.fps,
);
let clip_id = clip.id;
// Load video into VideoManager
let doc_width = self.action_executor.document().width as u32;
let doc_height = self.action_executor.document().height as u32;
let mut video_mgr = self.video_manager.lock().unwrap();
if let Err(e) = video_mgr.load_video(clip_id, path_str.clone(), doc_width, doc_height) {
eprintln!("Failed to load video '{}': {}", name, e);
return;
}
drop(video_mgr);
// TODO: Extract audio in background thread if present
// TODO: Create AudioClip and link to VideoClip via linked_audio_clip_id
// Spawn background thread for thumbnail generation
let video_manager_clone = Arc::clone(&self.video_manager);
let duration = metadata.duration;
std::thread::spawn(move || {
let mut video_mgr = video_manager_clone.lock().unwrap();
if let Err(e) = video_mgr.generate_thumbnails(&clip_id, duration) {
eprintln!("Failed to generate video thumbnails: {}", e);
} else {
println!(" Generated thumbnails for video clip {}", clip_id);
}
});
// Add clip to document
let clip_id = self.action_executor.document_mut().add_video_clip(clip);
println!("Imported video '{}' (placeholder - dimensions/duration unknown) - ID: {}", name, clip_id);
println!("Note: Video decoder not yet ported. Video preview unavailable.");
println!("Imported video '{}' ({}x{}, {:.2}s @ {:.0}fps) - ID: {}",
name,
metadata.width,
metadata.height,
metadata.duration,
metadata.fps,
clip_id
);
if metadata.has_audio {
println!(" Video has audio track (extraction not yet implemented)");
}
}
}
@ -1900,6 +1970,7 @@ impl eframe::App for EditorApp {
rdp_tolerance: &mut self.rdp_tolerance,
schneider_max_error: &mut self.schneider_max_error,
audio_controller: self.audio_controller.as_ref(),
video_manager: &self.video_manager,
playback_time: &mut self.playback_time,
is_playing: &mut self.is_playing,
dragging_asset: &mut self.dragging_asset,
@ -2067,6 +2138,7 @@ struct RenderContext<'a> {
rdp_tolerance: &'a mut f64,
schneider_max_error: &'a mut f64,
audio_controller: Option<&'a std::sync::Arc<std::sync::Mutex<daw_backend::EngineController>>>,
video_manager: &'a std::sync::Arc<std::sync::Mutex<lightningbeam_core::video::VideoManager>>,
playback_time: &'a mut f64,
is_playing: &'a mut bool,
dragging_asset: &'a mut Option<panes::DraggingAsset>,
@ -2545,6 +2617,7 @@ fn render_pane(
rdp_tolerance: ctx.rdp_tolerance,
schneider_max_error: ctx.schneider_max_error,
audio_controller: ctx.audio_controller,
video_manager: ctx.video_manager,
layer_to_track_map: ctx.layer_to_track_map,
playback_time: ctx.playback_time,
is_playing: ctx.is_playing,
@ -2601,6 +2674,7 @@ fn render_pane(
rdp_tolerance: ctx.rdp_tolerance,
schneider_max_error: ctx.schneider_max_error,
audio_controller: ctx.audio_controller,
video_manager: ctx.video_manager,
layer_to_track_map: ctx.layer_to_track_map,
playback_time: ctx.playback_time,
is_playing: ctx.is_playing,

View File

@ -272,6 +272,70 @@ fn generate_waveform_thumbnail(
rgba
}
/// Generate a video thumbnail by decoding the first frame
/// Returns a 64x64 RGBA thumbnail with letterboxing to maintain aspect ratio
fn generate_video_thumbnail(
clip_id: &uuid::Uuid,
video_manager: &std::sync::Arc<std::sync::Mutex<lightningbeam_core::video::VideoManager>>,
) -> Option<Vec<u8>> {
// Get a frame from the video (at 1 second to skip potential black intros)
let timestamp = 1.0;
let frame = {
let mut video_mgr = video_manager.lock().ok()?;
video_mgr.get_frame(clip_id, timestamp)?
};
let src_width = frame.width as usize;
let src_height = frame.height as usize;
let dst_size = THUMBNAIL_SIZE as usize;
// Calculate letterboxing dimensions to maintain aspect ratio
let src_aspect = src_width as f32 / src_height as f32;
let (scaled_width, scaled_height, offset_x, offset_y) = if src_aspect > 1.0 {
// Wide video - letterbox top and bottom
let scaled_width = dst_size;
let scaled_height = (dst_size as f32 / src_aspect) as usize;
let offset_y = (dst_size - scaled_height) / 2;
(scaled_width, scaled_height, 0, offset_y)
} else {
// Tall video - letterbox left and right
let scaled_height = dst_size;
let scaled_width = (dst_size as f32 * src_aspect) as usize;
let offset_x = (dst_size - scaled_width) / 2;
(scaled_width, scaled_height, offset_x, 0)
};
// Create thumbnail with black letterbox bars
let mut rgba = vec![0u8; dst_size * dst_size * 4];
let x_ratio = src_width as f32 / scaled_width as f32;
let y_ratio = src_height as f32 / scaled_height as f32;
// Fill the scaled region
for dst_y in 0..scaled_height {
for dst_x in 0..scaled_width {
let src_x = (dst_x as f32 * x_ratio) as usize;
let src_y = (dst_y as f32 * y_ratio) as usize;
let src_idx = (src_y * src_width + src_x) * 4;
let final_x = dst_x + offset_x;
let final_y = dst_y + offset_y;
let dst_idx = (final_y * dst_size + final_x) * 4;
// Copy RGBA bytes
if src_idx + 3 < frame.rgba_data.len() && dst_idx + 3 < rgba.len() {
rgba[dst_idx] = frame.rgba_data[src_idx];
rgba[dst_idx + 1] = frame.rgba_data[src_idx + 1];
rgba[dst_idx + 2] = frame.rgba_data[src_idx + 2];
rgba[dst_idx + 3] = frame.rgba_data[src_idx + 3];
}
}
}
Some(rgba)
}
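The letterbox arithmetic above, reduced to a worked example with a 2:1 source (chosen so the f32 math is exact): a 1920x960 frame in a 64x64 thumbnail scales to 64x32 with 16px bars top and bottom.

```rust
// Returns (scaled_width, scaled_height, offset_x, offset_y), as above.
fn letterbox(src_w: usize, src_h: usize, dst: usize) -> (usize, usize, usize, usize) {
    let aspect = src_w as f32 / src_h as f32;
    if aspect > 1.0 {
        let h = (dst as f32 / aspect) as usize;
        (dst, h, 0, (dst - h) / 2) // wide source: bars top and bottom
    } else {
        let w = (dst as f32 * aspect) as usize;
        (w, dst, (dst - w) / 2, 0) // tall source: bars left and right
    }
}

fn main() {
    assert_eq!(letterbox(1920, 960, 64), (64, 32, 0, 16));
}
```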
/// Generate a piano roll thumbnail for MIDI clips
/// Shows notes as horizontal bars with Y position = note % 12 (one octave)
fn generate_midi_thumbnail(
@ -960,16 +1024,17 @@ impl AssetLibraryPane {
&mut self,
ui: &mut egui::Ui,
rect: egui::Rect,
path: &NodePath,
shared: &mut SharedPaneState,
assets: &[&AssetEntry],
document: &Document,
) {
match self.view_mode {
AssetViewMode::List => {
self.render_asset_list_view(ui, rect, shared, assets, document);
self.render_asset_list_view(ui, rect, path, shared, assets, document);
}
AssetViewMode::Grid => {
self.render_asset_grid_view(ui, rect, shared, assets, document);
self.render_asset_grid_view(ui, rect, path, shared, assets, document);
}
}
}
@ -979,6 +1044,7 @@ impl AssetLibraryPane {
&mut self,
ui: &mut egui::Ui,
rect: egui::Rect,
path: &NodePath,
shared: &mut SharedPaneState,
assets: &[&AssetEntry],
document: &Document,
@ -1019,6 +1085,7 @@ impl AssetLibraryPane {
let scroll_area_rect = rect;
ui.allocate_ui_at_rect(scroll_area_rect, |ui| {
egui::ScrollArea::vertical()
.id_salt(("asset_list_scroll", path))
.auto_shrink([false, false])
.show(ui, |ui| {
ui.set_min_width(scroll_area_rect.width() - 16.0); // Account for scrollbar
@ -1157,8 +1224,9 @@ impl AssetLibraryPane {
.map(|clip| generate_vector_thumbnail(clip, bg_color))
}
AssetCategory::Video => {
// Video backend not implemented yet - use placeholder
Some(generate_placeholder_thumbnail(AssetCategory::Video, 200))
// Generate video thumbnail from first frame
generate_video_thumbnail(&asset_id, &shared.video_manager)
.or_else(|| Some(generate_placeholder_thumbnail(AssetCategory::Video, 200)))
}
AssetCategory::Audio => {
// Check if it's sampled or MIDI
@ -1287,6 +1355,7 @@ impl AssetLibraryPane {
&mut self,
ui: &mut egui::Ui,
rect: egui::Rect,
path: &NodePath,
shared: &mut SharedPaneState,
assets: &[&AssetEntry],
document: &Document,
@ -1335,6 +1404,7 @@ impl AssetLibraryPane {
// Use egui's built-in ScrollArea for scrolling
ui.allocate_ui_at_rect(rect, |ui| {
egui::ScrollArea::vertical()
.id_salt(("asset_grid_scroll", path))
.auto_shrink([false, false])
.show(ui, |ui| {
// Reserve space for the entire grid
@ -1429,7 +1499,9 @@ impl AssetLibraryPane {
.map(|clip| generate_vector_thumbnail(clip, bg_color))
}
AssetCategory::Video => {
Some(generate_placeholder_thumbnail(AssetCategory::Video, 200))
// Generate video thumbnail from first frame
generate_video_thumbnail(&asset_id, &shared.video_manager)
.or_else(|| Some(generate_placeholder_thumbnail(AssetCategory::Video, 200)))
}
AssetCategory::Audio => {
if let Some(clip) = document.audio_clips.get(&asset_id) {
@ -1572,7 +1644,7 @@ impl PaneRenderer for AssetLibraryPane {
&mut self,
ui: &mut egui::Ui,
rect: egui::Rect,
_path: &NodePath,
path: &NodePath,
shared: &mut SharedPaneState,
) {
// Get an Arc clone of the document for thumbnail generation
@ -1600,7 +1672,7 @@ impl PaneRenderer for AssetLibraryPane {
// Render components
self.render_search_bar(ui, search_rect, shared);
self.render_category_tabs(ui, tabs_rect, shared);
self.render_assets(ui, list_rect, shared, &filtered_assets, &document_arc);
self.render_assets(ui, list_rect, path, shared, &filtered_assets, &document_arc);
// Context menu handling
if let Some(ref context_state) = self.context_menu.clone() {

View File

@ -204,7 +204,7 @@ impl InfopanelPane {
}
/// Render tool-specific options section
fn render_tool_section(&mut self, ui: &mut Ui, shared: &mut SharedPaneState) {
fn render_tool_section(&mut self, ui: &mut Ui, path: &NodePath, shared: &mut SharedPaneState) {
let tool = *shared.selected_tool;
// Only show tool options for tools that have options
@ -218,6 +218,7 @@ impl InfopanelPane {
}
egui::CollapsingHeader::new("Tool Options")
.id_salt(("tool_options", path))
.default_open(self.tool_section_open)
.show(ui, |ui| {
self.tool_section_open = true;
@ -234,7 +235,7 @@ impl InfopanelPane {
// Simplify mode
ui.horizontal(|ui| {
ui.label("Simplify:");
egui::ComboBox::from_id_salt("draw_simplify")
egui::ComboBox::from_id_salt(("draw_simplify", path))
.selected_text(match shared.draw_simplify_mode {
SimplifyMode::Corners => "Corners",
SimplifyMode::Smooth => "Smooth",
@ -325,10 +326,12 @@ impl InfopanelPane {
fn render_transform_section(
&mut self,
ui: &mut Ui,
path: &NodePath,
shared: &mut SharedPaneState,
info: &SelectionInfo,
) {
egui::CollapsingHeader::new("Transform")
.id_salt(("transform", path))
.default_open(self.transform_section_open)
.show(ui, |ui| {
self.transform_section_open = true;
@ -523,10 +526,12 @@ impl InfopanelPane {
fn render_shape_section(
&mut self,
ui: &mut Ui,
path: &NodePath,
shared: &mut SharedPaneState,
info: &SelectionInfo,
) {
egui::CollapsingHeader::new("Shape")
.id_salt(("shape", path))
.default_open(self.shape_section_open)
.show(ui, |ui| {
self.shape_section_open = true;
@ -666,8 +671,9 @@ impl InfopanelPane {
}
/// Render document settings section (shown when nothing is selected)
fn render_document_section(&self, ui: &mut Ui, shared: &mut SharedPaneState) {
fn render_document_section(&self, ui: &mut Ui, path: &NodePath, shared: &mut SharedPaneState) {
egui::CollapsingHeader::new("Document")
.id_salt(("document", path))
.default_open(true)
.show(ui, |ui| {
ui.add_space(4.0);
@ -755,7 +761,7 @@ impl PaneRenderer for InfopanelPane {
&mut self,
ui: &mut egui::Ui,
rect: egui::Rect,
_path: &NodePath,
path: &NodePath,
shared: &mut SharedPaneState,
) {
// Background
@ -774,29 +780,29 @@ impl PaneRenderer for InfopanelPane {
);
egui::ScrollArea::vertical()
.id_salt("infopanel_scroll")
.id_salt(("infopanel_scroll", path))
.show(&mut content_ui, |ui| {
ui.set_min_width(content_rect.width() - 16.0);
// 1. Tool options section (always shown if tool has options)
self.render_tool_section(ui, shared);
self.render_tool_section(ui, path, shared);
// 2. Gather selection info
let info = self.gather_selection_info(shared);
// 3. Transform section (if shapes selected)
if info.shape_count > 0 {
self.render_transform_section(ui, shared, &info);
self.render_transform_section(ui, path, shared, &info);
}
// 4. Shape properties section (if shapes selected)
if info.shape_count > 0 {
self.render_shape_section(ui, shared, &info);
self.render_shape_section(ui, path, shared, &info);
}
// 5. Document settings (if nothing selected)
if info.is_empty {
self.render_document_section(ui, shared);
self.render_document_section(ui, path, shared);
}
// Show selection count at bottom

View File

@ -109,6 +109,8 @@ pub struct SharedPaneState<'a> {
pub schneider_max_error: &'a mut f64,
/// Audio engine controller for playback control (wrapped in Arc<Mutex<>> for thread safety)
pub audio_controller: Option<&'a std::sync::Arc<std::sync::Mutex<daw_backend::EngineController>>>,
/// Video manager for video decoding and frame caching
pub video_manager: &'a std::sync::Arc<std::sync::Mutex<lightningbeam_core::video::VideoManager>>,
/// Mapping from Document layer UUIDs to daw-backend TrackIds
pub layer_to_track_map: &'a std::collections::HashMap<Uuid, daw_backend::TrackId>,
/// Global playback state

View File

@ -46,6 +46,8 @@ struct SharedVelloResources {
sampler: wgpu::Sampler,
/// Shared image cache for avoiding re-decoding images every frame
image_cache: Mutex<lightningbeam_core::renderer::ImageCache>,
/// Video manager for video decoding and frame caching
video_manager: std::sync::Arc<std::sync::Mutex<lightningbeam_core::video::VideoManager>>,
}
/// Per-instance Vello resources (created for each Stage pane)
@ -62,7 +64,7 @@ pub struct VelloResourcesMap {
}
impl SharedVelloResources {
pub fn new(device: &wgpu::Device) -> Result<Self, String> {
pub fn new(device: &wgpu::Device, video_manager: std::sync::Arc<std::sync::Mutex<lightningbeam_core::video::VideoManager>>) -> Result<Self, String> {
let renderer = vello::Renderer::new(
device,
vello::RendererOptions {
@ -164,6 +166,7 @@ impl SharedVelloResources {
blit_bind_group_layout,
sampler,
image_cache: Mutex::new(lightningbeam_core::renderer::ImageCache::new()),
video_manager,
})
}
}
@ -242,6 +245,7 @@ struct VelloCallback {
selected_tool: lightningbeam_core::tool::Tool, // Current tool for rendering mode-specific UI
eyedropper_request: Option<(egui::Pos2, super::ColorMode)>, // Pending eyedropper sample
playback_time: f64, // Current playback time for animation evaluation
video_manager: std::sync::Arc<std::sync::Mutex<lightningbeam_core::video::VideoManager>>,
}
impl VelloCallback {
@ -261,8 +265,9 @@ impl VelloCallback {
selected_tool: lightningbeam_core::tool::Tool,
eyedropper_request: Option<(egui::Pos2, super::ColorMode)>,
playback_time: f64,
video_manager: std::sync::Arc<std::sync::Mutex<lightningbeam_core::video::VideoManager>>,
) -> Self {
Self { rect, pan_offset, zoom, instance_id, document, tool_state, active_layer_id, drag_delta, selection, fill_color, stroke_color, stroke_width, selected_tool, eyedropper_request, playback_time }
Self { rect, pan_offset, zoom, instance_id, document, tool_state, active_layer_id, drag_delta, selection, fill_color, stroke_color, stroke_width, selected_tool, eyedropper_request, playback_time, video_manager }
}
}
@ -288,7 +293,7 @@ impl egui_wgpu::CallbackTrait for VelloCallback {
// Initialize shared resources if not yet created (only happens once for first Stage pane)
if map.shared.is_none() {
map.shared = Some(Arc::new(
SharedVelloResources::new(device).expect("Failed to initialize shared Vello resources")
SharedVelloResources::new(device, self.video_manager.clone()).expect("Failed to initialize shared Vello resources")
));
}
@ -320,7 +325,13 @@ impl egui_wgpu::CallbackTrait for VelloCallback {
// Render the document to the scene with camera transform
let mut image_cache = shared.image_cache.lock().unwrap();
lightningbeam_core::renderer::render_document_with_transform(&self.document, &mut scene, camera_transform, &mut image_cache);
lightningbeam_core::renderer::render_document_with_transform(
&self.document,
&mut scene,
camera_transform,
&mut image_cache,
&shared.video_manager,
);
drop(image_cache); // Explicitly release lock before other operations
// Render drag preview objects with transparency
@ -4237,9 +4248,31 @@ impl PaneRenderer for StagePane {
shared.pending_actions.push(Box::new(action));
} else {
// For clips, create a clip instance
let clip_instance = ClipInstance::new(dragging.clip_id)
// Video clips align to stage origin (0,0), other clips use mouse position
let (pos_x, pos_y) = if dragging.clip_type == DragClipType::Video {
(0.0, 0.0)
} else {
(world_pos.x as f64, world_pos.y as f64)
};
let mut clip_instance = ClipInstance::new(dragging.clip_id)
.with_timeline_start(drop_time)
.with_position(world_pos.x as f64, world_pos.y as f64);
.with_position(pos_x, pos_y);
// For video clips, scale to fill document dimensions
if dragging.clip_type == DragClipType::Video {
if let Some((video_width, video_height)) = dragging.dimensions {
let doc_width = shared.action_executor.document().width;
let doc_height = shared.action_executor.document().height;
// Calculate scale to fill document
let scale_x = doc_width / video_width;
let scale_y = doc_height / video_height;
clip_instance.transform.scale_x = scale_x;
clip_instance.transform.scale_y = scale_y;
}
}
// Create and queue action
let action = lightningbeam_core::actions::AddClipInstanceAction::new(
@ -4347,6 +4380,7 @@ impl PaneRenderer for StagePane {
*shared.selected_tool,
self.pending_eyedropper_sample,
*shared.playback_time,
shared.video_manager.clone(),
);
let cb = egui_wgpu::Callback::new_paint_callback(

View File

@ -943,6 +943,7 @@ impl TimelinePane {
}
/// Render layer rows (timeline content area)
/// Returns video clip hover data for processing after input handling
fn render_layers(
&self,
ui: &mut egui::Ui,
@ -955,9 +956,12 @@ impl TimelinePane {
waveform_cache: &std::collections::HashMap<usize, Vec<daw_backend::WaveformPeak>>,
waveform_image_cache: &mut crate::waveform_image_cache::WaveformImageCache,
audio_controller: Option<&std::sync::Arc<std::sync::Mutex<daw_backend::EngineController>>>,
) {
) -> Vec<(egui::Rect, uuid::Uuid, f64, f64)> {
let painter = ui.painter();
// Collect video clip rects for hover detection (to avoid borrow conflicts)
let mut video_clip_hovers: Vec<(egui::Rect, uuid::Uuid, f64, f64)> = Vec::new();
// Theme colors for active/inactive layers
let active_style = theme.style(".timeline-row-active", ui.ctx());
let inactive_style = theme.style(".timeline-row-inactive", ui.ctx());
@ -1186,6 +1190,11 @@ impl TimelinePane {
}
}
// VIDEO PREVIEW: Collect clip rect for hover detection
if let lightningbeam_core::layer::AnyLayer::Video(_) = layer {
video_clip_hovers.push((clip_rect, clip_instance.clip_id, clip_instance.trim_start, instance_start));
}
// Draw border only if selected (brighter version of clip color)
if selection.contains_clip_instance(&clip_instance.id) {
painter.rect_stroke(
@ -1221,6 +1230,9 @@ impl TimelinePane {
egui::Stroke::new(1.0, egui::Color32::from_gray(20)),
);
}
// Return video clip hover data for processing after input handling
video_clip_hovers
}
/// Handle mouse input for scrubbing, panning, zooming, layer selection, and clip instance selection
@ -1936,7 +1948,7 @@ impl PaneRenderer for TimelinePane {
// Render layer rows with clipping
ui.set_clip_rect(content_rect.intersect(original_clip_rect));
self.render_layers(ui, content_rect, shared.theme, document, shared.active_layer_id, shared.selection, shared.midi_event_cache, shared.waveform_cache, shared.waveform_image_cache, shared.audio_controller);
let video_clip_hovers = self.render_layers(ui, content_rect, shared.theme, document, shared.active_layer_id, shared.selection, shared.midi_event_cache, shared.waveform_cache, shared.waveform_image_cache, shared.audio_controller);
// Render playhead on top (clip to timeline area)
ui.set_clip_rect(timeline_rect.intersect(original_clip_rect));
@ -1962,6 +1974,70 @@ impl PaneRenderer for TimelinePane {
shared.audio_controller,
);
// VIDEO HOVER DETECTION: Handle video clip hover tooltips AFTER input handling
// This ensures hover events aren't consumed by the main input handler
for (clip_rect, clip_id, trim_start, instance_start) in video_clip_hovers {
let hover_response = ui.allocate_rect(clip_rect, egui::Sense::hover());
if hover_response.hovered() {
if let Some(hover_pos) = hover_response.hover_pos() {
// Calculate timestamp at hover position
let hover_offset_pixels = hover_pos.x - clip_rect.min.x;
let hover_offset_time = (hover_offset_pixels as f64) / (self.pixels_per_second as f64);
let hover_timestamp = instance_start + hover_offset_time;
// Remap to clip content time accounting for trim
let clip_content_time = trim_start + (hover_timestamp - instance_start);
// Try to get thumbnail from video manager
let thumbnail_data: Option<(u32, u32, std::sync::Arc<Vec<u8>>)> = {
let video_mgr = shared.video_manager.lock().unwrap();
video_mgr.get_thumbnail_at(&clip_id, clip_content_time)
};
if let Some((thumb_width, thumb_height, ref thumb_data)) = thumbnail_data {
// Create texture from thumbnail
let color_image = egui::ColorImage::from_rgba_unmultiplied(
[thumb_width as usize, thumb_height as usize],
&thumb_data,
);
let texture = ui.ctx().load_texture(
format!("video_hover_{}", clip_id),
color_image,
egui::TextureOptions::LINEAR,
);
// Show tooltip with thumbnail positioned near cursor
let tooltip_pos = hover_pos + egui::vec2(10.0, 10.0);
egui::Area::new(egui::Id::new(format!("video_hover_tooltip_{}", clip_id)))
.fixed_pos(tooltip_pos)
.order(egui::Order::Tooltip)
.show(ui.ctx(), |ui| {
egui::Frame::popup(ui.style())
.show(ui, |ui| {
ui.vertical(|ui| {
ui.image(&texture);
ui.label(format!("Time: {:.2}s", clip_content_time));
});
});
});
} else {
// Show simple tooltip if no thumbnail available
let tooltip_pos = hover_pos + egui::vec2(10.0, 10.0);
egui::Area::new(egui::Id::new(format!("video_tooltip_{}", clip_id)))
.fixed_pos(tooltip_pos)
.order(egui::Order::Tooltip)
.show(ui.ctx(), |ui| {
egui::Frame::popup(ui.style())
.show(ui, |ui| {
ui.label(format!("Video clip\nTime: {:.2}s\n(Thumbnails generating...)", clip_content_time));
});
});
}
}
}
}
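The hover math above simplifies because `instance_start` is added and then subtracted; what remains is the trim offset plus pixels converted to seconds. A reduced worked example: at 100 px/s, hovering 250 px into a clip trimmed 1.5s into its source lands on source time 4.0s.

```rust
fn hover_to_clip_time(hover_px: f32, pixels_per_second: f32, trim_start: f64) -> f64 {
    // instance_start cancels: trim_start + ((instance_start + dt) - instance_start)
    let offset_time = hover_px as f64 / pixels_per_second as f64;
    trim_start + offset_time
}

fn main() {
    assert_eq!(hover_to_clip_time(250.0, 100.0, 1.5), 4.0);
}
```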
// Handle asset drag-and-drop from Asset Library
if let Some(dragging) = shared.dragging_asset.as_ref() {
if let Some(pointer_pos) = ui.ctx().pointer_interact_pos() {
@ -2016,10 +2092,22 @@ impl PaneRenderer for TimelinePane {
let center_y = doc.height / 2.0;
// Create clip instance centered on stage, at drop time
let clip_instance = ClipInstance::new(dragging.clip_id)
let mut clip_instance = ClipInstance::new(dragging.clip_id)
.with_timeline_start(drop_time)
.with_position(center_x, center_y);
// For video clips, scale to fill document dimensions
if dragging.clip_type == DragClipType::Video {
if let Some((video_width, video_height)) = dragging.dimensions {
// Calculate scale to fill document
let scale_x = doc.width / video_width;
let scale_y = doc.height / video_height;
clip_instance.transform.scale_x = scale_x;
clip_instance.transform.scale_y = scale_y;
}
}
// Create and queue action
let action = lightningbeam_core::actions::AddClipInstanceAction::new(
layer_id,