//! Video decoding and management for Lightningbeam //! //! This module provides FFmpeg-based video decoding with LRU frame caching //! for efficient video playback and preview. use std::sync::{Arc, Mutex}; use std::num::NonZeroUsize; use std::collections::HashMap; use ffmpeg_next as ffmpeg; use lru::LruCache; use uuid::Uuid; /// Metadata about a video file #[derive(Debug, Clone)] pub struct VideoMetadata { pub width: u32, pub height: u32, pub fps: f64, pub duration: f64, pub has_audio: bool, } /// Video decoder with LRU frame caching pub struct VideoDecoder { path: String, width: u32, // Original video width height: u32, // Original video height output_width: u32, // Scaled output width output_height: u32, // Scaled output height fps: f64, duration: f64, time_base: f64, stream_index: usize, frame_cache: LruCache>, // timestamp -> RGBA data input: Option, decoder: Option, last_decoded_ts: i64, // Track the last decoded frame timestamp keyframe_positions: Vec, // Index of keyframe timestamps for fast seeking } impl VideoDecoder { /// Create a new video decoder /// /// `max_width` and `max_height` specify the maximum output dimensions. /// Video will be scaled down if larger, preserving aspect ratio. /// `build_keyframes` controls whether to build the keyframe index immediately (slow) /// or defer it for async building later. fn new(path: String, cache_size: usize, max_width: Option, max_height: Option, build_keyframes: bool) -> Result { ffmpeg::init().map_err(|e| e.to_string())?; let input = ffmpeg::format::input(&path) .map_err(|e| format!("Failed to open video: {}", e))?; let video_stream = input.streams() .best(ffmpeg::media::Type::Video) .ok_or("No video stream found")?; let stream_index = video_stream.index(); let context_decoder = ffmpeg::codec::context::Context::from_parameters( video_stream.parameters() ).map_err(|e| e.to_string())?; let decoder = context_decoder.decoder().video() .map_err(|e| e.to_string())?; let width = decoder.width(); let height = decoder.height(); let time_base = f64::from(video_stream.time_base()); // Calculate output dimensions (scale down if larger than max) let (output_width, output_height) = if let (Some(max_w), Some(max_h)) = (max_width, max_height) { // Calculate scale to fit within max dimensions while preserving aspect ratio let scale = (max_w as f32 / width as f32).min(max_h as f32 / height as f32).min(1.0); ((width as f32 * scale) as u32, (height as f32 * scale) as u32) } else { (width, height) }; // Try to get duration from stream, fallback to container let duration = if video_stream.duration() > 0 { video_stream.duration() as f64 * time_base } else if input.duration() > 0 { input.duration() as f64 / f64::from(ffmpeg::ffi::AV_TIME_BASE) } else { // If no duration available, estimate from frame count and fps let fps = f64::from(video_stream.avg_frame_rate()); if video_stream.frames() > 0 && fps > 0.0 { video_stream.frames() as f64 / fps } else { 0.0 // Unknown duration } }; let fps = f64::from(video_stream.avg_frame_rate()); // Optionally build keyframe index for fast seeking let keyframe_positions = if build_keyframes { eprintln!("[Video Decoder] Building keyframe index for {}", path); let positions = Self::build_keyframe_index(&path, stream_index)?; eprintln!("[Video Decoder] Found {} keyframes", positions.len()); positions } else { eprintln!("[Video Decoder] Deferring keyframe index building for {}", path); Vec::new() }; Ok(Self { path, width, height, output_width, output_height, fps, duration, time_base, stream_index, frame_cache: LruCache::new( NonZeroUsize::new(cache_size).unwrap() ), input: None, decoder: None, last_decoded_ts: -1, keyframe_positions, }) } /// Build keyframe index for this decoder /// This can be called asynchronously after decoder creation fn build_and_set_keyframe_index(&mut self) -> Result<(), String> { eprintln!("[Video Decoder] Building keyframe index for {}", self.path); let positions = Self::build_keyframe_index(&self.path, self.stream_index)?; eprintln!("[Video Decoder] Found {} keyframes", positions.len()); self.keyframe_positions = positions; Ok(()) } /// Get the output width (scaled dimensions) pub fn get_output_width(&self) -> u32 { self.output_width } /// Get the output height (scaled dimensions) pub fn get_output_height(&self) -> u32 { self.output_height } /// Decode a frame at the specified timestamp (public wrapper) pub fn decode_frame(&mut self, timestamp: f64) -> Result, String> { self.get_frame(timestamp) } /// Build an index of all keyframe positions in the video /// This enables fast seeking by knowing exactly where keyframes are fn build_keyframe_index(path: &str, stream_index: usize) -> Result, String> { let mut input = ffmpeg::format::input(path) .map_err(|e| format!("Failed to open video for indexing: {}", e))?; let mut keyframes = Vec::new(); // Scan through all packets to find keyframes for (stream, packet) in input.packets() { if stream.index() == stream_index { // Check if this packet is a keyframe if packet.is_key() { if let Some(pts) = packet.pts() { keyframes.push(pts); } } } } // Ensure keyframes are sorted (they should be already) keyframes.sort_unstable(); Ok(keyframes) } /// Find the nearest keyframe at or before the target timestamp /// Returns the keyframe timestamp, or 0 if target is before first keyframe fn find_nearest_keyframe_before(&self, target_ts: i64) -> i64 { // Binary search to find the largest keyframe <= target_ts match self.keyframe_positions.binary_search(&target_ts) { Ok(idx) => self.keyframe_positions[idx], // Exact match Err(0) => 0, // Target is before first keyframe, seek to start Err(idx) => self.keyframe_positions[idx - 1], // Use previous keyframe } } /// Get a decoded frame at the specified timestamp fn get_frame(&mut self, timestamp: f64) -> Result, String> { use std::time::Instant; let t_start = Instant::now(); // Round timestamp to nearest frame boundary to improve cache hits // This ensures that timestamps like 1.0001s and 0.9999s both map to frame 1.0s let frame_duration = 1.0 / self.fps; let rounded_timestamp = (timestamp / frame_duration).round() * frame_duration; // Convert timestamp to frame timestamp let frame_ts = (rounded_timestamp / self.time_base) as i64; // Check cache if let Some(cached_frame) = self.frame_cache.get(&frame_ts) { eprintln!("[Video Timing] Cache hit for ts={:.3}s ({}ms)", timestamp, t_start.elapsed().as_millis()); return Ok(cached_frame.clone()); } // Determine if we need to seek // Seek if: no decoder open, going backwards, or jumping forward more than 2 seconds let need_seek = self.decoder.is_none() || frame_ts < self.last_decoded_ts || frame_ts > self.last_decoded_ts + (2.0 / self.time_base) as i64; if need_seek { let t_seek_start = Instant::now(); // Find the nearest keyframe at or before our target using the index // This is the exact keyframe position, so we can seek directly to it let keyframe_ts_stream = self.find_nearest_keyframe_before(frame_ts); // Convert from stream timebase to AV_TIME_BASE (microseconds) for container-level seek // input.seek() with stream=-1 expects AV_TIME_BASE units, not stream units let keyframe_seconds = keyframe_ts_stream as f64 * self.time_base; let keyframe_ts_av = (keyframe_seconds * 1_000_000.0) as i64; // AV_TIME_BASE = 1000000 eprintln!("[Video Seek] Target: {} | Keyframe(stream): {} | Keyframe(AV): {} | Index size: {}", frame_ts, keyframe_ts_stream, keyframe_ts_av, self.keyframe_positions.len()); // Reopen input let mut input = ffmpeg::format::input(&self.path) .map_err(|e| format!("Failed to reopen video: {}", e))?; // Seek directly to the keyframe with a 1-unit window // Can't use keyframe_ts..keyframe_ts (empty) or ..= (not supported) input.seek(keyframe_ts_av, keyframe_ts_av..(keyframe_ts_av + 1)) .map_err(|e| format!("Seek failed: {}", e))?; eprintln!("[Video Timing] Seek call took {}ms", t_seek_start.elapsed().as_millis()); let context_decoder = ffmpeg::codec::context::Context::from_parameters( input.streams().best(ffmpeg::media::Type::Video).unwrap().parameters() ).map_err(|e| e.to_string())?; let decoder = context_decoder.decoder().video() .map_err(|e| e.to_string())?; self.input = Some(input); self.decoder = Some(decoder); // Set last_decoded_ts to just before the seek target so forward playback works // Without this, every frame would trigger a new seek self.last_decoded_ts = frame_ts - 1; } let input = self.input.as_mut().unwrap(); let decoder = self.decoder.as_mut().unwrap(); // Decode frames until we find the one closest to our target timestamp let mut best_frame_data: Option> = None; let mut best_frame_ts: Option = None; let t_decode_start = Instant::now(); let mut decode_count = 0; let mut scale_time_ms = 0u128; for (stream, packet) in input.packets() { if stream.index() == self.stream_index { decoder.send_packet(&packet) .map_err(|e| e.to_string())?; let mut frame = ffmpeg::util::frame::Video::empty(); while decoder.receive_frame(&mut frame).is_ok() { decode_count += 1; let current_frame_ts = frame.timestamp().unwrap_or(0); self.last_decoded_ts = current_frame_ts; // Update last decoded position // Check if this frame is closer to our target than the previous best let is_better = match best_frame_ts { None => true, Some(best_ts) => { (current_frame_ts - frame_ts).abs() < (best_ts - frame_ts).abs() } }; if is_better { let t_scale_start = Instant::now(); // Convert to RGBA and scale to output size let mut scaler = ffmpeg::software::scaling::context::Context::get( frame.format(), frame.width(), frame.height(), ffmpeg::format::Pixel::RGBA, self.output_width, self.output_height, ffmpeg::software::scaling::flag::Flags::BILINEAR, ).map_err(|e| e.to_string())?; let mut rgb_frame = ffmpeg::util::frame::Video::empty(); scaler.run(&frame, &mut rgb_frame) .map_err(|e| e.to_string())?; // Remove stride padding to create tightly packed RGBA data let width = self.output_width as usize; let height = self.output_height as usize; let stride = rgb_frame.stride(0); let row_size = width * 4; // RGBA = 4 bytes per pixel let source_data = rgb_frame.data(0); let mut packed_data = Vec::with_capacity(row_size * height); for y in 0..height { let row_start = y * stride; let row_end = row_start + row_size; packed_data.extend_from_slice(&source_data[row_start..row_end]); } scale_time_ms += t_scale_start.elapsed().as_millis(); best_frame_data = Some(packed_data); best_frame_ts = Some(current_frame_ts); } // If we've reached or passed the target timestamp, we can stop if current_frame_ts >= frame_ts { // Found our frame, cache and return it if let Some(data) = best_frame_data { let total_time = t_start.elapsed().as_millis(); let decode_time = t_decode_start.elapsed().as_millis(); eprintln!("[Video Timing] ts={:.3}s | Decoded {} frames in {}ms | Scale: {}ms | Total: {}ms", timestamp, decode_count, decode_time, scale_time_ms, total_time); self.frame_cache.put(frame_ts, data.clone()); return Ok(data); } break; } } } } eprintln!("[Video Decoder] ERROR: Failed to decode frame for timestamp {}", timestamp); Err("Failed to decode frame".to_string()) } } /// Probe video file for metadata without creating a full decoder pub fn probe_video(path: &str) -> Result { ffmpeg::init().map_err(|e| e.to_string())?; let input = ffmpeg::format::input(path) .map_err(|e| format!("Failed to open video: {}", e))?; let video_stream = input.streams() .best(ffmpeg::media::Type::Video) .ok_or("No video stream found")?; let context_decoder = ffmpeg::codec::context::Context::from_parameters( video_stream.parameters() ).map_err(|e| e.to_string())?; let decoder = context_decoder.decoder().video() .map_err(|e| e.to_string())?; let width = decoder.width(); let height = decoder.height(); let time_base = f64::from(video_stream.time_base()); // Try to get duration from stream, fallback to container let duration = if video_stream.duration() > 0 { video_stream.duration() as f64 * time_base } else if input.duration() > 0 { input.duration() as f64 / f64::from(ffmpeg::ffi::AV_TIME_BASE) } else { // If no duration available, estimate from frame count and fps let fps = f64::from(video_stream.avg_frame_rate()); if video_stream.frames() > 0 && fps > 0.0 { video_stream.frames() as f64 / fps } else { 0.0 // Unknown duration } }; let fps = f64::from(video_stream.avg_frame_rate()); // Check for audio stream let has_audio = input.streams() .best(ffmpeg::media::Type::Audio) .is_some(); Ok(VideoMetadata { width, height, fps, duration, has_audio, }) } /// A single decoded video frame with RGBA data #[derive(Debug, Clone)] pub struct VideoFrame { pub width: u32, pub height: u32, pub rgba_data: Arc>, pub timestamp: f64, } /// Manages video decoders and frame caching for multiple video clips pub struct VideoManager { /// Pool of video decoders, one per clip decoders: HashMap>>, /// Frame cache: (clip_id, timestamp_ms) -> frame /// Stores raw RGBA data for zero-copy rendering frame_cache: HashMap<(Uuid, i64), Arc>, /// Thumbnail cache: clip_id -> Vec of (timestamp, rgba_data) /// Low-resolution (64px width) thumbnails for scrubbing thumbnail_cache: HashMap>)>>, /// Maximum number of frames to cache per decoder cache_size: usize, } impl VideoManager { /// Create a new video manager with default cache size pub fn new() -> Self { Self::with_cache_size(20) } /// Create a new video manager with specified cache size pub fn with_cache_size(cache_size: usize) -> Self { Self { decoders: HashMap::new(), frame_cache: HashMap::new(), thumbnail_cache: HashMap::new(), cache_size, } } /// Load a video file and create a decoder for it /// /// `target_width` and `target_height` specify the maximum dimensions /// for decoded frames. Video will be scaled down if larger. /// /// The keyframe index is NOT built during this call - use `build_keyframe_index_async` /// in a background thread to build it asynchronously. pub fn load_video( &mut self, clip_id: Uuid, path: String, target_width: u32, target_height: u32, ) -> Result { // First probe the video for metadata let metadata = probe_video(&path)?; // Create decoder with target dimensions, without building keyframe index let decoder = VideoDecoder::new( path, self.cache_size, Some(target_width), Some(target_height), false, // Don't build keyframe index synchronously )?; // Store decoder in pool self.decoders.insert(clip_id, Arc::new(Mutex::new(decoder))); Ok(metadata) } /// Build keyframe index for a loaded video asynchronously /// /// This should be called from a background thread after load_video() /// to avoid blocking the UI during import. pub fn build_keyframe_index(&self, clip_id: &Uuid) -> Result<(), String> { let decoder_arc = self.decoders.get(clip_id) .ok_or_else(|| format!("Video clip {} not found", clip_id))?; let mut decoder = decoder_arc.lock() .map_err(|e| format!("Failed to lock decoder: {}", e))?; decoder.build_and_set_keyframe_index() } /// Get a decoded frame for a specific clip at a specific timestamp /// /// Returns None if the clip is not loaded or decoding fails. /// Frames are cached for performance. pub fn get_frame(&mut self, clip_id: &Uuid, timestamp: f64) -> Option> { // Convert timestamp to milliseconds for cache key let timestamp_ms = (timestamp * 1000.0) as i64; let cache_key = (*clip_id, timestamp_ms); // Check frame cache first if let Some(cached_frame) = self.frame_cache.get(&cache_key) { return Some(Arc::clone(cached_frame)); } // Get decoder for this clip let decoder_arc = self.decoders.get(clip_id)?; let mut decoder = decoder_arc.lock().ok()?; // Decode the frame let rgba_data = decoder.get_frame(timestamp).ok()?; let width = decoder.output_width; let height = decoder.output_height; // Create VideoFrame and cache it let frame = Arc::new(VideoFrame { width, height, rgba_data: Arc::new(rgba_data), timestamp, }); self.frame_cache.insert(cache_key, Arc::clone(&frame)); Some(frame) } /// Generate thumbnails for a video clip (single batch version - use generate_thumbnails_progressive instead) /// /// Thumbnails are generated every 5 seconds at 128px width. /// This should be called in a background thread to avoid blocking. /// Thumbnails are inserted into the cache progressively as they're generated, /// allowing the UI to display them immediately. /// /// DEPRECATED: Use generate_thumbnails_progressive which releases the lock between thumbnails. pub fn generate_thumbnails(&mut self, clip_id: &Uuid, duration: f64) -> Result<(), String> { let decoder_arc = self.decoders.get(clip_id) .ok_or("Clip not loaded")? .clone(); let mut decoder = decoder_arc.lock() .map_err(|e| format!("Failed to lock decoder: {}", e))?; // Initialize thumbnail cache entry with empty vec self.thumbnail_cache.insert(*clip_id, Vec::new()); let interval = 5.0; // Generate thumbnail every 5 seconds let mut t = 0.0; while t < duration { // Decode frame at this timestamp if let Ok(rgba_data) = decoder.get_frame(t) { // Decode already scaled to output dimensions, but we want 128px width for thumbnails // We need to scale down further let current_width = decoder.output_width; let current_height = decoder.output_height; // Calculate thumbnail dimensions (128px width, maintain aspect ratio) let thumb_width = 128u32; let aspect_ratio = current_height as f32 / current_width as f32; let thumb_height = (thumb_width as f32 * aspect_ratio) as u32; // Simple nearest-neighbor downsampling for thumbnails let thumb_data = downsample_rgba( &rgba_data, current_width, current_height, thumb_width, thumb_height, ); // Insert thumbnail into cache immediately so UI can display it if let Some(thumbnails) = self.thumbnail_cache.get_mut(clip_id) { thumbnails.push((t, Arc::new(thumb_data))); } } t += interval; } Ok(()) } /// Get the decoder Arc for a clip (for external thumbnail generation) /// This allows external code to decode frames without holding the VideoManager lock pub fn get_decoder(&self, clip_id: &Uuid) -> Option>> { self.decoders.get(clip_id).cloned() } /// Insert a thumbnail into the cache (for external thumbnail generation) pub fn insert_thumbnail(&mut self, clip_id: &Uuid, timestamp: f64, data: Arc>) { self.thumbnail_cache .entry(*clip_id) .or_insert_with(Vec::new) .push((timestamp, data)); } /// Get the thumbnail closest to the specified timestamp /// /// Returns None if no thumbnails have been generated for this clip. pub fn get_thumbnail_at(&self, clip_id: &Uuid, timestamp: f64) -> Option<(u32, u32, Arc>)> { let thumbnails = self.thumbnail_cache.get(clip_id)?; if thumbnails.is_empty() { return None; } // Binary search for closest thumbnail let idx = thumbnails.binary_search_by(|(t, _)| { t.partial_cmp(×tamp).unwrap_or(std::cmp::Ordering::Equal) }).unwrap_or_else(|idx| { // If exact match not found, pick the closest if idx == 0 { 0 } else if idx >= thumbnails.len() { thumbnails.len() - 1 } else { // Compare distance to previous and next let prev_dist = (thumbnails[idx - 1].0 - timestamp).abs(); let next_dist = (thumbnails[idx].0 - timestamp).abs(); if prev_dist < next_dist { idx - 1 } else { idx } } }); let (_, rgba_data) = &thumbnails[idx]; // Return (width, height, data) // Thumbnails are always 128px width let thumb_width = 128; let thumb_height = (rgba_data.len() / (thumb_width * 4)) as u32; Some((thumb_width as u32, thumb_height, Arc::clone(rgba_data))) } /// Remove a video clip and its cached data pub fn unload_video(&mut self, clip_id: &Uuid) { self.decoders.remove(clip_id); // Remove all cached frames for this clip self.frame_cache.retain(|(id, _), _| id != clip_id); // Remove thumbnails self.thumbnail_cache.remove(clip_id); } /// Clear all frame caches (useful for memory management) pub fn clear_frame_cache(&mut self) { self.frame_cache.clear(); } } impl Default for VideoManager { fn default() -> Self { Self::new() } } /// Simple nearest-neighbor downsampling for RGBA images pub fn downsample_rgba_public( src: &[u8], src_width: u32, src_height: u32, dst_width: u32, dst_height: u32, ) -> Vec { downsample_rgba(src, src_width, src_height, dst_width, dst_height) } /// Simple nearest-neighbor downsampling for RGBA images (internal) fn downsample_rgba( src: &[u8], src_width: u32, src_height: u32, dst_width: u32, dst_height: u32, ) -> Vec { let mut dst = Vec::with_capacity((dst_width * dst_height * 4) as usize); let x_ratio = src_width as f32 / dst_width as f32; let y_ratio = src_height as f32 / dst_height as f32; for y in 0..dst_height { for x in 0..dst_width { let src_x = (x as f32 * x_ratio) as u32; let src_y = (y as f32 * y_ratio) as u32; let src_idx = ((src_y * src_width + src_x) * 4) as usize; // Copy RGBA bytes dst.push(src[src_idx]); // R dst.push(src[src_idx + 1]); // G dst.push(src[src_idx + 2]); // B dst.push(src[src_idx + 3]); // A } } dst } /// Extracted audio data from a video file #[derive(Debug, Clone)] pub struct ExtractedAudio { pub samples: Vec, pub channels: u32, pub sample_rate: u32, pub duration: f64, } /// Extract audio from a video file /// /// This function performs the slow FFmpeg decoding without holding any locks. /// The caller can then quickly add the audio to the DAW backend in a background thread. /// /// Returns None if the video has no audio stream. pub fn extract_audio_from_video(path: &str) -> Result, String> { ffmpeg::init().map_err(|e| e.to_string())?; // Open video file let mut input = ffmpeg::format::input(path) .map_err(|e| format!("Failed to open video: {}", e))?; // Find audio stream let audio_stream_opt = input.streams() .best(ffmpeg::media::Type::Audio); // Return None if no audio stream if audio_stream_opt.is_none() { return Ok(None); } let audio_stream = audio_stream_opt.unwrap(); let audio_index = audio_stream.index(); // Get audio properties let context_decoder = ffmpeg::codec::context::Context::from_parameters( audio_stream.parameters() ).map_err(|e| e.to_string())?; let mut audio_decoder = context_decoder.decoder().audio() .map_err(|e| e.to_string())?; let sample_rate = audio_decoder.rate(); let channels = audio_decoder.channels() as u32; // Decode all audio frames let mut audio_samples: Vec = Vec::new(); for (stream, packet) in input.packets() { if stream.index() == audio_index { audio_decoder.send_packet(&packet) .map_err(|e| e.to_string())?; let mut audio_frame = ffmpeg::util::frame::Audio::empty(); while audio_decoder.receive_frame(&mut audio_frame).is_ok() { // Convert audio to f32 packed format let format = audio_frame.format(); let frame_channels = audio_frame.channels() as usize; // Create resampler to convert to f32 packed let mut resampler = ffmpeg::software::resampling::context::Context::get( format, audio_frame.channel_layout(), sample_rate, ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed), audio_frame.channel_layout(), sample_rate, ).map_err(|e| e.to_string())?; let mut resampled_frame = ffmpeg::util::frame::Audio::empty(); resampler.run(&audio_frame, &mut resampled_frame) .map_err(|e| e.to_string())?; // Extract f32 samples (interleaved format) let data_ptr = resampled_frame.data(0).as_ptr() as *const f32; let total_samples = resampled_frame.samples() * frame_channels; let samples_slice = unsafe { std::slice::from_raw_parts(data_ptr, total_samples) }; audio_samples.extend_from_slice(samples_slice); } } } // Flush audio decoder audio_decoder.send_eof().map_err(|e| e.to_string())?; let mut audio_frame = ffmpeg::util::frame::Audio::empty(); while audio_decoder.receive_frame(&mut audio_frame).is_ok() { let format = audio_frame.format(); let frame_channels = audio_frame.channels() as usize; let mut resampler = ffmpeg::software::resampling::context::Context::get( format, audio_frame.channel_layout(), sample_rate, ffmpeg::format::Sample::F32(ffmpeg::format::sample::Type::Packed), audio_frame.channel_layout(), sample_rate, ).map_err(|e| e.to_string())?; let mut resampled_frame = ffmpeg::util::frame::Audio::empty(); resampler.run(&audio_frame, &mut resampled_frame) .map_err(|e| e.to_string())?; let data_ptr = resampled_frame.data(0).as_ptr() as *const f32; let total_samples = resampled_frame.samples() * frame_channels; let samples_slice = unsafe { std::slice::from_raw_parts(data_ptr, total_samples) }; audio_samples.extend_from_slice(samples_slice); } // Calculate duration let total_samples_per_channel = audio_samples.len() / channels as usize; let duration = total_samples_per_channel as f64 / sample_rate as f64; Ok(Some(ExtractedAudio { samples: audio_samples, channels, sample_rate, duration, })) }