From cb62d0ee9df545e3786eaf2c2aa794c0f86eb4ab Mon Sep 17 00:00:00 2001 From: Skyler Lehmkuhl Date: Fri, 12 Dec 2025 11:12:02 -0500 Subject: [PATCH] slightly improve video export speed --- lightningbeam-ui/Cargo.toml | 17 +- .../lightningbeam-core/src/gpu/mod.rs | 2 + .../src/gpu/yuv_converter.rs | 241 +++++++++ .../src/export/async_render_helpers.md | 62 +++ .../src/export/cpu_yuv_converter.rs | 143 ++++++ .../lightningbeam-editor/src/export/mod.rs | 244 ++++++--- .../src/export/perf_metrics.rs | 191 +++++++ .../src/export/readback_pipeline.rs | 317 ++++++++++++ .../src/export/video_exporter.rs | 466 +++++++++++++++--- .../lightningbeam-editor/src/main.rs | 1 - 10 files changed, 1542 insertions(+), 142 deletions(-) create mode 100644 lightningbeam-ui/lightningbeam-core/src/gpu/yuv_converter.rs create mode 100644 lightningbeam-ui/lightningbeam-editor/src/export/async_render_helpers.md create mode 100644 lightningbeam-ui/lightningbeam-editor/src/export/cpu_yuv_converter.rs create mode 100644 lightningbeam-ui/lightningbeam-editor/src/export/perf_metrics.rs create mode 100644 lightningbeam-ui/lightningbeam-editor/src/export/readback_pipeline.rs diff --git a/lightningbeam-ui/Cargo.toml b/lightningbeam-ui/Cargo.toml index 10bfd21..1d7b708 100644 --- a/lightningbeam-ui/Cargo.toml +++ b/lightningbeam-ui/Cargo.toml @@ -9,15 +9,18 @@ members = [ # UI Framework (using eframe for simplified integration) # Note: Upgraded from 0.29 to 0.31 to fix Linux IME/keyboard input issues # See: https://github.com/emilk/egui/pull/5198 -eframe = { version = "0.31", default-features = true, features = ["wgpu"] } -egui_extras = { version = "0.31", features = ["image", "svg"] } -egui-wgpu = "0.31" +# Upgraded to 0.33 for shader editor (egui_code_editor) and continued bug fixes +egui = "0.33" +eframe = { version = "0.33", default-features = true, features = ["wgpu"] } +egui_extras = { version = "0.33", features = ["image", "svg", "syntect"] } +egui-wgpu = "0.33" +egui_code_editor = "0.2" # GPU 
Rendering -# vello 0.5 uses wgpu 24, matching eframe 0.31 -vello = "0.5" -wgpu = "24" -kurbo = { version = "0.11", features = ["serde"] } +# vello from git uses wgpu 27, matching eframe 0.33 +vello = { git = "https://github.com/linebender/vello", branch = "main" } +wgpu = { version = "27", features = ["vulkan", "metal"] } +kurbo = { version = "0.12", features = ["serde"] } peniko = "0.5" # Windowing diff --git a/lightningbeam-ui/lightningbeam-core/src/gpu/mod.rs b/lightningbeam-ui/lightningbeam-core/src/gpu/mod.rs index 681f245..c5ac285 100644 --- a/lightningbeam-ui/lightningbeam-core/src/gpu/mod.rs +++ b/lightningbeam-ui/lightningbeam-core/src/gpu/mod.rs @@ -10,12 +10,14 @@ pub mod buffer_pool; pub mod color_convert; pub mod compositor; pub mod effect_processor; +pub mod yuv_converter; // Re-export commonly used types pub use buffer_pool::{BufferHandle, BufferPool, BufferSpec, BufferFormat}; pub use color_convert::SrgbToLinearConverter; pub use compositor::{Compositor, CompositorLayer, BlendMode}; pub use effect_processor::{EffectProcessor, EffectUniforms}; +pub use yuv_converter::YuvConverter; /// Standard HDR internal texture format (16-bit float per channel) pub const HDR_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba16Float; diff --git a/lightningbeam-ui/lightningbeam-core/src/gpu/yuv_converter.rs b/lightningbeam-ui/lightningbeam-core/src/gpu/yuv_converter.rs new file mode 100644 index 0000000..fde594d --- /dev/null +++ b/lightningbeam-ui/lightningbeam-core/src/gpu/yuv_converter.rs @@ -0,0 +1,241 @@ +//! GPU-accelerated RGBA to YUV420p color space conversion +//! +//! Provides a compute shader-based converter for transforming RGBA textures +//! to YUV420p planar format using the BT.709 color matrix (HD video standard). +//! This replaces the CPU-based conversion with GPU parallel processing. + +/// GPU pipeline for RGBA to YUV420p color space conversion +/// +/// Converts Rgba8Unorm textures to YUV420p planar format using BT.709 colorspace. 
+/// The Y plane is full resolution, while U and V planes are subsampled 4:2:0. +/// +/// Output texture layout: +/// - Rows 0 to height-1: Y plane (luma, full resolution) +/// - Rows height to height + height/4 - 1: U plane (chroma, half resolution) +/// - Rows height + height/4 to height + height/2 - 1: V plane (chroma, half resolution) +pub struct YuvConverter { + pipeline: wgpu::ComputePipeline, + bind_group_layout: wgpu::BindGroupLayout, +} + +impl YuvConverter { + /// Create a new RGBA to YUV420p converter + pub fn new(device: &wgpu::Device) -> Self { + // Create bind group layout + let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: Some("yuv_converter_bind_group_layout"), + entries: &[ + // Input RGBA texture (binding 0) + wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: false }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, + // Output YUV texture (Rgba8Unorm storage texture, binding 1) + // Note: R8Unorm doesn't support storage binding, so we use Rgba8Unorm and write to .r channel + wgpu::BindGroupLayoutEntry { + binding: 1, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::WriteOnly, + format: wgpu::TextureFormat::Rgba8Unorm, + view_dimension: wgpu::TextureViewDimension::D2, + }, + count: None, + }, + ], + }); + + // Create pipeline layout + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: Some("yuv_converter_pipeline_layout"), + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + // Create shader module + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("yuv_converter_shader"), + source: wgpu::ShaderSource::Wgsl(YUV_CONVERTER_SHADER.into()), + 
}); + + // Create compute pipeline + let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some("yuv_converter_pipeline"), + layout: Some(&pipeline_layout), + module: &shader, + entry_point: Some("main"), + compilation_options: wgpu::PipelineCompilationOptions::default(), + cache: None, + }); + + Self { + pipeline, + bind_group_layout, + } + } + + /// Convert RGBA texture to YUV420p planar format + /// + /// Reads from `rgba_view` and writes Y, U, V planes to `yuv_output_view`. + /// The output texture must be R8Unorm format with height = input_height * 1.5 + /// to accommodate the packed YUV planes. + /// + /// # Arguments + /// * `device` - GPU device + /// * `encoder` - Command encoder to record GPU commands + /// * `rgba_view` - Source RGBA texture view + /// * `yuv_output_view` - Destination YUV planar texture view (R8Unorm, height*1.5) + /// * `width` - Width of the source RGBA texture + /// * `height` - Height of the source RGBA texture + pub fn convert( + &self, + device: &wgpu::Device, + encoder: &mut wgpu::CommandEncoder, + rgba_view: &wgpu::TextureView, + yuv_output_view: &wgpu::TextureView, + width: u32, + height: u32, + ) { + // Create bind group for this conversion + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("yuv_converter_bind_group"), + layout: &self.bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(rgba_view), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::TextureView(yuv_output_view), + }, + ], + }); + + // Compute pass + let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("yuv_conversion_pass"), + timestamp_writes: None, + }); + + compute_pass.set_pipeline(&self.pipeline); + compute_pass.set_bind_group(0, &bind_group, &[]); + + // Dispatch workgroups: 8x8 threads per workgroup + // Each thread processes one pixel for the Y 
plane + // Chroma planes are processed by threads at even coordinates + let workgroup_size = 8; + let workgroups_x = (width + workgroup_size - 1) / workgroup_size; + let workgroups_y = (height + workgroup_size - 1) / workgroup_size; + compute_pass.dispatch_workgroups(workgroups_x, workgroups_y, 1); + } +} + +/// WGSL compute shader for RGBA to YUV420p conversion +const YUV_CONVERTER_SHADER: &str = r#" +// RGBA to YUV420p Compute Shader +// BT.709 color space for HD video (ITU-R BT.709-6 standard) +// +// Color matrix: +// Y = 0.2126*R + 0.7152*G + 0.0722*B +// U = -0.1146*R - 0.3854*G + 0.5000*B + 0.5 +// V = 0.5000*R - 0.4542*G - 0.0458*B + 0.5 +// +// Output texture layout (packed planar, side-by-side U/V): +// - Rows [0, height): Y plane (full resolution, full width) +// - Rows [height, height + height/2): U plane (left half, columns 0 to width/2-1) +// V plane (right half, columns width/2 to width-1) + +@group(0) @binding(0) var input_rgba: texture_2d; +@group(0) @binding(1) var output_yuv: texture_storage_2d; + +@compute @workgroup_size(8, 8, 1) +fn main(@builtin(global_invocation_id) global_id: vec3) { + let dims = textureDimensions(input_rgba); + let pos = global_id.xy; + + // Bounds check + if (pos.x >= dims.x || pos.y >= dims.y) { + return; + } + + // Load RGBA pixel + let rgba = textureLoad(input_rgba, pos, 0); + let r = rgba.r; + let g = rgba.g; + let b = rgba.b; + + // Compute Y (luma) - full resolution, BT.709 + let y = 0.2126 * r + 0.7152 * g + 0.0722 * b; + + // Write Y value to Y plane (rows 0 to height-1) + textureStore(output_yuv, pos, vec4(y, 0.0, 0.0, 0.0)); + + // Compute U and V (chroma) - subsampled 4:2:0 + // Only process even coordinates (top-left of 2x2 blocks) + if (pos.x % 2u == 0u && pos.y % 2u == 0u) { + // Sample 2x2 block for chroma subsampling + var r_sum = r; + var g_sum = g; + var b_sum = b; + var count = 1.0; + + // Sample right neighbor (x+1, y) + if (pos.x + 1u < dims.x) { + let rgba_r = textureLoad(input_rgba, pos + vec2(1u, 
0u), 0); + r_sum += rgba_r.r; + g_sum += rgba_r.g; + b_sum += rgba_r.b; + count += 1.0; + } + + // Sample bottom neighbor (x, y+1) + if (pos.y + 1u < dims.y) { + let rgba_b = textureLoad(input_rgba, pos + vec2(0u, 1u), 0); + r_sum += rgba_b.r; + g_sum += rgba_b.g; + b_sum += rgba_b.b; + count += 1.0; + } + + // Sample bottom-right neighbor (x+1, y+1) + if (pos.x + 1u < dims.x && pos.y + 1u < dims.y) { + let rgba_br = textureLoad(input_rgba, pos + vec2(1u, 1u), 0); + r_sum += rgba_br.r; + g_sum += rgba_br.g; + b_sum += rgba_br.b; + count += 1.0; + } + + // Average the 2x2 block + let r_avg = r_sum / count; + let g_avg = g_sum / count; + let b_avg = b_sum / count; + + // Compute chroma components (BT.709, centered at 0.5 for unsigned 8-bit) + let u = -0.1146 * r_avg - 0.3854 * g_avg + 0.5000 * b_avg + 0.5; + let v = 0.5000 * r_avg - 0.4542 * g_avg - 0.0458 * b_avg + 0.5; + + // Compute chroma plane positions (half resolution) + // Pack U and V side-by-side: U on left half, V on right half + let chroma_x = pos.x / 2u; + let chroma_y = pos.y / 2u; + + // U plane: left half (columns 0 to width/2-1), rows height to height+height/2-1 + let u_pos = vec2(chroma_x, dims.y + chroma_y); + + // V plane: right half (columns width/2 to width-1), rows height to height+height/2-1 + let v_pos = vec2(dims.x / 2u + chroma_x, dims.y + chroma_y); + + // Write U and V values to their respective planes + textureStore(output_yuv, u_pos, vec4(u, 0.0, 0.0, 0.0)); + textureStore(output_yuv, v_pos, vec4(v, 0.0, 0.0, 0.0)); + } +} +"#; diff --git a/lightningbeam-ui/lightningbeam-editor/src/export/async_render_helpers.md b/lightningbeam-ui/lightningbeam-editor/src/export/async_render_helpers.md new file mode 100644 index 0000000..b670810 --- /dev/null +++ b/lightningbeam-ui/lightningbeam-editor/src/export/async_render_helpers.md @@ -0,0 +1,62 @@ +# Plan for Async Rendering Helpers + +I'm creating this temporary document to plan the async rendering changes. 
+ +## Current Architecture (Synchronous) +`render_frame_to_rgba_hdr()` in video_exporter.rs: +1. Render document to RGBA (lines 750-991) +2. GPU YUV conversion (lines 993-1005) +3. Copy YUV to staging buffer (lines 1007-1029) +4. Submit GPU commands (line 1031) +5. **BLOCKING** map_async + wait (lines 1033-1045) +6. Extract Y, U, V planes from mapped buffer (lines 1047-1087) +7. Unmap and return YUV planes (lines 1089-1092) + +## New Architecture (Async Pipelined) +Split into two phases using ReadbackPipeline: + +### Phase 1: Submit Frame (Non-blocking) +New function `submit_frame_to_readback_pipeline()`: +- Input: buffer from ReadbackPipeline.acquire() +- Steps 1-3: Render to RGBA, GPU YUV, copy to buffer's YUV texture +- Return encoder to ReadbackPipeline for submission +- **Does NOT wait for GPU** + +### Phase 2: Extract YUV (After async mapping) +Helper function `extract_yuv_planes_from_buffer()`: +- Input: mapped buffer data from ReadbackPipeline +- Steps 6-7: Extract Y, U, V planes, return them +- Used after ReadbackPipeline.get_mapped_data() + +## Modified render_next_video_frame() +New async pipeline loop: +``` +while more_work_to_do: + // Poll for completed frames + for result in pipeline.poll_nonblocking(): + data = pipeline.get_mapped_data(result.buffer_id) + (y, u, v) = extract_yuv_planes(data) + send_to_encoder_in_order(result.frame_num, y, u, v) + pipeline.release(result.buffer_id) + + // Submit new frames (up to 3 in flight) + if current_frame < total_frames && frames_in_flight < 3: + if let Some(buffer) = pipeline.acquire(frame_num, timestamp): + encoder = submit_frame_to_pipeline(buffer) + pipeline.submit_and_readback(buffer.id, encoder) + frames_in_flight++ + current_frame++ + + // Done when all frames submitted AND all completed + if current_frame >= total_frames && frames_in_flight == 0: + return Ok(false) + + return Ok(true) // More work to do +``` + +This achieves triple buffering: +- Frame N: GPU rendering +- Frame N-1: GPU→CPU async transfer 
+- Frame N-2: CPU encoding + +Expected speedup: 5x diff --git a/lightningbeam-ui/lightningbeam-editor/src/export/cpu_yuv_converter.rs b/lightningbeam-ui/lightningbeam-editor/src/export/cpu_yuv_converter.rs new file mode 100644 index 0000000..6572b4c --- /dev/null +++ b/lightningbeam-ui/lightningbeam-editor/src/export/cpu_yuv_converter.rs @@ -0,0 +1,143 @@ +//! CPU-based RGBA→YUV420p color space converter using FFmpeg's swscale +//! +//! This module provides a wrapper around FFmpeg's highly-optimized swscale library +//! for converting RGBA data to YUV420p format. Uses SIMD instructions when available +//! for maximum performance. + +use ffmpeg_next as ffmpeg; + +/// CPU-based RGBA→YUV420p converter using FFmpeg's swscale +/// +/// This converter uses FFmpeg's swscale library which is highly optimized with SIMD +/// instructions (SSE, AVX) for fast color space conversion on the CPU. +pub struct CpuYuvConverter { + width: u32, + height: u32, +} + +impl CpuYuvConverter { + /// Create new converter for given dimensions + /// + /// # Arguments + /// * `width` - Frame width in pixels + /// * `height` - Frame height in pixels + pub fn new(width: u32, height: u32) -> Result { + Ok(Self { width, height }) + } + + /// Convert RGBA data to YUV420p planes + /// + /// Performs color space conversion from RGBA (8-bit per channel, packed format) + /// to YUV420p (8-bit per channel, planar format with subsampled chroma). + /// + /// Uses BT.709 color matrix (HD standard) for the conversion. 
+ /// + /// # Arguments + /// * `rgba_data` - Packed RGBA data (width * height * 4 bytes) + /// + /// # Returns + /// Tuple of (y_plane, u_plane, v_plane) as separate Vec + /// + /// # Panics + /// Panics if rgba_data length doesn't match width * height * 4 + pub fn convert(&self, rgba_data: &[u8]) -> Result<(Vec, Vec, Vec), String> { + let expected_size = (self.width * self.height * 4) as usize; + assert_eq!( + rgba_data.len(), + expected_size, + "RGBA data size mismatch: expected {} bytes, got {}", + expected_size, + rgba_data.len() + ); + + // Create source RGBA frame + let mut rgba_frame = ffmpeg::frame::Video::new( + ffmpeg::format::Pixel::RGBA, + self.width, + self.height, + ); + + // Copy RGBA data into source frame + // ffmpeg-next provides mutable access to the frame data + let frame_data = rgba_frame.data_mut(0); + frame_data.copy_from_slice(rgba_data); + + // Create destination YUV420p frame + let mut yuv_frame = ffmpeg::frame::Video::new( + ffmpeg::format::Pixel::YUV420P, + self.width, + self.height, + ); + + // Create swscale context for RGBA→YUV420p conversion + // Uses BT.709 color matrix (HD standard) + let mut scaler = ffmpeg::software::scaling::Context::get( + ffmpeg::format::Pixel::RGBA, + self.width, + self.height, + ffmpeg::format::Pixel::YUV420P, + self.width, + self.height, + ffmpeg::software::scaling::Flags::BILINEAR, + ) + .map_err(|e| format!("Failed to create swscale context: {}", e))?; + + // Perform the conversion (SIMD-optimized) + scaler + .run(&rgba_frame, &mut yuv_frame) + .map_err(|e| format!("swscale conversion failed: {}", e))?; + + // Extract planar YUV data + // YUV420p has 3 planes: + // - Y: full resolution (width × height) + // - U: quarter resolution (width/2 × height/2) + // - V: quarter resolution (width/2 × height/2) + let y_plane = yuv_frame.data(0).to_vec(); + let u_plane = yuv_frame.data(1).to_vec(); + let v_plane = yuv_frame.data(2).to_vec(); + + Ok((y_plane, u_plane, v_plane)) + } +} + +#[cfg(test)] +mod tests { + 
use super::*; + + #[test] + fn test_converter_creation() { + let converter = CpuYuvConverter::new(1920, 1080); + assert!(converter.is_ok()); + } + + #[test] + fn test_conversion_output_sizes() { + let converter = CpuYuvConverter::new(1920, 1080).unwrap(); + + // Create dummy RGBA data (all black) + let rgba_data = vec![0u8; 1920 * 1080 * 4]; + + let result = converter.convert(&rgba_data); + assert!(result.is_ok()); + + let (y, u, v) = result.unwrap(); + + // Y plane should be full resolution + assert_eq!(y.len(), 1920 * 1080); + + // U and V planes should be quarter resolution (subsampled 2x2) + assert_eq!(u.len(), (1920 / 2) * (1080 / 2)); + assert_eq!(v.len(), (1920 / 2) * (1080 / 2)); + } + + #[test] + #[should_panic(expected = "RGBA data size mismatch")] + fn test_wrong_input_size_panics() { + let converter = CpuYuvConverter::new(1920, 1080).unwrap(); + + // Wrong size input + let rgba_data = vec![0u8; 1000]; + + let _ = converter.convert(&rgba_data); + } +} diff --git a/lightningbeam-ui/lightningbeam-editor/src/export/mod.rs b/lightningbeam-ui/lightningbeam-editor/src/export/mod.rs index a5de980..adafe96 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/export/mod.rs +++ b/lightningbeam-ui/lightningbeam-editor/src/export/mod.rs @@ -6,6 +6,9 @@ pub mod audio_exporter; pub mod dialog; pub mod video_exporter; +pub mod readback_pipeline; +pub mod perf_metrics; +pub mod cpu_yuv_converter; use lightningbeam_core::export::{AudioExportSettings, VideoExportSettings, ExportProgress}; use lightningbeam_core::document::Document; @@ -18,8 +21,14 @@ use std::sync::atomic::{AtomicBool, Ordering}; /// Message sent from main thread to video encoder thread enum VideoFrameMessage { - /// RGBA frame data with frame number and timestamp - Frame { frame_num: usize, timestamp: f64, rgba_data: Vec }, + /// YUV420p frame data with frame number and timestamp (GPU-converted) + Frame { + frame_num: usize, + timestamp: f64, + y_plane: Vec, + u_plane: Vec, + v_plane: Vec, + }, /// 
Signal that all frames have been sent Done, } @@ -44,6 +53,16 @@ pub struct VideoExportState { frame_tx: Option>, /// HDR GPU resources for compositing pipeline (effects, color conversion) gpu_resources: Option, + /// Async triple-buffered readback pipeline for GPU RGBA frames + readback_pipeline: Option, + /// CPU YUV converter for RGBA→YUV420p conversion + cpu_yuv_converter: Option, + /// Frames that have been submitted to GPU but not yet encoded + frames_in_flight: usize, + /// Next frame number to send to encoder (for ordering) + next_frame_to_encode: usize, + /// Performance metrics for instrumentation + perf_metrics: Option, } /// Export orchestrator that manages the export process @@ -168,13 +187,11 @@ impl ExportOrchestrator { // Poll video progress while let Ok(progress) = parallel.video_progress_rx.try_recv() { - println!("📨 [PARALLEL] Video progress: {:?}", std::mem::discriminant(&progress)); parallel.video_progress = Some(progress); } // Poll audio progress while let Ok(progress) = parallel.audio_progress_rx.try_recv() { - println!("📨 [PARALLEL] Audio progress: {:?}", std::mem::discriminant(&progress)); parallel.audio_progress = Some(progress); } @@ -621,7 +638,7 @@ impl ExportOrchestrator { self.thread_handle = Some(handle); // Initialize video export state - // GPU resources will be initialized lazily on first frame (needs device) + // GPU resources and readback pipeline will be initialized lazily on first frame (needs device) self.video_state = Some(VideoExportState { current_frame: 0, total_frames, @@ -632,6 +649,11 @@ impl ExportOrchestrator { height, frame_tx: Some(frame_tx), gpu_resources: None, + readback_pipeline: None, + cpu_yuv_converter: None, + frames_in_flight: 0, + next_frame_to_encode: 0, + perf_metrics: Some(perf_metrics::ExportMetrics::new()), }); println!("🎬 [VIDEO EXPORT] Encoder thread spawned, ready for frames"); @@ -745,7 +767,7 @@ impl ExportOrchestrator { }); // Initialize video export state for incremental rendering - // GPU 
resources will be initialized lazily on first frame (needs device) + // GPU resources and readback pipeline will be initialized lazily on first frame (needs device) self.video_state = Some(VideoExportState { current_frame: 0, total_frames, @@ -756,6 +778,11 @@ impl ExportOrchestrator { height: video_height, frame_tx: Some(frame_tx), gpu_resources: None, + readback_pipeline: None, + cpu_yuv_converter: None, + frames_in_flight: 0, + next_frame_to_encode: 0, + perf_metrics: Some(perf_metrics::ExportMetrics::new()), }); // Initialize parallel export state @@ -777,6 +804,7 @@ impl ExportOrchestrator { /// Render and send the next video frame (call from main thread) /// + /// Uses async triple-buffered pipeline for maximum throughput. /// Returns true if there are more frames to render, false if done. /// /// # Arguments @@ -798,62 +826,143 @@ impl ExportOrchestrator { image_cache: &mut ImageCache, video_manager: &Arc>, ) -> Result { + use std::time::Instant; + let state = self.video_state.as_mut() .ok_or("No video export in progress")?; - if state.current_frame >= state.total_frames { - // All frames rendered, signal encoder thread - if let Some(tx) = state.frame_tx.take() { - tx.send(VideoFrameMessage::Done).ok(); - } - // Clean up GPU resources - state.gpu_resources = None; - return Ok(false); - } - - // Calculate timestamp for this frame - let timestamp = state.start_time + (state.current_frame as f64 / state.framerate); - - // Get frame dimensions from export settings let width = state.width; let height = state.height; - // Initialize GPU resources on first frame (needs device) + // Initialize GPU resources and readback pipeline on first frame if state.gpu_resources.is_none() { - println!("🎬 [VIDEO EXPORT] Initializing HDR GPU resources for {}x{}", width, height); + println!("🎬 [VIDEO EXPORT] Initializing HDR GPU + async pipeline {}x{}", width, height); state.gpu_resources = Some(video_exporter::ExportGpuResources::new(device, width, height)); + 
state.readback_pipeline = Some(readback_pipeline::ReadbackPipeline::new(device, queue, width, height)); + state.cpu_yuv_converter = Some(cpu_yuv_converter::CpuYuvConverter::new(width, height)?); + println!("🚀 [ASYNC PIPELINE] Triple-buffered pipeline initialized"); + println!("🚀 [CPU YUV] swscale converter initialized"); } - // Render frame to RGBA buffer using HDR pipeline (with effects) - let mut rgba_buffer = vec![0u8; (width * height * 4) as usize]; + let pipeline = state.readback_pipeline.as_mut().unwrap(); let gpu_resources = state.gpu_resources.as_mut().unwrap(); - video_exporter::render_frame_to_rgba_hdr( - document, - timestamp, - width, - height, - device, - queue, - renderer, - image_cache, - video_manager, - gpu_resources, - &mut rgba_buffer, - )?; + let cpu_converter = state.cpu_yuv_converter.as_mut().unwrap(); + let mut metrics = state.perf_metrics.as_mut(); - // Send frame to encoder thread - if let Some(tx) = &state.frame_tx { - tx.send(VideoFrameMessage::Frame { - frame_num: state.current_frame, - timestamp, - rgba_data: rgba_buffer, - }).map_err(|_| "Failed to send frame to encoder")?; + // Poll for completed async readbacks (non-blocking) + if let Some(m) = metrics.as_mut() { + m.poll_count += 1; + } + let completed_frames = pipeline.poll_nonblocking(); + if let Some(m) = metrics.as_mut() { + m.completions_per_poll.push(completed_frames.len()); } - state.current_frame += 1; + // Process completed frames IN ORDER + for result in completed_frames { + if result.frame_num == state.next_frame_to_encode { + // Record readback completion time + if let Some(m) = metrics.as_mut() { + if let Some(frame_metrics) = m.frames.get_mut(result.frame_num) { + frame_metrics.readback_complete = Some(Instant::now()); + } + } - // Return true if more frames remain - Ok(state.current_frame < state.total_frames) + // Extract RGBA data (timed) + let extraction_start = Instant::now(); + let rgba_data = pipeline.extract_rgba_data(result.buffer_id); + let extraction_end = 
Instant::now(); + + // CPU YUV conversion (timed) + let conversion_start = Instant::now(); + let (y, u, v) = cpu_converter.convert(&rgba_data)?; + let conversion_end = Instant::now(); + + if let Some(m) = metrics.as_mut() { + if let Some(frame_metrics) = m.frames.get_mut(result.frame_num) { + frame_metrics.extraction_start = Some(extraction_start); + frame_metrics.extraction_end = Some(extraction_end); + frame_metrics.conversion_start = Some(conversion_start); + frame_metrics.conversion_end = Some(conversion_end); + } + } + + // Send to encoder + if let Some(tx) = &state.frame_tx { + tx.send(VideoFrameMessage::Frame { + frame_num: result.frame_num, + timestamp: result.timestamp, + y_plane: y, + u_plane: u, + v_plane: v, + }).map_err(|_| "Failed to send frame")?; + } + + pipeline.release(result.buffer_id); + state.frames_in_flight -= 1; + state.next_frame_to_encode += 1; + } + } + + // Submit new frames (up to 3 in flight) + while state.current_frame < state.total_frames && state.frames_in_flight < 3 { + let timestamp = state.start_time + (state.current_frame as f64 / state.framerate); + + if let Some(acquired) = pipeline.acquire(state.current_frame, timestamp) { + // Create frame metrics entry + if let Some(m) = metrics.as_mut() { + m.frames.push(perf_metrics::FrameMetrics::new(state.current_frame)); + } + + // Render to GPU (timed) + let render_start = Instant::now(); + let encoder = video_exporter::render_frame_to_gpu_rgba( + document, timestamp, width, height, + device, queue, renderer, image_cache, video_manager, + gpu_resources, &acquired.rgba_texture_view, + )?; + let render_end = Instant::now(); + + // Record render timing + if let Some(m) = metrics.as_mut() { + if let Some(frame_metrics) = m.frames.get_mut(state.current_frame) { + frame_metrics.render_end = Some(render_end); + frame_metrics.submit_time = Some(Instant::now()); + } + } + + // Submit for async readback + pipeline.submit_and_readback(acquired.id, encoder); + + state.current_frame += 1; + 
state.frames_in_flight += 1; + } else { + break; // All buffers in use + } + } + + // Done when all submitted AND all completed + if state.current_frame >= state.total_frames && state.frames_in_flight == 0 { + println!("🎬 [VIDEO EXPORT] Complete: {} frames", state.total_frames); + + // Print performance summary + if let Some(m) = &state.perf_metrics { + m.print_summary(); + m.print_per_frame_details(10); + } + + if let Some(tx) = state.frame_tx.take() { + tx.send(VideoFrameMessage::Done).ok(); + } + + state.gpu_resources = None; + state.readback_pipeline = None; + state.cpu_yuv_converter = None; + state.perf_metrics = None; + return Ok(false); + } + + Ok(true) // More work to do } /// Background thread that receives frames and encodes them @@ -925,9 +1034,9 @@ impl ExportOrchestrator { // Wait for first frame to determine dimensions let first_frame = match frame_rx.recv() { - Ok(VideoFrameMessage::Frame { frame_num, timestamp, rgba_data }) => { - println!("🧵 [ENCODER] Received first frame ({} bytes)", rgba_data.len()); - Some((frame_num, timestamp, rgba_data)) + Ok(VideoFrameMessage::Frame { frame_num, timestamp, y_plane, u_plane, v_plane }) => { + println!("🧵 [ENCODER] Received first YUV frame (Y: {} bytes)", y_plane.len()); + Some((frame_num, timestamp, y_plane, u_plane, v_plane)) } Ok(VideoFrameMessage::Done) => { return Err("No frames to encode".to_string()); @@ -938,9 +1047,9 @@ impl ExportOrchestrator { }; // Determine dimensions from first frame - let (width, height) = if let Some((_, _, ref rgba_data)) = first_frame { - // Calculate dimensions from buffer size (RGBA = 4 bytes per pixel) - let pixel_count = rgba_data.len() / 4; + let (width, height) = if let Some((_, _, ref y_plane, _, _)) = first_frame { + // Calculate dimensions from Y plane size (full resolution, 1 byte per pixel) + let pixel_count = y_plane.len(); // Use settings dimensions if provided, otherwise infer from buffer let w = settings.width.unwrap_or(1920); // Default to 1920 if not 
specified let h = settings.height.unwrap_or(1080); // Default to 1080 if not specified @@ -979,11 +1088,13 @@ impl ExportOrchestrator { println!("🧵 [ENCODER] Encoder initialized, ready to encode frames"); // Process first frame - if let Some((frame_num, timestamp, rgba_data)) = first_frame { + if let Some((frame_num, timestamp, y_plane, u_plane, v_plane)) = first_frame { Self::encode_frame( &mut encoder, &mut output, - &rgba_data, + &y_plane, + &u_plane, + &v_plane, width, height, timestamp, @@ -994,8 +1105,6 @@ impl ExportOrchestrator { frame: 1, total: total_frames, }).ok(); - - println!("🧵 [ENCODER] Encoded frame {}", frame_num); } // Process remaining frames @@ -1006,11 +1115,13 @@ impl ExportOrchestrator { } match frame_rx.recv() { - Ok(VideoFrameMessage::Frame { frame_num, timestamp, rgba_data }) => { + Ok(VideoFrameMessage::Frame { frame_num, timestamp, y_plane, u_plane, v_plane }) => { Self::encode_frame( &mut encoder, &mut output, - &rgba_data, + &y_plane, + &u_plane, + &v_plane, width, height, timestamp, @@ -1023,10 +1134,6 @@ impl ExportOrchestrator { frame: frames_encoded, total: total_frames, }).ok(); - - if frames_encoded % 30 == 0 || frames_encoded == frame_num + 1 { - println!("🧵 [ENCODER] Encoded frame {}/{}", frames_encoded, total_frames); - } } Ok(VideoFrameMessage::Done) => { println!("🧵 [ENCODER] All frames received, flushing encoder"); @@ -1052,17 +1159,18 @@ impl ExportOrchestrator { Ok(()) } - /// Encode a single RGBA frame + /// Encode a single YUV420p frame (already converted by GPU) fn encode_frame( encoder: &mut ffmpeg_next::encoder::Video, output: &mut ffmpeg_next::format::context::Output, - rgba_data: &[u8], + y_plane: &[u8], + u_plane: &[u8], + v_plane: &[u8], width: u32, height: u32, timestamp: f64, ) -> Result<(), String> { - // Convert RGBA to YUV420p - let (y_plane, u_plane, v_plane) = video_exporter::rgba_to_yuv420p(rgba_data, width, height); + // YUV planes already converted by GPU (no CPU conversion needed) // Create FFmpeg 
video frame let mut video_frame = ffmpeg_next::frame::Video::new( @@ -1087,8 +1195,6 @@ impl ExportOrchestrator { // Encoder time base is 1/(framerate * 1000), so PTS = timestamp * (framerate * 1000) let encoder_tb = encoder.time_base(); let pts = (timestamp * encoder_tb.1 as f64) as i64; - println!("🎬 [ENCODE] Frame timestamp={:.3}s, encoder_tb={}/{}, calculated PTS={}", - timestamp, encoder_tb.0, encoder_tb.1, pts); video_frame.set_pts(Some(pts)); // Send frame to encoder diff --git a/lightningbeam-ui/lightningbeam-editor/src/export/perf_metrics.rs b/lightningbeam-ui/lightningbeam-editor/src/export/perf_metrics.rs new file mode 100644 index 0000000..37a934e --- /dev/null +++ b/lightningbeam-ui/lightningbeam-editor/src/export/perf_metrics.rs @@ -0,0 +1,191 @@ +//! Performance instrumentation for video export pipeline +//! +//! Tracks timing for each stage of the export process: +//! - GPU rendering (render_frame_to_gpu_yuv) +//! - Async readback (map_async completion) +//! - YUV plane extraction +//! - FFmpeg encoding +//! 
// Performance instrumentation for the video export pipeline.
//
// Records a timestamp at each stage boundary of a frame's journey
// (render, readback, extraction, conversion, encode) and aggregates the
// per-frame numbers into printable reports, together with polling statistics.

use std::time::{Duration, Instant};

/// Horizontal rule framing the aggregate summary.
const SUMMARY_RULE: &str = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━";
/// Horizontal rule framing the per-frame table.
const FRAME_RULE: &str = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━";

/// Time elapsed between two optional checkpoints; `None` unless both were recorded.
fn span(start: Option<Instant>, end: Option<Instant>) -> Option<Duration> {
    start.zip(end).map(|(s, e)| e.duration_since(s))
}

/// Converts a duration to fractional milliseconds.
fn to_ms(d: Duration) -> f64 {
    d.as_secs_f64() * 1000.0
}

/// Performance metrics for a single frame.
///
/// `render_start` is captured when the value is created; every other
/// checkpoint starts as `None` and is filled in by the caller as the frame
/// moves through the pipeline.
#[derive(Debug)]
pub struct FrameMetrics {
    pub frame_num: usize,
    pub render_start: Instant,
    pub render_end: Option<Instant>,
    pub submit_time: Option<Instant>,
    pub readback_complete: Option<Instant>,
    pub extraction_start: Option<Instant>,
    pub extraction_end: Option<Instant>,
    pub conversion_start: Option<Instant>,
    pub conversion_end: Option<Instant>,
    pub encode_start: Option<Instant>,
    pub encode_end: Option<Instant>,
}

impl FrameMetrics {
    /// Starts tracking `frame_num`, stamping `render_start` with the current time.
    pub fn new(frame_num: usize) -> Self {
        Self {
            frame_num,
            render_start: Instant::now(),
            render_end: None,
            submit_time: None,
            readback_complete: None,
            extraction_start: None,
            extraction_end: None,
            conversion_start: None,
            conversion_end: None,
            encode_start: None,
            encode_end: None,
        }
    }

    /// Time from `render_start` to `render_end`, if rendering has finished.
    pub fn render_duration(&self) -> Option<Duration> {
        span(Some(self.render_start), self.render_end)
    }

    /// Time from `submit_time` to `readback_complete`.
    pub fn readback_duration(&self) -> Option<Duration> {
        span(self.submit_time, self.readback_complete)
    }

    /// Time from `extraction_start` to `extraction_end`.
    pub fn extraction_duration(&self) -> Option<Duration> {
        span(self.extraction_start, self.extraction_end)
    }

    /// Time from `conversion_start` to `conversion_end`.
    pub fn conversion_duration(&self) -> Option<Duration> {
        span(self.conversion_start, self.conversion_end)
    }

    /// Time from `encode_start` to `encode_end`.
    pub fn encode_duration(&self) -> Option<Duration> {
        span(self.encode_start, self.encode_end)
    }

    /// Time from `render_start` to `encode_end`, i.e. the whole frame.
    pub fn total_duration(&self) -> Option<Duration> {
        span(Some(self.render_start), self.encode_end)
    }
}

/// Aggregate performance metrics for an entire export.
#[derive(Debug)]
pub struct ExportMetrics {
    pub frames: Vec<FrameMetrics>,
    export_start: Instant,
    pub poll_count: usize,
    pub completions_per_poll: Vec<usize>,
}

impl Default for ExportMetrics {
    fn default() -> Self {
        Self::new()
    }
}

impl ExportMetrics {
    /// Creates an empty collector; the export clock starts now.
    pub fn new() -> Self {
        Self {
            frames: Vec::new(),
            export_start: Instant::now(),
            poll_count: 0,
            completions_per_poll: Vec::new(),
        }
    }

    /// Mean of one stage in milliseconds over the frames that recorded it
    /// (0.0 when no frame did).
    fn average_ms(&self, stage: impl Fn(&FrameMetrics) -> Option<Duration>) -> f64 {
        let (total, count) = self
            .frames
            .iter()
            .filter_map(stage)
            .fold((Duration::ZERO, 0usize), |(sum, n), d| (sum + d, n + 1));
        if count == 0 {
            0.0
        } else {
            total.as_secs_f64() / count as f64 * 1000.0
        }
    }

    /// Print comprehensive performance summary
    pub fn print_summary(&self) {
        println!("\n📊 [PERF] Export Performance Summary");
        println!("{}", SUMMARY_RULE);

        println!("Render: {:.2}ms avg", self.average_ms(FrameMetrics::render_duration));
        println!("Readback: {:.2}ms avg", self.average_ms(FrameMetrics::readback_duration));
        println!("Extraction: {:.2}ms avg", self.average_ms(FrameMetrics::extraction_duration));
        println!("Conversion: {:.2}ms avg", self.average_ms(FrameMetrics::conversion_duration));
        println!("Encode: {:.2}ms avg", self.average_ms(FrameMetrics::encode_duration));
        println!("Total: {:.2}ms avg", self.average_ms(FrameMetrics::total_duration));

        let elapsed = self.export_start.elapsed().as_secs_f64();
        let frame_count = self.frames.len();
        // A zero elapsed time would otherwise print NaN/inf fps.
        let fps = if elapsed > 0.0 {
            frame_count as f64 / elapsed
        } else {
            0.0
        };
        println!(
            "\nOverall: {:.2} fps ({:.1}s for {} frames)",
            fps, elapsed, frame_count
        );

        if self.poll_count > 0 {
            let completions: usize = self.completions_per_poll.iter().sum();
            let avg_completions = completions as f64 / self.poll_count as f64;
            println!(
                "Polls: {} ({:.2} completions/poll avg)",
                self.poll_count, avg_completions
            );
        }

        println!("{}\n", SUMMARY_RULE);
    }

    /// Print detailed per-frame breakdown for last N frames
    ///
    /// When fewer than `last_n` frames were recorded, all of them are printed
    /// and the header reports the number actually shown.
    pub fn print_per_frame_details(&self, last_n: usize) {
        let shown = last_n.min(self.frames.len());

        println!("\n📋 [PERF] Per-Frame Breakdown (last {} frames)", shown);
        println!("{}", FRAME_RULE);
        println!(
            "{:>5} | {:>8} | {:>8} | {:>8} | {:>8} | {:>8} | {:>8}",
            "Frame", "Render", "Readback", "Extract", "Convert", "Encode", "Total"
        );
        println!("{}", FRAME_RULE);

        let first = self.frames.len() - shown;
        for metrics in &self.frames[first..] {
            // Stages that were never recorded are shown as 0.00ms.
            let ms = |d: Option<Duration>| d.map_or(0.0, to_ms);
            println!(
                "{:5} | {:>7.2}ms | {:>7.2}ms | {:>7.2}ms | {:>7.2}ms | {:>7.2}ms | {:>7.2}ms",
                metrics.frame_num,
                ms(metrics.render_duration()),
                ms(metrics.readback_duration()),
                ms(metrics.extraction_duration()),
                ms(metrics.conversion_duration()),
                ms(metrics.encode_duration()),
                ms(metrics.total_duration()),
            );
        }
        println!("{}\n", FRAME_RULE);
    }
}
- Frame N: GPU rendering/conversion +//! - Frame N-1: GPU→CPU async transfer +//! - Frame N-2: CPU encoding +//! +//! Expected speedup: 5x over synchronous blocking approach + +use std::sync::mpsc::{channel, Receiver, Sender}; + +/// Result from a completed async buffer mapping +#[derive(Debug)] +pub struct ReadbackResult { + pub buffer_id: usize, + pub frame_num: usize, + pub timestamp: f64, +} + +/// State of a pipeline buffer in the triple-buffering state machine +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum BufferState { + /// Buffer is available for new frame rendering + Free, + /// GPU is currently rendering/converting to this buffer + Rendering, + /// Buffer readback submitted, waiting for GPU→CPU transfer + ReadbackPending, + /// Buffer mapped and ready for CPU to read + Mapped, + /// CPU is encoding this buffer's data + Encoding, +} + +/// A single buffer in the triple-buffering pipeline +struct PipelineBuffer { + id: usize, + /// RGBA texture for GPU rendering output (Rgba8Unorm) + rgba_texture: wgpu::Texture, + rgba_texture_view: wgpu::TextureView, + /// Staging buffer for GPU→CPU transfer (MAP_READ) + staging_buffer: wgpu::Buffer, + /// Current state in the pipeline + state: BufferState, + /// Frame metadata (set when rendering starts) + frame_num: Option, + timestamp: Option, +} + +/// Handle to an acquired buffer for rendering +pub struct AcquiredBuffer { + pub id: usize, + pub rgba_texture_view: wgpu::TextureView, +} + +/// Triple-buffered async readback pipeline +/// +/// Manages 3 buffers cycling through the pipeline: +/// Free → Rendering → ReadbackPending → Mapped → Encoding → Free +pub struct ReadbackPipeline { + buffers: Vec, + /// Channel for async map_async callbacks + readback_rx: Receiver, + readback_tx: Sender, + /// wgpu device and queue references (needed for polling and buffer operations) + device: wgpu::Device, + queue: wgpu::Queue, + /// Buffer dimensions + width: u32, + height: u32, +} + +impl ReadbackPipeline { + /// Create a 
new triple-buffered readback pipeline + /// + /// # Arguments + /// * `device` - GPU device (will be cloned for async operations) + /// * `queue` - GPU queue (will be cloned for async operations) + /// * `width` - Frame width in pixels + /// * `height` - Frame height in pixels + pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self { + let (readback_tx, readback_rx) = channel(); + + // Create 3 buffers for triple buffering + let mut buffers = Vec::new(); + for id in 0..3 { + // RGBA texture (Rgba8Unorm) + let rgba_texture = device.create_texture(&wgpu::TextureDescriptor { + label: Some(&format!("readback_rgba_texture_{}", id)), + size: wgpu::Extent3d { + width, + height, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC, + view_formats: &[], + }); + + let rgba_texture_view = rgba_texture.create_view(&wgpu::TextureViewDescriptor::default()); + + // Staging buffer for GPU→CPU readback + let rgba_buffer_size = (width * height * 4) as u64; // Rgba8Unorm = 4 bytes/pixel + let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: Some(&format!("readback_staging_buffer_{}", id)), + size: rgba_buffer_size, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + buffers.push(PipelineBuffer { + id, + rgba_texture, + rgba_texture_view, + staging_buffer, + state: BufferState::Free, + frame_num: None, + timestamp: None, + }); + } + + Self { + buffers, + readback_rx, + readback_tx, + device: device.clone(), + queue: queue.clone(), + width, + height, + } + } + + /// Acquire a free buffer for rendering (non-blocking) + /// + /// Returns None if all buffers are in use (caller should poll and retry) + pub fn acquire(&mut self, frame_num: usize, timestamp: f64) -> Option { + // Find first Free 
buffer + for buffer in &mut self.buffers { + if buffer.state == BufferState::Free { + buffer.state = BufferState::Rendering; + buffer.frame_num = Some(frame_num); + buffer.timestamp = Some(timestamp); + + return Some(AcquiredBuffer { + id: buffer.id, + rgba_texture_view: buffer.rgba_texture_view.clone(), + }); + } + } + + None // All buffers busy + } + + /// Submit GPU commands and initiate async readback + /// + /// # Arguments + /// * `buffer_id` - ID of the buffer to submit (from AcquiredBuffer) + /// * `encoder` - Command encoder with rendering commands + pub fn submit_and_readback(&mut self, buffer_id: usize, mut encoder: wgpu::CommandEncoder) { + let buffer = &mut self.buffers[buffer_id]; + assert_eq!(buffer.state, BufferState::Rendering, "Buffer not in Rendering state"); + + // Copy RGBA texture to staging buffer + encoder.copy_texture_to_buffer( + wgpu::TexelCopyTextureInfo { + texture: &buffer.rgba_texture, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + wgpu::TexelCopyBufferInfo { + buffer: &buffer.staging_buffer, + layout: wgpu::TexelCopyBufferLayout { + offset: 0, + bytes_per_row: Some(self.width * 4), // Rgba8Unorm + rows_per_image: Some(self.height), + }, + }, + wgpu::Extent3d { + width: self.width, + height: self.height, + depth_or_array_layers: 1, + }, + ); + + // Submit GPU commands (non-blocking) + self.queue.submit(Some(encoder.finish())); + + // Initiate async buffer mapping + let frame_num = buffer.frame_num.unwrap(); + let timestamp = buffer.timestamp.unwrap(); + let tx = self.readback_tx.clone(); + + buffer.staging_buffer.slice(..).map_async(wgpu::MapMode::Read, move |result| { + if result.is_ok() { + let _ = tx.send(ReadbackResult { + buffer_id, + frame_num, + timestamp, + }); + } + }); + + buffer.state = BufferState::ReadbackPending; + } + + /// Poll for completed readbacks (non-blocking) + /// + /// Returns list of buffers that are now ready for CPU encoding. 
+ /// Call this frequently to process completed transfers. + pub fn poll_nonblocking(&mut self) -> Vec { + // Poll GPU without blocking + self.device.poll(wgpu::PollType::Poll); + + // Collect all completed readbacks + let mut results = Vec::new(); + while let Ok(result) = self.readback_rx.try_recv() { + // Update buffer state to Mapped + self.buffers[result.buffer_id].state = BufferState::Mapped; + results.push(result); + } + + results + } + + /// Extract RGBA data from mapped buffer (for CPU YUV conversion) + /// + /// Buffer must be in Mapped state (after poll_nonblocking returned it). + /// This immediately copies the RGBA data, allowing the buffer to be released. + pub fn extract_rgba_data(&mut self, buffer_id: usize) -> Vec { + let buffer = &mut self.buffers[buffer_id]; + assert_eq!(buffer.state, BufferState::Mapped, "Buffer not in Mapped state"); + + buffer.state = BufferState::Encoding; + + // Map the buffer and copy RGBA data + let slice = buffer.staging_buffer.slice(..); + let data = slice.get_mapped_range(); + + // Simple copy - RGBA data goes to CPU for conversion + data.to_vec() + } + + /// Release buffer after encoding completes, returning it to the free pool + /// + /// # Arguments + /// * `buffer_id` - ID of buffer to release + pub fn release(&mut self, buffer_id: usize) { + let buffer = &mut self.buffers[buffer_id]; + assert_eq!(buffer.state, BufferState::Encoding, "Buffer not in Encoding state"); + + // Unmap buffer + buffer.staging_buffer.unmap(); + + // Clear metadata + buffer.frame_num = None; + buffer.timestamp = None; + + // Return to free pool + buffer.state = BufferState::Free; + } + + /// Flush pipeline and wait for all pending operations + /// + /// Call this at the end of export to ensure all frames are processed + pub fn flush(&mut self) -> Vec { + let mut all_results = Vec::new(); + + // Keep polling until all buffers are Free + loop { + // Poll for new completions + self.device.poll(wgpu::PollType::Poll); + + while let Ok(result) = 
self.readback_rx.try_recv() { + self.buffers[result.buffer_id].state = BufferState::Mapped; + all_results.push(result); + } + + // Check if all buffers are Free (or can be made Free) + let mut all_free = true; + for buffer in &self.buffers { + match buffer.state { + BufferState::Free => {}, + BufferState::Rendering | BufferState::ReadbackPending => { + all_free = false; + break; + }, + BufferState::Mapped | BufferState::Encoding => { + // These should be handled by the caller, shouldn't happen during flush + panic!("Buffer in {} state during flush - caller should encode and release", + if buffer.state == BufferState::Mapped { "Mapped" } else { "Encoding" }); + } + } + } + + if all_free { + break; + } + + // Small sleep to avoid busy-waiting + std::thread::sleep(std::time::Duration::from_millis(1)); + } + + all_results + } + + /// Get buffer count currently in flight (for monitoring) + pub fn buffers_in_flight(&self) -> usize { + self.buffers.iter().filter(|b| b.state != BufferState::Free).count() + } +} diff --git a/lightningbeam-ui/lightningbeam-editor/src/export/video_exporter.rs b/lightningbeam-ui/lightningbeam-editor/src/export/video_exporter.rs index cb19df2..4228e43 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/export/video_exporter.rs +++ b/lightningbeam-ui/lightningbeam-editor/src/export/video_exporter.rs @@ -12,7 +12,7 @@ use lightningbeam_core::renderer::{ImageCache, render_document_for_compositing, use lightningbeam_core::video::VideoManager; use lightningbeam_core::gpu::{ BufferPool, BufferSpec, BufferFormat, Compositor, CompositorLayer, - SrgbToLinearConverter, EffectProcessor, HDR_FORMAT, + SrgbToLinearConverter, EffectProcessor, YuvConverter, HDR_FORMAT, }; /// Reusable frame buffers to avoid allocations @@ -56,10 +56,22 @@ pub struct ExportGpuResources { pub srgb_to_linear: SrgbToLinearConverter, /// Effect processor for shader effects pub effect_processor: EffectProcessor, + /// GPU-accelerated RGBA to YUV420p converter + pub 
yuv_converter: YuvConverter, /// HDR accumulator texture for compositing pub hdr_texture: wgpu::Texture, /// View for HDR texture pub hdr_texture_view: wgpu::TextureView, + /// Persistent RGBA output texture (sRGB, reused for all frames) + pub output_texture: wgpu::Texture, + /// View for persistent output texture + pub output_texture_view: wgpu::TextureView, + /// Persistent YUV texture for GPU conversion (R8Unorm, height*1.5, reused for all frames) + pub yuv_texture: wgpu::Texture, + /// View for persistent YUV texture + pub yuv_texture_view: wgpu::TextureView, + /// Persistent staging buffer for GPU→CPU readback (reused for all frames) + pub staging_buffer: wgpu::Buffer, /// Linear to sRGB blit pipeline for final output pub linear_to_srgb_pipeline: wgpu::RenderPipeline, /// Bind group layout for linear to sRGB blit @@ -75,6 +87,7 @@ impl ExportGpuResources { let compositor = Compositor::new(device, HDR_FORMAT); let srgb_to_linear = SrgbToLinearConverter::new(device); let effect_processor = EffectProcessor::new(device, HDR_FORMAT); + let yuv_converter = YuvConverter::new(device); // Create HDR accumulator texture let hdr_texture = device.create_texture(&wgpu::TextureDescriptor { @@ -95,6 +108,53 @@ impl ExportGpuResources { }); let hdr_texture_view = hdr_texture.create_view(&wgpu::TextureViewDescriptor::default()); + // Create persistent RGBA output texture (sRGB, reused for all frames) + let output_texture = device.create_texture(&wgpu::TextureDescriptor { + label: Some("export_output_texture"), + size: wgpu::Extent3d { + width, + height, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT + | wgpu::TextureUsages::TEXTURE_BINDING + | wgpu::TextureUsages::COPY_SRC, + view_formats: &[], + }); + let output_texture_view = output_texture.create_view(&wgpu::TextureViewDescriptor::default()); + + // Create persistent 
YUV texture (Rgba8Unorm, height*1.5 for packed Y+U+V planes) + // Note: Using Rgba8Unorm instead of R8Unorm because R8Unorm doesn't support STORAGE_BINDING + let yuv_height = height + height / 2; // Y plane + U plane + V plane + let yuv_texture = device.create_texture(&wgpu::TextureDescriptor { + label: Some("export_yuv_texture"), + size: wgpu::Extent3d { + width, + height: yuv_height, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::COPY_SRC, + view_formats: &[], + }); + let yuv_texture_view = yuv_texture.create_view(&wgpu::TextureViewDescriptor::default()); + + // Create persistent staging buffer for GPU→CPU readback + let yuv_buffer_size = (width * yuv_height * 4) as u64; // Rgba8Unorm = 4 bytes per pixel + let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: Some("export_staging_buffer"), + size: yuv_buffer_size, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + // Create linear to sRGB blit pipeline let linear_to_srgb_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { label: Some("linear_to_srgb_bind_group_layout"), @@ -179,8 +239,14 @@ impl ExportGpuResources { compositor, srgb_to_linear, effect_processor, + yuv_converter, hdr_texture, hdr_texture_view, + output_texture, + output_texture_view, + yuv_texture, + yuv_texture_view, + staging_buffer, linear_to_srgb_pipeline, linear_to_srgb_bind_group_layout, linear_to_srgb_sampler, @@ -476,20 +542,11 @@ pub fn receive_and_write_packets( let encoder_tb = encoder.time_base(); let stream_tb = output.stream(0).ok_or("No output stream found")?.time_base(); - println!("🎬 [PACKET] Encoder TB: {}/{}, Stream TB: {}/{}", - encoder_tb.0, encoder_tb.1, stream_tb.0, stream_tb.1); - while encoder.receive_packet(&mut encoded).is_ok() { - 
println!("🎬 [PACKET] Before rescale - PTS: {:?}, DTS: {:?}, Duration: {:?}", - encoded.pts(), encoded.dts(), encoded.duration()); - encoded.set_stream(0); // Rescale timestamps from encoder time base to stream time base encoded.rescale_ts(encoder_tb, stream_tb); - println!("🎬 [PACKET] After rescale - PTS: {:?}, DTS: {:?}, Duration: {:?}", - encoded.pts(), encoded.dts(), encoded.duration()); - encoded .write_interleaved(output) .map_err(|e| format!("Failed to write packet: {}", e))?; @@ -660,10 +717,9 @@ pub fn render_frame_to_rgba( /// * `image_cache` - Image cache for rendering /// * `video_manager` - Video manager for video clips /// * `gpu_resources` - HDR GPU resources for compositing -/// * `rgba_buffer` - Output buffer for RGBA pixels (must be width * height * 4 bytes) /// /// # Returns -/// Ok(()) on success, Err with message on failure +/// Ok((y_plane, u_plane, v_plane)) with YUV420p planes on success, Err with message on failure pub fn render_frame_to_rgba_hdr( document: &mut Document, timestamp: f64, @@ -675,8 +731,7 @@ pub fn render_frame_to_rgba_hdr( image_cache: &mut ImageCache, video_manager: &Arc>, gpu_resources: &mut ExportGpuResources, - rgba_buffer: &mut [u8], -) -> Result<(), String> { +) -> Result<(Vec, Vec, Vec), String> { use vello::kurbo::Affine; // Set document time to the frame timestamp @@ -879,22 +934,8 @@ pub fn render_frame_to_rgba_hdr( // Advance frame counter for buffer cleanup gpu_resources.buffer_pool.next_frame(); - // Create output texture for final sRGB output - let output_texture = device.create_texture(&wgpu::TextureDescriptor { - label: Some("export_output_texture"), - size: wgpu::Extent3d { - width, - height, - depth_or_array_layers: 1, - }, - mip_level_count: 1, - sample_count: 1, - dimension: wgpu::TextureDimension::D2, - format: wgpu::TextureFormat::Rgba8Unorm, - usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC, - view_formats: &[], - }); - let output_view = 
output_texture.create_view(&wgpu::TextureViewDescriptor::default()); + // Use persistent output texture (already created in ExportGpuResources) + let output_view = &gpu_resources.output_texture_view; // Convert HDR to sRGB for output let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { @@ -940,52 +981,48 @@ pub fn render_frame_to_rgba_hdr( queue.submit(Some(encoder.finish())); - // GPU readback: Create staging buffer with proper alignment - let bytes_per_pixel = 4u32; // RGBA8 - let bytes_per_row_alignment = 256u32; - let unpadded_bytes_per_row = width * bytes_per_pixel; - let bytes_per_row = ((unpadded_bytes_per_row + bytes_per_row_alignment - 1) - / bytes_per_row_alignment) * bytes_per_row_alignment; - let buffer_size = (bytes_per_row * height) as u64; - - let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor { - label: Some("export_staging_buffer"), - size: buffer_size, - usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, - mapped_at_creation: false, + // GPU YUV conversion: Convert RGBA output to YUV420p + let mut yuv_encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("export_yuv_conversion_encoder"), }); - // Copy texture to staging buffer - let mut copy_encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { - label: Some("export_copy_encoder"), - }); + gpu_resources.yuv_converter.convert( + device, + &mut yuv_encoder, + output_view, + &gpu_resources.yuv_texture_view, + width, + height, + ); - copy_encoder.copy_texture_to_buffer( + // Copy YUV texture to persistent staging buffer + let yuv_height = height + height / 2; // Y plane + U plane + V plane + yuv_encoder.copy_texture_to_buffer( wgpu::TexelCopyTextureInfo { - texture: &output_texture, + texture: &gpu_resources.yuv_texture, mip_level: 0, origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, wgpu::TexelCopyBufferInfo { - buffer: &staging_buffer, + buffer: &gpu_resources.staging_buffer, 
layout: wgpu::TexelCopyBufferLayout { offset: 0, - bytes_per_row: Some(bytes_per_row), - rows_per_image: Some(height), + bytes_per_row: Some(width * 4), // Rgba8Unorm = 4 bytes per pixel + rows_per_image: Some(yuv_height), }, }, wgpu::Extent3d { width, - height, + height: yuv_height, depth_or_array_layers: 1, }, ); - queue.submit(Some(copy_encoder.finish())); + queue.submit(Some(yuv_encoder.finish())); - // Map buffer and read pixels (synchronous) - let buffer_slice = staging_buffer.slice(..); + // Map buffer and read YUV pixels (synchronous) + let buffer_slice = gpu_resources.staging_buffer.slice(..); let (sender, receiver) = std::sync::mpsc::channel(); buffer_slice.map_async(wgpu::MapMode::Read, move |result| { sender.send(result).ok(); @@ -998,20 +1035,319 @@ pub fn render_frame_to_rgba_hdr( .map_err(|_| "Failed to receive buffer mapping result")? .map_err(|e| format!("Failed to map buffer: {:?}", e))?; - // Copy data from mapped buffer to output, removing padding + // Extract Y, U, V planes from packed YUV buffer let data = buffer_slice.get_mapped_range(); - for y in 0..height as usize { - let src_offset = y * bytes_per_row as usize; - let dst_offset = y * unpadded_bytes_per_row as usize; - let row_bytes = unpadded_bytes_per_row as usize; - rgba_buffer[dst_offset..dst_offset + row_bytes] - .copy_from_slice(&data[src_offset..src_offset + row_bytes]); + let width_usize = width as usize; + let height_usize = height as usize; + + // Y plane: rows 0 to height-1 (extract R channel from Rgba8Unorm) + let y_plane_size = width_usize * height_usize; + let mut y_plane = vec![0u8; y_plane_size]; + for y in 0..height_usize { + let src_row_offset = y * width_usize * 4; // 4 bytes per pixel (Rgba8Unorm) + let dst_row_offset = y * width_usize; + for x in 0..width_usize { + y_plane[dst_row_offset + x] = data[src_row_offset + x * 4]; // Extract R channel + } + } + + // U and V planes: rows height to height + height/2 - 1 (half resolution, side-by-side layout) + // U plane is in 
left half (columns 0 to width/2-1), V plane is in right half (columns width/2 to width-1) + let chroma_width = width_usize / 2; + let chroma_height = height_usize / 2; + let chroma_row_start = height_usize * width_usize * 4; // Start of chroma rows in bytes + + let mut u_plane = vec![0u8; chroma_width * chroma_height]; + let mut v_plane = vec![0u8; chroma_width * chroma_height]; + + for y in 0..chroma_height { + let row_offset = chroma_row_start + y * width_usize * 4; // Full width rows in chroma region + + // Extract U plane (left half: columns 0 to chroma_width-1) + let u_start = row_offset; + let dst_offset = y * chroma_width; + for x in 0..chroma_width { + u_plane[dst_offset + x] = data[u_start + x * 4]; // Extract R channel + } + + // Extract V plane (right half: columns width/2 to width/2+chroma_width-1) + let v_start = row_offset + chroma_width * 4; + for x in 0..chroma_width { + v_plane[dst_offset + x] = data[v_start + x * 4]; // Extract R channel + } } drop(data); - staging_buffer.unmap(); + gpu_resources.staging_buffer.unmap(); - Ok(()) + Ok((y_plane, u_plane, v_plane)) +} + +/// Render frame to GPU RGBA texture (non-blocking, for async pipeline) +/// +/// Similar to render_frame_to_rgba_hdr but renders to an external RGBA texture view +/// (provided by ReadbackPipeline) and returns the command encoder WITHOUT blocking on readback. +/// The caller (ReadbackPipeline) will submit the encoder and handle async readback. 
+/// +/// # Arguments +/// * `document` - Document to render +/// * `timestamp` - Time in seconds to render at +/// * `width` - Frame width in pixels +/// * `height` - Frame height in pixels +/// * `device` - wgpu device +/// * `queue` - wgpu queue +/// * `renderer` - Vello renderer +/// * `image_cache` - Image cache for rendering +/// * `video_manager` - Video manager for video clips +/// * `gpu_resources` - HDR GPU resources for compositing +/// * `rgba_texture_view` - External RGBA texture view (from ReadbackPipeline) +/// +/// # Returns +/// Command encoder ready for submission (caller submits via ReadbackPipeline) +pub fn render_frame_to_gpu_rgba( + document: &mut Document, + timestamp: f64, + width: u32, + height: u32, + device: &wgpu::Device, + queue: &wgpu::Queue, + renderer: &mut vello::Renderer, + image_cache: &mut ImageCache, + video_manager: &Arc>, + gpu_resources: &mut ExportGpuResources, + rgba_texture_view: &wgpu::TextureView, +) -> Result { + use vello::kurbo::Affine; + + // Set document time to the frame timestamp + document.current_time = timestamp; + + // Use identity transform for export (document coordinates = pixel coordinates) + let base_transform = Affine::IDENTITY; + + // Render document for compositing (returns per-layer scenes) + let composite_result = render_document_for_compositing( + document, + base_transform, + image_cache, + video_manager, + ); + + // Buffer specs for layer rendering + let layer_spec = BufferSpec::new(width, height, BufferFormat::Rgba8Srgb); + let hdr_spec = BufferSpec::new(width, height, BufferFormat::Rgba16Float); + + // Render parameters for Vello (transparent background for layers) + let layer_render_params = vello::RenderParams { + base_color: vello::peniko::Color::TRANSPARENT, + width, + height, + antialiasing_method: vello::AaConfig::Area, + }; + + // Render background and composite it + let bg_srgb_handle = gpu_resources.buffer_pool.acquire(device, layer_spec); + let bg_hdr_handle = 
gpu_resources.buffer_pool.acquire(device, hdr_spec); + + if let (Some(bg_srgb_view), Some(bg_hdr_view)) = ( + gpu_resources.buffer_pool.get_view(bg_srgb_handle), + gpu_resources.buffer_pool.get_view(bg_hdr_handle), + ) { + renderer.render_to_texture(device, queue, &composite_result.background, bg_srgb_view, &layer_render_params) + .map_err(|e| format!("Failed to render background: {}", e))?; + + let mut convert_encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("export_bg_srgb_to_linear_encoder"), + }); + gpu_resources.srgb_to_linear.convert(device, &mut convert_encoder, bg_srgb_view, bg_hdr_view); + queue.submit(Some(convert_encoder.finish())); + + let bg_compositor_layer = CompositorLayer::normal(bg_hdr_handle, 1.0); + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("export_bg_composite_encoder"), + }); + gpu_resources.compositor.composite( + device, + queue, + &mut encoder, + &[bg_compositor_layer], + &gpu_resources.buffer_pool, + &gpu_resources.hdr_texture_view, + Some([0.0, 0.0, 0.0, 1.0]), + ); + queue.submit(Some(encoder.finish())); + } + gpu_resources.buffer_pool.release(bg_srgb_handle); + gpu_resources.buffer_pool.release(bg_hdr_handle); + + // Render and composite each layer incrementally + for rendered_layer in &composite_result.layers { + if !rendered_layer.has_content { + continue; + } + + match &rendered_layer.layer_type { + RenderedLayerType::Content => { + let srgb_handle = gpu_resources.buffer_pool.acquire(device, layer_spec); + let hdr_layer_handle = gpu_resources.buffer_pool.acquire(device, hdr_spec); + + if let (Some(srgb_view), Some(hdr_layer_view)) = ( + gpu_resources.buffer_pool.get_view(srgb_handle), + gpu_resources.buffer_pool.get_view(hdr_layer_handle), + ) { + renderer.render_to_texture(device, queue, &rendered_layer.scene, srgb_view, &layer_render_params) + .map_err(|e| format!("Failed to render layer: {}", e))?; + + let mut convert_encoder = 
device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("export_layer_srgb_to_linear_encoder"), + }); + gpu_resources.srgb_to_linear.convert(device, &mut convert_encoder, srgb_view, hdr_layer_view); + queue.submit(Some(convert_encoder.finish())); + + let compositor_layer = CompositorLayer::normal(hdr_layer_handle, rendered_layer.opacity); + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("export_layer_composite_encoder"), + }); + gpu_resources.compositor.composite( + device, + queue, + &mut encoder, + &[compositor_layer], + &gpu_resources.buffer_pool, + &gpu_resources.hdr_texture_view, + None, + ); + queue.submit(Some(encoder.finish())); + } + gpu_resources.buffer_pool.release(srgb_handle); + gpu_resources.buffer_pool.release(hdr_layer_handle); + } + RenderedLayerType::Effect { effect_instances } => { + // Effect layer - apply effects to the current HDR accumulator + let current_time = document.current_time; + + for effect_instance in effect_instances { + // Get effect definition from document + let Some(effect_def) = document.get_effect_definition(&effect_instance.clip_id) else { + continue; + }; + + // Compile effect if needed + if !gpu_resources.effect_processor.is_compiled(&effect_def.id) { + let success = gpu_resources.effect_processor.compile_effect(device, effect_def); + if !success { + eprintln!("Failed to compile effect: {}", effect_def.name); + continue; + } + } + + // Create EffectInstance from ClipInstance for the processor + let effect_inst = lightningbeam_core::effect::EffectInstance::new( + effect_def, + effect_instance.timeline_start, + effect_instance.timeline_start + effect_instance.effective_duration(lightningbeam_core::effect::EFFECT_DURATION), + ); + + // Acquire temp buffer for effect output (HDR format) + let effect_output_handle = gpu_resources.buffer_pool.acquire(device, hdr_spec); + + if let Some(effect_output_view) = 
gpu_resources.buffer_pool.get_view(effect_output_handle) { + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("export_effect_encoder"), + }); + + // Apply effect: HDR accumulator → effect output buffer + let applied = gpu_resources.effect_processor.apply_effect( + device, + queue, + &mut encoder, + effect_def, + &effect_inst, + &gpu_resources.hdr_texture_view, + effect_output_view, + width, + height, + current_time, + ); + + if applied { + // Copy effect output back to HDR accumulator + encoder.copy_texture_to_texture( + wgpu::TexelCopyTextureInfo { + texture: gpu_resources.buffer_pool.get_texture(effect_output_handle).unwrap(), + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + wgpu::TexelCopyTextureInfo { + texture: &gpu_resources.hdr_texture, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + wgpu::Extent3d { + width, + height, + depth_or_array_layers: 1, + }, + ); + } + + queue.submit(Some(encoder.finish())); + } + + gpu_resources.buffer_pool.release(effect_output_handle); + } + } + } + } + + // Convert HDR to sRGB (linear → sRGB), render directly to external RGBA texture + let output_view = rgba_texture_view; + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("export_linear_to_srgb_bind_group"), + layout: &gpu_resources.linear_to_srgb_bind_group_layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(&gpu_resources.hdr_texture_view), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: wgpu::BindingResource::Sampler(&gpu_resources.linear_to_srgb_sampler), + }, + ], + }); + + let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("export_linear_to_srgb_encoder"), + }); + + { + let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("export_linear_to_srgb_pass"), + 
color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &output_view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), + store: wgpu::StoreOp::Store, + }, + depth_slice: None, + })], + depth_stencil_attachment: None, + occlusion_query_set: None, + timestamp_writes: None, + }); + + render_pass.set_pipeline(&gpu_resources.linear_to_srgb_pipeline); + render_pass.set_bind_group(0, &bind_group, &[]); + render_pass.draw(0..4, 0..1); + } + + // Return encoder for caller to submit (ReadbackPipeline will handle submission and async readback) + // Frame is already rendered to external RGBA texture, no GPU YUV conversion needed + Ok(encoder) } #[cfg(test)] diff --git a/lightningbeam-ui/lightningbeam-editor/src/main.rs b/lightningbeam-ui/lightningbeam-editor/src/main.rs index 3360e4c..a2c65e0 100644 --- a/lightningbeam-ui/lightningbeam-editor/src/main.rs +++ b/lightningbeam-ui/lightningbeam-editor/src/main.rs @@ -2936,7 +2936,6 @@ impl eframe::App for EditorApp { } } if let Some(progress) = orchestrator.poll_progress() { - println!("📨 [MAIN] Received progress from orchestrator!"); match progress { lightningbeam_core::export::ExportProgress::Started { total_frames } => { println!("Export started: {} frames", total_frames);