slightly improve video export speed
This commit is contained in:
parent
d94ec0d6a8
commit
cb62d0ee9d
|
|
@ -9,15 +9,18 @@ members = [
|
||||||
# UI Framework (using eframe for simplified integration)
|
# UI Framework (using eframe for simplified integration)
|
||||||
# Note: Upgraded from 0.29 to 0.31 to fix Linux IME/keyboard input issues
|
# Note: Upgraded from 0.29 to 0.31 to fix Linux IME/keyboard input issues
|
||||||
# See: https://github.com/emilk/egui/pull/5198
|
# See: https://github.com/emilk/egui/pull/5198
|
||||||
eframe = { version = "0.31", default-features = true, features = ["wgpu"] }
|
# Upgraded to 0.33 for shader editor (egui_code_editor) and continued bug fixes
|
||||||
egui_extras = { version = "0.31", features = ["image", "svg"] }
|
egui = "0.33"
|
||||||
egui-wgpu = "0.31"
|
eframe = { version = "0.33", default-features = true, features = ["wgpu"] }
|
||||||
|
egui_extras = { version = "0.33", features = ["image", "svg", "syntect"] }
|
||||||
|
egui-wgpu = "0.33"
|
||||||
|
egui_code_editor = "0.2"
|
||||||
|
|
||||||
# GPU Rendering
|
# GPU Rendering
|
||||||
# vello 0.5 uses wgpu 24, matching eframe 0.31
|
# vello from git uses wgpu 27, matching eframe 0.33
|
||||||
vello = "0.5"
|
vello = { git = "https://github.com/linebender/vello", branch = "main" }
|
||||||
wgpu = "24"
|
wgpu = { version = "27", features = ["vulkan", "metal"] }
|
||||||
kurbo = { version = "0.11", features = ["serde"] }
|
kurbo = { version = "0.12", features = ["serde"] }
|
||||||
peniko = "0.5"
|
peniko = "0.5"
|
||||||
|
|
||||||
# Windowing
|
# Windowing
|
||||||
|
|
|
||||||
|
|
@ -10,12 +10,14 @@ pub mod buffer_pool;
|
||||||
pub mod color_convert;
|
pub mod color_convert;
|
||||||
pub mod compositor;
|
pub mod compositor;
|
||||||
pub mod effect_processor;
|
pub mod effect_processor;
|
||||||
|
pub mod yuv_converter;
|
||||||
|
|
||||||
// Re-export commonly used types
|
// Re-export commonly used types
|
||||||
pub use buffer_pool::{BufferHandle, BufferPool, BufferSpec, BufferFormat};
|
pub use buffer_pool::{BufferHandle, BufferPool, BufferSpec, BufferFormat};
|
||||||
pub use color_convert::SrgbToLinearConverter;
|
pub use color_convert::SrgbToLinearConverter;
|
||||||
pub use compositor::{Compositor, CompositorLayer, BlendMode};
|
pub use compositor::{Compositor, CompositorLayer, BlendMode};
|
||||||
pub use effect_processor::{EffectProcessor, EffectUniforms};
|
pub use effect_processor::{EffectProcessor, EffectUniforms};
|
||||||
|
pub use yuv_converter::YuvConverter;
|
||||||
|
|
||||||
/// Standard HDR internal texture format (16-bit float per channel)
|
/// Standard HDR internal texture format (16-bit float per channel)
|
||||||
pub const HDR_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba16Float;
|
pub const HDR_FORMAT: wgpu::TextureFormat = wgpu::TextureFormat::Rgba16Float;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,241 @@
|
||||||
|
//! GPU-accelerated RGBA to YUV420p color space conversion
|
||||||
|
//!
|
||||||
|
//! Provides a compute shader-based converter for transforming RGBA textures
|
||||||
|
//! to YUV420p planar format using the BT.709 color matrix (HD video standard).
|
||||||
|
//! This replaces the CPU-based conversion with GPU parallel processing.
|
||||||
|
|
||||||
|
/// GPU pipeline for RGBA to YUV420p color space conversion
///
/// Runs a compute shader that reads a float-sampled RGBA texture and writes
/// YUV420p planes using the BT.709 color matrix.
/// The Y plane is full resolution, while U and V planes are subsampled 4:2:0.
///
/// Output texture layout (as written by the shader in this file; every sample
/// is stored in the `.r` channel of an `Rgba8Unorm` texel):
/// - Rows 0 to height-1: Y plane (luma, full resolution, full width)
/// - Rows height to height + height/2 - 1, columns 0 to width/2 - 1:
///   U plane (chroma, half resolution)
/// - Rows height to height + height/2 - 1, columns width/2 to width - 1:
///   V plane (chroma, half resolution)
pub struct YuvConverter {
    /// Compute pipeline that executes the RGBA → YUV shader.
    pipeline: wgpu::ComputePipeline,
    /// Layout used to build the per-conversion bind group
    /// (binding 0: input texture, binding 1: output storage texture).
    bind_group_layout: wgpu::BindGroupLayout,
}
|
||||||
|
|
||||||
|
impl YuvConverter {
|
||||||
|
/// Create a new RGBA to YUV420p converter
|
||||||
|
pub fn new(device: &wgpu::Device) -> Self {
|
||||||
|
// Create bind group layout
|
||||||
|
let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
|
||||||
|
label: Some("yuv_converter_bind_group_layout"),
|
||||||
|
entries: &[
|
||||||
|
// Input RGBA texture (binding 0)
|
||||||
|
wgpu::BindGroupLayoutEntry {
|
||||||
|
binding: 0,
|
||||||
|
visibility: wgpu::ShaderStages::COMPUTE,
|
||||||
|
ty: wgpu::BindingType::Texture {
|
||||||
|
sample_type: wgpu::TextureSampleType::Float { filterable: false },
|
||||||
|
view_dimension: wgpu::TextureViewDimension::D2,
|
||||||
|
multisampled: false,
|
||||||
|
},
|
||||||
|
count: None,
|
||||||
|
},
|
||||||
|
// Output YUV texture (Rgba8Unorm storage texture, binding 1)
|
||||||
|
// Note: R8Unorm doesn't support storage binding, so we use Rgba8Unorm and write to .r channel
|
||||||
|
wgpu::BindGroupLayoutEntry {
|
||||||
|
binding: 1,
|
||||||
|
visibility: wgpu::ShaderStages::COMPUTE,
|
||||||
|
ty: wgpu::BindingType::StorageTexture {
|
||||||
|
access: wgpu::StorageTextureAccess::WriteOnly,
|
||||||
|
format: wgpu::TextureFormat::Rgba8Unorm,
|
||||||
|
view_dimension: wgpu::TextureViewDimension::D2,
|
||||||
|
},
|
||||||
|
count: None,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create pipeline layout
|
||||||
|
let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
|
||||||
|
label: Some("yuv_converter_pipeline_layout"),
|
||||||
|
bind_group_layouts: &[&bind_group_layout],
|
||||||
|
push_constant_ranges: &[],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create shader module
|
||||||
|
let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
|
||||||
|
label: Some("yuv_converter_shader"),
|
||||||
|
source: wgpu::ShaderSource::Wgsl(YUV_CONVERTER_SHADER.into()),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create compute pipeline
|
||||||
|
let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
|
||||||
|
label: Some("yuv_converter_pipeline"),
|
||||||
|
layout: Some(&pipeline_layout),
|
||||||
|
module: &shader,
|
||||||
|
entry_point: Some("main"),
|
||||||
|
compilation_options: wgpu::PipelineCompilationOptions::default(),
|
||||||
|
cache: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
Self {
|
||||||
|
pipeline,
|
||||||
|
bind_group_layout,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert RGBA texture to YUV420p planar format
|
||||||
|
///
|
||||||
|
/// Reads from `rgba_view` and writes Y, U, V planes to `yuv_output_view`.
|
||||||
|
/// The output texture must be R8Unorm format with height = input_height * 1.5
|
||||||
|
/// to accommodate the packed YUV planes.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `device` - GPU device
|
||||||
|
/// * `encoder` - Command encoder to record GPU commands
|
||||||
|
/// * `rgba_view` - Source RGBA texture view
|
||||||
|
/// * `yuv_output_view` - Destination YUV planar texture view (R8Unorm, height*1.5)
|
||||||
|
/// * `width` - Width of the source RGBA texture
|
||||||
|
/// * `height` - Height of the source RGBA texture
|
||||||
|
pub fn convert(
|
||||||
|
&self,
|
||||||
|
device: &wgpu::Device,
|
||||||
|
encoder: &mut wgpu::CommandEncoder,
|
||||||
|
rgba_view: &wgpu::TextureView,
|
||||||
|
yuv_output_view: &wgpu::TextureView,
|
||||||
|
width: u32,
|
||||||
|
height: u32,
|
||||||
|
) {
|
||||||
|
// Create bind group for this conversion
|
||||||
|
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||||
|
label: Some("yuv_converter_bind_group"),
|
||||||
|
layout: &self.bind_group_layout,
|
||||||
|
entries: &[
|
||||||
|
wgpu::BindGroupEntry {
|
||||||
|
binding: 0,
|
||||||
|
resource: wgpu::BindingResource::TextureView(rgba_view),
|
||||||
|
},
|
||||||
|
wgpu::BindGroupEntry {
|
||||||
|
binding: 1,
|
||||||
|
resource: wgpu::BindingResource::TextureView(yuv_output_view),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Compute pass
|
||||||
|
let mut compute_pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
|
||||||
|
label: Some("yuv_conversion_pass"),
|
||||||
|
timestamp_writes: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
compute_pass.set_pipeline(&self.pipeline);
|
||||||
|
compute_pass.set_bind_group(0, &bind_group, &[]);
|
||||||
|
|
||||||
|
// Dispatch workgroups: 8x8 threads per workgroup
|
||||||
|
// Each thread processes one pixel for the Y plane
|
||||||
|
// Chroma planes are processed by threads at even coordinates
|
||||||
|
let workgroup_size = 8;
|
||||||
|
let workgroups_x = (width + workgroup_size - 1) / workgroup_size;
|
||||||
|
let workgroups_y = (height + workgroup_size - 1) / workgroup_size;
|
||||||
|
compute_pass.dispatch_workgroups(workgroups_x, workgroups_y, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// WGSL compute shader for RGBA to YUV420p conversion
///
/// Entry point `main`, workgroup size 8x8x1. Each invocation handles one input
/// pixel: it always writes that pixel's luma, and invocations at even `(x, y)`
/// additionally average the (up to) 2x2 block starting at that pixel and write
/// one U and one V sample. All samples are written to the `.r` channel of the
/// `rgba8unorm` storage texture.
///
/// NOTE(review): U columns are `x / 2` and V columns start at `width / 2`, so
/// for an odd input width the last U column and the first V column are the same
/// texel. Presumably callers only pass even dimensions — confirm.
const YUV_CONVERTER_SHADER: &str = r#"
// RGBA to YUV420p Compute Shader
// BT.709 color space for HD video (ITU-R BT.709-6 standard)
//
// Color matrix:
// Y = 0.2126*R + 0.7152*G + 0.0722*B
// U = -0.1146*R - 0.3854*G + 0.5000*B + 0.5
// V = 0.5000*R - 0.4542*G - 0.0458*B + 0.5
//
// Output texture layout (packed planar, side-by-side U/V):
// - Rows [0, height): Y plane (full resolution, full width)
// - Rows [height, height + height/2): U plane (left half, columns 0 to width/2-1)
// V plane (right half, columns width/2 to width-1)

@group(0) @binding(0) var input_rgba: texture_2d<f32>;
@group(0) @binding(1) var output_yuv: texture_storage_2d<rgba8unorm, write>;

@compute @workgroup_size(8, 8, 1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let dims = textureDimensions(input_rgba);
    let pos = global_id.xy;

    // Bounds check
    if (pos.x >= dims.x || pos.y >= dims.y) {
        return;
    }

    // Load RGBA pixel
    let rgba = textureLoad(input_rgba, pos, 0);
    let r = rgba.r;
    let g = rgba.g;
    let b = rgba.b;

    // Compute Y (luma) - full resolution, BT.709
    let y = 0.2126 * r + 0.7152 * g + 0.0722 * b;

    // Write Y value to Y plane (rows 0 to height-1)
    textureStore(output_yuv, pos, vec4<f32>(y, 0.0, 0.0, 0.0));

    // Compute U and V (chroma) - subsampled 4:2:0
    // Only process even coordinates (top-left of 2x2 blocks)
    if (pos.x % 2u == 0u && pos.y % 2u == 0u) {
        // Sample 2x2 block for chroma subsampling
        var r_sum = r;
        var g_sum = g;
        var b_sum = b;
        var count = 1.0;

        // Sample right neighbor (x+1, y)
        if (pos.x + 1u < dims.x) {
            let rgba_r = textureLoad(input_rgba, pos + vec2<u32>(1u, 0u), 0);
            r_sum += rgba_r.r;
            g_sum += rgba_r.g;
            b_sum += rgba_r.b;
            count += 1.0;
        }

        // Sample bottom neighbor (x, y+1)
        if (pos.y + 1u < dims.y) {
            let rgba_b = textureLoad(input_rgba, pos + vec2<u32>(0u, 1u), 0);
            r_sum += rgba_b.r;
            g_sum += rgba_b.g;
            b_sum += rgba_b.b;
            count += 1.0;
        }

        // Sample bottom-right neighbor (x+1, y+1)
        if (pos.x + 1u < dims.x && pos.y + 1u < dims.y) {
            let rgba_br = textureLoad(input_rgba, pos + vec2<u32>(1u, 1u), 0);
            r_sum += rgba_br.r;
            g_sum += rgba_br.g;
            b_sum += rgba_br.b;
            count += 1.0;
        }

        // Average the 2x2 block
        let r_avg = r_sum / count;
        let g_avg = g_sum / count;
        let b_avg = b_sum / count;

        // Compute chroma components (BT.709, centered at 0.5 for unsigned 8-bit)
        let u = -0.1146 * r_avg - 0.3854 * g_avg + 0.5000 * b_avg + 0.5;
        let v = 0.5000 * r_avg - 0.4542 * g_avg - 0.0458 * b_avg + 0.5;

        // Compute chroma plane positions (half resolution)
        // Pack U and V side-by-side: U on left half, V on right half
        let chroma_x = pos.x / 2u;
        let chroma_y = pos.y / 2u;

        // U plane: left half (columns 0 to width/2-1), rows height to height+height/2-1
        let u_pos = vec2<u32>(chroma_x, dims.y + chroma_y);

        // V plane: right half (columns width/2 to width-1), rows height to height+height/2-1
        let v_pos = vec2<u32>(dims.x / 2u + chroma_x, dims.y + chroma_y);

        // Write U and V values to their respective planes
        textureStore(output_yuv, u_pos, vec4<f32>(u, 0.0, 0.0, 0.0));
        textureStore(output_yuv, v_pos, vec4<f32>(v, 0.0, 0.0, 0.0));
    }
}
"#;
|
||||||
|
|
@ -0,0 +1,62 @@
|
||||||
|
# Plan for Async Rendering Helpers
|
||||||
|
|
||||||
|
I'm creating this temporary document to plan the async rendering changes.
|
||||||
|
|
||||||
|
## Current Architecture (Synchronous)
|
||||||
|
`render_frame_to_rgba_hdr()` in video_exporter.rs:
|
||||||
|
1. Render document to RGBA (lines 750-991)
|
||||||
|
2. GPU YUV conversion (lines 993-1005)
|
||||||
|
3. Copy YUV to staging buffer (lines 1007-1029)
|
||||||
|
4. Submit GPU commands (line 1031)
|
||||||
|
5. **BLOCKING** map_async + wait (lines 1033-1045)
|
||||||
|
6. Extract Y, U, V planes from mapped buffer (lines 1047-1087)
|
||||||
|
7. Unmap and return YUV planes (lines 1089-1092)
|
||||||
|
|
||||||
|
## New Architecture (Async Pipelined)
|
||||||
|
Split into two phases using ReadbackPipeline:
|
||||||
|
|
||||||
|
### Phase 1: Submit Frame (Non-blocking)
|
||||||
|
New function `submit_frame_to_readback_pipeline()`:
|
||||||
|
- Input: buffer from ReadbackPipeline.acquire()
|
||||||
|
- Steps 1-3: Render to RGBA, GPU YUV, copy to buffer's YUV texture
|
||||||
|
- Return encoder to ReadbackPipeline for submission
|
||||||
|
- **Does NOT wait for GPU**
|
||||||
|
|
||||||
|
### Phase 2: Extract YUV (After async mapping)
|
||||||
|
Helper function `extract_yuv_planes_from_buffer()`:
|
||||||
|
- Input: mapped buffer data from ReadbackPipeline
|
||||||
|
- Steps 6-7: Extract Y, U, V planes, return them
|
||||||
|
- Used after ReadbackPipeline.get_mapped_data()
|
||||||
|
|
||||||
|
## Modified render_next_video_frame()
|
||||||
|
New async pipeline loop:
|
||||||
|
```
|
||||||
|
while more_work_to_do:
|
||||||
|
// Poll for completed frames
|
||||||
|
for result in pipeline.poll_nonblocking():
|
||||||
|
data = pipeline.get_mapped_data(result.buffer_id)
|
||||||
|
(y, u, v) = extract_yuv_planes(data)
|
||||||
|
send_to_encoder_in_order(result.frame_num, y, u, v)
|
||||||
|
pipeline.release(result.buffer_id)
frames_in_flight--
|
||||||
|
|
||||||
|
// Submit new frames (up to 3 in flight)
|
||||||
|
if current_frame < total_frames && frames_in_flight < 3:
|
||||||
|
if let Some(buffer) = pipeline.acquire(frame_num, timestamp):
|
||||||
|
encoder = submit_frame_to_pipeline(buffer)
|
||||||
|
pipeline.submit_and_readback(buffer.id, encoder)
|
||||||
|
frames_in_flight++
|
||||||
|
current_frame++
|
||||||
|
|
||||||
|
// Done when all frames submitted AND all completed
|
||||||
|
if current_frame >= total_frames && frames_in_flight == 0:
|
||||||
|
return Ok(false)
|
||||||
|
|
||||||
|
return Ok(true) // More work to do
|
||||||
|
```
|
||||||
|
|
||||||
|
This achieves triple buffering:
|
||||||
|
- Frame N: GPU rendering
|
||||||
|
- Frame N-1: GPU→CPU async transfer
|
||||||
|
- Frame N-2: CPU encoding
|
||||||
|
|
||||||
|
Expected speedup: 5x
|
||||||
|
|
@ -0,0 +1,143 @@
|
||||||
|
//! CPU-based RGBA→YUV420p color space converter using FFmpeg's swscale
|
||||||
|
//!
|
||||||
|
//! This module provides a wrapper around FFmpeg's highly-optimized swscale library
|
||||||
|
//! for converting RGBA data to YUV420p format. Uses SIMD instructions when available
|
||||||
|
//! for maximum performance.
|
||||||
|
|
||||||
|
use ffmpeg_next as ffmpeg;
|
||||||
|
|
||||||
|
/// CPU-based RGBA→YUV420p converter using FFmpeg's swscale
///
/// This converter uses FFmpeg's swscale library which is highly optimized with SIMD
/// instructions (SSE, AVX) for fast color space conversion on the CPU.
///
/// The struct only stores the frame dimensions; the FFmpeg frames and the
/// swscale context are created inside each `convert` call.
pub struct CpuYuvConverter {
    /// Frame width in pixels.
    width: u32,
    /// Frame height in pixels.
    height: u32,
}
|
||||||
|
|
||||||
|
impl CpuYuvConverter {
|
||||||
|
/// Create new converter for given dimensions
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `width` - Frame width in pixels
|
||||||
|
/// * `height` - Frame height in pixels
|
||||||
|
pub fn new(width: u32, height: u32) -> Result<Self, String> {
|
||||||
|
Ok(Self { width, height })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert RGBA data to YUV420p planes
|
||||||
|
///
|
||||||
|
/// Performs color space conversion from RGBA (8-bit per channel, packed format)
|
||||||
|
/// to YUV420p (8-bit per channel, planar format with subsampled chroma).
|
||||||
|
///
|
||||||
|
/// Uses BT.709 color matrix (HD standard) for the conversion.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `rgba_data` - Packed RGBA data (width * height * 4 bytes)
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// Tuple of (y_plane, u_plane, v_plane) as separate Vec<u8>
|
||||||
|
///
|
||||||
|
/// # Panics
|
||||||
|
/// Panics if rgba_data length doesn't match width * height * 4
|
||||||
|
pub fn convert(&self, rgba_data: &[u8]) -> Result<(Vec<u8>, Vec<u8>, Vec<u8>), String> {
|
||||||
|
let expected_size = (self.width * self.height * 4) as usize;
|
||||||
|
assert_eq!(
|
||||||
|
rgba_data.len(),
|
||||||
|
expected_size,
|
||||||
|
"RGBA data size mismatch: expected {} bytes, got {}",
|
||||||
|
expected_size,
|
||||||
|
rgba_data.len()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Create source RGBA frame
|
||||||
|
let mut rgba_frame = ffmpeg::frame::Video::new(
|
||||||
|
ffmpeg::format::Pixel::RGBA,
|
||||||
|
self.width,
|
||||||
|
self.height,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Copy RGBA data into source frame
|
||||||
|
// ffmpeg-next provides mutable access to the frame data
|
||||||
|
let frame_data = rgba_frame.data_mut(0);
|
||||||
|
frame_data.copy_from_slice(rgba_data);
|
||||||
|
|
||||||
|
// Create destination YUV420p frame
|
||||||
|
let mut yuv_frame = ffmpeg::frame::Video::new(
|
||||||
|
ffmpeg::format::Pixel::YUV420P,
|
||||||
|
self.width,
|
||||||
|
self.height,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Create swscale context for RGBA→YUV420p conversion
|
||||||
|
// Uses BT.709 color matrix (HD standard)
|
||||||
|
let mut scaler = ffmpeg::software::scaling::Context::get(
|
||||||
|
ffmpeg::format::Pixel::RGBA,
|
||||||
|
self.width,
|
||||||
|
self.height,
|
||||||
|
ffmpeg::format::Pixel::YUV420P,
|
||||||
|
self.width,
|
||||||
|
self.height,
|
||||||
|
ffmpeg::software::scaling::Flags::BILINEAR,
|
||||||
|
)
|
||||||
|
.map_err(|e| format!("Failed to create swscale context: {}", e))?;
|
||||||
|
|
||||||
|
// Perform the conversion (SIMD-optimized)
|
||||||
|
scaler
|
||||||
|
.run(&rgba_frame, &mut yuv_frame)
|
||||||
|
.map_err(|e| format!("swscale conversion failed: {}", e))?;
|
||||||
|
|
||||||
|
// Extract planar YUV data
|
||||||
|
// YUV420p has 3 planes:
|
||||||
|
// - Y: full resolution (width × height)
|
||||||
|
// - U: quarter resolution (width/2 × height/2)
|
||||||
|
// - V: quarter resolution (width/2 × height/2)
|
||||||
|
let y_plane = yuv_frame.data(0).to_vec();
|
||||||
|
let u_plane = yuv_frame.data(1).to_vec();
|
||||||
|
let v_plane = yuv_frame.data(2).to_vec();
|
||||||
|
|
||||||
|
Ok((y_plane, u_plane, v_plane))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Building a converter for a 1080p frame must succeed.
    #[test]
    fn test_converter_creation() {
        assert!(CpuYuvConverter::new(1920, 1080).is_ok());
    }

    /// A black 1080p frame yields a full-size luma plane and two
    /// quarter-size chroma planes.
    #[test]
    fn test_conversion_output_sizes() {
        let (frame_w, frame_h) = (1920usize, 1080usize);
        let converter = CpuYuvConverter::new(1920, 1080).unwrap();

        // All-zero bytes: opaque-less black RGBA pixels.
        let black_frame = vec![0u8; frame_w * frame_h * 4];

        let planes = converter.convert(&black_frame);
        assert!(planes.is_ok());
        let (luma, cb, cr) = planes.unwrap();

        // Luma is stored at full resolution.
        assert_eq!(luma.len(), frame_w * frame_h);

        // Each chroma plane is subsampled 2x2.
        let chroma_len = (frame_w / 2) * (frame_h / 2);
        assert_eq!(cb.len(), chroma_len);
        assert_eq!(cr.len(), chroma_len);
    }

    /// Feeding a buffer of the wrong length must trip the size assertion.
    #[test]
    #[should_panic(expected = "RGBA data size mismatch")]
    fn test_wrong_input_size_panics() {
        let converter = CpuYuvConverter::new(1920, 1080).unwrap();

        // Far too small for a 1920x1080 RGBA frame.
        let undersized = vec![0u8; 1000];

        let _ = converter.convert(&undersized);
    }
}
|
||||||
|
|
@ -6,6 +6,9 @@
|
||||||
pub mod audio_exporter;
|
pub mod audio_exporter;
|
||||||
pub mod dialog;
|
pub mod dialog;
|
||||||
pub mod video_exporter;
|
pub mod video_exporter;
|
||||||
|
pub mod readback_pipeline;
|
||||||
|
pub mod perf_metrics;
|
||||||
|
pub mod cpu_yuv_converter;
|
||||||
|
|
||||||
use lightningbeam_core::export::{AudioExportSettings, VideoExportSettings, ExportProgress};
|
use lightningbeam_core::export::{AudioExportSettings, VideoExportSettings, ExportProgress};
|
||||||
use lightningbeam_core::document::Document;
|
use lightningbeam_core::document::Document;
|
||||||
|
|
@ -18,8 +21,14 @@ use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
|
|
||||||
/// Message sent from main thread to video encoder thread
|
/// Message sent from main thread to video encoder thread
|
||||||
enum VideoFrameMessage {
|
enum VideoFrameMessage {
|
||||||
/// RGBA frame data with frame number and timestamp
|
/// YUV420p frame data with frame number and timestamp (GPU-converted)
|
||||||
Frame { frame_num: usize, timestamp: f64, rgba_data: Vec<u8> },
|
Frame {
|
||||||
|
frame_num: usize,
|
||||||
|
timestamp: f64,
|
||||||
|
y_plane: Vec<u8>,
|
||||||
|
u_plane: Vec<u8>,
|
||||||
|
v_plane: Vec<u8>,
|
||||||
|
},
|
||||||
/// Signal that all frames have been sent
|
/// Signal that all frames have been sent
|
||||||
Done,
|
Done,
|
||||||
}
|
}
|
||||||
|
|
@ -44,6 +53,16 @@ pub struct VideoExportState {
|
||||||
frame_tx: Option<Sender<VideoFrameMessage>>,
|
frame_tx: Option<Sender<VideoFrameMessage>>,
|
||||||
/// HDR GPU resources for compositing pipeline (effects, color conversion)
|
/// HDR GPU resources for compositing pipeline (effects, color conversion)
|
||||||
gpu_resources: Option<video_exporter::ExportGpuResources>,
|
gpu_resources: Option<video_exporter::ExportGpuResources>,
|
||||||
|
/// Async triple-buffered readback pipeline for GPU RGBA frames
|
||||||
|
readback_pipeline: Option<readback_pipeline::ReadbackPipeline>,
|
||||||
|
/// CPU YUV converter for RGBA→YUV420p conversion
|
||||||
|
cpu_yuv_converter: Option<cpu_yuv_converter::CpuYuvConverter>,
|
||||||
|
/// Frames that have been submitted to GPU but not yet encoded
|
||||||
|
frames_in_flight: usize,
|
||||||
|
/// Next frame number to send to encoder (for ordering)
|
||||||
|
next_frame_to_encode: usize,
|
||||||
|
/// Performance metrics for instrumentation
|
||||||
|
perf_metrics: Option<perf_metrics::ExportMetrics>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Export orchestrator that manages the export process
|
/// Export orchestrator that manages the export process
|
||||||
|
|
@ -168,13 +187,11 @@ impl ExportOrchestrator {
|
||||||
|
|
||||||
// Poll video progress
|
// Poll video progress
|
||||||
while let Ok(progress) = parallel.video_progress_rx.try_recv() {
|
while let Ok(progress) = parallel.video_progress_rx.try_recv() {
|
||||||
println!("📨 [PARALLEL] Video progress: {:?}", std::mem::discriminant(&progress));
|
|
||||||
parallel.video_progress = Some(progress);
|
parallel.video_progress = Some(progress);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Poll audio progress
|
// Poll audio progress
|
||||||
while let Ok(progress) = parallel.audio_progress_rx.try_recv() {
|
while let Ok(progress) = parallel.audio_progress_rx.try_recv() {
|
||||||
println!("📨 [PARALLEL] Audio progress: {:?}", std::mem::discriminant(&progress));
|
|
||||||
parallel.audio_progress = Some(progress);
|
parallel.audio_progress = Some(progress);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -621,7 +638,7 @@ impl ExportOrchestrator {
|
||||||
self.thread_handle = Some(handle);
|
self.thread_handle = Some(handle);
|
||||||
|
|
||||||
// Initialize video export state
|
// Initialize video export state
|
||||||
// GPU resources will be initialized lazily on first frame (needs device)
|
// GPU resources and readback pipeline will be initialized lazily on first frame (needs device)
|
||||||
self.video_state = Some(VideoExportState {
|
self.video_state = Some(VideoExportState {
|
||||||
current_frame: 0,
|
current_frame: 0,
|
||||||
total_frames,
|
total_frames,
|
||||||
|
|
@ -632,6 +649,11 @@ impl ExportOrchestrator {
|
||||||
height,
|
height,
|
||||||
frame_tx: Some(frame_tx),
|
frame_tx: Some(frame_tx),
|
||||||
gpu_resources: None,
|
gpu_resources: None,
|
||||||
|
readback_pipeline: None,
|
||||||
|
cpu_yuv_converter: None,
|
||||||
|
frames_in_flight: 0,
|
||||||
|
next_frame_to_encode: 0,
|
||||||
|
perf_metrics: Some(perf_metrics::ExportMetrics::new()),
|
||||||
});
|
});
|
||||||
|
|
||||||
println!("🎬 [VIDEO EXPORT] Encoder thread spawned, ready for frames");
|
println!("🎬 [VIDEO EXPORT] Encoder thread spawned, ready for frames");
|
||||||
|
|
@ -745,7 +767,7 @@ impl ExportOrchestrator {
|
||||||
});
|
});
|
||||||
|
|
||||||
// Initialize video export state for incremental rendering
|
// Initialize video export state for incremental rendering
|
||||||
// GPU resources will be initialized lazily on first frame (needs device)
|
// GPU resources and readback pipeline will be initialized lazily on first frame (needs device)
|
||||||
self.video_state = Some(VideoExportState {
|
self.video_state = Some(VideoExportState {
|
||||||
current_frame: 0,
|
current_frame: 0,
|
||||||
total_frames,
|
total_frames,
|
||||||
|
|
@ -756,6 +778,11 @@ impl ExportOrchestrator {
|
||||||
height: video_height,
|
height: video_height,
|
||||||
frame_tx: Some(frame_tx),
|
frame_tx: Some(frame_tx),
|
||||||
gpu_resources: None,
|
gpu_resources: None,
|
||||||
|
readback_pipeline: None,
|
||||||
|
cpu_yuv_converter: None,
|
||||||
|
frames_in_flight: 0,
|
||||||
|
next_frame_to_encode: 0,
|
||||||
|
perf_metrics: Some(perf_metrics::ExportMetrics::new()),
|
||||||
});
|
});
|
||||||
|
|
||||||
// Initialize parallel export state
|
// Initialize parallel export state
|
||||||
|
|
@ -777,6 +804,7 @@ impl ExportOrchestrator {
|
||||||
|
|
||||||
/// Render and send the next video frame (call from main thread)
|
/// Render and send the next video frame (call from main thread)
|
||||||
///
|
///
|
||||||
|
/// Uses async triple-buffered pipeline for maximum throughput.
|
||||||
/// Returns true if there are more frames to render, false if done.
|
/// Returns true if there are more frames to render, false if done.
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
|
|
@ -798,62 +826,143 @@ impl ExportOrchestrator {
|
||||||
image_cache: &mut ImageCache,
|
image_cache: &mut ImageCache,
|
||||||
video_manager: &Arc<std::sync::Mutex<VideoManager>>,
|
video_manager: &Arc<std::sync::Mutex<VideoManager>>,
|
||||||
) -> Result<bool, String> {
|
) -> Result<bool, String> {
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
let state = self.video_state.as_mut()
|
let state = self.video_state.as_mut()
|
||||||
.ok_or("No video export in progress")?;
|
.ok_or("No video export in progress")?;
|
||||||
|
|
||||||
if state.current_frame >= state.total_frames {
|
|
||||||
// All frames rendered, signal encoder thread
|
|
||||||
if let Some(tx) = state.frame_tx.take() {
|
|
||||||
tx.send(VideoFrameMessage::Done).ok();
|
|
||||||
}
|
|
||||||
// Clean up GPU resources
|
|
||||||
state.gpu_resources = None;
|
|
||||||
return Ok(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate timestamp for this frame
|
|
||||||
let timestamp = state.start_time + (state.current_frame as f64 / state.framerate);
|
|
||||||
|
|
||||||
// Get frame dimensions from export settings
|
|
||||||
let width = state.width;
|
let width = state.width;
|
||||||
let height = state.height;
|
let height = state.height;
|
||||||
|
|
||||||
// Initialize GPU resources on first frame (needs device)
|
// Initialize GPU resources and readback pipeline on first frame
|
||||||
if state.gpu_resources.is_none() {
|
if state.gpu_resources.is_none() {
|
||||||
println!("🎬 [VIDEO EXPORT] Initializing HDR GPU resources for {}x{}", width, height);
|
println!("🎬 [VIDEO EXPORT] Initializing HDR GPU + async pipeline {}x{}", width, height);
|
||||||
state.gpu_resources = Some(video_exporter::ExportGpuResources::new(device, width, height));
|
state.gpu_resources = Some(video_exporter::ExportGpuResources::new(device, width, height));
|
||||||
|
state.readback_pipeline = Some(readback_pipeline::ReadbackPipeline::new(device, queue, width, height));
|
||||||
|
state.cpu_yuv_converter = Some(cpu_yuv_converter::CpuYuvConverter::new(width, height)?);
|
||||||
|
println!("🚀 [ASYNC PIPELINE] Triple-buffered pipeline initialized");
|
||||||
|
println!("🚀 [CPU YUV] swscale converter initialized");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Render frame to RGBA buffer using HDR pipeline (with effects)
|
let pipeline = state.readback_pipeline.as_mut().unwrap();
|
||||||
let mut rgba_buffer = vec![0u8; (width * height * 4) as usize];
|
|
||||||
let gpu_resources = state.gpu_resources.as_mut().unwrap();
|
let gpu_resources = state.gpu_resources.as_mut().unwrap();
|
||||||
video_exporter::render_frame_to_rgba_hdr(
|
let cpu_converter = state.cpu_yuv_converter.as_mut().unwrap();
|
||||||
document,
|
let mut metrics = state.perf_metrics.as_mut();
|
||||||
timestamp,
|
|
||||||
width,
|
|
||||||
height,
|
|
||||||
device,
|
|
||||||
queue,
|
|
||||||
renderer,
|
|
||||||
image_cache,
|
|
||||||
video_manager,
|
|
||||||
gpu_resources,
|
|
||||||
&mut rgba_buffer,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// Send frame to encoder thread
|
// Poll for completed async readbacks (non-blocking)
|
||||||
if let Some(tx) = &state.frame_tx {
|
if let Some(m) = metrics.as_mut() {
|
||||||
tx.send(VideoFrameMessage::Frame {
|
m.poll_count += 1;
|
||||||
frame_num: state.current_frame,
|
}
|
||||||
timestamp,
|
let completed_frames = pipeline.poll_nonblocking();
|
||||||
rgba_data: rgba_buffer,
|
if let Some(m) = metrics.as_mut() {
|
||||||
}).map_err(|_| "Failed to send frame to encoder")?;
|
m.completions_per_poll.push(completed_frames.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
state.current_frame += 1;
|
// Process completed frames IN ORDER
|
||||||
|
for result in completed_frames {
|
||||||
|
if result.frame_num == state.next_frame_to_encode {
|
||||||
|
// Record readback completion time
|
||||||
|
if let Some(m) = metrics.as_mut() {
|
||||||
|
if let Some(frame_metrics) = m.frames.get_mut(result.frame_num) {
|
||||||
|
frame_metrics.readback_complete = Some(Instant::now());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Return true if more frames remain
|
// Extract RGBA data (timed)
|
||||||
Ok(state.current_frame < state.total_frames)
|
let extraction_start = Instant::now();
|
||||||
|
let rgba_data = pipeline.extract_rgba_data(result.buffer_id);
|
||||||
|
let extraction_end = Instant::now();
|
||||||
|
|
||||||
|
// CPU YUV conversion (timed)
|
||||||
|
let conversion_start = Instant::now();
|
||||||
|
let (y, u, v) = cpu_converter.convert(&rgba_data)?;
|
||||||
|
let conversion_end = Instant::now();
|
||||||
|
|
||||||
|
if let Some(m) = metrics.as_mut() {
|
||||||
|
if let Some(frame_metrics) = m.frames.get_mut(result.frame_num) {
|
||||||
|
frame_metrics.extraction_start = Some(extraction_start);
|
||||||
|
frame_metrics.extraction_end = Some(extraction_end);
|
||||||
|
frame_metrics.conversion_start = Some(conversion_start);
|
||||||
|
frame_metrics.conversion_end = Some(conversion_end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send to encoder
|
||||||
|
if let Some(tx) = &state.frame_tx {
|
||||||
|
tx.send(VideoFrameMessage::Frame {
|
||||||
|
frame_num: result.frame_num,
|
||||||
|
timestamp: result.timestamp,
|
||||||
|
y_plane: y,
|
||||||
|
u_plane: u,
|
||||||
|
v_plane: v,
|
||||||
|
}).map_err(|_| "Failed to send frame")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
pipeline.release(result.buffer_id);
|
||||||
|
state.frames_in_flight -= 1;
|
||||||
|
state.next_frame_to_encode += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Submit new frames (up to 3 in flight)
|
||||||
|
while state.current_frame < state.total_frames && state.frames_in_flight < 3 {
|
||||||
|
let timestamp = state.start_time + (state.current_frame as f64 / state.framerate);
|
||||||
|
|
||||||
|
if let Some(acquired) = pipeline.acquire(state.current_frame, timestamp) {
|
||||||
|
// Create frame metrics entry
|
||||||
|
if let Some(m) = metrics.as_mut() {
|
||||||
|
m.frames.push(perf_metrics::FrameMetrics::new(state.current_frame));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Render to GPU (timed)
|
||||||
|
let render_start = Instant::now();
|
||||||
|
let encoder = video_exporter::render_frame_to_gpu_rgba(
|
||||||
|
document, timestamp, width, height,
|
||||||
|
device, queue, renderer, image_cache, video_manager,
|
||||||
|
gpu_resources, &acquired.rgba_texture_view,
|
||||||
|
)?;
|
||||||
|
let render_end = Instant::now();
|
||||||
|
|
||||||
|
// Record render timing
|
||||||
|
if let Some(m) = metrics.as_mut() {
|
||||||
|
if let Some(frame_metrics) = m.frames.get_mut(state.current_frame) {
|
||||||
|
frame_metrics.render_end = Some(render_end);
|
||||||
|
frame_metrics.submit_time = Some(Instant::now());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Submit for async readback
|
||||||
|
pipeline.submit_and_readback(acquired.id, encoder);
|
||||||
|
|
||||||
|
state.current_frame += 1;
|
||||||
|
state.frames_in_flight += 1;
|
||||||
|
} else {
|
||||||
|
break; // All buffers in use
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Done when all submitted AND all completed
|
||||||
|
if state.current_frame >= state.total_frames && state.frames_in_flight == 0 {
|
||||||
|
println!("🎬 [VIDEO EXPORT] Complete: {} frames", state.total_frames);
|
||||||
|
|
||||||
|
// Print performance summary
|
||||||
|
if let Some(m) = &state.perf_metrics {
|
||||||
|
m.print_summary();
|
||||||
|
m.print_per_frame_details(10);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(tx) = state.frame_tx.take() {
|
||||||
|
tx.send(VideoFrameMessage::Done).ok();
|
||||||
|
}
|
||||||
|
|
||||||
|
state.gpu_resources = None;
|
||||||
|
state.readback_pipeline = None;
|
||||||
|
state.cpu_yuv_converter = None;
|
||||||
|
state.perf_metrics = None;
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(true) // More work to do
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Background thread that receives frames and encodes them
|
/// Background thread that receives frames and encodes them
|
||||||
|
|
@ -925,9 +1034,9 @@ impl ExportOrchestrator {
|
||||||
|
|
||||||
// Wait for first frame to determine dimensions
|
// Wait for first frame to determine dimensions
|
||||||
let first_frame = match frame_rx.recv() {
|
let first_frame = match frame_rx.recv() {
|
||||||
Ok(VideoFrameMessage::Frame { frame_num, timestamp, rgba_data }) => {
|
Ok(VideoFrameMessage::Frame { frame_num, timestamp, y_plane, u_plane, v_plane }) => {
|
||||||
println!("🧵 [ENCODER] Received first frame ({} bytes)", rgba_data.len());
|
println!("🧵 [ENCODER] Received first YUV frame (Y: {} bytes)", y_plane.len());
|
||||||
Some((frame_num, timestamp, rgba_data))
|
Some((frame_num, timestamp, y_plane, u_plane, v_plane))
|
||||||
}
|
}
|
||||||
Ok(VideoFrameMessage::Done) => {
|
Ok(VideoFrameMessage::Done) => {
|
||||||
return Err("No frames to encode".to_string());
|
return Err("No frames to encode".to_string());
|
||||||
|
|
@ -938,9 +1047,9 @@ impl ExportOrchestrator {
|
||||||
};
|
};
|
||||||
|
|
||||||
// Determine dimensions from first frame
|
// Determine dimensions from first frame
|
||||||
let (width, height) = if let Some((_, _, ref rgba_data)) = first_frame {
|
let (width, height) = if let Some((_, _, ref y_plane, _, _)) = first_frame {
|
||||||
// Calculate dimensions from buffer size (RGBA = 4 bytes per pixel)
|
// Calculate dimensions from Y plane size (full resolution, 1 byte per pixel)
|
||||||
let pixel_count = rgba_data.len() / 4;
|
let pixel_count = y_plane.len();
|
||||||
// Use settings dimensions if provided, otherwise infer from buffer
|
// Use settings dimensions if provided, otherwise infer from buffer
|
||||||
let w = settings.width.unwrap_or(1920); // Default to 1920 if not specified
|
let w = settings.width.unwrap_or(1920); // Default to 1920 if not specified
|
||||||
let h = settings.height.unwrap_or(1080); // Default to 1080 if not specified
|
let h = settings.height.unwrap_or(1080); // Default to 1080 if not specified
|
||||||
|
|
@ -979,11 +1088,13 @@ impl ExportOrchestrator {
|
||||||
println!("🧵 [ENCODER] Encoder initialized, ready to encode frames");
|
println!("🧵 [ENCODER] Encoder initialized, ready to encode frames");
|
||||||
|
|
||||||
// Process first frame
|
// Process first frame
|
||||||
if let Some((frame_num, timestamp, rgba_data)) = first_frame {
|
if let Some((frame_num, timestamp, y_plane, u_plane, v_plane)) = first_frame {
|
||||||
Self::encode_frame(
|
Self::encode_frame(
|
||||||
&mut encoder,
|
&mut encoder,
|
||||||
&mut output,
|
&mut output,
|
||||||
&rgba_data,
|
&y_plane,
|
||||||
|
&u_plane,
|
||||||
|
&v_plane,
|
||||||
width,
|
width,
|
||||||
height,
|
height,
|
||||||
timestamp,
|
timestamp,
|
||||||
|
|
@ -994,8 +1105,6 @@ impl ExportOrchestrator {
|
||||||
frame: 1,
|
frame: 1,
|
||||||
total: total_frames,
|
total: total_frames,
|
||||||
}).ok();
|
}).ok();
|
||||||
|
|
||||||
println!("🧵 [ENCODER] Encoded frame {}", frame_num);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process remaining frames
|
// Process remaining frames
|
||||||
|
|
@ -1006,11 +1115,13 @@ impl ExportOrchestrator {
|
||||||
}
|
}
|
||||||
|
|
||||||
match frame_rx.recv() {
|
match frame_rx.recv() {
|
||||||
Ok(VideoFrameMessage::Frame { frame_num, timestamp, rgba_data }) => {
|
Ok(VideoFrameMessage::Frame { frame_num, timestamp, y_plane, u_plane, v_plane }) => {
|
||||||
Self::encode_frame(
|
Self::encode_frame(
|
||||||
&mut encoder,
|
&mut encoder,
|
||||||
&mut output,
|
&mut output,
|
||||||
&rgba_data,
|
&y_plane,
|
||||||
|
&u_plane,
|
||||||
|
&v_plane,
|
||||||
width,
|
width,
|
||||||
height,
|
height,
|
||||||
timestamp,
|
timestamp,
|
||||||
|
|
@ -1023,10 +1134,6 @@ impl ExportOrchestrator {
|
||||||
frame: frames_encoded,
|
frame: frames_encoded,
|
||||||
total: total_frames,
|
total: total_frames,
|
||||||
}).ok();
|
}).ok();
|
||||||
|
|
||||||
if frames_encoded % 30 == 0 || frames_encoded == frame_num + 1 {
|
|
||||||
println!("🧵 [ENCODER] Encoded frame {}/{}", frames_encoded, total_frames);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(VideoFrameMessage::Done) => {
|
Ok(VideoFrameMessage::Done) => {
|
||||||
println!("🧵 [ENCODER] All frames received, flushing encoder");
|
println!("🧵 [ENCODER] All frames received, flushing encoder");
|
||||||
|
|
@ -1052,17 +1159,18 @@ impl ExportOrchestrator {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Encode a single RGBA frame
|
/// Encode a single YUV420p frame (planes are converted before being sent to the encoder thread)
|
||||||
fn encode_frame(
|
fn encode_frame(
|
||||||
encoder: &mut ffmpeg_next::encoder::Video,
|
encoder: &mut ffmpeg_next::encoder::Video,
|
||||||
output: &mut ffmpeg_next::format::context::Output,
|
output: &mut ffmpeg_next::format::context::Output,
|
||||||
rgba_data: &[u8],
|
y_plane: &[u8],
|
||||||
|
u_plane: &[u8],
|
||||||
|
v_plane: &[u8],
|
||||||
width: u32,
|
width: u32,
|
||||||
height: u32,
|
height: u32,
|
||||||
timestamp: f64,
|
timestamp: f64,
|
||||||
) -> Result<(), String> {
|
) -> Result<(), String> {
|
||||||
// Convert RGBA to YUV420p
|
// YUV planes arrive already converted (the export loop runs the CPU YUV converter), so no conversion is needed here
|
||||||
let (y_plane, u_plane, v_plane) = video_exporter::rgba_to_yuv420p(rgba_data, width, height);
|
|
||||||
|
|
||||||
// Create FFmpeg video frame
|
// Create FFmpeg video frame
|
||||||
let mut video_frame = ffmpeg_next::frame::Video::new(
|
let mut video_frame = ffmpeg_next::frame::Video::new(
|
||||||
|
|
@ -1087,8 +1195,6 @@ impl ExportOrchestrator {
|
||||||
// Encoder time base is 1/(framerate * 1000), so PTS = timestamp * (framerate * 1000)
|
// Encoder time base is 1/(framerate * 1000), so PTS = timestamp * (framerate * 1000)
|
||||||
let encoder_tb = encoder.time_base();
|
let encoder_tb = encoder.time_base();
|
||||||
let pts = (timestamp * encoder_tb.1 as f64) as i64;
|
let pts = (timestamp * encoder_tb.1 as f64) as i64;
|
||||||
println!("🎬 [ENCODE] Frame timestamp={:.3}s, encoder_tb={}/{}, calculated PTS={}",
|
|
||||||
timestamp, encoder_tb.0, encoder_tb.1, pts);
|
|
||||||
video_frame.set_pts(Some(pts));
|
video_frame.set_pts(Some(pts));
|
||||||
|
|
||||||
// Send frame to encoder
|
// Send frame to encoder
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,191 @@
|
||||||
|
//! Performance instrumentation for video export pipeline
|
||||||
|
//!
|
||||||
|
//! Tracks timing for each stage of the export process:
|
||||||
|
//! - GPU rendering (render_frame_to_gpu_rgba)
|
||||||
|
//! - Async readback (map_async completion)
|
||||||
|
//! - RGBA extraction from the mapped readback buffer, followed by CPU YUV conversion
|
||||||
|
//! - FFmpeg encoding
|
||||||
|
//! - Polling frequency and efficiency
|
||||||
|
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
|
/// Timing record for one exported frame.
///
/// `render_start` is captured when the record is created. Every later stage
/// timestamp starts out as `None` and is expected to be filled in by the code
/// that runs that stage; a duration getter returns `None` until both of its
/// endpoints have been recorded.
#[derive(Debug)]
pub struct FrameMetrics {
    /// Index of the frame this record belongs to
    pub frame_num: usize,
    /// Moment the record was created (taken as the start of rendering)
    pub render_start: Instant,
    pub render_end: Option<Instant>,
    pub submit_time: Option<Instant>,
    pub readback_complete: Option<Instant>,
    pub extraction_start: Option<Instant>,
    pub extraction_end: Option<Instant>,
    pub conversion_start: Option<Instant>,
    pub conversion_end: Option<Instant>,
    pub encode_start: Option<Instant>,
    pub encode_end: Option<Instant>,
}

impl FrameMetrics {
    /// Start a record for `frame_num`, stamping `render_start` with the current time.
    pub fn new(frame_num: usize) -> Self {
        let render_start = Instant::now();
        Self {
            frame_num,
            render_start,
            render_end: None,
            submit_time: None,
            readback_complete: None,
            extraction_start: None,
            extraction_end: None,
            conversion_start: None,
            conversion_end: None,
            encode_start: None,
            encode_end: None,
        }
    }

    /// Time between two optional timestamps, or `None` if either one is missing.
    fn span(start: Option<Instant>, end: Option<Instant>) -> Option<Duration> {
        let (start, end) = start.zip(end)?;
        Some(end.duration_since(start))
    }

    /// Time from record creation until `render_end`.
    pub fn render_duration(&self) -> Option<Duration> {
        Self::span(Some(self.render_start), self.render_end)
    }

    /// Time from `submit_time` until `readback_complete`.
    pub fn readback_duration(&self) -> Option<Duration> {
        Self::span(self.submit_time, self.readback_complete)
    }

    /// Time from `extraction_start` until `extraction_end`.
    pub fn extraction_duration(&self) -> Option<Duration> {
        Self::span(self.extraction_start, self.extraction_end)
    }

    /// Time from `conversion_start` until `conversion_end`.
    pub fn conversion_duration(&self) -> Option<Duration> {
        Self::span(self.conversion_start, self.conversion_end)
    }

    /// Time from `encode_start` until `encode_end`.
    pub fn encode_duration(&self) -> Option<Duration> {
        Self::span(self.encode_start, self.encode_end)
    }

    /// Time from record creation until `encode_end` (independent of `encode_start`).
    pub fn total_duration(&self) -> Option<Duration> {
        Self::span(Some(self.render_start), self.encode_end)
    }
}
|
||||||
|
|
||||||
|
/// Aggregate performance metrics for entire export
|
||||||
|
pub struct ExportMetrics {
|
||||||
|
pub frames: Vec<FrameMetrics>,
|
||||||
|
export_start: Instant,
|
||||||
|
pub poll_count: usize,
|
||||||
|
pub completions_per_poll: Vec<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ExportMetrics {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
frames: Vec::new(),
|
||||||
|
export_start: Instant::now(),
|
||||||
|
poll_count: 0,
|
||||||
|
completions_per_poll: Vec::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print comprehensive performance summary
|
||||||
|
pub fn print_summary(&self) {
|
||||||
|
println!("\n📊 [PERF] Export Performance Summary");
|
||||||
|
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
||||||
|
|
||||||
|
// Calculate averages for each stage
|
||||||
|
let mut render_times = Vec::new();
|
||||||
|
let mut readback_times = Vec::new();
|
||||||
|
let mut extraction_times = Vec::new();
|
||||||
|
let mut conversion_times = Vec::new();
|
||||||
|
let mut encode_times = Vec::new();
|
||||||
|
let mut total_times = Vec::new();
|
||||||
|
|
||||||
|
for metrics in &self.frames {
|
||||||
|
if let Some(d) = metrics.render_duration() {
|
||||||
|
render_times.push(d);
|
||||||
|
}
|
||||||
|
if let Some(d) = metrics.readback_duration() {
|
||||||
|
readback_times.push(d);
|
||||||
|
}
|
||||||
|
if let Some(d) = metrics.extraction_duration() {
|
||||||
|
extraction_times.push(d);
|
||||||
|
}
|
||||||
|
if let Some(d) = metrics.conversion_duration() {
|
||||||
|
conversion_times.push(d);
|
||||||
|
}
|
||||||
|
if let Some(d) = metrics.encode_duration() {
|
||||||
|
encode_times.push(d);
|
||||||
|
}
|
||||||
|
if let Some(d) = metrics.total_duration() {
|
||||||
|
total_times.push(d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let avg = |times: &[Duration]| -> f64 {
|
||||||
|
if times.is_empty() { return 0.0; }
|
||||||
|
times.iter().sum::<Duration>().as_secs_f64() / times.len() as f64 * 1000.0
|
||||||
|
};
|
||||||
|
|
||||||
|
println!("Render: {:.2}ms avg", avg(&render_times));
|
||||||
|
println!("Readback: {:.2}ms avg", avg(&readback_times));
|
||||||
|
println!("Extraction: {:.2}ms avg", avg(&extraction_times));
|
||||||
|
println!("Conversion: {:.2}ms avg", avg(&conversion_times));
|
||||||
|
println!("Encode: {:.2}ms avg", avg(&encode_times));
|
||||||
|
println!("Total: {:.2}ms avg", avg(&total_times));
|
||||||
|
|
||||||
|
let total_export_time = Instant::now().duration_since(self.export_start).as_secs_f64();
|
||||||
|
let fps = self.frames.len() as f64 / total_export_time;
|
||||||
|
println!("\nOverall: {:.2} fps ({:.1}s for {} frames)",
|
||||||
|
fps, total_export_time, self.frames.len());
|
||||||
|
|
||||||
|
if self.poll_count > 0 {
|
||||||
|
let avg_completions = self.completions_per_poll.iter().sum::<usize>() as f64 / self.poll_count as f64;
|
||||||
|
println!("Polls: {} ({:.2} completions/poll avg)",
|
||||||
|
self.poll_count, avg_completions);
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Print detailed per-frame breakdown for last N frames
|
||||||
|
pub fn print_per_frame_details(&self, last_n: usize) {
|
||||||
|
println!("\n📋 [PERF] Per-Frame Breakdown (last {} frames)", last_n);
|
||||||
|
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
||||||
|
println!("{:>5} | {:>8} | {:>8} | {:>8} | {:>8} | {:>8} | {:>8}",
|
||||||
|
"Frame", "Render", "Readback", "Extract", "Convert", "Encode", "Total");
|
||||||
|
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
|
||||||
|
|
||||||
|
let start = if self.frames.len() > last_n {
|
||||||
|
self.frames.len() - last_n
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
for metrics in &self.frames[start..] {
|
||||||
|
println!("{:5} | {:>7.2}ms | {:>7.2}ms | {:>7.2}ms | {:>7.2}ms | {:>7.2}ms | {:>7.2}ms",
|
||||||
|
metrics.frame_num,
|
||||||
|
metrics.render_duration().map(|d| d.as_secs_f64() * 1000.0).unwrap_or(0.0),
|
||||||
|
metrics.readback_duration().map(|d| d.as_secs_f64() * 1000.0).unwrap_or(0.0),
|
||||||
|
metrics.extraction_duration().map(|d| d.as_secs_f64() * 1000.0).unwrap_or(0.0),
|
||||||
|
metrics.conversion_duration().map(|d| d.as_secs_f64() * 1000.0).unwrap_or(0.0),
|
||||||
|
metrics.encode_duration().map(|d| d.as_secs_f64() * 1000.0).unwrap_or(0.0),
|
||||||
|
metrics.total_duration().map(|d| d.as_secs_f64() * 1000.0).unwrap_or(0.0),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,317 @@
|
||||||
|
//! Async triple-buffered GPU readback pipeline for video export
|
||||||
|
//!
|
||||||
|
//! This module implements a pipelined export system that overlaps GPU rendering
|
||||||
|
//! with CPU encoding to maximize throughput. It uses triple buffering to keep
|
||||||
|
//! both GPU and CPU busy simultaneously:
|
||||||
|
//!
|
||||||
|
//! - Frame N: GPU rendering/conversion
|
||||||
|
//! - Frame N-1: GPU→CPU async transfer
|
||||||
|
//! - Frame N-2: CPU encoding
|
||||||
|
//!
|
||||||
|
//! Expected speedup: 5x over synchronous blocking approach
|
||||||
|
|
||||||
|
use std::sync::mpsc::{channel, Receiver, Sender};
|
||||||
|
|
||||||
|
/// Result from a completed async buffer mapping
///
/// Sent through the pipeline's channel by the `map_async` callback once a
/// staging buffer has been mapped successfully. It is plain data, so it is
/// cheap to copy and can be compared in tests.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ReadbackResult {
    /// Index of the pipeline buffer that finished mapping
    pub buffer_id: usize,
    /// Frame number that was rendered into that buffer
    pub frame_num: usize,
    /// Timestamp that was supplied for the frame when its buffer was acquired
    pub timestamp: f64,
}
|
||||||
|
|
||||||
|
/// Lifecycle stage of one pipeline buffer.
///
/// A buffer advances Free → Rendering → ReadbackPending → Mapped → Encoding
/// and then returns to Free.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BufferState {
    /// Idle; may be handed out for a new frame
    Free,
    /// Handed out for rendering; its GPU commands have not been submitted yet
    Rendering,
    /// GPU commands submitted and async mapping requested; waiting for the transfer
    ReadbackPending,
    /// Mapping finished; the CPU may now read the staging buffer
    Mapped,
    /// Data has been taken for encoding; waiting to be released
    Encoding,
}
|
||||||
|
|
||||||
|
/// A single buffer in the triple-buffering pipeline
|
||||||
|
struct PipelineBuffer {
|
||||||
|
id: usize,
|
||||||
|
/// RGBA texture for GPU rendering output (Rgba8Unorm)
|
||||||
|
rgba_texture: wgpu::Texture,
|
||||||
|
rgba_texture_view: wgpu::TextureView,
|
||||||
|
/// Staging buffer for GPU→CPU transfer (MAP_READ)
|
||||||
|
staging_buffer: wgpu::Buffer,
|
||||||
|
/// Current state in the pipeline
|
||||||
|
state: BufferState,
|
||||||
|
/// Frame metadata (set when rendering starts)
|
||||||
|
frame_num: Option<usize>,
|
||||||
|
timestamp: Option<f64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handle to an acquired buffer for rendering
|
||||||
|
pub struct AcquiredBuffer {
|
||||||
|
pub id: usize,
|
||||||
|
pub rgba_texture_view: wgpu::TextureView,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Triple-buffered async readback pipeline
|
||||||
|
///
|
||||||
|
/// Manages 3 buffers cycling through the pipeline:
|
||||||
|
/// Free → Rendering → ReadbackPending → Mapped → Encoding → Free
|
||||||
|
pub struct ReadbackPipeline {
|
||||||
|
buffers: Vec<PipelineBuffer>,
|
||||||
|
/// Channel for async map_async callbacks
|
||||||
|
readback_rx: Receiver<ReadbackResult>,
|
||||||
|
readback_tx: Sender<ReadbackResult>,
|
||||||
|
/// wgpu device and queue references (needed for polling and buffer operations)
|
||||||
|
device: wgpu::Device,
|
||||||
|
queue: wgpu::Queue,
|
||||||
|
/// Buffer dimensions
|
||||||
|
width: u32,
|
||||||
|
height: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ReadbackPipeline {
|
||||||
|
/// Create a new triple-buffered readback pipeline
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `device` - GPU device (will be cloned for async operations)
|
||||||
|
/// * `queue` - GPU queue (will be cloned for async operations)
|
||||||
|
/// * `width` - Frame width in pixels
|
||||||
|
/// * `height` - Frame height in pixels
|
||||||
|
pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self {
|
||||||
|
let (readback_tx, readback_rx) = channel();
|
||||||
|
|
||||||
|
// Create 3 buffers for triple buffering
|
||||||
|
let mut buffers = Vec::new();
|
||||||
|
for id in 0..3 {
|
||||||
|
// RGBA texture (Rgba8Unorm)
|
||||||
|
let rgba_texture = device.create_texture(&wgpu::TextureDescriptor {
|
||||||
|
label: Some(&format!("readback_rgba_texture_{}", id)),
|
||||||
|
size: wgpu::Extent3d {
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
depth_or_array_layers: 1,
|
||||||
|
},
|
||||||
|
mip_level_count: 1,
|
||||||
|
sample_count: 1,
|
||||||
|
dimension: wgpu::TextureDimension::D2,
|
||||||
|
format: wgpu::TextureFormat::Rgba8Unorm,
|
||||||
|
usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC,
|
||||||
|
view_formats: &[],
|
||||||
|
});
|
||||||
|
|
||||||
|
let rgba_texture_view = rgba_texture.create_view(&wgpu::TextureViewDescriptor::default());
|
||||||
|
|
||||||
|
// Staging buffer for GPU→CPU readback
|
||||||
|
let rgba_buffer_size = (width * height * 4) as u64; // Rgba8Unorm = 4 bytes/pixel
|
||||||
|
let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
|
||||||
|
label: Some(&format!("readback_staging_buffer_{}", id)),
|
||||||
|
size: rgba_buffer_size,
|
||||||
|
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
|
||||||
|
mapped_at_creation: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
buffers.push(PipelineBuffer {
|
||||||
|
id,
|
||||||
|
rgba_texture,
|
||||||
|
rgba_texture_view,
|
||||||
|
staging_buffer,
|
||||||
|
state: BufferState::Free,
|
||||||
|
frame_num: None,
|
||||||
|
timestamp: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Self {
|
||||||
|
buffers,
|
||||||
|
readback_rx,
|
||||||
|
readback_tx,
|
||||||
|
device: device.clone(),
|
||||||
|
queue: queue.clone(),
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Acquire a free buffer for rendering (non-blocking)
|
||||||
|
///
|
||||||
|
/// Returns None if all buffers are in use (caller should poll and retry)
|
||||||
|
pub fn acquire(&mut self, frame_num: usize, timestamp: f64) -> Option<AcquiredBuffer> {
|
||||||
|
// Find first Free buffer
|
||||||
|
for buffer in &mut self.buffers {
|
||||||
|
if buffer.state == BufferState::Free {
|
||||||
|
buffer.state = BufferState::Rendering;
|
||||||
|
buffer.frame_num = Some(frame_num);
|
||||||
|
buffer.timestamp = Some(timestamp);
|
||||||
|
|
||||||
|
return Some(AcquiredBuffer {
|
||||||
|
id: buffer.id,
|
||||||
|
rgba_texture_view: buffer.rgba_texture_view.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None // All buffers busy
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Submit GPU commands and initiate async readback
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `buffer_id` - ID of the buffer to submit (from AcquiredBuffer)
|
||||||
|
/// * `encoder` - Command encoder with rendering commands
|
||||||
|
pub fn submit_and_readback(&mut self, buffer_id: usize, mut encoder: wgpu::CommandEncoder) {
|
||||||
|
let buffer = &mut self.buffers[buffer_id];
|
||||||
|
assert_eq!(buffer.state, BufferState::Rendering, "Buffer not in Rendering state");
|
||||||
|
|
||||||
|
// Copy RGBA texture to staging buffer
|
||||||
|
encoder.copy_texture_to_buffer(
|
||||||
|
wgpu::TexelCopyTextureInfo {
|
||||||
|
texture: &buffer.rgba_texture,
|
||||||
|
mip_level: 0,
|
||||||
|
origin: wgpu::Origin3d::ZERO,
|
||||||
|
aspect: wgpu::TextureAspect::All,
|
||||||
|
},
|
||||||
|
wgpu::TexelCopyBufferInfo {
|
||||||
|
buffer: &buffer.staging_buffer,
|
||||||
|
layout: wgpu::TexelCopyBufferLayout {
|
||||||
|
offset: 0,
|
||||||
|
bytes_per_row: Some(self.width * 4), // Rgba8Unorm
|
||||||
|
rows_per_image: Some(self.height),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wgpu::Extent3d {
|
||||||
|
width: self.width,
|
||||||
|
height: self.height,
|
||||||
|
depth_or_array_layers: 1,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
// Submit GPU commands (non-blocking)
|
||||||
|
self.queue.submit(Some(encoder.finish()));
|
||||||
|
|
||||||
|
// Initiate async buffer mapping
|
||||||
|
let frame_num = buffer.frame_num.unwrap();
|
||||||
|
let timestamp = buffer.timestamp.unwrap();
|
||||||
|
let tx = self.readback_tx.clone();
|
||||||
|
|
||||||
|
buffer.staging_buffer.slice(..).map_async(wgpu::MapMode::Read, move |result| {
|
||||||
|
if result.is_ok() {
|
||||||
|
let _ = tx.send(ReadbackResult {
|
||||||
|
buffer_id,
|
||||||
|
frame_num,
|
||||||
|
timestamp,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
buffer.state = BufferState::ReadbackPending;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Poll for completed readbacks (non-blocking)
|
||||||
|
///
|
||||||
|
/// Returns list of buffers that are now ready for CPU encoding.
|
||||||
|
/// Call this frequently to process completed transfers.
|
||||||
|
pub fn poll_nonblocking(&mut self) -> Vec<ReadbackResult> {
|
||||||
|
// Poll GPU without blocking
|
||||||
|
self.device.poll(wgpu::PollType::Poll);
|
||||||
|
|
||||||
|
// Collect all completed readbacks
|
||||||
|
let mut results = Vec::new();
|
||||||
|
while let Ok(result) = self.readback_rx.try_recv() {
|
||||||
|
// Update buffer state to Mapped
|
||||||
|
self.buffers[result.buffer_id].state = BufferState::Mapped;
|
||||||
|
results.push(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
results
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract RGBA data from mapped buffer (for CPU YUV conversion)
|
||||||
|
///
|
||||||
|
/// Buffer must be in Mapped state (after poll_nonblocking returned it).
|
||||||
|
/// This immediately copies the RGBA data, allowing the buffer to be released.
|
||||||
|
pub fn extract_rgba_data(&mut self, buffer_id: usize) -> Vec<u8> {
|
||||||
|
let buffer = &mut self.buffers[buffer_id];
|
||||||
|
assert_eq!(buffer.state, BufferState::Mapped, "Buffer not in Mapped state");
|
||||||
|
|
||||||
|
buffer.state = BufferState::Encoding;
|
||||||
|
|
||||||
|
// Map the buffer and copy RGBA data
|
||||||
|
let slice = buffer.staging_buffer.slice(..);
|
||||||
|
let data = slice.get_mapped_range();
|
||||||
|
|
||||||
|
// Simple copy - RGBA data goes to CPU for conversion
|
||||||
|
data.to_vec()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Release buffer after encoding completes, returning it to the free pool
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `buffer_id` - ID of buffer to release
|
||||||
|
pub fn release(&mut self, buffer_id: usize) {
|
||||||
|
let buffer = &mut self.buffers[buffer_id];
|
||||||
|
assert_eq!(buffer.state, BufferState::Encoding, "Buffer not in Encoding state");
|
||||||
|
|
||||||
|
// Unmap buffer
|
||||||
|
buffer.staging_buffer.unmap();
|
||||||
|
|
||||||
|
// Clear metadata
|
||||||
|
buffer.frame_num = None;
|
||||||
|
buffer.timestamp = None;
|
||||||
|
|
||||||
|
// Return to free pool
|
||||||
|
buffer.state = BufferState::Free;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Flush pipeline and wait for all pending operations
|
||||||
|
///
|
||||||
|
/// Call this at the end of export to ensure all frames are processed
|
||||||
|
pub fn flush(&mut self) -> Vec<ReadbackResult> {
|
||||||
|
let mut all_results = Vec::new();
|
||||||
|
|
||||||
|
// Keep polling until all buffers are Free
|
||||||
|
loop {
|
||||||
|
// Poll for new completions
|
||||||
|
self.device.poll(wgpu::PollType::Poll);
|
||||||
|
|
||||||
|
while let Ok(result) = self.readback_rx.try_recv() {
|
||||||
|
self.buffers[result.buffer_id].state = BufferState::Mapped;
|
||||||
|
all_results.push(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if all buffers are Free (or can be made Free)
|
||||||
|
let mut all_free = true;
|
||||||
|
for buffer in &self.buffers {
|
||||||
|
match buffer.state {
|
||||||
|
BufferState::Free => {},
|
||||||
|
BufferState::Rendering | BufferState::ReadbackPending => {
|
||||||
|
all_free = false;
|
||||||
|
break;
|
||||||
|
},
|
||||||
|
BufferState::Mapped | BufferState::Encoding => {
|
||||||
|
// These should be handled by the caller, shouldn't happen during flush
|
||||||
|
panic!("Buffer in {} state during flush - caller should encode and release",
|
||||||
|
if buffer.state == BufferState::Mapped { "Mapped" } else { "Encoding" });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if all_free {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Small sleep to avoid busy-waiting
|
||||||
|
std::thread::sleep(std::time::Duration::from_millis(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
all_results
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get buffer count currently in flight (for monitoring)
|
||||||
|
pub fn buffers_in_flight(&self) -> usize {
|
||||||
|
self.buffers.iter().filter(|b| b.state != BufferState::Free).count()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -12,7 +12,7 @@ use lightningbeam_core::renderer::{ImageCache, render_document_for_compositing,
|
||||||
use lightningbeam_core::video::VideoManager;
|
use lightningbeam_core::video::VideoManager;
|
||||||
use lightningbeam_core::gpu::{
|
use lightningbeam_core::gpu::{
|
||||||
BufferPool, BufferSpec, BufferFormat, Compositor, CompositorLayer,
|
BufferPool, BufferSpec, BufferFormat, Compositor, CompositorLayer,
|
||||||
SrgbToLinearConverter, EffectProcessor, HDR_FORMAT,
|
SrgbToLinearConverter, EffectProcessor, YuvConverter, HDR_FORMAT,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Reusable frame buffers to avoid allocations
|
/// Reusable frame buffers to avoid allocations
|
||||||
|
|
@ -56,10 +56,22 @@ pub struct ExportGpuResources {
|
||||||
pub srgb_to_linear: SrgbToLinearConverter,
|
pub srgb_to_linear: SrgbToLinearConverter,
|
||||||
/// Effect processor for shader effects
|
/// Effect processor for shader effects
|
||||||
pub effect_processor: EffectProcessor,
|
pub effect_processor: EffectProcessor,
|
||||||
|
/// GPU-accelerated RGBA to YUV420p converter
|
||||||
|
pub yuv_converter: YuvConverter,
|
||||||
/// HDR accumulator texture for compositing
|
/// HDR accumulator texture for compositing
|
||||||
pub hdr_texture: wgpu::Texture,
|
pub hdr_texture: wgpu::Texture,
|
||||||
/// View for HDR texture
|
/// View for HDR texture
|
||||||
pub hdr_texture_view: wgpu::TextureView,
|
pub hdr_texture_view: wgpu::TextureView,
|
||||||
|
/// Persistent RGBA output texture (sRGB, reused for all frames)
|
||||||
|
pub output_texture: wgpu::Texture,
|
||||||
|
/// View for persistent output texture
|
||||||
|
pub output_texture_view: wgpu::TextureView,
|
||||||
|
/// Persistent YUV texture for GPU conversion (R8Unorm, height*1.5, reused for all frames)
|
||||||
|
pub yuv_texture: wgpu::Texture,
|
||||||
|
/// View for persistent YUV texture
|
||||||
|
pub yuv_texture_view: wgpu::TextureView,
|
||||||
|
/// Persistent staging buffer for GPU→CPU readback (reused for all frames)
|
||||||
|
pub staging_buffer: wgpu::Buffer,
|
||||||
/// Linear to sRGB blit pipeline for final output
|
/// Linear to sRGB blit pipeline for final output
|
||||||
pub linear_to_srgb_pipeline: wgpu::RenderPipeline,
|
pub linear_to_srgb_pipeline: wgpu::RenderPipeline,
|
||||||
/// Bind group layout for linear to sRGB blit
|
/// Bind group layout for linear to sRGB blit
|
||||||
|
|
@ -75,6 +87,7 @@ impl ExportGpuResources {
|
||||||
let compositor = Compositor::new(device, HDR_FORMAT);
|
let compositor = Compositor::new(device, HDR_FORMAT);
|
||||||
let srgb_to_linear = SrgbToLinearConverter::new(device);
|
let srgb_to_linear = SrgbToLinearConverter::new(device);
|
||||||
let effect_processor = EffectProcessor::new(device, HDR_FORMAT);
|
let effect_processor = EffectProcessor::new(device, HDR_FORMAT);
|
||||||
|
let yuv_converter = YuvConverter::new(device);
|
||||||
|
|
||||||
// Create HDR accumulator texture
|
// Create HDR accumulator texture
|
||||||
let hdr_texture = device.create_texture(&wgpu::TextureDescriptor {
|
let hdr_texture = device.create_texture(&wgpu::TextureDescriptor {
|
||||||
|
|
@ -95,6 +108,53 @@ impl ExportGpuResources {
|
||||||
});
|
});
|
||||||
let hdr_texture_view = hdr_texture.create_view(&wgpu::TextureViewDescriptor::default());
|
let hdr_texture_view = hdr_texture.create_view(&wgpu::TextureViewDescriptor::default());
|
||||||
|
|
||||||
|
// Create persistent RGBA output texture (sRGB, reused for all frames)
|
||||||
|
let output_texture = device.create_texture(&wgpu::TextureDescriptor {
|
||||||
|
label: Some("export_output_texture"),
|
||||||
|
size: wgpu::Extent3d {
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
depth_or_array_layers: 1,
|
||||||
|
},
|
||||||
|
mip_level_count: 1,
|
||||||
|
sample_count: 1,
|
||||||
|
dimension: wgpu::TextureDimension::D2,
|
||||||
|
format: wgpu::TextureFormat::Rgba8Unorm,
|
||||||
|
usage: wgpu::TextureUsages::RENDER_ATTACHMENT
|
||||||
|
| wgpu::TextureUsages::TEXTURE_BINDING
|
||||||
|
| wgpu::TextureUsages::COPY_SRC,
|
||||||
|
view_formats: &[],
|
||||||
|
});
|
||||||
|
let output_texture_view = output_texture.create_view(&wgpu::TextureViewDescriptor::default());
|
||||||
|
|
||||||
|
// Create persistent YUV texture (Rgba8Unorm, height*1.5 for packed Y+U+V planes)
|
||||||
|
// Note: Using Rgba8Unorm instead of R8Unorm because R8Unorm doesn't support STORAGE_BINDING
|
||||||
|
let yuv_height = height + height / 2; // Y plane + U plane + V plane
|
||||||
|
let yuv_texture = device.create_texture(&wgpu::TextureDescriptor {
|
||||||
|
label: Some("export_yuv_texture"),
|
||||||
|
size: wgpu::Extent3d {
|
||||||
|
width,
|
||||||
|
height: yuv_height,
|
||||||
|
depth_or_array_layers: 1,
|
||||||
|
},
|
||||||
|
mip_level_count: 1,
|
||||||
|
sample_count: 1,
|
||||||
|
dimension: wgpu::TextureDimension::D2,
|
||||||
|
format: wgpu::TextureFormat::Rgba8Unorm,
|
||||||
|
usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::COPY_SRC,
|
||||||
|
view_formats: &[],
|
||||||
|
});
|
||||||
|
let yuv_texture_view = yuv_texture.create_view(&wgpu::TextureViewDescriptor::default());
|
||||||
|
|
||||||
|
// Create persistent staging buffer for GPU→CPU readback
|
||||||
|
let yuv_buffer_size = (width * yuv_height * 4) as u64; // Rgba8Unorm = 4 bytes per pixel
|
||||||
|
let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
|
||||||
|
label: Some("export_staging_buffer"),
|
||||||
|
size: yuv_buffer_size,
|
||||||
|
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
|
||||||
|
mapped_at_creation: false,
|
||||||
|
});
|
||||||
|
|
||||||
// Create linear to sRGB blit pipeline
|
// Create linear to sRGB blit pipeline
|
||||||
let linear_to_srgb_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
|
let linear_to_srgb_bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
|
||||||
label: Some("linear_to_srgb_bind_group_layout"),
|
label: Some("linear_to_srgb_bind_group_layout"),
|
||||||
|
|
@ -179,8 +239,14 @@ impl ExportGpuResources {
|
||||||
compositor,
|
compositor,
|
||||||
srgb_to_linear,
|
srgb_to_linear,
|
||||||
effect_processor,
|
effect_processor,
|
||||||
|
yuv_converter,
|
||||||
hdr_texture,
|
hdr_texture,
|
||||||
hdr_texture_view,
|
hdr_texture_view,
|
||||||
|
output_texture,
|
||||||
|
output_texture_view,
|
||||||
|
yuv_texture,
|
||||||
|
yuv_texture_view,
|
||||||
|
staging_buffer,
|
||||||
linear_to_srgb_pipeline,
|
linear_to_srgb_pipeline,
|
||||||
linear_to_srgb_bind_group_layout,
|
linear_to_srgb_bind_group_layout,
|
||||||
linear_to_srgb_sampler,
|
linear_to_srgb_sampler,
|
||||||
|
|
@ -476,20 +542,11 @@ pub fn receive_and_write_packets(
|
||||||
let encoder_tb = encoder.time_base();
|
let encoder_tb = encoder.time_base();
|
||||||
let stream_tb = output.stream(0).ok_or("No output stream found")?.time_base();
|
let stream_tb = output.stream(0).ok_or("No output stream found")?.time_base();
|
||||||
|
|
||||||
println!("🎬 [PACKET] Encoder TB: {}/{}, Stream TB: {}/{}",
|
|
||||||
encoder_tb.0, encoder_tb.1, stream_tb.0, stream_tb.1);
|
|
||||||
|
|
||||||
while encoder.receive_packet(&mut encoded).is_ok() {
|
while encoder.receive_packet(&mut encoded).is_ok() {
|
||||||
println!("🎬 [PACKET] Before rescale - PTS: {:?}, DTS: {:?}, Duration: {:?}",
|
|
||||||
encoded.pts(), encoded.dts(), encoded.duration());
|
|
||||||
|
|
||||||
encoded.set_stream(0);
|
encoded.set_stream(0);
|
||||||
// Rescale timestamps from encoder time base to stream time base
|
// Rescale timestamps from encoder time base to stream time base
|
||||||
encoded.rescale_ts(encoder_tb, stream_tb);
|
encoded.rescale_ts(encoder_tb, stream_tb);
|
||||||
|
|
||||||
println!("🎬 [PACKET] After rescale - PTS: {:?}, DTS: {:?}, Duration: {:?}",
|
|
||||||
encoded.pts(), encoded.dts(), encoded.duration());
|
|
||||||
|
|
||||||
encoded
|
encoded
|
||||||
.write_interleaved(output)
|
.write_interleaved(output)
|
||||||
.map_err(|e| format!("Failed to write packet: {}", e))?;
|
.map_err(|e| format!("Failed to write packet: {}", e))?;
|
||||||
|
|
@ -660,10 +717,9 @@ pub fn render_frame_to_rgba(
|
||||||
/// * `image_cache` - Image cache for rendering
|
/// * `image_cache` - Image cache for rendering
|
||||||
/// * `video_manager` - Video manager for video clips
|
/// * `video_manager` - Video manager for video clips
|
||||||
/// * `gpu_resources` - HDR GPU resources for compositing
|
/// * `gpu_resources` - HDR GPU resources for compositing
|
||||||
/// * `rgba_buffer` - Output buffer for RGBA pixels (must be width * height * 4 bytes)
|
|
||||||
///
|
///
|
||||||
/// # Returns
|
/// # Returns
|
||||||
/// Ok(()) on success, Err with message on failure
|
/// Ok((y_plane, u_plane, v_plane)) with YUV420p planes on success, Err with message on failure
|
||||||
pub fn render_frame_to_rgba_hdr(
|
pub fn render_frame_to_rgba_hdr(
|
||||||
document: &mut Document,
|
document: &mut Document,
|
||||||
timestamp: f64,
|
timestamp: f64,
|
||||||
|
|
@ -675,8 +731,7 @@ pub fn render_frame_to_rgba_hdr(
|
||||||
image_cache: &mut ImageCache,
|
image_cache: &mut ImageCache,
|
||||||
video_manager: &Arc<std::sync::Mutex<VideoManager>>,
|
video_manager: &Arc<std::sync::Mutex<VideoManager>>,
|
||||||
gpu_resources: &mut ExportGpuResources,
|
gpu_resources: &mut ExportGpuResources,
|
||||||
rgba_buffer: &mut [u8],
|
) -> Result<(Vec<u8>, Vec<u8>, Vec<u8>), String> {
|
||||||
) -> Result<(), String> {
|
|
||||||
use vello::kurbo::Affine;
|
use vello::kurbo::Affine;
|
||||||
|
|
||||||
// Set document time to the frame timestamp
|
// Set document time to the frame timestamp
|
||||||
|
|
@ -879,22 +934,8 @@ pub fn render_frame_to_rgba_hdr(
|
||||||
// Advance frame counter for buffer cleanup
|
// Advance frame counter for buffer cleanup
|
||||||
gpu_resources.buffer_pool.next_frame();
|
gpu_resources.buffer_pool.next_frame();
|
||||||
|
|
||||||
// Create output texture for final sRGB output
|
// Use persistent output texture (already created in ExportGpuResources)
|
||||||
let output_texture = device.create_texture(&wgpu::TextureDescriptor {
|
let output_view = &gpu_resources.output_texture_view;
|
||||||
label: Some("export_output_texture"),
|
|
||||||
size: wgpu::Extent3d {
|
|
||||||
width,
|
|
||||||
height,
|
|
||||||
depth_or_array_layers: 1,
|
|
||||||
},
|
|
||||||
mip_level_count: 1,
|
|
||||||
sample_count: 1,
|
|
||||||
dimension: wgpu::TextureDimension::D2,
|
|
||||||
format: wgpu::TextureFormat::Rgba8Unorm,
|
|
||||||
usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_SRC,
|
|
||||||
view_formats: &[],
|
|
||||||
});
|
|
||||||
let output_view = output_texture.create_view(&wgpu::TextureViewDescriptor::default());
|
|
||||||
|
|
||||||
// Convert HDR to sRGB for output
|
// Convert HDR to sRGB for output
|
||||||
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||||
|
|
@ -940,52 +981,48 @@ pub fn render_frame_to_rgba_hdr(
|
||||||
|
|
||||||
queue.submit(Some(encoder.finish()));
|
queue.submit(Some(encoder.finish()));
|
||||||
|
|
||||||
// GPU readback: Create staging buffer with proper alignment
|
// GPU YUV conversion: Convert RGBA output to YUV420p
|
||||||
let bytes_per_pixel = 4u32; // RGBA8
|
let mut yuv_encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
|
||||||
let bytes_per_row_alignment = 256u32;
|
label: Some("export_yuv_conversion_encoder"),
|
||||||
let unpadded_bytes_per_row = width * bytes_per_pixel;
|
|
||||||
let bytes_per_row = ((unpadded_bytes_per_row + bytes_per_row_alignment - 1)
|
|
||||||
/ bytes_per_row_alignment) * bytes_per_row_alignment;
|
|
||||||
let buffer_size = (bytes_per_row * height) as u64;
|
|
||||||
|
|
||||||
let staging_buffer = device.create_buffer(&wgpu::BufferDescriptor {
|
|
||||||
label: Some("export_staging_buffer"),
|
|
||||||
size: buffer_size,
|
|
||||||
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
|
|
||||||
mapped_at_creation: false,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Copy texture to staging buffer
|
gpu_resources.yuv_converter.convert(
|
||||||
let mut copy_encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
|
device,
|
||||||
label: Some("export_copy_encoder"),
|
&mut yuv_encoder,
|
||||||
});
|
output_view,
|
||||||
|
&gpu_resources.yuv_texture_view,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
);
|
||||||
|
|
||||||
copy_encoder.copy_texture_to_buffer(
|
// Copy YUV texture to persistent staging buffer
|
||||||
|
let yuv_height = height + height / 2; // Y plane + U plane + V plane
|
||||||
|
yuv_encoder.copy_texture_to_buffer(
|
||||||
wgpu::TexelCopyTextureInfo {
|
wgpu::TexelCopyTextureInfo {
|
||||||
texture: &output_texture,
|
texture: &gpu_resources.yuv_texture,
|
||||||
mip_level: 0,
|
mip_level: 0,
|
||||||
origin: wgpu::Origin3d::ZERO,
|
origin: wgpu::Origin3d::ZERO,
|
||||||
aspect: wgpu::TextureAspect::All,
|
aspect: wgpu::TextureAspect::All,
|
||||||
},
|
},
|
||||||
wgpu::TexelCopyBufferInfo {
|
wgpu::TexelCopyBufferInfo {
|
||||||
buffer: &staging_buffer,
|
buffer: &gpu_resources.staging_buffer,
|
||||||
layout: wgpu::TexelCopyBufferLayout {
|
layout: wgpu::TexelCopyBufferLayout {
|
||||||
offset: 0,
|
offset: 0,
|
||||||
bytes_per_row: Some(bytes_per_row),
|
bytes_per_row: Some(width * 4), // Rgba8Unorm = 4 bytes per pixel
|
||||||
rows_per_image: Some(height),
|
rows_per_image: Some(yuv_height),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
wgpu::Extent3d {
|
wgpu::Extent3d {
|
||||||
width,
|
width,
|
||||||
height,
|
height: yuv_height,
|
||||||
depth_or_array_layers: 1,
|
depth_or_array_layers: 1,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
queue.submit(Some(copy_encoder.finish()));
|
queue.submit(Some(yuv_encoder.finish()));
|
||||||
|
|
||||||
// Map buffer and read pixels (synchronous)
|
// Map buffer and read YUV pixels (synchronous)
|
||||||
let buffer_slice = staging_buffer.slice(..);
|
let buffer_slice = gpu_resources.staging_buffer.slice(..);
|
||||||
let (sender, receiver) = std::sync::mpsc::channel();
|
let (sender, receiver) = std::sync::mpsc::channel();
|
||||||
buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
|
buffer_slice.map_async(wgpu::MapMode::Read, move |result| {
|
||||||
sender.send(result).ok();
|
sender.send(result).ok();
|
||||||
|
|
@ -998,20 +1035,319 @@ pub fn render_frame_to_rgba_hdr(
|
||||||
.map_err(|_| "Failed to receive buffer mapping result")?
|
.map_err(|_| "Failed to receive buffer mapping result")?
|
||||||
.map_err(|e| format!("Failed to map buffer: {:?}", e))?;
|
.map_err(|e| format!("Failed to map buffer: {:?}", e))?;
|
||||||
|
|
||||||
// Copy data from mapped buffer to output, removing padding
|
// Extract Y, U, V planes from packed YUV buffer
|
||||||
let data = buffer_slice.get_mapped_range();
|
let data = buffer_slice.get_mapped_range();
|
||||||
for y in 0..height as usize {
|
let width_usize = width as usize;
|
||||||
let src_offset = y * bytes_per_row as usize;
|
let height_usize = height as usize;
|
||||||
let dst_offset = y * unpadded_bytes_per_row as usize;
|
|
||||||
let row_bytes = unpadded_bytes_per_row as usize;
|
// Y plane: rows 0 to height-1 (extract R channel from Rgba8Unorm)
|
||||||
rgba_buffer[dst_offset..dst_offset + row_bytes]
|
let y_plane_size = width_usize * height_usize;
|
||||||
.copy_from_slice(&data[src_offset..src_offset + row_bytes]);
|
let mut y_plane = vec![0u8; y_plane_size];
|
||||||
|
for y in 0..height_usize {
|
||||||
|
let src_row_offset = y * width_usize * 4; // 4 bytes per pixel (Rgba8Unorm)
|
||||||
|
let dst_row_offset = y * width_usize;
|
||||||
|
for x in 0..width_usize {
|
||||||
|
y_plane[dst_row_offset + x] = data[src_row_offset + x * 4]; // Extract R channel
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// U and V planes: rows height to height + height/2 - 1 (half resolution, side-by-side layout)
|
||||||
|
// U plane is in left half (columns 0 to width/2-1), V plane is in right half (columns width/2 to width-1)
|
||||||
|
let chroma_width = width_usize / 2;
|
||||||
|
let chroma_height = height_usize / 2;
|
||||||
|
let chroma_row_start = height_usize * width_usize * 4; // Start of chroma rows in bytes
|
||||||
|
|
||||||
|
let mut u_plane = vec![0u8; chroma_width * chroma_height];
|
||||||
|
let mut v_plane = vec![0u8; chroma_width * chroma_height];
|
||||||
|
|
||||||
|
for y in 0..chroma_height {
|
||||||
|
let row_offset = chroma_row_start + y * width_usize * 4; // Full width rows in chroma region
|
||||||
|
|
||||||
|
// Extract U plane (left half: columns 0 to chroma_width-1)
|
||||||
|
let u_start = row_offset;
|
||||||
|
let dst_offset = y * chroma_width;
|
||||||
|
for x in 0..chroma_width {
|
||||||
|
u_plane[dst_offset + x] = data[u_start + x * 4]; // Extract R channel
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract V plane (right half: columns width/2 to width/2+chroma_width-1)
|
||||||
|
let v_start = row_offset + chroma_width * 4;
|
||||||
|
for x in 0..chroma_width {
|
||||||
|
v_plane[dst_offset + x] = data[v_start + x * 4]; // Extract R channel
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
drop(data);
|
drop(data);
|
||||||
staging_buffer.unmap();
|
gpu_resources.staging_buffer.unmap();
|
||||||
|
|
||||||
Ok(())
|
Ok((y_plane, u_plane, v_plane))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Render frame to GPU RGBA texture (non-blocking, for async pipeline)
|
||||||
|
///
|
||||||
|
/// Similar to render_frame_to_rgba_hdr but renders to an external RGBA texture view
|
||||||
|
/// (provided by ReadbackPipeline) and returns the command encoder WITHOUT blocking on readback.
|
||||||
|
/// The caller (ReadbackPipeline) will submit the encoder and handle async readback.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `document` - Document to render
|
||||||
|
/// * `timestamp` - Time in seconds to render at
|
||||||
|
/// * `width` - Frame width in pixels
|
||||||
|
/// * `height` - Frame height in pixels
|
||||||
|
/// * `device` - wgpu device
|
||||||
|
/// * `queue` - wgpu queue
|
||||||
|
/// * `renderer` - Vello renderer
|
||||||
|
/// * `image_cache` - Image cache for rendering
|
||||||
|
/// * `video_manager` - Video manager for video clips
|
||||||
|
/// * `gpu_resources` - HDR GPU resources for compositing
|
||||||
|
/// * `rgba_texture_view` - External RGBA texture view (from ReadbackPipeline)
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// Command encoder ready for submission (caller submits via ReadbackPipeline)
|
||||||
|
pub fn render_frame_to_gpu_rgba(
|
||||||
|
document: &mut Document,
|
||||||
|
timestamp: f64,
|
||||||
|
width: u32,
|
||||||
|
height: u32,
|
||||||
|
device: &wgpu::Device,
|
||||||
|
queue: &wgpu::Queue,
|
||||||
|
renderer: &mut vello::Renderer,
|
||||||
|
image_cache: &mut ImageCache,
|
||||||
|
video_manager: &Arc<std::sync::Mutex<VideoManager>>,
|
||||||
|
gpu_resources: &mut ExportGpuResources,
|
||||||
|
rgba_texture_view: &wgpu::TextureView,
|
||||||
|
) -> Result<wgpu::CommandEncoder, String> {
|
||||||
|
use vello::kurbo::Affine;
|
||||||
|
|
||||||
|
// Set document time to the frame timestamp
|
||||||
|
document.current_time = timestamp;
|
||||||
|
|
||||||
|
// Use identity transform for export (document coordinates = pixel coordinates)
|
||||||
|
let base_transform = Affine::IDENTITY;
|
||||||
|
|
||||||
|
// Render document for compositing (returns per-layer scenes)
|
||||||
|
let composite_result = render_document_for_compositing(
|
||||||
|
document,
|
||||||
|
base_transform,
|
||||||
|
image_cache,
|
||||||
|
video_manager,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Buffer specs for layer rendering
|
||||||
|
let layer_spec = BufferSpec::new(width, height, BufferFormat::Rgba8Srgb);
|
||||||
|
let hdr_spec = BufferSpec::new(width, height, BufferFormat::Rgba16Float);
|
||||||
|
|
||||||
|
// Render parameters for Vello (transparent background for layers)
|
||||||
|
let layer_render_params = vello::RenderParams {
|
||||||
|
base_color: vello::peniko::Color::TRANSPARENT,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
antialiasing_method: vello::AaConfig::Area,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Render background and composite it
|
||||||
|
let bg_srgb_handle = gpu_resources.buffer_pool.acquire(device, layer_spec);
|
||||||
|
let bg_hdr_handle = gpu_resources.buffer_pool.acquire(device, hdr_spec);
|
||||||
|
|
||||||
|
if let (Some(bg_srgb_view), Some(bg_hdr_view)) = (
|
||||||
|
gpu_resources.buffer_pool.get_view(bg_srgb_handle),
|
||||||
|
gpu_resources.buffer_pool.get_view(bg_hdr_handle),
|
||||||
|
) {
|
||||||
|
renderer.render_to_texture(device, queue, &composite_result.background, bg_srgb_view, &layer_render_params)
|
||||||
|
.map_err(|e| format!("Failed to render background: {}", e))?;
|
||||||
|
|
||||||
|
let mut convert_encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
|
||||||
|
label: Some("export_bg_srgb_to_linear_encoder"),
|
||||||
|
});
|
||||||
|
gpu_resources.srgb_to_linear.convert(device, &mut convert_encoder, bg_srgb_view, bg_hdr_view);
|
||||||
|
queue.submit(Some(convert_encoder.finish()));
|
||||||
|
|
||||||
|
let bg_compositor_layer = CompositorLayer::normal(bg_hdr_handle, 1.0);
|
||||||
|
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
|
||||||
|
label: Some("export_bg_composite_encoder"),
|
||||||
|
});
|
||||||
|
gpu_resources.compositor.composite(
|
||||||
|
device,
|
||||||
|
queue,
|
||||||
|
&mut encoder,
|
||||||
|
&[bg_compositor_layer],
|
||||||
|
&gpu_resources.buffer_pool,
|
||||||
|
&gpu_resources.hdr_texture_view,
|
||||||
|
Some([0.0, 0.0, 0.0, 1.0]),
|
||||||
|
);
|
||||||
|
queue.submit(Some(encoder.finish()));
|
||||||
|
}
|
||||||
|
gpu_resources.buffer_pool.release(bg_srgb_handle);
|
||||||
|
gpu_resources.buffer_pool.release(bg_hdr_handle);
|
||||||
|
|
||||||
|
// Render and composite each layer incrementally
|
||||||
|
for rendered_layer in &composite_result.layers {
|
||||||
|
if !rendered_layer.has_content {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match &rendered_layer.layer_type {
|
||||||
|
RenderedLayerType::Content => {
|
||||||
|
let srgb_handle = gpu_resources.buffer_pool.acquire(device, layer_spec);
|
||||||
|
let hdr_layer_handle = gpu_resources.buffer_pool.acquire(device, hdr_spec);
|
||||||
|
|
||||||
|
if let (Some(srgb_view), Some(hdr_layer_view)) = (
|
||||||
|
gpu_resources.buffer_pool.get_view(srgb_handle),
|
||||||
|
gpu_resources.buffer_pool.get_view(hdr_layer_handle),
|
||||||
|
) {
|
||||||
|
renderer.render_to_texture(device, queue, &rendered_layer.scene, srgb_view, &layer_render_params)
|
||||||
|
.map_err(|e| format!("Failed to render layer: {}", e))?;
|
||||||
|
|
||||||
|
let mut convert_encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
|
||||||
|
label: Some("export_layer_srgb_to_linear_encoder"),
|
||||||
|
});
|
||||||
|
gpu_resources.srgb_to_linear.convert(device, &mut convert_encoder, srgb_view, hdr_layer_view);
|
||||||
|
queue.submit(Some(convert_encoder.finish()));
|
||||||
|
|
||||||
|
let compositor_layer = CompositorLayer::normal(hdr_layer_handle, rendered_layer.opacity);
|
||||||
|
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
|
||||||
|
label: Some("export_layer_composite_encoder"),
|
||||||
|
});
|
||||||
|
gpu_resources.compositor.composite(
|
||||||
|
device,
|
||||||
|
queue,
|
||||||
|
&mut encoder,
|
||||||
|
&[compositor_layer],
|
||||||
|
&gpu_resources.buffer_pool,
|
||||||
|
&gpu_resources.hdr_texture_view,
|
||||||
|
None,
|
||||||
|
);
|
||||||
|
queue.submit(Some(encoder.finish()));
|
||||||
|
}
|
||||||
|
gpu_resources.buffer_pool.release(srgb_handle);
|
||||||
|
gpu_resources.buffer_pool.release(hdr_layer_handle);
|
||||||
|
}
|
||||||
|
RenderedLayerType::Effect { effect_instances } => {
|
||||||
|
// Effect layer - apply effects to the current HDR accumulator
|
||||||
|
let current_time = document.current_time;
|
||||||
|
|
||||||
|
for effect_instance in effect_instances {
|
||||||
|
// Get effect definition from document
|
||||||
|
let Some(effect_def) = document.get_effect_definition(&effect_instance.clip_id) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Compile effect if needed
|
||||||
|
if !gpu_resources.effect_processor.is_compiled(&effect_def.id) {
|
||||||
|
let success = gpu_resources.effect_processor.compile_effect(device, effect_def);
|
||||||
|
if !success {
|
||||||
|
eprintln!("Failed to compile effect: {}", effect_def.name);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create EffectInstance from ClipInstance for the processor
|
||||||
|
let effect_inst = lightningbeam_core::effect::EffectInstance::new(
|
||||||
|
effect_def,
|
||||||
|
effect_instance.timeline_start,
|
||||||
|
effect_instance.timeline_start + effect_instance.effective_duration(lightningbeam_core::effect::EFFECT_DURATION),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Acquire temp buffer for effect output (HDR format)
|
||||||
|
let effect_output_handle = gpu_resources.buffer_pool.acquire(device, hdr_spec);
|
||||||
|
|
||||||
|
if let Some(effect_output_view) = gpu_resources.buffer_pool.get_view(effect_output_handle) {
|
||||||
|
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
|
||||||
|
label: Some("export_effect_encoder"),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Apply effect: HDR accumulator → effect output buffer
|
||||||
|
let applied = gpu_resources.effect_processor.apply_effect(
|
||||||
|
device,
|
||||||
|
queue,
|
||||||
|
&mut encoder,
|
||||||
|
effect_def,
|
||||||
|
&effect_inst,
|
||||||
|
&gpu_resources.hdr_texture_view,
|
||||||
|
effect_output_view,
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
current_time,
|
||||||
|
);
|
||||||
|
|
||||||
|
if applied {
|
||||||
|
// Copy effect output back to HDR accumulator
|
||||||
|
encoder.copy_texture_to_texture(
|
||||||
|
wgpu::TexelCopyTextureInfo {
|
||||||
|
texture: gpu_resources.buffer_pool.get_texture(effect_output_handle).unwrap(),
|
||||||
|
mip_level: 0,
|
||||||
|
origin: wgpu::Origin3d::ZERO,
|
||||||
|
aspect: wgpu::TextureAspect::All,
|
||||||
|
},
|
||||||
|
wgpu::TexelCopyTextureInfo {
|
||||||
|
texture: &gpu_resources.hdr_texture,
|
||||||
|
mip_level: 0,
|
||||||
|
origin: wgpu::Origin3d::ZERO,
|
||||||
|
aspect: wgpu::TextureAspect::All,
|
||||||
|
},
|
||||||
|
wgpu::Extent3d {
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
depth_or_array_layers: 1,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
queue.submit(Some(encoder.finish()));
|
||||||
|
}
|
||||||
|
|
||||||
|
gpu_resources.buffer_pool.release(effect_output_handle);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert HDR to sRGB (linear → sRGB), render directly to external RGBA texture
|
||||||
|
let output_view = rgba_texture_view;
|
||||||
|
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||||
|
label: Some("export_linear_to_srgb_bind_group"),
|
||||||
|
layout: &gpu_resources.linear_to_srgb_bind_group_layout,
|
||||||
|
entries: &[
|
||||||
|
wgpu::BindGroupEntry {
|
||||||
|
binding: 0,
|
||||||
|
resource: wgpu::BindingResource::TextureView(&gpu_resources.hdr_texture_view),
|
||||||
|
},
|
||||||
|
wgpu::BindGroupEntry {
|
||||||
|
binding: 1,
|
||||||
|
resource: wgpu::BindingResource::Sampler(&gpu_resources.linear_to_srgb_sampler),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
|
||||||
|
label: Some("export_linear_to_srgb_encoder"),
|
||||||
|
});
|
||||||
|
|
||||||
|
{
|
||||||
|
let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
|
||||||
|
label: Some("export_linear_to_srgb_pass"),
|
||||||
|
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
|
||||||
|
view: &output_view,
|
||||||
|
resolve_target: None,
|
||||||
|
ops: wgpu::Operations {
|
||||||
|
load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
|
||||||
|
store: wgpu::StoreOp::Store,
|
||||||
|
},
|
||||||
|
depth_slice: None,
|
||||||
|
})],
|
||||||
|
depth_stencil_attachment: None,
|
||||||
|
occlusion_query_set: None,
|
||||||
|
timestamp_writes: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
render_pass.set_pipeline(&gpu_resources.linear_to_srgb_pipeline);
|
||||||
|
render_pass.set_bind_group(0, &bind_group, &[]);
|
||||||
|
render_pass.draw(0..4, 0..1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return encoder for caller to submit (ReadbackPipeline will handle submission and async readback)
|
||||||
|
// The final pass recorded above targets the external RGBA texture and is not submitted here; no GPU YUV conversion is recorded
|
||||||
|
Ok(encoder)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
|
||||||
|
|
@ -2936,7 +2936,6 @@ impl eframe::App for EditorApp {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if let Some(progress) = orchestrator.poll_progress() {
|
if let Some(progress) = orchestrator.poll_progress() {
|
||||||
println!("📨 [MAIN] Received progress from orchestrator!");
|
|
||||||
match progress {
|
match progress {
|
||||||
lightningbeam_core::export::ExportProgress::Started { total_frames } => {
|
lightningbeam_core::export::ExportProgress::Started { total_frames } => {
|
||||||
println!("Export started: {} frames", total_frames);
|
println!("Export started: {} frames", total_frames);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue