//! GPU-accelerated raster brush engine.
//!
//! [`GpuBrushEngine`] wraps the `brush_dab.wgsl` compute pipeline and manages
//! per-keyframe canvas texture pairs (ping-pong) used as the live canvas during
//! raster painting.
//!
//! ## Lifecycle
//!
//! 1. **Stroke start** — caller supplies the initial pixel data; the engine uploads
//!    it to both canvas textures so either can serve as source/dest.
//! 2. **Each drag event** — [`GpuBrushEngine::render_dabs`] copies src→dst,
//!    dispatches the compute shader, then swaps src/dst.
//! 3. **Stroke end** — [`GpuBrushEngine::readback_canvas`] copies the current
//!    source texture into a staging buffer and returns the raw RGBA bytes
//!    (blocking — uses `device.poll(PollType::wait_indefinitely())`).
//! 4. **Idle** — canvas textures are kept alive for the next stroke (no re-upload
//!    needed if the layer has not changed).
|
||
|
||
use std::collections::HashMap;
|
||
use uuid::Uuid;
|
||
use lightningbeam_core::brush_engine::GpuDab;
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Colour-space helpers
|
||
// ---------------------------------------------------------------------------
|
||
|
||
/// Decode one sRGB-encoded channel value to linear light.
///
/// `c` is a *normalised* channel in `[0, 1]`, not a raw byte; a byte must be
/// divided by 255 before being passed in. The result is also in `[0, 1]`.
///
/// This is the piecewise sRGB transfer function: a linear segment up to the
/// 0.04045 knee and a 2.4-exponent power curve above it.
fn srgb_to_linear(c: f32) -> f32 {
    if c <= 0.04045 {
        c / 12.92
    } else {
        ((c + 0.055) / 1.055).powf(2.4)
    }
}
|
||
|
||
/// Encode one linear-light channel byte as an sRGB-encoded byte.
///
/// `c` is a linear channel quantised to `0..=255`. It is normalised to
/// `[0, 1]`, passed through the piecewise sRGB encoding curve (linear segment
/// up to 0.0031308, 1/2.4 power curve above), and re-quantised to a byte.
fn linear_to_srgb_byte(c: u8) -> u8 {
    let f = f32::from(c) / 255.0;
    let encoded = if f <= 0.0031308 {
        f * 12.92
    } else {
        1.055 * f.powf(1.0 / 2.4) - 0.055
    };
    // `+ 0.5` turns the truncating float→int cast into round-to-nearest.
    (encoded * 255.0 + 0.5) as u8
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Per-keyframe canvas texture pair (ping-pong)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
/// A pair of textures used for double-buffered canvas rendering.
|
||
///
|
||
/// `current` indexes the texture that holds the up-to-date canvas state.
|
||
pub struct CanvasPair {
|
||
pub textures: [wgpu::Texture; 2],
|
||
pub views: [wgpu::TextureView; 2],
|
||
/// Index (0 or 1) of the texture that is the current "source" (authoritative).
|
||
pub current: usize,
|
||
pub width: u32,
|
||
pub height: u32,
|
||
}
|
||
|
||
impl CanvasPair {
|
||
pub fn new(device: &wgpu::Device, width: u32, height: u32) -> Self {
|
||
let desc = wgpu::TextureDescriptor {
|
||
label: Some("raster_canvas"),
|
||
size: wgpu::Extent3d { width, height, depth_or_array_layers: 1 },
|
||
mip_level_count: 1,
|
||
sample_count: 1,
|
||
dimension: wgpu::TextureDimension::D2,
|
||
format: wgpu::TextureFormat::Rgba8Unorm,
|
||
usage: wgpu::TextureUsages::TEXTURE_BINDING
|
||
| wgpu::TextureUsages::STORAGE_BINDING
|
||
| wgpu::TextureUsages::COPY_SRC
|
||
| wgpu::TextureUsages::COPY_DST,
|
||
view_formats: &[],
|
||
};
|
||
let t0 = device.create_texture(&desc);
|
||
let t1 = device.create_texture(&desc);
|
||
let v0 = t0.create_view(&wgpu::TextureViewDescriptor::default());
|
||
let v1 = t1.create_view(&wgpu::TextureViewDescriptor::default());
|
||
Self {
|
||
textures: [t0, t1],
|
||
views: [v0, v1],
|
||
current: 0,
|
||
width,
|
||
height,
|
||
}
|
||
}
|
||
|
||
/// Upload raw RGBA bytes to both textures (call once at stroke start).
|
||
///
|
||
/// `pixels` is expected to be **sRGB-encoded premultiplied** (the format stored
|
||
/// in `raw_pixels` / PNG files). The values are decoded to linear premultiplied
|
||
/// before being written to the canvas, which operates entirely in linear space.
|
||
pub fn upload(&self, queue: &wgpu::Queue, pixels: &[u8]) {
|
||
// Decode sRGB-premultiplied → linear premultiplied for the GPU canvas.
|
||
let linear: Vec<u8> = pixels.chunks_exact(4).flat_map(|p| {
|
||
let r = (srgb_to_linear(p[0] as f32 / 255.0) * 255.0 + 0.5) as u8;
|
||
let g = (srgb_to_linear(p[1] as f32 / 255.0) * 255.0 + 0.5) as u8;
|
||
let b = (srgb_to_linear(p[2] as f32 / 255.0) * 255.0 + 0.5) as u8;
|
||
[r, g, b, p[3]]
|
||
}).collect();
|
||
|
||
let layout = wgpu::TexelCopyBufferLayout {
|
||
offset: 0,
|
||
bytes_per_row: Some(self.width * 4),
|
||
rows_per_image: Some(self.height),
|
||
};
|
||
let extent = wgpu::Extent3d {
|
||
width: self.width,
|
||
height: self.height,
|
||
depth_or_array_layers: 1,
|
||
};
|
||
for tex in &self.textures {
|
||
queue.write_texture(
|
||
wgpu::TexelCopyTextureInfo {
|
||
texture: tex,
|
||
mip_level: 0,
|
||
origin: wgpu::Origin3d::ZERO,
|
||
aspect: wgpu::TextureAspect::All,
|
||
},
|
||
&linear,
|
||
layout,
|
||
extent,
|
||
);
|
||
}
|
||
}
|
||
|
||
/// Source (current, authoritative) texture.
|
||
pub fn src(&self) -> &wgpu::Texture { &self.textures[self.current] }
|
||
/// Source texture view.
|
||
pub fn src_view(&self) -> &wgpu::TextureView { &self.views[self.current] }
|
||
/// Destination (write target) texture.
|
||
pub fn dst(&self) -> &wgpu::Texture { &self.textures[1 - self.current] }
|
||
/// Destination texture view.
|
||
pub fn dst_view(&self) -> &wgpu::TextureView { &self.views[1 - self.current] }
|
||
/// Commit the just-completed dispatch: make dst the new source.
|
||
pub fn swap(&mut self) { self.current = 1 - self.current; }
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// GpuBrushEngine
|
||
// ---------------------------------------------------------------------------
|
||
|
||
/// GPU brush engine — holds the compute pipeline and per-keyframe canvas pairs.
|
||
pub struct GpuBrushEngine {
|
||
compute_pipeline: wgpu::ComputePipeline,
|
||
compute_bg_layout: wgpu::BindGroupLayout,
|
||
|
||
/// Canvas texture pairs keyed by keyframe UUID.
|
||
pub canvases: HashMap<Uuid, CanvasPair>,
|
||
}
|
||
|
||
/// CPU-side parameters uniform for the compute shader.
|
||
#[repr(C)]
|
||
#[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)]
|
||
struct DabParams {
|
||
bbox_x0: i32,
|
||
bbox_y0: i32,
|
||
bbox_w: u32,
|
||
bbox_h: u32,
|
||
num_dabs: u32,
|
||
canvas_w: u32,
|
||
canvas_h: u32,
|
||
_pad: u32,
|
||
}
|
||
|
||
impl GpuBrushEngine {
|
||
/// Create the pipeline. Returns `Err` if the device lacks the required
|
||
/// storage-texture capability for `Rgba8Unorm`.
|
||
pub fn new(device: &wgpu::Device) -> Self {
|
||
let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
|
||
label: Some("brush_dab_shader"),
|
||
source: wgpu::ShaderSource::Wgsl(
|
||
include_str!("panes/shaders/brush_dab.wgsl").into(),
|
||
),
|
||
});
|
||
|
||
let compute_bg_layout = device.create_bind_group_layout(
|
||
&wgpu::BindGroupLayoutDescriptor {
|
||
label: Some("brush_dab_bgl"),
|
||
entries: &[
|
||
// 0: dab storage buffer (read-only)
|
||
wgpu::BindGroupLayoutEntry {
|
||
binding: 0,
|
||
visibility: wgpu::ShaderStages::COMPUTE,
|
||
ty: wgpu::BindingType::Buffer {
|
||
ty: wgpu::BufferBindingType::Storage { read_only: true },
|
||
has_dynamic_offset: false,
|
||
min_binding_size: None,
|
||
},
|
||
count: None,
|
||
},
|
||
// 1: params uniform
|
||
wgpu::BindGroupLayoutEntry {
|
||
binding: 1,
|
||
visibility: wgpu::ShaderStages::COMPUTE,
|
||
ty: wgpu::BindingType::Buffer {
|
||
ty: wgpu::BufferBindingType::Uniform,
|
||
has_dynamic_offset: false,
|
||
min_binding_size: None,
|
||
},
|
||
count: None,
|
||
},
|
||
// 2: canvas source (sampled)
|
||
wgpu::BindGroupLayoutEntry {
|
||
binding: 2,
|
||
visibility: wgpu::ShaderStages::COMPUTE,
|
||
ty: wgpu::BindingType::Texture {
|
||
sample_type: wgpu::TextureSampleType::Float { filterable: true },
|
||
view_dimension: wgpu::TextureViewDimension::D2,
|
||
multisampled: false,
|
||
},
|
||
count: None,
|
||
},
|
||
// 3: canvas destination (write-only storage)
|
||
wgpu::BindGroupLayoutEntry {
|
||
binding: 3,
|
||
visibility: wgpu::ShaderStages::COMPUTE,
|
||
ty: wgpu::BindingType::StorageTexture {
|
||
access: wgpu::StorageTextureAccess::WriteOnly,
|
||
format: wgpu::TextureFormat::Rgba8Unorm,
|
||
view_dimension: wgpu::TextureViewDimension::D2,
|
||
},
|
||
count: None,
|
||
},
|
||
],
|
||
},
|
||
);
|
||
|
||
let pipeline_layout = device.create_pipeline_layout(
|
||
&wgpu::PipelineLayoutDescriptor {
|
||
label: Some("brush_dab_pl"),
|
||
bind_group_layouts: &[&compute_bg_layout],
|
||
push_constant_ranges: &[],
|
||
},
|
||
);
|
||
|
||
let compute_pipeline = device.create_compute_pipeline(
|
||
&wgpu::ComputePipelineDescriptor {
|
||
label: Some("brush_dab_pipeline"),
|
||
layout: Some(&pipeline_layout),
|
||
module: &shader,
|
||
entry_point: Some("main"),
|
||
compilation_options: Default::default(),
|
||
cache: None,
|
||
},
|
||
);
|
||
|
||
Self {
|
||
compute_pipeline,
|
||
compute_bg_layout,
|
||
canvases: HashMap::new(),
|
||
}
|
||
}
|
||
|
||
/// Ensure a canvas pair exists for `keyframe_id` at the given dimensions.
|
||
///
|
||
/// If the canvas exists but has different dimensions it is replaced.
|
||
pub fn ensure_canvas(
|
||
&mut self,
|
||
device: &wgpu::Device,
|
||
keyframe_id: Uuid,
|
||
width: u32,
|
||
height: u32,
|
||
) -> &mut CanvasPair {
|
||
let needs_new = self.canvases.get(&keyframe_id)
|
||
.map_or(true, |c| c.width != width || c.height != height);
|
||
if needs_new {
|
||
self.canvases.insert(keyframe_id, CanvasPair::new(device, width, height));
|
||
} else {
|
||
}
|
||
self.canvases.get_mut(&keyframe_id).unwrap()
|
||
}
|
||
|
||
/// Dispatch the brush compute shader for `dabs` onto the canvas of `keyframe_id`.
|
||
///
|
||
/// Paint/erase dabs are batched in a single GPU dispatch with a full canvas copy.
|
||
/// Smudge dabs are dispatched sequentially (one per dab) with a bbox-only copy
|
||
/// so each dab reads the canvas state written by the previous dab.
|
||
///
|
||
/// If `dabs` is empty, does nothing.
|
||
pub fn render_dabs(
|
||
&mut self,
|
||
device: &wgpu::Device,
|
||
queue: &wgpu::Queue,
|
||
keyframe_id: Uuid,
|
||
dabs: &[GpuDab],
|
||
bbox: (i32, i32, i32, i32),
|
||
canvas_w: u32,
|
||
canvas_h: u32,
|
||
) {
|
||
if dabs.is_empty() { return; }
|
||
|
||
// Smudge dabs must be applied one at a time so each dab reads the canvas
|
||
// state written by the previous dab. Use bbox-only copies (union of current
|
||
// and previous dab) to avoid an expensive full-canvas copy per dab.
|
||
let is_smudge = dabs.first().map(|d| d.blend_mode == 2).unwrap_or(false);
|
||
if is_smudge {
|
||
let mut prev_bbox: Option<(i32, i32, i32, i32)> = None;
|
||
for dab in dabs {
|
||
let r = dab.radius + 1.0;
|
||
let cur_bbox = (
|
||
(dab.x - r).floor() as i32,
|
||
(dab.y - r).floor() as i32,
|
||
(dab.x + r).ceil() as i32,
|
||
(dab.y + r).ceil() as i32,
|
||
);
|
||
// Expand copy region to include the previous dab's bbox so the
|
||
// pixels it wrote are visible as the source for this dab's smudge.
|
||
let copy_bbox = match prev_bbox {
|
||
Some(pb) => (cur_bbox.0.min(pb.0), cur_bbox.1.min(pb.1),
|
||
cur_bbox.2.max(pb.2), cur_bbox.3.max(pb.3)),
|
||
None => cur_bbox,
|
||
};
|
||
self.render_dabs_batch(device, queue, keyframe_id,
|
||
std::slice::from_ref(dab), cur_bbox, Some(copy_bbox), canvas_w, canvas_h);
|
||
prev_bbox = Some(cur_bbox);
|
||
}
|
||
} else {
|
||
self.render_dabs_batch(device, queue, keyframe_id, dabs, bbox, None, canvas_w, canvas_h);
|
||
}
|
||
}
|
||
|
||
/// Inner batch dispatch.
|
||
///
|
||
/// `dispatch_bbox` — region dispatched to the compute shader (usually the union of all dab bboxes).
|
||
/// `copy_bbox` — region to copy src→dst before dispatch:
|
||
/// - `None` → copy the full canvas (required for paint/erase batches so
|
||
/// dabs outside the current frame's region are preserved).
|
||
/// - `Some(r)` → copy only region `r` (sufficient for sequential smudge dabs
|
||
/// because both textures hold identical data outside previously
|
||
/// touched regions, so no full copy is needed).
|
||
fn render_dabs_batch(
|
||
&mut self,
|
||
device: &wgpu::Device,
|
||
queue: &wgpu::Queue,
|
||
keyframe_id: Uuid,
|
||
dabs: &[GpuDab],
|
||
dispatch_bbox: (i32, i32, i32, i32),
|
||
copy_bbox: Option<(i32, i32, i32, i32)>,
|
||
canvas_w: u32,
|
||
canvas_h: u32,
|
||
) {
|
||
if dabs.is_empty() { return; }
|
||
let canvas = match self.canvases.get_mut(&keyframe_id) {
|
||
Some(c) => c,
|
||
None => return,
|
||
};
|
||
|
||
// Clamp the dispatch bounding box to canvas bounds.
|
||
let bbox = dispatch_bbox;
|
||
let x0 = bbox.0.max(0) as u32;
|
||
let y0 = bbox.1.max(0) as u32;
|
||
let x1 = (bbox.2 as u32).min(canvas_w);
|
||
let y1 = (bbox.3 as u32).min(canvas_h);
|
||
if x1 <= x0 || y1 <= y0 { return; }
|
||
let bbox_w = x1 - x0;
|
||
let bbox_h = y1 - y0;
|
||
|
||
// Step 1: Copy src→dst.
|
||
// For paint/erase batches (copy_bbox = None): copy the ENTIRE canvas so dst
|
||
// starts with all previous dabs — a bbox-only copy would lose dabs outside
|
||
// this frame's region after swap.
|
||
// For smudge (copy_bbox = Some(r)): copy only the union of the current and
|
||
// previous dab bboxes. Outside that region both textures hold identical
|
||
// data so no full copy is needed.
|
||
let mut copy_enc = device.create_command_encoder(
|
||
&wgpu::CommandEncoderDescriptor { label: Some("canvas_copy_encoder") },
|
||
);
|
||
match copy_bbox {
|
||
None => {
|
||
copy_enc.copy_texture_to_texture(
|
||
wgpu::TexelCopyTextureInfo {
|
||
texture: canvas.src(),
|
||
mip_level: 0,
|
||
origin: wgpu::Origin3d::ZERO,
|
||
aspect: wgpu::TextureAspect::All,
|
||
},
|
||
wgpu::TexelCopyTextureInfo {
|
||
texture: canvas.dst(),
|
||
mip_level: 0,
|
||
origin: wgpu::Origin3d::ZERO,
|
||
aspect: wgpu::TextureAspect::All,
|
||
},
|
||
wgpu::Extent3d { width: canvas_w, height: canvas_h, depth_or_array_layers: 1 },
|
||
);
|
||
}
|
||
Some(cb) => {
|
||
let cx0 = cb.0.max(0) as u32;
|
||
let cy0 = cb.1.max(0) as u32;
|
||
let cx1 = (cb.2 as u32).min(canvas_w);
|
||
let cy1 = (cb.3 as u32).min(canvas_h);
|
||
if cx1 > cx0 && cy1 > cy0 {
|
||
copy_enc.copy_texture_to_texture(
|
||
wgpu::TexelCopyTextureInfo {
|
||
texture: canvas.src(),
|
||
mip_level: 0,
|
||
origin: wgpu::Origin3d { x: cx0, y: cy0, z: 0 },
|
||
aspect: wgpu::TextureAspect::All,
|
||
},
|
||
wgpu::TexelCopyTextureInfo {
|
||
texture: canvas.dst(),
|
||
mip_level: 0,
|
||
origin: wgpu::Origin3d { x: cx0, y: cy0, z: 0 },
|
||
aspect: wgpu::TextureAspect::All,
|
||
},
|
||
wgpu::Extent3d { width: cx1 - cx0, height: cy1 - cy0, depth_or_array_layers: 1 },
|
||
);
|
||
}
|
||
}
|
||
}
|
||
queue.submit(Some(copy_enc.finish()));
|
||
|
||
// Step 2: Upload all dabs as a single storage buffer.
|
||
let dab_bytes = bytemuck::cast_slice(dabs);
|
||
let dab_buf = device.create_buffer(&wgpu::BufferDescriptor {
|
||
label: Some("dab_storage_buf"),
|
||
size: dab_bytes.len() as u64,
|
||
usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
|
||
mapped_at_creation: false,
|
||
});
|
||
queue.write_buffer(&dab_buf, 0, dab_bytes);
|
||
|
||
let params = DabParams {
|
||
bbox_x0: x0 as i32,
|
||
bbox_y0: y0 as i32,
|
||
bbox_w,
|
||
bbox_h,
|
||
num_dabs: dabs.len() as u32,
|
||
canvas_w,
|
||
canvas_h,
|
||
_pad: 0,
|
||
};
|
||
let params_buf = device.create_buffer(&wgpu::BufferDescriptor {
|
||
label: Some("dab_params_buf"),
|
||
size: std::mem::size_of::<DabParams>() as u64,
|
||
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
|
||
mapped_at_creation: false,
|
||
});
|
||
queue.write_buffer(¶ms_buf, 0, bytemuck::bytes_of(¶ms));
|
||
|
||
let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||
label: Some("brush_dab_bg"),
|
||
layout: &self.compute_bg_layout,
|
||
entries: &[
|
||
wgpu::BindGroupEntry { binding: 0, resource: dab_buf.as_entire_binding() },
|
||
wgpu::BindGroupEntry { binding: 1, resource: params_buf.as_entire_binding() },
|
||
wgpu::BindGroupEntry { binding: 2, resource: wgpu::BindingResource::TextureView(canvas.src_view()) },
|
||
wgpu::BindGroupEntry { binding: 3, resource: wgpu::BindingResource::TextureView(canvas.dst_view()) },
|
||
],
|
||
});
|
||
|
||
// Step 3: Single dispatch over the union bounding box.
|
||
let mut compute_enc = device.create_command_encoder(
|
||
&wgpu::CommandEncoderDescriptor { label: Some("brush_dab_encoder") },
|
||
);
|
||
{
|
||
let mut pass = compute_enc.begin_compute_pass(
|
||
&wgpu::ComputePassDescriptor { label: Some("brush_dab_pass"), timestamp_writes: None },
|
||
);
|
||
pass.set_pipeline(&self.compute_pipeline);
|
||
pass.set_bind_group(0, &bg, &[]);
|
||
pass.dispatch_workgroups(bbox_w.div_ceil(8), bbox_h.div_ceil(8), 1);
|
||
}
|
||
queue.submit(Some(compute_enc.finish()));
|
||
|
||
// Step 4: Swap once — dst (with all dabs applied) becomes the new src.
|
||
canvas.swap();
|
||
}
|
||
|
||
/// Read the current canvas back to a CPU `Vec<u8>` (raw RGBA, row-major).
|
||
///
|
||
/// **Blocks** until the GPU work is complete (`Maintain::Wait`).
|
||
/// Should only be called at stroke end, not every frame.
|
||
///
|
||
/// Returns `None` if no canvas exists for `keyframe_id`.
|
||
pub fn readback_canvas(
|
||
&self,
|
||
device: &wgpu::Device,
|
||
queue: &wgpu::Queue,
|
||
keyframe_id: Uuid,
|
||
) -> Option<Vec<u8>> {
|
||
let canvas = self.canvases.get(&keyframe_id)?;
|
||
let width = canvas.width;
|
||
let height = canvas.height;
|
||
|
||
// wgpu requires bytes_per_row to be a multiple of 256
|
||
let bytes_per_row_aligned =
|
||
((width * 4 + 255) / 256) * 256;
|
||
let total_bytes = (bytes_per_row_aligned * height) as u64;
|
||
|
||
let staging = device.create_buffer(&wgpu::BufferDescriptor {
|
||
label: Some("canvas_readback_buf"),
|
||
size: total_bytes,
|
||
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
|
||
mapped_at_creation: false,
|
||
});
|
||
|
||
let mut encoder = device.create_command_encoder(
|
||
&wgpu::CommandEncoderDescriptor { label: Some("canvas_readback_encoder") },
|
||
);
|
||
encoder.copy_texture_to_buffer(
|
||
wgpu::TexelCopyTextureInfo {
|
||
texture: canvas.src(),
|
||
mip_level: 0,
|
||
origin: wgpu::Origin3d::ZERO,
|
||
aspect: wgpu::TextureAspect::All,
|
||
},
|
||
wgpu::TexelCopyBufferInfo {
|
||
buffer: &staging,
|
||
layout: wgpu::TexelCopyBufferLayout {
|
||
offset: 0,
|
||
bytes_per_row: Some(bytes_per_row_aligned),
|
||
rows_per_image: Some(height),
|
||
},
|
||
},
|
||
wgpu::Extent3d { width, height, depth_or_array_layers: 1 },
|
||
);
|
||
queue.submit(Some(encoder.finish()));
|
||
|
||
// Block until complete
|
||
let slice = staging.slice(..);
|
||
let (tx, rx) = std::sync::mpsc::channel();
|
||
slice.map_async(wgpu::MapMode::Read, move |r| { let _ = tx.send(r); });
|
||
let _ = device.poll(wgpu::PollType::wait_indefinitely());
|
||
if rx.recv().ok()?.is_err() { return None; }
|
||
|
||
let mapped = slice.get_mapped_range();
|
||
|
||
// De-stride: copy only `width * 4` bytes per row (drop alignment padding)
|
||
let bytes_per_row_tight = (width * 4) as usize;
|
||
let bytes_per_row_src = bytes_per_row_aligned as usize;
|
||
let mut pixels = vec![0u8; (width * height * 4) as usize];
|
||
for row in 0..height as usize {
|
||
let src = &mapped[row * bytes_per_row_src .. row * bytes_per_row_src + bytes_per_row_tight];
|
||
let dst = &mut pixels[row * bytes_per_row_tight .. (row + 1) * bytes_per_row_tight];
|
||
dst.copy_from_slice(src);
|
||
}
|
||
|
||
drop(mapped);
|
||
staging.unmap();
|
||
|
||
// Encode linear premultiplied → sRGB-encoded premultiplied so the returned
|
||
// bytes match what Vello expects (ImageAlphaType::Premultiplied with sRGB
|
||
// channels). Alpha is left unchanged.
|
||
for pixel in pixels.chunks_exact_mut(4) {
|
||
pixel[0] = linear_to_srgb_byte(pixel[0]);
|
||
pixel[1] = linear_to_srgb_byte(pixel[1]);
|
||
pixel[2] = linear_to_srgb_byte(pixel[2]);
|
||
}
|
||
|
||
Some(pixels)
|
||
}
|
||
|
||
/// Render a set of dabs to an offscreen texture and return the raw pixels.
|
||
///
|
||
/// This is a **blocking** GPU readback — intended for one-time renders such as
|
||
/// brush preview thumbnails. Do not call every frame on the hot path.
|
||
///
|
||
/// The returned `Vec<u8>` is in **sRGB-encoded premultiplied RGBA** format,
|
||
/// suitable for creating an `egui::ColorImage` via
|
||
/// `ColorImage::from_rgba_premultiplied`.
|
||
///
|
||
/// A dedicated scratch canvas keyed by a fixed UUID is reused across calls so
|
||
/// no allocation is needed after the first invocation.
|
||
pub fn render_to_image(
|
||
&mut self,
|
||
device: &wgpu::Device,
|
||
queue: &wgpu::Queue,
|
||
dabs: &[GpuDab],
|
||
width: u32,
|
||
height: u32,
|
||
) -> Vec<u8> {
|
||
use std::sync::OnceLock;
|
||
static SCRATCH_ID: OnceLock<Uuid> = OnceLock::new();
|
||
let scratch_id = *SCRATCH_ID.get_or_init(Uuid::new_v4);
|
||
|
||
// Ensure a correctly-sized scratch canvas exists.
|
||
self.ensure_canvas(device, scratch_id, width, height);
|
||
|
||
// Clear to transparent so previous renders don't bleed through.
|
||
let blank = vec![0u8; (width * height * 4) as usize];
|
||
if let Some(canvas) = self.canvases.get(&scratch_id) {
|
||
canvas.upload(queue, &blank);
|
||
}
|
||
|
||
if !dabs.is_empty() {
|
||
// Compute the union bounding box of all dabs.
|
||
let bbox = dabs.iter().fold(
|
||
(i32::MAX, i32::MAX, i32::MIN, i32::MIN),
|
||
|acc, d| {
|
||
let r = d.radius + 1.0;
|
||
(
|
||
acc.0.min((d.x - r).floor() as i32),
|
||
acc.1.min((d.y - r).floor() as i32),
|
||
acc.2.max((d.x + r).ceil() as i32),
|
||
acc.3.max((d.y + r).ceil() as i32),
|
||
)
|
||
},
|
||
);
|
||
self.render_dabs(device, queue, scratch_id, dabs, bbox, width, height);
|
||
}
|
||
|
||
self.readback_canvas(device, queue, scratch_id).unwrap_or_default()
|
||
}
|
||
|
||
/// Remove the canvas pair for a keyframe (e.g. when the layer is deleted).
|
||
pub fn remove_canvas(&mut self, keyframe_id: &Uuid) {
|
||
self.canvases.remove(keyframe_id);
|
||
}
|
||
}
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Canvas blit pipeline (renders canvas texture to layer sRGB buffer)
|
||
// ---------------------------------------------------------------------------
|
||
|
||
/// Bind group layout + pipeline for blitting a canvas texture (at document
|
||
/// resolution) into a layer render buffer (at viewport resolution), applying
|
||
/// the camera transform.
|
||
pub struct CanvasBlitPipeline {
|
||
pub pipeline: wgpu::RenderPipeline,
|
||
pub bg_layout: wgpu::BindGroupLayout,
|
||
pub sampler: wgpu::Sampler,
|
||
/// Nearest-neighbour sampler used for the selection mask texture.
|
||
pub mask_sampler: wgpu::Sampler,
|
||
}
|
||
|
||
/// Camera parameters uniform for canvas_blit.wgsl.
|
||
#[repr(C)]
|
||
#[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)]
|
||
pub struct CameraParams {
|
||
pub pan_x: f32,
|
||
pub pan_y: f32,
|
||
pub zoom: f32,
|
||
pub canvas_w: f32,
|
||
pub canvas_h: f32,
|
||
pub viewport_w: f32,
|
||
pub viewport_h: f32,
|
||
pub _pad: f32,
|
||
}
|
||
|
||
impl CanvasBlitPipeline {
|
||
pub fn new(device: &wgpu::Device) -> Self {
|
||
let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
|
||
label: Some("canvas_blit_shader"),
|
||
source: wgpu::ShaderSource::Wgsl(
|
||
include_str!("panes/shaders/canvas_blit.wgsl").into(),
|
||
),
|
||
});
|
||
|
||
let bg_layout = device.create_bind_group_layout(
|
||
&wgpu::BindGroupLayoutDescriptor {
|
||
label: Some("canvas_blit_bgl"),
|
||
entries: &[
|
||
wgpu::BindGroupLayoutEntry {
|
||
binding: 0,
|
||
visibility: wgpu::ShaderStages::FRAGMENT,
|
||
ty: wgpu::BindingType::Texture {
|
||
sample_type: wgpu::TextureSampleType::Float { filterable: true },
|
||
view_dimension: wgpu::TextureViewDimension::D2,
|
||
multisampled: false,
|
||
},
|
||
count: None,
|
||
},
|
||
wgpu::BindGroupLayoutEntry {
|
||
binding: 1,
|
||
visibility: wgpu::ShaderStages::FRAGMENT,
|
||
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
|
||
count: None,
|
||
},
|
||
wgpu::BindGroupLayoutEntry {
|
||
binding: 2,
|
||
visibility: wgpu::ShaderStages::FRAGMENT,
|
||
ty: wgpu::BindingType::Buffer {
|
||
ty: wgpu::BufferBindingType::Uniform,
|
||
has_dynamic_offset: false,
|
||
min_binding_size: None,
|
||
},
|
||
count: None,
|
||
},
|
||
// Binding 3: selection mask texture (R8Unorm; 1×1 white = no mask)
|
||
wgpu::BindGroupLayoutEntry {
|
||
binding: 3,
|
||
visibility: wgpu::ShaderStages::FRAGMENT,
|
||
ty: wgpu::BindingType::Texture {
|
||
sample_type: wgpu::TextureSampleType::Float { filterable: true },
|
||
view_dimension: wgpu::TextureViewDimension::D2,
|
||
multisampled: false,
|
||
},
|
||
count: None,
|
||
},
|
||
// Binding 4: nearest sampler for mask (sharp selection edges)
|
||
wgpu::BindGroupLayoutEntry {
|
||
binding: 4,
|
||
visibility: wgpu::ShaderStages::FRAGMENT,
|
||
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
|
||
count: None,
|
||
},
|
||
],
|
||
},
|
||
);
|
||
|
||
let pipeline_layout = device.create_pipeline_layout(
|
||
&wgpu::PipelineLayoutDescriptor {
|
||
label: Some("canvas_blit_pl"),
|
||
bind_group_layouts: &[&bg_layout],
|
||
push_constant_ranges: &[],
|
||
},
|
||
);
|
||
|
||
let pipeline = device.create_render_pipeline(
|
||
&wgpu::RenderPipelineDescriptor {
|
||
label: Some("canvas_blit_pipeline"),
|
||
layout: Some(&pipeline_layout),
|
||
vertex: wgpu::VertexState {
|
||
module: &shader,
|
||
entry_point: Some("vs_main"),
|
||
buffers: &[],
|
||
compilation_options: Default::default(),
|
||
},
|
||
fragment: Some(wgpu::FragmentState {
|
||
module: &shader,
|
||
entry_point: Some("fs_main"),
|
||
targets: &[Some(wgpu::ColorTargetState {
|
||
format: wgpu::TextureFormat::Rgba16Float,
|
||
blend: None, // canvas already stores premultiplied alpha
|
||
write_mask: wgpu::ColorWrites::ALL,
|
||
})],
|
||
compilation_options: Default::default(),
|
||
}),
|
||
primitive: wgpu::PrimitiveState {
|
||
topology: wgpu::PrimitiveTopology::TriangleStrip,
|
||
..Default::default()
|
||
},
|
||
depth_stencil: None,
|
||
multisample: wgpu::MultisampleState::default(),
|
||
multiview: None,
|
||
cache: None,
|
||
},
|
||
);
|
||
|
||
let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
|
||
label: Some("canvas_blit_sampler"),
|
||
address_mode_u: wgpu::AddressMode::ClampToEdge,
|
||
address_mode_v: wgpu::AddressMode::ClampToEdge,
|
||
address_mode_w: wgpu::AddressMode::ClampToEdge,
|
||
mag_filter: wgpu::FilterMode::Linear,
|
||
min_filter: wgpu::FilterMode::Linear,
|
||
mipmap_filter: wgpu::FilterMode::Nearest,
|
||
..Default::default()
|
||
});
|
||
|
||
let mask_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
|
||
label: Some("canvas_mask_sampler"),
|
||
address_mode_u: wgpu::AddressMode::ClampToEdge,
|
||
address_mode_v: wgpu::AddressMode::ClampToEdge,
|
||
address_mode_w: wgpu::AddressMode::ClampToEdge,
|
||
mag_filter: wgpu::FilterMode::Nearest,
|
||
min_filter: wgpu::FilterMode::Nearest,
|
||
mipmap_filter: wgpu::FilterMode::Nearest,
|
||
..Default::default()
|
||
});
|
||
|
||
Self { pipeline, bg_layout, sampler, mask_sampler }
|
||
}
|
||
|
||
/// Render the canvas texture into `target_view` (Rgba16Float) with the given camera.
|
||
///
|
||
/// `target_view` is cleared to transparent before writing.
|
||
/// `mask_view` is an R8Unorm texture in canvas-pixel space: 255 = keep, 0 = discard.
|
||
/// Pass `None` to use the built-in 1×1 all-white default (no masking).
|
||
pub fn blit(
|
||
&self,
|
||
device: &wgpu::Device,
|
||
queue: &wgpu::Queue,
|
||
canvas_view: &wgpu::TextureView,
|
||
target_view: &wgpu::TextureView,
|
||
camera: &CameraParams,
|
||
mask_view: Option<&wgpu::TextureView>,
|
||
) {
|
||
// When no mask is provided, create a temporary 1×1 all-white texture.
|
||
// (queue is already available here, unlike in new())
|
||
let tmp_mask_tex;
|
||
let tmp_mask_view;
|
||
let mask_view: &wgpu::TextureView = match mask_view {
|
||
Some(v) => v,
|
||
None => {
|
||
tmp_mask_tex = device.create_texture(&wgpu::TextureDescriptor {
|
||
label: Some("canvas_default_mask"),
|
||
size: wgpu::Extent3d { width: 1, height: 1, depth_or_array_layers: 1 },
|
||
mip_level_count: 1,
|
||
sample_count: 1,
|
||
dimension: wgpu::TextureDimension::D2,
|
||
format: wgpu::TextureFormat::R8Unorm,
|
||
usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
|
||
view_formats: &[],
|
||
});
|
||
queue.write_texture(
|
||
wgpu::TexelCopyTextureInfo {
|
||
texture: &tmp_mask_tex,
|
||
mip_level: 0,
|
||
origin: wgpu::Origin3d::ZERO,
|
||
aspect: wgpu::TextureAspect::All,
|
||
},
|
||
&[255u8],
|
||
wgpu::TexelCopyBufferLayout { offset: 0, bytes_per_row: Some(1), rows_per_image: Some(1) },
|
||
wgpu::Extent3d { width: 1, height: 1, depth_or_array_layers: 1 },
|
||
);
|
||
tmp_mask_view = tmp_mask_tex.create_view(&Default::default());
|
||
&tmp_mask_view
|
||
}
|
||
};
|
||
// Upload camera params
|
||
let cam_buf = device.create_buffer(&wgpu::BufferDescriptor {
|
||
label: Some("canvas_blit_cam_buf"),
|
||
size: std::mem::size_of::<CameraParams>() as u64,
|
||
usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
|
||
mapped_at_creation: false,
|
||
});
|
||
queue.write_buffer(&cam_buf, 0, bytemuck::bytes_of(camera));
|
||
|
||
let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||
label: Some("canvas_blit_bg"),
|
||
layout: &self.bg_layout,
|
||
entries: &[
|
||
wgpu::BindGroupEntry {
|
||
binding: 0,
|
||
resource: wgpu::BindingResource::TextureView(canvas_view),
|
||
},
|
||
wgpu::BindGroupEntry {
|
||
binding: 1,
|
||
resource: wgpu::BindingResource::Sampler(&self.sampler),
|
||
},
|
||
wgpu::BindGroupEntry {
|
||
binding: 2,
|
||
resource: cam_buf.as_entire_binding(),
|
||
},
|
||
wgpu::BindGroupEntry {
|
||
binding: 3,
|
||
resource: wgpu::BindingResource::TextureView(mask_view),
|
||
},
|
||
wgpu::BindGroupEntry {
|
||
binding: 4,
|
||
resource: wgpu::BindingResource::Sampler(&self.mask_sampler),
|
||
},
|
||
],
|
||
});
|
||
|
||
let mut encoder = device.create_command_encoder(
|
||
&wgpu::CommandEncoderDescriptor { label: Some("canvas_blit_encoder") },
|
||
);
|
||
{
|
||
let mut rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
|
||
label: Some("canvas_blit_pass"),
|
||
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
|
||
view: target_view,
|
||
resolve_target: None,
|
||
depth_slice: None,
|
||
ops: wgpu::Operations {
|
||
load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
|
||
store: wgpu::StoreOp::Store,
|
||
},
|
||
})],
|
||
depth_stencil_attachment: None,
|
||
occlusion_query_set: None,
|
||
timestamp_writes: None,
|
||
});
|
||
rp.set_pipeline(&self.pipeline);
|
||
rp.set_bind_group(0, &bg, &[]);
|
||
rp.draw(0..4, 0..1);
|
||
}
|
||
queue.submit(Some(encoder.finish()));
|
||
}
|
||
}
|