Move raster editing to GPU

This commit is contained in:
Skyler Lehmkuhl 2026-03-01 15:41:28 -05:00
parent e85efe7405
commit da02edb9f5
7 changed files with 1460 additions and 83 deletions

View File

@ -38,6 +38,46 @@
use image::RgbaImage;
use crate::raster_layer::{RasterBlendMode, StrokeRecord};
/// A single brush dab ready for GPU dispatch.
///
/// Padded to 64 bytes (4 × 16 bytes) for WGSL struct alignment in a storage buffer.
/// Field order and size must match `struct GpuDab` in `brush_dab.wgsl`.
#[repr(C)]
#[derive(Clone, Copy, Debug, bytemuck::Pod, bytemuck::Zeroable)]
pub struct GpuDab {
    /// Dab centre X (canvas pixels)
    pub x: f32,
    /// Dab centre Y (canvas pixels)
    pub y: f32,
    /// Dab radius (pixels)
    pub radius: f32,
    /// Hardness 0.0–1.0 (controls the falloff curve shape)
    pub hardness: f32,
    /// Composite opacity for this dab
    pub opacity: f32,
    /// Brush color R (linear, premultiplied)
    pub color_r: f32,
    /// Brush color G
    pub color_g: f32,
    /// Brush color B
    pub color_b: f32,
    /// Brush color A
    pub color_a: f32,
    /// Normalized stroke direction X (smudge only; 0 otherwise)
    pub ndx: f32,
    /// Normalized stroke direction Y (smudge only; 0 otherwise)
    pub ndy: f32,
    /// Distance to sample behind stroke for smudge (smudge only; 0 otherwise)
    pub smudge_dist: f32,
    /// Blend mode: 0 = Normal, 1 = Erase, 2 = Smudge
    pub blend_mode: u32,
    /// Explicit padding rounding the struct up to 64 bytes (always 0).
    pub _pad0: u32,
    pub _pad1: u32,
    pub _pad2: u32,
}
/// Transient brush stroke state (tracks partial dab position between segments)
pub struct StrokeState {
/// Distance along the path already "consumed" toward the next dab (in pixels)
@ -58,6 +98,114 @@ impl Default for StrokeState {
pub struct BrushEngine;
impl BrushEngine {
    /// Compute the list of GPU dabs for a stroke segment.
    ///
    /// Uses the same dab-spacing logic as [`apply_stroke_with_state`] but produces
    /// [`GpuDab`] structs for upload to the GPU compute pipeline instead of painting
    /// into a pixel buffer.
    ///
    /// Also returns the union bounding box of all dabs (grown by a 1-pixel fringe)
    /// as `(x0, y0, x1, y1)` in integer canvas pixel coordinates. The box is NOT
    /// clamped here — coordinates may be negative or extend past the canvas edges;
    /// callers (e.g. the GPU dispatch) clamp it to the canvas. `x0 == i32::MAX`
    /// is the sentinel value when the returned Vec is empty.
    pub fn compute_dabs(
        stroke: &StrokeRecord,
        state: &mut StrokeState,
    ) -> (Vec<GpuDab>, (i32, i32, i32, i32)) {
        let mut dabs: Vec<GpuDab> = Vec::new();
        // (min_x, min_y, max_x, max_y); starts inverted so the first dab initializes it.
        let mut bbox = (i32::MAX, i32::MIN.max(i32::MAX), i32::MIN, i32::MIN);
        // Numeric blend-mode tag understood by the WGSL shader (see GpuDab::blend_mode).
        let blend_mode_u = match stroke.blend_mode {
            RasterBlendMode::Normal => 0u32,
            RasterBlendMode::Erase => 1u32,
            RasterBlendMode::Smudge => 2u32,
        };
        // Shared push helper: records one dab and grows the union bbox by the dab
        // radius plus a 1-pixel anti-aliasing fringe.
        let mut push_dab = |dabs: &mut Vec<GpuDab>,
                            bbox: &mut (i32, i32, i32, i32),
                            x: f32, y: f32,
                            radius: f32, opacity: f32,
                            ndx: f32, ndy: f32, smudge_dist: f32| {
            let r_fringe = radius + 1.0;
            bbox.0 = bbox.0.min((x - r_fringe).floor() as i32);
            bbox.1 = bbox.1.min((y - r_fringe).floor() as i32);
            bbox.2 = bbox.2.max((x + r_fringe).ceil() as i32);
            bbox.3 = bbox.3.max((y + r_fringe).ceil() as i32);
            dabs.push(GpuDab {
                x, y, radius,
                hardness: stroke.brush_settings.hardness,
                opacity,
                color_r: stroke.color[0],
                color_g: stroke.color[1],
                color_b: stroke.color[2],
                color_a: stroke.color[3],
                ndx, ndy, smudge_dist,
                blend_mode: blend_mode_u,
                _pad0: 0, _pad1: 0, _pad2: 0,
            });
        };
        // Degenerate stroke: zero or one point — a single dab at the point, if any.
        if stroke.points.len() < 2 {
            if let Some(pt) = stroke.points.first() {
                let r = stroke.brush_settings.radius_at_pressure(pt.pressure);
                let o = stroke.brush_settings.opacity_at_pressure(pt.pressure);
                // Single-tap smudge has no direction — skip (same as CPU engine)
                if !matches!(stroke.blend_mode, RasterBlendMode::Smudge) {
                    push_dab(&mut dabs, &mut bbox, pt.x, pt.y, r, o, 0.0, 0.0, 0.0);
                }
                state.distance_since_last_dab = 0.0;
            }
            return (dabs, bbox);
        }
        // Walk consecutive point pairs, emitting dabs at `spacing`-pixel intervals
        // along the polyline; leftover distance carries over in `state` so spacing
        // stays continuous across segments and across calls.
        for window in stroke.points.windows(2) {
            let p0 = &window[0];
            let p1 = &window[1];
            let dx = p1.x - p0.x;
            let dy = p1.y - p0.y;
            let seg_len = (dx * dx + dy * dy).sqrt();
            if seg_len < 1e-4 { continue; }
            let mut t = 0.0f32;
            while t < 1.0 {
                // Pressure (and hence radius/spacing) interpolates along the segment.
                let pressure = p0.pressure + t * (p1.pressure - p0.pressure);
                let radius = stroke.brush_settings.radius_at_pressure(pressure);
                // NOTE(review): despite the name, `dabs_per_radius` multiplies the
                // radius to give the spacing — confirm against the CPU engine.
                let spacing = (radius * stroke.brush_settings.dabs_per_radius).max(0.5);
                let dist_to_next = spacing - state.distance_since_last_dab;
                let seg_t_to_next = (dist_to_next / seg_len).max(0.0);
                if seg_t_to_next > 1.0 - t {
                    // Next dab falls beyond this segment: bank the remaining distance.
                    state.distance_since_last_dab += seg_len * (1.0 - t);
                    break;
                }
                t += seg_t_to_next;
                let x2 = p0.x + t * dx;
                let y2 = p0.y + t * dy;
                let pressure2 = p0.pressure + t * (p1.pressure - p0.pressure);
                let radius2 = stroke.brush_settings.radius_at_pressure(pressure2);
                let opacity2 = stroke.brush_settings.opacity_at_pressure(pressure2);
                if matches!(stroke.blend_mode, RasterBlendMode::Smudge) {
                    // Smudge dabs carry the (normalized) stroke direction and a
                    // pick-up distance so the shader can sample "behind" the stroke.
                    let ndx = dx / seg_len;
                    let ndy = dy / seg_len;
                    let smudge_dist =
                        (radius2 * stroke.brush_settings.dabs_per_radius).max(1.0);
                    push_dab(&mut dabs, &mut bbox,
                        x2, y2, radius2, opacity2, ndx, ndy, smudge_dist);
                } else {
                    push_dab(&mut dabs, &mut bbox,
                        x2, y2, radius2, opacity2, 0.0, 0.0, 0.0);
                }
                state.distance_since_last_dab = 0.0;
            }
        }
        (dabs, bbox)
    }
/// Apply a complete stroke to a pixel buffer.
///
/// A fresh [`StrokeState`] is created for each stroke (starts with full dab

View File

@ -358,7 +358,10 @@ fn render_raster_layer_to_scene(
format: ImageFormat::Rgba8,
width: kf.width,
height: kf.height,
alpha_type: ImageAlphaType::Alpha,
// raw_pixels stores sRGB-encoded premultiplied RGBA (channels are
// gamma-encoded, alpha is linear). Premultiplied tells Vello to
// decode the sRGB channels without premultiplying again.
alpha_type: ImageAlphaType::AlphaPremultiplied,
};
let brush = ImageBrush::new(image_data);
let canvas_rect = Rect::new(0.0, 0.0, kf.width as f64, kf.height as f64);

View File

@ -0,0 +1,691 @@
//! GPU-accelerated raster brush engine.
//!
//! [`GpuBrushEngine`] wraps the `brush_dab.wgsl` compute pipeline and manages
//! per-keyframe canvas texture pairs (ping-pong) used as the live canvas during
//! raster painting.
//!
//! ## Lifecycle
//!
//! 1. **Stroke start** — caller supplies the initial pixel data; the engine uploads
//! it to both canvas textures so either can serve as source/dest.
//! 2. **Each drag event** — [`GpuBrushEngine::render_dabs`] copies src→dst,
//! dispatches the compute shader, then swaps src/dst.
//! 3. **Stroke end** — [`GpuBrushEngine::readback_canvas`] copies the current
//! source texture into a staging buffer and returns the raw RGBA bytes
//! (blocking — uses `device.poll(Maintain::Wait)`).
//! 4. **Idle** — canvas textures are kept alive for the next stroke (no re-upload
//! needed if the layer has not changed).
use std::collections::HashMap;
use uuid::Uuid;
use lightningbeam_core::brush_engine::GpuDab;
// ---------------------------------------------------------------------------
// Colour-space helpers
// ---------------------------------------------------------------------------
/// Decode one sRGB-encoded channel value in `[0, 1]` to its linear equivalent.
///
/// Standard sRGB EOTF: a linear segment below the 0.04045 cutoff, a 2.4-power
/// curve above it.
fn srgb_to_linear(c: f32) -> f32 {
    const CUTOFF: f32 = 0.04045;
    if c > CUTOFF {
        ((c + 0.055) / 1.055).powf(2.4)
    } else {
        c / 12.92
    }
}
/// Encode one linear channel byte (`0..=255`, interpreted as linear `[0, 1]`)
/// to its sRGB-encoded byte with round-to-nearest.
fn linear_to_srgb_byte(c: u8) -> u8 {
    let lin = f32::from(c) / 255.0;
    // Standard sRGB OETF: linear segment below 0.0031308, 1/2.4-power curve above.
    let encoded = match lin {
        l if l <= 0.0031308 => l * 12.92,
        l => 1.055 * l.powf(1.0 / 2.4) - 0.055,
    };
    (encoded * 255.0 + 0.5) as u8
}
// ---------------------------------------------------------------------------
// Per-keyframe canvas texture pair (ping-pong)
// ---------------------------------------------------------------------------
/// A pair of textures used for double-buffered canvas rendering.
///
/// `current` indexes the texture that holds the up-to-date canvas state.
pub struct CanvasPair {
    /// The two ping-pong textures (`Rgba8Unorm`, document resolution).
    pub textures: [wgpu::Texture; 2],
    /// Default views over `textures`, in the same order.
    pub views: [wgpu::TextureView; 2],
    /// Index (0 or 1) of the texture that is the current "source" (authoritative).
    pub current: usize,
    /// Canvas width in pixels.
    pub width: u32,
    /// Canvas height in pixels.
    pub height: u32,
}
impl CanvasPair {
    /// Create a ping-pong texture pair at the given pixel size.
    ///
    /// Both textures are `Rgba8Unorm` with TEXTURE_BINDING / STORAGE_BINDING /
    /// COPY_SRC / COPY_DST usage so either can act as sampled source, storage
    /// destination, or copy endpoint.
    pub fn new(device: &wgpu::Device, width: u32, height: u32) -> Self {
        let desc = wgpu::TextureDescriptor {
            label: Some("raster_canvas"),
            size: wgpu::Extent3d { width, height, depth_or_array_layers: 1 },
            mip_level_count: 1,
            sample_count: 1,
            dimension: wgpu::TextureDimension::D2,
            format: wgpu::TextureFormat::Rgba8Unorm,
            usage: wgpu::TextureUsages::TEXTURE_BINDING
                | wgpu::TextureUsages::STORAGE_BINDING
                | wgpu::TextureUsages::COPY_SRC
                | wgpu::TextureUsages::COPY_DST,
            view_formats: &[],
        };
        let t0 = device.create_texture(&desc);
        let t1 = device.create_texture(&desc);
        let v0 = t0.create_view(&wgpu::TextureViewDescriptor::default());
        let v1 = t1.create_view(&wgpu::TextureViewDescriptor::default());
        Self {
            textures: [t0, t1],
            views: [v0, v1],
            current: 0,
            width,
            height,
        }
    }
    /// Upload raw RGBA bytes to both textures (call once at stroke start).
    ///
    /// `pixels` is expected to be **sRGB-encoded premultiplied** (the format stored
    /// in `raw_pixels` / PNG files). The values are decoded to linear premultiplied
    /// before being written to the canvas, which operates entirely in linear space.
    pub fn upload(&self, queue: &wgpu::Queue, pixels: &[u8]) {
        // Inputs are bytes, so precompute the sRGB→linear decode once per byte
        // value (256 powf calls) instead of per channel per pixel (3·W·H calls).
        // Rounding is identical to the previous per-pixel computation.
        let mut decode_lut = [0u8; 256];
        for (b, slot) in decode_lut.iter_mut().enumerate() {
            *slot = (srgb_to_linear(b as f32 / 255.0) * 255.0 + 0.5) as u8;
        }
        // Decode sRGB-premultiplied → linear premultiplied; alpha is already linear.
        let linear: Vec<u8> = pixels.chunks_exact(4).flat_map(|p| {
            [
                decode_lut[p[0] as usize],
                decode_lut[p[1] as usize],
                decode_lut[p[2] as usize],
                p[3],
            ]
        }).collect();
        let layout = wgpu::TexelCopyBufferLayout {
            offset: 0,
            bytes_per_row: Some(self.width * 4),
            rows_per_image: Some(self.height),
        };
        let extent = wgpu::Extent3d {
            width: self.width,
            height: self.height,
            depth_or_array_layers: 1,
        };
        // Write the same decoded data to BOTH textures so either side of the
        // ping-pong can serve as the first source.
        for tex in &self.textures {
            queue.write_texture(
                wgpu::TexelCopyTextureInfo {
                    texture: tex,
                    mip_level: 0,
                    origin: wgpu::Origin3d::ZERO,
                    aspect: wgpu::TextureAspect::All,
                },
                &linear,
                layout,
                extent,
            );
        }
    }
    /// Source (current, authoritative) texture.
    pub fn src(&self) -> &wgpu::Texture { &self.textures[self.current] }
    /// Source texture view.
    pub fn src_view(&self) -> &wgpu::TextureView { &self.views[self.current] }
    /// Destination (write target) texture.
    pub fn dst(&self) -> &wgpu::Texture { &self.textures[1 - self.current] }
    /// Destination texture view.
    pub fn dst_view(&self) -> &wgpu::TextureView { &self.views[1 - self.current] }
    /// Commit the just-completed dispatch: make dst the new source.
    pub fn swap(&mut self) { self.current = 1 - self.current; }
}
// ---------------------------------------------------------------------------
// GpuBrushEngine
// ---------------------------------------------------------------------------
/// GPU brush engine — holds the compute pipeline and per-keyframe canvas pairs.
pub struct GpuBrushEngine {
    /// Compute pipeline running `brush_dab.wgsl`.
    compute_pipeline: wgpu::ComputePipeline,
    /// Layout for the per-dispatch bind group (dab buffer, params, src, dst).
    compute_bg_layout: wgpu::BindGroupLayout,
    /// Canvas texture pairs keyed by keyframe UUID.
    pub canvases: HashMap<Uuid, CanvasPair>,
}
/// CPU-side parameters uniform for the compute shader.
///
/// Must match `struct Params` in `brush_dab.wgsl` (32 bytes).
#[repr(C)]
#[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)]
struct DabParams {
    /// Bounding-box origin in canvas pixels (already clamped to the canvas).
    bbox_x0: i32,
    bbox_y0: i32,
    /// Bounding-box size in pixels; the dispatch grid covers this region.
    bbox_w: u32,
    bbox_h: u32,
    /// Number of valid entries in the dab storage buffer.
    num_dabs: u32,
    /// Full canvas dimensions in pixels.
    canvas_w: u32,
    canvas_h: u32,
    /// Pad the struct to a 16-byte multiple for uniform-buffer layout.
    _pad: u32,
}
impl GpuBrushEngine {
    /// Create the brush compute pipeline, bind-group layout, and an empty canvas
    /// cache.
    ///
    /// Compiles `brush_dab.wgsl` and builds the bind group layout it expects:
    /// dab storage buffer, params uniform, sampled source texture, write-only
    /// storage destination. Canvas pairs are created lazily via
    /// [`GpuBrushEngine::ensure_canvas`]. (This is infallible — wgpu reports an
    /// unsupported `Rgba8Unorm` storage binding through its validation-error
    /// machinery, not through a `Result` here.)
    pub fn new(device: &wgpu::Device) -> Self {
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("brush_dab_shader"),
            source: wgpu::ShaderSource::Wgsl(
                include_str!("panes/shaders/brush_dab.wgsl").into(),
            ),
        });
        let compute_bg_layout = device.create_bind_group_layout(
            &wgpu::BindGroupLayoutDescriptor {
                label: Some("brush_dab_bgl"),
                entries: &[
                    // 0: dab storage buffer (read-only)
                    wgpu::BindGroupLayoutEntry {
                        binding: 0,
                        visibility: wgpu::ShaderStages::COMPUTE,
                        ty: wgpu::BindingType::Buffer {
                            ty: wgpu::BufferBindingType::Storage { read_only: true },
                            has_dynamic_offset: false,
                            min_binding_size: None,
                        },
                        count: None,
                    },
                    // 1: params uniform
                    wgpu::BindGroupLayoutEntry {
                        binding: 1,
                        visibility: wgpu::ShaderStages::COMPUTE,
                        ty: wgpu::BindingType::Buffer {
                            ty: wgpu::BufferBindingType::Uniform,
                            has_dynamic_offset: false,
                            min_binding_size: None,
                        },
                        count: None,
                    },
                    // 2: canvas source (sampled)
                    wgpu::BindGroupLayoutEntry {
                        binding: 2,
                        visibility: wgpu::ShaderStages::COMPUTE,
                        ty: wgpu::BindingType::Texture {
                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
                            view_dimension: wgpu::TextureViewDimension::D2,
                            multisampled: false,
                        },
                        count: None,
                    },
                    // 3: canvas destination (write-only storage)
                    wgpu::BindGroupLayoutEntry {
                        binding: 3,
                        visibility: wgpu::ShaderStages::COMPUTE,
                        ty: wgpu::BindingType::StorageTexture {
                            access: wgpu::StorageTextureAccess::WriteOnly,
                            format: wgpu::TextureFormat::Rgba8Unorm,
                            view_dimension: wgpu::TextureViewDimension::D2,
                        },
                        count: None,
                    },
                ],
            },
        );
        let pipeline_layout = device.create_pipeline_layout(
            &wgpu::PipelineLayoutDescriptor {
                label: Some("brush_dab_pl"),
                bind_group_layouts: &[&compute_bg_layout],
                push_constant_ranges: &[],
            },
        );
        let compute_pipeline = device.create_compute_pipeline(
            &wgpu::ComputePipelineDescriptor {
                label: Some("brush_dab_pipeline"),
                layout: Some(&pipeline_layout),
                module: &shader,
                entry_point: Some("main"),
                compilation_options: Default::default(),
                cache: None,
            },
        );
        Self {
            compute_pipeline,
            compute_bg_layout,
            canvases: HashMap::new(),
        }
    }
    /// Ensure a canvas pair exists for `keyframe_id` at the given dimensions.
    ///
    /// If the canvas exists but has different dimensions it is replaced
    /// (previous contents are NOT preserved across a resize).
    pub fn ensure_canvas(
        &mut self,
        device: &wgpu::Device,
        keyframe_id: Uuid,
        width: u32,
        height: u32,
    ) -> &mut CanvasPair {
        let needs_new = self.canvases.get(&keyframe_id)
            .map_or(true, |c| c.width != width || c.height != height);
        if needs_new {
            self.canvases.insert(keyframe_id, CanvasPair::new(device, width, height));
        }
        self.canvases.get_mut(&keyframe_id).unwrap()
    }
    /// Dispatch the brush compute shader for `dabs` onto the canvas of `keyframe_id`.
    ///
    /// * Pre-fills `dst` from `src` so untouched pixels are preserved.
    /// * Dispatches the compute shader.
    /// * Swaps src/dst so the just-written texture becomes the new source.
    ///
    /// `dab_bbox` is `(x0, y0, x1, y1)` — the union bounding box of all dabs.
    /// If `dabs` is empty or the bbox is invalid, does nothing.
    pub fn render_dabs(
        &mut self,
        device: &wgpu::Device,
        queue: &wgpu::Queue,
        keyframe_id: Uuid,
        dabs: &[GpuDab],
        bbox: (i32, i32, i32, i32),
        canvas_w: u32,
        canvas_h: u32,
    ) {
        // bbox.0 == i32::MAX is the "no dabs" sentinel from compute_dabs.
        if dabs.is_empty() || bbox.0 == i32::MAX { return; }
        let canvas = match self.canvases.get_mut(&keyframe_id) {
            Some(c) => c,
            None => return,
        };
        // Clamp bbox to canvas bounds
        let x0 = bbox.0.max(0) as u32;
        let y0 = bbox.1.max(0) as u32;
        let x1 = (bbox.2.min(canvas_w as i32 - 1)).max(0) as u32;
        let y1 = (bbox.3.min(canvas_h as i32 - 1)).max(0) as u32;
        if x1 < x0 || y1 < y0 { return; }
        let bbox_w = x1 - x0 + 1;
        let bbox_h = y1 - y0 + 1;
        // --- Pre-fill dst from src: copy the ENTIRE canvas so every pixel outside
        // the dab bounding box is preserved across the ping-pong swap.
        // Copying only the bbox would leave dst with data from two frames ago
        // in all other regions, causing missing dabs on alternating frames. ---
        let mut copy_encoder = device.create_command_encoder(
            &wgpu::CommandEncoderDescriptor { label: Some("canvas_copy_encoder") },
        );
        let full_extent = wgpu::Extent3d {
            width: canvas.width,
            height: canvas.height,
            depth_or_array_layers: 1,
        };
        copy_encoder.copy_texture_to_texture(
            wgpu::TexelCopyTextureInfo {
                texture: canvas.src(),
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
                aspect: wgpu::TextureAspect::All,
            },
            wgpu::TexelCopyTextureInfo {
                texture: canvas.dst(),
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
                aspect: wgpu::TextureAspect::All,
            },
            full_extent,
        );
        // NOTE(review): the copy and the compute pass below could share one
        // encoder/submission; kept as two submits to preserve behavior exactly.
        queue.submit(Some(copy_encoder.finish()));
        // --- Upload dab data and params ---
        let dab_bytes = bytemuck::cast_slice(dabs);
        let dab_buf = device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("dab_storage_buf"),
            size: dab_bytes.len() as u64,
            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });
        queue.write_buffer(&dab_buf, 0, dab_bytes);
        let params = DabParams {
            bbox_x0: x0 as i32,
            bbox_y0: y0 as i32,
            bbox_w,
            bbox_h,
            num_dabs: dabs.len() as u32,
            canvas_w,
            canvas_h,
            _pad: 0,
        };
        let params_buf = device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("dab_params_buf"),
            size: std::mem::size_of::<DabParams>() as u64,
            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });
        queue.write_buffer(&params_buf, 0, bytemuck::bytes_of(&params));
        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("brush_dab_bg"),
            layout: &self.compute_bg_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: dab_buf.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: params_buf.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: wgpu::BindingResource::TextureView(canvas.src_view()),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: wgpu::BindingResource::TextureView(canvas.dst_view()),
                },
            ],
        });
        // --- Dispatch: one 8×8 workgroup tile per 8×8 pixels of the bbox ---
        let mut compute_encoder = device.create_command_encoder(
            &wgpu::CommandEncoderDescriptor { label: Some("brush_dab_encoder") },
        );
        {
            let mut pass = compute_encoder.begin_compute_pass(
                &wgpu::ComputePassDescriptor {
                    label: Some("brush_dab_pass"),
                    timestamp_writes: None,
                },
            );
            pass.set_pipeline(&self.compute_pipeline);
            pass.set_bind_group(0, &bg, &[]);
            let wg_x = bbox_w.div_ceil(8);
            let wg_y = bbox_h.div_ceil(8);
            pass.dispatch_workgroups(wg_x, wg_y, 1);
        }
        queue.submit(Some(compute_encoder.finish()));
        // Swap: dst is now the authoritative source
        canvas.swap();
    }
    /// Read the current canvas back to a CPU `Vec<u8>` (raw RGBA, row-major).
    ///
    /// **Blocks** until the GPU work is complete (`Maintain::Wait`).
    /// Should only be called at stroke end, not every frame.
    ///
    /// Returns `None` if no canvas exists for `keyframe_id` or the map fails.
    pub fn readback_canvas(
        &self,
        device: &wgpu::Device,
        queue: &wgpu::Queue,
        keyframe_id: Uuid,
    ) -> Option<Vec<u8>> {
        let canvas = self.canvases.get(&keyframe_id)?;
        let width = canvas.width;
        let height = canvas.height;
        // wgpu requires bytes_per_row to be a multiple of 256
        // (COPY_BYTES_PER_ROW_ALIGNMENT); round up with div_ceil, matching the
        // dispatch-size computation in render_dabs.
        let bytes_per_row_aligned =
            (width * 4).div_ceil(wgpu::COPY_BYTES_PER_ROW_ALIGNMENT)
                * wgpu::COPY_BYTES_PER_ROW_ALIGNMENT;
        let total_bytes = (bytes_per_row_aligned * height) as u64;
        let staging = device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("canvas_readback_buf"),
            size: total_bytes,
            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });
        let mut encoder = device.create_command_encoder(
            &wgpu::CommandEncoderDescriptor { label: Some("canvas_readback_encoder") },
        );
        encoder.copy_texture_to_buffer(
            wgpu::TexelCopyTextureInfo {
                texture: canvas.src(),
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
                aspect: wgpu::TextureAspect::All,
            },
            wgpu::TexelCopyBufferInfo {
                buffer: &staging,
                layout: wgpu::TexelCopyBufferLayout {
                    offset: 0,
                    bytes_per_row: Some(bytes_per_row_aligned),
                    rows_per_image: Some(height),
                },
            },
            wgpu::Extent3d { width, height, depth_or_array_layers: 1 },
        );
        queue.submit(Some(encoder.finish()));
        // Block until complete
        let slice = staging.slice(..);
        let (tx, rx) = std::sync::mpsc::channel();
        slice.map_async(wgpu::MapMode::Read, move |r| { let _ = tx.send(r); });
        let _ = device.poll(wgpu::PollType::wait_indefinitely());
        if rx.recv().ok()?.is_err() { return None; }
        let mapped = slice.get_mapped_range();
        // De-stride: copy only `width * 4` bytes per row (drop alignment padding)
        let bytes_per_row_tight = (width * 4) as usize;
        let bytes_per_row_src = bytes_per_row_aligned as usize;
        let mut pixels = vec![0u8; (width * height * 4) as usize];
        for row in 0..height as usize {
            let src = &mapped[row * bytes_per_row_src .. row * bytes_per_row_src + bytes_per_row_tight];
            let dst = &mut pixels[row * bytes_per_row_tight .. (row + 1) * bytes_per_row_tight];
            dst.copy_from_slice(src);
        }
        drop(mapped);
        staging.unmap();
        // Encode linear premultiplied → sRGB-encoded premultiplied so the returned
        // bytes match what Vello expects (ImageAlphaType::AlphaPremultiplied with
        // sRGB channels). Alpha is left unchanged.
        for pixel in pixels.chunks_exact_mut(4) {
            pixel[0] = linear_to_srgb_byte(pixel[0]);
            pixel[1] = linear_to_srgb_byte(pixel[1]);
            pixel[2] = linear_to_srgb_byte(pixel[2]);
        }
        Some(pixels)
    }
    /// Remove the canvas pair for a keyframe (e.g. when the layer is deleted).
    pub fn remove_canvas(&mut self, keyframe_id: &Uuid) {
        self.canvases.remove(keyframe_id);
    }
}
// ---------------------------------------------------------------------------
// Canvas blit pipeline (renders canvas texture to layer sRGB buffer)
// ---------------------------------------------------------------------------
/// Bind group layout + pipeline for blitting a canvas texture (at document
/// resolution) into a layer render buffer (at viewport resolution), applying
/// the camera transform.
pub struct CanvasBlitPipeline {
    /// Fullscreen-strip render pipeline running `canvas_blit.wgsl`.
    pub pipeline: wgpu::RenderPipeline,
    /// Layout for the per-blit bind group (canvas texture, sampler, camera uniform).
    pub bg_layout: wgpu::BindGroupLayout,
    /// Shared bilinear clamp-to-edge sampler used for every blit.
    pub sampler: wgpu::Sampler,
}
/// Camera parameters uniform for canvas_blit.wgsl.
///
/// Must match `struct CameraParams` in the shader (32 bytes).
#[repr(C)]
#[derive(Clone, Copy, bytemuck::Pod, bytemuck::Zeroable)]
pub struct CameraParams {
    /// Pan offset X in viewport pixels.
    pub pan_x: f32,
    /// Pan offset Y in viewport pixels.
    pub pan_y: f32,
    /// Document→viewport scale factor.
    pub zoom: f32,
    /// Document canvas width in pixels.
    pub canvas_w: f32,
    /// Document canvas height in pixels.
    pub canvas_h: f32,
    /// Render-target width in pixels.
    pub viewport_w: f32,
    /// Render-target height in pixels.
    pub viewport_h: f32,
    /// Pad to a 16-byte multiple for uniform-buffer layout.
    pub _pad: f32,
}
impl CanvasBlitPipeline {
    /// Build the blit render pipeline, its bind group layout, and the shared sampler.
    pub fn new(device: &wgpu::Device) -> Self {
        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
            label: Some("canvas_blit_shader"),
            source: wgpu::ShaderSource::Wgsl(
                include_str!("panes/shaders/canvas_blit.wgsl").into(),
            ),
        });
        let bg_layout = device.create_bind_group_layout(
            &wgpu::BindGroupLayoutDescriptor {
                label: Some("canvas_blit_bgl"),
                entries: &[
                    // 0: canvas texture (sampled in the fragment shader)
                    wgpu::BindGroupLayoutEntry {
                        binding: 0,
                        visibility: wgpu::ShaderStages::FRAGMENT,
                        ty: wgpu::BindingType::Texture {
                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
                            view_dimension: wgpu::TextureViewDimension::D2,
                            multisampled: false,
                        },
                        count: None,
                    },
                    // 1: sampler
                    wgpu::BindGroupLayoutEntry {
                        binding: 1,
                        visibility: wgpu::ShaderStages::FRAGMENT,
                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                        count: None,
                    },
                    // 2: camera uniform
                    wgpu::BindGroupLayoutEntry {
                        binding: 2,
                        visibility: wgpu::ShaderStages::FRAGMENT,
                        ty: wgpu::BindingType::Buffer {
                            ty: wgpu::BufferBindingType::Uniform,
                            has_dynamic_offset: false,
                            min_binding_size: None,
                        },
                        count: None,
                    },
                ],
            },
        );
        let pipeline_layout = device.create_pipeline_layout(
            &wgpu::PipelineLayoutDescriptor {
                label: Some("canvas_blit_pl"),
                bind_group_layouts: &[&bg_layout],
                push_constant_ranges: &[],
            },
        );
        let pipeline = device.create_render_pipeline(
            &wgpu::RenderPipelineDescriptor {
                label: Some("canvas_blit_pipeline"),
                layout: Some(&pipeline_layout),
                vertex: wgpu::VertexState {
                    module: &shader,
                    // vs_main generates a fullscreen strip; no vertex buffers needed.
                    entry_point: Some("vs_main"),
                    buffers: &[],
                    compilation_options: Default::default(),
                },
                fragment: Some(wgpu::FragmentState {
                    module: &shader,
                    entry_point: Some("fs_main"),
                    targets: &[Some(wgpu::ColorTargetState {
                        format: wgpu::TextureFormat::Rgba8Unorm,
                        blend: None, // canvas already stores premultiplied alpha
                        write_mask: wgpu::ColorWrites::ALL,
                    })],
                    compilation_options: Default::default(),
                }),
                primitive: wgpu::PrimitiveState {
                    topology: wgpu::PrimitiveTopology::TriangleStrip,
                    ..Default::default()
                },
                depth_stencil: None,
                multisample: wgpu::MultisampleState::default(),
                multiview: None,
                cache: None,
            },
        );
        let sampler = device.create_sampler(&wgpu::SamplerDescriptor {
            label: Some("canvas_blit_sampler"),
            address_mode_u: wgpu::AddressMode::ClampToEdge,
            address_mode_v: wgpu::AddressMode::ClampToEdge,
            address_mode_w: wgpu::AddressMode::ClampToEdge,
            mag_filter: wgpu::FilterMode::Linear,
            min_filter: wgpu::FilterMode::Linear,
            mipmap_filter: wgpu::FilterMode::Nearest,
            ..Default::default()
        });
        Self { pipeline, bg_layout, sampler }
    }
    /// Render the canvas texture into `target_view` (Rgba8Unorm) with the given camera.
    ///
    /// `target_view` is cleared to transparent before writing.
    pub fn blit(
        &self,
        device: &wgpu::Device,
        queue: &wgpu::Queue,
        canvas_view: &wgpu::TextureView,
        target_view: &wgpu::TextureView,
        camera: &CameraParams,
    ) {
        // Upload camera params (a fresh small uniform buffer per blit)
        let cam_buf = device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("canvas_blit_cam_buf"),
            size: std::mem::size_of::<CameraParams>() as u64,
            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });
        queue.write_buffer(&cam_buf, 0, bytemuck::bytes_of(camera));
        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("canvas_blit_bg"),
            layout: &self.bg_layout,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: wgpu::BindingResource::TextureView(canvas_view),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: wgpu::BindingResource::Sampler(&self.sampler),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: cam_buf.as_entire_binding(),
                },
            ],
        });
        let mut encoder = device.create_command_encoder(
            &wgpu::CommandEncoderDescriptor { label: Some("canvas_blit_encoder") },
        );
        {
            let mut rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
                label: Some("canvas_blit_pass"),
                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
                    view: target_view,
                    resolve_target: None,
                    depth_slice: None,
                    ops: wgpu::Operations {
                        // Clear so out-of-strip regions are transparent.
                        load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
                        store: wgpu::StoreOp::Store,
                    },
                })],
                depth_stencil_attachment: None,
                occlusion_query_set: None,
                timestamp_writes: None,
            });
            rp.set_pipeline(&self.pipeline);
            rp.set_bind_group(0, &bg, &[]);
            // Four vertices = two triangles of the oversized fullscreen strip.
            rp.draw(0..4, 0..1);
        }
        queue.submit(Some(encoder.finish()));
    }
}

View File

@ -23,6 +23,7 @@ use theme::{Theme, ThemeMode};
mod waveform_gpu;
mod cqt_gpu;
mod gpu_brush;
mod config;
use config::AppConfig;

View File

@ -0,0 +1,152 @@
// GPU brush dab compute shader.
//
// Renders all dabs for one stroke segment into the raster canvas.
// Uses a ping-pong pair: reads from `canvas_src` (texture_2d) via textureLoad,
// writes to `canvas_dst` (storage, write-only).
//
// `textureSample` is forbidden in compute shaders; bilinear filtering for the
// smudge tool is implemented manually using four textureLoad calls.
//
// Before this dispatch the caller copies `canvas_src` → `canvas_dst` so that pixels
// outside the union dab bounding box (not touched by the shader) remain unchanged.
//
// Dispatch: ceil(bbox_w / 8) × ceil(bbox_h / 8) × 1
// Each thread covers one pixel in the bounding-box-clamped canvas region.
// ---------------------------------------------------------------------------
// Data layout must match GpuDab in brush_engine.rs (64 bytes, 16-byte aligned).
// ---------------------------------------------------------------------------
struct GpuDab {
    // Field-for-field mirror of `GpuDab` in brush_engine.rs.
    x: f32, y: f32, radius: f32, hardness: f32,             // bytes 0-15
    opacity: f32, color_r: f32, color_g: f32, color_b: f32, // bytes 16-31
    color_a: f32, ndx: f32, ndy: f32, smudge_dist: f32,     // bytes 32-47
    blend_mode: u32, _pad0: u32, _pad1: u32, _pad2: u32,    // bytes 48-63
}
struct Params {
    // Must match `DabParams` in gpu_brush.rs (32 bytes).
    bbox_x0: i32,   // bbox origin in canvas pixels (clamped on the CPU side)
    bbox_y0: i32,
    bbox_w: u32,    // bbox size; dispatch covers ceil(w/8) x ceil(h/8) workgroups
    bbox_h: u32,
    num_dabs: u32,  // number of valid entries in `dabs`
    canvas_w: u32,  // full canvas dimensions in pixels
    canvas_h: u32,
    _pad: u32,      // pad to a 16-byte multiple
}
@group(0) @binding(0) var<storage, read> dabs: array<GpuDab>;  // all dabs for this segment
@group(0) @binding(1) var<uniform> params: Params;
@group(0) @binding(2) var canvas_src: texture_2d<f32>;         // read-only current canvas
@group(0) @binding(3) var canvas_dst: texture_storage_2d<rgba8unorm, write>; // write target
// ---------------------------------------------------------------------------
// Manual bilinear sample from canvas_src at sub-pixel coordinates (px, py).
// Out-of-bounds texels clamp to the canvas edge (replicates ClampToEdge).
// textureSample is forbidden in compute shaders; we use four textureLoad calls.
// ---------------------------------------------------------------------------
// Manually bilinear-filter canvas_src at the sub-pixel point (px, py).
// Texel centres sit at half-integer coordinates, hence the -0.5 shift before
// taking floor/fract. Out-of-range texels clamp to the edge (ClampToEdge
// behaviour); textureSample is unavailable in compute, so four textureLoad
// calls are blended by hand.
fn bilinear_sample(px: f32, py: f32) -> vec4<f32> {
    let max_x = i32(params.canvas_w) - 1;
    let max_y = i32(params.canvas_h) - 1;
    // Shift so the top-left contributing texel and the blend weights fall out
    // of floor/fract directly.
    let sx = px - 0.5;
    let sy = py - 0.5;
    let bx = i32(floor(sx));
    let by = i32(floor(sy));
    let wx = fract(sx);
    let wy = fract(sy);
    let xa = clamp(bx, 0, max_x);
    let xb = clamp(bx + 1, 0, max_x);
    let ya = clamp(by, 0, max_y);
    let yb = clamp(by + 1, 0, max_y);
    let top = mix(textureLoad(canvas_src, vec2<i32>(xa, ya), 0),
                  textureLoad(canvas_src, vec2<i32>(xb, ya), 0), wx);
    let bottom = mix(textureLoad(canvas_src, vec2<i32>(xa, yb), 0),
                     textureLoad(canvas_src, vec2<i32>(xb, yb), 0), wx);
    return mix(top, bottom, wy);
}
// ---------------------------------------------------------------------------
// Apply a single dab to `current` and return the updated colour.
// ---------------------------------------------------------------------------
fn apply_dab(current: vec4<f32>, dab: GpuDab, px: i32, py: i32) -> vec4<f32> {
    // Squared distance from the pixel centre to the dab centre, normalized so
    // rr == 1 exactly at the dab radius.
    let dx = f32(px) + 0.5 - dab.x;
    let dy = f32(py) + 0.5 - dab.y;
    let rr = (dx * dx + dy * dy) / (dab.radius * dab.radius);
    if rr > 1.0 { return current; }
    // Two-segment linear falloff (identical to libmypaint calculate_opa):
    // weight 1 at the centre, `h` at rr == h, 0 at rr == 1.
    let h = clamp(dab.hardness, 0.001, 1.0);
    var opa_weight: f32;
    if rr <= h {
        opa_weight = 1.0 + rr * (-(1.0 / h - 1.0));
    } else {
        opa_weight = h / (1.0 - h) + rr * (-h / (1.0 - h));
    }
    opa_weight = clamp(opa_weight, 0.0, 1.0);
    if dab.blend_mode == 0u {
        // Normal: "over" operator on premultiplied RGBA
        let dab_a = opa_weight * dab.opacity * dab.color_a;
        if dab_a <= 0.0 { return current; }
        let ba = 1.0 - dab_a;
        return vec4<f32>(
            dab_a * dab.color_r + ba * current.r,
            dab_a * dab.color_g + ba * current.g,
            dab_a * dab.color_b + ba * current.b,
            dab_a + ba * current.a,
        );
    } else if dab.blend_mode == 1u {
        // Erase: multiplicative alpha reduction; colour channels rescaled to
        // keep the premultiplied invariant. Note select() evaluates both
        // operands, so new_a / current.a may divide by ~0 when current.a is
        // tiny — harmless for WGSL floats (no trap); the 0.0 arm is selected.
        let dab_a = opa_weight * dab.opacity * dab.color_a;
        if dab_a <= 0.0 { return current; }
        let new_a = current.a * (1.0 - dab_a);
        let scale = select(0.0, new_a / current.a, current.a > 1e-6);
        return vec4<f32>(current.r * scale, current.g * scale, current.b * scale, new_a);
    } else {
        // Smudge: directional warp sample from position behind the stroke direction
        let alpha = opa_weight * dab.opacity;
        if alpha <= 0.0 { return current; }
        let src_x = f32(px) + 0.5 - dab.ndx * dab.smudge_dist;
        let src_y = f32(py) + 0.5 - dab.ndy * dab.smudge_dist;
        let src = bilinear_sample(src_x, src_y);
        // Blend the picked-up colour over the current pixel.
        let da = 1.0 - alpha;
        return vec4<f32>(
            alpha * src.r + da * current.r,
            alpha * src.g + da * current.g,
            alpha * src.b + da * current.b,
            alpha * src.a + da * current.a,
        );
    }
}
// ---------------------------------------------------------------------------
// Main entry point
// ---------------------------------------------------------------------------
@compute @workgroup_size(8, 8)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    // One thread per pixel of the dab bounding box; gid is relative to bbox origin.
    if gid.x >= params.bbox_w || gid.y >= params.bbox_h { return; }
    let px = i32(gid.x) + params.bbox_x0;
    let py = i32(gid.y) + params.bbox_y0;
    // Bounds check within the canvas (bbox may extend past canvas edges)
    if px < 0 || py < 0 || u32(px) >= params.canvas_w || u32(py) >= params.canvas_h { return; }
    // Read current pixel from source (canvas_dst was pre-filled from canvas_src
    // by the caller, but we read from canvas_src to ensure consistency)
    var current = textureLoad(canvas_src, vec2<i32>(px, py), 0);
    // Fold every dab into this pixel in stroke order (sequential in the thread,
    // no races between threads since each thread owns a unique output pixel)
    for (var i = 0u; i < params.num_dabs; i++) {
        current = apply_dab(current, dabs[i], px, py);
    }
    textureStore(canvas_dst, vec2<i32>(px, py), current);
}

View File

@ -0,0 +1,83 @@
// Canvas blit shader.
//
// Renders a GPU raster canvas (at document resolution) into the layer's sRGB
// render buffer (at viewport resolution), applying the camera transform
// (pan + zoom) to map document-space pixels to viewport-space pixels.
//
// Any viewport pixel whose corresponding document coordinate falls outside
// [0, canvas_w) × [0, canvas_h) outputs transparent black.
struct CameraParams {
    // Must match `CameraParams` in gpu_brush.rs (32 bytes).
    pan_x: f32,      // pan offset X in viewport pixels
    pan_y: f32,      // pan offset Y in viewport pixels
    zoom: f32,       // document-to-viewport scale factor
    canvas_w: f32,   // document canvas size in pixels
    canvas_h: f32,
    viewport_w: f32, // render-target size in pixels
    viewport_h: f32,
    _pad: f32,       // pad to a 16-byte multiple
}
@group(0) @binding(0) var canvas_tex: texture_2d<f32>;  // document-resolution canvas
@group(0) @binding(1) var canvas_sampler: sampler;      // bilinear, clamp-to-edge
@group(0) @binding(2) var<uniform> camera: CameraParams;
struct VertexOutput {
    @builtin(position) position: vec4<f32>,
    @location(0) uv: vec2<f32>, // spans [0,1] over the on-screen area
}
// Generates a fullscreen triangle strip (same pattern as blit.wgsl)
@vertex
fn vs_main(@builtin(vertex_index) vertex_index: u32) -> VertexOutput {
    // Oversized-strip trick: four vertices whose clip positions run past the
    // screen edges; uv interpolates to exactly [0,1] over the visible area.
    let u = 2.0 * f32(vertex_index & 1u);
    let v = f32(vertex_index & 2u);
    var out: VertexOutput;
    out.uv = vec2<f32>(u, v);
    out.position = vec4<f32>(u * 2.0 - 1.0, 1.0 - v * 2.0, 0.0, 1.0);
    return out;
}
// Linear → sRGB encoding for a single channel (IEC 61966-2-1 piecewise curve).
// Applied to premultiplied linear values so the downstream srgb_to_linear
// pass round-trips correctly without darkening semi-transparent edges.
fn linear_to_srgb(c: f32) -> f32 {
    // Linear toe segment near black; also absorbs any negative inputs.
    if c <= 0.0031308 {
        return c * 12.92;
    }
    // Gamma segment; max() guards pow() against negatives (unreachable here,
    // kept for parity with the reference formula).
    return 1.055 * pow(max(c, 0.0), 1.0 / 2.4) - 0.055;
}
@fragment
fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
    // Map viewport UV [0,1] → viewport pixel
    let vp = in.uv * vec2<f32>(camera.viewport_w, camera.viewport_h);
    // Map viewport pixel → document pixel (inverse camera transform)
    let doc = (vp - vec2<f32>(camera.pan_x, camera.pan_y)) / camera.zoom;
    // Map document pixel → canvas UV [0,1]
    let canvas_uv = doc / vec2<f32>(camera.canvas_w, camera.canvas_h);
    // Out-of-bounds → transparent black
    if canvas_uv.x < 0.0 || canvas_uv.x > 1.0
        || canvas_uv.y < 0.0 || canvas_uv.y > 1.0 {
        return vec4<f32>(0.0, 0.0, 0.0, 0.0);
    }
    // The canvas stores premultiplied linear RGBA.
    // The srgb_to_linear converter downstream applies the sRGB gamma formula
    // channel-by-channel without alpha awareness. To make the round-trip
    // transparent we pre-encode with linear_to_srgb here:
    //   canvas (linear premul) → sRGB buffer → srgb_to_linear → linear premul
    // Without this, srgb_to_linear darkens small premultiplied values
    // (e.g. white at 10% opacity: 0.1 → 0.01), producing a grey halo.
    //
    // textureSampleLevel, not textureSample: implicit-derivative sampling is
    // invalid after the non-uniform early return above (WGSL uniformity
    // analysis rejects it), and the canvas has a single mip level anyway.
    let c = textureSampleLevel(canvas_tex, canvas_sampler, canvas_uv, 0.0);
    return vec4<f32>(
        linear_to_srgb(c.r),
        linear_to_srgb(c.g),
        linear_to_srgb(c.b),
        c.a,
    );
}

View File

@ -36,6 +36,10 @@ struct SharedVelloResources {
effect_processor: Mutex<EffectProcessor>,
/// sRGB to linear color converter (for Vello output)
srgb_to_linear: SrgbToLinearConverter,
/// GPU raster brush engine (compute pipeline + canvas texture cache)
gpu_brush: Mutex<crate::gpu_brush::GpuBrushEngine>,
/// Canvas blit pipeline (renders GPU canvas to layer sRGB buffer)
canvas_blit: crate::gpu_brush::CanvasBlitPipeline,
}
/// Per-instance Vello resources (created for each Stage pane)
@ -206,7 +210,11 @@ impl SharedVelloResources {
// Initialize sRGB to linear converter for Vello output
let srgb_to_linear = SrgbToLinearConverter::new(device);
println!("✅ Vello shared resources initialized (renderer, shaders, HDR compositor, effect processor, and color converter)");
// Initialize GPU raster brush engine
let gpu_brush = crate::gpu_brush::GpuBrushEngine::new(device);
let canvas_blit = crate::gpu_brush::CanvasBlitPipeline::new(device);
println!("✅ Vello shared resources initialized (renderer, shaders, HDR compositor, effect processor, color converter, and GPU brush engine)");
Ok(Self {
renderer: Arc::new(Mutex::new(renderer)),
@ -220,6 +228,8 @@ impl SharedVelloResources {
compositor,
effect_processor: Mutex::new(effect_processor),
srgb_to_linear,
gpu_brush: Mutex::new(gpu_brush),
canvas_blit,
})
}
}
@ -390,6 +400,15 @@ struct VelloRenderContext {
mouse_world_pos: Option<vello::kurbo::Point>,
/// Latest webcam frame for live preview (if any camera is active)
webcam_frame: Option<lightningbeam_core::webcam::CaptureFrame>,
/// GPU brush dabs to dispatch in this frame's prepare() call.
pending_raster_dabs: Option<PendingRasterDabs>,
/// Instance ID (for storing readback results in the global map).
instance_id_for_readback: u64,
/// The (layer_id, keyframe_id) of the raster layer with a live GPU canvas.
/// Present for the entire stroke duration, not just frames with new dabs.
painting_canvas: Option<(uuid::Uuid, uuid::Uuid)>,
/// GPU canvas keyframe to remove at the top of this prepare() call.
pending_canvas_removal: Option<uuid::Uuid>,
}
/// Callback for Vello rendering within egui
@ -470,6 +489,77 @@ impl egui_wgpu::CallbackTrait for VelloCallback {
// This means we only need 1 layer buffer at a time (plus the HDR accumulator)
instance_resources.ensure_hdr_texture(device, &shared, width, height);
// --- Deferred GPU canvas removal ---
// The previous frame's render_content consumed a readback result and updated
// raw_pixels. Now that the Vello scene is current we can safely drop the
// GPU canvas; painting_canvas was already cleared so the compositor will use
// the Vello scene from here on.
if let Some(kf_id) = self.ctx.pending_canvas_removal {
if let Ok(mut gpu_brush) = shared.gpu_brush.lock() {
gpu_brush.remove_canvas(&kf_id);
}
}
// --- GPU brush dispatch ---
// Dispatch the compute shader for any pending raster dabs from this frame's
// input event. Must happen before compositing so the updated canvas texture
// is sampled correctly when the layer is blitted.
if let Some(ref pending) = self.ctx.pending_raster_dabs {
if let Ok(mut gpu_brush) = shared.gpu_brush.lock() {
// Ensure the canvas pair exists (creates it if missing or wrong size)
gpu_brush.ensure_canvas(
device,
pending.keyframe_id,
pending.canvas_width,
pending.canvas_height,
);
// On stroke start, upload the pre-stroke pixel data to both textures
if let Some(ref pixels) = pending.initial_pixels {
if let Some(canvas) = gpu_brush.canvases.get(&pending.keyframe_id) {
canvas.upload(queue, pixels);
}
}
// Dispatch the compute shader for this frame's dabs
if !pending.dabs.is_empty() {
gpu_brush.render_dabs(
device,
queue,
pending.keyframe_id,
&pending.dabs,
pending.dab_bbox,
pending.canvas_width,
pending.canvas_height,
);
}
// On stroke end, read back the finished canvas and store it so
// the next ui() call can create the undo action.
if pending.wants_final_readback {
if let Some(pixels) = gpu_brush.readback_canvas(
device,
queue,
pending.keyframe_id,
) {
let results = RASTER_READBACK_RESULTS.get_or_init(|| {
Arc::new(Mutex::new(std::collections::HashMap::new()))
});
if let Ok(mut map) = results.lock() {
map.insert(self.ctx.instance_id_for_readback, RasterReadbackResult {
layer_id: pending.layer_id,
time: pending.time,
canvas_width: pending.canvas_width,
canvas_height: pending.canvas_height,
pixels,
});
}
// Canvas is kept alive: the compositor will still blit it
// this frame (painting_canvas is still Some). render_content
// will clear painting_canvas and set pending_canvas_removal,
// so the texture is freed at the top of the next prepare().
}
}
}
}
let mut image_cache = shared.image_cache.lock().unwrap();
let composite_result = lightningbeam_core::renderer::render_document_for_compositing(
@ -558,7 +648,14 @@ impl egui_wgpu::CallbackTrait for VelloCallback {
// Now render and composite each layer incrementally
for rendered_layer in &composite_result.layers {
if !rendered_layer.has_content {
// Check if this raster layer has a live GPU canvas that should be
// blitted every frame, even when no new dabs arrived this frame.
// `painting_canvas` persists for the entire stroke duration.
let gpu_canvas_kf: Option<uuid::Uuid> = self.ctx.painting_canvas
.filter(|(layer_id, _)| *layer_id == rendered_layer.layer_id)
.map(|(_, kf_id)| kf_id);
if !rendered_layer.has_content && gpu_canvas_kf.is_none() {
continue;
}
@ -573,9 +670,42 @@ impl egui_wgpu::CallbackTrait for VelloCallback {
buffer_pool.get_view(hdr_layer_handle),
&instance_resources.hdr_texture_view,
) {
// Render layer scene to sRGB buffer
if let Ok(mut renderer) = shared.renderer.lock() {
renderer.render_to_texture(device, queue, &rendered_layer.scene, srgb_view, &layer_render_params).ok();
// GPU canvas blit path: if a live GPU canvas exists for this
// raster layer, sample it directly instead of rendering the Vello
// scene (which lags until raw_pixels is updated after readback).
let used_gpu_canvas = if let Some(kf_id) = gpu_canvas_kf {
let mut used = false;
if let Ok(gpu_brush) = shared.gpu_brush.lock() {
if let Some(canvas) = gpu_brush.canvases.get(&kf_id) {
let camera = crate::gpu_brush::CameraParams {
pan_x: self.ctx.pan_offset.x,
pan_y: self.ctx.pan_offset.y,
zoom: self.ctx.zoom,
canvas_w: canvas.width as f32,
canvas_h: canvas.height as f32,
viewport_w: width as f32,
viewport_h: height as f32,
_pad: 0.0,
};
shared.canvas_blit.blit(
device, queue,
canvas.src_view(),
srgb_view,
&camera,
);
used = true;
}
}
used
} else {
false
};
if !used_gpu_canvas {
// Render layer scene to sRGB buffer
if let Ok(mut renderer) = shared.renderer.lock() {
renderer.render_to_texture(device, queue, &rendered_layer.scene, srgb_view, &layer_render_params).ok();
}
}
// Convert sRGB to linear HDR
@ -2140,6 +2270,20 @@ pub struct StagePane {
raster_stroke_state: Option<(uuid::Uuid, f64, lightningbeam_core::brush_engine::StrokeState, Vec<u8>)>,
// Last raster stroke point (for incremental segment painting)
raster_last_point: Option<lightningbeam_core::raster_layer::StrokePoint>,
/// GPU dabs computed during this frame's drag event — consumed by prepare().
pending_raster_dabs: Option<PendingRasterDabs>,
/// Undo snapshot info captured at mouse-down; claimed when readback completes.
/// (layer_id, time, canvas_w, canvas_h, buffer_before)
pending_undo_before: Option<(uuid::Uuid, f64, u32, u32, Vec<u8>)>,
/// The (layer_id, keyframe_id) of the raster layer whose GPU canvas is live.
/// Set on mouse-down, cleared when the readback result is consumed.
/// Used every frame to blit the GPU canvas instead of the stale Vello scene.
painting_canvas: Option<(uuid::Uuid, uuid::Uuid)>,
/// Keyframe UUID whose GPU canvas should be removed at the start of the next
/// prepare() call. Set by render_content after consuming the readback result
/// and updating raw_pixels, so the canvas lives one extra composite frame to
/// avoid a flash of the stale Vello scene.
pending_canvas_removal: Option<uuid::Uuid>,
/// Synthetic drag/click override for test mode replay (debug builds only)
#[cfg(debug_assertions)]
replay_override: Option<ReplayDragState>,
@ -2171,6 +2315,46 @@ static INSTANCE_COUNTER: std::sync::atomic::AtomicU64 = std::sync::atomic::Atomi
// Global storage for eyedropper results (instance_id -> (color, color_mode))
static EYEDROPPER_RESULTS: OnceLock<Arc<Mutex<std::collections::HashMap<u64, (egui::Color32, super::ColorMode)>>>> = OnceLock::new();
/// Pending GPU dabs for a single drag event.
///
/// Created by the event handler (`handle_raster_stroke_tool`) and consumed once
/// by `VelloCallback::prepare()`.
struct PendingRasterDabs {
    /// Keyframe UUID — indexes the canvas texture pair in `GpuBrushEngine`.
    keyframe_id: uuid::Uuid,
    /// Layer UUID — identifies the layer in the undo readback result.
    layer_id: uuid::Uuid,
    /// Playback time of the keyframe (seconds).
    time: f64,
    /// Canvas width in pixels.
    canvas_width: u32,
    /// Canvas height in pixels.
    canvas_height: u32,
    /// Raw RGBA pixel data to upload to the canvas texture on the very first dab of
    /// a stroke (i.e., when the stroke starts). `None` on subsequent drag events.
    initial_pixels: Option<Vec<u8>>,
    /// Dab list computed by `BrushEngine::compute_dabs()`; may be empty for a
    /// "readback only" entry created on mouse-up with no new movement.
    dabs: Vec<lightningbeam_core::brush_engine::GpuDab>,
    /// Union bounding box of `dabs` as `(x0, y0, x1, y1)` in canvas pixel
    /// coordinates. When `dabs` is empty this is the inverted sentinel
    /// `(i32::MAX, i32::MAX, i32::MIN, i32::MIN)`.
    dab_bbox: (i32, i32, i32, i32),
    /// When `true`, perform a full canvas readback after dispatching and store
    /// the result in `RASTER_READBACK_RESULTS` so the next frame can create
    /// the undo action.
    wants_final_readback: bool,
}
/// Result stored by `prepare()` after a stroke-end readback.
/// Result stored by `prepare()` after a stroke-end readback.
///
/// Consumed by `render_content` on a later frame to build the
/// `RasterStrokeAction` undo entry (keyed by instance id in
/// `RASTER_READBACK_RESULTS`).
struct RasterReadbackResult {
    /// Layer the completed stroke belongs to.
    layer_id: uuid::Uuid,
    /// Playback time of the keyframe that was painted.
    time: f64,
    /// Canvas width in pixels (matches `pixels` layout).
    canvas_width: u32,
    /// Canvas height in pixels (matches `pixels` layout).
    canvas_height: u32,
    /// Raw RGBA pixels from the completed stroke.
    pixels: Vec<u8>,
}
// Global storage for raster readback results (instance_id -> result)
static RASTER_READBACK_RESULTS: OnceLock<Arc<Mutex<std::collections::HashMap<u64, RasterReadbackResult>>>> = OnceLock::new();
/// Cached 2x2 stipple image brush for selection overlay.
/// Pattern: [[black, transparent], [transparent, white]]
/// Tiled with nearest-neighbor sampling so each pixel stays crisp.
@ -2217,6 +2401,10 @@ impl StagePane {
current_snap: None,
raster_stroke_state: None,
raster_last_point: None,
pending_raster_dabs: None,
pending_undo_before: None,
painting_canvas: None,
pending_canvas_removal: None,
#[cfg(debug_assertions)]
replay_override: None,
}
@ -4183,9 +4371,12 @@ impl StagePane {
/// Handle raster stroke tool input (Draw/Erase/Smudge on a raster layer).
///
/// Paints incrementally into `document_mut()` on every drag event so the
/// result is visible immediately. On mouse-up the pre/post raw-pixel
/// buffers are wrapped in a `RasterStrokeAction` for undo/redo.
/// Computes GPU dab lists for each drag event and stores them in
/// `self.pending_raster_dabs` for dispatch by `VelloCallback::prepare()`.
///
/// The actual pixel rendering happens on the GPU (compute shader). The CPU
/// only does dab placement arithmetic (cheap). On stroke end a readback is
/// requested so the undo system can capture the final pixel state.
fn handle_raster_stroke_tool(
&mut self,
ui: &mut egui::Ui,
@ -4197,7 +4388,7 @@ impl StagePane {
use lightningbeam_core::tool::ToolState;
use lightningbeam_core::layer::AnyLayer;
use lightningbeam_core::raster_layer::StrokePoint;
use lightningbeam_core::brush_engine::{BrushEngine, StrokeState, image_from_raw};
use lightningbeam_core::brush_engine::{BrushEngine, StrokeState};
use lightningbeam_core::raster_layer::StrokeRecord;
let active_layer_id = match *shared.active_layer_id {
@ -4209,9 +4400,7 @@ impl StagePane {
let is_raster = shared.action_executor.document()
.get_layer(&active_layer_id)
.map_or(false, |l| matches!(l, AnyLayer::Raster(_)));
if !is_raster {
return;
}
if !is_raster { return; }
let brush = {
use lightningbeam_core::brush_settings::BrushSettings;
@ -4235,73 +4424,143 @@ impl StagePane {
[c.r() as f32 / 255.0, c.g() as f32 / 255.0, c.b() as f32 / 255.0, c.a() as f32 / 255.0]
};
// Mouse down: snapshot buffer_before, init stroke state, paint first dab
// ----------------------------------------------------------------
// Mouse down: capture buffer_before, start stroke, compute first dab
// ----------------------------------------------------------------
if self.rsp_drag_started(response) || self.rsp_clicked(response) {
let (doc_width, doc_height, buffer_before) = {
let (doc_width, doc_height) = {
let doc = shared.action_executor.document();
let buf = doc.get_layer(&active_layer_id)
.and_then(|l| if let AnyLayer::Raster(rl) = l {
rl.keyframe_at(*shared.playback_time).map(|kf| kf.raw_pixels.clone())
} else { None })
.unwrap_or_default();
(doc.width as u32, doc.height as u32, buf)
(doc.width as u32, doc.height as u32)
};
// Start a fresh stroke state; MAX distance ensures first point gets a dab
let mut stroke_state = StrokeState::new();
stroke_state.distance_since_last_dab = f32::MAX;
let first_pt = StrokePoint { x: world_pos.x, y: world_pos.y, pressure: 1.0, tilt_x: 0.0, tilt_y: 0.0, timestamp: 0.0 };
// Paint the first dab directly into the document
// Ensure the keyframe exists BEFORE reading its ID, so we always get
// the real UUID. Previously we read the ID first and fell back to a
// randomly-generated UUID when no keyframe existed; that fake UUID was
// stored in painting_canvas but subsequent drag frames used the real UUID
// from keyframe_at(), causing the GPU canvas to be a different object from
// the one being composited.
{
let doc = shared.action_executor.document_mut();
if let Some(AnyLayer::Raster(rl)) = doc.get_layer_mut(&active_layer_id) {
let kf = rl.ensure_keyframe_at(*shared.playback_time, doc_width, doc_height);
let mut img = image_from_raw(std::mem::take(&mut kf.raw_pixels), kf.width, kf.height);
let single = StrokeRecord {
brush_settings: brush.clone(),
color,
blend_mode,
points: vec![first_pt.clone()],
};
BrushEngine::apply_stroke_with_state(&mut img, &single, &mut stroke_state);
kf.raw_pixels = img.into_raw();
rl.ensure_keyframe_at(*shared.playback_time, doc_width, doc_height);
}
}
self.raster_stroke_state = Some((active_layer_id, *shared.playback_time, stroke_state, buffer_before));
// Now read the guaranteed-to-exist keyframe to get the real UUID.
let (keyframe_id, canvas_width, canvas_height, buffer_before, initial_pixels) = {
let doc = shared.action_executor.document();
if let Some(AnyLayer::Raster(rl)) = doc.get_layer(&active_layer_id) {
if let Some(kf) = rl.keyframe_at(*shared.playback_time) {
let raw = kf.raw_pixels.clone();
let init = if raw.is_empty() {
vec![0u8; (kf.width * kf.height * 4) as usize]
} else {
raw.clone()
};
(kf.id, kf.width, kf.height, raw, init)
} else {
return; // shouldn't happen after ensure_keyframe_at
}
} else {
return;
}
};
// Compute the first dab (single-point tap)
let mut stroke_state = StrokeState::new();
stroke_state.distance_since_last_dab = f32::MAX;
let first_pt = StrokePoint {
x: world_pos.x, y: world_pos.y,
pressure: 1.0, tilt_x: 0.0, tilt_y: 0.0, timestamp: 0.0,
};
let single = StrokeRecord {
brush_settings: brush.clone(),
color,
blend_mode,
points: vec![first_pt.clone()],
};
let (dabs, dab_bbox) = BrushEngine::compute_dabs(&single, &mut stroke_state);
self.painting_canvas = Some((active_layer_id, keyframe_id));
self.pending_undo_before = Some((
active_layer_id,
*shared.playback_time,
canvas_width,
canvas_height,
buffer_before,
));
self.pending_raster_dabs = Some(PendingRasterDabs {
keyframe_id,
layer_id: active_layer_id,
time: *shared.playback_time,
canvas_width,
canvas_height,
initial_pixels: Some(initial_pixels),
dabs,
dab_bbox,
wants_final_readback: false,
});
self.raster_stroke_state = Some((
active_layer_id,
*shared.playback_time,
stroke_state,
Vec::new(), // buffer_before now lives in pending_undo_before
));
self.raster_last_point = Some(first_pt);
*shared.tool_state = ToolState::DrawingRasterStroke { points: vec![] };
}
// Mouse drag: paint each new segment immediately
// ----------------------------------------------------------------
// Mouse drag: compute dabs for this segment
// ----------------------------------------------------------------
if self.rsp_dragged(response) {
if let Some((layer_id, time, ref mut stroke_state, _)) = self.raster_stroke_state {
if let Some(prev_pt) = self.raster_last_point.take() {
let curr_pt = StrokePoint { x: world_pos.x, y: world_pos.y, pressure: 1.0, tilt_x: 0.0, tilt_y: 0.0, timestamp: 0.0 };
let curr_pt = StrokePoint {
x: world_pos.x, y: world_pos.y,
pressure: 1.0, tilt_x: 0.0, tilt_y: 0.0, timestamp: 0.0,
};
// Skip if not moved enough
const MIN_DIST_SQ: f32 = 1.5 * 1.5;
let dx = curr_pt.x - prev_pt.x;
let dy = curr_pt.y - prev_pt.y;
let moved_pt = if dx * dx + dy * dy >= MIN_DIST_SQ { curr_pt.clone() } else { prev_pt.clone() };
let moved_pt = if dx * dx + dy * dy >= MIN_DIST_SQ {
curr_pt.clone()
} else {
prev_pt.clone()
};
if dx * dx + dy * dy >= MIN_DIST_SQ {
let doc = shared.action_executor.document_mut();
if let Some(AnyLayer::Raster(rl)) = doc.get_layer_mut(&layer_id) {
if let Some(kf) = rl.keyframe_at_mut(time) {
let mut img = image_from_raw(std::mem::take(&mut kf.raw_pixels), kf.width, kf.height);
let seg = StrokeRecord {
brush_settings: brush.clone(),
color,
blend_mode,
points: vec![prev_pt, curr_pt],
};
BrushEngine::apply_stroke_with_state(&mut img, &seg, stroke_state);
kf.raw_pixels = img.into_raw();
}
}
// Get keyframe info (needed for canvas dimensions)
let (kf_id, kw, kh) = {
let doc = shared.action_executor.document();
if let Some(AnyLayer::Raster(rl)) = doc.get_layer(&layer_id) {
if let Some(kf) = rl.keyframe_at(time) {
(kf.id, kf.width, kf.height)
} else { self.raster_last_point = Some(moved_pt); return; }
} else { self.raster_last_point = Some(moved_pt); return; }
};
let seg = StrokeRecord {
brush_settings: brush.clone(),
color,
blend_mode,
points: vec![prev_pt, curr_pt],
};
let (dabs, dab_bbox) = BrushEngine::compute_dabs(&seg, stroke_state);
self.pending_raster_dabs = Some(PendingRasterDabs {
keyframe_id: kf_id,
layer_id,
time,
canvas_width: kw,
canvas_height: kh,
initial_pixels: None,
dabs,
dab_bbox,
wants_final_readback: false,
});
}
self.raster_last_point = Some(moved_pt);
@ -4309,37 +4568,44 @@ impl StagePane {
}
}
// Mouse up: wrap the pre/post buffers in an undo action
// ----------------------------------------------------------------
// Mouse up: request a full-canvas readback for the undo snapshot
// ----------------------------------------------------------------
if self.rsp_drag_stopped(response)
|| (self.rsp_any_released(ui) && matches!(*shared.tool_state, ToolState::DrawingRasterStroke { .. }))
{
if let Some((layer_id, time, _, buffer_before)) = self.raster_stroke_state.take() {
use lightningbeam_core::actions::RasterStrokeAction;
let (doc_width, doc_height, buffer_after) = {
let doc = shared.action_executor.document();
let buf = doc.get_layer(&layer_id)
.and_then(|l| if let AnyLayer::Raster(rl) = l {
rl.keyframe_at(time).map(|kf| kf.raw_pixels.clone())
} else { None })
.unwrap_or_default();
(doc.width as u32, doc.height as u32, buf)
};
let action = RasterStrokeAction::new(
layer_id,
time,
buffer_before,
buffer_after,
doc_width,
doc_height,
);
// execute is a no-op for the first call (pixels already in document),
// but registers the action in the undo stack
let _ = shared.action_executor.execute(Box::new(action));
}
self.raster_stroke_state = None;
self.raster_last_point = None;
*shared.tool_state = ToolState::Idle;
// Mark the pending dabs (if any this frame) for final readback.
// If there are no pending dabs this frame, create a "readback only" entry.
if let Some(ref mut pending) = self.pending_raster_dabs {
pending.wants_final_readback = true;
} else if let Some((ub_layer, ub_time, ub_cw, ub_ch, _)) =
self.pending_undo_before.as_ref()
{
let (ub_layer, ub_time, ub_cw, ub_ch) = (*ub_layer, *ub_time, *ub_cw, *ub_ch);
// Get keyframe_id for the canvas texture lookup
let kf_id = shared.action_executor.document()
.get_layer(&ub_layer)
.and_then(|l| if let AnyLayer::Raster(rl) = l {
rl.keyframe_at(ub_time).map(|kf| kf.id)
} else { None });
if let Some(kf_id) = kf_id {
self.pending_raster_dabs = Some(PendingRasterDabs {
keyframe_id: kf_id,
layer_id: ub_layer,
time: ub_time,
canvas_width: ub_cw,
canvas_height: ub_ch,
initial_pixels: None,
dabs: Vec::new(),
dab_bbox: (i32::MAX, i32::MAX, i32::MIN, i32::MIN),
wants_final_readback: true,
});
}
}
}
}
@ -6830,6 +7096,35 @@ impl PaneRenderer for StagePane {
self.pan_offset = viewport_center - canvas_center;
}
// Check for completed raster stroke readbacks and create undo actions
if let Ok(mut results) = RASTER_READBACK_RESULTS
.get_or_init(|| Arc::new(Mutex::new(std::collections::HashMap::new())))
.lock() {
if let Some(readback) = results.remove(&self.instance_id) {
if let Some((layer_id, time, w, h, buffer_before)) = self.pending_undo_before.take() {
use lightningbeam_core::actions::RasterStrokeAction;
let action = RasterStrokeAction::new(
layer_id,
time,
buffer_before,
readback.pixels.clone(),
w,
h,
);
// execute() sets raw_pixels = buffer_after so future Vello renders
// and file saves see the completed stroke.
let _ = shared.action_executor.execute(Box::new(action));
}
// raw_pixels is now up to date; switch compositing back to the Vello
// scene. Schedule the GPU canvas for removal at the start of the next
// prepare() — keeping it alive for this frame's composite avoids a
// one-frame flash of the stale Vello scene.
if let Some((_, kf_id)) = self.painting_canvas.take() {
self.pending_canvas_removal = Some(kf_id);
}
}
}
// Check for completed eyedropper samples from GPU readback and apply them
if let Ok(mut results) = EYEDROPPER_RESULTS
.get_or_init(|| Arc::new(Mutex::new(std::collections::HashMap::new())))
@ -7176,6 +7471,10 @@ impl PaneRenderer for StagePane {
region_selection: shared.region_selection.clone(),
mouse_world_pos,
webcam_frame: shared.webcam_frame.clone(),
pending_raster_dabs: self.pending_raster_dabs.take(),
instance_id_for_readback: self.instance_id,
painting_canvas: self.painting_canvas,
pending_canvas_removal: self.pending_canvas_removal.take(),
}};
let cb = egui_wgpu::Callback::new_paint_callback(