// GPU brush dab compute shader. // // Renders all dabs for one stroke segment into the raster canvas. // Uses a ping-pong pair: reads from `canvas_src` (texture_2d) via textureLoad, // writes to `canvas_dst` (storage, write-only). // // `textureSample` is forbidden in compute shaders; bilinear filtering for the // smudge tool is implemented manually using four textureLoad calls. // // Before this dispatch the caller copies `canvas_src` → `canvas_dst` so that pixels // outside the union dab bounding box (not touched by the shader) remain unchanged. // // Dispatch: ceil(bbox_w / 8) × ceil(bbox_h / 8) × 1 // Each thread covers one pixel in the bounding-box-clamped canvas region. // --------------------------------------------------------------------------- // Data layout must match GpuDab in brush_engine.rs (64 bytes, 16-byte aligned). // --------------------------------------------------------------------------- struct GpuDab { x: f32, y: f32, radius: f32, hardness: f32, // bytes 0–15 opacity: f32, color_r: f32, color_g: f32, color_b: f32, // bytes 16–31 color_a: f32, ndx: f32, ndy: f32, smudge_dist: f32, // bytes 32–47 blend_mode: u32, elliptical_dab_ratio: f32, elliptical_dab_angle: f32, lock_alpha: f32, // bytes 48–63 } struct Params { bbox_x0: i32, bbox_y0: i32, bbox_w: u32, bbox_h: u32, num_dabs: u32, canvas_w: u32, canvas_h: u32, _pad: u32, } @group(0) @binding(0) var dabs: array; @group(0) @binding(1) var params: Params; @group(0) @binding(2) var canvas_src: texture_2d; @group(0) @binding(3) var canvas_dst: texture_storage_2d; // --------------------------------------------------------------------------- // Manual bilinear sample from canvas_src at sub-pixel coordinates (px, py). // Out-of-bounds texels clamp to the canvas edge (replicates ClampToEdge). // textureSample is forbidden in compute shaders; we use four textureLoad calls. // --------------------------------------------------------------------------- fn bilinear_sample(px: f32, py: f32) -> vec4 { let cw = i32(params.canvas_w); let ch = i32(params.canvas_h); // Integer coords of the top-left sample let ix = i32(floor(px - 0.5)); let iy = i32(floor(py - 0.5)); // Fractional weights let fx = fract(px - 0.5); let fy = fract(py - 0.5); // Clamp to [0, dim-1] let x0 = clamp(ix, 0, cw - 1); let x1 = clamp(ix + 1, 0, cw - 1); let y0 = clamp(iy, 0, ch - 1); let y1 = clamp(iy + 1, 0, ch - 1); let s00 = textureLoad(canvas_src, vec2(x0, y0), 0); let s10 = textureLoad(canvas_src, vec2(x1, y0), 0); let s01 = textureLoad(canvas_src, vec2(x0, y1), 0); let s11 = textureLoad(canvas_src, vec2(x1, y1), 0); return mix(mix(s00, s10, fx), mix(s01, s11, fx), fy); } // --------------------------------------------------------------------------- // Apply a single dab to `current` and return the updated colour. // --------------------------------------------------------------------------- fn apply_dab(current: vec4, dab: GpuDab, px: i32, py: i32) -> vec4 { let dx = f32(px) + 0.5 - dab.x; let dy = f32(py) + 0.5 - dab.y; // Normalised squared distance — supports circular and elliptical dabs. var rr: f32; if dab.elliptical_dab_ratio > 1.001 { // Rotate into the dab's local frame. // Major axis is along dab.elliptical_dab_angle; minor axis is compressed by ratio. let c = cos(dab.elliptical_dab_angle); let s = sin(dab.elliptical_dab_angle); let dx_r = dx * c + dy * s; // along major axis let dy_r = (-dx * s + dy * c) * dab.elliptical_dab_ratio; // minor axis compressed rr = (dx_r * dx_r + dy_r * dy_r) / (dab.radius * dab.radius); } else { rr = (dx * dx + dy * dy) / (dab.radius * dab.radius); } if rr > 1.0 { return current; } // Quadratic falloff: flat inner core, smooth quadratic outer zone. // r is the actual normalised distance [0,1]; h controls the hard-core radius. // Inner zone (r ≤ h): fully opaque. // Outer zone (r > h): opa = ((1-r)/(1-h))^2, giving a smooth bell-shaped dab. let h = clamp(dab.hardness, 0.0, 1.0); let r = sqrt(rr); var opa_weight: f32; if h >= 1.0 || r <= h { opa_weight = 1.0; } else { let t = (1.0 - r) / (1.0 - h); opa_weight = t * t; } if dab.blend_mode == 0u { // Normal: "over" operator on premultiplied RGBA. // If lock_alpha > 0.5, preserve the destination alpha unchanged. let dab_a = opa_weight * dab.opacity * dab.color_a; if dab_a <= 0.0 { return current; } let ba = 1.0 - dab_a; let out_a = select(dab_a + ba * current.a, current.a, dab.lock_alpha > 0.5); return vec4( dab_a * dab.color_r + ba * current.r, dab_a * dab.color_g + ba * current.g, dab_a * dab.color_b + ba * current.b, out_a, ); } else if dab.blend_mode == 1u { // Erase: multiplicative alpha reduction let dab_a = opa_weight * dab.opacity * dab.color_a; if dab_a <= 0.0 { return current; } let new_a = current.a * (1.0 - dab_a); let scale = select(0.0, new_a / current.a, current.a > 1e-6); return vec4(current.r * scale, current.g * scale, current.b * scale, new_a); } else if dab.blend_mode == 2u { // Smudge: directional warp — sample from position behind the stroke direction let alpha = opa_weight * dab.opacity; if alpha <= 0.0 { return current; } let src_x = f32(px) + 0.5 - dab.ndx * dab.smudge_dist; let src_y = f32(py) + 0.5 - dab.ndy * dab.smudge_dist; let src = bilinear_sample(src_x, src_y); let da = 1.0 - alpha; return vec4( alpha * src.r + da * current.r, alpha * src.g + da * current.g, alpha * src.b + da * current.b, alpha * src.a + da * current.a, ); } else if dab.blend_mode == 3u { // Clone stamp: sample from (this_pixel + offset) in the source canvas. // color_r/color_g store the world-space offset (source_world - drag_start_world) // computed once when the stroke begins. Each pixel samples its own source texel. let alpha = opa_weight * dab.opacity; if alpha <= 0.0 { return current; } let src_x = f32(px) + 0.5 + dab.color_r; let src_y = f32(py) + 0.5 + dab.color_g; let src = bilinear_sample(src_x, src_y); let ba = 1.0 - alpha; return vec4( alpha * src.r + ba * current.r, alpha * src.g + ba * current.g, alpha * src.b + ba * current.b, alpha * src.a + ba * current.a, ); } else if dab.blend_mode == 5u { // Pattern stamp: procedural tiling pattern using brush color. // ndx = pattern_type (0=Checker, 1=Dots, 2=H-Lines, 3=V-Lines, 4=Diagonal, 5=Crosshatch) // ndy = pattern_scale (tile size in pixels, >= 1.0) let scale = max(dab.ndy, 1.0); let pt = u32(dab.ndx); // Fractional position within the tile [0.0, 1.0) let tx = fract(f32(px) / scale); let ty = fract(f32(py) / scale); var on: bool; if pt == 0u { // Checkerboard let cx = u32(floor(f32(px) / scale)); let cy = u32(floor(f32(py) / scale)); on = (cx + cy) % 2u == 0u; } else if pt == 1u { // Polka dots (r ≈ 0.35 of cell radius) let ddx = tx - 0.5; let ddy = ty - 0.5; on = ddx * ddx + ddy * ddy < 0.1225; } else if pt == 2u { // Horizontal lines (50% duty) on = ty < 0.5; } else if pt == 3u { // Vertical lines (50% duty) on = tx < 0.5; } else if pt == 4u { // Diagonal \ (top-left → bottom-right) on = fract((f32(px) + f32(py)) / scale) < 0.5; } else if pt == 5u { // Diagonal / (top-right → bottom-left) on = fract((f32(px) - f32(py)) / scale) < 0.5; } else { // Crosshatch (type 6+) on = tx < 0.4 || ty < 0.4; } if !on { return current; } // Paint with brush color — same compositing as Normal blend let dab_a = opa_weight * dab.opacity * dab.color_a; if dab_a <= 0.0 { return current; } let ba = 1.0 - dab_a; return vec4( dab_a * dab.color_r + ba * current.r, dab_a * dab.color_g + ba * current.g, dab_a * dab.color_b + ba * current.b, dab_a + ba * current.a, ); } else if dab.blend_mode == 4u { // Healing brush: per-pixel color-corrected clone stamp. // color_r/color_g = source offset (ox, oy), same as clone stamp. // For each pixel: result = src_pixel + (local_dest_mean - local_src_mean) // Means are computed from 4 cardinal neighbors at ±half-radius — per-pixel, no banding. let alpha = opa_weight * dab.opacity; if alpha <= 0.0 { return current; } let cw = i32(params.canvas_w); let ch = i32(params.canvas_h); let ox = dab.color_r; let oy = dab.color_g; let hr = max(dab.radius * 0.5, 1.0); let ihr = i32(hr); // Per-pixel DESTINATION mean: 4 cardinal neighbors from canvas_src (pre-batch state) let d_n = textureLoad(canvas_src, vec2(px, clamp(py - ihr, 0, ch - 1)), 0); let d_s = textureLoad(canvas_src, vec2(px, clamp(py + ihr, 0, ch - 1)), 0); let d_w = textureLoad(canvas_src, vec2(clamp(px - ihr, 0, cw - 1), py ), 0); let d_e = textureLoad(canvas_src, vec2(clamp(px + ihr, 0, cw - 1), py ), 0); let d_mean = (d_n + d_s + d_w + d_e) * 0.25; // Per-pixel SOURCE mean: 4 cardinal neighbors at offset position (bilinear for sub-pixel offsets) let spx = f32(px) + 0.5 + ox; let spy = f32(py) + 0.5 + oy; let s_mean = (bilinear_sample(spx, spy - hr) + bilinear_sample(spx, spy + hr) + bilinear_sample(spx - hr, spy ) + bilinear_sample(spx + hr, spy )) * 0.25; // Source pixel + color correction let s_pixel = bilinear_sample(spx, spy); let corrected = clamp(s_pixel + (d_mean - s_mean), vec4(0.0), vec4(1.0)); let ba = 1.0 - alpha; return vec4( alpha * corrected.r + ba * current.r, alpha * corrected.g + ba * current.g, alpha * corrected.b + ba * current.b, alpha * corrected.a + ba * current.a, ); } else if dab.blend_mode == 6u { // Dodge / Burn: power-curve exposure adjustment. // color_r: 0.0 = dodge, 1.0 = burn // Uses pow(channel, gamma) which is asymmetric across channels: // burn (gamma > 1): low channels compressed toward 0 faster than high ones → saturation increases // dodge (gamma < 1): low channels lifted faster than high ones → saturation decreases // This matches the behaviour of GIMP / Photoshop dodge-burn tools. let s = opa_weight * dab.opacity; if s <= 0.0 { return current; } let rgb = max(current.rgb, vec3(0.0)); var adjusted: vec3; if dab.color_r < 0.5 { // Dodge: gamma < 1 → brightens adjusted = pow(rgb, vec3(max(1.0 - s, 0.001))); } else { // Burn: gamma > 1 → darkens and increases saturation adjusted = pow(rgb, vec3(1.0 + s)); } return vec4(clamp(adjusted, vec3(0.0), vec3(1.0)), current.a); } else if dab.blend_mode == 7u { // Sponge: saturate or desaturate existing pixels. // color_r: 0.0 = saturate, 1.0 = desaturate // Computes luminance, then moves RGB toward (desaturate) or away from (saturate) it. let s = opa_weight * dab.opacity; if s <= 0.0 { return current; } let luma = dot(current.rgb, vec3(0.2126, 0.7152, 0.0722)); let luma_vec = vec3(luma); var adjusted: vec3; if dab.color_r < 0.5 { // Saturate: push RGB away from luma (increase chroma) adjusted = clamp(current.rgb + s * (current.rgb - luma_vec), vec3(0.0), vec3(1.0)); } else { // Desaturate: blend RGB toward luma adjusted = mix(current.rgb, luma_vec, s); } return vec4(adjusted, current.a); } else if dab.blend_mode == 8u { // Blur / Sharpen: 5×5 separable Gaussian kernel. // color_r: 0.0 = blur, 1.0 = sharpen // ndx: kernel radius in canvas pixels (> 0) // // Samples are placed on a grid at ±step and ±2*step per axis, where step = kr/2. // Weights are exp(-x²/2σ²) with σ = step, factored as a separable product. // This gives a true Gaussian falloff rather than a flat ring, so edges blend // into a smooth gradient rather than a flat averaged zone. let s = opa_weight * dab.opacity; if s <= 0.0 { return current; } let kr = max(dab.ndx, 1.0); let cx2 = f32(px) + 0.5; let cy2 = f32(py) + 0.5; let step = kr * 0.5; // 1-D Gaussian weights at distances 0, ±step, ±2*step (σ = step): // exp(0) = 1.0, exp(-0.5) ≈ 0.6065, exp(-2.0) ≈ 0.1353 var gauss = array(0.1353, 0.6065, 1.0, 0.6065, 0.1353); var blur_sum = vec4(0.0); var blur_w = 0.0; for (var iy = 0; iy < 5; iy++) { for (var ix = 0; ix < 5; ix++) { let w = gauss[ix] * gauss[iy]; let spx = cx2 + (f32(ix) - 2.0) * step; let spy = cy2 + (f32(iy) - 2.0) * step; blur_sum += bilinear_sample(spx, spy) * w; blur_w += w; } } let blurred = blur_sum / blur_w; let c = textureLoad(canvas_src, vec2(px, py), 0); var result: vec4; if dab.color_r < 0.5 { // Blur: blend current toward the Gaussian-weighted local average. result = mix(current, blurred, s); } else { // Sharpen: unsharp mask — push pixel away from the local average. // sharpened = 2*src - blurred → highlights diverge, shadows diverge. let sharpened = clamp(c * 2.0 - blurred, vec4(0.0), vec4(1.0)); result = mix(current, sharpened, s); } return result; } else { return current; } } // --------------------------------------------------------------------------- // Main entry point // --------------------------------------------------------------------------- @compute @workgroup_size(8, 8) fn main(@builtin(global_invocation_id) gid: vec3) { // Bounds check within the bounding box if gid.x >= params.bbox_w || gid.y >= params.bbox_h { return; } let px = i32(gid.x) + params.bbox_x0; let py = i32(gid.y) + params.bbox_y0; // Bounds check within the canvas (bbox may extend past canvas edges) if px < 0 || py < 0 || u32(px) >= params.canvas_w || u32(py) >= params.canvas_h { return; } // Read current pixel from source (canvas_dst was pre-filled from canvas_src // by the caller, but we read from canvas_src to ensure consistency) var current = textureLoad(canvas_src, vec2(px, py), 0); // Apply all dabs for this frame (sequential in the thread, no races between threads // since each thread owns a unique output pixel) for (var i = 0u; i < params.num_dabs; i++) { current = apply_dab(current, dabs[i], px, py); } textureStore(canvas_dst, vec2(px, py), current); }