Lightningbeam/lightningbeam-ui/lightningbeam-editor/src/panes/shaders/brush_dab.wgsl

// GPU brush dab compute shader.
//
// Renders all dabs for one stroke segment into the raster canvas.
// Uses a ping-pong pair: reads from `canvas_src` (texture_2d) via textureLoad,
// writes to `canvas_dst` (storage, write-only).
//
// `textureSample` is forbidden in compute shaders; bilinear filtering for the
// smudge tool is implemented manually using four textureLoad calls.
//
// Before this dispatch the caller copies `canvas_src` → `canvas_dst` so that pixels
// outside the union dab bounding box (not touched by the shader) remain unchanged.
//
// Dispatch: ceil(bbox_w / 8) × ceil(bbox_h / 8) × 1
// Each thread covers one pixel in the bounding-box-clamped canvas region.

// ---------------------------------------------------------------------------
// Data layout must match GpuDab in brush_engine.rs (64 bytes, 16-byte aligned).
// ---------------------------------------------------------------------------
struct GpuDab {
    x: f32, y: f32, radius: f32, hardness: f32,             // bytes  0–15
    opacity: f32, color_r: f32, color_g: f32, color_b: f32, // bytes 16–31
    color_a: f32, ndx: f32, ndy: f32, smudge_dist: f32,     // bytes 32–47
    blend_mode: u32, elliptical_dab_ratio: f32, elliptical_dab_angle: f32, lock_alpha: f32, // bytes 48–63
}

struct Params {
    bbox_x0:  i32,
    bbox_y0:  i32,
    bbox_w:   u32,
    bbox_h:   u32,
    num_dabs: u32,
    canvas_w: u32,
    canvas_h: u32,
    _pad:     u32,
}

@group(0) @binding(0) var<storage, read> dabs:       array<GpuDab>;
@group(0) @binding(1) var<uniform>       params:     Params;
@group(0) @binding(2) var               canvas_src: texture_2d<f32>;
@group(0) @binding(3) var               canvas_dst: texture_storage_2d<rgba8unorm, write>;

// ---------------------------------------------------------------------------
// Manual bilinear sample from canvas_src at sub-pixel coordinates (px, py).
// Out-of-bounds texels clamp to the canvas edge (replicates ClampToEdge).
// textureSample is forbidden in compute shaders; we use four textureLoad calls.
// ---------------------------------------------------------------------------
fn bilinear_sample(px: f32, py: f32) -> vec4<f32> {
    let cw = i32(params.canvas_w);
    let ch = i32(params.canvas_h);

    // Integer coords of the top-left sample
    let ix = i32(floor(px - 0.5));
    let iy = i32(floor(py - 0.5));

    // Fractional weights
    let fx = fract(px - 0.5);
    let fy = fract(py - 0.5);

    // Clamp to [0, dim-1]
    let x0 = clamp(ix,     0, cw - 1);
    let x1 = clamp(ix + 1, 0, cw - 1);
    let y0 = clamp(iy,     0, ch - 1);
    let y1 = clamp(iy + 1, 0, ch - 1);

    let s00 = textureLoad(canvas_src, vec2<i32>(x0, y0), 0);
    let s10 = textureLoad(canvas_src, vec2<i32>(x1, y0), 0);
    let s01 = textureLoad(canvas_src, vec2<i32>(x0, y1), 0);
    let s11 = textureLoad(canvas_src, vec2<i32>(x1, y1), 0);

    return mix(mix(s00, s10, fx), mix(s01, s11, fx), fy);
}

// ---------------------------------------------------------------------------
// Apply a single dab to `current` and return the updated colour.
// ---------------------------------------------------------------------------
fn apply_dab(current: vec4<f32>, dab: GpuDab, px: i32, py: i32) -> vec4<f32> {
    let dx = f32(px) + 0.5 - dab.x;
    let dy = f32(py) + 0.5 - dab.y;

    // Normalised squared distance — supports circular and elliptical dabs.
    var rr: f32;
    if dab.elliptical_dab_ratio > 1.001 {
        // Rotate into the dab's local frame.
        // Major axis is along dab.elliptical_dab_angle; minor axis is compressed by ratio.
        let c = cos(dab.elliptical_dab_angle);
        let s = sin(dab.elliptical_dab_angle);
        let dx_r =  dx * c + dy * s;                               // along major axis
        let dy_r = (-dx * s + dy * c) * dab.elliptical_dab_ratio;  // minor axis compressed
        rr = (dx_r * dx_r + dy_r * dy_r) / (dab.radius * dab.radius);
    } else {
        rr = (dx * dx + dy * dy) / (dab.radius * dab.radius);
    }
    if rr > 1.0 { return current; }

    // Quadratic falloff: flat inner core, smooth quadratic outer zone.
    // r is the actual normalised distance [0,1]; h controls the hard-core radius.
    // Inner zone (r ≤ h): fully opaque.
    // Outer zone (r > h): opa = ((1-r)/(1-h))^2, giving a smooth bell-shaped dab.
    let h = clamp(dab.hardness, 0.0, 1.0);
    let r = sqrt(rr);
    var opa_weight: f32;
    if h >= 1.0 || r <= h {
        opa_weight = 1.0;
    } else {
        let t = (1.0 - r) / (1.0 - h);
        opa_weight = t * t;
    }

    if dab.blend_mode == 0u {
        // Normal: "over" operator on premultiplied RGBA.
        // If lock_alpha > 0.5, preserve the destination alpha unchanged.
        let dab_a = opa_weight * dab.opacity * dab.color_a;
        if dab_a <= 0.0 { return current; }
        let ba = 1.0 - dab_a;
        let out_a = select(dab_a + ba * current.a, current.a, dab.lock_alpha > 0.5);
        return vec4<f32>(
            dab_a * dab.color_r + ba * current.r,
            dab_a * dab.color_g + ba * current.g,
            dab_a * dab.color_b + ba * current.b,
            out_a,
        );
    } else if dab.blend_mode == 1u {
        // Erase: multiplicative alpha reduction
        let dab_a = opa_weight * dab.opacity * dab.color_a;
        if dab_a <= 0.0 { return current; }
        let new_a = current.a * (1.0 - dab_a);
        let scale = select(0.0, new_a / current.a, current.a > 1e-6);
        return vec4<f32>(current.r * scale, current.g * scale, current.b * scale, new_a);
    } else if dab.blend_mode == 2u {
        // Smudge: directional warp — sample from position behind the stroke direction
        let alpha = opa_weight * dab.opacity;
        if alpha <= 0.0 { return current; }
        let src_x = f32(px) + 0.5 - dab.ndx * dab.smudge_dist;
        let src_y = f32(py) + 0.5 - dab.ndy * dab.smudge_dist;
        let src   = bilinear_sample(src_x, src_y);
        let da    = 1.0 - alpha;
        return vec4<f32>(
            alpha * src.r + da * current.r,
            alpha * src.g + da * current.g,
            alpha * src.b + da * current.b,
            alpha * src.a + da * current.a,
        );
    } else if dab.blend_mode == 3u {
        // Clone stamp: sample from (this_pixel + offset) in the source canvas.
        // color_r/color_g store the world-space offset (source_world - drag_start_world)
        // computed once when the stroke begins. Each pixel samples its own source texel.
        let alpha = opa_weight * dab.opacity;
        if alpha <= 0.0 { return current; }
        let src_x = f32(px) + 0.5 + dab.color_r;
        let src_y = f32(py) + 0.5 + dab.color_g;
        let src = bilinear_sample(src_x, src_y);
        let ba  = 1.0 - alpha;
        return vec4<f32>(
            alpha * src.r + ba * current.r,
            alpha * src.g + ba * current.g,
            alpha * src.b + ba * current.b,
            alpha * src.a + ba * current.a,
        );
    } else if dab.blend_mode == 5u {
        // Pattern stamp: procedural tiling pattern using brush color.
        // ndx = pattern_type (0=Checker, 1=Dots, 2=H-Lines, 3=V-Lines, 4=Diagonal, 5=Crosshatch)
        // ndy = pattern_scale (tile size in pixels, >= 1.0)
        let scale = max(dab.ndy, 1.0);
        let pt    = u32(dab.ndx);

        // Fractional position within the tile [0.0, 1.0)
        let tx = fract(f32(px) / scale);
        let ty = fract(f32(py) / scale);

        var on: bool;
        if pt == 0u {           // Checkerboard
            let cx = u32(floor(f32(px) / scale));
            let cy = u32(floor(f32(py) / scale));
            on = (cx + cy) % 2u == 0u;
        } else if pt == 1u {    // Polka dots (r ≈ 0.35 of cell radius)
            let ddx = tx - 0.5; let ddy = ty - 0.5;
            on = ddx * ddx + ddy * ddy < 0.1225;
        } else if pt == 2u {    // Horizontal lines (50% duty)
            on = ty < 0.5;
        } else if pt == 3u {    // Vertical lines (50% duty)
            on = tx < 0.5;
        } else if pt == 4u {    // Diagonal \ (top-left → bottom-right)
            on = fract((f32(px) + f32(py)) / scale) < 0.5;
        } else if pt == 5u {    // Diagonal / (top-right → bottom-left)
            on = fract((f32(px) - f32(py)) / scale) < 0.5;
        } else {                // Crosshatch (type 6+)
            on = tx < 0.4 || ty < 0.4;
        }

        if !on { return current; }

        // Paint with brush color — same compositing as Normal blend
        let dab_a = opa_weight * dab.opacity * dab.color_a;
        if dab_a <= 0.0 { return current; }
        let ba = 1.0 - dab_a;
        return vec4<f32>(
            dab_a * dab.color_r + ba * current.r,
            dab_a * dab.color_g + ba * current.g,
            dab_a * dab.color_b + ba * current.b,
            dab_a + ba * current.a,
        );
    } else if dab.blend_mode == 4u {
        // Healing brush: per-pixel color-corrected clone stamp.
        // color_r/color_g = source offset (ox, oy), same as clone stamp.
        // For each pixel: result = src_pixel + (local_dest_mean - local_src_mean)
        // Means are computed from 4 cardinal neighbors at ±half-radius — per-pixel, no banding.
        let alpha = opa_weight * dab.opacity;
        if alpha <= 0.0 { return current; }

        let cw = i32(params.canvas_w);
        let ch = i32(params.canvas_h);
        let ox = dab.color_r;
        let oy = dab.color_g;
        let hr = max(dab.radius * 0.5, 1.0);
        let ihr = i32(hr);

        // Per-pixel DESTINATION mean: 4 cardinal neighbors from canvas_src (pre-batch state)
        let d_n = textureLoad(canvas_src, vec2<i32>(px,                      clamp(py - ihr, 0, ch - 1)), 0);
        let d_s = textureLoad(canvas_src, vec2<i32>(px,                      clamp(py + ihr, 0, ch - 1)), 0);
        let d_w = textureLoad(canvas_src, vec2<i32>(clamp(px - ihr, 0, cw - 1), py                     ), 0);
        let d_e = textureLoad(canvas_src, vec2<i32>(clamp(px + ihr, 0, cw - 1), py                     ), 0);
        let d_mean = (d_n + d_s + d_w + d_e) * 0.25;

        // Per-pixel SOURCE mean: 4 cardinal neighbors at offset position (bilinear for sub-pixel offsets)
        let spx = f32(px) + 0.5 + ox;
        let spy = f32(py) + 0.5 + oy;
        let s_mean = (bilinear_sample(spx,        spy - hr)
                   + bilinear_sample(spx,        spy + hr)
                   + bilinear_sample(spx - hr,   spy     )
                   + bilinear_sample(spx + hr,   spy     )) * 0.25;

        // Source pixel + color correction
        let s_pixel   = bilinear_sample(spx, spy);
        let corrected = clamp(s_pixel + (d_mean - s_mean), vec4<f32>(0.0), vec4<f32>(1.0));

        let ba = 1.0 - alpha;
        return vec4<f32>(
            alpha * corrected.r + ba * current.r,
            alpha * corrected.g + ba * current.g,
            alpha * corrected.b + ba * current.b,
            alpha * corrected.a + ba * current.a,
        );
    } else if dab.blend_mode == 6u {
        // Dodge / Burn: power-curve exposure adjustment.
        // color_r: 0.0 = dodge, 1.0 = burn
        // Uses pow(channel, gamma) which is asymmetric across channels:
        //   burn  (gamma > 1): low channels compressed toward 0 faster than high ones → saturation increases
        //   dodge (gamma < 1): low channels lifted faster than high ones → saturation decreases
        // This matches the behaviour of GIMP / Photoshop dodge-burn tools.
        let s = opa_weight * dab.opacity;
        if s <= 0.0 { return current; }

        let rgb = max(current.rgb, vec3<f32>(0.0));
        var adjusted: vec3<f32>;
        if dab.color_r < 0.5 {
            // Dodge: gamma < 1 → brightens
            adjusted = pow(rgb, vec3<f32>(max(1.0 - s, 0.001)));
        } else {
            // Burn: gamma > 1 → darkens and increases saturation
            adjusted = pow(rgb, vec3<f32>(1.0 + s));
        }
        return vec4<f32>(clamp(adjusted, vec3<f32>(0.0), vec3<f32>(1.0)), current.a);
    } else if dab.blend_mode == 7u {
        // Sponge: saturate or desaturate existing pixels.
        // color_r: 0.0 = saturate, 1.0 = desaturate
        // Computes luminance, then moves RGB toward (desaturate) or away from (saturate) it.
        let s = opa_weight * dab.opacity;
        if s <= 0.0 { return current; }

        let luma = dot(current.rgb, vec3<f32>(0.2126, 0.7152, 0.0722));
        let luma_vec = vec3<f32>(luma);
        var adjusted: vec3<f32>;
        if dab.color_r < 0.5 {
            // Saturate: push RGB away from luma (increase chroma)
            adjusted = clamp(current.rgb + s * (current.rgb - luma_vec), vec3<f32>(0.0), vec3<f32>(1.0));
        } else {
            // Desaturate: blend RGB toward luma
            adjusted = mix(current.rgb, luma_vec, s);
        }
        return vec4<f32>(adjusted, current.a);
    } else if dab.blend_mode == 8u {
        // Blur / Sharpen: 5×5 separable Gaussian kernel.
        // color_r: 0.0 = blur, 1.0 = sharpen
        // ndx:     kernel radius in canvas pixels (> 0)
        //
        // Samples are placed on a grid at ±step and ±2*step per axis, where step = kr/2.
        // Weights are exp(-x²/2σ²) with σ = step, factored as a separable product.
        // This gives a true Gaussian falloff rather than a flat ring, so edges blend
        // into a smooth gradient rather than a flat averaged zone.
        let s = opa_weight * dab.opacity;
        if s <= 0.0 { return current; }

        let kr   = max(dab.ndx, 1.0);
        let cx2  = f32(px) + 0.5;
        let cy2  = f32(py) + 0.5;
        let step = kr * 0.5;

        // 1-D Gaussian weights at distances 0, ±step, ±2*step  (σ = step):
        //   exp(0) = 1.0,  exp(-0.5) ≈ 0.6065,  exp(-2.0) ≈ 0.1353
        var gauss = array<f32, 5>(0.1353, 0.6065, 1.0, 0.6065, 0.1353);

        var blur_sum = vec4<f32>(0.0);
        var blur_w   = 0.0;
        for (var iy = 0; iy < 5; iy++) {
            for (var ix = 0; ix < 5; ix++) {
                let w   = gauss[ix] * gauss[iy];
                let spx = cx2 + (f32(ix) - 2.0) * step;
                let spy = cy2 + (f32(iy) - 2.0) * step;
                blur_sum += bilinear_sample(spx, spy) * w;
                blur_w   += w;
            }
        }
        let blurred = blur_sum / blur_w;

        let c = textureLoad(canvas_src, vec2<i32>(px, py), 0);
        var result: vec4<f32>;
        if dab.color_r < 0.5 {
            // Blur: blend current toward the Gaussian-weighted local average.
            result = mix(current, blurred, s);
        } else {
            // Sharpen: unsharp mask — push pixel away from the local average.
            // sharpened = 2*src - blurred  →  highlights diverge, shadows diverge.
            let sharpened = clamp(c * 2.0 - blurred, vec4<f32>(0.0), vec4<f32>(1.0));
            result = mix(current, sharpened, s);
        }
        return result;
    } else {
        return current;
    }
}

// ---------------------------------------------------------------------------
// Main entry point
// ---------------------------------------------------------------------------
@compute @workgroup_size(8, 8)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    // Bounds check within the bounding box
    if gid.x >= params.bbox_w || gid.y >= params.bbox_h { return; }

    let px = i32(gid.x) + params.bbox_x0;
    let py = i32(gid.y) + params.bbox_y0;

    // Bounds check within the canvas (bbox may extend past canvas edges)
    if px < 0 || py < 0 || u32(px) >= params.canvas_w || u32(py) >= params.canvas_h { return; }

    // Read current pixel from source (canvas_dst was pre-filled from canvas_src
    // by the caller, but we read from canvas_src to ensure consistency)
    var current = textureLoad(canvas_src, vec2<i32>(px, py), 0);

    // Apply all dabs for this frame (sequential in the thread, no races between threads
    // since each thread owns a unique output pixel)
    for (var i = 0u; i < params.num_dabs; i++) {
        current = apply_dab(current, dabs[i], px, py);
    }

    textureStore(canvas_dst, vec2<i32>(px, py), current);
}