vello/shader/draw_leaf.wgsl
Arman Uguray a016fc19de [draw_leaf] Don't write past the end of the draw_monoids buffer
The number of global invocations for draw_leaf can exceed the size of
the draw_monoids buffer which gets conservatively set to the number of
draw objects. Added an explicit bounds check to prevent the invalid
write.

This is not an issue when targeting wgpu as the WGSL compiler emits
implicit bounds checking. When targeting Metal, we disable implicit
bounds checks as that requires an extra buffer binding containing buffer
sizes.

This was caught by Xcode's Metal shader validation and resulted in
visual artifacts in native rendering.
2023-06-28 12:39:24 -07:00

264 lines
10 KiB
WebGPU Shading Language

// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Finish prefix sum of drawtags, decode draw objects.
#import config
#import clip
#import drawtag
#import bbox
#import transform
@group(0) @binding(0)
var<uniform> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage> reduced: array<DrawMonoid>;
@group(0) @binding(3)
var<storage> path_bbox: array<PathBbox>;
@group(0) @binding(4)
var<storage, read_write> draw_monoid: array<DrawMonoid>;
@group(0) @binding(5)
var<storage, read_write> info: array<u32>;
@group(0) @binding(6)
var<storage, read_write> clip_inp: array<ClipInp>;
let WG_SIZE = 256u;
fn read_transform(transform_base: u32, ix: u32) -> Transform {
let base = transform_base + ix * 6u;
let c0 = bitcast<f32>(scene[base]);
let c1 = bitcast<f32>(scene[base + 1u]);
let c2 = bitcast<f32>(scene[base + 2u]);
let c3 = bitcast<f32>(scene[base + 3u]);
let c4 = bitcast<f32>(scene[base + 4u]);
let c5 = bitcast<f32>(scene[base + 5u]);
let matrx = vec4(c0, c1, c2, c3);
let translate = vec2(c4, c5);
return Transform(matrx, translate);
}
var<workgroup> sh_scratch: array<DrawMonoid, WG_SIZE>;
@compute @workgroup_size(256)
fn main(
@builtin(global_invocation_id) global_id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
@builtin(workgroup_id) wg_id: vec3<u32>,
) {
let ix = global_id.x;
// Reduce prefix of workgroups up to this one
var agg = draw_monoid_identity();
if local_id.x < wg_id.x {
agg = reduced[local_id.x];
}
sh_scratch[local_id.x] = agg;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
workgroupBarrier();
if local_id.x + (1u << i) < WG_SIZE {
let other = sh_scratch[local_id.x + (1u << i)];
agg = combine_draw_monoid(agg, other);
}
workgroupBarrier();
sh_scratch[local_id.x] = agg;
}
// Two barriers can be eliminated if we use separate shared arrays
// for prefix and intra-workgroup prefix sum.
workgroupBarrier();
var m = sh_scratch[0];
workgroupBarrier();
let tag_word = scene[config.drawtag_base + ix];
agg = map_draw_tag(tag_word);
sh_scratch[local_id.x] = agg;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
workgroupBarrier();
if local_id.x >= 1u << i {
let other = sh_scratch[local_id.x - (1u << i)];
agg = combine_draw_monoid(agg, other);
}
workgroupBarrier();
sh_scratch[local_id.x] = agg;
}
workgroupBarrier();
if local_id.x > 0u {
m = combine_draw_monoid(m, sh_scratch[local_id.x - 1u]);
}
// m now contains exclusive prefix sum of draw monoid
if ix < config.n_drawobj {
draw_monoid[ix] = m;
}
let dd = config.drawdata_base + m.scene_offset;
let di = m.info_offset;
if tag_word == DRAWTAG_FILL_COLOR || tag_word == DRAWTAG_FILL_LIN_GRADIENT ||
tag_word == DRAWTAG_FILL_RAD_GRADIENT || tag_word == DRAWTAG_FILL_IMAGE ||
tag_word == DRAWTAG_BEGIN_CLIP
{
let bbox = path_bbox[m.path_ix];
// TODO: bbox is mostly yagni here, sort that out. Maybe clips?
// let x0 = f32(bbox.x0);
// let y0 = f32(bbox.y0);
// let x1 = f32(bbox.x1);
// let y1 = f32(bbox.y1);
// let bbox_f = vec4(x0, y0, x1, y1);
let fill_mode = u32(bbox.linewidth >= 0.0);
var transform = Transform();
var linewidth = bbox.linewidth;
if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT ||
tag_word == DRAWTAG_FILL_IMAGE
{
transform = read_transform(config.transform_base, bbox.trans_ix);
}
if linewidth >= 0.0 {
// Note: doesn't deal with anisotropic case
let matrx = transform.matrx;
linewidth *= sqrt(abs(matrx.x * matrx.w - matrx.y * matrx.z));
}
switch tag_word {
// DRAWTAG_FILL_COLOR
case 0x44u: {
info[di] = bitcast<u32>(linewidth);
}
// DRAWTAG_FILL_LIN_GRADIENT
case 0x114u: {
info[di] = bitcast<u32>(linewidth);
var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u]));
var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u]));
p0 = transform_apply(transform, p0);
p1 = transform_apply(transform, p1);
let dxy = p1 - p0;
let scale = 1.0 / dot(dxy, dxy);
let line_xy = dxy * scale;
let line_c = -dot(p0, line_xy);
info[di + 1u] = bitcast<u32>(line_xy.x);
info[di + 2u] = bitcast<u32>(line_xy.y);
info[di + 3u] = bitcast<u32>(line_c);
}
// DRAWTAG_FILL_RAD_GRADIENT
case 0x29cu: {
// Two-point conical gradient implementation based
// on the algorithm at <https://skia.org/docs/dev/design/conical/>
// This epsilon matches what Skia uses
let GRADIENT_EPSILON = 1.0 / f32(1 << 12u);
info[di] = bitcast<u32>(linewidth);
var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u]));
var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u]));
var r0 = bitcast<f32>(scene[dd + 5u]);
var r1 = bitcast<f32>(scene[dd + 6u]);
let user_to_gradient = transform_inverse(transform);
// Output variables
var xform = Transform();
var focal_x = 0.0;
var radius = 0.0;
var kind = 0u;
var flags = 0u;
if abs(r0 - r1) <= GRADIENT_EPSILON {
// When the radii are the same, emit a strip gradient
kind = RAD_GRAD_KIND_STRIP;
let scaled = r0 / distance(p0, p1);
xform = transform_mul(
two_point_to_unit_line(p0, p1),
user_to_gradient
);
radius = scaled * scaled;
} else {
// Assume a two point conical gradient unless the centers
// are equal.
kind = RAD_GRAD_KIND_CONE;
if all(p0 == p1) {
kind = RAD_GRAD_KIND_CIRCULAR;
// Nudge p0 a bit to avoid denormals.
p0 += GRADIENT_EPSILON;
}
if r1 == 0.0 {
// If r1 == 0.0, swap the points and radii
flags |= RAD_GRAD_SWAPPED;
let tmp_p = p0;
p0 = p1;
p1 = tmp_p;
let tmp_r = r0;
r0 = r1;
r1 = tmp_r;
}
focal_x = r0 / (r0 - r1);
let cf = (1.0 - focal_x) * p0 + focal_x * p1;
radius = r1 / (distance(cf, p1));
let user_to_unit_line = transform_mul(
two_point_to_unit_line(cf, p1),
user_to_gradient
);
var user_to_scaled = user_to_unit_line;
// When r == 1.0, focal point is on circle
if abs(radius - 1.0) <= GRADIENT_EPSILON {
kind = RAD_GRAD_KIND_FOCAL_ON_CIRCLE;
let scale = 0.5 * abs(1.0 - focal_x);
user_to_scaled = transform_mul(
Transform(vec4(scale, 0.0, 0.0, scale), vec2(0.0)),
user_to_unit_line
);
} else {
let a = radius * radius - 1.0;
let scale_ratio = abs(1.0 - focal_x) / a;
let scale_x = radius * scale_ratio;
let scale_y = sqrt(abs(a)) * scale_ratio;
user_to_scaled = transform_mul(
Transform(vec4(scale_x, 0.0, 0.0, scale_y), vec2(0.0)),
user_to_unit_line
);
}
xform = user_to_scaled;
}
info[di + 1u] = bitcast<u32>(xform.matrx.x);
info[di + 2u] = bitcast<u32>(xform.matrx.y);
info[di + 3u] = bitcast<u32>(xform.matrx.z);
info[di + 4u] = bitcast<u32>(xform.matrx.w);
info[di + 5u] = bitcast<u32>(xform.translate.x);
info[di + 6u] = bitcast<u32>(xform.translate.y);
info[di + 7u] = bitcast<u32>(focal_x);
info[di + 8u] = bitcast<u32>(radius);
info[di + 9u] = bitcast<u32>((flags << 3u) | kind);
}
// DRAWTAG_FILL_IMAGE
case 0x248u: {
info[di] = bitcast<u32>(linewidth);
let inv = transform_inverse(transform);
info[di + 1u] = bitcast<u32>(inv.matrx.x);
info[di + 2u] = bitcast<u32>(inv.matrx.y);
info[di + 3u] = bitcast<u32>(inv.matrx.z);
info[di + 4u] = bitcast<u32>(inv.matrx.w);
info[di + 5u] = bitcast<u32>(inv.translate.x);
info[di + 6u] = bitcast<u32>(inv.translate.y);
info[di + 7u] = scene[dd];
info[di + 8u] = scene[dd + 1u];
}
default: {}
}
}
if tag_word == DRAWTAG_BEGIN_CLIP || tag_word == DRAWTAG_END_CLIP {
var path_ix = ~ix;
if tag_word == DRAWTAG_BEGIN_CLIP {
path_ix = m.path_ix;
}
clip_inp[m.clip_ix] = ClipInp(ix, i32(path_ix));
}
}
fn two_point_to_unit_line(p0: vec2<f32>, p1: vec2<f32>) -> Transform {
let tmp1 = from_poly2(p0, p1);
let inv = transform_inverse(tmp1);
let tmp2 = from_poly2(vec2(0.0), vec2(1.0, 0.0));
return transform_mul(tmp2, inv);
}
fn from_poly2(p0: vec2<f32>, p1: vec2<f32>) -> Transform {
return Transform(
vec4(p1.y - p0.y, p0.x - p1.x, p1.x - p0.x, p1.y - p0.y),
vec2(p0.x, p0.y)
);
}