diff --git a/.vscode/settings.json b/.vscode/settings.json index 813fac6..b8fa532 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,6 +2,7 @@ "wgsl-analyzer.customImports": { "bbox": "${workspaceFolder}/piet-wgsl/shader/shared/bbox.wgsl", "bump": "${workspaceFolder}/piet-wgsl/shader/shared/bump.wgsl", + "clip": "${workspaceFolder}/piet-wgsl/shader/shared/clip.wgsl", "config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl", "cubic": "${workspaceFolder}/piet-wgsl/shader/shared/cubic.wgsl", "drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl", diff --git a/piet-wgsl/shader/clip_leaf.wgsl b/piet-wgsl/shader/clip_leaf.wgsl new file mode 100644 index 0000000..f294317 --- /dev/null +++ b/piet-wgsl/shader/clip_leaf.wgsl @@ -0,0 +1,195 @@ + +#import config +#import bbox +#import clip +#import drawtag + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var clip_inp: array; + +@group(0) @binding(2) +var path_bboxes: array; + +@group(0) @binding(3) +var reduced: array; + +@group(0) @binding(4) +var clip_els: array; + +@group(0) @binding(5) +var draw_monoids: array; + +@group(0) @binding(6) +var clip_bboxes: array>; + +let WG_SIZE = 256u; +var sh_bic: array; +var sh_stack: array; +var sh_stack_bbox: array, WG_SIZE>; +var sh_bbox: array, WG_SIZE>; +var sh_link: array; + +fn search_link(bic: ptr, ix: u32) -> i32 { + var j = 0u; + while j < firstTrailingBit(WG_SIZE) { + let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j)); + if ((ix >> j) & 1u) != 0u { + let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic); + if test.b >= 0u { + *bic = test; + ix -= 1u << j; + } + } + j += 1u; + } + if ix > 0u { + while j > 0u { + j -= 1u; + let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j)); + let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic); + if test.b == 0u { + *bic = test; + ix -= 1u << j; + } + } + } + if ix > 0u { + return i32(ix) - 1; + } else { + return i32(~(*bic).a); + } +} + +fn load_clip_inp(ix: u32) -> i32 { + if ix < config.n_clip { + return clip_inp[ix]; + } else { + return i32(0x80000000); + } +} + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + var bic: Bic; + if local_id.x < wg_id.x { + bic = reduced[local_id.x]; + } + sh_bic[local_id.x] = bic; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_bic[local_id.x + (1u << i)]; + bic = bic_combine(bic, other); + } + workgroupBarrier(); + sh_bic[local_id.x] = bic; + } + workgroupBarrier(); + let stack_size = sh_bic[0].b; + // TODO: if stack depth > WG_SIZE desired, scan here + + // binary search in stack + let sp = WG_SIZE - 1u - local_id.x; + var ix = 0u; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + let probe = ix + ((WG_SIZE / 2u) >> i); + if sp < sh_bic[probe].b { + ix = probe; + } + } + let b = sh_bic[ix].b; + var bbox = vec4(-1e9, -1e9, 1e9, 1e9); + if sp < b { + let el = clip_els[ix * WG_SIZE + b - sp - 1u]; + sh_stack[local_id.x] = el.parent_ix; + bbox = el.bbox; + } + // forward scan of bbox values of prefix stack + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + sh_stack_bbox[local_id.x] = bbox; + workgroupBarrier(); + if local_id.x >= (1u << i) { + bbox = bbox_intersect(sh_stack_bbox[local_id.x - (1u << i)], bbox); + } + workgroupBarrier(); + } + sh_stack_bbox[local_id.x] = bbox; + + // Read input and compute Bic binary tree + let inp = load_clip_inp(global_id.x); + let is_push = inp >= 0; + var bic = Bic(1u - u32(is_push), u32(is_push)); + sh_bic[local_id.x] = bic; + if is_push { + let path_bbox = path_bboxes[inp]; + bbox = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1); + } else { + bbox = vec4(-1e9, -1e9, 1e9, 1e9); + } + var inbase = 0u; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + let outbase = 2u * WG_SIZE - (1u << (firstTrailingBit(WG_SIZE) - i)); + workgroupBarrier(); + if local_id.x < 1u << (firstTrailingBit(WG_SIZE) - 1u - i) { + let in_off = inbase + local_id.x * 2u; + sh_bic[outbase + local_id.x] = bic_combine(sh_bic[in_off], sh_bic[in_off + 1u]); + } + inbase = outbase; + } + workgroupBarrier(); + // search for predecessor node + bic = Bic(); + var link = search_link(&bic, local_id.x); + sh_link[local_id.x] = link; + workgroupBarrier(); + let grandparent = select(link - 1, sh_link[link], link >= 0); + var parent: i32; + if link >= 0 { + parent = i32(wg_id.x * WG_SIZE) + link; + } else if link + i32(stack_size) >= 0 { + parent = i32(sh_stack[i32(WG_SIZE) + link]); + } else { + parent = -1; + } + // bbox scan (intersect) across parent links + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + if i != 0u { + sh_link[local_id.x] = link; + } + sh_bbox[local_id.x] = bbox; + workgroupBarrier(); + if link >= 0 { + bbox = bbox_intersect(sh_bbox[link], bbox); + link = sh_link[link]; + } + workgroupBarrier(); + } + if link + i32(stack_size) >= 0 { + bbox = bbox_intersect(sh_stack_bbox[i32(WG_SIZE) + link], bbox); + } + // At this point, bbox is the intersection of bboxes on the path to the root + sh_bbox[local_id.x] = bbox; + workgroupBarrier(); + + if !is_push && global_id.x < config.n_clip { + // Fix up drawmonoid so path_ix of EndClip matches BeginClip + let path_ix = clip_inp[parent]; + draw_monoids[~inp].path_ix = u32(path_ix); + + if grandparent >= 0 { + bbox = sh_bbox[grandparent]; + } else if grandparent + i32(stack_size) >= 0 { + bbox = sh_stack_bbox[i32(WG_SIZE) + grandparent]; + } else { + bbox = vec4(-1e9, -1e9, 1e9, 1e9); + } + } + clip_bboxes[global_id.x] = bbox +} diff --git a/piet-wgsl/shader/clip_reduce.wgsl b/piet-wgsl/shader/clip_reduce.wgsl new file mode 100644 index 0000000..3288b07 --- /dev/null +++ b/piet-wgsl/shader/clip_reduce.wgsl @@ -0,0 +1,66 @@ + +#import config +#import bbox +#import clip + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var clip_inp: array; + +@group(0) @binding(2) +var path_bboxes: array; + +@group(0) @binding(3) +var reduced: array; + +@group(0) @binding(4) +var clip_out: array; + +let WG_SIZE = 256u; +var sh_bic: array; +var sh_parent: array; +var sh_path_ix: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + let inp = clip_inp[global_id.x]; + let is_push = inp >= 0; + var bic = Bic(1u - u32(is_push), u32(is_push)); + // reverse scan of bicyclic semigroup + sh_bic[local_id.x] = bic; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_bic[local_id.x + (1u << i)]; + bic = bic_combine(bic, other); + } + workgroupBarrier(); + sh_bic[local_id.x] = bic; + } + if local_id.x == 0u { + reduced[wg_id.x] = bic; + } + workgroupBarrier(); + let size = sh_bic[0].b; + bic = Bic(); + if is_push && bic.a == 0u { + let local_ix = size - bic.b - 1u; + sh_parent[local_ix] = local_id.x; + sh_path_ix[local_ix] = u32(inp); + } + workgroupBarrier(); + // TODO: possibly do forward scan here if depth can exceed wg size + if local_id.x < size { + let path_ix = sh_path_ix[local_id.x]; + let path_bbox = path_bboxes[path_ix]; + let parent_ix = sh_parent[local_id.x] + wg_id.x * WG_SIZE; + let bbox = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1); + clip_out[global_id.x] = ClipEl(parent_ix, bbox); + } +} diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index e1e88d7..5741ec3 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -125,7 +125,27 @@ fn write_color(color: CmdColor) { ptcl[cmd_offset] = CMD_COLOR; ptcl[cmd_offset + 1u] = color.rgba_color; cmd_offset += 2u; +} +// Discussion point: these are basically copying from info to ptcl. We +// could just write an info offset and have fine bind that buffer and read +// from it. + +fn write_lin_grad(lin: CmdLinGrad) { + alloc_cmd(5u); + ptcl[cmd_offset] = CMD_LIN_GRAD; + ptcl[cmd_offset + 1u] = lin.index; + ptcl[cmd_offset + 2u] = bitcast(lin.line_x); + ptcl[cmd_offset + 3u] = bitcast(lin.line_y); + ptcl[cmd_offset + 4u] = bitcast(lin.line_c); + cmd_offset += 5u; +} + +fn write_rad_grad(rad: CmdRadGrad) { + alloc_cmd(12u); + ptcl[cmd_offset] = CMD_RAD_GRAD; + ptcl[cmd_offset + 1u] = rad.index; + cmd_offset += 12u; } @compute @workgroup_size(256) @@ -304,14 +324,38 @@ fn main( if clip_zero_depth == 0u { let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x; let tile = tiles[tile_ix]; + let linewidth = bitcast(info[di]); + write_path(tile, linewidth); switch drawtag { // DRAWTAG_FILL_COLOR case 0x44u: { - let linewidth = bitcast(info[di]); let rgba_color = scene[dd]; - write_path(tile, linewidth); write_color(CmdColor(rgba_color)); } + // DRAWTAG_FILL_LIN_GRADIENT + case 0x114u: { + var lin: CmdLinGrad; + lin.index = scene[dd]; + lin.line_x = bitcast(info[di + 1u]); + lin.line_y = bitcast(info[di + 2u]); + lin.line_c = bitcast(info[di + 3u]); + write_lin_grad(lin); + } + // DRAWTAG_FILL_RAD_GRADIENT + case 0x2dcu: { + var rad: CmdRadGrad; + rad.index = scene[dd]; + let m0 = bitcast(info[di + 1u]); + let m1 = bitcast(info[di + 2u]); + let m2 = bitcast(info[di + 3u]); + let m3 = bitcast(info[di + 4u]); + rad.matrx = vec4(m0, m1, m2, m3); + rad.xlat = vec2(bitcast(info[di + 5u]), bitcast(info[di + 6u])); + rad.c1 = vec2(bitcast(info[di + 7u]), bitcast(info[di + 8u])); + rad.ra = bitcast(info[di + 9u]); + rad.roff = bitcast(info[di + 10u]); + write_rad_grad(rad); + } default: {} } } diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index c347630..d76edef 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -42,9 +42,15 @@ var output: array; #ifdef full #import ptcl +let GRADIENT_WIDTH = 512; +let BLEND_STACK_SPLIT = 4u; + @group(0) @binding(4) var ptcl: array; +@group(0) @binding(5) +var gradients: texture_2d; + fn read_fill(cmd_ix: u32) -> CmdFill { let tile = ptcl[cmd_ix + 1u]; let backdrop = i32(ptcl[cmd_ix + 2u]); @@ -61,6 +67,33 @@ fn read_color(cmd_ix: u32) -> CmdColor { let rgba_color = ptcl[cmd_ix + 1u]; return CmdColor(rgba_color); } + +fn read_lin_grad(cmd_ix: u32) -> CmdLinGrad { + let index = ptcl[cmd_ix + 1u]; + let line_x = bitcast(ptcl[cmd_ix + 2u]); + let line_y = bitcast(ptcl[cmd_ix + 3u]); + let line_c = bitcast(ptcl[cmd_ix + 4u]); + return CmdLinGrad(index, line_x, line_y, line_c); +} + +fn read_rad_grad(cmd_ix: u32) -> CmdRadGrad { + let index = ptcl[cmd_ix + 1u]; + let m0 = bitcast(ptcl[cmd_ix + 2u]); + let m1 = bitcast(ptcl[cmd_ix + 3u]); + let m2 = bitcast(ptcl[cmd_ix + 4u]); + let m3 = bitcast(ptcl[cmd_ix + 5u]); + let matrx = vec4(m0, m1, m2, m3); + let xlat = vec2(bitcast(ptcl[cmd_ix + 6u]), bitcast(ptcl[cmd_ix + 7u])); + let c1 = vec2(bitcast(ptcl[cmd_ix + 8u]), bitcast(ptcl[cmd_ix + 9u])); + let ra = bitcast(ptcl[cmd_ix + 10u]); + let roff = bitcast(ptcl[cmd_ix + 11u]); + return CmdRadGrad(index, matrx, xlat, c1, ra, roff); +} + +fn mix_blend_compose(backdrop: vec4, src: vec4, mode: u32) -> vec4 { + // TODO: ALL the blend modes. This is just vanilla src-over. + return backdrop * (1.0 - src.a) + src; +} #endif let PIXELS_PER_THREAD = 4u; @@ -147,6 +180,8 @@ fn main( let xy = vec2(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y)); #ifdef full var rgba: array, PIXELS_PER_THREAD>; + var blend_stack: array, PIXELS_PER_THREAD>; + var clip_depth = 0u; var area: array; var cmd_ix = tile_ix * PTCL_INITIAL_ALLOC; @@ -187,6 +222,66 @@ fn main( } cmd_ix += 2u; } + // CMD_LIN_GRAD + case 6u: { + let lin = read_lin_grad(cmd_ix); + let d = lin.line_x * xy.x + lin.line_y * xy.y + lin.line_c; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let my_d = d + lin.line_x * f32(i); + let x = i32(round(clamp(my_d, 0.0, 1.0) * f32(GRADIENT_WIDTH - 1))); + let fg_rgba = textureLoad(gradients, vec2(x, i32(lin.index)), 0); + let fg_i = fg_rgba * area[i]; + rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; + } + cmd_ix += 12u; + } + // CMD_RAD_GRAD + case 7u: { + let rad = read_rad_grad(cmd_ix); + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let my_xy = vec2(xy.x + f32(i), xy.y); + // TODO: can hoist y, but for now stick to piet-gpu + let xy_xformed = rad.matrx.xz * my_xy.x + rad.matrx.yw * my_xy.y - rad.xlat; + let ba = dot(xy_xformed, rad.c1); + let ca = rad.ra * dot(xy_xformed, xy_xformed); + let t = sqrt(ba * ba + ca) - ba - rad.roff; + let x = i32(round(clamp(t, 0.0, 1.0) * f32(GRADIENT_WIDTH - 1))); + let fg_rgba = textureLoad(gradients, vec2(x, i32(rad.index)), 0); + let fg_i = fg_rgba * area[i]; + rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; + } + cmd_ix += 12u; + } + // CMD_BEGIN_CLIP + case 9u: { + if clip_depth < BLEND_STACK_SPLIT { + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + blend_stack[clip_depth][i] = pack4x8unorm(rgba[i]); + rgba[i] = vec4(0.0); + } + } else { + // TODO: spill to memory + } + clip_depth += 1u; + cmd_ix += 1u; + } + // CMD_END_CLIP + case 10u: { + let blend = ptcl[cmd_ix + 1u]; + clip_depth -= 1u; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + var bg_rgba: u32; + if clip_depth < BLEND_STACK_SPLIT { + bg_rgba = blend_stack[clip_depth][i]; + } else { + // load from memory + } + let bg = unpack4x8unorm(bg_rgba); + let fg = rgba[i] * area[i]; + rgba[i] = mix_blend_compose(bg, fg, blend); + } + cmd_ix += 2u; + } // CMD_JUMP case 11u: { cmd_ix = ptcl[cmd_ix + 1u]; diff --git a/piet-wgsl/shader/shared/clip.wgsl b/piet-wgsl/shader/shared/clip.wgsl new file mode 100644 index 0000000..608e9bf --- /dev/null +++ b/piet-wgsl/shader/shared/clip.wgsl @@ -0,0 +1,14 @@ +struct Bic { + a: u32, + b: u32, +} + +fn bic_combine(x: Bic, y: Bic) -> Bic { + let m = min(x.b, y.a); + return Bic(x.a + y.a - m, x.b + y.b - m); +} + +struct ClipEl { + parent_ix: u32, + bbox: vec4, +} diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index 73f4054..5db894b 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -20,6 +20,7 @@ struct Config { n_drawobj: u32, n_path: u32, + n_clip: u32, // offsets within scene buffer (in u32 units) // Note: this is a difference from piet-gpu, which is in bytes diff --git a/piet-wgsl/shader/shared/ptcl.wgsl b/piet-wgsl/shader/shared/ptcl.wgsl index 8d6e869..92316cc 100644 --- a/piet-wgsl/shader/shared/ptcl.wgsl +++ b/piet-wgsl/shader/shared/ptcl.wgsl @@ -28,6 +28,10 @@ let CMD_FILL = 1u; let CMD_STROKE = 2u; let CMD_SOLID = 3u; let CMD_COLOR = 5u; +let CMD_LIN_GRAD = 6u; +let CMD_RAD_GRAD = 7u; +let CMD_BEGIN_CLIP = 9u; +let CMD_END_CLIP = 10u; let CMD_JUMP = 11u; // The individual PTCL structs are written here, but read/write is by @@ -50,3 +54,19 @@ struct CmdJump { struct CmdColor { rgba_color: u32, } + +struct CmdLinGrad { + index: u32, + line_x: f32, + line_y: f32, + line_c: f32, +} + +struct CmdRadGrad { + index: u32, + matrx: vec4, + xlat: vec2, + c1: vec2, + ra: f32, + roff: f32, +} diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 690e681..7d052cc 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -26,6 +26,7 @@ struct Config { height_in_tiles: u32, n_drawobj: u32, n_path: u32, + n_clip: u32, pathtag_base: u32, pathdata_base: u32, drawtag_base: u32, @@ -143,11 +144,13 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy let n_path = data.n_path; // TODO: calculate for real when we do rectangles let n_drawobj = n_path; + let n_clip = 0; // TODO: wire up correctly let config = Config { width_in_tiles: 64, height_in_tiles: 64, n_drawobj, n_path, + n_clip, pathtag_base, pathdata_base, drawtag_base,