2022-11-20 03:45:42 +11:00
|
|
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
2022-11-11 14:48:36 +11:00
|
|
|
|
|
|
|
#import config
|
|
|
|
#import bbox
|
|
|
|
#import clip
|
|
|
|
#import drawtag
|
|
|
|
|
|
|
|
@group(0) @binding(0)
|
|
|
|
var<storage> config: Config;
|
|
|
|
|
|
|
|
@group(0) @binding(1)
|
|
|
|
var<storage> clip_inp: array<i32>;
|
|
|
|
|
|
|
|
@group(0) @binding(2)
|
|
|
|
var<storage> path_bboxes: array<PathBbox>;
|
|
|
|
|
|
|
|
@group(0) @binding(3)
|
|
|
|
var<storage> reduced: array<Bic>;
|
|
|
|
|
|
|
|
@group(0) @binding(4)
|
|
|
|
var<storage> clip_els: array<ClipEl>;
|
|
|
|
|
|
|
|
@group(0) @binding(5)
|
|
|
|
var<storage, read_write> draw_monoids: array<DrawMonoid>;
|
|
|
|
|
|
|
|
@group(0) @binding(6)
|
|
|
|
var<storage, read_write> clip_bboxes: array<vec4<f32>>;
|
|
|
|
|
|
|
|
let WG_SIZE = 256u;
|
|
|
|
var<workgroup> sh_bic: array<Bic, 510 >;
|
|
|
|
var<workgroup> sh_stack: array<u32, WG_SIZE>;
|
|
|
|
var<workgroup> sh_stack_bbox: array<vec4<f32>, WG_SIZE>;
|
|
|
|
var<workgroup> sh_bbox: array<vec4<f32>, WG_SIZE>;
|
|
|
|
var<workgroup> sh_link: array<i32, WG_SIZE>;
|
|
|
|
|
|
|
|
fn search_link(bic: ptr<function, Bic>, ix: u32) -> i32 {
|
2022-11-19 09:26:26 +11:00
|
|
|
var ix = ix;
|
2022-11-11 14:48:36 +11:00
|
|
|
var j = 0u;
|
|
|
|
while j < firstTrailingBit(WG_SIZE) {
|
|
|
|
let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j));
|
|
|
|
if ((ix >> j) & 1u) != 0u {
|
|
|
|
let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic);
|
2022-11-19 09:26:26 +11:00
|
|
|
if test.b > 0u {
|
|
|
|
break;
|
2022-11-11 14:48:36 +11:00
|
|
|
}
|
2022-11-19 09:26:26 +11:00
|
|
|
*bic = test;
|
|
|
|
ix -= 1u << j;
|
2022-11-11 14:48:36 +11:00
|
|
|
}
|
|
|
|
j += 1u;
|
|
|
|
}
|
|
|
|
if ix > 0u {
|
|
|
|
while j > 0u {
|
|
|
|
j -= 1u;
|
|
|
|
let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j));
|
|
|
|
let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic);
|
|
|
|
if test.b == 0u {
|
|
|
|
*bic = test;
|
|
|
|
ix -= 1u << j;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ix > 0u {
|
|
|
|
return i32(ix) - 1;
|
|
|
|
} else {
|
2022-11-19 09:26:26 +11:00
|
|
|
return i32(~0u - (*bic).a);
|
2022-11-11 14:48:36 +11:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn load_clip_inp(ix: u32) -> i32 {
|
|
|
|
if ix < config.n_clip {
|
|
|
|
return clip_inp[ix];
|
|
|
|
} else {
|
2022-11-19 09:26:26 +11:00
|
|
|
return -2147483648;
|
|
|
|
// literal too large?
|
|
|
|
// return 0x80000000;
|
2022-11-11 14:48:36 +11:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
@compute @workgroup_size(256)
|
|
|
|
fn main(
|
|
|
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
|
|
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
|
|
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
|
|
|
) {
|
|
|
|
var bic: Bic;
|
|
|
|
if local_id.x < wg_id.x {
|
|
|
|
bic = reduced[local_id.x];
|
|
|
|
}
|
|
|
|
sh_bic[local_id.x] = bic;
|
|
|
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
|
|
|
workgroupBarrier();
|
|
|
|
if local_id.x + (1u << i) < WG_SIZE {
|
|
|
|
let other = sh_bic[local_id.x + (1u << i)];
|
|
|
|
bic = bic_combine(bic, other);
|
|
|
|
}
|
|
|
|
workgroupBarrier();
|
|
|
|
sh_bic[local_id.x] = bic;
|
|
|
|
}
|
|
|
|
workgroupBarrier();
|
|
|
|
let stack_size = sh_bic[0].b;
|
|
|
|
// TODO: if stack depth > WG_SIZE desired, scan here
|
|
|
|
|
|
|
|
// binary search in stack
|
|
|
|
let sp = WG_SIZE - 1u - local_id.x;
|
|
|
|
var ix = 0u;
|
|
|
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
|
|
|
let probe = ix + ((WG_SIZE / 2u) >> i);
|
|
|
|
if sp < sh_bic[probe].b {
|
|
|
|
ix = probe;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
let b = sh_bic[ix].b;
|
|
|
|
var bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
|
|
|
|
if sp < b {
|
|
|
|
let el = clip_els[ix * WG_SIZE + b - sp - 1u];
|
|
|
|
sh_stack[local_id.x] = el.parent_ix;
|
|
|
|
bbox = el.bbox;
|
|
|
|
}
|
|
|
|
// forward scan of bbox values of prefix stack
|
|
|
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
|
|
|
sh_stack_bbox[local_id.x] = bbox;
|
|
|
|
workgroupBarrier();
|
|
|
|
if local_id.x >= (1u << i) {
|
|
|
|
bbox = bbox_intersect(sh_stack_bbox[local_id.x - (1u << i)], bbox);
|
|
|
|
}
|
|
|
|
workgroupBarrier();
|
|
|
|
}
|
|
|
|
sh_stack_bbox[local_id.x] = bbox;
|
|
|
|
|
|
|
|
// Read input and compute Bic binary tree
|
|
|
|
let inp = load_clip_inp(global_id.x);
|
|
|
|
let is_push = inp >= 0;
|
|
|
|
var bic = Bic(1u - u32(is_push), u32(is_push));
|
|
|
|
sh_bic[local_id.x] = bic;
|
|
|
|
if is_push {
|
|
|
|
let path_bbox = path_bboxes[inp];
|
2022-11-19 09:26:26 +11:00
|
|
|
bbox = vec4<f32>(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1));
|
2022-11-11 14:48:36 +11:00
|
|
|
} else {
|
|
|
|
bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
|
|
|
|
}
|
|
|
|
var inbase = 0u;
|
2022-11-19 09:26:26 +11:00
|
|
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE) - 1u; i += 1u) {
|
2022-11-11 14:48:36 +11:00
|
|
|
let outbase = 2u * WG_SIZE - (1u << (firstTrailingBit(WG_SIZE) - i));
|
|
|
|
workgroupBarrier();
|
|
|
|
if local_id.x < 1u << (firstTrailingBit(WG_SIZE) - 1u - i) {
|
|
|
|
let in_off = inbase + local_id.x * 2u;
|
|
|
|
sh_bic[outbase + local_id.x] = bic_combine(sh_bic[in_off], sh_bic[in_off + 1u]);
|
|
|
|
}
|
|
|
|
inbase = outbase;
|
|
|
|
}
|
|
|
|
workgroupBarrier();
|
|
|
|
// search for predecessor node
|
|
|
|
bic = Bic();
|
|
|
|
var link = search_link(&bic, local_id.x);
|
|
|
|
sh_link[local_id.x] = link;
|
|
|
|
workgroupBarrier();
|
|
|
|
let grandparent = select(link - 1, sh_link[link], link >= 0);
|
|
|
|
var parent: i32;
|
|
|
|
if link >= 0 {
|
|
|
|
parent = i32(wg_id.x * WG_SIZE) + link;
|
|
|
|
} else if link + i32(stack_size) >= 0 {
|
|
|
|
parent = i32(sh_stack[i32(WG_SIZE) + link]);
|
|
|
|
} else {
|
|
|
|
parent = -1;
|
|
|
|
}
|
|
|
|
// bbox scan (intersect) across parent links
|
|
|
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
|
|
|
if i != 0u {
|
|
|
|
sh_link[local_id.x] = link;
|
|
|
|
}
|
|
|
|
sh_bbox[local_id.x] = bbox;
|
|
|
|
workgroupBarrier();
|
|
|
|
if link >= 0 {
|
|
|
|
bbox = bbox_intersect(sh_bbox[link], bbox);
|
|
|
|
link = sh_link[link];
|
|
|
|
}
|
|
|
|
workgroupBarrier();
|
|
|
|
}
|
|
|
|
if link + i32(stack_size) >= 0 {
|
|
|
|
bbox = bbox_intersect(sh_stack_bbox[i32(WG_SIZE) + link], bbox);
|
|
|
|
}
|
|
|
|
// At this point, bbox is the intersection of bboxes on the path to the root
|
|
|
|
sh_bbox[local_id.x] = bbox;
|
|
|
|
workgroupBarrier();
|
|
|
|
|
|
|
|
if !is_push && global_id.x < config.n_clip {
|
|
|
|
// Fix up drawmonoid so path_ix of EndClip matches BeginClip
|
|
|
|
let path_ix = clip_inp[parent];
|
|
|
|
draw_monoids[~inp].path_ix = u32(path_ix);
|
|
|
|
|
|
|
|
if grandparent >= 0 {
|
|
|
|
bbox = sh_bbox[grandparent];
|
|
|
|
} else if grandparent + i32(stack_size) >= 0 {
|
|
|
|
bbox = sh_stack_bbox[i32(WG_SIZE) + grandparent];
|
|
|
|
} else {
|
|
|
|
bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
|
|
|
|
}
|
|
|
|
}
|
2022-11-19 09:26:26 +11:00
|
|
|
clip_bboxes[global_id.x] = bbox;
|
2022-11-11 14:48:36 +11:00
|
|
|
}
|