vello/piet-wgsl/shader/clip_reduce.wgsl
Raph Levien ef3ed3c9d7 Checkpoint of clip and gradient work
This is a checkpoint of partly completed work. Much of the GPU side is done, very little of the CPU side.

For clips, the clip_els bindings (binding 6 of draw_leaf) are not added. Clip logic is missing from coarse. The overflow buffer is missing from fine, as is its size calculation in coarse (but it should work as long as the max depth fits within BLEND_STACK_SPLIT).

For gradients, the texture binding is missing (binding 6) is missing from fine, as is the infrastructure in engine to deal with texture resources, and of course porting over the logic to fill it.

The code is not tested, bugs may lurk.
2022-11-10 19:48:36 -08:00

67 lines
1.9 KiB
GLSL

#import config
#import bbox
#import clip
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> clip_inp: array<i32>;
@group(0) @binding(2)
var<storage> path_bboxes: array<PathBbox>;
@group(0) @binding(3)
var<storage, read_write> reduced: array<Bic>;
@group(0) @binding(4)
var<storage, read_write> clip_out: array<ClipEl>;
let WG_SIZE = 256u;
var<workgroup> sh_bic: array<Bic, WG_SIZE>;
var<workgroup> sh_parent: array<u32, WG_SIZE>;
var<workgroup> sh_path_ix: array<u32, WG_SIZE>;
@compute @workgroup_size(256)
fn main(
@builtin(global_invocation_id) global_id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
@builtin(workgroup_id) wg_id: vec3<u32>,
) {
let inp = clip_inp[global_id.x];
let is_push = inp >= 0;
var bic = Bic(1u - u32(is_push), u32(is_push));
// reverse scan of bicyclic semigroup
sh_bic[local_id.x] = bic;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
workgroupBarrier();
if local_id.x + (1u << i) < WG_SIZE {
let other = sh_bic[local_id.x + (1u << i)];
bic = bic_combine(bic, other);
}
workgroupBarrier();
sh_bic[local_id.x] = bic;
}
if local_id.x == 0u {
reduced[wg_id.x] = bic;
}
workgroupBarrier();
let size = sh_bic[0].b;
bic = Bic();
if is_push && bic.a == 0u {
let local_ix = size - bic.b - 1u;
sh_parent[local_ix] = local_id.x;
sh_path_ix[local_ix] = u32(inp);
}
workgroupBarrier();
// TODO: possibly do forward scan here if depth can exceed wg size
if local_id.x < size {
let path_ix = sh_path_ix[local_id.x];
let path_bbox = path_bboxes[path_ix];
let parent_ix = sh_parent[local_id.x] + wg_id.x * WG_SIZE;
let bbox = vec4<f32>(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1);
clip_out[global_id.x] = ClipEl(parent_ix, bbox);
}
}