mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
2c46228d06
When the bounding boxes of a path and its clip are disjoint (i.e. they do not intersect) the result of their intersection is a negative rectangle. When calculating the intersection of the bboxes, the binning stage ensures that the bbox is non-negative. It then normalizes the coordinates to bin dimensions and rounds the top-left corner down to the neareast integer and the bottom-right corner up. However this rounding causes zero-area bounding boxes to have a non-zero area and sends the bottom-right corner to the placed in the next bin. This causes fully clipped out draw objects to be included in binning, with an incorrect clip bounding box that causes them to be erroneously drawn with partial clipping. `binning` now takes care around this logic to make sure that a zero-area intersected-bbox gets skipped and clipped out. `tile_alloc`, also takes care in its logic. Fixes #286 and #333
172 lines
5.7 KiB
WebGPU Shading Language
172 lines
5.7 KiB
WebGPU Shading Language
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
// The binning stage
|
|
|
|
#import config
|
|
#import drawtag
|
|
#import bbox
|
|
#import bump
|
|
|
|
@group(0) @binding(0)
|
|
var<uniform> config: Config;
|
|
|
|
@group(0) @binding(1)
|
|
var<storage> draw_monoids: array<DrawMonoid>;
|
|
|
|
@group(0) @binding(2)
|
|
var<storage> path_bbox_buf: array<PathBbox>;
|
|
|
|
@group(0) @binding(3)
|
|
var<storage> clip_bbox_buf: array<vec4<f32>>;
|
|
|
|
@group(0) @binding(4)
|
|
var<storage, read_write> intersected_bbox: array<vec4<f32>>;
|
|
|
|
@group(0) @binding(5)
|
|
var<storage, read_write> bump: BumpAllocators;
|
|
|
|
@group(0) @binding(6)
|
|
var<storage, read_write> bin_data: array<u32>;
|
|
|
|
// TODO: put in common place
|
|
struct BinHeader {
|
|
element_count: u32,
|
|
chunk_offset: u32,
|
|
}
|
|
|
|
@group(0) @binding(7)
|
|
var<storage, read_write> bin_header: array<BinHeader>;
|
|
|
|
// conversion factors from coordinates to bin
|
|
let SX = 0.00390625;
|
|
let SY = 0.00390625;
|
|
//let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH);
|
|
//let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT);
|
|
|
|
let WG_SIZE = 256u;
|
|
let N_SLICE = 8u;
|
|
//let N_SLICE = WG_SIZE / 32u;
|
|
let N_SUBSLICE = 4u;
|
|
|
|
var<workgroup> sh_bitmaps: array<array<atomic<u32>, N_TILE>, N_SLICE>;
|
|
// store count values packed two u16's to a u32
|
|
var<workgroup> sh_count: array<array<u32, N_TILE>, N_SUBSLICE>;
|
|
var<workgroup> sh_chunk_offset: array<u32, N_TILE>;
|
|
|
|
@compute @workgroup_size(256)
|
|
fn main(
|
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
|
) {
|
|
for (var i = 0u; i < N_SLICE; i += 1u) {
|
|
atomicStore(&sh_bitmaps[i][local_id.x], 0u);
|
|
}
|
|
workgroupBarrier();
|
|
|
|
// Read inputs and determine coverage of bins
|
|
let element_ix = global_id.x;
|
|
var x0 = 0;
|
|
var y0 = 0;
|
|
var x1 = 0;
|
|
var y1 = 0;
|
|
if element_ix < config.n_drawobj {
|
|
let draw_monoid = draw_monoids[element_ix];
|
|
var clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9);
|
|
if draw_monoid.clip_ix > 0u {
|
|
// TODO: `clip_ix` should always be valid as long as the monoids are correct. Leaving
|
|
// the bounds check in here for correctness but we should assert this condition instead
|
|
// once there is a debug-assertion mechanism.
|
|
clip_bbox = clip_bbox_buf[min(draw_monoid.clip_ix - 1u, config.n_clip - 1u)];
|
|
}
|
|
// For clip elements, clip_box is the bbox of the clip path,
|
|
// intersected with enclosing clips.
|
|
// For other elements, it is the bbox of the enclosing clips.
|
|
// TODO check this is true
|
|
|
|
let path_bbox = path_bbox_buf[draw_monoid.path_ix];
|
|
let pb = vec4<f32>(vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1));
|
|
let bbox = bbox_intersect(clip_bbox, pb);
|
|
|
|
intersected_bbox[element_ix] = bbox;
|
|
|
|
// `bbox_intersect` can result in a zero or negative area intersection if the path bbox lies
|
|
// outside the clip bbox. If that is the case, Don't round up the bottom-right corner of the
|
|
// and leave the coordinates at 0. This way the path will get clipped out and won't get
|
|
// assigned to a bin.
|
|
if bbox.x < bbox.z && bbox.y < bbox.w {
|
|
x0 = i32(floor(bbox.x * SX));
|
|
y0 = i32(floor(bbox.y * SY));
|
|
x1 = i32(ceil(bbox.z * SX));
|
|
y1 = i32(ceil(bbox.w * SY));
|
|
}
|
|
}
|
|
let width_in_bins = i32((config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X);
|
|
let height_in_bins = i32((config.height_in_tiles + N_TILE_Y - 1u) / N_TILE_Y);
|
|
x0 = clamp(x0, 0, width_in_bins);
|
|
y0 = clamp(y0, 0, height_in_bins);
|
|
x1 = clamp(x1, 0, width_in_bins);
|
|
y1 = clamp(y1, 0, height_in_bins);
|
|
if x0 == x1 {
|
|
y1 = y0;
|
|
}
|
|
var x = x0;
|
|
var y = y0;
|
|
let my_slice = local_id.x / 32u;
|
|
let my_mask = 1u << (local_id.x & 31u);
|
|
while y < y1 {
|
|
atomicOr(&sh_bitmaps[my_slice][y * width_in_bins + x], my_mask);
|
|
x += 1;
|
|
if x == x1 {
|
|
x = x0;
|
|
y += 1;
|
|
}
|
|
}
|
|
|
|
workgroupBarrier();
|
|
// Allocate output segments
|
|
var element_count = 0u;
|
|
for (var i = 0u; i < N_SUBSLICE; i += 1u) {
|
|
element_count += countOneBits(atomicLoad(&sh_bitmaps[i * 2u][local_id.x]));
|
|
let element_count_lo = element_count;
|
|
element_count += countOneBits(atomicLoad(&sh_bitmaps[i * 2u + 1u][local_id.x]));
|
|
let element_count_hi = element_count;
|
|
let element_count_packed = element_count_lo | (element_count_hi << 16u);
|
|
sh_count[i][local_id.x] = element_count_packed;
|
|
}
|
|
// element_count is the number of draw objects covering this thread's bin
|
|
var chunk_offset = atomicAdd(&bump.binning, element_count);
|
|
if chunk_offset + element_count > config.binning_size {
|
|
chunk_offset = 0u;
|
|
atomicOr(&bump.failed, STAGE_BINNING);
|
|
}
|
|
sh_chunk_offset[local_id.x] = chunk_offset;
|
|
bin_header[global_id.x].element_count = element_count;
|
|
bin_header[global_id.x].chunk_offset = chunk_offset;
|
|
workgroupBarrier();
|
|
|
|
// loop over bbox of bins touched by this draw object
|
|
x = x0;
|
|
y = y0;
|
|
while y < y1 {
|
|
let bin_ix = y * width_in_bins + x;
|
|
let out_mask = atomicLoad(&sh_bitmaps[my_slice][bin_ix]);
|
|
// I think this predicate will always be true...
|
|
if (out_mask & my_mask) != 0u {
|
|
var idx = countOneBits(out_mask & (my_mask - 1u));
|
|
if my_slice > 0u {
|
|
let count_ix = my_slice - 1u;
|
|
let count_packed = sh_count[count_ix / 2u][bin_ix];
|
|
idx += (count_packed >> (16u * (count_ix & 1u))) & 0xffffu;
|
|
}
|
|
let offset = config.bin_data_start + sh_chunk_offset[bin_ix];
|
|
bin_data[offset + idx] = element_ix;
|
|
}
|
|
x += 1;
|
|
if x == x1 {
|
|
x = x0;
|
|
y += 1;
|
|
}
|
|
}
|
|
}
|