diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp index 60b12e0..138621e 100644 --- a/piet-gpu/shader/binning.comp +++ b/piet-gpu/shader/binning.comp @@ -35,6 +35,8 @@ layout(set = 0, binding = 3) buffer BinsBuf { #define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX)) #define SY (1.0 / float(N_TILE_Y * TILE_HEIGHT_PX)) +#define TSY (1.0 / float(TILE_HEIGHT_PX)) + // Constant not available in GLSL. Also consider uintBitsToFloat(0x7f800000) #define INFINITY (1.0 / 0.0) @@ -83,6 +85,7 @@ void main() { } int x0 = 0, y0 = 0, x1 = 0, y1 = 0; float my_right_edge = INFINITY; + bool crosses_edge = false; switch (tag) { case Annotated_FillLine: case Annotated_StrokeLine: @@ -91,6 +94,7 @@ void main() { y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y) * SY)); x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x) * SX)); y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y) * SY)); + crosses_edge = tag == Annotated_FillLine && ceil(line.p0.y * TSY) != ceil(line.p1.y * TSY); break; case Annotated_Fill: case Annotated_Stroke: @@ -101,7 +105,9 @@ void main() { y0 = int(floor(fill.bbox.y * SY)); x1 = int(ceil(fill.bbox.z * SX)); y1 = int(ceil(fill.bbox.w * SY)); - my_right_edge = x1; + // It probably makes more sense to track x1, to avoid having to redo + // the rounding to tile coords. + my_right_edge = fill.bbox.z; break; } @@ -131,6 +137,9 @@ void main() { } barrier(); } + if (crosses_edge) { + x1 = int(ceil(my_right_edge * SX)); + } // At this point, we run an iterator over the coverage area, // trying to keep divergence low. diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv index 4fda673..dc1713b 100644 Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 57abd73..2389e27 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -43,6 +43,7 @@ shared uint sh_elements_ref; shared uint sh_bitmaps[N_SLICE][N_TILE]; shared uint sh_backdrop[N_SLICE][N_TILE]; +shared uint sh_bd_sign[N_SLICE]; // scale factors useful for converting coordinates to tiles #define SX (1.0 / float(TILE_WIDTH_PX)) @@ -111,7 +112,10 @@ void main() { sh_first_el[th_ix] = chunk.n > 0 ? BinInstance_read(BinInstanceRef(start_chunk + BinChunk_size)).element_ix : ~0; } - uint count = 0; + if (th_ix < N_SLICE) { + sh_bd_sign[th_ix] = 0; + } + int backdrop = 0; while (true) { for (uint i = 0; i < N_SLICE; i++) { sh_bitmaps[i][th_ix] = 0; @@ -174,8 +178,11 @@ void main() { // Read one element, compute coverage. uint tag = Annotated_Nop; AnnotatedRef ref; + float right_edge = 0.0; if (th_ix + rd_ix < wr_ix) { - uint element_ix = sh_elements[(rd_ix + th_ix) % N_RINGBUF]; + uint rd_el_ix = (rd_ix + th_ix) % N_RINGBUF; + uint element_ix = sh_elements[rd_el_ix]; + right_edge = sh_right_edge[rd_el_ix]; ref = AnnotatedRef(element_ix * Annotated_size); tag = Annotated_tag(ref); } @@ -184,6 +191,8 @@ void main() { float a, b, c; // Bounding box of element in pixel coordinates. float xmin, xmax, ymin, ymax; + uint my_slice = th_ix / 32; + uint my_mask = 1 << (th_ix & 31); switch (tag) { case Annotated_FillLine: case Annotated_StrokeLine: @@ -194,6 +203,14 @@ void main() { ymax = max(line.p0.y, line.p1.y) + line.stroke.y; float dx = line.p1.x - line.p0.x; float dy = line.p1.y - line.p0.y; + if (tag == Annotated_FillLine) { + // Set bit for backdrop sign calculation, 1 is +1, 0 is -1. + if (dy < 0) { + atomicOr(sh_bd_sign[my_slice], my_mask); + } else { + atomicAnd(sh_bd_sign[my_slice], ~my_mask); + } + } // Set up for per-scanline coverage formula, below. float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy; c = abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y) * SX; @@ -226,14 +243,14 @@ void main() { // Compute bounding box in tiles and clip to this bin. int x0 = int(floor((xmin - xy0.x) * SX)); int x1 = int(ceil((xmax - xy0.x) * SX)); + int xr = int(ceil((right_edge - xy0.x) * SX)); int y0 = int(floor((ymin - xy0.y) * SY)); int y1 = int(ceil((ymax - xy0.y) * SY)); x0 = clamp(x0, 0, N_TILE_X); x1 = clamp(x1, x0, N_TILE_X); + xr = clamp(xr, 0, N_TILE_X); y0 = clamp(y0, 0, N_TILE_Y); y1 = clamp(y1, y0, N_TILE_Y); - uint my_slice = th_ix / 32; - uint my_mask = 1 << (th_ix & 31); float t = a + b * float(y0); for (uint y = y0; y < y1; y++) { uint xx0 = clamp(int(floor(t - c)), x0, x1); @@ -241,6 +258,15 @@ void main() { for (uint x = xx0; x < xx1; x++) { atomicOr(sh_bitmaps[my_slice][y * N_TILE_X + x], my_mask); } + if (tag == Annotated_FillLine && ymin <= xy0.y + float(y * TILE_HEIGHT_PX)) { + // Assign backdrop to all tiles to the right of the ray crossing the + // top edge of this tile, up to the right edge of the fill bbox. + float xray = t - 0.5 * b; + xx0 = max(int(ceil(xray)), 0); + for (uint x = xx0; x < xr; x++) { + atomicOr(sh_backdrop[my_slice][y * N_TILE_X + x], my_mask); + } + } t += b; } barrier(); @@ -248,20 +274,34 @@ void main() { // Output elements for this tile, based on bitmaps. uint slice_ix = 0; uint bitmap = sh_bitmaps[0][th_ix]; + uint bd_bitmap = sh_backdrop[0][th_ix]; + uint combined = bitmap | bd_bitmap; while (true) { - if (bitmap == 0) { + if (combined == 0) { slice_ix++; if (slice_ix == N_SLICE) { break; } bitmap = sh_bitmaps[slice_ix][th_ix]; - if (bitmap == 0) { + bd_bitmap = sh_backdrop[slice_ix][th_ix]; + combined = bitmap | bd_bitmap; + if (combined == 0) { continue; } } - uint element_ref_ix = slice_ix * 32 + findLSB(bitmap); + uint element_ref_ix = slice_ix * 32 + findLSB(combined); uint element_ix = sh_elements[(rd_ix + element_ref_ix) % N_RINGBUF]; + // TODO: use bit magic to aggregate this calculation. + if ((bd_bitmap & (1 << (element_ref_ix & 31))) != 0) { + if ((sh_bd_sign[slice_ix] & (1 << (element_ref_ix & 31))) != 0) { + backdrop += 1; + } else { + backdrop -= 1; + } + } + + if ((bitmap & (1 << (element_ref_ix & 31))) != 0) { // At this point, we read the element again from global memory. // If that turns out to be expensive, maybe we can pack it into // shared memory (or perhaps just the tag). @@ -284,12 +324,19 @@ void main() { seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs; CmdFill cmd_fill; cmd_fill.seg_ref = first_seg_chunk.offset; + cmd_fill.backdrop = backdrop; cmd_fill.rgba_color = fill.rgba_color; alloc_cmd(cmd_ref, cmd_limit); Cmd_Fill_write(cmd_ref, cmd_fill); cmd_ref.offset += Cmd_size; chunk_n_segs = 0; + } else if (backdrop != 0) { + AnnoFill fill = Annotated_Fill_read(ref); + alloc_cmd(cmd_ref, cmd_limit); + Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color)); + cmd_ref.offset += Cmd_size; } + backdrop = 0; break; case Annotated_Stroke: if (chunk_n_segs > 0) { @@ -307,9 +354,10 @@ void main() { } break; } + } // clear LSB - bitmap &= bitmap - 1; + combined &= combined - 1; } barrier(); diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index e201c5e..bc097e4 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 70b02f5..65bbe5c 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -46,8 +46,8 @@ pub fn render_scene(rc: &mut impl RenderContext) { let circle = Circle::new(center, radius); rc.fill(circle, &color); } - let mut path = BezPath::new(); /* + let mut path = BezPath::new(); path.move_to((100.0, 1150.0)); path.line_to((200.0, 1200.0)); path.line_to((150.0, 1250.0));