diff --git a/piet-gpu-types/src/annotated.rs b/piet-gpu-types/src/annotated.rs index 681a7ec..cedbd3f 100644 --- a/piet-gpu-types/src/annotated.rs +++ b/piet-gpu-types/src/annotated.rs @@ -4,26 +4,33 @@ piet_gpu! { #[gpu_write] mod annotated { struct AnnoFill { - rgba_color: u32, + // The bbox is always first, as we take advantage of common + // layout when binning. bbox: [f32; 4], + rgba_color: u32, } struct AnnoFillMask { - mask: f32, bbox: [f32; 4], + mask: f32, } struct AnnoStroke { - rgba_color: u32, bbox: [f32; 4], + rgba_color: u32, // For the nonuniform scale case, this needs to be a 2x2 matrix. // That's expected to be uncommon, so we could special-case it. linewidth: f32, } + struct AnnoClip { + bbox: [f32; 4], + } enum Annotated { Nop, Stroke(AnnoStroke), Fill(AnnoFill), FillMask(AnnoFillMask), FillMaskInv(AnnoFillMask), + BeginClip(AnnoClip), + EndClip(AnnoClip), } } } diff --git a/piet-gpu-types/src/bins.rs b/piet-gpu-types/src/bins.rs index 1ac2413..88f16f1 100644 --- a/piet-gpu-types/src/bins.rs +++ b/piet-gpu-types/src/bins.rs @@ -7,9 +7,6 @@ piet_gpu! { mod bins { struct BinInstance { element_ix: u32, - // Right edge of the bounding box of the associated fill - // element; used in backdrop computation. - right_edge: f32, } struct BinChunk { diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs index 95dcdc6..86e4572 100644 --- a/piet-gpu-types/src/ptcl.rs +++ b/piet-gpu-types/src/ptcl.rs @@ -30,6 +30,19 @@ piet_gpu! { backdrop: i32, mask: f32, } + struct CmdBeginClip { + tile_ref: u32, + backdrop: i32, + } + // This is mostly here for expedience and can always be optimized + // out for pure clips, but will be useful for blend groups. + struct CmdBeginSolidClip { + alpha: f32, + } + struct CmdEndClip { + // This will be 1.0 for clips, but we can imagine blend groups. + alpha: f32, + } struct CmdSolid { rgba_color: u32, } @@ -46,6 +59,9 @@ piet_gpu! { Fill(CmdFill), FillMask(CmdFillMask), FillMaskInv(CmdFillMask), + BeginClip(CmdBeginClip), + BeginSolidClip(CmdBeginSolidClip), + EndClip(CmdEndClip), Stroke(CmdStroke), Solid(CmdSolid), SolidMask(CmdSolidMask), diff --git a/piet-gpu-types/src/scene.rs b/piet-gpu-types/src/scene.rs index 1359c1b..855b500 100644 --- a/piet-gpu-types/src/scene.rs +++ b/piet-gpu-types/src/scene.rs @@ -1,6 +1,8 @@ use piet_gpu_derive::piet_gpu; -pub use self::scene::{CubicSeg, Element, Fill, LineSeg, QuadSeg, SetLineWidth, Stroke, Transform}; +pub use self::scene::{ + Clip, CubicSeg, Element, Fill, LineSeg, QuadSeg, SetLineWidth, Stroke, Transform, +}; piet_gpu! { #[rust_encode] @@ -36,6 +38,10 @@ piet_gpu! { mat: [f32; 4], translate: [f32; 2], } + struct Clip { + bbox: [f32; 4], + // TODO: add alpha? + } enum Element { Nop, // Another approach to encoding would be to use a single @@ -55,6 +61,8 @@ piet_gpu! { Transform(Transform), FillMask(FillMask), FillMaskInv(FillMask), + BeginClip(Clip), + EndClip(Clip), } } } diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index 7876f9e..dfed520 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -165,6 +165,38 @@ fn trace_ptcl(buf: &[u32]) { } } } + 6 => { + let backdrop = buf[tile_offset / 4 + 2]; + println!(" {:x}: begin_clip {}", tile_offset, backdrop); + let mut seg_chunk = buf[tile_offset / 4 + 1] as usize; + let n = buf[seg_chunk / 4] as usize; + let segs = buf[seg_chunk / 4 + 2] as usize; + println!(" chunk @{:x}: n={}, segs @{:x}", seg_chunk, n, segs); + for i in 0..n { + let x0 = f32::from_bits(buf[segs / 4 + i * 5]); + let y0 = f32::from_bits(buf[segs / 4 + i * 5 + 1]); + let x1 = f32::from_bits(buf[segs / 4 + i * 5 + 2]); + let y1 = f32::from_bits(buf[segs / 4 + i * 5 + 3]); + let y_edge = f32::from_bits(buf[segs / 4 + i * 5 + 4]); + println!( + " ({:.3}, {:.3}) - ({:.3}, {:.3}) | {:.3}", + x0, y0, x1, y1, y_edge + ); + } + loop { + seg_chunk = buf[seg_chunk / 4 + 1] as usize; + if seg_chunk == 0 { + break; + } + } + } + 7 => { + let backdrop = buf[tile_offset / 4 + 1]; + println!("{:x}: solid_clip {:x}", tile_offset, backdrop); + } + 8 => { + println!("{:x}: end_clip", tile_offset); + } _ => { println!("{:x}: {}", tile_offset, tag); } @@ -246,9 +278,9 @@ fn main() -> Result<(), Error> { /* let mut data: Vec = Default::default(); - device.read_buffer(&renderer.tile_buf, &mut data).unwrap(); + renderer.tile_buf.read(&mut data).unwrap(); piet_gpu::dump_k1_data(&data); - //trace_ptcl(&data); + trace_ptcl(&data); */ let mut img_data: Vec = Default::default(); diff --git a/piet-gpu/shader/annotated.h b/piet-gpu/shader/annotated.h index 847ca06..677e473 100644 --- a/piet-gpu/shader/annotated.h +++ b/piet-gpu/shader/annotated.h @@ -12,13 +12,17 @@ struct AnnoStrokeRef { uint offset; }; +struct AnnoClipRef { + uint offset; +}; + struct AnnotatedRef { uint offset; }; struct AnnoFill { - uint rgba_color; vec4 bbox; + uint rgba_color; }; #define AnnoFill_size 20 @@ -28,8 +32,8 @@ AnnoFillRef AnnoFill_index(AnnoFillRef ref, uint index) { } struct AnnoFillMask { - float mask; vec4 bbox; + float mask; }; #define AnnoFillMask_size 20 @@ -39,8 +43,8 @@ AnnoFillMaskRef AnnoFillMask_index(AnnoFillMaskRef ref, uint index) { } struct AnnoStroke { - uint rgba_color; vec4 bbox; + uint rgba_color; float linewidth; }; @@ -50,11 +54,23 @@ AnnoStrokeRef AnnoStroke_index(AnnoStrokeRef ref, uint index) { return AnnoStrokeRef(ref.offset + index * AnnoStroke_size); } +struct AnnoClip { + vec4 bbox; +}; + +#define AnnoClip_size 16 + +AnnoClipRef AnnoClip_index(AnnoClipRef ref, uint index) { + return AnnoClipRef(ref.offset + index * AnnoClip_size); +} + #define Annotated_Nop 0 #define Annotated_Stroke 1 #define Annotated_Fill 2 #define Annotated_FillMask 3 #define Annotated_FillMaskInv 4 +#define Annotated_BeginClip 5 +#define Annotated_EndClip 6 #define Annotated_size 28 AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) { @@ -69,18 +85,18 @@ AnnoFill AnnoFill_read(AnnoFillRef ref) { uint raw3 = annotated[ix + 3]; uint raw4 = annotated[ix + 4]; AnnoFill s; - s.rgba_color = raw0; - s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4)); + s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.rgba_color = raw4; return s; } void AnnoFill_write(AnnoFillRef ref, AnnoFill s) { uint ix = ref.offset >> 2; - annotated[ix + 0] = s.rgba_color; - annotated[ix + 1] = floatBitsToUint(s.bbox.x); - annotated[ix + 2] = floatBitsToUint(s.bbox.y); - annotated[ix + 3] = floatBitsToUint(s.bbox.z); - annotated[ix + 4] = floatBitsToUint(s.bbox.w); + annotated[ix + 0] = floatBitsToUint(s.bbox.x); + annotated[ix + 1] = floatBitsToUint(s.bbox.y); + annotated[ix + 2] = floatBitsToUint(s.bbox.z); + annotated[ix + 3] = floatBitsToUint(s.bbox.w); + annotated[ix + 4] = s.rgba_color; } AnnoFillMask AnnoFillMask_read(AnnoFillMaskRef ref) { @@ -91,18 +107,18 @@ AnnoFillMask AnnoFillMask_read(AnnoFillMaskRef ref) { uint raw3 = annotated[ix + 3]; uint raw4 = annotated[ix + 4]; AnnoFillMask s; - s.mask = uintBitsToFloat(raw0); - s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4)); + s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.mask = uintBitsToFloat(raw4); return s; } void AnnoFillMask_write(AnnoFillMaskRef ref, AnnoFillMask s) { uint ix = ref.offset >> 2; - annotated[ix + 0] = floatBitsToUint(s.mask); - annotated[ix + 1] = floatBitsToUint(s.bbox.x); - annotated[ix + 2] = floatBitsToUint(s.bbox.y); - annotated[ix + 3] = floatBitsToUint(s.bbox.z); - annotated[ix + 4] = floatBitsToUint(s.bbox.w); + annotated[ix + 0] = floatBitsToUint(s.bbox.x); + annotated[ix + 1] = floatBitsToUint(s.bbox.y); + annotated[ix + 2] = floatBitsToUint(s.bbox.z); + annotated[ix + 3] = floatBitsToUint(s.bbox.w); + annotated[ix + 4] = floatBitsToUint(s.mask); } AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) { @@ -114,22 +130,41 @@ AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) { uint raw4 = annotated[ix + 4]; uint raw5 = annotated[ix + 5]; AnnoStroke s; - s.rgba_color = raw0; - s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4)); + s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.rgba_color = raw4; s.linewidth = uintBitsToFloat(raw5); return s; } void AnnoStroke_write(AnnoStrokeRef ref, AnnoStroke s) { uint ix = ref.offset >> 2; - annotated[ix + 0] = s.rgba_color; - annotated[ix + 1] = floatBitsToUint(s.bbox.x); - annotated[ix + 2] = floatBitsToUint(s.bbox.y); - annotated[ix + 3] = floatBitsToUint(s.bbox.z); - annotated[ix + 4] = floatBitsToUint(s.bbox.w); + annotated[ix + 0] = floatBitsToUint(s.bbox.x); + annotated[ix + 1] = floatBitsToUint(s.bbox.y); + annotated[ix + 2] = floatBitsToUint(s.bbox.z); + annotated[ix + 3] = floatBitsToUint(s.bbox.w); + annotated[ix + 4] = s.rgba_color; annotated[ix + 5] = floatBitsToUint(s.linewidth); } +AnnoClip AnnoClip_read(AnnoClipRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = annotated[ix + 0]; + uint raw1 = annotated[ix + 1]; + uint raw2 = annotated[ix + 2]; + uint raw3 = annotated[ix + 3]; + AnnoClip s; + s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + return s; +} + +void AnnoClip_write(AnnoClipRef ref, AnnoClip s) { + uint ix = ref.offset >> 2; + annotated[ix + 0] = floatBitsToUint(s.bbox.x); + annotated[ix + 1] = floatBitsToUint(s.bbox.y); + annotated[ix + 2] = floatBitsToUint(s.bbox.z); + annotated[ix + 3] = floatBitsToUint(s.bbox.w); +} + uint Annotated_tag(AnnotatedRef ref) { return annotated[ref.offset >> 2]; } @@ -150,6 +185,14 @@ AnnoFillMask Annotated_FillMaskInv_read(AnnotatedRef ref) { return AnnoFillMask_read(AnnoFillMaskRef(ref.offset + 4)); } +AnnoClip Annotated_BeginClip_read(AnnotatedRef ref) { + return AnnoClip_read(AnnoClipRef(ref.offset + 4)); +} + +AnnoClip Annotated_EndClip_read(AnnotatedRef ref) { + return AnnoClip_read(AnnoClipRef(ref.offset + 4)); +} + void Annotated_Nop_write(AnnotatedRef ref) { annotated[ref.offset >> 2] = Annotated_Nop; } @@ -174,3 +217,13 @@ void Annotated_FillMaskInv_write(AnnotatedRef ref, AnnoFillMask s) { AnnoFillMask_write(AnnoFillMaskRef(ref.offset + 4), s); } +void Annotated_BeginClip_write(AnnotatedRef ref, AnnoClip s) { + annotated[ref.offset >> 2] = Annotated_BeginClip; + AnnoClip_write(AnnoClipRef(ref.offset + 4), s); +} + +void Annotated_EndClip_write(AnnotatedRef ref, AnnoClip s) { + annotated[ref.offset >> 2] = Annotated_EndClip; + AnnoClip_write(AnnoClipRef(ref.offset + 4), s); +} + diff --git a/piet-gpu/shader/backdrop.comp b/piet-gpu/shader/backdrop.comp index 20c6ce9..6fa9f9f 100644 --- a/piet-gpu/shader/backdrop.comp +++ b/piet-gpu/shader/backdrop.comp @@ -57,6 +57,7 @@ void main() { case Annotated_Fill: case Annotated_FillMask: case Annotated_FillMaskInv: + case Annotated_BeginClip: PathRef path_ref = PathRef(element_ix * Path_size); Path path = Path_read(path_ref); sh_row_width[th_ix] = path.bbox.z - path.bbox.x; diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv index e2093d9..ee4dda8 100644 Binary files a/piet-gpu/shader/backdrop.spv and b/piet-gpu/shader/backdrop.spv differ diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp index ee3301b..5dce813 100644 --- a/piet-gpu/shader/binning.comp +++ b/piet-gpu/shader/binning.comp @@ -57,22 +57,20 @@ void main() { tag = Annotated_tag(ref); } int x0 = 0, y0 = 0, x1 = 0, y1 = 0; - float my_right_edge = INFINITY; switch (tag) { case Annotated_Fill: case Annotated_FillMask: case Annotated_FillMaskInv: case Annotated_Stroke: - // Note: we take advantage of the fact that fills and strokes - // have compatible layout. + case Annotated_BeginClip: + case Annotated_EndClip: + // Note: we take advantage of the fact that these drawing elements + // have the bbox at the same place in their layout. AnnoFill fill = Annotated_Fill_read(ref); x0 = int(floor(fill.bbox.x * SX)); y0 = int(floor(fill.bbox.y * SY)); x1 = int(ceil(fill.bbox.z * SX)); y1 = int(ceil(fill.bbox.w * SY)); - // It probably makes more sense to track x1, to avoid having to redo - // the rounding to tile coords. - my_right_edge = fill.bbox.z; break; } @@ -131,7 +129,7 @@ void main() { idx += count[my_slice - 1][bin_ix]; } uint out_offset = sh_chunk_start[bin_ix] + idx * BinInstance_size; - BinInstance_write(BinInstanceRef(out_offset), BinInstance(element_ix, my_right_edge)); + BinInstance_write(BinInstanceRef(out_offset), BinInstance(element_ix)); } x++; if (x == x1) { diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv index 50070a1..2c923c4 100644 Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ diff --git a/piet-gpu/shader/bins.h b/piet-gpu/shader/bins.h index 85f7536..3ce06e0 100644 --- a/piet-gpu/shader/bins.h +++ b/piet-gpu/shader/bins.h @@ -10,10 +10,9 @@ struct BinChunkRef { struct BinInstance { uint element_ix; - float right_edge; }; -#define BinInstance_size 8 +#define BinInstance_size 4 BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) { return BinInstanceRef(ref.offset + index * BinInstance_size); @@ -33,17 +32,14 @@ BinChunkRef BinChunk_index(BinChunkRef ref, uint index) { BinInstance BinInstance_read(BinInstanceRef ref) { uint ix = ref.offset >> 2; uint raw0 = bins[ix + 0]; - uint raw1 = bins[ix + 1]; BinInstance s; s.element_ix = raw0; - s.right_edge = uintBitsToFloat(raw1); return s; } void BinInstance_write(BinInstanceRef ref, BinInstance s) { uint ix = ref.offset >> 2; bins[ix + 0] = s.element_ix; - bins[ix + 1] = floatBitsToUint(s.right_edge); } BinChunk BinChunk_read(BinChunkRef ref) { diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 16573e8..12ebb98 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -45,7 +45,6 @@ layout(set = 0, binding = 4) buffer PtclBuf { #define N_PART_READ (1 << LG_N_PART_READ) shared uint sh_elements[N_TILE]; -shared float sh_right_edge[N_TILE]; // Number of elements in the partition; prefix sum. shared uint sh_part_count[N_PART_READ]; @@ -148,7 +147,6 @@ void main() { BinInstanceRef inst_ref = BinInstanceRef(sh_part_elements[part_ix]); BinInstance inst = BinInstance_read(BinInstance_index(inst_ref, ix)); sh_elements[th_ix] = inst.element_ix; - sh_right_edge[th_ix] = inst.right_edge; } barrier(); @@ -161,10 +159,8 @@ void main() { uint tag = Annotated_Nop; uint element_ix; AnnotatedRef ref; - float right_edge = 0.0; if (th_ix + rd_ix < wr_ix) { element_ix = sh_elements[th_ix]; - right_edge = sh_right_edge[th_ix]; ref = AnnotatedRef(element_ix * Annotated_size); tag = Annotated_tag(ref); } @@ -176,10 +172,10 @@ void main() { case Annotated_FillMask: case Annotated_FillMaskInv: case Annotated_Stroke: - // Because the only elements we're processing right now are - // paths, we can just use the element index as the path index. - // In future, when we're doing a bunch of stuff, the path index - // should probably be stored in the annotated element. + case Annotated_BeginClip: + case Annotated_EndClip: + // We have one "path" for each element, even if the element isn't + // actually a path (currently EndClip, but images etc in the future). uint path_ix = element_ix; Path path = Path_read(PathRef(path_ix * Path_size)); uint stride = path.bbox.z - path.bbox.x; @@ -224,20 +220,25 @@ void main() { el_ix = probe; } } - AnnotatedRef ref = AnnotatedRef(el_ix * Annotated_size); + AnnotatedRef ref = AnnotatedRef(sh_elements[el_ix] * Annotated_size); uint tag = Annotated_tag(ref); uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0); uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + seq_ix % width; uint y = sh_tile_y0[el_ix] + seq_ix / width; - Tile tile = Tile_read(TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size)); - // Include the path in the tile if - // - the tile contains at least a segment (tile offset non-zero) - // - the tile is completely covered (backdrop non-zero) - // - the tile is not covered and we're filling everything outside the path (backdrop zero, inverse fills). - bool inside = tile.backdrop != 0; - bool fill = tag != Annotated_FillMaskInv; - if (tile.tile.offset != 0 || inside == fill) { + bool include_tile; + if (tag == Annotated_BeginClip || tag == Annotated_EndClip) { + include_tile = true; + } else { + Tile tile = Tile_read(TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size)); + // Include the path in the tile if + // - the tile contains at least a segment (tile offset non-zero) + // - the tile is completely covered (backdrop non-zero) + bool inside = tile.backdrop != 0; + bool fill = tag != Annotated_FillMaskInv; + include_tile = tile.tile.offset != 0 || inside == fill; + } + if (include_tile) { uint el_slice = el_ix / 32; uint el_mask = 1 << (el_ix & 31); atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask); @@ -247,8 +248,7 @@ void main() { barrier(); // Output non-segment elements for this tile. The thread does a sequential walk - // through the non-segment elements, and for segments, count and backdrop are - // aggregated using bit counting. + // through the non-segment elements. uint slice_ix = 0; uint bitmap = sh_bitmaps[0][th_ix]; while (true) { @@ -291,6 +291,27 @@ void main() { } cmd_ref.offset += Cmd_size; break; + case Annotated_BeginClip: + tile = Tile_read(TileRef(sh_tile_base[element_ref_ix] + + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); + alloc_cmd(cmd_ref, cmd_limit); + if (tile.tile.offset != 0) { + CmdBeginClip cmd_begin_clip; + cmd_begin_clip.tile_ref = tile.tile.offset; + cmd_begin_clip.backdrop = tile.backdrop; + Cmd_BeginClip_write(cmd_ref, cmd_begin_clip); + } else { + // TODO: here is where a bunch of optimization magic should happen + float alpha = tile.backdrop == 0 ? 0.0 : 1.0; + Cmd_BeginSolidClip_write(cmd_ref, CmdBeginSolidClip(alpha)); + } + cmd_ref.offset += Cmd_size; + break; + case Annotated_EndClip: + alloc_cmd(cmd_ref, cmd_limit); + Cmd_EndClip_write(cmd_ref, CmdEndClip(1.0)); + cmd_ref.offset += Cmd_size; + break; case Annotated_FillMask: case Annotated_FillMaskInv: tile = Tile_read(TileRef(sh_tile_base[element_ref_ix] diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index 7758b96..98dde1b 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp index 8bb9a4d..7606554 100644 --- a/piet-gpu/shader/elements.comp +++ b/piet-gpu/shader/elements.comp @@ -132,9 +132,13 @@ State map_element(ElementRef ref) { case Element_FillMask: case Element_FillMaskInv: case Element_Stroke: + case Element_BeginClip: c.flags = FLAG_RESET_BBOX; c.path_count = 1; break; + case Element_EndClip: + c.path_count = 1; + break; case Element_SetLineWidth: SetLineWidth lw = Element_SetLineWidth_read(ref); c.linewidth = lw.width; @@ -421,6 +425,21 @@ void main() { out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size); Annotated_FillMaskInv_write(out_ref, anno_fill_mask); break; + case Element_BeginClip: + Clip begin_clip = Element_BeginClip_read(this_ref); + AnnoClip anno_begin_clip = AnnoClip(begin_clip.bbox); + // This is the absolute bbox, it's been transformed during encoding. + anno_begin_clip.bbox = begin_clip.bbox; + out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size); + Annotated_BeginClip_write(out_ref, anno_begin_clip); + break; + case Element_EndClip: + Clip end_clip = Element_EndClip_read(this_ref); + // This bbox is expected to be the same as the begin one. + AnnoClip anno_end_clip = AnnoClip(end_clip.bbox); + out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size); + Annotated_EndClip_write(out_ref, anno_end_clip); + break; } } } diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv index fed60fb..73ff3f4 100644 Binary files a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index d21aa87..a7d5e92 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -27,6 +27,8 @@ layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image; #include "ptcl.h" #include "tile.h" +#define BLEND_STACK_SIZE 4 + // Calculate coverage based on backdrop + coverage of each line segment float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) { // Probably better to store as float, but conversion is no doubt cheap. @@ -69,6 +71,8 @@ void main() { vec2 xy = vec2(xy_uint); vec3 rgb[CHUNK]; float mask[CHUNK]; + uint blend_stack[BLEND_STACK_SIZE][CHUNK]; + uint blend_sp = 0; for (uint i = 0; i < CHUNK; i++) { rgb[i] = vec3(0.5); mask[i] = 1.0; @@ -137,6 +141,30 @@ void main() { mask[k] = mix(mask[k], fill_mask.mask, 1.0 - area[k]); } break; + case Cmd_BeginClip: + CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_ref); + area = computeArea(xy, begin_clip.backdrop, begin_clip.tile_ref); + for (uint k = 0; k < CHUNK; k++) { + blend_stack[blend_sp][k] = packUnorm4x8(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0))); + } + blend_sp++; + break; + case Cmd_BeginSolidClip: + CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_ref); + float solid_alpha = begin_solid_clip.alpha; + for (uint k = 0; k < CHUNK; k++) { + blend_stack[blend_sp][k] = packUnorm4x8(vec4(rgb[k], solid_alpha)); + } + blend_sp++; + break; + case Cmd_EndClip: + CmdEndClip end_clip = Cmd_EndClip_read(cmd_ref); + blend_sp--; + for (uint k = 0; k < CHUNK; k++) { + vec4 rgba = unpackUnorm4x8(blend_stack[blend_sp][k]); + rgb[k] = mix(rgba.rgb, rgb[k], end_clip.alpha * rgba.a); + } + break; case Cmd_Solid: CmdSolid solid = Cmd_Solid_read(cmd_ref); fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx; diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index d159fe5..a02387a 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv index db5bc57..f82a031 100644 Binary files a/piet-gpu/shader/path_coarse.spv and b/piet-gpu/shader/path_coarse.spv differ diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h index d1cc83d..db8d47b 100644 --- a/piet-gpu/shader/ptcl.h +++ b/piet-gpu/shader/ptcl.h @@ -20,6 +20,18 @@ struct CmdFillMaskRef { uint offset; }; +struct CmdBeginClipRef { + uint offset; +}; + +struct CmdBeginSolidClipRef { + uint offset; +}; + +struct CmdEndClipRef { + uint offset; +}; + struct CmdSolidRef { uint offset; }; @@ -103,6 +115,37 @@ CmdFillMaskRef CmdFillMask_index(CmdFillMaskRef ref, uint index) { return CmdFillMaskRef(ref.offset + index * CmdFillMask_size); } +struct CmdBeginClip { + uint tile_ref; + int backdrop; +}; + +#define CmdBeginClip_size 8 + +CmdBeginClipRef CmdBeginClip_index(CmdBeginClipRef ref, uint index) { + return CmdBeginClipRef(ref.offset + index * CmdBeginClip_size); +} + +struct CmdBeginSolidClip { + float alpha; +}; + +#define CmdBeginSolidClip_size 4 + +CmdBeginSolidClipRef CmdBeginSolidClip_index(CmdBeginSolidClipRef ref, uint index) { + return CmdBeginSolidClipRef(ref.offset + index * CmdBeginSolidClip_size); +} + +struct CmdEndClip { + float alpha; +}; + +#define CmdEndClip_size 4 + +CmdEndClipRef CmdEndClip_index(CmdEndClipRef ref, uint index) { + return CmdEndClipRef(ref.offset + index * CmdEndClip_size); +} + struct CmdSolid { uint rgba_color; }; @@ -139,10 +182,13 @@ CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) { #define Cmd_Fill 3 #define Cmd_FillMask 4 #define Cmd_FillMaskInv 5 -#define Cmd_Stroke 6 -#define Cmd_Solid 7 -#define Cmd_SolidMask 8 -#define Cmd_Jump 9 +#define Cmd_BeginClip 6 +#define Cmd_BeginSolidClip 7 +#define Cmd_EndClip 8 +#define Cmd_Stroke 9 +#define Cmd_Solid 10 +#define Cmd_SolidMask 11 +#define Cmd_Jump 12 #define Cmd_size 20 CmdRef Cmd_index(CmdRef ref, uint index) { @@ -271,6 +317,48 @@ void CmdFillMask_write(CmdFillMaskRef ref, CmdFillMask s) { ptcl[ix + 2] = floatBitsToUint(s.mask); } +CmdBeginClip CmdBeginClip_read(CmdBeginClipRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = ptcl[ix + 0]; + uint raw1 = ptcl[ix + 1]; + CmdBeginClip s; + s.tile_ref = raw0; + s.backdrop = int(raw1); + return s; +} + +void CmdBeginClip_write(CmdBeginClipRef ref, CmdBeginClip s) { + uint ix = ref.offset >> 2; + ptcl[ix + 0] = s.tile_ref; + ptcl[ix + 1] = uint(s.backdrop); +} + +CmdBeginSolidClip CmdBeginSolidClip_read(CmdBeginSolidClipRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = ptcl[ix + 0]; + CmdBeginSolidClip s; + s.alpha = uintBitsToFloat(raw0); + return s; +} + +void CmdBeginSolidClip_write(CmdBeginSolidClipRef ref, CmdBeginSolidClip s) { + uint ix = ref.offset >> 2; + ptcl[ix + 0] = floatBitsToUint(s.alpha); +} + +CmdEndClip CmdEndClip_read(CmdEndClipRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = ptcl[ix + 0]; + CmdEndClip s; + s.alpha = uintBitsToFloat(raw0); + return s; +} + +void CmdEndClip_write(CmdEndClipRef ref, CmdEndClip s) { + uint ix = ref.offset >> 2; + ptcl[ix + 0] = floatBitsToUint(s.alpha); +} + CmdSolid CmdSolid_read(CmdSolidRef ref) { uint ix = ref.offset >> 2; uint raw0 = ptcl[ix + 0]; @@ -334,6 +422,18 @@ CmdFillMask Cmd_FillMaskInv_read(CmdRef ref) { return CmdFillMask_read(CmdFillMaskRef(ref.offset + 4)); } +CmdBeginClip Cmd_BeginClip_read(CmdRef ref) { + return CmdBeginClip_read(CmdBeginClipRef(ref.offset + 4)); +} + +CmdBeginSolidClip Cmd_BeginSolidClip_read(CmdRef ref) { + return CmdBeginSolidClip_read(CmdBeginSolidClipRef(ref.offset + 4)); +} + +CmdEndClip Cmd_EndClip_read(CmdRef ref) { + return CmdEndClip_read(CmdEndClipRef(ref.offset + 4)); +} + CmdStroke Cmd_Stroke_read(CmdRef ref) { return CmdStroke_read(CmdStrokeRef(ref.offset + 4)); } @@ -379,6 +479,21 @@ void Cmd_FillMaskInv_write(CmdRef ref, CmdFillMask s) { CmdFillMask_write(CmdFillMaskRef(ref.offset + 4), s); } +void Cmd_BeginClip_write(CmdRef ref, CmdBeginClip s) { + ptcl[ref.offset >> 2] = Cmd_BeginClip; + CmdBeginClip_write(CmdBeginClipRef(ref.offset + 4), s); +} + +void Cmd_BeginSolidClip_write(CmdRef ref, CmdBeginSolidClip s) { + ptcl[ref.offset >> 2] = Cmd_BeginSolidClip; + CmdBeginSolidClip_write(CmdBeginSolidClipRef(ref.offset + 4), s); +} + +void Cmd_EndClip_write(CmdRef ref, CmdEndClip s) { + ptcl[ref.offset >> 2] = Cmd_EndClip; + CmdEndClip_write(CmdEndClipRef(ref.offset + 4), s); +} + void Cmd_Stroke_write(CmdRef ref, CmdStroke s) { ptcl[ref.offset >> 2] = Cmd_Stroke; CmdStroke_write(CmdStrokeRef(ref.offset + 4), s); diff --git a/piet-gpu/shader/scene.h b/piet-gpu/shader/scene.h index 6823fe6..6558ad3 100644 --- a/piet-gpu/shader/scene.h +++ b/piet-gpu/shader/scene.h @@ -32,6 +32,10 @@ struct TransformRef { uint offset; }; +struct ClipRef { + uint offset; +}; + struct ElementRef { uint offset; }; @@ -123,6 +127,16 @@ TransformRef Transform_index(TransformRef ref, uint index) { return TransformRef(ref.offset + index * Transform_size); } +struct Clip { + vec4 bbox; +}; + +#define Clip_size 16 + +ClipRef Clip_index(ClipRef ref, uint index) { + return ClipRef(ref.offset + index * Clip_size); +} + #define Element_Nop 0 #define Element_StrokeLine 1 #define Element_FillLine 2 @@ -136,6 +150,8 @@ TransformRef Transform_index(TransformRef ref, uint index) { #define Element_Transform 10 #define Element_FillMask 11 #define Element_FillMaskInv 12 +#define Element_BeginClip 13 +#define Element_EndClip 14 #define Element_size 36 ElementRef Element_index(ElementRef ref, uint index) { @@ -233,6 +249,17 @@ Transform Transform_read(TransformRef ref) { return s; } +Clip Clip_read(ClipRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = scene[ix + 0]; + uint raw1 = scene[ix + 1]; + uint raw2 = scene[ix + 2]; + uint raw3 = scene[ix + 3]; + Clip s; + s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + return s; +} + uint Element_tag(ElementRef ref) { return scene[ref.offset >> 2]; } @@ -285,3 +312,11 @@ FillMask Element_FillMaskInv_read(ElementRef ref) { return FillMask_read(FillMaskRef(ref.offset + 4)); } +Clip Element_BeginClip_read(ElementRef ref) { + return Clip_read(ClipRef(ref.offset + 4)); +} + +Clip Element_EndClip_read(ElementRef ref) { + return Clip_read(ClipRef(ref.offset + 4)); +} + diff --git a/piet-gpu/shader/tile_alloc.comp b/piet-gpu/shader/tile_alloc.comp index 3e1e52f..de6e827 100644 --- a/piet-gpu/shader/tile_alloc.comp +++ b/piet-gpu/shader/tile_alloc.comp @@ -50,8 +50,10 @@ void main() { case Annotated_FillMask: case Annotated_FillMaskInv: case Annotated_Stroke: - // Note: we take advantage of the fact that fills and strokes - // have compatible layout. + case Annotated_BeginClip: + case Annotated_EndClip: + // Note: we take advantage of the fact that fills, strokes, and + // clips have compatible layout. AnnoFill fill = Annotated_Fill_read(ref); x0 = int(floor(fill.bbox.x * SX)); y0 = int(floor(fill.bbox.y * SY)); @@ -67,6 +69,11 @@ void main() { Path path; path.bbox = uvec4(x0, y0, x1, y1); uint tile_count = (x1 - x0) * (y1 - y0); + if (tag == Annotated_EndClip) { + // Don't actually allocate tiles for an end clip, but we do want + // the path structure (especially bbox) allocated for it. + tile_count = 0; + } sh_tile_count[th_ix] = tile_count; // Prefix sum of sh_tile_count diff --git a/piet-gpu/shader/tile_alloc.spv b/piet-gpu/shader/tile_alloc.spv index af52665..93cdea6 100644 Binary files a/piet-gpu/shader/tile_alloc.spv and b/piet-gpu/shader/tile_alloc.spv differ diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index dc3c2c8..6eff190 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -7,7 +7,7 @@ pub use render_ctx::PietGpuRenderContext; use rand::{Rng, RngCore}; -use piet::kurbo::{BezPath, Circle, Line, Point, Vec2}; +use piet::kurbo::{BezPath, Circle, Point, Vec2}; use piet::{Color, ImageFormat, RenderContext}; use piet_gpu_types::encoder::Encode; @@ -52,16 +52,23 @@ pub fn render_scene(rc: &mut impl RenderContext) { let circle = Circle::new(center, radius); rc.fill(circle, &color); } - /* + let _ = rc.save(); let mut path = BezPath::new(); - path.move_to((100.0, 1150.0)); - path.line_to((200.0, 1200.0)); - path.line_to((150.0, 1250.0)); + path.move_to((200.0, 150.0)); + path.line_to((100.0, 200.0)); + path.line_to((150.0, 250.0)); + path.close_path(); + rc.clip(path); + + let mut path = BezPath::new(); + path.move_to((100.0, 150.0)); + path.line_to((200.0, 200.0)); + path.line_to((150.0, 250.0)); path.close_path(); rc.fill(path, &Color::rgb8(128, 0, 128)); - */ + let _ = rc.restore(); rc.stroke( - Line::new((100.0, 100.0), (200.0, 150.0)), + piet::kurbo::Line::new((100.0, 100.0), (200.0, 150.0)), &Color::WHITE, 5.0, ); diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 9593704..73d1f27 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -2,11 +2,12 @@ use std::{borrow::Cow, ops::RangeBounds}; use piet_gpu_types::encoder::{Encode, Encoder}; -use piet_gpu_types::scene::{CubicSeg, Element, Fill, LineSeg, QuadSeg, SetLineWidth, Stroke}; +use piet_gpu_types::scene::{ + Clip, CubicSeg, Element, Fill, LineSeg, QuadSeg, SetLineWidth, Stroke, Transform, +}; use piet::{ - kurbo::Size, - kurbo::{Affine, PathEl, Point, Rect, Shape}, + kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size}, HitTestPosition, TextAttribute, TextStorage, }; @@ -33,8 +34,14 @@ pub struct PietGpuRenderContext { stroke_width: f32, // We're tallying these cpu-side for expedience, but will probably // move this to some kind of readback from element processing. + /// The count of elements that make it through to coarse rasterization. path_count: usize, + /// The count of path segment elements. pathseg_count: usize, + + cur_transform: Affine, + state_stack: Vec, + clip_stack: Vec, } #[derive(Clone)] @@ -43,6 +50,23 @@ pub enum PietGpuBrush { Gradient, } +#[derive(Default)] +struct State { + /// The transform relative to the parent state. + rel_transform: Affine, + /// The transform at the parent state. + /// + /// This invariant should hold: transform * rel_transform = cur_transform + transform: Affine, + n_clip: usize, +} + +struct ClipElement { + /// Index of BeginClip element in element vec, for bbox fixup. + begin_ix: usize, + bbox: Option, +} + const TOLERANCE: f64 = 0.25; impl PietGpuRenderContext { @@ -58,6 +82,9 @@ impl PietGpuRenderContext { stroke_width, path_count: 0, pathseg_count: 0, + cur_transform: Affine::default(), + state_stack: Vec::new(), + clip_stack: Vec::new(), } } @@ -96,17 +123,19 @@ impl RenderContext for PietGpuRenderContext { fn clear(&mut self, _color: Color) {} fn stroke(&mut self, shape: impl Shape, brush: &impl IntoBrush, width: f64) { - let width = width as f32; - if self.stroke_width != width { + let width_f32 = width as f32; + if self.stroke_width != width_f32 { self.elements - .push(Element::SetLineWidth(SetLineWidth { width })); - self.stroke_width = width; + .push(Element::SetLineWidth(SetLineWidth { width: width_f32 })); + self.stroke_width = width_f32; } let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); - let path = shape.path_elements(TOLERANCE); - self.encode_path(path, false); match brush { PietGpuBrush::Solid(rgba_color) => { + // Note: the bbox contribution of stroke becomes more complicated with miter joins. + self.accumulate_bbox(|| shape.bounding_box() + Insets::uniform(width * 0.5)); + let path = shape.path_elements(TOLERANCE); + self.encode_path(path, false); let stroke = Stroke { rgba_color }; self.elements.push(Element::Stroke(stroke)); self.path_count += 1; @@ -126,21 +155,36 @@ impl RenderContext for PietGpuRenderContext { fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush) { let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); - let path = shape.path_elements(TOLERANCE); - self.encode_path(path, true); - match brush { - PietGpuBrush::Solid(rgba_color) => { - let fill = Fill { rgba_color }; - self.elements.push(Element::Fill(fill)); - self.path_count += 1; - } - _ => (), + if let PietGpuBrush::Solid(rgba_color) = brush { + // Note: we might get a good speedup from using an approximate bounding box. + // Perhaps that should be added to kurbo. + self.accumulate_bbox(|| shape.bounding_box()); + let path = shape.path_elements(TOLERANCE); + self.encode_path(path, true); + let fill = Fill { rgba_color }; + self.elements.push(Element::Fill(fill)); + self.path_count += 1; } } fn fill_even_odd(&mut self, _shape: impl Shape, _brush: &impl IntoBrush) {} - fn clip(&mut self, _shape: impl Shape) {} + fn clip(&mut self, shape: impl Shape) { + let path = shape.path_elements(TOLERANCE); + self.encode_path(path, true); + let begin_ix = self.elements.len(); + self.elements.push(Element::BeginClip(Clip { + bbox: Default::default(), + })); + self.clip_stack.push(ClipElement { + bbox: None, + begin_ix, + }); + self.path_count += 1; + if let Some(tos) = self.state_stack.last_mut() { + tos.n_clip += 1; + } + } fn text(&mut self) -> &mut Self::Text { &mut self.inner_text @@ -149,15 +193,46 @@ impl RenderContext for PietGpuRenderContext { fn draw_text(&mut self, _layout: &Self::TextLayout, _pos: impl Into) {} fn save(&mut self) -> Result<(), Error> { + self.state_stack.push(State { + rel_transform: Affine::default(), + transform: self.cur_transform, + n_clip: 0, + }); Ok(()) } + fn restore(&mut self) -> Result<(), Error> { - Ok(()) + if let Some(state) = self.state_stack.pop() { + if state.rel_transform != Affine::default() { + let a_inv = state.rel_transform.inverse(); + self.elements + .push(Element::Transform(to_scene_transform(a_inv))); + } + self.cur_transform = state.transform; + for _ in 0..state.n_clip { + self.pop_clip(); + } + Ok(()) + } else { + Err(Error::StackUnbalance) + } } + fn finish(&mut self) -> Result<(), Error> { + for _ in 0..self.clip_stack.len() { + self.pop_clip(); + } Ok(()) } - fn transform(&mut self, _transform: Affine) {} + + fn transform(&mut self, transform: Affine) { + self.elements + .push(Element::Transform(to_scene_transform(transform))); + if let Some(tos) = self.state_stack.last_mut() { + tos.rel_transform *= transform; + } + self.cur_transform *= transform; + } fn make_image( &mut self, @@ -189,7 +264,13 @@ impl RenderContext for PietGpuRenderContext { fn blurred_rect(&mut self, _rect: Rect, _blur_radius: f64, _brush: &impl IntoBrush) {} fn current_transform(&self) -> Affine { - Default::default() + self.cur_transform + } + + fn with_save(&mut self, f: impl FnOnce(&mut Self) -> Result<(), Error>) -> Result<(), Error> { + self.save()?; + // Always try to restore the stack, even if `f` errored. + f(self).and(self.restore()) } } @@ -316,6 +397,48 @@ impl PietGpuRenderContext { } } } + + fn pop_clip(&mut self) { + let tos = self.clip_stack.pop().unwrap(); + let bbox = tos.bbox.unwrap_or_default(); + let bbox_f32_4 = rect_to_f32_4(bbox); + self.elements + .push(Element::EndClip(Clip { bbox: bbox_f32_4 })); + self.path_count += 1; + if let Element::BeginClip(begin_clip) = &mut self.elements[tos.begin_ix] { + begin_clip.bbox = bbox_f32_4; + } else { + unreachable!("expected BeginClip, not found"); + } + if let Some(bbox) = tos.bbox { + self.union_bbox(bbox); + } + } + + /// Accumulate a bbox. + /// + /// The bbox is given lazily as a closure, relative to the current transform. + /// It's lazy because we don't need to compute it unless we're inside a clip. + fn accumulate_bbox(&mut self, f: impl FnOnce() -> Rect) { + if !self.clip_stack.is_empty() { + let bbox = f(); + let bbox = self.cur_transform.transform_rect_bbox(bbox); + self.union_bbox(bbox); + } + } + + /// Accumulate an absolute bbox. + /// + /// The bbox is given already transformed into surface coordinates. + fn union_bbox(&mut self, bbox: Rect) { + if let Some(tos) = self.clip_stack.last_mut() { + tos.bbox = if let Some(old_bbox) = tos.bbox { + Some(old_bbox.union(bbox)) + } else { + Some(bbox) + }; + } + } } impl Text for PietGpuText { @@ -410,3 +533,20 @@ impl IntoBrush for PietGpuBrush { fn to_f32_2(point: Point) -> [f32; 2] { [point.x as f32, point.y as f32] } + +fn rect_to_f32_4(rect: Rect) -> [f32; 4] { + [ + rect.x0 as f32, + rect.y0 as f32, + rect.x1 as f32, + rect.y1 as f32, + ] +} + +fn to_scene_transform(transform: Affine) -> Transform { + let c = transform.as_coeffs(); + Transform { + mat: [c[0] as f32, c[1] as f32, c[2] as f32, c[3] as f32], + translate: [c[4] as f32, c[5] as f32], + } +}