diff --git a/Cargo.lock b/Cargo.lock
index 5f9b877..af141b5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -139,6 +139,7 @@ dependencies = [
  "piet-gpu-types",
  "png",
  "rand",
+ "roxmltree",
 ]
 
 [[package]]
@@ -243,6 +244,15 @@ dependencies = [
  "rand_core",
 ]
 
+[[package]]
+name = "roxmltree"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d5001f134077069d87f77c8b9452b690df2445f7a43f1c7ca4a1af8dd505789d"
+dependencies = [
+ "xmlparser",
+]
+
 [[package]]
 name = "syn"
 version = "1.0.17"
@@ -287,3 +297,9 @@ name = "winapi-x86_64-pc-windows-gnu"
 version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
+[[package]]
+name = "xmlparser"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccb4240203dadf40be2de9369e5c6dec1bf427528115b030baca3334c18362d7"
diff --git a/piet-gpu-types/src/fill_seg.rs b/piet-gpu-types/src/fill_seg.rs
new file mode 100644
index 0000000..2242a84
--- /dev/null
+++ b/piet-gpu-types/src/fill_seg.rs
@@ -0,0 +1,40 @@
+use piet_gpu_derive::piet_gpu;
+
+// Structures representing segments for fill items.
+
+// There is some cut'n'paste here from stroke segments, which can be
+// traced to the fact that buffers in GLSL are basically global.
+// Maybe there's a way to address that, but in the meantime living
+// with the duplication is easiest.
+
+piet_gpu! {
+    #[gpu_write]
+    mod fill_seg {
+        // Per-tile header: `n` fill items, whose headers start at `items`.
+        struct FillTileHeader {
+            n: u32,
+            items: Ref<FillItemHeader>,
+        }
+
+        // Per-item header: the backdrop value and the first segment chunk.
+        struct FillItemHeader {
+            backdrop: i32,
+            segments: Ref<FillSegChunk>,
+        }
+
+        // TODO: strongly consider using f16. If so, these would be
+        // relative to the tile. We're doing f32 for now to minimize
+        // divergence from piet-metal originals.
+        struct FillSegment {
+            start: [f32; 2],
+            end: [f32; 2],
+        }
+
+        // Chunk header: `n` segments stored inline, then a link to `next`.
+        struct FillSegChunk {
+            n: u32,
+            next: Ref<FillSegChunk>,
+            // Segments follow (could represent this as a variable sized array).
+        }
+    }
+}
diff --git a/piet-gpu-types/src/lib.rs b/piet-gpu-types/src/lib.rs
index db9516f..d85df70 100644
--- a/piet-gpu-types/src/lib.rs
+++ b/piet-gpu-types/src/lib.rs
@@ -1,4 +1,5 @@
 pub mod encoder;
+pub mod fill_seg;
 pub mod ptcl;
 pub mod scene;
 pub mod segment;
diff --git a/piet-gpu-types/src/main.rs b/piet-gpu-types/src/main.rs
index 834f1b6..c0b9d7e 100644
--- a/piet-gpu-types/src/main.rs
+++ b/piet-gpu-types/src/main.rs
@@ -7,6 +7,7 @@ fn main() {
             "scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
             "tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
             "segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()),
+            "fill_seg" => print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()),
             "ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
             "test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
             _ => println!("Oops, unknown module name"),
diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs
index 36274c4..911f2c8 100644
--- a/piet-gpu-types/src/ptcl.rs
+++ b/piet-gpu-types/src/ptcl.rs
@@ -19,8 +19,10 @@ piet_gpu! {
             rgba_color: u32,
         }
         struct CmdFill {
-            start: [f32; 2],
-            end: [f32; 2],
+            // Should be Ref<FillSegChunk> if we had cross-module references.
+            seg_ref: u32,
+            backdrop: i32,
+            rgba_color: u32,
         }
         struct CmdFillEdge {
             // The sign is only one bit.
diff --git a/piet-gpu/Cargo.toml b/piet-gpu/Cargo.toml index b082868..2555e62 100644 --- a/piet-gpu/Cargo.toml +++ b/piet-gpu/Cargo.toml @@ -17,3 +17,4 @@ kurbo = "0.5.11" piet = "0.0.12" png = "0.16.2" rand = "0.7.3" +roxmltree = "0.11" diff --git a/piet-gpu/Ghostscript_Tiger.svg b/piet-gpu/Ghostscript_Tiger.svg new file mode 100644 index 0000000..033611d --- /dev/null +++ b/piet-gpu/Ghostscript_Tiger.svg @@ -0,0 +1,142 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 4a8181f..0aaecae 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -13,6 +13,8 @@ build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h -build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h ptcl.h setup.h +build kernel2f.spv: glsl kernel2f.comp | scene.h tilegroup.h fill_seg.h setup.h -build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h setup.h +build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h fill_seg.h ptcl.h setup.h + +build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h diff --git a/piet-gpu/shader/fill_seg.h b/piet-gpu/shader/fill_seg.h new file mode 100644 index 0000000..abe199f --- /dev/null +++ b/piet-gpu/shader/fill_seg.h @@ -0,0 +1,130 @@ +// Code auto-generated by piet-gpu-derive + +struct FillTileHeaderRef { + uint offset; +}; + +struct FillItemHeaderRef { + uint offset; +}; + +struct FillSegmentRef { + uint offset; +}; + +struct FillSegChunkRef { + uint offset; +}; + +struct FillTileHeader { + uint n; + FillItemHeaderRef items; +}; + +#define FillTileHeader_size 8 + +FillTileHeaderRef 
FillTileHeader_index(FillTileHeaderRef ref, uint index) { + return FillTileHeaderRef(ref.offset + index * FillTileHeader_size); +} + +struct FillItemHeader { + int backdrop; + FillSegChunkRef segments; +}; + +#define FillItemHeader_size 8 + +FillItemHeaderRef FillItemHeader_index(FillItemHeaderRef ref, uint index) { + return FillItemHeaderRef(ref.offset + index * FillItemHeader_size); +} + +struct FillSegment { + vec2 start; + vec2 end; +}; + +#define FillSegment_size 16 + +FillSegmentRef FillSegment_index(FillSegmentRef ref, uint index) { + return FillSegmentRef(ref.offset + index * FillSegment_size); +} + +struct FillSegChunk { + uint n; + FillSegChunkRef next; +}; + +#define FillSegChunk_size 8 + +FillSegChunkRef FillSegChunk_index(FillSegChunkRef ref, uint index) { + return FillSegChunkRef(ref.offset + index * FillSegChunk_size); +} + +FillTileHeader FillTileHeader_read(FillTileHeaderRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = fill_seg[ix + 0]; + uint raw1 = fill_seg[ix + 1]; + FillTileHeader s; + s.n = raw0; + s.items = FillItemHeaderRef(raw1); + return s; +} + +void FillTileHeader_write(FillTileHeaderRef ref, FillTileHeader s) { + uint ix = ref.offset >> 2; + fill_seg[ix + 0] = s.n; + fill_seg[ix + 1] = s.items.offset; +} + +FillItemHeader FillItemHeader_read(FillItemHeaderRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = fill_seg[ix + 0]; + uint raw1 = fill_seg[ix + 1]; + FillItemHeader s; + s.backdrop = int(raw0); + s.segments = FillSegChunkRef(raw1); + return s; +} + +void FillItemHeader_write(FillItemHeaderRef ref, FillItemHeader s) { + uint ix = ref.offset >> 2; + fill_seg[ix + 0] = uint(s.backdrop); + fill_seg[ix + 1] = s.segments.offset; +} + +FillSegment FillSegment_read(FillSegmentRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = fill_seg[ix + 0]; + uint raw1 = fill_seg[ix + 1]; + uint raw2 = fill_seg[ix + 2]; + uint raw3 = fill_seg[ix + 3]; + FillSegment s; + s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); + s.end = 
vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + return s; +} + +void FillSegment_write(FillSegmentRef ref, FillSegment s) { + uint ix = ref.offset >> 2; + fill_seg[ix + 0] = floatBitsToUint(s.start.x); + fill_seg[ix + 1] = floatBitsToUint(s.start.y); + fill_seg[ix + 2] = floatBitsToUint(s.end.x); + fill_seg[ix + 3] = floatBitsToUint(s.end.y); +} + +FillSegChunk FillSegChunk_read(FillSegChunkRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = fill_seg[ix + 0]; + uint raw1 = fill_seg[ix + 1]; + FillSegChunk s; + s.n = raw0; + s.next = FillSegChunkRef(raw1); + return s; +} + +void FillSegChunk_write(FillSegChunkRef ref, FillSegChunk s) { + uint ix = ref.offset >> 2; + fill_seg[ix + 0] = s.n; + fill_seg[ix + 1] = s.next.offset; +} + diff --git a/piet-gpu/shader/kernel1.comp b/piet-gpu/shader/kernel1.comp index ce99005..6b76c53 100644 --- a/piet-gpu/shader/kernel1.comp +++ b/piet-gpu/shader/kernel1.comp @@ -52,10 +52,19 @@ void main() { TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START); ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4); InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size); - uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_ALLOC - Instance_size; + uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_STROKE_ALLOC - Instance_size; uint stroke_chunk_n = 0; uint stroke_n = 0; + // State for fill references. All this is a bit cut'n'paste, but making a + // proper abstraction isn't easy. 
+ TileGroupRef fill_start = TileGroupRef(tg_ref.offset + TILEGROUP_FILL_START); + ChunkRef fill_chunk_start = ChunkRef(fill_start.offset + 4); + InstanceRef fill_ref = InstanceRef(fill_chunk_start.offset + Chunk_size); + uint fill_limit = fill_start.offset + TILEGROUP_INITIAL_FILL_ALLOC - Instance_size; + uint fill_chunk_n = 0; + uint fill_n = 0; + vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX); PietItemRef root = PietItemRef(0); SimpleGroup group = PietItem_Group_read(root); @@ -100,6 +109,20 @@ void main() { Instance_write(stroke_ref, ins); stroke_chunk_n++; stroke_ref.offset += Instance_size; + } else if (tag == PietItem_Fill) { + if (fill_ref.offset > fill_limit) { + uint new_fill = atomicAdd(alloc, TILEGROUP_FILL_ALLOC); + Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(new_fill))); + fill_chunk_start = ChunkRef(new_fill); + fill_ref = InstanceRef(new_fill + Chunk_size); + fill_n += fill_chunk_n; + fill_chunk_n = 0; + fill_limit = new_fill + TILEGROUP_FILL_ALLOC - Instance_size; + } + Instance_write(fill_ref, ins); + fill_chunk_n++; + fill_ref.offset += Instance_size; + } } if (is_group) { @@ -129,4 +152,10 @@ void main() { Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0))); } tilegroup[stroke_start.offset >> 2] = stroke_n; + + fill_n += fill_chunk_n; + if (fill_n > 0) { + Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(0))); + } + tilegroup[fill_start.offset >> 2] = fill_n; } diff --git a/piet-gpu/shader/kernel1.spv b/piet-gpu/shader/kernel1.spv index 8430d74..358151d 100644 Binary files a/piet-gpu/shader/kernel1.spv and b/piet-gpu/shader/kernel1.spv differ diff --git a/piet-gpu/shader/kernel2f.comp b/piet-gpu/shader/kernel2f.comp new file mode 100644 index 0000000..14a6ead --- /dev/null +++ b/piet-gpu/shader/kernel2f.comp @@ -0,0 +1,165 @@ +// This is "kernel 2" (fill) in a 4-kernel pipeline. 
It processes the fill
+// (polyline) items in the scene and generates a list of segments for each, for
+// each tile.
+
+#version 450
+#extension GL_GOOGLE_include_directive : enable
+
+layout(local_size_x = 32) in;
+
+layout(set = 0, binding = 0) readonly buffer SceneBuf {
+    uint[] scene;
+};
+
+layout(set = 0, binding = 1) buffer TilegroupBuf {
+    uint[] tilegroup;
+};
+
+layout(set = 0, binding = 2) buffer FillSegBuf {
+    uint[] fill_seg;
+};
+
+layout(set = 0, binding = 3) buffer AllocBuf {
+    uint alloc;
+};
+
+#include "scene.h"
+#include "tilegroup.h"
+#include "fill_seg.h"
+
+#include "setup.h"
+
+// Ensure that there is space to encode a segment. Starting an item reuses the
+// current allocation while a chunk header plus two segments stay within
+// seg_limit; a full chunk is closed with a link to a freshly allocated one.
+void alloc_chunk(inout uint chunk_n_segs, inout FillSegChunkRef seg_chunk_ref,
+    inout FillSegChunkRef first_seg_chunk, inout uint seg_limit)
+{
+    if (chunk_n_segs == 0) {
+        if (seg_chunk_ref.offset + FillSegChunk_size + 2 * FillSegment_size > seg_limit) {
+            seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
+            seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - FillSegment_size;
+        }
+        first_seg_chunk = seg_chunk_ref;
+    } else if (seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs > seg_limit) {
+        uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
+        seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - FillSegment_size;
+        FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(new_chunk_ref)));
+        seg_chunk_ref.offset = new_chunk_ref;
+        chunk_n_segs = 0;
+    }
+}
+
+void main() {
+    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
+    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
+        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
+    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
+    TileGroupRef fill_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_FILL_START);
+    uint fill_n = tilegroup[fill_start.offset >> 2];
+
+    FillTileHeaderRef tile_header_ref = FillTileHeaderRef(tile_ix * FillTileHeader_size);
+    if (fill_n > 0) {
+        ChunkRef chunk_ref = ChunkRef(fill_start.offset + 4);
+        Chunk chunk = Chunk_read(chunk_ref);
+        InstanceRef fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
+        FillItemHeaderRef item_header = FillItemHeaderRef(atomicAdd(alloc, fill_n * FillItemHeader_size));
+        FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, item_header));
+        FillSegChunkRef seg_chunk_ref = FillSegChunkRef(0);
+        uint seg_limit = 0;
+        // Iterate through items; chunk.chunk_n holds the count remaining in this chunk.
+        while (true) {
+            if (chunk.chunk_n == 0) {
+                chunk_ref = chunk.next;
+                if (chunk_ref.offset == 0) {
+                    break;
+                }
+                chunk = Chunk_read(chunk_ref);
+                fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
+            }
+            Instance ins = Instance_read(fill_ref);
+            PietFill fill = PietItem_Fill_read(PietItemRef(ins.item_ref));
+
+            // Process the fill polyline item.
+            uint max_n_segs = fill.n_points > 0u ? fill.n_points - 1u : 0u;
+            uint chunk_n_segs = 0;
+            int backdrop = 0;
+            FillSegChunkRef first_seg_chunk = FillSegChunkRef(0);
+            vec2 start = Point_read(fill.points).xy;
+            for (uint j = 0; j < max_n_segs; j++) {
+                fill.points.offset += Point_size;
+                vec2 end = Point_read(fill.points).xy;
+                vec2 next_start = end; // unclipped vertex; `end` may be clipped below
+
+                // Process one segment.
+                // TODO: I think this would go more smoothly (and be easier to
+                // make numerically robust) if it were based on clipping the line
+                // to the tile box. See:
+                // https://tavianator.com/fast-branchless-raybounding-box-intersections/
+                vec2 xymin = min(start, end);
+                vec2 xymax = max(start, end);
+                float a = end.y - start.y;
+                float b = start.x - end.x;
+                float c = -(a * start.x + b * start.y);
+                vec2 xy1 = xy0 + vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
+                float ytop = max(xy0.y, xymin.y);
+                float ybot = min(xy1.y, xymax.y);
+                float s00 = sign(b * ytop + a * xy0.x + c);
+                float s01 = sign(b * ytop + a * xy1.x + c);
+                float s10 = sign(b * ybot + a * xy0.x + c);
+                float s11 = sign(b * ybot + a * xy1.x + c);
+                float sTopLeft = sign(b * xy0.y + a * xy0.x + c);
+                if (sTopLeft == sign(a) && xymin.y <= xy0.y && xymax.y > xy0.y) {
+                    backdrop -= int(s00);
+                }
+
+                // This is adapted from piet-metal but could be improved.
+
+                if (max(xymin.x, xy0.x) < min(xymax.x, xy1.x)
+                    && ytop < ybot
+                    && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
+                {
+                    if (xymin.x < xy0.x) {
+                        float yEdge = mix(start.y, end.y, (start.x - xy0.x) / b);
+                        if (yEdge >= xy0.y && yEdge < xy1.y) {
+                            // This is encoded the same as a general fill segment, but could be
+                            // special-cased, either here or in rendering. (It was special-cased
+                            // in piet-metal).
+                            FillSegment edge_seg;
+                            if (b > 0.0) {
+                                end = vec2(xy0.x, yEdge);
+                                edge_seg.start = end;
+                                edge_seg.end = vec2(xy0.x, xy1.y);
+                            } else {
+                                start = vec2(xy0.x, yEdge);
+                                edge_seg.start = vec2(xy0.x, xy1.y);
+                                edge_seg.end = start;
+                            }
+                            alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
+                            FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), edge_seg);
+                            chunk_n_segs++;
+                        }
+                    }
+                    alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
+                    FillSegment seg = FillSegment(start, end);
+                    FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), seg);
+                    chunk_n_segs++;
+                }
+
+                start = next_start;
+            }
+            FillItemHeader_write(item_header, FillItemHeader(backdrop, first_seg_chunk));
+            if (chunk_n_segs != 0) {
+                FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(0)));
+                seg_chunk_ref.offset += FillSegChunk_size + FillSegment_size * chunk_n_segs;
+            }
+
+            fill_ref.offset += Instance_size;
+            chunk.chunk_n--;
+            item_header.offset += FillItemHeader_size;
+        }
+    } else {
+        // As an optimization, we could just write 0 for the size.
+        FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, FillItemHeaderRef(0)));
+    }
+}
diff --git a/piet-gpu/shader/kernel2f.spv b/piet-gpu/shader/kernel2f.spv
new file mode 100644
index 0000000..960741e
Binary files /dev/null and b/piet-gpu/shader/kernel2f.spv differ
diff --git a/piet-gpu/shader/kernel3.comp b/piet-gpu/shader/kernel3.comp
index 81c24d1..5b0e85a 100644
--- a/piet-gpu/shader/kernel3.comp
+++ b/piet-gpu/shader/kernel3.comp
@@ -21,17 +21,23 @@ layout(set = 0, binding = 2) buffer SegmentBuf {
     uint[] segment;
 };
 
-layout(set = 0, binding = 3) buffer PtclBuf {
+// Used readonly
+layout(set = 0, binding = 3) buffer FillSegmentBuf {
+    uint[] fill_seg;
+};
+
+layout(set = 0, binding = 4) buffer PtclBuf {
     uint[] ptcl;
 };
 
-layout(set = 0, binding = 4) buffer AllocBuf {
+layout(set = 0, binding = 5) buffer AllocBuf {
     uint alloc;
 };
 
 #include "scene.h"
 #include "tilegroup.h"
 #include "segment.h"
+#include "fill_seg.h"
 #include "ptcl.h"
 
 #include "setup.h"
@@ -56,6 +62,7 @@ void main() {
     uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
 
     TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size));
+    FillTileHeader fill_th = FillTileHeader_read(FillTileHeaderRef(tile_ix * FillTileHeader_size));
 
     while (true) {
         uint tg_tag = TileGroup_tag(tg_ref);
@@ -99,6 +106,22 @@ void main() {
                     cmd_ref.offset += Cmd_size;
                 }
                 break;
+            case PietItem_Fill:
+                FillItemHeader fill_item = FillItemHeader_read(fill_th.items);
+                fill_th.items.offset += FillItemHeader_size;
+                // TODO: handle segments == 0 but backdrop != 0 specially, it's a solid tile.
+                if (fill_item.segments.offset != 0 || fill_item.backdrop != 0) {
+                    PietFill fill = PietItem_Fill_read(item_ref);
+                    CmdFill cmd = CmdFill(
+                        fill_item.segments.offset,
+                        fill_item.backdrop,
+                        fill.rgba_color
+                    );
+                    alloc_cmd(cmd_ref, cmd_limit);
+                    Cmd_Fill_write(cmd_ref, cmd);
+                    cmd_ref.offset += Cmd_size;
+                }
+                break;
             }
             tg_ref.offset += TileGroup_size;
         }
diff --git a/piet-gpu/shader/kernel3.spv b/piet-gpu/shader/kernel3.spv
index 3b8aa91..d20cc57 100644
Binary files a/piet-gpu/shader/kernel3.spv and b/piet-gpu/shader/kernel3.spv differ
diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp
index e30372a..d369f0e 100644
--- a/piet-gpu/shader/kernel4.comp
+++ b/piet-gpu/shader/kernel4.comp
@@ -19,12 +19,18 @@ layout(set = 0, binding = 1) buffer SegmentBuf {
     uint[] segment;
 };
 
-layout(set = 0, binding = 2) buffer ImageBuf {
+// Used readonly
+layout(set = 0, binding = 2) buffer FillSegBuf {
+    uint[] fill_seg;
+};
+
+layout(set = 0, binding = 3) buffer ImageBuf {
     uint[] image;
 };
 
 #include "ptcl.h"
 #include "segment.h"
+#include "fill_seg.h"
 
 #include "setup.h"
 
@@ -70,6 +76,38 @@ void main() {
             alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0);
             rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
             break;
+        case Cmd_Fill:
+            CmdFill fill = Cmd_Fill_read(cmd_ref);
+            // Probably better to store as float, but conversion is no doubt cheap.
+            float area = float(fill.backdrop);
+            FillSegChunkRef fill_seg_chunk_ref = FillSegChunkRef(fill.seg_ref);
+            // A backdrop-only tile arrives with seg_ref == 0. Offset 0 of fill_seg
+            // holds a tile header, not a chunk, so it must never be dereferenced.
+            while (fill_seg_chunk_ref.offset != 0) {
+                FillSegChunk seg_chunk = FillSegChunk_read(fill_seg_chunk_ref);
+                for (uint i = 0; i < seg_chunk.n; i++) {
+                    FillSegment seg = FillSegment_read(FillSegmentRef(fill_seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * i));
+                    vec2 start = seg.start - xy;
+                    vec2 end = seg.end - xy;
+                    vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
+                    if (window.x != window.y) {
+                        vec2 t = (window - start.y) / (end.y - start.y);
+                        vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
+                        float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
+                        float xmax = max(xs.x, xs.y);
+                        float b = min(xmax, 1.0);
+                        float c = max(b, 0.0);
+                        float d = max(xmin, 0.0);
+                        float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
+                        area += a * (window.x - window.y);
+                    }
+                }
+                fill_seg_chunk_ref = seg_chunk.next;
+            }
+            fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
+            alpha = min(abs(area), 1.0);
+            rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
+            break;
         case Cmd_Jump:
             cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref);
             continue;
diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv
index 99067bb..aab4107 100644
Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ
diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h
index e52f811..133b47a 100644
--- a/piet-gpu/shader/ptcl.h
+++ b/piet-gpu/shader/ptcl.h
@@ -72,11 +72,12 @@ CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
 }
 
 struct CmdFill {
-    vec2 start;
-    vec2 end;
+    uint seg_ref;
+    int backdrop;
+    uint rgba_color;
 };
 
-#define CmdFill_size 16
+#define CmdFill_size 12
 
 CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
     return CmdFillRef(ref.offset + index * CmdFill_size);
@@ -205,19 +206,18 @@ CmdFill CmdFill_read(CmdFillRef ref) {
     uint raw0 = ptcl[ix + 0];
     uint raw1 = ptcl[ix + 1];
     uint raw2 = ptcl[ix + 2];
-    uint
raw3 = ptcl[ix + 3]; CmdFill s; - s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); - s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.seg_ref = raw0; + s.backdrop = int(raw1); + s.rgba_color = raw2; return s; } void CmdFill_write(CmdFillRef ref, CmdFill s) { uint ix = ref.offset >> 2; - ptcl[ix + 0] = floatBitsToUint(s.start.x); - ptcl[ix + 1] = floatBitsToUint(s.start.y); - ptcl[ix + 2] = floatBitsToUint(s.end.x); - ptcl[ix + 3] = floatBitsToUint(s.end.y); + ptcl[ix + 0] = s.seg_ref; + ptcl[ix + 1] = uint(s.backdrop); + ptcl[ix + 2] = s.rgba_color; } CmdFillEdge CmdFillEdge_read(CmdFillEdgeRef ref) { diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index 2bebabe..3d9cd53 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -19,10 +19,14 @@ // there is a region of size TILEGROUP_STRIDE for each tilegroup. // At offset 0 are the main instances, encoded with Jump. At offset // TILEGROUP_STROKE_START are the stroke instances, encoded with -// Head and Link. +// Head and Link. Similarly for fill. 
#define TILEGROUP_STRIDE 2048 #define TILEGROUP_STROKE_START 1024 +#define TILEGROUP_FILL_START 1536 #define TILEGROUP_STROKE_ALLOC 1024 +#define TILEGROUP_FILL_ALLOC 1024 +#define TILEGROUP_INITIAL_STROKE_ALLOC 512 +#define TILEGROUP_INITIAL_FILL_ALLOC 512 // TODO: compute all these diff --git a/piet-gpu/src/main.rs b/piet-gpu/src/main.rs index 4416487..c40b4d5 100644 --- a/piet-gpu/src/main.rs +++ b/piet-gpu/src/main.rs @@ -10,9 +10,11 @@ use piet::{Color, RenderContext}; use piet_gpu_hal::vulkan::VkInstance; use piet_gpu_hal::{CmdBuf, Device, MemFlags}; +mod pico_svg; mod render_ctx; use render_ctx::PietGpuRenderContext; +use pico_svg::PicoSvg; const WIDTH: usize = 2048; const HEIGHT: usize = 1536; @@ -44,14 +46,22 @@ fn render_scene(rc: &mut impl RenderContext) { let circle = Circle::new(center, radius); rc.fill(circle, &color); } + let mut path = BezPath::new(); + path.move_to((100.0, 1150.0)); + path.line_to((200.0, 1200.0)); + path.line_to((150.0, 1250.0)); + path.close_path(); + rc.fill(path, &Color::rgb8(128, 0, 128)); rc.stroke( Line::new((100.0, 100.0), (200.0, 150.0)), &Color::WHITE, 5.0, ); - render_cardioid(rc); + //render_cardioid(rc); + render_tiger(rc); } +#[allow(unused)] fn render_cardioid(rc: &mut impl RenderContext) { let n = 91; let dth = std::f64::consts::PI * 2.0 / (n as f64); @@ -69,6 +79,17 @@ fn render_cardioid(rc: &mut impl RenderContext) { rc.stroke(&path, &Color::BLACK, 2.0); } +fn render_tiger(rc: &mut impl RenderContext) { + let xml_str = std::str::from_utf8(include_bytes!("../Ghostscript_Tiger.svg")).unwrap(); + let start = std::time::Instant::now(); + let svg = PicoSvg::load(xml_str, 8.0).unwrap(); + println!("parsing time: {:?}", start.elapsed()); + + let start = std::time::Instant::now(); + svg.render(rc); + println!("flattening and encoding time: {:?}", start.elapsed()); +} + #[allow(unused)] fn dump_scene(buf: &[u8]) { for i in 0..(buf.len() / 4) { @@ -107,6 +128,7 @@ fn main() { let tilegroup_buf = device.create_buffer(4 * 
1024 * 1024, dev).unwrap(); let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev).unwrap(); let segment_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap(); + let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap(); let image_buf = device .create_buffer((WIDTH * HEIGHT * 4) as u64, host) .unwrap(); @@ -144,6 +166,26 @@ fn main() { ) .unwrap(); + let k2f_alloc_buf_host = device.create_buffer(4, host).unwrap(); + let k2f_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); + let k2f_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * K2_PER_TILE_SIZE; + device + .write_buffer(&k2f_alloc_buf_host, &[k2f_alloc_start as u32]) + .unwrap(); + let k2f_code = include_bytes!("../shader/kernel2f.spv"); + let k2f_pipeline = device.create_simple_compute_pipeline(k2f_code, 4).unwrap(); + let k2f_ds = device + .create_descriptor_set( + &k2f_pipeline, + &[ + &scene_dev, + &tilegroup_buf, + &fill_seg_buf, + &k2f_alloc_buf_dev, + ], + ) + .unwrap(); + let k3_alloc_buf_host = device.create_buffer(4, host).unwrap(); let k3_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; @@ -151,7 +193,7 @@ fn main() { .write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32]) .unwrap(); let k3_code = include_bytes!("../shader/kernel3.spv"); - let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 5).unwrap(); + let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6).unwrap(); let k3_ds = device .create_descriptor_set( &k3_pipeline, @@ -159,6 +201,7 @@ fn main() { &scene_dev, &tilegroup_buf, &segment_buf, + &fill_seg_buf, &ptcl_buf, &k3_alloc_buf_dev, ], @@ -166,18 +209,26 @@ fn main() { .unwrap(); let k4_code = include_bytes!("../shader/kernel4.spv"); - let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3).unwrap(); + let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 4).unwrap(); let k4_ds = device - .create_descriptor_set(&k4_pipeline, 
&[&ptcl_buf, &segment_buf, &image_dev]) + .create_descriptor_set( + &k4_pipeline, + &[&ptcl_buf, &segment_buf, &fill_seg_buf, &image_dev], + ) .unwrap(); - let query_pool = device.create_query_pool(5).unwrap(); + let query_pool = device.create_query_pool(6).unwrap(); let mut cmd_buf = device.create_cmd_buf().unwrap(); cmd_buf.begin(); cmd_buf.copy_buffer(&scene_buf, &scene_dev); + // Note: we could use one alloc buf and reuse it. But we'll stick with + // multiple ones for clarity. cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev); cmd_buf.copy_buffer(&k2s_alloc_buf_host, &k2s_alloc_buf_dev); + cmd_buf.copy_buffer(&k2f_alloc_buf_host, &k2f_alloc_buf_dev); cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev); + // Note: these clears aren't necessary, and are here to make inspection + // of the buffers cleaner. Can likely be removed. cmd_buf.clear_buffer(&tilegroup_buf); cmd_buf.clear_buffer(&ptcl_buf); cmd_buf.memory_barrier(); @@ -196,20 +247,30 @@ fn main() { ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1), ); cmd_buf.write_timestamp(&query_pool, 2); + // Note: this barrier is not necessary (k2f does not depend on + // k2s output), but I'm keeping it here to increase transparency + // of performance. 
+    cmd_buf.memory_barrier();
+    cmd_buf.dispatch(
+        &k2f_pipeline,
+        &k2f_ds,
+        ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
+    );
+    cmd_buf.write_timestamp(&query_pool, 3);
     cmd_buf.memory_barrier();
     cmd_buf.dispatch(
         &k3_pipeline,
         &k3_ds,
         ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
     );
-    cmd_buf.write_timestamp(&query_pool, 3);
+    cmd_buf.write_timestamp(&query_pool, 4);
     cmd_buf.memory_barrier();
     cmd_buf.dispatch(
         &k4_pipeline,
         &k4_ds,
         ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
     );
-    cmd_buf.write_timestamp(&query_pool, 4);
+    cmd_buf.write_timestamp(&query_pool, 5);
     cmd_buf.memory_barrier();
     cmd_buf.copy_buffer(&image_dev, &image_buf);
     cmd_buf.finish();
@@ -217,17 +278,21 @@ fn main() {
     let timestamps = device.reap_query_pool(query_pool).unwrap();
     println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
     println!(
-        "Kernel 2 time: {:.3}ms",
+        "Kernel 2s time: {:.3}ms",
         (timestamps[1] - timestamps[0]) * 1e3
     );
     println!(
-        "Kernel 3 time: {:.3}ms",
+        "Kernel 2f time: {:.3}ms",
         (timestamps[2] - timestamps[1]) * 1e3
     );
     println!(
-        "Render time: {:.3}ms",
+        "Kernel 3 time: {:.3}ms",
         (timestamps[3] - timestamps[2]) * 1e3
     );
+    println!(
+        "Render time: {:.3}ms",
+        (timestamps[4] - timestamps[3]) * 1e3
+    );
 
     /*
     let mut k1_data: Vec = Default::default();
diff --git a/piet-gpu/src/pico_svg.rs b/piet-gpu/src/pico_svg.rs
new file mode 100644
index 0000000..2423dda
--- /dev/null
+++ b/piet-gpu/src/pico_svg.rs
@@ -0,0 +1,101 @@
+//! A loader for a tiny fragment of SVG
+
+use std::str::FromStr;
+
+use roxmltree::Document;
+
+use kurbo::BezPath;
+
+use piet::{Color, RenderContext};
+
+/// A flat list of fill and stroke items loaded from an SVG document.
+pub struct PicoSvg {
+    items: Vec<Item>,
+}
+
+/// One drawable item, kept in document order.
+pub enum Item {
+    Fill(FillItem),
+    Stroke(StrokeItem),
+}
+
+/// A path stroked with a solid color.
+pub struct StrokeItem {
+    width: f64,
+    color: Color,
+    path: BezPath,
+}
+
+/// A path filled with a solid color.
+pub struct FillItem {
+    color: Color,
+    path: BezPath,
+}
+
+impl PicoSvg {
+    /// Parses `xml_string` and collects the paths of its first group.
+    ///
+    /// Only the element children of the first element under the document
+    /// root are read, and each one must carry a `d` attribute. Path geometry
+    /// and stroke widths are both scaled by `scale`.
+    ///
+    /// # Errors
+    ///
+    /// Fails on malformed XML, a missing group, a missing or unparsable `d`
+    /// attribute, or a `stroke` without a numeric `stroke-width`.
+    pub fn load(xml_string: &str, scale: f64) -> Result<PicoSvg, Box<dyn std::error::Error>> {
+        let doc = Document::parse(xml_string)?;
+        let root = doc.root_element();
+        let g = root.first_element_child().ok_or("no root element")?;
+        let mut items = Vec::new();
+        for el in g.children() {
+            if el.is_element() {
+                let d = el.attribute("d").ok_or("missing 'd' attribute")?;
+                let bp = BezPath::from_svg(d)?;
+                let path = kurbo::Affine::scale(scale) * bp;
+                if let Some(fill_color) = el.attribute("fill") {
+                    let color = parse_color(fill_color);
+                    items.push(Item::Fill(FillItem { color, path: path.clone() }));
+                }
+                if let Some(stroke_color) = el.attribute("stroke") {
+                    let width = el.attribute("stroke-width").ok_or("missing width")?;
+                    // The path is scaled above, so the stroke width has to follow.
+                    let width = scale.abs() * f64::from_str(width)?;
+                    let color = parse_color(stroke_color);
+                    items.push(Item::Stroke(StrokeItem { width, color, path }));
+                }
+            }
+        }
+        Ok(PicoSvg { items })
+    }
+
+    /// Replays every item into `rc`, in the order it was loaded.
+    pub fn render(&self, rc: &mut impl RenderContext) {
+        for item in &self.items {
+            match item {
+                Item::Fill(fill_item) => {
+                    rc.fill(&fill_item.path, &fill_item.color);
+                }
+                Item::Stroke(stroke_item) => {
+                    rc.stroke(&stroke_item.path, &stroke_item.color, stroke_item.width);
+                }
+            }
+        }
+    }
+}
+
+/// Parses `#rgb` or `#rrggbb` into an opaque color.
+///
+/// Anything else (including an empty string or bad hex digits) yields a
+/// translucent magenta placeholder rather than a panic.
+fn parse_color(color: &str) -> Color {
+    if color.starts_with('#') {
+        if let Ok(mut hex) = u32::from_str_radix(&color[1..], 16) {
+            if color.len() == 4 {
+                hex = (hex >> 8) * 0x110000 + ((hex >> 4) & 0xf) * 0x1100 + (hex & 0xf) * 0x11;
+            }
+            return Color::from_rgba32_u32((hex << 8) + 0xff);
+        }
+    }
+    Color::from_rgba32_u32(0xff00ff80)
+}
diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs
index f5b6897..a0560da 100644
--- a/piet-gpu/src/render_ctx.rs
+++ b/piet-gpu/src/render_ctx.rs
@@ -2,7 +2,7 @@ use std::borrow::Cow;
 
 use piet_gpu_types::encoder::{Encode, Encoder, Ref};
 use piet_gpu_types::scene;
-use piet_gpu_types::scene::{Bbox, PietCircle, PietItem, PietStrokePolyLine, SimpleGroup};
+use piet_gpu_types::scene::{Bbox, PietCircle, PietFill, PietItem, PietStrokePolyLine, SimpleGroup};
 
 use piet::kurbo::{Affine, PathEl, Point, Rect, Shape};
 
@@ -119,6 +119,7 @@ impl RenderContext for PietGpuRenderContext {
                     n_points,
                     points,
                 };
+                let bbox = bbox.inset(0.5 * width);
                 self.push_item(PietItem::Poly(poly_line), bbox);
             }
             _ => (),
@@ -135,10 +136,11 @@ impl RenderContext for PietGpuRenderContext {
     }
 
     fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) {
+        let bbox = shape.bounding_box();
         let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
 
-        match shape.as_circle() {
-            Some(circle) => match brush {
+        if let Some(circle) = shape.as_circle() {
+            match brush {
                 PietGpuBrush::Solid(rgba_color) => {
                     let piet_circle = PietCircle {
                         rgba_color,
@@ -149,8 +151,22 @@ impl RenderContext for PietGpuRenderContext {
                     self.push_item(PietItem::Circle(piet_circle), bbox);
                 }
                 _ => {}
-            },
-            None => {}
+            }
+            return;
+        }
+        let path = shape.to_bez_path(TOLERANCE);
+        let (n_points, points) = flatten_shape(&mut self.encoder, path);
+        match brush {
+            PietGpuBrush::Solid(rgba_color) => {
+                let fill = PietFill {
+                    flags: 0,
+                    rgba_color,
+                    n_points,
+                    points,
+                };
+                self.push_item(PietItem::Fill(fill), bbox);
+            }
+            _ => (),
         }
     }
 