diff --git a/piet-gpu-types/src/fill_seg.rs b/piet-gpu-types/src/fill_seg.rs deleted file mode 100644 index 2242a84..0000000 --- a/piet-gpu-types/src/fill_seg.rs +++ /dev/null @@ -1,37 +0,0 @@ -use piet_gpu_derive::piet_gpu; - -// Structures representing segments for fill items. - -// There is some cut'n'paste here from stroke segments, which can be -// traced to the fact that buffers in GLSL are basically global. -// Maybe there's a way to address that, but in the meantime living -// with the duplication is easiest. - -piet_gpu! { - #[gpu_write] - mod fill_seg { - struct FillTileHeader { - n: u32, - items: Ref, - } - - struct FillItemHeader { - backdrop: i32, - segments: Ref, - } - - // TODO: strongly consider using f16. If so, these would be - // relative to the tile. We're doing f32 for now to minimize - // divergence from piet-metal originals. - struct FillSegment { - start: [f32; 2], - end: [f32; 2], - } - - struct FillSegChunk { - n: u32, - next: Ref, - // Segments follow (could represent this as a variable sized array). - } - } -} diff --git a/piet-gpu-types/src/lib.rs b/piet-gpu-types/src/lib.rs index 29ed806..75a7731 100644 --- a/piet-gpu-types/src/lib.rs +++ b/piet-gpu-types/src/lib.rs @@ -3,10 +3,8 @@ pub mod annotated; pub mod bins; pub mod encoder; -pub mod fill_seg; pub mod ptcl; pub mod scene; -pub mod segment; pub mod state; pub mod test; pub mod tilegroup; diff --git a/piet-gpu-types/src/main.rs b/piet-gpu-types/src/main.rs index 41ae021..9c40051 100644 --- a/piet-gpu-types/src/main.rs +++ b/piet-gpu-types/src/main.rs @@ -9,8 +9,6 @@ fn main() { "annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()), "bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()), "tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()), - "segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()), - "fill_seg" => print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()), "ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()), "test" => print!("{}", piet_gpu_types::test::gen_gpu_test()), _ => println!("Oops, unknown module name"), diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs index 911f2c8..534cf85 100644 --- a/piet-gpu-types/src/ptcl.rs +++ b/piet-gpu-types/src/ptcl.rs @@ -13,13 +13,13 @@ piet_gpu! { end: [f32; 2], } struct CmdStroke { - // Should be Ref if we had cross-module references. + // Should be Ref seg_ref: u32, half_width: f32, rgba_color: u32, } struct CmdFill { - // Should be Ref if we had cross-module references. + // Should be Ref seg_ref: u32, backdrop: i32, rgba_color: u32, @@ -51,5 +51,19 @@ piet_gpu! { Jump(CmdJump), Bail, } + + // TODO: strongly consider using f16. If so, these would be + // relative to the tile. We're doing f32 for now to minimize + // divergence from piet-metal originals. + struct Segment { + start: [f32; 2], + end: [f32; 2], + } + + struct SegChunk { + n: u32, + next: Ref, + // Segments follow (could represent this as a variable sized array). + } } } diff --git a/piet-gpu-types/src/segment.rs b/piet-gpu-types/src/segment.rs deleted file mode 100644 index 0b18ab8..0000000 --- a/piet-gpu-types/src/segment.rs +++ /dev/null @@ -1,32 +0,0 @@ -use piet_gpu_derive::piet_gpu; - -// Structures representing segments for stroke/fill items. - -piet_gpu! { - #[gpu_write] - mod segment { - struct TileHeader { - n: u32, - items: Ref, - } - - // Note: this is only suitable for strokes, fills require backdrop. - struct ItemHeader { - segments: Ref, - } - - // TODO: strongly consider using f16. If so, these would be - // relative to the tile. We're doing f32 for now to minimize - // divergence from piet-metal originals. - struct Segment { - start: [f32; 2], - end: [f32; 2], - } - - struct SegChunk { - n: u32, - next: Ref, - // Segments follow (could represent this as a variable sized array). - } - } -} diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja index 6a57917..3b6b963 100644 --- a/piet-gpu/shader/build.ninja +++ b/piet-gpu/shader/build.ninja @@ -9,19 +9,11 @@ rule glsl build image.spv: glsl image.comp | scene.h -build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h - -build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h - -build kernel2f.spv: glsl kernel2f.comp | scene.h tilegroup.h fill_seg.h setup.h - -build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h fill_seg.h ptcl.h setup.h - -build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h - build elements.spv: glsl elements.comp | scene.h state.h annotated.h build binning.spv: glsl binning.comp | annotated.h bins.h setup.h build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h + +build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h diff --git a/piet-gpu/shader/fill_seg.h b/piet-gpu/shader/fill_seg.h deleted file mode 100644 index abe199f..0000000 --- a/piet-gpu/shader/fill_seg.h +++ /dev/null @@ -1,130 +0,0 @@ -// Code auto-generated by piet-gpu-derive - -struct FillTileHeaderRef { - uint offset; -}; - -struct FillItemHeaderRef { - uint offset; -}; - -struct FillSegmentRef { - uint offset; -}; - -struct FillSegChunkRef { - uint offset; -}; - -struct FillTileHeader { - uint n; - FillItemHeaderRef items; -}; - -#define FillTileHeader_size 8 - -FillTileHeaderRef FillTileHeader_index(FillTileHeaderRef ref, uint index) { - return FillTileHeaderRef(ref.offset + index * FillTileHeader_size); -} - -struct FillItemHeader { - int backdrop; - FillSegChunkRef segments; -}; - -#define FillItemHeader_size 8 - -FillItemHeaderRef FillItemHeader_index(FillItemHeaderRef ref, uint index) { - return FillItemHeaderRef(ref.offset + index * FillItemHeader_size); -} - -struct FillSegment { - vec2 start; - vec2 end; -}; - -#define FillSegment_size 16 - -FillSegmentRef FillSegment_index(FillSegmentRef ref, uint index) { - return FillSegmentRef(ref.offset + index * FillSegment_size); -} - -struct FillSegChunk { - uint n; - FillSegChunkRef next; -}; - -#define FillSegChunk_size 8 - -FillSegChunkRef FillSegChunk_index(FillSegChunkRef ref, uint index) { - return FillSegChunkRef(ref.offset + index * FillSegChunk_size); -} - -FillTileHeader FillTileHeader_read(FillTileHeaderRef ref) { - uint ix = ref.offset >> 2; - uint raw0 = fill_seg[ix + 0]; - uint raw1 = fill_seg[ix + 1]; - FillTileHeader s; - s.n = raw0; - s.items = FillItemHeaderRef(raw1); - return s; -} - -void FillTileHeader_write(FillTileHeaderRef ref, FillTileHeader s) { - uint ix = ref.offset >> 2; - fill_seg[ix + 0] = s.n; - fill_seg[ix + 1] = s.items.offset; -} - -FillItemHeader FillItemHeader_read(FillItemHeaderRef ref) { - uint ix = ref.offset >> 2; - uint raw0 = fill_seg[ix + 0]; - uint raw1 = fill_seg[ix + 1]; - FillItemHeader s; - s.backdrop = int(raw0); - s.segments = FillSegChunkRef(raw1); - return s; -} - -void FillItemHeader_write(FillItemHeaderRef ref, FillItemHeader s) { - uint ix = ref.offset >> 2; - fill_seg[ix + 0] = uint(s.backdrop); - fill_seg[ix + 1] = s.segments.offset; -} - -FillSegment FillSegment_read(FillSegmentRef ref) { - uint ix = ref.offset >> 2; - uint raw0 = fill_seg[ix + 0]; - uint raw1 = fill_seg[ix + 1]; - uint raw2 = fill_seg[ix + 2]; - uint raw3 = fill_seg[ix + 3]; - FillSegment s; - s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); - s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); - return s; -} - -void FillSegment_write(FillSegmentRef ref, FillSegment s) { - uint ix = ref.offset >> 2; - fill_seg[ix + 0] = floatBitsToUint(s.start.x); - fill_seg[ix + 1] = floatBitsToUint(s.start.y); - fill_seg[ix + 2] = floatBitsToUint(s.end.x); - fill_seg[ix + 3] = floatBitsToUint(s.end.y); -} - -FillSegChunk FillSegChunk_read(FillSegChunkRef ref) { - uint ix = ref.offset >> 2; - uint raw0 = fill_seg[ix + 0]; - uint raw1 = fill_seg[ix + 1]; - FillSegChunk s; - s.n = raw0; - s.next = FillSegChunkRef(raw1); - return s; -} - -void FillSegChunk_write(FillSegChunkRef ref, FillSegChunk s) { - uint ix = ref.offset >> 2; - fill_seg[ix + 0] = s.n; - fill_seg[ix + 1] = s.next.offset; -} - diff --git a/piet-gpu/shader/kernel1.comp b/piet-gpu/shader/kernel1.comp deleted file mode 100644 index 6b76c53..0000000 --- a/piet-gpu/shader/kernel1.comp +++ /dev/null @@ -1,161 +0,0 @@ -// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph -// and outputs "instances" (references to item + translation) for each item -// that intersects the tilegroup. -// -// This implementation is simplistic and leaves a lot of performance on the -// table. A fancier implementation would use threadgroup shared memory or -// subgroups (or possibly both) to parallelize the reading of the input and -// the computation of tilegroup intersection. -// -// In addition, there are some features currently missing, such as support -// for clipping. - -#version 450 -#extension GL_GOOGLE_include_directive : enable - -// It's possible we should lay this out with x and do our own math. -layout(local_size_x = 1, local_size_y = 32) in; - -layout(set = 0, binding = 0) readonly buffer SceneBuf { - uint[] scene; -}; - -layout(set = 0, binding = 1) buffer TilegroupBuf { - uint[] tilegroup; -}; - -layout(set = 0, binding = 2) buffer AllocBuf { - uint alloc; -}; - -#include "scene.h" -#include "tilegroup.h" - -#include "setup.h" - -#define MAX_STACK 8 - -struct StackElement { - PietItemRef group; - uint index; - vec2 offset; -}; - -void main() { - StackElement stack[MAX_STACK]; - uint stack_ix = 0; - uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x; - TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE); - uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size; - - // State for stroke references. - TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START); - ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4); - InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size); - uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_STROKE_ALLOC - Instance_size; - uint stroke_chunk_n = 0; - uint stroke_n = 0; - - // State for fill references. All this is a bit cut'n'paste, but making a - // proper abstraction isn't easy. - TileGroupRef fill_start = TileGroupRef(tg_ref.offset + TILEGROUP_FILL_START); - ChunkRef fill_chunk_start = ChunkRef(fill_start.offset + 4); - InstanceRef fill_ref = InstanceRef(fill_chunk_start.offset + Chunk_size); - uint fill_limit = fill_start.offset + TILEGROUP_INITIAL_FILL_ALLOC - Instance_size; - uint fill_chunk_n = 0; - uint fill_n = 0; - - vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX); - PietItemRef root = PietItemRef(0); - SimpleGroup group = PietItem_Group_read(root); - StackElement tos = StackElement(root, 0, group.offset.xy); - - while (true) { - if (tos.index < group.n_items) { - Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index)); - vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy; - bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX)) - && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX)); - bool is_group = false; - uint tag; - if (hit) { - PietItemRef item_ref = PietItem_index(group.items, tos.index); - tag = PietItem_tag(item_ref); - is_group = tag == PietItem_Group; - } - if (hit && !is_group) { - PietItemRef item_ref = PietItem_index(group.items, tos.index); - Instance ins = Instance(item_ref.offset, tos.offset); - if (tg_ref.offset > tg_limit) { - // Allocation exceeded; do atomic bump alloc. - uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC); - Jump jump = Jump(TileGroupRef(new_tg)); - TileGroup_Jump_write(tg_ref, jump); - tg_ref = TileGroupRef(new_tg); - tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size; - } - TileGroup_Instance_write(tg_ref, ins); - tg_ref.offset += TileGroup_size; - if (tag == PietItem_Poly) { - if (stroke_ref.offset > stroke_limit) { - uint new_stroke = atomicAdd(alloc, TILEGROUP_STROKE_ALLOC); - Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(new_stroke))); - stroke_chunk_start = ChunkRef(new_stroke); - stroke_ref = InstanceRef(new_stroke + Chunk_size); - stroke_n += stroke_chunk_n; - stroke_chunk_n = 0; - stroke_limit = new_stroke + TILEGROUP_STROKE_ALLOC - Instance_size; - } - Instance_write(stroke_ref, ins); - stroke_chunk_n++; - stroke_ref.offset += Instance_size; - } else if (tag == PietItem_Fill) { - if (fill_ref.offset > fill_limit) { - uint new_fill = atomicAdd(alloc, TILEGROUP_FILL_ALLOC); - Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(new_fill))); - fill_chunk_start = ChunkRef(new_fill); - fill_ref = InstanceRef(new_fill + Chunk_size); - fill_n += fill_chunk_n; - fill_chunk_n = 0; - fill_limit = new_fill + TILEGROUP_FILL_ALLOC - Instance_size; - } - Instance_write(fill_ref, ins); - fill_chunk_n++; - fill_ref.offset += Instance_size; - - } - } - if (is_group) { - PietItemRef item_ref = PietItem_index(group.items, tos.index); - tos.index++; - if (tos.index < group.n_items) { - stack[stack_ix++] = tos; - } - group = PietItem_Group_read(item_ref); - tos = StackElement(item_ref, 0, tos.offset + group.offset.xy); - } else { - tos.index++; - } - } else { - // processed all items in this group; pop the stack - if (stack_ix == 0) { - break; - } - tos = stack[--stack_ix]; - group = PietItem_Group_read(tos.group); - } - } - TileGroup_End_write(tg_ref); - - stroke_n += stroke_chunk_n; - if (stroke_n > 0) { - Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0))); - } - tilegroup[stroke_start.offset >> 2] = stroke_n; - - fill_n += fill_chunk_n; - if (fill_n > 0) { - Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(0))); - } - tilegroup[fill_start.offset >> 2] = fill_n; -} diff --git a/piet-gpu/shader/kernel1.spv b/piet-gpu/shader/kernel1.spv deleted file mode 100644 index 358151d..0000000 Binary files a/piet-gpu/shader/kernel1.spv and /dev/null differ diff --git a/piet-gpu/shader/kernel2f.comp b/piet-gpu/shader/kernel2f.comp deleted file mode 100644 index 7ea93bd..0000000 --- a/piet-gpu/shader/kernel2f.comp +++ /dev/null @@ -1,167 +0,0 @@ -// This is "kernel 2" (fill) in a 4-kernel pipeline. It processes the fill -// (polyline) items in the scene and generates a list of segments for each, for -// each tile. - -#version 450 -#extension GL_GOOGLE_include_directive : enable - -layout(local_size_x = 32) in; - -layout(set = 0, binding = 0) readonly buffer SceneBuf { - uint[] scene; -}; - -layout(set = 0, binding = 1) buffer TilegroupBuf { - uint[] tilegroup; -}; - -layout(set = 0, binding = 2) buffer FillSegBuf { - uint[] fill_seg; -}; - -layout(set = 0, binding = 3) buffer AllocBuf { - uint alloc; -}; - -#include "scene.h" -#include "tilegroup.h" -#include "fill_seg.h" - -#include "setup.h" - -// Ensure that there is space to encode a segment. -void alloc_chunk(inout uint chunk_n_segs, inout FillSegChunkRef seg_chunk_ref, - inout FillSegChunkRef first_seg_chunk, inout uint seg_limit) -{ - if (chunk_n_segs == 0) { - if (seg_chunk_ref.offset + 40 > seg_limit) { - seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC); - seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - FillSegment_size; - } - first_seg_chunk = seg_chunk_ref; - } else if (seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs > seg_limit) { - uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC); - seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - FillSegment_size; - FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(new_chunk_ref))); - seg_chunk_ref.offset = new_chunk_ref; - chunk_n_segs = 0; - } - -} - -void main() { - uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x; - uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS - + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES); - vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX); - TileGroupRef fill_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_FILL_START); - uint fill_n = tilegroup[fill_start.offset >> 2]; - - FillTileHeaderRef tile_header_ref = FillTileHeaderRef(tile_ix * FillTileHeader_size); - if (fill_n > 0) { - ChunkRef chunk_ref = ChunkRef(fill_start.offset + 4); - Chunk chunk = Chunk_read(chunk_ref); - InstanceRef fill_ref = InstanceRef(chunk_ref.offset + Chunk_size); - FillItemHeaderRef item_header = FillItemHeaderRef(atomicAdd(alloc, fill_n * FillItemHeader_size)); - FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, item_header)); - FillSegChunkRef seg_chunk_ref = FillSegChunkRef(0); - uint seg_limit = 0; - // Iterate through items; fill_n holds count remaining. - while (true) { - if (chunk.chunk_n == 0) { - chunk_ref = chunk.next; - if (chunk_ref.offset == 0) { - break; - } - chunk = Chunk_read(chunk_ref); - fill_ref = InstanceRef(chunk_ref.offset + Chunk_size); - } - Instance ins = Instance_read(fill_ref); - PietFill fill = PietItem_Fill_read(PietItemRef(ins.item_ref)); - - // Process the fill polyline item. - uint max_n_segs = fill.n_points - 1; - uint chunk_n_segs = 0; - int backdrop = 0; - FillSegChunkRef seg_chunk_ref; - FillSegChunkRef first_seg_chunk = FillSegChunkRef(0); - vec2 start = Point_read(fill.points).xy; - for (uint j = 0; j < max_n_segs; j++) { - fill.points.offset += Point_size; - vec2 end = Point_read(fill.points).xy; - - // Process one segment. - - // TODO: I think this would go more smoothly (and be easier to - // make numerically robust) if it were based on clipping the line - // to the tile box. See: - // https://tavianator.com/fast-branchless-raybounding-box-intersections/ - vec2 xymin = min(start, end); - vec2 xymax = max(start, end); - float a = end.y - start.y; - float b = start.x - end.x; - float c = -(a * start.x + b * start.y); - vec2 xy1 = xy0 + vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX); - float ytop = max(xy0.y, xymin.y); - float ybot = min(xy1.y, xymax.y); - float s00 = sign(b * ytop + a * xy0.x + c); - float s01 = sign(b * ytop + a * xy1.x + c); - float s10 = sign(b * ybot + a * xy0.x + c); - float s11 = sign(b * ybot + a * xy1.x + c); - float sTopLeft = sign(b * xy0.y + a * xy0.x + c); - if (sTopLeft == sign(a) && xymin.y <= xy0.y && xymax.y > xy0.y) { - backdrop -= int(s00); - } - - // This is adapted from piet-metal but could be improved. - - if (max(xymin.x, xy0.x) < min(xymax.x, xy1.x) - && ytop < ybot - && s00 * s01 + s00 * s10 + s00 * s11 < 3.0) - { - // avoid overwriting `end` so that it can be used as start - vec2 enc_end = end; - if (xymin.x < xy0.x) { - float yEdge = mix(start.y, end.y, (start.x - xy0.x) / b); - if (yEdge >= xy0.y && yEdge < xy1.y) { - // This is encoded the same as a general fill segment, but could be - // special-cased, either here or in rendering. (It was special-cased - // in piet-metal). - FillSegment edge_seg; - if (b > 0.0) { - enc_end = vec2(xy0.x, yEdge); - edge_seg.start = enc_end; - edge_seg.end = vec2(xy0.x, xy1.y); - } else { - start = vec2(xy0.x, yEdge); - edge_seg.start = vec2(xy0.x, xy1.y); - edge_seg.end = start; - } - alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit); - FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), edge_seg); - chunk_n_segs++; - } - } - alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit); - FillSegment seg = FillSegment(start, enc_end); - FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), seg); - chunk_n_segs++; - } - - start = end; - } - FillItemHeader_write(item_header, FillItemHeader(backdrop, first_seg_chunk)); - if (chunk_n_segs != 0) { - FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(0))); - seg_chunk_ref.offset += FillSegChunk_size + FillSegment_size * chunk_n_segs; - } - - fill_ref.offset += Instance_size; - chunk.chunk_n--; - item_header.offset += FillItemHeader_size; - } - } else { - // As an optimization, we could just write 0 for the size. - FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, FillItemHeaderRef(0))); - } -} diff --git a/piet-gpu/shader/kernel2f.spv b/piet-gpu/shader/kernel2f.spv deleted file mode 100644 index 75a7a39..0000000 Binary files a/piet-gpu/shader/kernel2f.spv and /dev/null differ diff --git a/piet-gpu/shader/kernel2s.comp b/piet-gpu/shader/kernel2s.comp deleted file mode 100644 index d6b1571..0000000 --- a/piet-gpu/shader/kernel2s.comp +++ /dev/null @@ -1,137 +0,0 @@ -// This is "kernel 2" (strokes) in a 4-kernel pipeline. It processes the stroke -// (polyline) items in the scene and generates a list of segments for each, for -// each tile. - -#version 450 -#extension GL_GOOGLE_include_directive : enable - -layout(local_size_x = 32) in; - -layout(set = 0, binding = 0) readonly buffer SceneBuf { - uint[] scene; -}; - -layout(set = 0, binding = 1) buffer TilegroupBuf { - uint[] tilegroup; -}; - -layout(set = 0, binding = 2) buffer SegmentBuf { - uint[] segment; -}; - -layout(set = 0, binding = 3) buffer AllocBuf { - uint alloc; -}; - -#include "scene.h" -#include "tilegroup.h" -#include "segment.h" - -#include "setup.h" - -void main() { - uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x; - uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS - + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES); - vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX); - TileGroupRef stroke_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_STROKE_START); - uint stroke_n = tilegroup[stroke_start.offset >> 2]; - - TileHeaderRef tile_header_ref = TileHeaderRef(tile_ix * TileHeader_size); - if (stroke_n > 0) { - ChunkRef chunk_ref = ChunkRef(stroke_start.offset + 4); - Chunk chunk = Chunk_read(chunk_ref); - InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size); - ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size)); - TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header)); - SegChunkRef seg_chunk_ref = SegChunkRef(0); - uint seg_limit = 0; - // Iterate through items; stroke_n holds count remaining. - while (true) { - if (chunk.chunk_n == 0) { - chunk_ref = chunk.next; - if (chunk_ref.offset == 0) { - break; - } - chunk = Chunk_read(chunk_ref); - stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size); - } - Instance ins = Instance_read(stroke_ref); - PietStrokePolyLine poly = PietItem_Poly_read(PietItemRef(ins.item_ref)); - - // Process the stroke polyline item. - uint max_n_segs = poly.n_points - 1; - uint chunk_n_segs = 0; - SegChunkRef seg_chunk_ref; - vec2 start = Point_read(poly.points).xy; - for (uint j = 0; j < max_n_segs; j++) { - poly.points.offset += Point_size; - vec2 end = Point_read(poly.points).xy; - - // Process one segment. - - // This logic just tests for collision. What we probably want to do - // is a clipping algorithm like Liang-Barsky, and then store coords - // relative to the tile in f16. See also: - // https://tavianator.com/fast-branchless-raybounding-box-intersections/ - - // Also note that when we go to the fancy version, we want to compute - // the (horizontal projection of) the bounding box of the intersection - // once per tilegroup, so we can assign work to individual tiles. - - float a = end.y - start.y; - float b = start.x - end.x; - float c = -(a * start.x + b * start.y); - float half_width = 0.5 * poly.width; - // Tile boundaries padded by half-width. - float xmin = xy0.x - half_width; - float ymin = xy0.y - half_width; - float xmax = xy0.x + float(TILE_WIDTH_PX) + half_width; - float ymax = xy0.y + float(TILE_HEIGHT_PX) + half_width; - float s00 = sign(b * ymin + a * xmin + c); - float s01 = sign(b * ymin + a * xmax + c); - float s10 = sign(b * ymax + a * xmin + c); - float s11 = sign(b * ymax + a * xmax + c); - // If bounding boxes intersect and not all four corners are on the same side, hit. - // Also note: this is designed to be false on NAN input. - if (max(min(start.x, end.x), xmin) < min(max(start.x, end.x), xmax) - && max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax) - && s00 * s01 + s00 * s10 + s00 * s11 < 3.0) - { - // Allocate a chunk if needed. - if (chunk_n_segs == 0) { - if (seg_chunk_ref.offset + 40 > seg_limit) { - seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC); - seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size; - } - ItemHeader_write(item_header, ItemHeader(seg_chunk_ref)); - } else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) { - uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC); - seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size; - SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref))); - seg_chunk_ref.offset = new_chunk_ref; - chunk_n_segs = 0; - } - Segment seg = Segment(start, end); - Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg); - chunk_n_segs++; - } - - start = end; - } - if (chunk_n_segs == 0) { - ItemHeader_write(item_header, ItemHeader(SegChunkRef(0))); - } else { - SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0))); - seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs; - } - - stroke_ref.offset += Instance_size; - chunk.chunk_n--; - item_header.offset += ItemHeader_size; - } - } else { - // As an optimization, we could just write 0 for the size. - TileHeader_write(tile_header_ref, TileHeader(stroke_n, ItemHeaderRef(0))); - } -} diff --git a/piet-gpu/shader/kernel2s.spv b/piet-gpu/shader/kernel2s.spv deleted file mode 100644 index f7c27f0..0000000 Binary files a/piet-gpu/shader/kernel2s.spv and /dev/null differ diff --git a/piet-gpu/shader/kernel3.comp b/piet-gpu/shader/kernel3.comp deleted file mode 100644 index bd6d809..0000000 --- a/piet-gpu/shader/kernel3.comp +++ /dev/null @@ -1,135 +0,0 @@ -// This is "kernel 3" in a 4-kernel pipeline. It walks the active items -// for the tilegroup and produces a per-tile command list for each tile. - -#version 450 -#extension GL_GOOGLE_include_directive : enable - -layout(local_size_x = 32, local_size_y = 1) in; - -layout(set = 0, binding = 0) readonly buffer SceneBuf { - uint[] scene; -}; - -// TODO: this should have a `readonly` qualifier, but then inclusion -// of ptcl.h would fail because of the writers. -layout(set = 0, binding = 1) buffer TilegroupBuf { - uint[] tilegroup; -}; - -// Used readonly -layout(set = 0, binding = 2) buffer SegmentBuf { - uint[] segment; -}; - -// Used readonly -layout(set = 0, binding = 3) buffer FillSegmentBuf { - uint[] fill_seg; -}; - -layout(set = 0, binding = 4) buffer PtclBuf { - uint[] ptcl; -}; - -layout(set = 0, binding = 5) buffer AllocBuf { - uint alloc; -}; - -#include "scene.h" -#include "tilegroup.h" -#include "segment.h" -#include "fill_seg.h" -#include "ptcl.h" - -#include "setup.h" - -void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) { - if (cmd_ref.offset > cmd_limit) { - uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC); - CmdJump jump = CmdJump(new_cmd); - Cmd_Jump_write(cmd_ref, jump); - cmd_ref = CmdRef(new_cmd); - cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size; - } -} - -void main() { - uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x; - uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS - + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES); - vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX); - TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE); - CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC); - uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size; - - TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size)); - FillTileHeader fill_th = FillTileHeader_read(FillTileHeaderRef(tile_ix * FillTileHeader_size)); - - while (true) { - uint tg_tag = TileGroup_tag(tg_ref); - if (tg_tag == TileGroup_End) { - break; - } - if (tg_tag == TileGroup_Jump) { - tg_ref = TileGroup_Jump_read(tg_ref).new_ref; - continue; - } - // Assume tg_tag is `Instance`, though there will be more cases. - Instance ins = TileGroup_Instance_read(tg_ref); - PietItemRef item_ref = PietItemRef(ins.item_ref); - uint item_tag = PietItem_tag(item_ref); - switch (item_tag) { - case PietItem_Circle: - PietCircle circle = PietItem_Circle_read(item_ref); - vec2 center = ins.offset + circle.center.xy; - float r = circle.radius; - if (max(center.x - r, xy0.x) < min(center.x + r, xy0.x + float(TILE_WIDTH_PX)) - && max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX))) - { - CmdCircle cmd = CmdCircle(center, r, circle.rgba_color); - alloc_cmd(cmd_ref, cmd_limit); - Cmd_Circle_write(cmd_ref, cmd); - cmd_ref.offset += Cmd_size; - } - break; - case PietItem_Poly: - ItemHeader stroke_item = ItemHeader_read(stroke_th.items); - stroke_th.items.offset += ItemHeader_size; - if (stroke_item.segments.offset != 0) { - PietStrokePolyLine poly = PietItem_Poly_read(item_ref); - CmdStroke cmd = CmdStroke( - stroke_item.segments.offset, - 0.5 * poly.width, - poly.rgba_color - ); - alloc_cmd(cmd_ref, cmd_limit); - Cmd_Stroke_write(cmd_ref, cmd); - cmd_ref.offset += Cmd_size; - } - break; - case PietItem_Fill: - FillItemHeader fill_item = FillItemHeader_read(fill_th.items); - fill_th.items.offset += FillItemHeader_size; - // TODO: handle segments == 0 but backdrop != specially, it's a solid tile. - if (fill_item.segments.offset != 0) { - PietFill fill = PietItem_Fill_read(item_ref); - CmdFill cmd = CmdFill( - fill_item.segments.offset, - fill_item.backdrop, - fill.rgba_color - ); - alloc_cmd(cmd_ref, cmd_limit); - Cmd_Fill_write(cmd_ref, cmd); - cmd_ref.offset += Cmd_size; - } else if (fill_item.backdrop != 0) { - // TODO: truncate existing cmd list if alpha is opaque - PietFill fill = PietItem_Fill_read(item_ref); - alloc_cmd(cmd_ref, cmd_limit); - Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color)); - cmd_ref.offset += Cmd_size; - } - break; - } - tg_ref.offset += TileGroup_size; - } - Cmd_End_write(cmd_ref); -} diff --git a/piet-gpu/shader/kernel3.spv b/piet-gpu/shader/kernel3.spv deleted file mode 100644 index c182337..0000000 Binary files a/piet-gpu/shader/kernel3.spv and /dev/null differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 2df43ec..bdc540c 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -9,26 +9,14 @@ layout(local_size_x = 16, local_size_y = 16) in; -// Same concern that this should be readonly as in kernel 3. +// This should be annotated readonly but infra doesn't support that yet. layout(set = 0, binding = 0) buffer PtclBuf { uint[] ptcl; }; -// Used readonly -layout(set = 0, binding = 1) buffer SegmentBuf { - uint[] segment; -}; - -// Used readonly -layout(set = 0, binding = 2) buffer FillSegBuf { - uint[] fill_seg; -}; - -layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image; +layout(rgba8, set = 0, binding = 1) uniform writeonly image2D image; #include "ptcl.h" -#include "segment.h" -#include "fill_seg.h" #include "setup.h" @@ -79,11 +67,11 @@ void main() { CmdFill fill = Cmd_Fill_read(cmd_ref); // Probably better to store as float, but conversion is no doubt cheap. float area = float(fill.backdrop); - FillSegChunkRef fill_seg_chunk_ref = FillSegChunkRef(fill.seg_ref); + SegChunkRef fill_seg_chunk_ref = SegChunkRef(fill.seg_ref); do { - FillSegChunk seg_chunk = FillSegChunk_read(fill_seg_chunk_ref); + SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref); for (int i = 0; i < seg_chunk.n; i++) { - FillSegment seg = FillSegment_read(FillSegmentRef(fill_seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * i)); + Segment seg = Segment_read(SegmentRef(fill_seg_chunk_ref.offset + SegChunk_size + Segment_size * i)); vec2 start = seg.start - xy; vec2 end = seg.end - xy; vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0); diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 00e1ac3..6658915 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h index 133b47a..56d4d17 100644 --- a/piet-gpu/shader/ptcl.h +++ b/piet-gpu/shader/ptcl.h @@ -36,6 +36,14 @@ struct CmdRef { uint offset; }; +struct SegmentRef { + uint offset; +}; + +struct SegChunkRef { + uint offset; +}; + struct CmdCircle { vec2 center; float radius; @@ -141,6 +149,28 @@ CmdRef Cmd_index(CmdRef ref, uint index) { return CmdRef(ref.offset + index * Cmd_size); } +struct Segment { + vec2 start; + vec2 end; +}; + +#define Segment_size 16 + +SegmentRef Segment_index(SegmentRef ref, uint index) { + return SegmentRef(ref.offset + index * Segment_size); +} + +struct SegChunk { + uint n; + SegChunkRef next; +}; + +#define SegChunk_size 8 + +SegChunkRef SegChunk_index(SegChunkRef ref, uint index) { + return SegChunkRef(ref.offset + index * SegChunk_size); +} + CmdCircle CmdCircle_read(CmdCircleRef ref) { uint ix = ref.offset >> 2; uint raw0 = ptcl[ix + 0]; @@ -362,3 +392,39 @@ void Cmd_Bail_write(CmdRef ref) { ptcl[ref.offset >> 2] = Cmd_Bail; } +Segment Segment_read(SegmentRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = ptcl[ix + 0]; + uint raw1 = ptcl[ix + 1]; + uint raw2 = ptcl[ix + 2]; + uint raw3 = ptcl[ix + 3]; + Segment s; + s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); + s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + return s; +} + +void Segment_write(SegmentRef ref, Segment s) { + uint ix = ref.offset >> 2; + ptcl[ix + 0] = floatBitsToUint(s.start.x); + ptcl[ix + 1] = floatBitsToUint(s.start.y); + ptcl[ix + 2] = floatBitsToUint(s.end.x); + ptcl[ix + 3] = floatBitsToUint(s.end.y); +} + +SegChunk SegChunk_read(SegChunkRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = ptcl[ix + 0]; + uint raw1 = ptcl[ix + 1]; + SegChunk s; + s.n = raw0; + s.next = SegChunkRef(raw1); + return s; +} + +void SegChunk_write(SegChunkRef ref, SegChunk s) { + uint ix = ref.offset >> 2; + ptcl[ix + 0] = s.n; + ptcl[ix + 1] = s.next.offset; +} + diff --git a/piet-gpu/shader/segment.h b/piet-gpu/shader/segment.h deleted file mode 100644 index 2843b64..0000000 --- a/piet-gpu/shader/segment.h +++ /dev/null @@ -1,126 +0,0 @@ -// Code auto-generated by piet-gpu-derive - -struct TileHeaderRef { - uint offset; -}; - -struct ItemHeaderRef { - uint offset; -}; - -struct SegmentRef { - uint offset; -}; - -struct SegChunkRef { - uint offset; -}; - -struct TileHeader { - uint n; - ItemHeaderRef items; -}; - -#define TileHeader_size 8 - -TileHeaderRef TileHeader_index(TileHeaderRef ref, uint index) { - return TileHeaderRef(ref.offset + index * TileHeader_size); -} - -struct ItemHeader { - SegChunkRef segments; -}; - -#define ItemHeader_size 4 - -ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) { - return ItemHeaderRef(ref.offset + index * ItemHeader_size); -} - -struct Segment { - vec2 start; - vec2 end; -}; - -#define Segment_size 16 - -SegmentRef Segment_index(SegmentRef ref, uint index) { - return SegmentRef(ref.offset + index * Segment_size); -} - -struct SegChunk { - uint n; - SegChunkRef next; -}; - -#define SegChunk_size 8 - -SegChunkRef SegChunk_index(SegChunkRef ref, uint index) { - return SegChunkRef(ref.offset + index * SegChunk_size); -} - -TileHeader TileHeader_read(TileHeaderRef ref) { - uint ix = ref.offset >> 2; - uint raw0 = segment[ix + 0]; - uint raw1 = segment[ix + 1]; - TileHeader s; - s.n = raw0; - s.items = ItemHeaderRef(raw1); - return s; -} - -void TileHeader_write(TileHeaderRef ref, TileHeader s) { - uint ix = ref.offset >> 2; - segment[ix + 0] = s.n; - segment[ix + 1] = s.items.offset; -} - -ItemHeader ItemHeader_read(ItemHeaderRef ref) { - uint ix = ref.offset >> 2; - uint raw0 = segment[ix + 0]; - ItemHeader s; - s.segments = SegChunkRef(raw0); - return s; -} - -void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) { - uint ix = ref.offset >> 2; - segment[ix + 0] = s.segments.offset; -} - -Segment Segment_read(SegmentRef ref) { - uint ix = ref.offset >> 2; - uint raw0 = segment[ix + 0]; - uint raw1 = segment[ix + 1]; - uint raw2 = segment[ix + 2]; - uint raw3 = segment[ix + 3]; - Segment s; - s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); - s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); - return s; -} - -void Segment_write(SegmentRef ref, Segment s) { - uint ix = ref.offset >> 2; - segment[ix + 0] = floatBitsToUint(s.start.x); - segment[ix + 1] = floatBitsToUint(s.start.y); - segment[ix + 2] = floatBitsToUint(s.end.x); - segment[ix + 3] = floatBitsToUint(s.end.y); -} - -SegChunk SegChunk_read(SegChunkRef ref) { - uint ix = ref.offset >> 2; - uint raw0 = segment[ix + 0]; - uint raw1 = segment[ix + 1]; - SegChunk s; - s.n = raw0; - s.next = SegChunkRef(raw1); - return s; -} - -void SegChunk_write(SegChunkRef ref, SegChunk s) { - uint ix = ref.offset >> 2; - segment[ix + 0] = s.n; - segment[ix + 1] = s.next.offset; -} - diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 2527b50..0ac8299 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -209,16 +209,11 @@ impl Renderer { &[], )?; - // These will probably be combined with the ptcl buf, as they're all written by the - // same kernel now. - let segment_buf = device.create_buffer(64 * 1024 * 1024, dev)?; - let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev)?; - let k4_code = include_bytes!("../shader/kernel4.spv"); - let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3, 1)?; + let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 1, 1)?; let k4_ds = device.create_descriptor_set( &k4_pipeline, - &[&ptcl_buf, &segment_buf, &fill_seg_buf], + &[&ptcl_buf], &[&image_dev], )?;