// This is "kernel 2" (strokes) in a 4-kernel pipeline. It processes the stroke
// (polyline) items in the scene and generates a list of segments for each, for
// each tile.

#version 450
#extension GL_GOOGLE_include_directive : enable

layout(local_size_x = 32) in;

layout(set = 0, binding = 0) readonly buffer SceneBuf {
    uint[] scene;
};

layout(set = 0, binding = 1) buffer TilegroupBuf {
    uint[] tilegroup;
};

layout(set = 0, binding = 2) buffer SegmentBuf {
    uint[] segment;
};

// Bump-allocation cursor; advanced with atomicAdd to reserve space for item
// headers and segment chunks.
layout(set = 0, binding = 3) buffer AllocBuf {
    uint alloc;
};

#include "scene.h"
#include "tilegroup.h"
#include "segment.h"

#include "setup.h"

// One invocation per tile: walks the stroke instances recorded for the tile's
// tilegroup and, for every polyline, writes the segments that may touch this
// tile into a linked list of segment chunks referenced from an item header.
void main() {
    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
    // Top-left corner of this tile, in pixels.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
    TileGroupRef stroke_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_STROKE_START);
    // The stroke count is the first word of the stroke section; the first
    // instance chunk follows it immediately.
    uint stroke_n = tilegroup[stroke_start.offset >> 2];

    TileHeaderRef tile_header_ref = TileHeaderRef(tile_ix * TileHeader_size);
    if (stroke_n > 0) {
        ChunkRef chunk_ref = ChunkRef(stroke_start.offset + 4);
        Chunk chunk = Chunk_read(chunk_ref);
        InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
        // Reserve one item header per stroke for this tile.
        ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size));
        TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header));
        // Write cursor into the current segment allocation. It is shared by all
        // items handled by this invocation so that the unused tail of an
        // allocation can be reused by the next item. seg_limit is the highest
        // offset at which a segment may still start; both being 0 forces an
        // allocation on the first hit.
        SegChunkRef seg_chunk_ref = SegChunkRef(0);
        uint seg_limit = 0;
        // Iterate through the items chunk by chunk; the loop ends when the
        // chunk list is exhausted (next == 0).
        while (true) {
            if (chunk.chunk_n == 0) {
                chunk_ref = chunk.next;
                if (chunk_ref.offset == 0) {
                    break;
                }
                chunk = Chunk_read(chunk_ref);
                stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
            }
            Instance ins = Instance_read(stroke_ref);
            PietStrokePolyLine poly = PietItem_Poly_read(PietItemRef(ins.item_ref));

            // Process the stroke polyline item.
            // Guard against unsigned wrap-around for a polyline with no points,
            // which would otherwise make the segment loop run ~2^32 times.
            uint max_n_segs = max(poly.n_points, 1u) - 1u;
            uint chunk_n_segs = 0;
            // Note: seg_chunk_ref is deliberately NOT redeclared here; a local
            // redeclaration would shadow the cursor above with an undefined
            // value and discard the tail-reuse bookkeeping.

            // Tile boundaries padded by half-width. These depend only on the
            // item and the tile, so compute them once per item.
            float half_width = 0.5 * poly.width;
            float xmin = xy0.x - half_width;
            float ymin = xy0.y - half_width;
            float xmax = xy0.x + float(TILE_WIDTH_PX) + half_width;
            float ymax = xy0.y + float(TILE_HEIGHT_PX) + half_width;

            vec2 start = Point_read(poly.points).xy;
            for (uint j = 0; j < max_n_segs; j++) {
                poly.points.offset += Point_size;
                vec2 end = Point_read(poly.points).xy;

                // Process one segment.

                // This logic just tests for collision. What we probably want to do
                // is a clipping algorithm like Liang-Barsky, and then store coords
                // relative to the tile in f16. See also:
                // https://tavianator.com/fast-branchless-raybounding-box-intersections/

                // Also note that when we go to the fancy version, we want to compute
                // the (horizontal projection of) the bounding box of the intersection
                // once per tilegroup, so we can assign work to individual tiles.

                // Implicit line equation a*x + b*y + c = 0 through start and end.
                float a = end.y - start.y;
                float b = start.x - end.x;
                float c = -(a * start.x + b * start.y);
                // Side of the line on which each padded tile corner lies.
                float s00 = sign(b * ymin + a * xmin + c);
                float s01 = sign(b * ymin + a * xmax + c);
                float s10 = sign(b * ymax + a * xmin + c);
                float s11 = sign(b * ymax + a * xmax + c);
                // If bounding boxes intersect and not all four corners are on the same side, hit.
                // (All four on the same side makes each product 1, so the sum is exactly 3.)
                // Also note: this is designed to be false on NAN input.
                if (max(min(start.x, end.x), xmin) < min(max(start.x, end.x), xmax)
                    && max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax)
                    && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
                {
                    // Allocate a chunk if needed.
                    if (chunk_n_segs == 0) {
                        // First hit for this item: start a chunk at the cursor,
                        // allocating fresh space when too little room remains.
                        // NOTE(review): 40 looks like SegChunk_size plus room for
                        // segments, but the struct sizes are not visible here --
                        // confirm and replace with a named expression.
                        if (seg_chunk_ref.offset + 40 > seg_limit) {
                            seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
                            seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size;
                        }
                        ItemHeader_write(item_header, ItemHeader(seg_chunk_ref));
                    } else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) {
                        // Current chunk is full: allocate a new one, close the
                        // old chunk with a link to it, and continue there.
                        uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
                        seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size;
                        SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref)));
                        seg_chunk_ref.offset = new_chunk_ref;
                        chunk_n_segs = 0;
                    }
                    Segment seg = Segment(start, end);
                    Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg);
                    chunk_n_segs++;
                }

                start = end;
            }
            if (chunk_n_segs == 0) {
                // No segment of this item touched the tile: null chunk reference.
                ItemHeader_write(item_header, ItemHeader(SegChunkRef(0)));
            } else {
                // Terminate the chunk list and advance the cursor past the data
                // just written so the next item can reuse the remaining space.
                SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
                seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
            }

            stroke_ref.offset += Instance_size;
            chunk.chunk_n--;
            item_header.offset += ItemHeader_size;
        }
    } else {
        // As an optimization, we could just write 0 for the size.
        TileHeader_write(tile_header_ref, TileHeader(stroke_n, ItemHeaderRef(0)));
    }
}