// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
// and outputs "instances" (references to item + translation) for each item
// that intersects the tilegroup.
//
// This implementation is simplistic and leaves a lot of performance on the
// table. A fancier implementation would use threadgroup shared memory or
// subgroups (or possibly both) to parallelize the reading of the input and
// the computation of tilegroup intersection.
//
// In addition, there are some features currently missing, such as support
// for clipping.
//
// The traversal uses a fixed-size explicit stack (MAX_STACK entries). A nested
// group that would require more pending parents than that is skipped rather
// than traversed, so that the stack array is never indexed out of bounds.

#version 450
#extension GL_GOOGLE_include_directive : enable

// It's possible we should lay this out with x and do our own math.
layout(local_size_x = 1, local_size_y = 32) in;

// Encoded scene graph, read through the accessors from scene.h.
layout(set = 0, binding = 0) readonly buffer SceneBuf {
    uint[] scene;
};

// Output buffer for per-tilegroup instance lists and stroke/fill chunk lists.
layout(set = 0, binding = 1) buffer TilegroupBuf {
    uint[] tilegroup;
};

// Shared bump-allocation cursor, advanced with atomicAdd when a tilegroup's
// initial allocation is exhausted.
layout(set = 0, binding = 2) buffer AllocBuf {
    uint alloc;
};

#include "scene.h"
#include "tilegroup.h"

#include "setup.h"

// Maximum number of suspended parent groups during traversal.
#define MAX_STACK 8

// A suspended (or current) position in the scene graph traversal.
struct StackElement {
    // The group being iterated.
    PietItemRef group;
    // Index of the next child of `group` to visit.
    uint index;
    // Accumulated translation applied to children of `group`.
    vec2 offset;
};

// Entry point: each invocation processes one tilegroup, selected by
// gl_GlobalInvocationID, and writes the instances that hit it.
void main() {
    StackElement stack[MAX_STACK];
    uint stack_ix = 0;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
    // Leave room for the entry being written plus a trailing Jump/End entry.
    uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;

    // State for stroke references.
    TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START);
    // The first word at stroke_start holds the total count (written at the
    // end of main); the first chunk header follows it.
    ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4);
    InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size);
    uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_STROKE_ALLOC - Instance_size;
    uint stroke_chunk_n = 0;
    uint stroke_n = 0;

    // State for fill references. All this is a bit cut'n'paste, but making a
    // proper abstraction isn't easy.
    TileGroupRef fill_start = TileGroupRef(tg_ref.offset + TILEGROUP_FILL_START);
    ChunkRef fill_chunk_start = ChunkRef(fill_start.offset + 4);
    InstanceRef fill_ref = InstanceRef(fill_chunk_start.offset + Chunk_size);
    uint fill_limit = fill_start.offset + TILEGROUP_INITIAL_FILL_ALLOC - Instance_size;
    uint fill_chunk_n = 0;
    uint fill_n = 0;

    // Top-left corner of this tilegroup in pixels.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
    // The item at offset 0 is read as the root group.
    PietItemRef root = PietItemRef(0);
    SimpleGroup group = PietItem_Group_read(root);
    StackElement tos = StackElement(root, 0, group.offset.xy);

    while (true) {
        if (tos.index < group.n_items) {
            Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
            vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
            // Rectangle overlap test between the translated bbox and the tilegroup.
            bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX))
                && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX));
            bool is_group = false;
            // Only meaningful when `hit` is true; initialized so it is never
            // read indeterminate.
            uint tag = 0;
            if (hit) {
                PietItemRef item_ref = PietItem_index(group.items, tos.index);
                tag = PietItem_tag(item_ref);
                is_group = tag == PietItem_Group;
            }
            if (hit && !is_group) {
                PietItemRef item_ref = PietItem_index(group.items, tos.index);
                Instance ins = Instance(item_ref.offset, tos.offset);
                if (tg_ref.offset > tg_limit) {
                    // Allocation exceeded; do atomic bump alloc.
                    uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC);
                    Jump jump = Jump(TileGroupRef(new_tg));
                    TileGroup_Jump_write(tg_ref, jump);
                    tg_ref = TileGroupRef(new_tg);
                    tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
                }
                TileGroup_Instance_write(tg_ref, ins);
                tg_ref.offset += TileGroup_size;
                if (tag == PietItem_Poly) {
                    if (stroke_ref.offset > stroke_limit) {
                        // Current stroke chunk is full: finalize its header
                        // with a reference to a freshly allocated chunk.
                        uint new_stroke = atomicAdd(alloc, TILEGROUP_STROKE_ALLOC);
                        Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(new_stroke)));
                        stroke_chunk_start = ChunkRef(new_stroke);
                        stroke_ref = InstanceRef(new_stroke + Chunk_size);
                        stroke_n += stroke_chunk_n;
                        stroke_chunk_n = 0;
                        stroke_limit = new_stroke + TILEGROUP_STROKE_ALLOC - Instance_size;
                    }
                    Instance_write(stroke_ref, ins);
                    stroke_chunk_n++;
                    stroke_ref.offset += Instance_size;
                } else if (tag == PietItem_Fill) {
                    if (fill_ref.offset > fill_limit) {
                        // Same chunk-chaining scheme as for strokes above.
                        uint new_fill = atomicAdd(alloc, TILEGROUP_FILL_ALLOC);
                        Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(new_fill)));
                        fill_chunk_start = ChunkRef(new_fill);
                        fill_ref = InstanceRef(new_fill + Chunk_size);
                        fill_n += fill_chunk_n;
                        fill_chunk_n = 0;
                        fill_limit = new_fill + TILEGROUP_FILL_ALLOC - Instance_size;
                    }
                    Instance_write(fill_ref, ins);
                    fill_chunk_n++;
                    fill_ref.offset += Instance_size;
                }
            }
            if (is_group) {
                PietItemRef item_ref = PietItem_index(group.items, tos.index);
                tos.index++;
                // The parent only needs to be resumed later if it has
                // children left; otherwise it is simply dropped.
                bool need_push = tos.index < group.n_items;
                if (need_push && stack_ix >= MAX_STACK) {
                    // Traversal stack is full. Descending would require
                    // writing past the end of `stack`, so skip this nested
                    // group and continue with the parent's next child.
                } else {
                    if (need_push) {
                        stack[stack_ix++] = tos;
                    }
                    group = PietItem_Group_read(item_ref);
                    tos = StackElement(item_ref, 0, tos.offset + group.offset.xy);
                }
            } else {
                tos.index++;
            }
        } else {
            // processed all items in this group; pop the stack
            if (stack_ix == 0) {
                break;
            }
            tos = stack[--stack_ix];
            group = PietItem_Group_read(tos.group);
        }
    }
    TileGroup_End_write(tg_ref);

    // Finalize the last stroke chunk (next reference 0) and store the total
    // stroke count in the word at stroke_start.
    stroke_n += stroke_chunk_n;
    if (stroke_n > 0) {
        Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0)));
    }
    tilegroup[stroke_start.offset >> 2] = stroke_n;

    // Likewise for fills.
    fill_n += fill_chunk_n;
    if (fill_n > 0) {
        Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(0)));
    }
    tilegroup[fill_start.offset >> 2] = fill_n;
}