vello/piet-gpu/shader/kernel1.comp
Raph Levien cb06b1bc3d Implement stroked polylines
This version seems to work but the allocation of segments has low
utilization. Probably best to allocate in chunks rather than try to
make them contiguous.
2020-04-28 18:45:59 -07:00

133 lines
5.1 KiB
Plaintext

// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
// and outputs "instances" (references to item + translation) for each item
// that intersects the tilegroup.
//
// This implementation is simplistic and leaves a lot of performance on the
// table. A fancier implementation would use threadgroup shared memory or
// subgroups (or possibly both) to parallelize the reading of the input and
// the computation of tilegroup intersection.
//
// In addition, there are some features currently missing, such as support
// for clipping.
#version 450
#extension GL_GOOGLE_include_directive : enable
// Workgroup shape: 1 x 32 invocations. main() maps each invocation to one
// tilegroup using gl_GlobalInvocationID.
// It's possible we should lay this out with x and do our own math.
layout(local_size_x = 1, local_size_y = 32) in;
// Input scene graph, viewed as raw 32-bit words; declared readonly.
layout(set = 0, binding = 0) readonly buffer SceneBuf {
uint[] scene;
};
// Output buffer, viewed as raw 32-bit words. main() stores the per-tilegroup
// stroke count into it directly; the other writes go through helpers from the
// included headers.
layout(set = 0, binding = 1) buffer TilegroupBuf {
uint[] tilegroup;
};
// Shared bump-allocation cursor. main() advances it with atomicAdd and uses
// the returned value as the offset of the newly claimed region.
layout(set = 0, binding = 2) buffer AllocBuf {
uint alloc;
};
#include "scene.h"
#include "tilegroup.h"
#include "setup.h"
// Capacity of the explicit traversal stack declared in main().
#define MAX_STACK 8
// One saved position in the scene-graph traversal: enough state to resume
// iterating a group after a nested group has been processed.
struct StackElement {
// Reference to the group item being iterated; re-read when the entry is popped.
PietItemRef group;
// Index of the next child of `group` to examine.
uint index;
// Accumulated translation applied to bounding boxes of children of `group`.
vec2 offset;
};
// Kernel entry point. Each invocation handles one tilegroup: it walks the
// scene graph depth-first using an explicit stack and, for every non-group
// item whose bounding box overlaps the tilegroup rectangle, appends an
// Instance (item offset + accumulated translation) to the tilegroup's list.
// Items tagged PietItem_Poly are additionally appended to a separate, chunked
// list of stroke instances, and the total stroke count is stored at the start
// of the stroke region.
//
// Groups nested deeper than the traversal stack can record are skipped rather
// than writing past the end of the stack array.
void main() {
    StackElement stack[MAX_STACK];
    uint stack_ix = 0;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
    // Highest offset at which an instance may still be written; one further
    // element slot is kept free for the trailing Jump or End record.
    uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;

    // State for stroke references.
    // Layout of the stroke region: a 4-byte instance count at stroke_start
    // (written at the end of main), then a chunk header, then the instances.
    TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START);
    ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4);
    InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size);
    // NOTE(review): the first chunk is bounded with TILEGROUP_INITIAL_ALLOC while
    // later chunks use TILEGROUP_STROKE_ALLOC; confirm against setup.h that this
    // is intended.
    uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_ALLOC - Instance_size;
    uint stroke_chunk_n = 0;  // instances written to the current chunk
    uint stroke_n = 0;        // instances written to all completed chunks

    // Top-left corner of this tilegroup in pixels.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);

    // Traversal starts at the item at offset 0, which is read as a group.
    PietItemRef root = PietItemRef(0);
    SimpleGroup group = PietItem_Group_read(root);
    StackElement tos = StackElement(root, 0, group.offset.xy);

    while (true) {
        if (tos.index < group.n_items) {
            // Translate the child's bounding box and test for a non-empty
            // overlap with this tilegroup's rectangle.
            Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
            vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
            bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX))
                && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX));
            bool is_group = false;
            // Only assigned (and only read) when `hit` is true.
            uint tag;
            if (hit) {
                PietItemRef item_ref = PietItem_index(group.items, tos.index);
                tag = PietItem_tag(item_ref);
                is_group = tag == PietItem_Group;
            }
            if (hit && !is_group) {
                PietItemRef item_ref = PietItem_index(group.items, tos.index);
                Instance ins = Instance(item_ref.offset, tos.offset);
                if (tg_ref.offset > tg_limit) {
                    // Allocation exceeded; do atomic bump alloc.
                    // The reserved slot receives a Jump to the new region.
                    uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC);
                    Jump jump = Jump(TileGroupRef(new_tg));
                    TileGroup_Jump_write(tg_ref, jump);
                    tg_ref = TileGroupRef(new_tg);
                    tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
                }
                TileGroup_Instance_write(tg_ref, ins);
                tg_ref.offset += TileGroup_size;
                if (tag == PietItem_Poly) {
                    if (stroke_ref.offset > stroke_limit) {
                        // Current stroke chunk is full: finalize its header so
                        // it points at a freshly allocated chunk, then continue
                        // writing there.
                        uint new_stroke = atomicAdd(alloc, TILEGROUP_STROKE_ALLOC);
                        Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(new_stroke)));
                        stroke_chunk_start = ChunkRef(new_stroke);
                        stroke_ref = InstanceRef(new_stroke + Chunk_size);
                        stroke_n += stroke_chunk_n;
                        stroke_chunk_n = 0;
                        stroke_limit = new_stroke + TILEGROUP_STROKE_ALLOC - Instance_size;
                    }
                    Instance_write(stroke_ref, ins);
                    stroke_chunk_n++;
                    stroke_ref.offset += Instance_size;
                }
            }
            if (is_group) {
                PietItemRef item_ref = PietItem_index(group.items, tos.index);
                tos.index++;
                bool descend = true;
                // The parent only needs to be saved when it has children left
                // to visit; otherwise the nested group replaces it outright.
                if (tos.index < group.n_items) {
                    if (stack_ix < MAX_STACK) {
                        stack[stack_ix++] = tos;
                    } else {
                        // Stack is full, so the resume point cannot be saved.
                        // Skip the nested group and carry on with the next
                        // sibling instead of writing out of bounds.
                        descend = false;
                    }
                }
                if (descend) {
                    group = PietItem_Group_read(item_ref);
                    tos = StackElement(item_ref, 0, tos.offset + group.offset.xy);
                }
            } else {
                tos.index++;
            }
        } else {
            // processed all items in this group; pop the stack
            if (stack_ix == 0) {
                break;
            }
            tos = stack[--stack_ix];
            group = PietItem_Group_read(tos.group);
        }
    }

    // Terminate the instance list.
    TileGroup_End_write(tg_ref);

    // Finalize the last stroke chunk (with a 0 next-reference) if any stroke
    // instance was written, then publish the total count.
    stroke_n += stroke_chunk_n;
    if (stroke_n > 0) {
        Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0)));
    }
    // Offsets are in bytes; the buffer is indexed in 32-bit words.
    tilegroup[stroke_start.offset >> 2] = stroke_n;
}