vello/piet-gpu/shader/kernel1.comp

91 lines
3.2 KiB
Plaintext
Raw Normal View History

// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
// and outputs "instances" (references to item + translation) for each item
// that intersects the tilegroup.
//
// This implementation is simplistic and leaves a lot of performance on the
// table. A fancier implementation would use threadgroup shared memory or
// subgroups (or possibly both) to parallelize the reading of the input and
// the computation of tilegroup intersection.
//
// In addition, there are some features currently missing. One is the use of
// a bump allocator to extend the current fixed allocation. Another is support
// for clipping.
#version 450
#extension GL_GOOGLE_include_directive : enable
// Workgroup shape: 1 invocation in x, 32 in y. main() maps each invocation to
// one tilegroup via gl_GlobalInvocationID.
// It's possible we should lay this out with x and do our own math.
layout(local_size_x = 1, local_size_y = 32) in;
// Input scene, exposed as a flat array of 32-bit words. Declared readonly;
// presumably decoded by the accessors pulled in from scene.h (not visible here).
layout(set = 0, binding = 0) readonly buffer SceneBuf {
uint[] scene;
};
// Output buffer, exposed as a flat array of 32-bit words. main() fills it
// through the TileGroup_*_write helpers (presumably defined in tilegroup.h).
layout(set = 0, binding = 1) buffer TilegroupBuf {
uint[] tilegroup;
};
// NOTE(review): the includes follow the buffer declarations; presumably the
// generated accessors reference `scene` and `tilegroup` by name -- confirm.
#include "scene.h"
#include "tilegroup.h"
#include "setup.h"
// Capacity of the explicit traversal stack declared in main().
#define MAX_STACK 8
// One frame of the traversal in main(): a group plus the position reached
// inside it. main() builds frames with the positional constructor, so the
// field order below must not change.
struct StackElement {
// Reference to the group item this frame iterates over.
PietItemRef group;
// Index of the next child of `group` to examine.
uint index;
// Translation accumulated from the root down to and including `group`.
vec2 offset;
};
// Kernel entry point. Each invocation handles one tilegroup, selected by
// gl_GlobalInvocationID: it walks the scene from the root group (item 0),
// tests every child's bounding box against the tilegroup rectangle, and
// appends an Instance (item reference + accumulated translation) for each
// non-group item that overlaps. The list is closed with an End marker.
//
// Nested groups are traversed depth-first with an explicit stack. A frame is
// saved only when the parent still has unvisited children, so descending
// into the last child of a group costs no stack space.
//
// Two limits are enforced so the shader never writes out of bounds:
//   * If descending would need more than MAX_STACK saved frames, the
//     subgroup is skipped.
//   * If the tilegroup's fixed allocation is full, further instances are
//     dropped, keeping room for the End marker.
// In both cases content is silently lost for this tilegroup.
void main() {
    StackElement stack[MAX_STACK];
    uint stack_ix = 0;

    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
    // One past the end of this tilegroup's allocation, in the same units as
    // tg_ref.offset (which is advanced by TileGroup_size per element below).
    uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC;

    // Corners of the tilegroup rectangle in pixels; both are loop-invariant.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
    vec2 xy1 = xy0 + vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);

    PietItemRef root = PietItemRef(0);
    SimpleGroup group = PietItem_Group_read(root);
    StackElement tos = StackElement(root, 0, group.offset.xy);

    while (true) {
        if (tos.index >= group.n_items) {
            // Processed all items in this group; pop the stack.
            if (stack_ix == 0) {
                break;
            }
            tos = stack[--stack_ix];
            group = PietItem_Group_read(tos.group);
            continue;
        }

        // Child bounding box, moved into global coordinates.
        Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
        vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
        // Strict overlap test: boxes that merely touch do not count as a hit.
        bool hit = max(bb.x, xy0.x) < min(bb.z, xy1.x)
            && max(bb.y, xy0.y) < min(bb.w, xy1.y);

        uint child_ix = tos.index;
        // Advance now, so a frame saved below resumes at the next sibling.
        tos.index++;
        if (!hit) {
            continue;
        }

        PietItemRef item_ref = PietItem_index(group.items, child_ix);
        if (PietItem_tag(item_ref) != PietItem_Group) {
            // Leaf item: emit an instance only if there is still room for it
            // plus the End marker written after the loop.
            // TODO: bump allocate if allocation exceeded
            if (tg_ref.offset + 2 * TileGroup_size <= tg_limit) {
                Instance ins = Instance(item_ref.offset, tos.offset);
                TileGroup_Instance_write(tg_ref, ins);
                tg_ref.offset += TileGroup_size;
            }
            continue;
        }

        // Nested group: save the parent frame only if it has siblings left.
        if (tos.index < group.n_items) {
            if (stack_ix >= MAX_STACK) {
                // Stack full: skip this subgroup rather than index past the
                // end of stack[]. Traversal continues with the next sibling.
                continue;
            }
            stack[stack_ix++] = tos;
        }
        group = PietItem_Group_read(item_ref);
        tos = StackElement(item_ref, 0, tos.offset + group.offset.xy);
    }

    TileGroup_End_write(tg_ref);
}