vello/piet-gpu/shader/kernel3.comp
Raph Levien cb06b1bc3d Implement stroked polylines
This version seems to work but the allocation of segments has low
utilization. Probably best to allocate in chunks rather than try to
make them contiguous.
2020-04-28 18:45:59 -07:00

108 lines
3.6 KiB
Plaintext

// This is "kernel 3" in a 4-kernel pipeline. It walks the active items
// for the tilegroup and produces a per-tile command list for each tile.
#version 450
#extension GL_GOOGLE_include_directive : enable
// 32 invocations per workgroup along x; main() derives one tile index from
// each invocation's gl_GlobalInvocationID.
layout(local_size_x = 32, local_size_y = 1) in;
// Binding 0: scene data as raw uints, declared readonly.
// Presumably accessed through the PietItem_* readers from scene.h — confirm.
layout(set = 0, binding = 0) readonly buffer SceneBuf {
uint[] scene;
};
// Binding 1: tilegroup data as raw uints.
// TODO: this should have a `readonly` qualifier, but then inclusion
// of ptcl.h would fail because of the writers.
// NOTE(review): this TODO sits on the tilegroup buffer yet names ptcl.h;
// presumably the writers meant are the ones generated in tilegroup.h — confirm.
layout(set = 0, binding = 1) buffer TilegroupBuf {
uint[] tilegroup;
};
// Binding 2: segment data as raw uints.
// Used readonly
layout(set = 0, binding = 2) buffer SegmentBuf {
uint[] segment;
};
// Binding 3: per-tile command list storage as raw uints.
// Presumably the target of the Cmd_*_write helpers from ptcl.h — confirm.
layout(set = 0, binding = 3) buffer PtclBuf {
uint[] ptcl;
};
// Binding 4: a single counter that alloc_cmd() advances with atomicAdd to
// hand out additional command-list chunks.
layout(set = 0, binding = 4) buffer AllocBuf {
uint alloc;
};
#include "scene.h"
#include "tilegroup.h"
#include "segment.h"
#include "ptcl.h"
#include "setup.h"
// Ensure there is room to write one more command at `cmd_ref`.
//
// `cmd_limit` is kept at (chunk start + PTCL_INITIAL_ALLOC - 2 * Cmd_size).
// While the write cursor has not passed that limit, nothing happens. Once it
// has, a fresh chunk of PTCL_INITIAL_ALLOC is claimed from the shared `alloc`
// counter, a jump command to the new chunk is written at the current cursor,
// and both `cmd_ref` and `cmd_limit` are updated to describe the new chunk.
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
    // Fast path: the current chunk still has space.
    if (cmd_ref.offset <= cmd_limit) {
        return;
    }

    // atomicAdd returns the counter value before the addition, which is the
    // start of the newly claimed chunk.
    uint chunk_start = atomicAdd(alloc, PTCL_INITIAL_ALLOC);

    // Link the old chunk to the new one before moving the cursor.
    Cmd_Jump_write(cmd_ref, CmdJump(chunk_start));

    cmd_ref = CmdRef(chunk_start);
    cmd_limit = chunk_start + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
}
// Entry point. Each invocation handles one tile: it walks the tilegroup's
// item list and appends a command to this tile's command list for every
// item that applies, then terminates the list with an end command.
void main() {
    uvec2 tile_xy = gl_GlobalInvocationID.xy;
    uint tile_ix = tile_xy.y * WIDTH_IN_TILES + tile_xy.x;
    uint tilegroup_ix = tile_xy.y * WIDTH_IN_TILEGROUPS
        + (tile_xy.x / TILEGROUP_WIDTH_TILES);

    // Pixel-space bounds of this tile.
    vec2 tile_min = vec2(tile_xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
    vec2 tile_max = tile_min + vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);

    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);

    // Every tile starts with its own initial chunk of command space; the
    // limit leaves two command slots free at the end of the chunk.
    CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
    uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;

    TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size));

    uint tg_tag = TileGroup_tag(tg_ref);
    while (tg_tag != TileGroup_End) {
        if (tg_tag == TileGroup_Jump) {
            // Follow the link to the next piece of the tilegroup list.
            tg_ref = TileGroup_Jump_read(tg_ref).new_ref;
        } else {
            // Assume tg_tag is `Instance`, though there will be more cases.
            Instance ins = TileGroup_Instance_read(tg_ref);
            PietItemRef item_ref = PietItemRef(ins.item_ref);
            uint item_tag = PietItem_tag(item_ref);

            if (item_tag == PietItem_Circle) {
                PietCircle circle = PietItem_Circle_read(item_ref);
                vec2 center = ins.offset + circle.center.xy;
                float radius = circle.radius;

                // Intersect the circle's bounding box with the tile; emit a
                // command only if the intersection is non-empty on both axes.
                vec2 overlap_min = max(center - radius, tile_min);
                vec2 overlap_max = min(center + radius, tile_max);
                if (all(lessThan(overlap_min, overlap_max))) {
                    alloc_cmd(cmd_ref, cmd_limit);
                    Cmd_Circle_write(cmd_ref, CmdCircle(center, radius, circle.rgba_color));
                    cmd_ref.offset += Cmd_size;
                }
            } else if (item_tag == PietItem_Poly) {
                // The per-tile item header cursor advances for every Poly
                // item, whether or not a command is emitted for it.
                ItemHeader stroke_item = ItemHeader_read(stroke_th.items);
                stroke_th.items.offset += ItemHeader_size;

                if (stroke_item.n > 0) {
                    PietStrokePolyLine poly = PietItem_Poly_read(item_ref);
                    // The command carries half of the polyline's width.
                    float half_width = 0.5 * poly.width;
                    alloc_cmd(cmd_ref, cmd_limit);
                    Cmd_Stroke_write(cmd_ref, CmdStroke(
                        stroke_item.n,
                        stroke_item.segments.offset,
                        half_width,
                        poly.rgba_color
                    ));
                    cmd_ref.offset += Cmd_size;
                }
            }

            tg_ref.offset += TileGroup_size;
        }
        tg_tag = TileGroup_tag(tg_ref);
    }

    Cmd_End_write(cmd_ref);
}