vello/piet-gpu/shader/kernel3.comp

// This is "kernel 3" in a 4-kernel pipeline. It walks the active items
// for the tilegroup and produces a per-tile command list for each tile.

#version 450
#extension GL_GOOGLE_include_directive : enable

// Workgroup shape: 32 invocations along x, 1 along y. main() derives one
// tile index per invocation from gl_GlobalInvocationID.
layout(local_size_x = 32, local_size_y = 1) in;

// Scene data as raw 32-bit words; declared readonly.
layout(set = 0, binding = 0) readonly buffer SceneBuf {
    uint[] scene;
};

// Tilegroup data as raw 32-bit words; main() indexes it by tilegroup.
// TODO: this should have a `readonly` qualifier, but then inclusion
// of ptcl.h would fail because of the writers.
layout(set = 0, binding = 1) buffer TilegroupBuf {
    uint[] tilegroup;
};

// Segment data as raw 32-bit words. Nothing in this file writes to it
// directly, but the block is not declared `readonly`.
layout(set = 0, binding = 2) buffer SegmentBuf {
    uint[] segment;
};

// Per-tile command list storage as raw 32-bit words.
// NOTE(review): presumably the Cmd_*_write helpers from ptcl.h store into
// this array -- confirm against the generated header.
layout(set = 0, binding = 3) buffer PtclBuf {
    uint[] ptcl;
};

// Single counter that alloc_cmd() advances with atomicAdd to obtain the
// offset of a fresh command chunk.
layout(set = 0, binding = 4) buffer AllocBuf {
    uint alloc;
};

#include "scene.h"
#include "tilegroup.h"
#include "segment.h"
#include "ptcl.h"

#include "setup.h"

// Make sure `cmd_ref` points at a slot that may be written.
//
// While `cmd_ref.offset` has not passed `cmd_limit` nothing happens.
// Otherwise a new chunk of PTCL_INITIAL_ALLOC is claimed by atomically
// bumping `alloc`, a jump command to that chunk is written at the current
// position, and both `cmd_ref` and `cmd_limit` are re-pointed at the new
// chunk. The limit sits 2 * Cmd_size before the end of the chunk, so after
// the last permitted write one Cmd_size slot is still left over (assuming
// callers advance by Cmd_size per command, as main() does).
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
    if (cmd_ref.offset <= cmd_limit) {
        // Still room in the current chunk.
        return;
    }
    uint chunk_start = atomicAdd(alloc, PTCL_INITIAL_ALLOC);
    // Link the exhausted chunk to the fresh one before moving on.
    Cmd_Jump_write(cmd_ref, CmdJump(chunk_start));
    cmd_ref = CmdRef(chunk_start);
    cmd_limit = chunk_start + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
}

// Entry point; each invocation handles the tile selected by
// gl_GlobalInvocationID.xy.
//
// Walks the item list of the tile's tilegroup (following jump records) and
// appends commands to the tile's command list:
//   * a circle command for every circle whose bounding box overlaps the tile;
//   * a stroke command for every polyline whose per-tile item header reports
//     at least one segment.
// The list is closed with an end command once the tilegroup end tag is seen.
void main() {
    uvec2 tile_xy = gl_GlobalInvocationID.xy;
    uint tile_ix = tile_xy.y * WIDTH_IN_TILES + tile_xy.x;
    uint tilegroup_ix = tile_xy.y * WIDTH_IN_TILEGROUPS
        + (tile_xy.x / TILEGROUP_WIDTH_TILES);

    // Pixel-space min and max corners of this tile.
    vec2 tile_min = vec2(tile_xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
    vec2 tile_max = tile_min + vec2(float(TILE_WIDTH_PX), float(TILE_HEIGHT_PX));

    TileGroupRef cursor = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);

    // The command list starts in a fixed chunk indexed by tile; see
    // alloc_cmd() for how it grows past that chunk.
    CmdRef out_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
    uint out_limit = out_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;

    TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size));

    for (uint tag = TileGroup_tag(cursor);
         tag != TileGroup_End;
         tag = TileGroup_tag(cursor))
    {
        if (tag == TileGroup_Jump) {
            // Follow the link; the tag at the new location is examined next,
            // without stepping past it.
            cursor = TileGroup_Jump_read(cursor).new_ref;
            continue;
        }

        // Any other tag is treated as `Instance` for now.
        Instance inst = TileGroup_Instance_read(cursor);
        PietItemRef item = PietItemRef(inst.item_ref);
        uint kind = PietItem_tag(item);

        if (kind == PietItem_Circle) {
            PietCircle circle = PietItem_Circle_read(item);
            vec2 center = inst.offset + circle.center.xy;
            float radius = circle.radius;
            // Clip the circle's bounding box against the tile; a non-empty
            // result on both axes means the circle may touch this tile.
            vec2 lo = max(center - radius, tile_min);
            vec2 hi = min(center + radius, tile_max);
            if (lo.x < hi.x && lo.y < hi.y) {
                alloc_cmd(out_ref, out_limit);
                Cmd_Circle_write(out_ref, CmdCircle(center, radius, circle.rgba_color));
                out_ref.offset += Cmd_size;
            }
        } else if (kind == PietItem_Poly) {
            // One item header is consumed per polyline, whether or not it
            // has segments for this tile.
            ItemHeader hdr = ItemHeader_read(stroke_th.items);
            stroke_th.items.offset += ItemHeader_size;
            if (hdr.n > 0) {
                PietStrokePolyLine poly = PietItem_Poly_read(item);
                alloc_cmd(out_ref, out_limit);
                // The command carries half of the polyline's width.
                Cmd_Stroke_write(out_ref, CmdStroke(
                    hdr.n,
                    hdr.segments.offset,
                    0.5 * poly.width,
                    poly.rgba_color
                ));
                out_ref.offset += Cmd_size;
            }
        }

        cursor.offset += TileGroup_size;
    }

    Cmd_End_write(out_ref);
}