vello/piet-gpu/shader/backdrop.comp
Elias Naur 4de67d9081 unify GPU memory management
Merge all static and dynamic buffers to just one, "memory". Add a malloc
function for dynamic allocations.

Unify static allocation offsets into a "config" buffer containing scene setup
(number of paths, number of path segments), as well as the memory offsets of
the static allocations.

Finally, set an overflow flag when an allocation fail, and make sure to exit
shader execution as soon as that triggers. Add checks before beginning
execution in case the client wants to run two or more shaders before checking
the flag.

The "state" buffer is left alone because it needs zero'ing and because it is
accessed with the "volatile" keyword.

Fixes #40

Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-12-27 20:24:29 +01:00

101 lines
3.4 KiB
Plaintext

// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Propagation of tile backdrop for filling.
//
// Each thread reads one path element and calculates the number of spanned tiles
// based on the bounding box.
// In a further compaction step, the workgroup loops over the corresponding tile rows per element in parallel.
// For each row the per tile backdrop will be read, as calculated in the previous coarse path segment kernel,
// and propagated from the left to the right (prefix summed).
//
// Output state:
// - Each path element has an array of tiles covering the whole path based on boundig box
// - Each tile per path element contains the 'backdrop' and a list of subdivided path segments
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#include "mem.h"
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
#include "tile.h"
shared uint sh_row_count[BACKDROP_WG];
shared uint sh_row_base[BACKDROP_WG];
shared uint sh_row_width[BACKDROP_WG];
void main() {
if (mem_overflow) {
return;
}
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
// Work assignment: 1 thread : 1 path element
uint row_count = 0;
if (element_ix < conf.n_elements) {
uint tag = Annotated_tag(ref);
switch (tag) {
case Annotated_Fill:
case Annotated_BeginClip:
PathRef path_ref = PathRef(conf.tile_base + element_ix * Path_size);
Path path = Path_read(path_ref);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
if (row_count == 1) {
// Note: this can probably be expanded to width = 2 as
// long as it doesn't cross the left edge.
row_count = 0;
}
sh_row_base[th_ix] = (path.tiles.offset >> 2) + 1;
}
}
sh_row_count[th_ix] = row_count;
// Prefix sum of sh_row_count
for (uint i = 0; i < LG_BACKDROP_WG; i++) {
barrier();
if (th_ix >= (1 << i)) {
row_count += sh_row_count[th_ix - (1 << i)];
}
barrier();
sh_row_count[th_ix] = row_count;
}
barrier();
// Work assignment: 1 thread : 1 path element row
uint total_rows = sh_row_count[BACKDROP_WG - 1];
for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) {
// Binary search to find element
uint el_ix = 0;
for (uint i = 0; i < LG_BACKDROP_WG; i++) {
uint probe = el_ix + ((BACKDROP_WG / 2) >> i);
if (row >= sh_row_count[probe - 1]) {
el_ix = probe;
}
}
uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
uint width = sh_row_width[el_ix];
// Process one row sequentially
// Read backdrop value per tile and prefix sum it
uint tile_el_ix = sh_row_base[el_ix] + seq_ix * 2 * width;
uint sum = memory[tile_el_ix];
for (uint x = 1; x < width; x++) {
tile_el_ix += 2;
sum += memory[tile_el_ix];
memory[tile_el_ix] = sum;
}
}
}