vello/piet-gpu/shader/tile_alloc.comp
Elias Naur 6a4e26ef2a all: add optional memory checks
Defining MEM_DEBUG in mem.h will add a size field to Alloc and enable
bounds and alignment checks for every memory read and write.

Notes:
- Deriving an Alloc from Path.tiles is unsound, but it's more trouble to
  convert Path.tiles from TileRef to a variable sized Alloc.
- elements.comp note that "We should be able to use an array of structs but the
  NV shader compiler doesn't seem to like it". If that's still relevant, does
  the shared arrays of Allocs work?

Signed-off-by: Elias Naur <mail@eliasnaur.com>
2021-02-15 16:07:45 +01:00

109 lines
3.4 KiB
Plaintext

// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Allocation and initialization of tiles for paths.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
#include "tile.h"
// scale factors useful for converting coordinates to tiles
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))
shared uint sh_tile_count[TILE_ALLOC_WG];
shared MallocResult sh_tile_alloc;
void main() {
if (mem_error != NO_ERROR) {
return;
}
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
uint tag = Annotated_Nop;
if (element_ix < conf.n_elements) {
tag = Annotated_tag(conf.anno_alloc, ref);
}
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
case Annotated_Fill:
case Annotated_Stroke:
case Annotated_BeginClip:
case Annotated_EndClip:
// Note: we take advantage of the fact that fills, strokes, and
// clips have compatible layout.
AnnoFill fill = Annotated_Fill_read(conf.anno_alloc, ref);
x0 = int(floor(fill.bbox.x * SX));
y0 = int(floor(fill.bbox.y * SY));
x1 = int(ceil(fill.bbox.z * SX));
y1 = int(ceil(fill.bbox.w * SY));
break;
}
x0 = clamp(x0, 0, int(conf.width_in_tiles));
y0 = clamp(y0, 0, int(conf.height_in_tiles));
x1 = clamp(x1, 0, int(conf.width_in_tiles));
y1 = clamp(y1, 0, int(conf.height_in_tiles));
Path path;
path.bbox = uvec4(x0, y0, x1, y1);
uint tile_count = (x1 - x0) * (y1 - y0);
if (tag == Annotated_EndClip) {
// Don't actually allocate tiles for an end clip, but we do want
// the path structure (especially bbox) allocated for it.
tile_count = 0;
}
sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count;
// Prefix sum of sh_tile_count
for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
barrier();
if (th_ix >= (1 << i)) {
total_tile_count += sh_tile_count[th_ix - (1 << i)];
}
barrier();
sh_tile_count[th_ix] = total_tile_count;
}
if (th_ix == TILE_ALLOC_WG - 1) {
sh_tile_alloc = malloc(total_tile_count * Tile_size);
}
barrier();
MallocResult alloc_start = sh_tile_alloc;
if (alloc_start.failed) {
return;
}
if (element_ix < conf.n_elements) {
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
path.tiles = TileRef(tiles_alloc.offset);
Path_write(conf.tile_alloc, path_ref, path);
}
// Zero out allocated tiles efficiently
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
uint start_ix = alloc_start.alloc.offset >> 2;
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
// Note: this interleaving is faster than using Tile_write
// by a significant amount.
write_mem(alloc_start.alloc, start_ix + i, 0);
}
}