mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-25 18:56:35 +11:00
6a4e26ef2a
Defining MEM_DEBUG in mem.h will add a size field to Alloc and enable bounds and alignment checks for every memory read and write. Notes: - Deriving an Alloc from Path.tiles is unsound, but it's more trouble to convert Path.tiles from TileRef to a variable sized Alloc. - elements.comp note that "We should be able to use an array of structs but the NV shader compiler doesn't seem to like it". If that's still relevant, does the shared arrays of Allocs work? Signed-off-by: Elias Naur <mail@eliasnaur.com>
108 lines
3.4 KiB
Text
108 lines
3.4 KiB
Text
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
// Allocation and initialization of tiles for paths.
|
|
|
|
#version 450
|
|
#extension GL_GOOGLE_include_directive : enable
|
|
|
|
#include "mem.h"
|
|
#include "setup.h"
|
|
|
|
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
|
|
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
|
|
|
|
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
|
|
|
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
|
Config conf;
|
|
};
|
|
|
|
#include "annotated.h"
|
|
#include "tile.h"
|
|
|
|
// scale factors useful for converting coordinates to tiles
|
|
#define SX (1.0 / float(TILE_WIDTH_PX))
|
|
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
|
|
|
shared uint sh_tile_count[TILE_ALLOC_WG];
|
|
shared MallocResult sh_tile_alloc;
|
|
|
|
void main() {
|
|
if (mem_error != NO_ERROR) {
|
|
return;
|
|
}
|
|
|
|
uint th_ix = gl_LocalInvocationID.x;
|
|
uint element_ix = gl_GlobalInvocationID.x;
|
|
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
|
|
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
|
|
|
uint tag = Annotated_Nop;
|
|
if (element_ix < conf.n_elements) {
|
|
tag = Annotated_tag(conf.anno_alloc, ref);
|
|
}
|
|
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
|
|
switch (tag) {
|
|
case Annotated_Fill:
|
|
case Annotated_Stroke:
|
|
case Annotated_BeginClip:
|
|
case Annotated_EndClip:
|
|
// Note: we take advantage of the fact that fills, strokes, and
|
|
// clips have compatible layout.
|
|
AnnoFill fill = Annotated_Fill_read(conf.anno_alloc, ref);
|
|
x0 = int(floor(fill.bbox.x * SX));
|
|
y0 = int(floor(fill.bbox.y * SY));
|
|
x1 = int(ceil(fill.bbox.z * SX));
|
|
y1 = int(ceil(fill.bbox.w * SY));
|
|
break;
|
|
}
|
|
x0 = clamp(x0, 0, int(conf.width_in_tiles));
|
|
y0 = clamp(y0, 0, int(conf.height_in_tiles));
|
|
x1 = clamp(x1, 0, int(conf.width_in_tiles));
|
|
y1 = clamp(y1, 0, int(conf.height_in_tiles));
|
|
|
|
Path path;
|
|
path.bbox = uvec4(x0, y0, x1, y1);
|
|
uint tile_count = (x1 - x0) * (y1 - y0);
|
|
if (tag == Annotated_EndClip) {
|
|
// Don't actually allocate tiles for an end clip, but we do want
|
|
// the path structure (especially bbox) allocated for it.
|
|
tile_count = 0;
|
|
}
|
|
|
|
sh_tile_count[th_ix] = tile_count;
|
|
uint total_tile_count = tile_count;
|
|
// Prefix sum of sh_tile_count
|
|
for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
|
|
barrier();
|
|
if (th_ix >= (1 << i)) {
|
|
total_tile_count += sh_tile_count[th_ix - (1 << i)];
|
|
}
|
|
barrier();
|
|
sh_tile_count[th_ix] = total_tile_count;
|
|
}
|
|
if (th_ix == TILE_ALLOC_WG - 1) {
|
|
sh_tile_alloc = malloc(total_tile_count * Tile_size);
|
|
}
|
|
barrier();
|
|
MallocResult alloc_start = sh_tile_alloc;
|
|
if (alloc_start.failed) {
|
|
return;
|
|
}
|
|
|
|
if (element_ix < conf.n_elements) {
|
|
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
|
|
Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
|
|
path.tiles = TileRef(tiles_alloc.offset);
|
|
Path_write(conf.tile_alloc, path_ref, path);
|
|
}
|
|
|
|
// Zero out allocated tiles efficiently
|
|
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
|
|
uint start_ix = alloc_start.alloc.offset >> 2;
|
|
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
|
|
// Note: this interleaving is faster than using Tile_write
|
|
// by a significant amount.
|
|
write_mem(alloc_start.alloc, start_ix + i, 0);
|
|
}
|
|
}
|