vello/piet-gpu/shader/tile_alloc.comp
Elias Naur 8fab45544e shader: implement clip paths
Expand the the final kernel4 stage to maintain a per-pixel mask.

Introduce two new path elements, FillMask and FillMaskInv, to fill
the mask. FillMask acts like Fill, while FillMaskInv fills the area
outside the path.

SVG clipPaths is then representable by a FillMaskInv(0.0) for every nested
path, preceded by a FillMask(1.0) to clear the mask.

The bounding box for FillMaskInv elements is the entire screen; tightening of
the bounding box is left for future work. Note that a fullscreen bounding
box is not hopelessly inefficient because completely filling a tile with
a mask is just a single CmdSolidMask per tile.

Fixes #30

Signed-off-by: Elias Naur <mail@eliasnaur.com>
2020-10-09 13:20:26 +02:00

102 lines
2.9 KiB
Plaintext

// Allocation and initialization of tiles for paths.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
layout(set = 0, binding = 0) buffer AnnotatedBuf {
uint[] annotated;
};
layout(set = 0, binding = 1) buffer AllocBuf {
uint n_elements;
uint n_pathseg;
uint alloc;
};
layout(set = 0, binding = 2) buffer TileBuf {
uint[] tile;
};
#include "annotated.h"
#include "tile.h"
// scale factors useful for converting coordinates to tiles
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))
shared uint sh_tile_count[TILE_ALLOC_WG];
shared uint sh_tile_alloc;
void main() {
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef(element_ix * Path_size);
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
uint tag = Annotated_Nop;
if (element_ix < n_elements) {
tag = Annotated_tag(ref);
}
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
case Annotated_Fill:
case Annotated_FillMask:
case Annotated_FillMaskInv:
case Annotated_Stroke:
// Note: we take advantage of the fact that fills and strokes
// have compatible layout.
AnnoFill fill = Annotated_Fill_read(ref);
x0 = int(floor(fill.bbox.x * SX));
y0 = int(floor(fill.bbox.y * SY));
x1 = int(ceil(fill.bbox.z * SX));
y1 = int(ceil(fill.bbox.w * SY));
break;
}
x0 = clamp(x0, 0, WIDTH_IN_TILES);
y0 = clamp(y0, 0, HEIGHT_IN_TILES);
x1 = clamp(x1, 0, WIDTH_IN_TILES);
y1 = clamp(y1, 0, HEIGHT_IN_TILES);
Path path;
path.bbox = uvec4(x0, y0, x1, y1);
uint tile_count = (x1 - x0) * (y1 - y0);
sh_tile_count[th_ix] = tile_count;
// Prefix sum of sh_tile_count
for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
barrier();
if (th_ix >= (1 << i)) {
tile_count += sh_tile_count[th_ix - (1 << i)];
}
barrier();
sh_tile_count[th_ix] = tile_count;
}
if (th_ix == TILE_ALLOC_WG - 1) {
sh_tile_alloc = atomicAdd(alloc, tile_count * Tile_size);
}
barrier();
uint alloc_start = sh_tile_alloc;
if (element_ix < n_elements) {
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
path.tiles = TileRef(alloc_start + Tile_size * tile_subix);
Path_write(path_ref, path);
}
// Zero out allocated tiles efficiently
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
uint start_ix = alloc_start >> 2;
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
// Note: this interleaving is faster than using Tile_write
// by a significant amount.
tile[start_ix + i] = 0;
}
}