vello/piet-gpu/shader/path_coarse.comp
Raph Levien 3a8227d025 Non-load balanced coarse path raster
This is a bit of a revert of the load-balanced ("more parallel") coarse
path rasterizer, but includes fills and also uses atomicExchange.

I'm doing it this way because it should be considerably easier to do
flattening in this structure, even though there will be some performance
regression.
2020-06-09 15:09:53 -07:00

129 lines
4.4 KiB
Plaintext

// Coarse rasterization of path segments.
// Allocates tile segments and links them into the per-tile segment lists of each path.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
// Workgroup size, expressed as a power of two: COARSE_WG = 2^LG_COARSE_WG = 32
// invocations along x. main() handles one path segment per invocation.
#define LG_COARSE_WG 5
#define COARSE_WG (1 << LG_COARSE_WG)
layout(local_size_x = COARSE_WG, local_size_y = 1) in;
// Path segment data as raw 32-bit words. main() addresses segments through
// PathSegRef(element_ix * PathSeg_size); the PathSeg_* accessors that decode
// this array are presumably supplied by pathseg.h — confirm.
layout(set = 0, binding = 0) buffer PathSegBuf {
uint[] pathseg;
};
// Counters shared with the rest of the pipeline. Field order is the buffer
// layout, so it must not be rearranged.
layout(set = 0, binding = 1) buffer AllocBuf {
// Not referenced by the code visible in this file.
uint n_paths;
// Number of path segments; invocations whose index is >= this value keep
// the PathSeg_Nop tag in main().
uint n_pathseg;
// Allocation cursor: main() atomically advances it by the space it reserves
// for TileSeg records and uses the previous value as its write offset.
uint alloc;
};
// Tile data as raw 32-bit words. main() converts a TileRef offset to a word
// index with (offset >> 2), atomically exchanges word 0 of the tile with the
// offset of the newest TileSeg, and atomically adds the backdrop delta to
// word 1. NOTE(review): the TileSeg records themselves are written via
// TileSeg_write, presumably into this same buffer — confirm against tile.h.
layout(set = 0, binding = 2) buffer TileBuf {
uint[] tile;
};
#include "pathseg.h"
#include "tile.h"
// Scale factors for converting pixel coordinates to tile coordinates:
// multiplying a pixel x (resp. y) by SX (resp. SY) gives the position in
// units of tiles. TILE_WIDTH_PX / TILE_HEIGHT_PX come from an included header.
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))
// Coarse rasterization entry point: one invocation per path segment.
//
// For a fill or stroke line segment this:
//   1. computes the range of tiles the segment's (stroke-expanded) bounding
//      box touches, clamped to the owning path's tile bounding box;
//   2. reserves space for TileSeg records with a single atomicAdd on `alloc`;
//   3. walks the tile rows and, for each tile the segment may cover, writes a
//      TileSeg and pushes it onto that tile's list via atomicExchange;
//   4. for fill lines, additionally adds a +1/-1 backdrop delta to the tile
//      just right of where the segment crosses the top edge of a tile row.
//
// Invocations whose tag is not a line (including out-of-range invocations,
// which keep PathSeg_Nop) return without touching any buffer.
void main() {
    uint element_ix = gl_GlobalInvocationID.x;
    PathSegRef ref = PathSegRef(element_ix * PathSeg_size);

    uint tag = PathSeg_Nop;
    if (element_ix < n_pathseg) {
        tag = PathSeg_tag(ref);
    }

    // Only line segments are rasterized. Anything else must stop here:
    // without this guard the code below would run on an uninitialized `line`
    // (undefined path index, bounding box and coverage coefficients), read an
    // arbitrary Path, and bump the shared `alloc` counter by a garbage amount.
    // Returning early is safe because this shader uses no barriers.
    if (tag != PathSeg_FillLine && tag != PathSeg_StrokeLine) {
        return;
    }

    // Both line tags are decoded with the stroke-line reader.
    // NOTE(review): presumably FillLine shares the StrokeLine layout — confirm
    // against pathseg.h.
    PathStrokeLine line = PathSeg_StrokeLine_read(ref);

    // Bounding box of the segment in pixel coordinates, expanded by the stroke.
    float xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
    float xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
    float ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
    float ymax = max(line.p0.y, line.p1.y) + line.stroke.y;

    float dx = line.p1.x - line.p0.x;
    float dy = line.p1.y - line.p0.y;

    // Set up the per-tile-row coverage formula used below:
    //   t = a + b * y   is the x position (in tiles) of the line at the
    //                   vertical center of tile row y;
    //   c               is the half-width (in tiles) of the span the segment
    //                   can cover within one tile row, including the stroke.
    // Near-horizontal lines get a huge inverse slope so that the span is
    // limited only by the clamp to [x0, x1].
    float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
    float c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
    float b = invslope; // Note: assumes square tiles, otherwise scale.
    float a = (line.p0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;

    // Bounding box in tile coordinates (x1/y1 exclusive).
    int x0 = int(floor(xmin * SX));
    int x1 = int(ceil(xmax * SX));
    int y0 = int(floor(ymin * SY));
    int y1 = int(ceil(ymax * SY));

    // Clamp to the tile bounding box of the path that owns this segment.
    uint path_ix = line.path_ix;
    Path path = Path_read(PathRef(path_ix * Path_size));
    ivec4 bbox = ivec4(path.bbox);
    x0 = clamp(x0, bbox.x, bbox.z);
    y0 = clamp(y0, bbox.y, bbox.w);
    x1 = clamp(x1, bbox.x, bbox.z);
    y1 = clamp(y1, bbox.y, bbox.w);

    float t = a + b * float(y0);
    // Tiles of a path are indexed row-major relative to the path's bbox;
    // `base + x` is the index of tile (x, y) for the current row.
    int stride = bbox.z - bbox.x;
    int base = (y0 - bbox.y) * stride - bbox.x;

    // Reserve one TileSeg slot per tile of the clamped bounding box. This is
    // an upper bound on what the loops below actually write.
    // TODO: can be tighter, use c to bound width
    uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
    // Consider using subgroups to aggregate atomic add.
    uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size);

    TileSeg tile_seg;
    for (int y = y0; y < y1; y++) {
        float tile_y0 = float(y * TILE_HEIGHT_PX);

        // Fill lines that start at or above the top edge of this tile row
        // contribute a backdrop delta to the first tile to the right of the
        // crossing (t - 0.5 * b is the line's x at the top of the row).
        if (tag == PathSeg_FillLine && min(line.p0.y, line.p1.y) <= tile_y0) {
            int xray = max(int(ceil(t - 0.5 * b)), bbox.x);
            if (xray < bbox.z) {
                // Sign depends on the vertical direction of the segment.
                int backdrop = line.p1.y < line.p0.y ? 1 : -1;
                TileRef tile_ref = Tile_index(path.tiles, uint(base + xray));
                uint tile_el = tile_ref.offset >> 2;
                // Word 1 of the tile record accumulates the backdrop.
                atomicAdd(tile[tile_el + 1], backdrop);
            }
        }

        // Horizontal span of tiles the segment may cover in this row.
        int xx0 = clamp(int(floor(t - c)), x0, x1);
        int xx1 = clamp(int(ceil(t + c)), x0, x1);
        for (int x = xx0; x < xx1; x++) {
            float tile_x0 = float(x * TILE_WIDTH_PX);
            TileRef tile_ref = Tile_index(path.tiles, uint(base + x));
            uint tile_el = tile_ref.offset >> 2;
            // Push onto the tile's segment list: word 0 now points at the new
            // record, and the previous head becomes its `next`.
            uint old = atomicExchange(tile[tile_el], tile_offset);
            tile_seg.start = line.p0;
            tile_seg.end = line.p1;
            float y_edge = 0.0;
            if (tag == PathSeg_FillLine) {
                // y at which the line crosses the left edge of this tile.
                // NOTE(review): dx may be 0 for vertical lines; the quotient is
                // then expected to be non-finite and to fail the range test
                // below, yielding the 1e9 sentinel — confirm on target GPUs.
                y_edge = mix(line.p0.y, line.p1.y, (tile_x0 - line.p0.x) / dx);
                if (min(line.p0.x, line.p1.x) < tile_x0 && y_edge >= tile_y0 && y_edge < tile_y0 + TILE_HEIGHT_PX) {
                    // The segment enters/leaves through the left edge inside
                    // this tile: clip the endpoint lying left of the tile to
                    // the crossing point.
                    if (line.p0.x > line.p1.x) {
                        tile_seg.end = vec2(tile_x0, y_edge);
                    } else {
                        tile_seg.start = vec2(tile_x0, y_edge);
                    }
                } else {
                    // No left-edge crossing within this tile.
                    y_edge = 1e9;
                }
            }
            tile_seg.y_edge = y_edge;
            tile_seg.next.offset = old;
            TileSeg_write(TileSegRef(tile_offset), tile_seg);
            tile_offset += TileSeg_size;
        }

        // Advance to the next tile row.
        t += b;
        base += stride;
    }
}