mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Non-load-balanced coarse path raster
This is a partial revert of the load-balanced ("more parallel") coarse path rasterizer, but it includes fills and also uses atomicExchange. I'm doing it this way because it should be considerably easier to do flattening in this structure, even though there will be some performance regression.
This commit is contained in:
parent
7118c8efc1
commit
3a8227d025
|
@ -33,24 +33,7 @@ layout(set = 0, binding = 2) buffer TileBuf {
|
||||||
#define SX (1.0 / float(TILE_WIDTH_PX))
|
#define SX (1.0 / float(TILE_WIDTH_PX))
|
||||||
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
||||||
|
|
||||||
shared uint sh_tile_count[COARSE_WG];
|
|
||||||
shared uint sh_width[COARSE_WG];
|
|
||||||
shared uint sh_draw_width[COARSE_WG];
|
|
||||||
shared uint sh_tag[COARSE_WG];
|
|
||||||
shared vec2 sh_p0[COARSE_WG];
|
|
||||||
shared vec2 sh_p1[COARSE_WG];
|
|
||||||
shared int sh_x0[COARSE_WG];
|
|
||||||
shared int sh_bbox_x1[COARSE_WG];
|
|
||||||
shared int sh_y0[COARSE_WG];
|
|
||||||
shared float sh_a[COARSE_WG];
|
|
||||||
shared float sh_b[COARSE_WG];
|
|
||||||
shared float sh_c[COARSE_WG];
|
|
||||||
shared uint sh_base[COARSE_WG];
|
|
||||||
shared uint sh_stride[COARSE_WG];
|
|
||||||
shared uint sh_alloc_start;
|
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
PathSegRef ref = PathSegRef(element_ix * PathSeg_size);
|
PathSegRef ref = PathSegRef(element_ix * PathSeg_size);
|
||||||
|
|
||||||
|
@ -58,32 +41,27 @@ void main() {
|
||||||
if (element_ix < n_pathseg) {
|
if (element_ix < n_pathseg) {
|
||||||
tag = PathSeg_tag(ref);
|
tag = PathSeg_tag(ref);
|
||||||
}
|
}
|
||||||
sh_tag[th_ix] = tag;
|
|
||||||
// Setup for coverage algorithm.
|
// Setup for coverage algorithm.
|
||||||
float a, b, c;
|
float a, b, c;
|
||||||
// Bounding box of element in pixel coordinates.
|
// Bounding box of element in pixel coordinates.
|
||||||
float xmin, xmax, ymin, ymax;
|
float xmin, xmax, ymin, ymax;
|
||||||
PathStrokeLine line;
|
PathStrokeLine line;
|
||||||
|
float dx;
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case PathSeg_FillLine:
|
case PathSeg_FillLine:
|
||||||
case PathSeg_StrokeLine:
|
case PathSeg_StrokeLine:
|
||||||
line = PathSeg_StrokeLine_read(ref);
|
line = PathSeg_StrokeLine_read(ref);
|
||||||
sh_p0[th_ix] = line.p0;
|
|
||||||
sh_p1[th_ix] = line.p1;
|
|
||||||
xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
|
xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
|
||||||
xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
|
xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
|
||||||
ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
|
ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
|
||||||
ymax = max(line.p0.y, line.p1.y) + line.stroke.y;
|
ymax = max(line.p0.y, line.p1.y) + line.stroke.y;
|
||||||
float dx = line.p1.x - line.p0.x;
|
dx = line.p1.x - line.p0.x;
|
||||||
float dy = line.p1.y - line.p0.y;
|
float dy = line.p1.y - line.p0.y;
|
||||||
// Set up for per-scanline coverage formula, below.
|
// Set up for per-scanline coverage formula, below.
|
||||||
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
|
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
|
||||||
c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
|
c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
|
||||||
b = invslope; // Note: assumes square tiles, otherwise scale.
|
b = invslope; // Note: assumes square tiles, otherwise scale.
|
||||||
a = (line.p0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
|
a = (line.p0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
|
||||||
sh_a[th_ix] = a;
|
|
||||||
sh_b[th_ix] = b;
|
|
||||||
sh_c[th_ix] = c;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
int x0 = int(floor((xmin) * SX));
|
int x0 = int(floor((xmin) * SX));
|
||||||
|
@ -98,96 +76,53 @@ void main() {
|
||||||
y0 = clamp(y0, bbox.y, bbox.w);
|
y0 = clamp(y0, bbox.y, bbox.w);
|
||||||
x1 = clamp(x1, bbox.x, bbox.z);
|
x1 = clamp(x1, bbox.x, bbox.z);
|
||||||
y1 = clamp(y1, bbox.y, bbox.w);
|
y1 = clamp(y1, bbox.y, bbox.w);
|
||||||
sh_x0[th_ix] = x0;
|
float t = a + b * float(y0);
|
||||||
sh_bbox_x1[th_ix] = bbox.z;
|
|
||||||
// TODO: can get rid of this (fold into base), with care (also need to update `a`)
|
|
||||||
sh_y0[th_ix] = y0;
|
|
||||||
int stride = bbox.z - bbox.x;
|
int stride = bbox.z - bbox.x;
|
||||||
sh_stride[th_ix] = stride;
|
int base = (y0 - bbox.y) * stride - bbox.x;
|
||||||
sh_base[th_ix] = path.tiles.offset - (bbox.y * stride + bbox.x) * Tile_size;
|
// TODO: can be tighter, use c to bound width
|
||||||
uint width = uint(x1 - x0);
|
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
||||||
sh_width[th_ix] = width;
|
// Consider using subgroups to aggregate atomic add.
|
||||||
uint draw_width = min(width, uint(1.0 + ceil(2.0 * c)));
|
uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size);
|
||||||
if (draw_width == 0 && bbox.x == 0 && bbox.z > 0) {
|
TileSeg tile_seg;
|
||||||
// Create opportunity to draw backdrop for segments to the left of viewport.
|
for (int y = y0; y < y1; y++) {
|
||||||
// Note: predicate can be strengthened to exclude segments that don't cross
|
float tile_y0 = float(y * TILE_HEIGHT_PX);
|
||||||
// a horizontal tile boundary.
|
if (tag == PathSeg_FillLine && min(line.p0.y, line.p1.y) <= tile_y0) {
|
||||||
draw_width = 1;
|
int xray = max(int(ceil(t - 0.5 * b)), bbox.x);
|
||||||
}
|
if (xray < bbox.z) {
|
||||||
sh_draw_width[th_ix] = draw_width;
|
int backdrop = line.p1.y < line.p0.y ? 1 : -1;
|
||||||
uint tile_count = draw_width * uint(y1 - y0);
|
TileRef tile_ref = Tile_index(path.tiles, uint(base + xray));
|
||||||
|
uint tile_el = tile_ref.offset >> 2;
|
||||||
sh_tile_count[th_ix] = tile_count;
|
atomicAdd(tile[tile_el + 1], backdrop);
|
||||||
for (uint i = 0; i < LG_COARSE_WG; i++) {
|
|
||||||
barrier();
|
|
||||||
if (th_ix >= (1 << i)) {
|
|
||||||
tile_count += sh_tile_count[th_ix - (1 << i)];
|
|
||||||
}
|
|
||||||
barrier();
|
|
||||||
sh_tile_count[th_ix] = tile_count;
|
|
||||||
}
|
|
||||||
if (th_ix == COARSE_WG - 1) {
|
|
||||||
sh_alloc_start = atomicAdd(alloc, tile_count * TileSeg_size);
|
|
||||||
}
|
|
||||||
barrier();
|
|
||||||
uint alloc_start = sh_alloc_start;
|
|
||||||
uint total_tile_count = sh_tile_count[COARSE_WG - 1];
|
|
||||||
|
|
||||||
for (uint ix = th_ix; ix < total_tile_count; ix += COARSE_WG) {
|
|
||||||
// Binary search to find element
|
|
||||||
uint el_ix = 0;
|
|
||||||
for (uint i = 0; i < LG_COARSE_WG; i++) {
|
|
||||||
uint probe = el_ix + ((COARSE_WG / 2) >> i);
|
|
||||||
if (ix >= sh_tile_count[probe - 1]) {
|
|
||||||
el_ix = probe;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
|
|
||||||
uint draw_width = sh_draw_width[el_ix];
|
|
||||||
int x0 = sh_x0[el_ix];
|
|
||||||
int x1 = x0 + int(sh_width[el_ix]);
|
|
||||||
int dx = int(seq_ix % draw_width);
|
|
||||||
uint y = sh_y0[el_ix] + seq_ix / draw_width;
|
|
||||||
float b = sh_b[el_ix];
|
|
||||||
float t = sh_a[el_ix] + b * float(y);
|
|
||||||
float c = sh_c[el_ix];
|
|
||||||
int xx0 = clamp(int(floor(t - c)), x0, x1);
|
int xx0 = clamp(int(floor(t - c)), x0, x1);
|
||||||
int xx1 = clamp(int(ceil(t + c)), x0, x1);
|
int xx1 = clamp(int(ceil(t + c)), x0, x1);
|
||||||
int x = xx0 + dx;
|
for (int x = xx0; x < xx1; x++) {
|
||||||
vec2 tile_xy = vec2(x * TILE_WIDTH_PX, y * TILE_HEIGHT_PX);
|
float tile_x0 = float(x * TILE_WIDTH_PX);
|
||||||
vec2 p0 = sh_p0[el_ix];
|
TileRef tile_ref = Tile_index(path.tiles, uint(base + x));
|
||||||
vec2 p1 = sh_p1[el_ix];
|
uint tile_el = tile_ref.offset >> 2;
|
||||||
uint tile_el = (sh_base[el_ix] + uint(y * sh_stride[el_ix] + x) * Tile_size) >> 2;
|
|
||||||
if (sh_tag[el_ix] == PathSeg_FillLine && dx == 0 && min(p0.y, p1.y) <= tile_xy.y) {
|
|
||||||
int xray = max(int(ceil(t - 0.5 * b)), x0);
|
|
||||||
if (xray < sh_bbox_x1[el_ix]) {
|
|
||||||
int backdrop = p1.y < p0.y ? 1 : -1;
|
|
||||||
atomicAdd(tile[tile_el + 1 + 2 * (xray - x)], backdrop);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (x < xx1) {
|
|
||||||
uint tile_offset = alloc_start + ix * TileSeg_size;
|
|
||||||
uint old = atomicExchange(tile[tile_el], tile_offset);
|
uint old = atomicExchange(tile[tile_el], tile_offset);
|
||||||
TileSeg tile_seg;
|
tile_seg.start = line.p0;
|
||||||
|
tile_seg.end = line.p1;
|
||||||
float y_edge = 0.0;
|
float y_edge = 0.0;
|
||||||
if (sh_tag[el_ix] == PathSeg_FillLine) {
|
if (tag == PathSeg_FillLine) {
|
||||||
y_edge = mix(p0.y, p1.y, (tile_xy.x - p0.x) / (p1.x - p0.x));
|
y_edge = mix(line.p0.y, line.p1.y, (tile_x0 - line.p0.x) / dx);
|
||||||
if (min(p0.x, p1.x) < tile_xy.x && y_edge >= tile_xy.y && y_edge < tile_xy.y + TILE_HEIGHT_PX) {
|
if (min(line.p0.x, line.p1.x) < tile_x0 && y_edge >= tile_y0 && y_edge < tile_y0 + TILE_HEIGHT_PX) {
|
||||||
if (p0.x > p1.x) {
|
if (line.p0.x > line.p1.x) {
|
||||||
p1 = vec2(tile_xy.x, y_edge);
|
tile_seg.end = vec2(tile_x0, y_edge);
|
||||||
} else {
|
} else {
|
||||||
p0 = vec2(tile_xy.x, y_edge);
|
tile_seg.start = vec2(tile_x0, y_edge);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
y_edge = 1e9;
|
y_edge = 1e9;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tile_seg.start = p0;
|
|
||||||
tile_seg.end = p1;
|
|
||||||
tile_seg.y_edge = y_edge;
|
tile_seg.y_edge = y_edge;
|
||||||
tile_seg.next.offset = old;
|
tile_seg.next.offset = old;
|
||||||
TileSeg_write(TileSegRef(tile_offset), tile_seg);
|
TileSeg_write(TileSegRef(tile_offset), tile_seg);
|
||||||
|
tile_offset += TileSeg_size;
|
||||||
}
|
}
|
||||||
|
t += b;
|
||||||
|
base += stride;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in a new issue