2020-12-11 15:01:48 +01:00
|
|
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
|
2020-06-05 15:07:02 -07:00
|
|
|
// Propagation of tile backdrop for filling.
|
2020-06-28 15:37:27 +02:00
|
|
|
//
|
2021-05-27 11:32:33 +09:00
|
|
|
// Each thread reads one path element and calculates the row and column counts of spanned tiles
|
2020-06-28 15:37:27 +02:00
|
|
|
// based on the bounding box.
|
2021-05-27 11:32:33 +09:00
|
|
|
// The row count then goes through a prefix sum to redistribute and load-balance the work across the workgroup.
|
|
|
|
// In the following step, the workgroup loops over the corresponding tile rows per element in parallel.
|
2020-06-28 15:37:27 +02:00
|
|
|
// For each row the per tile backdrop will be read, as calculated in the previous coarse path segment kernel,
|
|
|
|
// and propagated from the left to the right (prefix summed).
|
|
|
|
//
|
|
|
|
// Output state:
|
|
|
|
// - Each path element has an array of tiles covering the whole path based on bounding box
|
|
|
|
// - Each tile per path element contains the 'backdrop' and a list of subdivided path segments
|
2020-06-05 15:07:02 -07:00
|
|
|
|
|
|
|
#version 450
|
|
|
|
#extension GL_GOOGLE_include_directive : enable
|
|
|
|
|
2020-12-11 18:30:20 +01:00
|
|
|
#include "mem.h"
|
2020-12-24 12:00:53 +01:00
|
|
|
#include "setup.h"
|
2020-06-05 15:07:02 -07:00
|
|
|
|
2020-09-13 12:58:47 +02:00
|
|
|
// Base-2 logarithm of BACKDROP_WG, the workgroup size in x. LG_WG_FACTOR is not defined in this file.
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
|
2020-06-06 08:23:40 -07:00
|
|
|
// Workgroup size in x; also the length of the shared per-element arrays, i.e. the number of path elements
// one workgroup handles (one element per thread with gl_LocalInvocationID.y == 0).
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
|
2021-06-08 16:29:40 +09:00
|
|
|
#ifndef BACKDROP_DIST_FACTOR
|
2021-05-27 11:32:33 +09:00
|
|
|
// Some paths (those covering a large area) can generate a lot of backdrop tiles; BACKDROP_DIST_FACTOR defines how many
|
|
|
|
// additional threads we should spawn for parallel row processing. The additional threads do not participate in the
|
|
|
|
// earlier stages (calculating the tile counts) but do work in the final prefix sum stage which has a lot more
|
|
|
|
// parallelism.
|
2021-06-08 16:29:40 +09:00
|
|
|
|
|
|
|
// This feature is opt-in: one variant is compiled with the following default, while the other variant is compiled with
|
|
|
|
// a larger BACKDROP_DIST_FACTOR, which is used on GPUs supporting a larger workgroup size to improve performance.
|
|
|
|
#define BACKDROP_DIST_FACTOR 1
|
|
|
|
#endif
|
2020-06-05 15:07:02 -07:00
|
|
|
|
2021-05-27 11:32:33 +09:00
|
|
|
layout(local_size_x = BACKDROP_WG, local_size_y = BACKDROP_DIST_FACTOR) in;
|
2020-06-05 15:07:02 -07:00
|
|
|
|
2020-12-11 18:30:20 +01:00
|
|
|
// Read-only buffer at set 0, binding 1 exposing the Config as `conf`. main() reads
// conf.n_elements, conf.anno_alloc and conf.tile_alloc from it.
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
|
|
|
Config conf;
|
2020-06-05 15:07:02 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
#include "annotated.h"
|
|
|
|
#include "tile.h"
|
|
|
|
|
2020-06-06 08:23:40 -07:00
|
|
|
// Per-element row counts written by the threads with gl_LocalInvocationID.y == 0. main() then turns the
// array in place into an inclusive prefix sum, so the last entry is the workgroup's total row count.
shared uint sh_row_count[BACKDROP_WG];
|
2020-12-24 12:00:53 +01:00
|
|
|
// Alloc built from each path element's tile array (path.tiles.offset, sized from the bounding box).
// Only written for elements that pass the tag / fill mode test in main().
shared Alloc sh_row_alloc[BACKDROP_WG];
|
2020-06-06 08:23:40 -07:00
|
|
|
// Bounding box width (bbox.z - bbox.x) of each path element, used as the number of tiles per row.
// Only written for elements that pass the tag / fill mode test in main().
shared uint sh_row_width[BACKDROP_WG];
|
|
|
|
|
2020-06-05 15:07:02 -07:00
|
|
|
// Kernel entry point. Three phases, separated by workgroup barriers:
//   1. Each thread with gl_LocalInvocationID.y == 0 inspects one path element and publishes its
//      row count, row width and tile allocation to shared memory.
//   2. The row counts are turned into an inclusive prefix sum.
//   3. Every thread of the workgroup (all y layers) picks rows from the combined row list,
//      locates the owning element by binary search, and prefix sums that row's per tile
//      backdrop values from left to right.
void main() {
    uint local_ix = gl_LocalInvocationIndex;
    uint element_ix = gl_GlobalInvocationID.x;
    AnnotatedRef anno_ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);

    // Only the y == 0 layer takes part in the per-element setup and the prefix sum;
    // the remaining layers join in for the row processing at the end.
    bool first_layer = gl_LocalInvocationID.y == 0;

    // Work assignment: 1 thread : 1 path element
    uint row_count = 0;
    bool mem_ok = mem_error == NO_ERROR;
    if (first_layer) {
        if (element_ix < conf.n_elements) {
            AnnotatedTag tag = Annotated_tag(conf.anno_alloc, anno_ref);
            switch (tag.tag) {
            case Annotated_Image:
            case Annotated_BeginClip:
            case Annotated_Color:
                // Only elements whose fill mode is MODE_NONZERO get backdrop rows.
                if (fill_mode_from_flags(tag.flags) == MODE_NONZERO) {
                    PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
                    Path path = Path_read(conf.tile_alloc, path_ref);
                    uint tiles_wide = path.bbox.z - path.bbox.x;
                    uint tiles_high = path.bbox.w - path.bbox.y;

                    sh_row_width[local_ix] = tiles_wide;
                    row_count = tiles_high;
                    // Paths that don't cross tile top edges don't have backdrops.
                    // The shortcut is not taken for a path that may cross the y = 0
                    // top edge and has merely been clipped to 1 row.
                    if (row_count == 1 && path.bbox.y > 0) {
                        // Note: this can probably be expanded to width = 2 as
                        // long as it doesn't cross the left edge.
                        row_count = 0;
                    }
                    // The allocation always spans the full bounding box, even when the
                    // row count was zeroed above.
                    sh_row_alloc[local_ix] =
                        new_alloc(path.tiles.offset, tiles_wide * tiles_high * Tile_size, mem_ok);
                }
                break;
            }
        }
        sh_row_count[local_ix] = row_count;
    }

    // Prefix sum of sh_row_count. The stride doubles each round; the first barrier makes the
    // previous round's writes visible, the second keeps reads and writes of one round apart.
    for (uint stride = 1; stride < BACKDROP_WG; stride <<= 1) {
        barrier();
        if (first_layer && local_ix >= stride) {
            row_count += sh_row_count[local_ix - stride];
        }
        barrier();
        if (first_layer) {
            sh_row_count[local_ix] = row_count;
        }
    }
    barrier();

    // Work assignment: 1 thread : 1 path element row
    uint total_rows = sh_row_count[BACKDROP_WG - 1];
    uint threads_per_wg = BACKDROP_WG * BACKDROP_DIST_FACTOR;
    for (uint row = local_ix; row < total_rows; row += threads_per_wg) {
        // Binary search for the element owning this row: the first index whose inclusive
        // prefix sum exceeds `row`.
        uint el_ix = 0;
        for (uint half_span = BACKDROP_WG / 2; half_span > 0; half_span >>= 1) {
            uint probe = el_ix + half_span;
            if (row >= sh_row_count[probe - 1]) {
                el_ix = probe;
            }
        }

        uint width = sh_row_width[el_ix];
        if (width == 0 || !mem_ok) {
            continue;
        }

        // Process one row sequentially:
        // read the backdrop value per tile and prefix sum it.
        Alloc tiles_alloc = sh_row_alloc[el_ix];

        // Index of this row within its element.
        uint rows_before = 0;
        if (el_ix > 0) {
            rows_before = sh_row_count[el_ix - 1];
        }
        uint seq_ix = row - rows_before;

        // Word index of the first tile's backdrop in this row.
        // NOTE(review): the "+ 1" and the stride of 2 presumably reflect a two-word Tile with
        // the backdrop in the second word; confirm against tile.h.
        uint row_base_ix = (tiles_alloc.offset >> 2) + 1 + seq_ix * 2 * width;
        uint sum = read_mem(tiles_alloc, row_base_ix);
        for (uint x = 1; x < width; x++) {
            uint backdrop_ix = row_base_ix + 2 * x;
            sum += read_mem(tiles_alloc, backdrop_ix);
            write_mem(tiles_alloc, backdrop_ix, sum);
        }
    }
}
|