// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// Propagation of tile backdrop for filling.
//
// Each thread reads one path element and calculates the number of spanned tiles
// based on the bounding box.
// In a further compaction step, the workgroup loops over the corresponding tile rows per element in parallel.
// For each row the per tile backdrop will be read, as calculated in the previous coarse path segment kernel,
// and propagated from the left to the right (prefix summed).
//
// Output state:
// - Each path element has an array of tiles covering the whole path based on bounding box
// - Each tile per path element contains the 'backdrop' and a list of subdivided path segments

#version 450
#extension GL_GOOGLE_include_directive : enable

#include "mem.h"
#include "setup.h"

// Workgroup size: 2^(7 + LG_WG_FACTOR) threads, one path element per thread.
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
#define BACKDROP_WG (1 << LG_BACKDROP_WG)

layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;

layout(set = 0, binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

#include "annotated.h"
#include "tile.h"

// Per-workgroup scratch, one slot per thread (= per path element):
// after the scan loop in main(), sh_row_count holds the inclusive prefix
// sum of tile-row counts across the workgroup.
shared uint sh_row_count[BACKDROP_WG];
// Allocation covering the element's tile array (set only for elements
// that need backdrop propagation).
shared Alloc sh_row_alloc[BACKDROP_WG];
// Width of the element's bounding box, in tiles.
shared uint sh_row_width[BACKDROP_WG];
void main() {
    // Bail out if an earlier kernel reported an allocation failure
    // (mem_error / NO_ERROR come from mem.h); results would be garbage.
    if (mem_error != NO_ERROR) {
        return;
    }

    uint th_ix = gl_LocalInvocationID.x;
    uint element_ix = gl_GlobalInvocationID.x;
    AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);

    // Work assignment: 1 thread : 1 path element
    uint row_count = 0;
    if (element_ix < conf.n_elements) {
        AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
        switch (tag.tag) {
        case Annotated_Color:
            // Only nonzero-winding fills get a backdrop; other fill modes
            // skip propagation entirely.
            if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
                break;
            }
            // Fall through.
        case Annotated_FillImage:
        case Annotated_BeginClip:
            PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
            Path path = Path_read(conf.tile_alloc, path_ref);
            sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
            row_count = path.bbox.w - path.bbox.y;
            // Paths that don't cross tile top edges don't have backdrops.
            // Don't apply the optimization to paths that may cross the y = 0
            // top edge, but clipped to 1 row.
            if (row_count == 1 && path.bbox.y > 0) {
                // Note: this can probably be expanded to width = 2 as
                // long as it doesn't cross the left edge.
                row_count = 0;
            }
            Alloc path_alloc = new_alloc(
                path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
            sh_row_alloc[th_ix] = path_alloc;
        }
    }

    sh_row_count[th_ix] = row_count;
    // Inclusive prefix sum of sh_row_count across the workgroup
    // (offset-doubling scan, LG_BACKDROP_WG rounds).
    for (uint i = 0; i < LG_BACKDROP_WG; i++) {
        barrier();
        if (th_ix >= (1 << i)) {
            row_count += sh_row_count[th_ix - (1 << i)];
        }
        barrier();
        sh_row_count[th_ix] = row_count;
    }
    barrier();

    // Work assignment: 1 thread : 1 path element row
    uint total_rows = sh_row_count[BACKDROP_WG - 1];
    for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) {
        // Binary search the prefix-summed counts for the element owning
        // this row.
        uint el_ix = 0;
        for (uint i = 0; i < LG_BACKDROP_WG; i++) {
            uint probe = el_ix + ((BACKDROP_WG / 2) >> i);
            if (row >= sh_row_count[probe - 1]) {
                el_ix = probe;
            }
        }
        uint width = sh_row_width[el_ix];
        if (width > 0) {
            // Process one row sequentially: read the backdrop value of each
            // tile and prefix-sum it left to right.
            Alloc tiles_alloc = sh_row_alloc[el_ix];
            // Row index within this element's tile grid.
            uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
            // Index, in uints, of the backdrop word of the row's first tile:
            // the stride is 2 uints per tile and the '+ 1' selects the
            // backdrop field — layout per tile.h (confirm against Tile_size).
            uint tile_el_ix = (tiles_alloc.offset >> 2) + 1 + seq_ix * 2 * width;
            uint sum = read_mem(tiles_alloc, tile_el_ix);
            for (uint x = 1; x < width; x++) {
                tile_el_ix += 2;
                sum += read_mem(tiles_alloc, tile_el_ix);
                write_mem(tiles_alloc, tile_el_ix, sum);
            }
        }
    }
}