vello/piet-gpu/shader/tile_alloc.comp
Raph Levien 05e81acebc Basically get gradients working
Separate out render context upload from renderer creation. Upload ramps
to GPU buffer. Encode gradients to scene description. Fix a number of
bugs in uploading and processing.

This renders gradients in a test image, but has some shortcomings. For
one, staging buffers need to be applied for a couple things (they're
just host mapped for now). Also, the interaction between sRGB and
premultiplied alpha isn't quite right. The size of the gradient ramp
buffer is fixed and should be dynamic.

And of course there's always more optimization to be done, including
making the upload of gradient ramps more incremental, and probably
hashing of the stops instead of the processed ramps.
2021-08-09 16:16:46 -07:00

106 lines
3.4 KiB
GLSL

// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Allocation and initialization of tiles for paths.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
#include "tile.h"
// scale factors useful for converting coordinates to tiles
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))
shared uint sh_tile_count[TILE_ALLOC_WG];
shared MallocResult sh_tile_alloc;
void main() {
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
uint tag = Annotated_Nop;
if (element_ix < conf.n_elements) {
tag = Annotated_tag(conf.anno_alloc, ref).tag;
}
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
case Annotated_Color:
case Annotated_LinGradient:
case Annotated_Image:
case Annotated_BeginClip:
case Annotated_EndClip:
// Note: we take advantage of the fact that fills, strokes, and
// clips have compatible layout.
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
x0 = int(floor(clip.bbox.x * SX));
y0 = int(floor(clip.bbox.y * SY));
x1 = int(ceil(clip.bbox.z * SX));
y1 = int(ceil(clip.bbox.w * SY));
break;
}
x0 = clamp(x0, 0, int(conf.width_in_tiles));
y0 = clamp(y0, 0, int(conf.height_in_tiles));
x1 = clamp(x1, 0, int(conf.width_in_tiles));
y1 = clamp(y1, 0, int(conf.height_in_tiles));
Path path;
path.bbox = uvec4(x0, y0, x1, y1);
uint tile_count = (x1 - x0) * (y1 - y0);
if (tag == Annotated_EndClip) {
// Don't actually allocate tiles for an end clip, but we do want
// the path structure (especially bbox) allocated for it.
tile_count = 0;
}
sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count;
// Prefix sum of sh_tile_count
for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
barrier();
if (th_ix >= (1 << i)) {
total_tile_count += sh_tile_count[th_ix - (1 << i)];
}
barrier();
sh_tile_count[th_ix] = total_tile_count;
}
if (th_ix == TILE_ALLOC_WG - 1) {
sh_tile_alloc = malloc(total_tile_count * Tile_size);
}
barrier();
MallocResult alloc_start = sh_tile_alloc;
if (alloc_start.failed || mem_error != NO_ERROR) {
return;
}
if (element_ix < conf.n_elements) {
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
path.tiles = TileRef(tiles_alloc.offset);
Path_write(conf.tile_alloc, path_ref, path);
}
// Zero out allocated tiles efficiently
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
uint start_ix = alloc_start.alloc.offset >> 2;
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
// Note: this interleaving is faster than using Tile_write
// by a significant amount.
write_mem(alloc_start.alloc, start_ix + i, 0);
}
}