2022-02-18 11:25:41 +11:00
|
|
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
|
|
|
|
// The reduce pass for clip stack processing.
|
|
|
|
|
|
|
|
// The primary input is a sequence of path ids representing paths to
|
|
|
|
// push, with a special value of ~0 to represent pop.
|
|
|
|
|
|
|
|
// For each path, the bounding box is loaded from the path bbox buffer
// (path_bbox_alloc), as written by pathseg.
|
|
|
|
|
|
|
|
// Output is a stack monoid reduction for the partition. The Bic
// is stored at clip_bic_alloc, and the stack slice at clip_stack_alloc.
|
|
|
|
|
|
|
|
// Note: for this shader, only pushes are represented in the stack
|
|
|
|
// monoid reduction output, so we don't have to worry about the
|
|
|
|
// interpretation of pops.
|
|
|
|
|
|
|
|
#version 450
|
|
|
|
#extension GL_GOOGLE_include_directive : enable
|
|
|
|
|
|
|
|
#include "mem.h"
|
|
|
|
#include "setup.h"
|
|
|
|
|
|
|
|
// Workgroup size is 2^(7 + LG_WG_FACTOR). LG_WG_FACTOR is not defined in
// this file (presumably it is provided by setup.h).
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
#define WG_SIZE (1 << LG_WG_SIZE)
// One partition is reduced per workgroup, one input element per thread.
#define PARTITION_SIZE WG_SIZE

layout(local_size_x = WG_SIZE) in;
|
|
|
|
|
|
|
|
// Read-only configuration block at binding 1. This shader reads the
// clip_alloc, clip_bic_alloc, clip_stack_alloc and path_bbox_alloc offsets
// from it; each offset is shifted right by 2 before indexing `memory`
// (presumably byte offsets into an array of 32-bit words).
layout(binding = 1) readonly buffer ConfigBuf {
    Config conf;
};
|
|
|
|
|
|
|
|
// Intermediate per-entry state for clip processing. store_clip_el
// serializes one of these as 5 words: parent_ix followed by the four
// bbox components.
struct ClipEl {
    // Index of the parent node.
    uint parent_ix;
    // Bounding box, in the component order produced by load_path_bbox
    // (left, top, right, bottom).
    vec4 bbox;
};
|
|
|
|
|
|
|
|
// Element of the bicyclic monoid used to summarize a run of clip ops.
// In main(), a single pop is Bic(1, 0) and a single push is Bic(0, 1),
// so for a run `a` counts pops left unmatched and `b` counts pushes
// left unmatched.
struct Bic {
    uint a;
    uint b;
};
|
|
|
|
|
|
|
|
// Combine two adjacent bicyclic-monoid elements, with `x` on the left and
// `y` on the right. min(x.b, y.a) entries cancel between the two sides
// and are removed from both totals.
Bic bic_combine(Bic x, Bic y) {
    uint cancelled = min(x.b, y.a);
    Bic combined;
    combined.a = x.a + y.a - cancelled;
    combined.b = x.b + y.b - cancelled;
    return combined;
}
|
|
|
|
|
|
|
|
// Workgroup-shared scratch space, one slot per thread.

// Reverse-scan state of the bicyclic monoid.
shared Bic sh_bic[WG_SIZE];
// For each stack slot, the local thread index of the push that owns it.
shared uint sh_parent[WG_SIZE];
// For each stack slot, the path id carried by that push.
shared uint sh_path_ix[WG_SIZE];
// Scratch for the bbox intersection scan; only referenced by the
// currently disabled (#if 0) section of main().
shared vec4 sh_bbox[WG_SIZE];
|
|
|
|
|
|
|
|
// Load the bounding box of path `path_ix` from the path bbox buffer
// (as written by pathseg).
//
// Each path's record starts 6 words apart; the first four words are read
// as unsigned integers, converted to float, and have 32768 subtracted.
// Returns vec4(left, top, right, bottom).
vec4 load_path_bbox(uint path_ix) {
    uint word_ix = (conf.path_bbox_alloc.offset >> 2) + 6 * path_ix;
    uvec4 raw = uvec4(memory[word_ix],
                      memory[word_ix + 1],
                      memory[word_ix + 2],
                      memory[word_ix + 3]);
    return vec4(raw) - 32768.0;
}
|
|
|
|
|
|
|
|
// Intersect two bounding boxes stored as (x0, y0, x1, y1): the result
// takes the component-wise max of the first corners and the
// component-wise min of the second corners. No check is made for an
// empty result.
vec4 bbox_intersect(vec4 a, vec4 b) {
    vec2 lo = max(a.xy, b.xy);
    vec2 hi = min(a.zw, b.zw);
    return vec4(lo, hi);
}
|
|
|
|
|
|
|
|
// Write `bic` as entry `ix` of the clip bic buffer. Entries are two
// words each: `a` first, then `b`.
void store_bic(uint ix, Bic bic) {
    uint dst = (conf.clip_bic_alloc.offset >> 2) + 2 * ix;
    memory[dst] = bic.a;
    memory[dst + 1] = bic.b;
}
|
|
|
|
|
|
|
|
// Write `el` as entry `ix` of the clip stack buffer. Entries are five
// words each: parent_ix, then the raw bit patterns of bbox.x, .y, .z, .w.
void store_clip_el(uint ix, ClipEl el) {
    uint dst = (conf.clip_stack_alloc.offset >> 2) + 5 * ix;
    uvec4 bbox_bits = floatBitsToUint(el.bbox);
    memory[dst] = el.parent_ix;
    memory[dst + 1] = bbox_bits.x;
    memory[dst + 2] = bbox_bits.y;
    memory[dst + 3] = bbox_bits.z;
    memory[dst + 4] = bbox_bits.w;
}
|
|
|
|
|
|
|
|
// Reduce one partition of the clip input stream.
//
// Each thread reads one input word: a value that is non-negative when
// reinterpreted as int is a push of that path id, anything else is a pop.
// The workgroup then
//   1. runs a reverse (suffix) scan of the bicyclic monoid over the
//      partition and stores the partition total via store_bic;
//   2. compacts the pushes that no later pop in the partition cancels
//      into stack slots 0..size-1, the earliest such push in slot 0;
//   3. loads each surviving push's bbox and writes a ClipEl for it.
void main() {
    uint th = gl_LocalInvocationID.x;
    uint wg_ix = gl_WorkGroupID.x;
    uint global_ix = gl_GlobalInvocationID.x;

    uint inp = memory[(conf.clip_alloc.offset >> 2) + global_ix];
    bool is_push = int(inp) >= 0;

    // Reverse scan of the bicyclic semigroup: afterwards sh_bic[th] holds
    // the combination of elements th..WG_SIZE-1.
    uint push_bit = uint(is_push);
    Bic bic = Bic(1 - push_bit, push_bit);
    sh_bic[th] = bic;
    for (uint i = 0; i < LG_WG_SIZE; i++) {
        uint peer = th + (1u << i);
        barrier();
        if (peer < WG_SIZE) {
            bic = bic_combine(bic, sh_bic[peer]);
        }
        // Every thread must finish reading before anyone overwrites.
        barrier();
        sh_bic[th] = bic;
    }

    // Thread 0 now holds the reduction of the whole partition.
    if (th == 0) {
        store_bic(wg_ix, bic);
    }
    barrier();

    // Number of pushes left unmatched in this partition.
    uint size = sh_bic[0].b;

    // Summary of everything strictly after this thread's element.
    bic = Bic(0, 0);
    if (th + 1 < WG_SIZE) {
        bic = sh_bic[th + 1];
    }

    // A push survives when no pop after it is left unmatched. The number
    // of surviving pushes after it (bic.b) fixes its slot.
    if (is_push && bic.a == 0) {
        uint local_ix = size - bic.b - 1;
        sh_parent[local_ix] = th;
        sh_path_ix[local_ix] = inp;
    }
    barrier();

    // Do forward scan of bounding box intersection. `bbox` is only
    // assigned, and only read, by threads with th < size.
    vec4 bbox;
    if (th < size) {
        bbox = load_path_bbox(sh_path_ix[th]);
    }
    // Not necessary if depth is bounded by wg size
#if 0
    for (uint i = 0; i < LG_WG_SIZE; i++) {
        // We gate so we never access uninit data, but it might
        // be more efficient to avoid the conditionals.
        if (th < size) {
            sh_bbox[th] = bbox;
        }
        barrier();
        if (th < size && th >= (1u << i)) {
            bbox = bbox_intersect(sh_bbox[th - (1u << i)], bbox);
        }
        barrier();
    }
#endif
    if (th < size) {
        // Rebase the local thread index of the owning push to a global
        // element index.
        uint parent_ix = sh_parent[th] + wg_ix * PARTITION_SIZE;
        store_clip_el(global_ix, ClipEl(parent_ix, bbox));
    }
}
|