vello/piet-gpu/shader/draw_reduce.comp
Raph Levien 875c8badf4 Add draw object stage
This is one of the stages in the new element pipeline. It's a simple
one, just a prefix sum of a couple counts, and some of it will probably
get merged with a downstream stage, but we'll do it separately for now
for convenience.

This patch also contains an update to Vulkan tools 1.2.198, which
accounts for the large diff of translated shaders.
2021-12-02 13:37:16 -08:00

62 lines
1.6 KiB
GLSL

// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The reduction phase for draw scan implemented as a tree reduction.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define N_ROWS 8
#define LG_WG_SIZE 9
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#include "scene.h"
#include "drawtag.h"
#define Monoid DrawMonoid
layout(set = 0, binding = 3) buffer OutBuf {
Monoid[] outbuf;
};
shared Monoid sh_scratch[WG_SIZE];
void main() {
uint ix = gl_GlobalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size);
uint tag_word = Element_tag(ref).tag;
Monoid agg = map_tag(tag_word);
for (uint i = 1; i < N_ROWS; i++) {
tag_word = Element_tag(Element_index(ref, i)).tag;
agg = combine_tag_monoid(agg, map_tag(tag_word));
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
// We could make this predicate tighter, but would it help?
if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) {
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)];
agg = combine_tag_monoid(agg, other);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0) {
outbuf[gl_WorkGroupID.x] = agg;
}
}