mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
a0648a2153
The MSL translation of the prefix example had its bindings permuted; a flag prevents this (but, as is typical for shader translation, potentially creates other problems). Also use explicit unsigned literal to avoid DXC warnings.
54 lines
1.3 KiB
GLSL
54 lines
1.3 KiB
GLSL
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
// The reduction phase for prefix sum implemented as a tree reduction.
|
|
|
|
#version 450
|
|
|
|
// Number of consecutive input elements each invocation folds sequentially
// before the workgroup-wide tree reduction starts.
#define N_ROWS 8
// Base-2 logarithm of the workgroup size; this is also the number of
// tree-reduction steps executed in main().
#define LG_WG_SIZE 9
// Invocations per workgroup: 1 << 9 = 512.
#define WG_SIZE (1 << LG_WG_SIZE)
// Input elements covered by one workgroup: 512 * 8 = 4096.
// Not referenced elsewhere in this shader source.
#define PARTITION_SIZE (N_ROWS * WG_SIZE)
|
|
|
|
// One-dimensional workgroup of WG_SIZE invocations; the y and z extents are 1.
layout(local_size_x = WG_SIZE, local_size_y = 1, local_size_z = 1) in;
|
|
|
|
// Value type being reduced. It wraps a single unsigned integer, which
// combine_monoid() folds with addition.
struct Monoid {
    uint element;
};
|
|
|
|
// Read-only input elements (set 0, binding 0). Each invocation reads N_ROWS
// consecutive entries starting at gl_GlobalInvocationID.x * N_ROWS.
layout(set = 0, binding = 0) readonly buffer InBuf {
    Monoid inbuf[];
};
|
|
|
|
// Per-workgroup results (set 0, binding 1). Invocation 0 of each workgroup
// stores that workgroup's aggregate at index gl_WorkGroupID.x.
layout(set = 0, binding = 1) buffer OutBuf {
    Monoid outbuf[];
};
|
|
|
|
// Workgroup-shared scratch space: one slot per invocation, indexed by
// gl_LocalInvocationID.x, holding that invocation's current partial aggregate.
shared Monoid[WG_SIZE] sh_scratch;
|
|
|
|
// The monoid's binary operation: returns a Monoid whose element is the
// unsigned sum of the two operands' elements.
Monoid combine_monoid(Monoid a, Monoid b) {
    Monoid combined;
    combined.element = a.element + b.element;
    return combined;
}
|
|
|
|
// Reduces one partition of the input to a single Monoid per workgroup.
//
// Each invocation first folds N_ROWS consecutive input elements on its own.
// The workgroup then runs LG_WG_SIZE tree-reduction steps through sh_scratch,
// after which invocation 0 holds the aggregate of the whole workgroup and
// writes it to outbuf[gl_WorkGroupID.x].
void main() {
    uint lane = gl_LocalInvocationID.x;
    uint base = gl_GlobalInvocationID.x * N_ROWS;

    // TODO: gate buffer read
    // The reads below are unconditional: no bounds check is made against the
    // size of inbuf.
    Monoid acc = inbuf[base];
    for (uint row = 1; row < N_ROWS; row++) {
        acc = combine_monoid(acc, inbuf[base + row]);
    }
    sh_scratch[lane] = acc;

    for (uint step = 0; step < LG_WG_SIZE; step++) {
        // Slot of the invocation 2^step positions above this one.
        uint partner = lane + (1u << step);

        // Wait until every invocation has stored its partial for this step.
        barrier();
        // We could make this predicate tighter, but would it help?
        if (partner < WG_SIZE) {
            acc = combine_monoid(acc, sh_scratch[partner]);
        }
        // Wait until all reads of the old partials are done before any slot
        // is overwritten with the new value.
        barrier();
        sh_scratch[lane] = acc;
    }

    // Only the first invocation publishes the workgroup's result.
    if (lane == 0) {
        outbuf[gl_WorkGroupID.x] = acc;
    }
}
|