// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// The reduction phase for draw scan implemented as a tree reduction.
//
// Each invocation folds N_ROWS consecutive draw tag words from the scene
// buffer into one monoid; the workgroup then combines those per-invocation
// values through shared memory, and invocation 0 writes the workgroup's
// aggregate to outbuf[gl_WorkGroupID.x].

#version 450
#extension GL_GOOGLE_include_directive : enable

#include "mem.h"
#include "setup.h"

// Number of consecutive tag words folded sequentially by one invocation.
#define N_ROWS 8
// log2 of the workgroup size; LG_WG_FACTOR is not defined in this file
// (presumably supplied by one of the included headers -- confirm).
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
#define WG_SIZE (1 << LG_WG_SIZE)
// Number of tag words covered by one workgroup.
#define PARTITION_SIZE (WG_SIZE * N_ROWS)

layout(local_size_x = WG_SIZE, local_size_y = 1) in;

layout(binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

layout(binding = 2) readonly buffer SceneBuf {
    uint[] scene;
};

#include "scene.h"
#include "drawtag.h"

#define Monoid DrawMonoid

// One aggregate per workgroup, indexed by gl_WorkGroupID.x.
layout(set = 0, binding = 3) buffer OutBuf {
    Monoid[] outbuf;
};

// Per-invocation running aggregates exchanged during the tree reduction.
shared Monoid sh_scratch[WG_SIZE];

void main() {
    uint local_ix = gl_LocalInvocationID.x;

    // Word index of this invocation's first draw tag: drawtag_offset is
    // shifted right by 2 before being used as an index into the uint array.
    uint first_word = (conf.drawtag_offset >> 2) + gl_GlobalInvocationID.x * N_ROWS;

    // Sequential phase: fold this invocation's N_ROWS tags, left to right.
    Monoid agg = map_tag(scene[first_word]);
    for (uint row = 1; row < N_ROWS; row++) {
        agg = combine_draw_monoid(agg, map_tag(scene[first_word + row]));
    }
    sh_scratch[local_ix] = agg;

    // Tree phase: at each level, fold in the value published by the
    // invocation (1 << level) slots to the right, when such a slot exists.
    for (uint level = 0; level < LG_WG_SIZE; level++) {
        uint partner = local_ix + (1u << level);

        // Every invocation must have published its value for this level.
        barrier();
        // This bound only guards against reading past the array; a stricter
        // predicate is possible, but it is unclear whether it would pay off.
        if (partner < WG_SIZE) {
            agg = combine_draw_monoid(agg, sh_scratch[partner]);
        }
        // Every read of this level must finish before slots are overwritten.
        barrier();
        sh_scratch[local_ix] = agg;
    }

    // Invocation 0 has now folded in every slot of the workgroup.
    if (local_ix == 0) {
        outbuf[gl_WorkGroupID.x] = agg;
    }
}