diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp index 2d0a976..d37a2c6 100644 --- a/piet-gpu/shader/elements.comp +++ b/piet-gpu/shader/elements.comp @@ -221,8 +221,10 @@ void main() { if (gl_LocalInvocationID.x == WG_SIZE - 1) { // Note: with memory model, we'd want to generate the atomic store version of this. State_write(state_aggregate_ref(part_ix), agg); + } + memoryBarrierBuffer(); + if (gl_LocalInvocationID.x == WG_SIZE - 1) { uint flag = FLAG_AGGREGATE_READY; - memoryBarrierBuffer(); if (part_ix == 0) { State_write(state_prefix_ref(part_ix), agg); flag = FLAG_PREFIX_READY; @@ -274,11 +276,12 @@ void main() { State inclusive_prefix = combine_state(exclusive, agg); sh_prefix = exclusive; State_write(state_prefix_ref(part_ix), inclusive_prefix); - memoryBarrierBuffer(); - flag = FLAG_PREFIX_READY; - state[state_flag_index(part_ix)] = flag; } } + memoryBarrierBuffer(); + if (gl_LocalInvocationID.x == WG_SIZE - 1 && part_ix != 0) { + state[state_flag_index(part_ix)] = FLAG_PREFIX_READY; + } barrier(); if (part_ix != 0) { exclusive = sh_prefix; diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv index 8a4bda1..d1fd39a 100644 Binary files a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 3230c0b..e2f86f6 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -8,9 +8,6 @@ #version 450 #extension GL_GOOGLE_include_directive : enable -#ifdef ENABLE_IMAGE_INDICES -#extension GL_EXT_nonuniform_qualifier : enable -#endif #include "mem.h" #include "setup.h" diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index cfe5f28..cb41a4b 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -15,9 +15,6 @@ #define PTCL_INITIAL_ALLOC 1024 -// This is now set in the ninja file during compilation -//#define ENABLE_IMAGE_INDICES - // These should probably be renamed and/or reworked. In the binning // kernel, they represent the number of bins. Also, the workgroup size // of that kernel is equal to the number of bins, but should probably