piet-gpu/shader: treat memoryBarrierBuffer as a control barrier

memoryBarrierBuffer is mapped to the threadgroup_barrier function in
Metal, which is a control barrier that must be executed by all threads
in a threadgroup (or by none). This change establishes that property for
the two memory barriers we have by moving them out of branches that only
some threads execute.

While here, remove ENABLE_IMAGE_INDICES completely; it was disabled in
an earlier change.

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
Elias Naur 2021-08-20 11:20:39 +02:00
parent 698a1546d2
commit 039cfcf0de
4 changed files with 7 additions and 10 deletions

View file

@ -221,8 +221,10 @@ void main() {
if (gl_LocalInvocationID.x == WG_SIZE - 1) { if (gl_LocalInvocationID.x == WG_SIZE - 1) {
// Note: with memory model, we'd want to generate the atomic store version of this. // Note: with memory model, we'd want to generate the atomic store version of this.
State_write(state_aggregate_ref(part_ix), agg); State_write(state_aggregate_ref(part_ix), agg);
uint flag = FLAG_AGGREGATE_READY; }
memoryBarrierBuffer(); memoryBarrierBuffer();
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
uint flag = FLAG_AGGREGATE_READY;
if (part_ix == 0) { if (part_ix == 0) {
State_write(state_prefix_ref(part_ix), agg); State_write(state_prefix_ref(part_ix), agg);
flag = FLAG_PREFIX_READY; flag = FLAG_PREFIX_READY;
@ -274,11 +276,12 @@ void main() {
State inclusive_prefix = combine_state(exclusive, agg); State inclusive_prefix = combine_state(exclusive, agg);
sh_prefix = exclusive; sh_prefix = exclusive;
State_write(state_prefix_ref(part_ix), inclusive_prefix); State_write(state_prefix_ref(part_ix), inclusive_prefix);
memoryBarrierBuffer();
flag = FLAG_PREFIX_READY;
state[state_flag_index(part_ix)] = flag;
} }
} }
memoryBarrierBuffer();
if (gl_LocalInvocationID.x == WG_SIZE - 1 && part_ix != 0) {
state[state_flag_index(part_ix)] = FLAG_PREFIX_READY;
}
barrier(); barrier();
if (part_ix != 0) { if (part_ix != 0) {
exclusive = sh_prefix; exclusive = sh_prefix;

Binary file not shown.

View file

@ -8,9 +8,6 @@
#version 450 #version 450
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
#ifdef ENABLE_IMAGE_INDICES
#extension GL_EXT_nonuniform_qualifier : enable
#endif
#include "mem.h" #include "mem.h"
#include "setup.h" #include "setup.h"

View file

@ -15,9 +15,6 @@
#define PTCL_INITIAL_ALLOC 1024 #define PTCL_INITIAL_ALLOC 1024
// This is now set in the ninja file during compilation
//#define ENABLE_IMAGE_INDICES
// These should probably be renamed and/or reworked. In the binning // These should probably be renamed and/or reworked. In the binning
// kernel, they represent the number of bins. Also, the workgroup size // kernel, they represent the number of bins. Also, the workgroup size
// of that kernel is equal to the number of bins, but should probably // of that kernel is equal to the number of bins, but should probably