mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
10a624ee75
This is our version of the standard message passing litmus test for atomics. It does a bunch in parallel and permutes the reads and writes extensively, so it's been more sensitive than existing tests.
55 lines
1.7 KiB
Plaintext
Generated
55 lines
1.7 KiB
Plaintext
Generated
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
#pragma clang diagnostic ignored "-Wunused-variable"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
#include <metal_atomic>
|
|
|
|
using namespace metal;
|
|
|
|
struct Element
|
|
{
|
|
uint data;
|
|
uint flag;
|
|
};
|
|
|
|
struct DataBuf
|
|
{
|
|
Element data[1];
|
|
};
|
|
|
|
struct ControlBuf
|
|
{
|
|
uint failures;
|
|
};
|
|
|
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
|
|
|
static inline __attribute__((always_inline))
|
|
uint permute_flag_ix(thread const uint& data_ix)
|
|
{
|
|
return (data_ix * 419u) & 65535u;
|
|
}
|
|
|
|
kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
|
{
|
|
atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed);
|
|
threadgroup_barrier(mem_flags::mem_device);
|
|
uint param = gl_GlobalInvocationID.x;
|
|
uint write_flag_ix = permute_flag_ix(param);
|
|
atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed);
|
|
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
|
|
uint param_1 = read_ix;
|
|
uint read_flag_ix = permute_flag_ix(param_1);
|
|
uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed);
|
|
uint flag = _58;
|
|
threadgroup_barrier(mem_flags::mem_device);
|
|
uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed);
|
|
uint data = _62;
|
|
if (flag > data)
|
|
{
|
|
uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed);
|
|
}
|
|
}
|
|
|