mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-11 04:51:32 +11:00
a0648a2153
The MSL translation of the prefix example had its bindings permuted; a flag prevents this (but, as is typical for shader translation, potentially creates other problems). Also use explicit unsigned literal to avoid DXC warnings.
69 lines
1.7 KiB
Plaintext
Generated
69 lines
1.7 KiB
Plaintext
Generated
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
struct Monoid
|
|
{
|
|
uint element;
|
|
};
|
|
|
|
struct Monoid_1
|
|
{
|
|
uint element;
|
|
};
|
|
|
|
struct InBuf
|
|
{
|
|
Monoid_1 inbuf[1];
|
|
};
|
|
|
|
struct OutBuf
|
|
{
|
|
Monoid_1 outbuf[1];
|
|
};
|
|
|
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
|
|
|
static inline __attribute__((always_inline))
|
|
Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b)
|
|
{
|
|
return Monoid{ a.element + b.element };
|
|
}
|
|
|
|
kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _127 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
|
{
|
|
threadgroup Monoid sh_scratch[512];
|
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
|
Monoid agg;
|
|
agg.element = _40.inbuf[ix].element;
|
|
Monoid param_1;
|
|
for (uint i = 1u; i < 8u; i++)
|
|
{
|
|
Monoid param = agg;
|
|
param_1.element = _40.inbuf[ix + i].element;
|
|
agg = combine_monoid(param, param_1);
|
|
}
|
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
|
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
|
{
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
|
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
|
|
{
|
|
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
|
Monoid param_2 = agg;
|
|
Monoid param_3 = other;
|
|
agg = combine_monoid(param_2, param_3);
|
|
}
|
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
|
}
|
|
if (gl_LocalInvocationID.x == 0u)
|
|
{
|
|
_127.outbuf[gl_WorkGroupID.x].element = agg.element;
|
|
}
|
|
}
|
|
|