mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
a0648a2153
The MSL translation of the prefix example had its bindings permuted; a flag prevents this (but, as is typical for shader translation, potentially creates other problems). Also use explicit unsigned literal to avoid DXC warnings.
224 lines
6.3 KiB
HLSL
224 lines
6.3 KiB
HLSL
// Value type carried through the scan: one 32-bit unsigned word.
// combine_monoid() merges two of these by adding their `element` fields.
struct Monoid
{
    uint element;
};
|
|
|
|
// Three-word record: a status flag followed by two Monoid values.
// NOTE(review): this struct is not referenced by name in the visible code.
// comp_main() addresses buffer _43 in 12-byte records with words at +4, +8
// and +12 (after a 4-byte counter at offset 0), which presumably corresponds
// to flag / aggregate / prefix here -- confirm against the shader source
// this file was generated from.
struct State
{
    uint flag;
    Monoid aggregate;
    Monoid prefix;
};
|
|
|
|
// Workgroup dimensions; same values as the [numthreads(512, 1, 1)]
// attribute on main().
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);

// Monoid whose element is zero; comp_main() uses it as the starting value
// of its `exclusive` accumulator.
static const Monoid _185 = { 0u };

// Read/write byte-address buffer at u2, declared globallycoherent.
// comp_main() atomically increments the word at byte offset 0 and
// reads/writes 12-byte records at (index * 12 + 4), (+8) and (+12).
globallycoherent RWByteAddressBuffer _43 : register(u2);

// Read-only input buffer at t0; comp_main() loads it as 32-bit words.
ByteAddressBuffer _67 : register(t0);

// Output buffer at u1; comp_main() stores one 32-bit word per input word.
RWByteAddressBuffer _372 : register(u1);

// Per-thread copy of SV_GroupThreadID, filled in by main() before
// comp_main() runs.
static uint3 gl_LocalInvocationID;
|
|
// Entry-point input block: carries the thread's index within its
// workgroup (SV_GroupThreadID) into main().
struct SPIRV_Cross_Input
{
    uint3 gl_LocalInvocationID : SV_GroupThreadID;
};
|
|
|
|
// Partition index for this workgroup; written by thread 0 in comp_main()
// and read by every thread after a group barrier.
groupshared uint sh_part_ix;

// One slot per thread (512) holding that thread's running aggregate during
// the workgroup-level scan in comp_main().
groupshared Monoid sh_scratch[512];

// Flag value written by thread 511 inside the look-back loop and read by
// all threads after a group barrier.
groupshared uint sh_flag;

// Value of `exclusive` computed by thread 511 for partitions other than 0;
// read back by every thread once the look-back has finished.
groupshared Monoid sh_prefix;
|
|
|
|
// Returns a Monoid whose element is a.element + b.element
// (32-bit unsigned addition).
Monoid combine_monoid(Monoid a, Monoid b)
{
    Monoid sum;
    sum.element = a.element + b.element;
    return sum;
}
|
|
|
|
// Body of the compute shader. Each workgroup of 512 threads handles one
// partition of 8192 consecutive 32-bit words (16 per thread) from input
// buffer _67 and writes, for every word, a running sum to the same index of
// output buffer _372. The running sum is built from three parts:
//   1. a per-thread sequential sum over its 16 words (local[]),
//   2. a workgroup-wide scan over the per-thread totals (sh_scratch[]),
//   3. a value `exclusive` gathered from earlier partitions through the
//      12-byte records in buffer _43 (words at +4, +8, +12 of each record).
// Takes no parameters and returns nothing; reads gl_LocalInvocationID.
void comp_main()
|
|
{
|
|
// Thread 0 claims a partition index by atomically adding 1 to the word at
// byte offset 0 of _43; _47 receives the value from before the add.
if (gl_LocalInvocationID.x == 0u)
|
|
{
|
|
uint _47;
|
|
_43.InterlockedAdd(0, 1u, _47);
|
|
sh_part_ix = _47;
|
|
}
|
|
// Make thread 0's write to sh_part_ix visible to the whole group.
GroupMemoryBarrierWithGroupSync();
|
|
uint part_ix = sh_part_ix;
|
|
// Index (in words) of this thread's first element: 8192 words per
// partition, 16 words per thread (512 * 16 == 8192).
uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u);
|
|
Monoid _71;
|
|
// Byte-address buffers are addressed in bytes, hence the "* 4".
_71.element = _67.Load(ix * 4 + 0);
|
|
// local[i] holds the sum of this thread's words 0..i.
Monoid local[16];
|
|
local[0].element = _71.element;
|
|
Monoid param_1;
|
|
for (uint i = 1u; i < 16u; i++)
|
|
{
|
|
Monoid param = local[i - 1u];
|
|
Monoid _94;
|
|
_94.element = _67.Load((ix + i) * 4 + 0);
|
|
param_1.element = _94.element;
|
|
local[i] = combine_monoid(param, param_1);
|
|
}
|
|
// Total of this thread's 16 words.
Monoid agg = local[15];
|
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
|
// Workgroup scan over the per-thread totals: 9 rounds with strides
// 1, 2, 4, ..., 256 (2^9 == 512 threads). In each round a thread adds the
// value held by the thread `stride` positions below it, if there is one.
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
|
{
|
|
// Wait until every thread's sh_scratch slot from the previous round
// (or the initial store) is written before reading a neighbour's slot.
GroupMemoryBarrierWithGroupSync();
|
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
|
{
|
|
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
|
Monoid param_2 = other;
|
|
Monoid param_3 = agg;
|
|
agg = combine_monoid(param_2, param_3);
|
|
}
|
|
// Wait until all reads of this round are done before overwriting slots.
GroupMemoryBarrierWithGroupSync();
|
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
|
}
|
|
// After the scan, thread 511's agg is the total of the whole partition.
// It stores that total in word +8 of this partition's record; partition 0
// also stores it at byte 12, i.e. word +12 of record 0.
if (gl_LocalInvocationID.x == 511u)
|
|
{
|
|
_43.Store(part_ix * 12 + 8, agg.element);
|
|
if (part_ix == 0u)
|
|
{
|
|
_43.Store(12, agg.element);
|
|
}
|
|
}
|
|
// Device memory barrier between the data stores above and the flag store
// below that announces them.
DeviceMemoryBarrier();
|
|
// Flag word (+4): 1 when only word +8 has been written, 2 when word +12
// has been written too (which is immediately the case for partition 0).
if (gl_LocalInvocationID.x == 511u)
|
|
{
|
|
uint flag = 1u;
|
|
if (part_ix == 0u)
|
|
{
|
|
flag = 2u;
|
|
}
|
|
_43.Store(part_ix * 12 + 4, flag);
|
|
}
|
|
// Sum contributed by all earlier partitions; stays zero for partition 0.
Monoid exclusive = _185;
|
|
if (part_ix != 0u)
|
|
{
|
|
// Walk backwards over earlier partitions, starting with the one
// immediately before this one.
uint look_back_ix = part_ix - 1u;
|
|
// Number of input words of partition look_back_ix that thread 511 has
// summed itself so far (see the fallback path further down).
uint their_ix = 0u;
|
|
Monoid their_prefix;
|
|
// Persists across loop iterations: the fallback path accumulates into it.
Monoid their_agg;
|
|
Monoid m;
|
|
while (true)
|
|
{
|
|
// Only thread 511 reads the flag of the partition being inspected; the
// value is shared through sh_flag so all threads take the same branch.
if (gl_LocalInvocationID.x == 511u)
|
|
{
|
|
sh_flag = _43.Load(look_back_ix * 12 + 4);
|
|
}
|
|
GroupMemoryBarrierWithGroupSync();
|
|
DeviceMemoryBarrier();
|
|
uint flag_1 = sh_flag;
|
|
// Flag 2: word +12 of that record is available. Thread 511 adds it to
// `exclusive` and the look-back ends for every thread.
if (flag_1 == 2u)
|
|
{
|
|
if (gl_LocalInvocationID.x == 511u)
|
|
{
|
|
Monoid _223;
|
|
_223.element = _43.Load(look_back_ix * 12 + 12);
|
|
their_prefix.element = _223.element;
|
|
Monoid param_4 = their_prefix;
|
|
Monoid param_5 = exclusive;
|
|
exclusive = combine_monoid(param_4, param_5);
|
|
}
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
// Flag 1: only word +8 of that record is available. Thread 511 adds it
// to `exclusive`, then all threads step to the previous partition.
if (flag_1 == 1u)
|
|
{
|
|
if (gl_LocalInvocationID.x == 511u)
|
|
{
|
|
Monoid _245;
|
|
_245.element = _43.Load(look_back_ix * 12 + 8);
|
|
their_agg.element = _245.element;
|
|
Monoid param_6 = their_agg;
|
|
Monoid param_7 = exclusive;
|
|
exclusive = combine_monoid(param_6, param_7);
|
|
}
|
|
// Partition 0 only ever stores flag 2 in the code above, so this is not
// expected to run with look_back_ix == 0.
// NOTE(review): that relies on the flag words not initially holding 1;
// the buffer's initial contents are not visible here -- confirm.
look_back_ix--;
|
|
their_ix = 0u;
|
|
continue;
|
|
}
|
|
}
|
|
// Neither flag value seen: instead of only re-polling, thread 511 sums
// one more input word of partition look_back_ix itself on each pass.
if (gl_LocalInvocationID.x == 511u)
|
|
{
|
|
Monoid _267;
|
|
_267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
|
|
m.element = _267.element;
|
|
// The first word starts a fresh sum; later words are added to it.
if (their_ix == 0u)
|
|
{
|
|
their_agg = m;
|
|
}
|
|
else
|
|
{
|
|
Monoid param_8 = their_agg;
|
|
Monoid param_9 = m;
|
|
their_agg = combine_monoid(param_8, param_9);
|
|
}
|
|
their_ix++;
|
|
// All 8192 words of that partition have been summed locally: fold the
// result into `exclusive`.
if (their_ix == 8192u)
|
|
{
|
|
Monoid param_10 = their_agg;
|
|
Monoid param_11 = exclusive;
|
|
exclusive = combine_monoid(param_10, param_11);
|
|
// Partition 0 was the last one to cover: signal completion to the group
// through sh_flag. Otherwise move on to the previous partition.
if (look_back_ix == 0u)
|
|
{
|
|
sh_flag = 2u;
|
|
}
|
|
else
|
|
{
|
|
// This decrement happens in thread 511 only; within the loop, thread 511
// is also the only thread that uses look_back_ix to address memory.
look_back_ix--;
|
|
their_ix = 0u;
|
|
}
|
|
}
|
|
}
|
|
// Let every thread observe a possible sh_flag = 2 written just above.
GroupMemoryBarrierWithGroupSync();
|
|
flag_1 = sh_flag;
|
|
if (flag_1 == 2u)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
// Thread 511 now holds the complete `exclusive` value. It shares it with
// the group through sh_prefix and stores exclusive + this partition's
// total in word +12 of this partition's record.
if (gl_LocalInvocationID.x == 511u)
|
|
{
|
|
Monoid param_12 = exclusive;
|
|
Monoid param_13 = agg;
|
|
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
|
|
sh_prefix = exclusive;
|
|
_43.Store(part_ix * 12 + 12, inclusive_prefix.element);
|
|
}
|
|
// Same pattern as before: barrier between the data store and the flag
// store that announces it.
DeviceMemoryBarrier();
|
|
if (gl_LocalInvocationID.x == 511u)
|
|
{
|
|
_43.Store(part_ix * 12 + 4, 2u);
|
|
}
|
|
}
|
|
// Make thread 511's write to sh_prefix visible to the whole group.
GroupMemoryBarrierWithGroupSync();
|
|
if (part_ix != 0u)
|
|
{
|
|
exclusive = sh_prefix;
|
|
}
|
|
// `row` = sum of everything that precedes this thread's 16 words: the
// contribution of earlier partitions plus the scanned total of all
// lower-numbered threads in this workgroup.
Monoid row = exclusive;
|
|
if (gl_LocalInvocationID.x > 0u)
|
|
{
|
|
Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u];
|
|
Monoid param_14 = row;
|
|
Monoid param_15 = other_1;
|
|
row = combine_monoid(param_14, param_15);
|
|
}
|
|
// Write the results: `row` plus the per-thread running sum of each word,
// stored at the same word index the input was read from.
for (uint i_2 = 0u; i_2 < 16u; i_2++)
|
|
{
|
|
Monoid param_16 = row;
|
|
Monoid param_17 = local[i_2];
|
|
Monoid m_1 = combine_monoid(param_16, param_17);
|
|
_372.Store((ix + i_2) * 4 + 0, m_1.element);
|
|
}
|
|
}
|
|
|
|
// Compute entry point, launched with 512 x 1 x 1 threads per group.
// Copies the thread's in-group index from the stage input into the
// file-scope gl_LocalInvocationID, then runs the shader body.
[numthreads(512, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
    gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
    comp_main();
}
|