remove shader gen directories

This commit is contained in:
Chad Brokaw 2022-07-14 14:57:17 -04:00
parent af55dbfa4e
commit 8de34f8728
118 changed files with 0 additions and 18495 deletions

Binary file not shown.

View file

@ -1,62 +0,0 @@
static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u);
RWByteAddressBuffer _57 : register(u0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
float mod(float x, float y)
{
return x - y * floor(x / y);
}
float2 mod(float2 x, float2 y)
{
return x - y * floor(x / y);
}
float3 mod(float3 x, float3 y)
{
return x - y * floor(x / y);
}
float4 mod(float4 x, float4 y)
{
return x - y * floor(x / y);
}
uint collatz_iterations(inout uint n)
{
uint i = 0u;
while (n != 1u)
{
if (mod(float(n), 2.0f) == 0.0f)
{
n /= 2u;
}
else
{
n = (3u * n) + 1u;
}
i++;
}
return i;
}
void comp_main()
{
uint index = gl_GlobalInvocationID.x;
uint param = _57.Load(index * 4 + 0);
uint _65 = collatz_iterations(param);
_57.Store(index * 4 + 0, _65);
}
[numthreads(1, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,48 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
template<typename Tx, typename Ty>
inline Tx mod(Tx x, Ty y)
{
return x - y * floor(x / y);
}
struct PrimeIndices
{
uint indices[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
static inline __attribute__((always_inline))
uint collatz_iterations(thread uint& n)
{
uint i = 0u;
while (n != 1u)
{
if (mod(float(n), 2.0) == 0.0)
{
n /= 2u;
}
else
{
n = (3u * n) + 1u;
}
i++;
}
return i;
}
kernel void main0(device PrimeIndices& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint index = gl_GlobalInvocationID.x;
uint param = _57.indices[index];
uint _65 = collatz_iterations(param);
_57.indices[index] = _65;
}

Binary file not shown.

Binary file not shown.

View file

@ -1,257 +0,0 @@
struct Alloc
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _59 : register(u0, space0);
ByteAddressBuffer _181 : register(t1, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
static uint gl_LocalInvocationIndex;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
uint gl_LocalInvocationIndex : SV_GroupIndex;
};
groupshared uint sh_row_width[256];
groupshared Alloc sh_row_alloc[256];
groupshared uint sh_row_count[256];
bool check_deps(uint dep_stage)
{
uint _65;
_59.InterlockedOr(4, 0u, _65);
return (_65 & dep_stage) == 0u;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _59.Load(offset * 4 + 12);
return v;
}
Path Path_read(Alloc a, PathRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _146 = { raw2 };
s.tiles = _146;
return s;
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_59.Store(offset * 4 + 12, val);
}
void comp_main()
{
uint param = 7u;
bool _154 = check_deps(param);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _181.Load(4))
{
PathRef _195 = { _181.Load(20) + (element_ix * 12u) };
PathRef path_ref = _195;
Alloc _200;
_200.offset = _181.Load(20);
Alloc param_1;
param_1.offset = _200.offset;
PathRef param_2 = path_ref;
Path path = Path_read(param_1, param_2);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _225 = row_count == 1u;
bool _231;
if (_225)
{
_231 = path.bbox.y > 0u;
}
else
{
_231 = _225;
}
if (_231)
{
row_count = 0u;
}
uint param_3 = path.tiles.offset;
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_5 = true;
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
bool _276 = gl_LocalInvocationID.y == 0u;
bool _283;
if (_276)
{
_283 = th_ix >= (1u << i);
}
else
{
_283 = _276;
}
if (_283)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.y == 0u)
{
sh_row_count[th_ix] = row_count;
}
}
GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255];
uint _360;
for (uint row = th_ix; row < total_rows; row += 256u)
{
uint el_ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = el_ix + (128u >> i_1);
if (row >= sh_row_count[probe - 1u])
{
el_ix = probe;
}
}
uint width = sh_row_width[el_ix];
if (width > 0u)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_360 = sh_row_count[el_ix - 1u];
}
else
{
_360 = 0u;
}
uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_6 = tiles_alloc;
uint param_7 = tile_el_ix;
uint sum = read_mem(param_6, param_7);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
sum += read_mem(param_8, param_9);
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
uint param_12 = sum;
write_mem(param_10, param_11, param_12);
}
}
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
comp_main();
}

View file

@ -1,263 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct Alloc
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_59)
{
uint _65 = atomic_fetch_or_explicit((device atomic_uint*)&v_59.mem_error, 0u, memory_order_relaxed);
return (_65 & dep_stage) == 0u;
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_59.memory[offset];
return v;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_59);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_59);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_59);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
return s;
}
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_59.memory[offset] = val;
}
kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256];
uint param = 7u;
bool _154 = check_deps(param, v_59);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _181.conf.n_elements)
{
PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param_1;
param_1.offset = _181.conf.tile_alloc.offset;
PathRef param_2 = path_ref;
Path path = Path_read(param_1, param_2, v_59);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _225 = row_count == 1u;
bool _231;
if (_225)
{
_231 = path.bbox.y > 0u;
}
else
{
_231 = _225;
}
if (_231)
{
row_count = 0u;
}
uint param_3 = path.tiles.offset;
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_5 = true;
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _276 = gl_LocalInvocationID.y == 0u;
bool _283;
if (_276)
{
_283 = th_ix >= (1u << i);
}
else
{
_283 = _276;
}
if (_283)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.y == 0u)
{
sh_row_count[th_ix] = row_count;
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255];
uint _360;
for (uint row = th_ix; row < total_rows; row += 256u)
{
uint el_ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = el_ix + (128u >> i_1);
if (row >= sh_row_count[probe - 1u])
{
el_ix = probe;
}
}
uint width = sh_row_width[el_ix];
if (width > 0u)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_360 = sh_row_count[el_ix - 1u];
}
else
{
_360 = 0u;
}
uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_6 = tiles_alloc;
uint param_7 = tile_el_ix;
uint sum = read_mem(param_6, param_7, v_59);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
sum += read_mem(param_8, param_9, v_59);
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
uint param_12 = sum;
write_mem(param_10, param_11, param_12, v_59);
}
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,257 +0,0 @@
struct Alloc
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u);
RWByteAddressBuffer _59 : register(u0, space0);
ByteAddressBuffer _181 : register(t1, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
static uint gl_LocalInvocationIndex;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
uint gl_LocalInvocationIndex : SV_GroupIndex;
};
groupshared uint sh_row_width[256];
groupshared Alloc sh_row_alloc[256];
groupshared uint sh_row_count[256];
bool check_deps(uint dep_stage)
{
uint _65;
_59.InterlockedOr(4, 0u, _65);
return (_65 & dep_stage) == 0u;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _59.Load(offset * 4 + 12);
return v;
}
Path Path_read(Alloc a, PathRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _146 = { raw2 };
s.tiles = _146;
return s;
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_59.Store(offset * 4 + 12, val);
}
void comp_main()
{
uint param = 7u;
bool _154 = check_deps(param);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _181.Load(4))
{
PathRef _195 = { _181.Load(20) + (element_ix * 12u) };
PathRef path_ref = _195;
Alloc _200;
_200.offset = _181.Load(20);
Alloc param_1;
param_1.offset = _200.offset;
PathRef param_2 = path_ref;
Path path = Path_read(param_1, param_2);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _225 = row_count == 1u;
bool _231;
if (_225)
{
_231 = path.bbox.y > 0u;
}
else
{
_231 = _225;
}
if (_231)
{
row_count = 0u;
}
uint param_3 = path.tiles.offset;
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_5 = true;
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
bool _276 = gl_LocalInvocationID.y == 0u;
bool _283;
if (_276)
{
_283 = th_ix >= (1u << i);
}
else
{
_283 = _276;
}
if (_283)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.y == 0u)
{
sh_row_count[th_ix] = row_count;
}
}
GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255];
uint _360;
for (uint row = th_ix; row < total_rows; row += 1024u)
{
uint el_ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = el_ix + (128u >> i_1);
if (row >= sh_row_count[probe - 1u])
{
el_ix = probe;
}
}
uint width = sh_row_width[el_ix];
if (width > 0u)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_360 = sh_row_count[el_ix - 1u];
}
else
{
_360 = 0u;
}
uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_6 = tiles_alloc;
uint param_7 = tile_el_ix;
uint sum = read_mem(param_6, param_7);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
sum += read_mem(param_8, param_9);
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
uint param_12 = sum;
write_mem(param_10, param_11, param_12);
}
}
}
}
[numthreads(256, 4, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
comp_main();
}

View file

@ -1,263 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct Alloc
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 4u, 1u);
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_59)
{
uint _65 = atomic_fetch_or_explicit((device atomic_uint*)&v_59.mem_error, 0u, memory_order_relaxed);
return (_65 & dep_stage) == 0u;
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_59.memory[offset];
return v;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_59);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_59);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_59);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
return s;
}
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_59.memory[offset] = val;
}
kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256];
uint param = 7u;
bool _154 = check_deps(param, v_59);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _181.conf.n_elements)
{
PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param_1;
param_1.offset = _181.conf.tile_alloc.offset;
PathRef param_2 = path_ref;
Path path = Path_read(param_1, param_2, v_59);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _225 = row_count == 1u;
bool _231;
if (_225)
{
_231 = path.bbox.y > 0u;
}
else
{
_231 = _225;
}
if (_231)
{
row_count = 0u;
}
uint param_3 = path.tiles.offset;
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_5 = true;
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _276 = gl_LocalInvocationID.y == 0u;
bool _283;
if (_276)
{
_283 = th_ix >= (1u << i);
}
else
{
_283 = _276;
}
if (_283)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.y == 0u)
{
sh_row_count[th_ix] = row_count;
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255];
uint _360;
for (uint row = th_ix; row < total_rows; row += 1024u)
{
uint el_ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = el_ix + (128u >> i_1);
if (row >= sh_row_count[probe - 1u])
{
el_ix = probe;
}
}
uint width = sh_row_width[el_ix];
if (width > 0u)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_360 = sh_row_count[el_ix - 1u];
}
else
{
_360 = 0u;
}
uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_6 = tiles_alloc;
uint param_7 = tile_el_ix;
uint sum = read_mem(param_6, param_7, v_59);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
sum += read_mem(param_8, param_9, v_59);
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
uint param_12 = sum;
write_mem(param_10, param_11, param_12, v_59);
}
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,67 +0,0 @@
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
ByteAddressBuffer _21 : register(t1, space0);
RWByteAddressBuffer _45 : register(u0, space0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
void comp_main()
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _21.Load(80))
{
uint out_ix = (_21.Load(44) >> uint(2)) + (6u * ix);
_45.Store(out_ix * 4 + 12, 65535u);
_45.Store((out_ix + 1u) * 4 + 12, 65535u);
_45.Store((out_ix + 2u) * 4 + 12, 0u);
_45.Store((out_ix + 3u) * 4 + 12, 0u);
}
}
[numthreads(512, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,70 +0,0 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
kernel void main0(device Memory& _45 [[buffer(0)]], const device ConfigBuf& _21 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _21.conf.n_path)
{
uint out_ix = (_21.conf.path_bbox_alloc.offset >> uint(2)) + (6u * ix);
_45.memory[out_ix] = 65535u;
_45.memory[out_ix + 1u] = 65535u;
_45.memory[out_ix + 2u] = 0u;
_45.memory[out_ix + 3u] = 0u;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,274 +0,0 @@
struct Alloc
{
uint offset;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _57 : register(u0, space0);
ByteAddressBuffer _101 : register(t1, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
};
groupshared uint bitmaps[8][256];
groupshared uint count[8][256];
groupshared uint sh_chunk_offset[256];
DrawMonoid load_draw_monoid(uint element_ix)
{
uint base = (_101.Load(48) >> uint(2)) + (4u * element_ix);
uint path_ix = _57.Load(base * 4 + 12);
uint clip_ix = _57.Load((base + 1u) * 4 + 12);
uint scene_offset = _57.Load((base + 2u) * 4 + 12);
uint info_offset = _57.Load((base + 3u) * 4 + 12);
DrawMonoid _136 = { path_ix, clip_ix, scene_offset, info_offset };
return _136;
}
float4 load_clip_bbox(uint clip_ix)
{
uint base = (_101.Load(64) >> uint(2)) + (4u * clip_ix);
float x0 = asfloat(_57.Load(base * 4 + 12));
float y0 = asfloat(_57.Load((base + 1u) * 4 + 12));
float x1 = asfloat(_57.Load((base + 2u) * 4 + 12));
float y1 = asfloat(_57.Load((base + 3u) * 4 + 12));
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_101.Load(44) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_57.Load(base * 4 + 12)) - 32768.0f;
float bbox_t = float(_57.Load((base + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_57.Load((base + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_57.Load((base + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
float4 bbox_intersect(float4 a, float4 b)
{
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
}
void store_draw_bbox(uint draw_ix, float4 bbox)
{
uint base = (_101.Load(68) >> uint(2)) + (4u * draw_ix);
_57.Store(base * 4 + 12, asuint(bbox.x));
_57.Store((base + 1u) * 4 + 12, asuint(bbox.y));
_57.Store((base + 2u) * 4 + 12, asuint(bbox.z));
_57.Store((base + 3u) * 4 + 12, asuint(bbox.w));
}
uint malloc_stage(uint size, uint mem_size, uint stage)
{
uint _65;
_57.InterlockedAdd(0, size, _65);
uint offset = _65;
if ((offset + size) > mem_size)
{
uint _76;
_57.InterlockedOr(4, stage, _76);
offset = 0u;
}
return offset;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_57.Store(offset * 4 + 12, val);
}
void comp_main()
{
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++)
{
bitmaps[i][gl_LocalInvocationID.x] = 0u;
}
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
if (element_ix < _101.Load(4))
{
uint param = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param);
uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u)
{
uint param_1 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_1);
}
uint param_2 = path_ix;
float4 path_bbox = load_path_bbox(param_2);
float4 param_3 = path_bbox;
float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_3, param_4);
float4 _354 = bbox;
float4 _356 = bbox;
float2 _358 = max(_354.xy, _356.zw);
bbox.z = _358.x;
bbox.w = _358.y;
uint param_5 = element_ix;
float4 param_6 = bbox;
store_draw_bbox(param_5, param_6);
x0 = int(floor(bbox.x * 0.00390625f));
y0 = int(floor(bbox.y * 0.00390625f));
x1 = int(ceil(bbox.z * 0.00390625f));
y1 = int(ceil(bbox.w * 0.00390625f));
}
uint width_in_bins = ((_101.Load(12) + 16u) - 1u) / 16u;
uint height_in_bins = ((_101.Load(16) + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
y1 = clamp(y1, y0, int(height_in_bins));
if (x0 == x1)
{
y1 = y0;
}
int x = x0;
int y = y0;
uint my_slice = gl_LocalInvocationID.x / 32u;
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1)
{
uint _460;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _460);
x++;
if (x == x1)
{
x = x0;
y++;
}
}
GroupMemoryBarrierWithGroupSync();
uint element_count = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count;
}
uint chunk_offset = 0u;
if (element_count != 0u)
{
uint param_7 = element_count * 4u;
uint param_8 = _101.Load(0);
uint param_9 = 1u;
uint _510 = malloc_stage(param_7, param_8, param_9);
chunk_offset = _510;
sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset;
}
uint out_ix = (_101.Load(24) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _532;
_532.offset = _101.Load(24);
Alloc param_10;
param_10.offset = _532.offset;
uint param_11 = out_ix;
uint param_12 = element_count;
write_mem(param_10, param_11, param_12);
Alloc _544;
_544.offset = _101.Load(24);
Alloc param_13;
param_13.offset = _544.offset;
uint param_14 = out_ix + 1u;
uint param_15 = chunk_offset;
write_mem(param_13, param_14, param_15);
GroupMemoryBarrierWithGroupSync();
x = x0;
y = y0;
while (y < y1)
{
uint bin_ix = (uint(y) * width_in_bins) + uint(x);
uint out_mask = bitmaps[my_slice][bin_ix];
if ((out_mask & my_mask) != 0u)
{
uint idx = uint(int(countbits(out_mask & (my_mask - 1u))));
if (my_slice > 0u)
{
idx += count[my_slice - 1u][bin_ix];
}
uint chunk_offset_1 = sh_chunk_offset[bin_ix];
if (chunk_offset_1 != 0u)
{
_57.Store(((chunk_offset_1 >> uint(2)) + idx) * 4 + 12, element_ix);
}
}
x++;
if (x == x1)
{
x = x0;
y++;
}
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
comp_main();
}

View file

@ -1,282 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct Alloc
{
uint offset;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_57, const device ConfigBuf& v_101)
{
uint base = (v_101.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix);
uint path_ix = v_57.memory[base];
uint clip_ix = v_57.memory[base + 1u];
uint scene_offset = v_57.memory[base + 2u];
uint info_offset = v_57.memory[base + 3u];
return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset };
}
static inline __attribute__((always_inline))
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_57, const device ConfigBuf& v_101)
{
uint base = (v_101.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
float x0 = as_type<float>(v_57.memory[base]);
float y0 = as_type<float>(v_57.memory[base + 1u]);
float x1 = as_type<float>(v_57.memory[base + 2u]);
float y1 = as_type<float>(v_57.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_57, const device ConfigBuf& v_101)
{
uint base = (v_101.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_57.memory[base]) - 32768.0;
float bbox_t = float(v_57.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_57.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_57.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
float4 bbox_intersect(thread const float4& a, thread const float4& b)
{
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
}
static inline __attribute__((always_inline))
void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_57, const device ConfigBuf& v_101)
{
uint base = (v_101.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
v_57.memory[base] = as_type<uint>(bbox.x);
v_57.memory[base + 1u] = as_type<uint>(bbox.y);
v_57.memory[base + 2u] = as_type<uint>(bbox.z);
v_57.memory[base + 3u] = as_type<uint>(bbox.w);
}
static inline __attribute__((always_inline))
uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_57)
{
uint _65 = atomic_fetch_add_explicit((device atomic_uint*)&v_57.mem_offset, size, memory_order_relaxed);
uint offset = _65;
if ((offset + size) > mem_size)
{
uint _76 = atomic_fetch_or_explicit((device atomic_uint*)&v_57.mem_error, stage, memory_order_relaxed);
offset = 0u;
}
return offset;
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_57)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_57.memory[offset] = val;
}
kernel void main0(device Memory& v_57 [[buffer(0)]], const device ConfigBuf& v_101 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint bitmaps[8][256];
threadgroup uint count[8][256];
threadgroup uint sh_chunk_offset[256];
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++)
{
bitmaps[i][gl_LocalInvocationID.x] = 0u;
}
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
if (element_ix < v_101.conf.n_elements)
{
uint param = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param, v_57, v_101);
uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u)
{
uint param_1 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_1, v_57, v_101);
}
uint param_2 = path_ix;
float4 path_bbox = load_path_bbox(param_2, v_57, v_101);
float4 param_3 = path_bbox;
float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_3, param_4);
float4 _354 = bbox;
float4 _356 = bbox;
float2 _358 = fast::max(_354.xy, _356.zw);
bbox.z = _358.x;
bbox.w = _358.y;
uint param_5 = element_ix;
float4 param_6 = bbox;
store_draw_bbox(param_5, param_6, v_57, v_101);
x0 = int(floor(bbox.x * 0.00390625));
y0 = int(floor(bbox.y * 0.00390625));
x1 = int(ceil(bbox.z * 0.00390625));
y1 = int(ceil(bbox.w * 0.00390625));
}
uint width_in_bins = ((v_101.conf.width_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((v_101.conf.height_in_tiles + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
y1 = clamp(y1, y0, int(height_in_bins));
if (x0 == x1)
{
y1 = y0;
}
int x = x0;
int y = y0;
uint my_slice = gl_LocalInvocationID.x / 32u;
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1)
{
uint _460 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
x++;
if (x == x1)
{
x = x0;
y++;
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint element_count = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count;
}
uint chunk_offset = 0u;
if (element_count != 0u)
{
uint param_7 = element_count * 4u;
uint param_8 = v_101.conf.mem_size;
uint param_9 = 1u;
uint _510 = malloc_stage(param_7, param_8, param_9, v_57);
chunk_offset = _510;
sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset;
}
uint out_ix = (v_101.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_10;
param_10.offset = v_101.conf.bin_alloc.offset;
uint param_11 = out_ix;
uint param_12 = element_count;
write_mem(param_10, param_11, param_12, v_57);
Alloc param_13;
param_13.offset = v_101.conf.bin_alloc.offset;
uint param_14 = out_ix + 1u;
uint param_15 = chunk_offset;
write_mem(param_13, param_14, param_15, v_57);
threadgroup_barrier(mem_flags::mem_threadgroup);
x = x0;
y = y0;
while (y < y1)
{
uint bin_ix = (uint(y) * width_in_bins) + uint(x);
uint out_mask = bitmaps[my_slice][bin_ix];
if ((out_mask & my_mask) != 0u)
{
uint idx = uint(int(popcount(out_mask & (my_mask - 1u))));
if (my_slice > 0u)
{
idx += count[my_slice - 1u][bin_ix];
}
uint chunk_offset_1 = sh_chunk_offset[bin_ix];
if (chunk_offset_1 != 0u)
{
v_57.memory[(chunk_offset_1 >> uint(2)) + idx] = element_ix;
}
}
x++;
if (x == x1)
{
x = x0;
y++;
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,372 +0,0 @@
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _394 = { 0u, 0u };
ByteAddressBuffer _80 : register(t1, space0);
RWByteAddressBuffer _96 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Bic sh_bic[510];
groupshared uint sh_stack[256];
groupshared float4 sh_stack_bbox[256];
groupshared uint sh_link[256];
groupshared float4 sh_bbox[256];
Bic load_bic(uint ix)
{
uint base = (_80.Load(56) >> uint(2)) + (2u * ix);
Bic _287 = { _96.Load(base * 4 + 12), _96.Load((base + 1u) * 4 + 12) };
return _287;
}
Bic bic_combine(Bic x, Bic y)
{
uint m = min(x.b, y.a);
Bic _72 = { (x.a + y.a) - m, (x.b + y.b) - m };
return _72;
}
ClipEl load_clip_el(uint ix)
{
uint base = (_80.Load(60) >> uint(2)) + (5u * ix);
uint parent_ix = _96.Load(base * 4 + 12);
float x0 = asfloat(_96.Load((base + 1u) * 4 + 12));
float y0 = asfloat(_96.Load((base + 2u) * 4 + 12));
float x1 = asfloat(_96.Load((base + 3u) * 4 + 12));
float y1 = asfloat(_96.Load((base + 4u) * 4 + 12));
float4 bbox = float4(x0, y0, x1, y1);
ClipEl _336 = { parent_ix, bbox };
return _336;
}
float4 bbox_intersect(float4 a, float4 b)
{
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
}
uint load_path_ix(uint ix)
{
if (ix < _80.Load(84))
{
return _96.Load(((_80.Load(52) >> uint(2)) + ix) * 4 + 12);
}
else
{
return 2147483648u;
}
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_80.Load(44) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_96.Load(base * 4 + 12)) - 32768.0f;
float bbox_t = float(_96.Load((base + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_96.Load((base + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_96.Load((base + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
uint search_link(inout Bic bic)
{
uint ix = gl_LocalInvocationID.x;
uint j = 0u;
while (j < 8u)
{
uint base = 512u - (2u << (8u - j));
if (((ix >> j) & 1u) != 0u)
{
Bic param = sh_bic[(base + (ix >> j)) - 1u];
Bic param_1 = bic;
Bic test = bic_combine(param, param_1);
if (test.b > 0u)
{
break;
}
bic = test;
ix -= (1u << j);
}
j++;
}
if (ix > 0u)
{
while (j > 0u)
{
j--;
uint base_1 = 512u - (2u << (8u - j));
Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u];
Bic param_3 = bic;
Bic test_1 = bic_combine(param_2, param_3);
if (test_1.b == 0u)
{
bic = test_1;
ix -= (1u << j);
}
}
}
if (ix > 0u)
{
return ix - 1u;
}
else
{
return 4294967295u - bic.a;
}
}
void store_clip_bbox(uint ix, float4 bbox)
{
uint base = (_80.Load(64) >> uint(2)) + (4u * ix);
_96.Store(base * 4 + 12, asuint(bbox.x));
_96.Store((base + 1u) * 4 + 12, asuint(bbox.y));
_96.Store((base + 2u) * 4 + 12, asuint(bbox.z));
_96.Store((base + 3u) * 4 + 12, asuint(bbox.w));
}
void comp_main()
{
uint th = gl_LocalInvocationID.x;
Bic bic = _394;
if (th < gl_WorkGroupID.x)
{
uint param = th;
bic = load_bic(param);
}
sh_bic[th] = bic;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[th + (1u << i)];
Bic param_1 = bic;
Bic param_2 = other;
bic = bic_combine(param_1, param_2);
}
GroupMemoryBarrierWithGroupSync();
sh_bic[th] = bic;
}
GroupMemoryBarrierWithGroupSync();
uint stack_size = sh_bic[0].b;
uint sp = 255u - th;
uint ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = ix + (128u >> i_1);
if (sp < sh_bic[probe].b)
{
ix = probe;
}
}
uint b = sh_bic[ix].b;
float4 bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
if (sp < b)
{
uint param_3 = (((ix * 256u) + b) - sp) - 1u;
ClipEl el = load_clip_el(param_3);
sh_stack[th] = el.parent_ix;
bbox = el.bbox;
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
sh_stack_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
if (th >= (1u << i_2))
{
float4 param_4 = sh_stack_bbox[th - (1u << i_2)];
float4 param_5 = bbox;
bbox = bbox_intersect(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
}
sh_stack_bbox[th] = bbox;
uint param_6 = gl_GlobalInvocationID.x;
uint inp = load_path_ix(param_6);
bool is_push = int(inp) >= 0;
Bic _560 = { 1u - uint(is_push), uint(is_push) };
bic = _560;
sh_bic[th] = bic;
if (is_push)
{
uint param_7 = inp;
bbox = load_path_bbox(param_7);
}
else
{
bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
}
uint inbase = 0u;
for (uint i_3 = 0u; i_3 < 7u; i_3++)
{
uint outbase = 512u - (1u << (8u - i_3));
GroupMemoryBarrierWithGroupSync();
if (th < (1u << (7u - i_3)))
{
Bic param_8 = sh_bic[inbase + (th * 2u)];
Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u];
sh_bic[outbase + th] = bic_combine(param_8, param_9);
}
inbase = outbase;
}
GroupMemoryBarrierWithGroupSync();
bic = _394;
Bic param_10 = bic;
uint _619 = search_link(param_10);
bic = param_10;
uint link = _619;
sh_link[th] = link;
GroupMemoryBarrierWithGroupSync();
uint grandparent;
if (int(link) >= 0)
{
grandparent = sh_link[link];
}
else
{
grandparent = link - 1u;
}
uint parent;
if (int(link) >= 0)
{
parent = (gl_WorkGroupID.x * 256u) + link;
}
else
{
if (int(link + stack_size) >= 0)
{
parent = sh_stack[256u + link];
}
else
{
parent = 4294967295u;
}
}
for (uint i_4 = 0u; i_4 < 8u; i_4++)
{
if (i_4 != 0u)
{
sh_link[th] = link;
}
sh_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
if (int(link) >= 0)
{
float4 param_11 = sh_bbox[link];
float4 param_12 = bbox;
bbox = bbox_intersect(param_11, param_12);
link = sh_link[link];
}
GroupMemoryBarrierWithGroupSync();
}
if (int(link + stack_size) >= 0)
{
float4 param_13 = sh_stack_bbox[256u + link];
float4 param_14 = bbox;
bbox = bbox_intersect(param_13, param_14);
}
sh_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
uint path_ix = inp;
bool _718 = !is_push;
bool _726;
if (_718)
{
_726 = gl_GlobalInvocationID.x < _80.Load(84);
}
else
{
_726 = _718;
}
if (_726)
{
uint param_15 = parent;
path_ix = load_path_ix(param_15);
uint drawmonoid_out_base = (_80.Load(48) >> uint(2)) + (4u * (~inp));
_96.Store(drawmonoid_out_base * 4 + 12, path_ix);
if (int(grandparent) >= 0)
{
bbox = sh_bbox[grandparent];
}
else
{
if (int(grandparent + stack_size) >= 0)
{
bbox = sh_stack_bbox[256u + grandparent];
}
else
{
bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
}
}
}
uint param_16 = gl_GlobalInvocationID.x;
float4 param_17 = bbox;
store_clip_bbox(param_16, param_17);
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,372 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Bic load_bic(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix);
return Bic{ v_96.memory[base], v_96.memory[base + 1u] };
}
static inline __attribute__((always_inline))
Bic bic_combine(thread const Bic& x, thread const Bic& y)
{
uint m = min(x.b, y.a);
return Bic{ (x.a + y.a) - m, (x.b + y.b) - m };
}
static inline __attribute__((always_inline))
ClipEl load_clip_el(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix);
uint parent_ix = v_96.memory[base];
float x0 = as_type<float>(v_96.memory[base + 1u]);
float y0 = as_type<float>(v_96.memory[base + 2u]);
float x1 = as_type<float>(v_96.memory[base + 3u]);
float y1 = as_type<float>(v_96.memory[base + 4u]);
float4 bbox = float4(x0, y0, x1, y1);
return ClipEl{ parent_ix, bbox };
}
static inline __attribute__((always_inline))
float4 bbox_intersect(thread const float4& a, thread const float4& b)
{
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
}
static inline __attribute__((always_inline))
uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
if (ix < v_80.conf.n_clip)
{
return v_96.memory[(v_80.conf.clip_alloc.offset >> uint(2)) + ix];
}
else
{
return 2147483648u;
}
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_96.memory[base]) - 32768.0;
float bbox_t = float(v_96.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_96.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_96.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
uint search_link(thread Bic& bic, thread uint3& gl_LocalInvocationID, threadgroup Bic (&sh_bic)[510])
{
uint ix = gl_LocalInvocationID.x;
uint j = 0u;
while (j < 8u)
{
uint base = 512u - (2u << (8u - j));
if (((ix >> j) & 1u) != 0u)
{
Bic param = sh_bic[(base + (ix >> j)) - 1u];
Bic param_1 = bic;
Bic test = bic_combine(param, param_1);
if (test.b > 0u)
{
break;
}
bic = test;
ix -= (1u << j);
}
j++;
}
if (ix > 0u)
{
while (j > 0u)
{
j--;
uint base_1 = 512u - (2u << (8u - j));
Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u];
Bic param_3 = bic;
Bic test_1 = bic_combine(param_2, param_3);
if (test_1.b == 0u)
{
bic = test_1;
ix -= (1u << j);
}
}
}
if (ix > 0u)
{
return ix - 1u;
}
else
{
return 4294967295u - bic.a;
}
}
static inline __attribute__((always_inline))
void store_clip_bbox(thread const uint& ix, thread const float4& bbox, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * ix);
v_96.memory[base] = as_type<uint>(bbox.x);
v_96.memory[base + 1u] = as_type<uint>(bbox.y);
v_96.memory[base + 2u] = as_type<uint>(bbox.z);
v_96.memory[base + 3u] = as_type<uint>(bbox.w);
}
kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_80 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup Bic sh_bic[510];
threadgroup uint sh_stack[256];
threadgroup float4 sh_stack_bbox[256];
threadgroup uint sh_link[256];
threadgroup float4 sh_bbox[256];
uint th = gl_LocalInvocationID.x;
Bic bic = Bic{ 0u, 0u };
if (th < gl_WorkGroupID.x)
{
uint param = th;
bic = load_bic(param, v_80, v_96);
}
sh_bic[th] = bic;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[th + (1u << i)];
Bic param_1 = bic;
Bic param_2 = other;
bic = bic_combine(param_1, param_2);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_bic[th] = bic;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint stack_size = sh_bic[0].b;
uint sp = 255u - th;
uint ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = ix + (128u >> i_1);
if (sp < sh_bic[probe].b)
{
ix = probe;
}
}
uint b = sh_bic[ix].b;
float4 bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
if (sp < b)
{
uint param_3 = (((ix * 256u) + b) - sp) - 1u;
ClipEl el = load_clip_el(param_3, v_80, v_96);
sh_stack[th] = el.parent_ix;
bbox = el.bbox;
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
sh_stack_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th >= (1u << i_2))
{
float4 param_4 = sh_stack_bbox[th - (1u << i_2)];
float4 param_5 = bbox;
bbox = bbox_intersect(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
}
sh_stack_bbox[th] = bbox;
uint param_6 = gl_GlobalInvocationID.x;
uint inp = load_path_ix(param_6, v_80, v_96);
bool is_push = int(inp) >= 0;
bic = Bic{ 1u - uint(is_push), uint(is_push) };
sh_bic[th] = bic;
if (is_push)
{
uint param_7 = inp;
bbox = load_path_bbox(param_7, v_80, v_96);
}
else
{
bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
}
uint inbase = 0u;
for (uint i_3 = 0u; i_3 < 7u; i_3++)
{
uint outbase = 512u - (1u << (8u - i_3));
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th < (1u << (7u - i_3)))
{
Bic param_8 = sh_bic[inbase + (th * 2u)];
Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u];
sh_bic[outbase + th] = bic_combine(param_8, param_9);
}
inbase = outbase;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
bic = Bic{ 0u, 0u };
Bic param_10 = bic;
uint _619 = search_link(param_10, gl_LocalInvocationID, sh_bic);
bic = param_10;
uint link = _619;
sh_link[th] = link;
threadgroup_barrier(mem_flags::mem_threadgroup);
uint grandparent;
if (int(link) >= 0)
{
grandparent = sh_link[link];
}
else
{
grandparent = link - 1u;
}
uint parent;
if (int(link) >= 0)
{
parent = (gl_WorkGroupID.x * 256u) + link;
}
else
{
if (int(link + stack_size) >= 0)
{
parent = sh_stack[256u + link];
}
else
{
parent = 4294967295u;
}
}
for (uint i_4 = 0u; i_4 < 8u; i_4++)
{
if (i_4 != 0u)
{
sh_link[th] = link;
}
sh_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (int(link) >= 0)
{
float4 param_11 = sh_bbox[link];
float4 param_12 = bbox;
bbox = bbox_intersect(param_11, param_12);
link = sh_link[link];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
}
if (int(link + stack_size) >= 0)
{
float4 param_13 = sh_stack_bbox[256u + link];
float4 param_14 = bbox;
bbox = bbox_intersect(param_13, param_14);
}
sh_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
uint path_ix = inp;
bool _718 = !is_push;
bool _726;
if (_718)
{
_726 = gl_GlobalInvocationID.x < v_80.conf.n_clip;
}
else
{
_726 = _718;
}
if (_726)
{
uint param_15 = parent;
path_ix = load_path_ix(param_15, v_80, v_96);
uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * (~inp));
v_96.memory[drawmonoid_out_base] = path_ix;
if (int(grandparent) >= 0)
{
bbox = sh_bbox[grandparent];
}
else
{
if (int(grandparent + stack_size) >= 0)
{
bbox = sh_stack_bbox[256u + grandparent];
}
else
{
bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
}
}
}
uint param_16 = gl_GlobalInvocationID.x;
float4 param_17 = bbox;
store_clip_bbox(param_16, param_17, v_80, v_96);
}

Binary file not shown.

Binary file not shown.

View file

@ -1,182 +0,0 @@
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _268 = { 0u, 0u };
ByteAddressBuffer _64 : register(t1, space0);
RWByteAddressBuffer _80 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Bic sh_bic[256];
groupshared uint sh_parent[256];
groupshared uint sh_path_ix[256];
groupshared float4 sh_bbox[256];
Bic bic_combine(Bic x, Bic y)
{
uint m = min(x.b, y.a);
Bic _56 = { (x.a + y.a) - m, (x.b + y.b) - m };
return _56;
}
void store_bic(uint ix, Bic bic)
{
uint base = (_64.Load(56) >> uint(2)) + (2u * ix);
_80.Store(base * 4 + 12, bic.a);
_80.Store((base + 1u) * 4 + 12, bic.b);
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_64.Load(44) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_80.Load(base * 4 + 12)) - 32768.0f;
float bbox_t = float(_80.Load((base + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_80.Load((base + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_80.Load((base + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
void store_clip_el(uint ix, ClipEl el)
{
uint base = (_64.Load(60) >> uint(2)) + (5u * ix);
_80.Store(base * 4 + 12, el.parent_ix);
_80.Store((base + 1u) * 4 + 12, asuint(el.bbox.x));
_80.Store((base + 2u) * 4 + 12, asuint(el.bbox.y));
_80.Store((base + 3u) * 4 + 12, asuint(el.bbox.z));
_80.Store((base + 4u) * 4 + 12, asuint(el.bbox.w));
}
void comp_main()
{
uint th = gl_LocalInvocationID.x;
uint inp = _80.Load(((_64.Load(52) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 12);
bool is_push = int(inp) >= 0;
Bic _208 = { 1u - uint(is_push), uint(is_push) };
Bic bic = _208;
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
Bic param = bic;
Bic param_1 = other;
bic = bic_combine(param, param_1);
}
GroupMemoryBarrierWithGroupSync();
sh_bic[th] = bic;
}
if (th == 0u)
{
uint param_2 = gl_WorkGroupID.x;
Bic param_3 = bic;
store_bic(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
uint size = sh_bic[0].b;
bic = _268;
if ((th + 1u) < 256u)
{
bic = sh_bic[th + 1u];
}
bool _284;
if (is_push)
{
_284 = bic.a == 0u;
}
else
{
_284 = is_push;
}
if (_284)
{
uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th;
sh_path_ix[local_ix] = inp;
}
GroupMemoryBarrierWithGroupSync();
float4 bbox;
if (th < size)
{
uint path_ix = sh_path_ix[th];
uint param_4 = path_ix;
bbox = load_path_bbox(param_4);
}
if (th < size)
{
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
ClipEl _332 = { parent_ix, bbox };
ClipEl el = _332;
uint param_5 = gl_GlobalInvocationID.x;
ClipEl param_6 = el;
store_clip_el(param_5, param_6);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,179 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Bic bic_combine(thread const Bic& x, thread const Bic& y)
{
uint m = min(x.b, y.a);
return Bic{ (x.a + y.a) - m, (x.b + y.b) - m };
}
static inline __attribute__((always_inline))
void store_bic(thread const uint& ix, thread const Bic& bic, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix);
v_80.memory[base] = bic.a;
v_80.memory[base + 1u] = bic.b;
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_80.memory[base]) - 32768.0;
float bbox_t = float(v_80.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_80.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_80.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
void store_clip_el(thread const uint& ix, thread const ClipEl& el, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix);
v_80.memory[base] = el.parent_ix;
v_80.memory[base + 1u] = as_type<uint>(el.bbox.x);
v_80.memory[base + 2u] = as_type<uint>(el.bbox.y);
v_80.memory[base + 3u] = as_type<uint>(el.bbox.z);
v_80.memory[base + 4u] = as_type<uint>(el.bbox.w);
}
kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_64 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Bic sh_bic[256];
threadgroup uint sh_parent[256];
threadgroup uint sh_path_ix[256];
threadgroup float4 sh_bbox[256];
uint th = gl_LocalInvocationID.x;
uint inp = v_80.memory[(v_64.conf.clip_alloc.offset >> uint(2)) + gl_GlobalInvocationID.x];
bool is_push = int(inp) >= 0;
Bic bic = Bic{ 1u - uint(is_push), uint(is_push) };
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
Bic param = bic;
Bic param_1 = other;
bic = bic_combine(param, param_1);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_bic[th] = bic;
}
if (th == 0u)
{
uint param_2 = gl_WorkGroupID.x;
Bic param_3 = bic;
store_bic(param_2, param_3, v_64, v_80);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint size = sh_bic[0].b;
bic = Bic{ 0u, 0u };
if ((th + 1u) < 256u)
{
bic = sh_bic[th + 1u];
}
bool _284;
if (is_push)
{
_284 = bic.a == 0u;
}
else
{
_284 = is_push;
}
if (_284)
{
uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th;
sh_path_ix[local_ix] = inp;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
float4 bbox;
if (th < size)
{
uint path_ix = sh_path_ix[th];
uint param_4 = path_ix;
bbox = load_path_bbox(param_4, v_64, v_80);
}
if (th < size)
{
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
ClipEl el = ClipEl{ parent_ix, bbox };
uint param_5 = gl_GlobalInvocationID.x;
ClipEl param_6 = el;
store_clip_el(param_5, param_6, v_64, v_80);
}
}

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -1,269 +0,0 @@
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _23 = { 0u, 0u, 0u, 0u };
ByteAddressBuffer _93 : register(t1, space0);
ByteAddressBuffer _103 : register(t2, space0);
ByteAddressBuffer _203 : register(t3, space0);
RWByteAddressBuffer _285 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[256];
DrawMonoid map_tag(uint tag_word)
{
uint has_path = uint(tag_word != 0u);
DrawMonoid _76 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u };
return _76;
}
DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
DrawMonoid draw_monoid_identity()
{
return _23;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _93.Load(104) >> uint(2);
uint tag_word = _103.Load((drawtag_base + ix) * 4 + 0);
uint param = tag_word;
DrawMonoid agg = map_tag(param);
DrawMonoid local[8];
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
tag_word = _103.Load(((drawtag_base + ix) + i) * 4 + 0);
uint param_1 = tag_word;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_4 = other;
DrawMonoid param_5 = agg;
agg = combine_draw_monoid(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
DrawMonoid row = draw_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
DrawMonoid _209;
_209.path_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 0);
_209.clip_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 4);
_209.scene_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 8);
_209.info_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 12);
row.path_ix = _209.path_ix;
row.clip_ix = _209.clip_ix;
row.scene_offset = _209.scene_offset;
row.info_offset = _209.info_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_6 = row;
DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_draw_monoid(param_6, param_7);
}
uint drawdata_base = _93.Load(108) >> uint(2);
uint drawinfo_base = _93.Load(72) >> uint(2);
uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_93.Load(48) >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _93.Load(52) >> uint(2);
float4 mat;
float2 translate;
float2 p0;
float2 p1;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid m = row;
if (i_2 > 0u)
{
DrawMonoid param_8 = m;
DrawMonoid param_9 = local[i_2 - 1u];
m = combine_draw_monoid(param_8, param_9);
}
_285.Store((out_base + (i_2 * 4u)) * 4 + 12, m.path_ix);
_285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 12, m.clip_ix);
_285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 12, m.scene_offset);
_285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 12, m.info_offset);
uint dd = drawdata_base + (m.scene_offset >> uint(2));
uint di = drawinfo_base + (m.info_offset >> uint(2));
tag_word = _103.Load(((drawtag_base + ix) + i_2) * 4 + 0);
if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u))
{
uint bbox_offset = (_93.Load(44) >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_285.Load(bbox_offset * 4 + 12)) - 32768.0f;
float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 12));
uint fill_mode = uint(linewidth >= 0.0f);
if (((linewidth >= 0.0f) || (tag_word == 276u)) || (tag_word == 732u))
{
uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 12);
uint t = (_93.Load(40) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_285.Load(t * 4 + 12), _285.Load((t + 1u) * 4 + 12), _285.Load((t + 2u) * 4 + 12), _285.Load((t + 3u) * 4 + 12)));
if ((tag_word == 276u) || (tag_word == 732u))
{
translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 12), _285.Load((t + 5u) * 4 + 12)));
}
}
if (linewidth >= 0.0f)
{
linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z)));
}
switch (tag_word)
{
case 68u:
case 72u:
{
_285.Store(di * 4 + 12, asuint(linewidth));
break;
}
case 276u:
{
_285.Store(di * 4 + 12, asuint(linewidth));
p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0)));
p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0)));
p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate;
float2 dxy = p1 - p0;
float scale = 1.0f / ((dxy.x * dxy.x) + (dxy.y * dxy.y));
float line_x = dxy.x * scale;
float line_y = dxy.y * scale;
float line_c = -((p0.x * line_x) + (p0.y * line_y));
_285.Store((di + 1u) * 4 + 12, asuint(line_x));
_285.Store((di + 2u) * 4 + 12, asuint(line_y));
_285.Store((di + 3u) * 4 + 12, asuint(line_c));
break;
}
case 732u:
{
p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0)));
p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0)));
float r0 = asfloat(_103.Load((dd + 5u) * 4 + 0));
float r1 = asfloat(_103.Load((dd + 6u) * 4 + 0));
float inv_det = 1.0f / ((mat.x * mat.w) - (mat.y * mat.z));
float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det;
float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y);
inv_tr += p0;
float2 center1 = p1 - p0;
float rr = r1 / (r1 - r0);
float rainv = rr / ((r1 * r1) - dot(center1, center1));
float2 c1 = center1 * rainv;
float ra = rr * rainv;
float roff = rr - 1.0f;
_285.Store(di * 4 + 12, asuint(linewidth));
_285.Store((di + 1u) * 4 + 12, asuint(inv_mat.x));
_285.Store((di + 2u) * 4 + 12, asuint(inv_mat.y));
_285.Store((di + 3u) * 4 + 12, asuint(inv_mat.z));
_285.Store((di + 4u) * 4 + 12, asuint(inv_mat.w));
_285.Store((di + 5u) * 4 + 12, asuint(inv_tr.x));
_285.Store((di + 6u) * 4 + 12, asuint(inv_tr.y));
_285.Store((di + 7u) * 4 + 12, asuint(c1.x));
_285.Store((di + 8u) * 4 + 12, asuint(c1.y));
_285.Store((di + 9u) * 4 + 12, asuint(ra));
_285.Store((di + 10u) * 4 + 12, asuint(roff));
break;
}
case 5u:
{
break;
}
}
}
if ((tag_word == 5u) || (tag_word == 37u))
{
uint path_ix = ~(out_ix + i_2);
if (tag_word == 5u)
{
path_ix = m.path_ix;
}
_285.Store((clip_out_base + m.clip_ix) * 4 + 12, path_ix);
}
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,318 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct ParentBuf
{
DrawMonoid_1 parent[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
uint has_path = uint(tag_word != 0u);
return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u };
}
static inline __attribute__((always_inline))
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid draw_monoid_identity()
{
return DrawMonoid{ 0u, 0u, 0u, 0u };
}
kernel void main0(device Memory& _285 [[buffer(0)]], const device ConfigBuf& _93 [[buffer(1)]], const device SceneBuf& _103 [[buffer(2)]], const device ParentBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _93.conf.drawtag_offset >> uint(2);
uint tag_word = _103.scene[drawtag_base + ix];
uint param = tag_word;
DrawMonoid agg = map_tag(param);
spvUnsafeArray<DrawMonoid, 8> local;
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
tag_word = _103.scene[(drawtag_base + ix) + i];
uint param_1 = tag_word;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_4 = other;
DrawMonoid param_5 = agg;
agg = combine_draw_monoid(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = draw_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _206 = gl_WorkGroupID.x - 1u;
row.path_ix = _203.parent[_206].path_ix;
row.clip_ix = _203.parent[_206].clip_ix;
row.scene_offset = _203.parent[_206].scene_offset;
row.info_offset = _203.parent[_206].info_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_6 = row;
DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_draw_monoid(param_6, param_7);
}
uint drawdata_base = _93.conf.drawdata_offset >> uint(2);
uint drawinfo_base = _93.conf.drawinfo_alloc.offset >> uint(2);
uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_93.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _93.conf.clip_alloc.offset >> uint(2);
float4 mat;
float2 translate;
float2 p0;
float2 p1;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid m = row;
if (i_2 > 0u)
{
DrawMonoid param_8 = m;
DrawMonoid param_9 = local[i_2 - 1u];
m = combine_draw_monoid(param_8, param_9);
}
_285.memory[out_base + (i_2 * 4u)] = m.path_ix;
_285.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix;
_285.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset;
_285.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset;
uint dd = drawdata_base + (m.scene_offset >> uint(2));
uint di = drawinfo_base + (m.info_offset >> uint(2));
tag_word = _103.scene[(drawtag_base + ix) + i_2];
if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u))
{
uint bbox_offset = (_93.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_285.memory[bbox_offset]) - 32768.0;
float bbox_t = float(_285.memory[bbox_offset + 1u]) - 32768.0;
float bbox_r = float(_285.memory[bbox_offset + 2u]) - 32768.0;
float bbox_b = float(_285.memory[bbox_offset + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = as_type<float>(_285.memory[bbox_offset + 4u]);
uint fill_mode = uint(linewidth >= 0.0);
if (((linewidth >= 0.0) || (tag_word == 276u)) || (tag_word == 732u))
{
uint trans_ix = _285.memory[bbox_offset + 5u];
uint t = (_93.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
mat = as_type<float4>(uint4(_285.memory[t], _285.memory[t + 1u], _285.memory[t + 2u], _285.memory[t + 3u]));
if ((tag_word == 276u) || (tag_word == 732u))
{
translate = as_type<float2>(uint2(_285.memory[t + 4u], _285.memory[t + 5u]));
}
}
if (linewidth >= 0.0)
{
linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z)));
}
switch (tag_word)
{
case 68u:
case 72u:
{
_285.memory[di] = as_type<uint>(linewidth);
break;
}
case 276u:
{
_285.memory[di] = as_type<uint>(linewidth);
p0 = as_type<float2>(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u]));
p1 = as_type<float2>(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u]));
p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate;
float2 dxy = p1 - p0;
float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y));
float line_x = dxy.x * scale;
float line_y = dxy.y * scale;
float line_c = -((p0.x * line_x) + (p0.y * line_y));
_285.memory[di + 1u] = as_type<uint>(line_x);
_285.memory[di + 2u] = as_type<uint>(line_y);
_285.memory[di + 3u] = as_type<uint>(line_c);
break;
}
case 732u:
{
p0 = as_type<float2>(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u]));
p1 = as_type<float2>(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u]));
float r0 = as_type<float>(_103.scene[dd + 5u]);
float r1 = as_type<float>(_103.scene[dd + 6u]);
float inv_det = 1.0 / ((mat.x * mat.w) - (mat.y * mat.z));
float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det;
float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y);
inv_tr += p0;
float2 center1 = p1 - p0;
float rr = r1 / (r1 - r0);
float rainv = rr / ((r1 * r1) - dot(center1, center1));
float2 c1 = center1 * rainv;
float ra = rr * rainv;
float roff = rr - 1.0;
_285.memory[di] = as_type<uint>(linewidth);
_285.memory[di + 1u] = as_type<uint>(inv_mat.x);
_285.memory[di + 2u] = as_type<uint>(inv_mat.y);
_285.memory[di + 3u] = as_type<uint>(inv_mat.z);
_285.memory[di + 4u] = as_type<uint>(inv_mat.w);
_285.memory[di + 5u] = as_type<uint>(inv_tr.x);
_285.memory[di + 6u] = as_type<uint>(inv_tr.y);
_285.memory[di + 7u] = as_type<uint>(c1.x);
_285.memory[di + 8u] = as_type<uint>(c1.y);
_285.memory[di + 9u] = as_type<uint>(ra);
_285.memory[di + 10u] = as_type<uint>(roff);
break;
}
case 5u:
{
break;
}
}
}
if ((tag_word == 5u) || (tag_word == 37u))
{
uint path_ix = ~(out_ix + i_2);
if (tag_word == 5u)
{
path_ix = m.path_ix;
}
_285.memory[clip_out_base + m.clip_ix] = path_ix;
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,127 +0,0 @@
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
ByteAddressBuffer _87 : register(t1, space0);
ByteAddressBuffer _97 : register(t2, space0);
RWByteAddressBuffer _188 : register(u3, space0);
RWByteAddressBuffer _206 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[256];
DrawMonoid map_tag(uint tag_word)
{
uint has_path = uint(tag_word != 0u);
DrawMonoid _70 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u };
return _70;
}
DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _87.Load(104) >> uint(2);
uint tag_word = _97.Load((drawtag_base + ix) * 4 + 0);
uint param = tag_word;
DrawMonoid agg = map_tag(param);
for (uint i = 1u; i < 8u; i++)
{
uint tag_word_1 = _97.Load(((drawtag_base + ix) + i) * 4 + 0);
uint param_1 = tag_word_1;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_4 = agg;
DrawMonoid param_5 = other;
agg = combine_draw_monoid(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_188.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix);
_188.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix);
_188.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset);
_188.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,142 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct OutBuf
{
DrawMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
uint has_path = uint(tag_word != 0u);
return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u };
}
static inline __attribute__((always_inline))
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
kernel void main0(const device ConfigBuf& _87 [[buffer(1)]], const device SceneBuf& _97 [[buffer(2)]], device OutBuf& _188 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _87.conf.drawtag_offset >> uint(2);
uint tag_word = _97.scene[drawtag_base + ix];
uint param = tag_word;
DrawMonoid agg = map_tag(param);
for (uint i = 1u; i < 8u; i++)
{
uint tag_word_1 = _97.scene[(drawtag_base + ix) + i];
uint param_1 = tag_word_1;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_4 = agg;
DrawMonoid param_5 = other;
agg = combine_draw_monoid(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_188.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_188.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix;
_188.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset;
_188.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,108 +0,0 @@
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _18 = { 0u, 0u, 0u, 0u };
RWByteAddressBuffer _71 : register(u0, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[256];
DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
DrawMonoid draw_monoid_identity()
{
return _18;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
DrawMonoid _75;
_75.path_ix = _71.Load(ix * 16 + 0);
_75.clip_ix = _71.Load(ix * 16 + 4);
_75.scene_offset = _71.Load(ix * 16 + 8);
_75.info_offset = _71.Load(ix * 16 + 12);
DrawMonoid local[8];
local[0].path_ix = _75.path_ix;
local[0].clip_ix = _75.clip_ix;
local[0].scene_offset = _75.scene_offset;
local[0].info_offset = _75.info_offset;
DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
DrawMonoid param = local[i - 1u];
DrawMonoid _106;
_106.path_ix = _71.Load((ix + i) * 16 + 0);
_106.clip_ix = _71.Load((ix + i) * 16 + 4);
_106.scene_offset = _71.Load((ix + i) * 16 + 8);
_106.info_offset = _71.Load((ix + i) * 16 + 12);
param_1.path_ix = _106.path_ix;
param_1.clip_ix = _106.clip_ix;
param_1.scene_offset = _106.scene_offset;
param_1.info_offset = _106.info_offset;
local[i] = combine_draw_monoid(param, param_1);
}
DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other;
DrawMonoid param_3 = agg;
agg = combine_draw_monoid(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
DrawMonoid row = draw_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_draw_monoid(param_4, param_5);
uint _199 = ix + i_2;
_71.Store(_199 * 16 + 0, m.path_ix);
_71.Store(_199 * 16 + 4, m.clip_ix);
_71.Store(_199 * 16 + 8, m.scene_offset);
_71.Store(_199 * 16 + 12, m.info_offset);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,140 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct DataBuf
{
DrawMonoid_1 data[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid draw_monoid_identity()
{
return DrawMonoid{ 0u, 0u, 0u, 0u };
}
kernel void main0(device DataBuf& _71 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
spvUnsafeArray<DrawMonoid, 8> local;
local[0].path_ix = _71.data[ix].path_ix;
local[0].clip_ix = _71.data[ix].clip_ix;
local[0].scene_offset = _71.data[ix].scene_offset;
local[0].info_offset = _71.data[ix].info_offset;
DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
uint _100 = ix + i;
DrawMonoid param = local[i - 1u];
param_1.path_ix = _71.data[_100].path_ix;
param_1.clip_ix = _71.data[_100].clip_ix;
param_1.scene_offset = _71.data[_100].scene_offset;
param_1.info_offset = _71.data[_100].info_offset;
local[i] = combine_draw_monoid(param, param_1);
}
DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other;
DrawMonoid param_3 = agg;
agg = combine_draw_monoid(param_2, param_3);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = draw_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_draw_monoid(param_4, param_5);
uint _199 = ix + i_2;
_71.data[_199].path_ix = m.path_ix;
_71.data[_199].clip_ix = m.clip_ix;
_71.data[_199].scene_offset = m.scene_offset;
_71.data[_199].info_offset = m.info_offset;
}
}

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -1,673 +0,0 @@
struct Alloc
{
uint offset;
};
struct PathCubicRef
{
uint offset;
};
struct PathCubic
{
float2 p0;
float2 p1;
float2 p2;
float2 p3;
uint path_ix;
uint trans_ix;
float2 stroke;
};
struct PathSegRef
{
uint offset;
};
struct PathSegTag
{
uint tag;
uint flags;
};
struct TileRef
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct TileSegRef
{
uint offset;
};
struct TileSeg
{
float2 origin;
float2 _vector;
float y_edge;
TileSegRef next;
};
struct SubdivResult
{
float val;
float a0;
float a2;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
static const PathSegTag _722 = { 0u, 0u };
RWByteAddressBuffer _143 : register(u0, space0);
ByteAddressBuffer _711 : register(t1, space0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
static bool mem_ok;
bool check_deps(uint dep_stage)
{
uint _149;
_143.InterlockedOr(4, 0u, _149);
return (_149 & dep_stage) == 0u;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _143.Load(offset * 4 + 12);
return v;
}
PathSegTag PathSeg_tag(Alloc a, PathSegRef ref)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
PathSegTag _362 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _362;
}
PathCubic PathCubic_read(Alloc a, PathCubicRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9);
Alloc param_10 = a;
uint param_11 = ix + 5u;
uint raw5 = read_mem(param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 6u;
uint raw6 = read_mem(param_12, param_13);
Alloc param_14 = a;
uint param_15 = ix + 7u;
uint raw7 = read_mem(param_14, param_15);
Alloc param_16 = a;
uint param_17 = ix + 8u;
uint raw8 = read_mem(param_16, param_17);
Alloc param_18 = a;
uint param_19 = ix + 9u;
uint raw9 = read_mem(param_18, param_19);
Alloc param_20 = a;
uint param_21 = ix + 10u;
uint raw10 = read_mem(param_20, param_21);
Alloc param_22 = a;
uint param_23 = ix + 11u;
uint raw11 = read_mem(param_22, param_23);
PathCubic s;
s.p0 = float2(asfloat(raw0), asfloat(raw1));
s.p1 = float2(asfloat(raw2), asfloat(raw3));
s.p2 = float2(asfloat(raw4), asfloat(raw5));
s.p3 = float2(asfloat(raw6), asfloat(raw7));
s.path_ix = raw8;
s.trans_ix = raw9;
s.stroke = float2(asfloat(raw10), asfloat(raw11));
return s;
}
PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref)
{
PathCubicRef _368 = { ref.offset + 4u };
Alloc param = a;
PathCubicRef param_1 = _368;
return PathCubic_read(param, param_1);
}
float2 eval_cubic(float2 p0, float2 p1, float2 p2, float2 p3, float t)
{
float mt = 1.0f - t;
return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0f)) + (((p2 * (mt * 3.0f)) + (p3 * t)) * t)) * t);
}
float approx_parabola_integral(float x)
{
return x * rsqrt(sqrt(0.3300000131130218505859375f + (0.201511204242706298828125f + ((0.25f * x) * x))));
}
SubdivResult estimate_subdiv(float2 p0, float2 p1, float2 p2, float sqrt_tol)
{
float2 d01 = p1 - p0;
float2 d12 = p2 - p1;
float2 dd = d01 - d12;
float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x);
float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross;
float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross;
float scale = abs(_cross / (length(dd) * (x2 - x0)));
float param = x0;
float a0 = approx_parabola_integral(param);
float param_1 = x2;
float a2 = approx_parabola_integral(param_1);
float val = 0.0f;
if (scale < 1000000000.0f)
{
float da = abs(a2 - a0);
float sqrt_scale = sqrt(scale);
if (sign(x0) == sign(x2))
{
val = da * sqrt_scale;
}
else
{
float xmin = sqrt_tol / sqrt_scale;
float param_2 = xmin;
val = (sqrt_tol * da) / approx_parabola_integral(param_2);
}
}
SubdivResult _690 = { val, a0, a2 };
return _690;
}
uint fill_mode_from_flags(uint flags)
{
return flags & 1u;
}
Path Path_read(Alloc a, PathRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _422 = { raw2 };
s.tiles = _422;
return s;
}
Alloc new_alloc(uint offset, uint size, bool mem_ok_1)
{
Alloc a;
a.offset = offset;
return a;
}
float approx_parabola_inv_integral(float x)
{
return x * sqrt(0.61000001430511474609375f + (0.1520999968051910400390625f + ((0.25f * x) * x)));
}
float2 eval_quad(float2 p0, float2 p1, float2 p2, float t)
{
float mt = 1.0f - t;
return (p0 * (mt * mt)) + (((p1 * (mt * 2.0f)) + (p2 * t)) * t);
}
uint malloc_stage(uint size, uint mem_size, uint stage)
{
uint _158;
_143.InterlockedAdd(0, size, _158);
uint offset = _158;
if ((offset + size) > mem_size)
{
uint _168;
_143.InterlockedOr(4, stage, _168);
offset = 0u;
}
return offset;
}
TileRef Tile_index(TileRef ref, uint index)
{
TileRef _380 = { ref.offset + (index * 8u) };
return _380;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_143.Store(offset * 4 + 12, val);
}
void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.origin.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.origin.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s._vector.x);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s._vector.y);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.y_edge);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.next.offset;
write_mem(param_15, param_16, param_17);
}
void comp_main()
{
mem_ok = true;
uint param = 7u;
bool _694 = check_deps(param);
if (!_694)
{
return;
}
uint element_ix = gl_GlobalInvocationID.x;
PathSegRef _719 = { _711.Load(32) + (element_ix * 52u) };
PathSegRef ref = _719;
PathSegTag tag = _722;
if (element_ix < _711.Load(8))
{
Alloc _732;
_732.offset = _711.Load(32);
Alloc param_1;
param_1.offset = _732.offset;
PathSegRef param_2 = ref;
tag = PathSeg_tag(param_1, param_2);
}
switch (tag.tag)
{
case 1u:
{
Alloc _745;
_745.offset = _711.Load(32);
Alloc param_3;
param_3.offset = _745.offset;
PathSegRef param_4 = ref;
PathCubic cubic = PathSeg_Cubic_read(param_3, param_4);
float2 err_v = (((cubic.p2 - cubic.p1) * 3.0f) + cubic.p0) - cubic.p3;
float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);
uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875f, 0.16666667163372039794921875f))), 1u);
n_quads = min(n_quads, 16u);
float val = 0.0f;
float2 qp0 = cubic.p0;
float _step = 1.0f / float(n_quads);
SubdivResult keep_params[16];
for (uint i = 0u; i < n_quads; i++)
{
float t = float(i + 1u) * _step;
float2 param_5 = cubic.p0;
float2 param_6 = cubic.p1;
float2 param_7 = cubic.p2;
float2 param_8 = cubic.p3;
float param_9 = t;
float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9);
float2 param_10 = cubic.p0;
float2 param_11 = cubic.p1;
float2 param_12 = cubic.p2;
float2 param_13 = cubic.p3;
float param_14 = t - (0.5f * _step);
float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14);
qp1 = (qp1 * 2.0f) - ((qp0 + qp2) * 0.5f);
float2 param_15 = qp0;
float2 param_16 = qp1;
float2 param_17 = qp2;
float param_18 = 0.4743416607379913330078125f;
SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18);
keep_params[i] = params;
val += params.val;
qp0 = qp2;
}
uint n = max(uint(ceil((val * 0.5f) / 0.4743416607379913330078125f)), 1u);
uint param_19 = tag.flags;
bool is_stroke = fill_mode_from_flags(param_19) == 1u;
uint path_ix = cubic.path_ix;
PathRef _901 = { _711.Load(20) + (path_ix * 12u) };
Alloc _904;
_904.offset = _711.Load(20);
Alloc param_20;
param_20.offset = _904.offset;
PathRef param_21 = _901;
Path path = Path_read(param_20, param_21);
uint param_22 = path.tiles.offset;
uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_24 = true;
Alloc path_alloc = new_alloc(param_22, param_23, param_24);
int4 bbox = int4(path.bbox);
float2 p0 = cubic.p0;
qp0 = cubic.p0;
float v_step = val / float(n);
int n_out = 1;
float val_sum = 0.0f;
float2 p1;
float _1143;
TileSeg tile_seg;
for (uint i_1 = 0u; i_1 < n_quads; i_1++)
{
float t_1 = float(i_1 + 1u) * _step;
float2 param_25 = cubic.p0;
float2 param_26 = cubic.p1;
float2 param_27 = cubic.p2;
float2 param_28 = cubic.p3;
float param_29 = t_1;
float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29);
float2 param_30 = cubic.p0;
float2 param_31 = cubic.p1;
float2 param_32 = cubic.p2;
float2 param_33 = cubic.p3;
float param_34 = t_1 - (0.5f * _step);
float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34);
qp1_1 = (qp1_1 * 2.0f) - ((qp0 + qp2_1) * 0.5f);
SubdivResult params_1 = keep_params[i_1];
float param_35 = params_1.a0;
float u0 = approx_parabola_inv_integral(param_35);
float param_36 = params_1.a2;
float u2 = approx_parabola_inv_integral(param_36);
float uscale = 1.0f / (u2 - u0);
float target = float(n_out) * v_step;
for (;;)
{
bool _1036 = uint(n_out) == n;
bool _1046;
if (!_1036)
{
_1046 = target < (val_sum + params_1.val);
}
else
{
_1046 = _1036;
}
if (_1046)
{
if (uint(n_out) == n)
{
p1 = cubic.p3;
}
else
{
float u = (target - val_sum) / params_1.val;
float a = lerp(params_1.a0, params_1.a2, u);
float param_37 = a;
float au = approx_parabola_inv_integral(param_37);
float t_2 = (au - u0) * uscale;
float2 param_38 = qp0;
float2 param_39 = qp1_1;
float2 param_40 = qp2_1;
float param_41 = t_2;
p1 = eval_quad(param_38, param_39, param_40, param_41);
}
float xmin = min(p0.x, p1.x) - cubic.stroke.x;
float xmax = max(p0.x, p1.x) + cubic.stroke.x;
float ymin = min(p0.y, p1.y) - cubic.stroke.y;
float ymax = max(p0.y, p1.y) + cubic.stroke.y;
float dx = p1.x - p0.x;
float dy = p1.y - p0.y;
if (abs(dy) < 9.999999717180685365747194737196e-10f)
{
_1143 = 1000000000.0f;
}
else
{
_1143 = dx / dy;
}
float invslope = _1143;
float c = (cubic.stroke.x + (abs(invslope) * (8.0f + cubic.stroke.y))) * 0.0625f;
float b = invslope;
float a_1 = (p0.x - ((p0.y - 8.0f) * b)) * 0.0625f;
int x0 = int(floor(xmin * 0.0625f));
int x1 = int(floor(xmax * 0.0625f) + 1.0f);
int y0 = int(floor(ymin * 0.0625f));
int y1 = int(floor(ymax * 0.0625f) + 1.0f);
x0 = clamp(x0, bbox.x, bbox.z);
y0 = clamp(y0, bbox.y, bbox.w);
x1 = clamp(x1, bbox.x, bbox.z);
y1 = clamp(y1, bbox.y, bbox.w);
float xc = a_1 + (b * float(y0));
int stride = bbox.z - bbox.x;
int base = ((y0 - bbox.y) * stride) - bbox.x;
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
uint malloc_size = n_tile_alloc * 24u;
uint param_42 = malloc_size;
uint param_43 = _711.Load(0);
uint param_44 = 4u;
uint _1265 = malloc_stage(param_42, param_43, param_44);
uint tile_offset = _1265;
if (tile_offset == 0u)
{
mem_ok = false;
}
uint param_45 = tile_offset;
uint param_46 = malloc_size;
bool param_47 = true;
Alloc tile_alloc = new_alloc(param_45, param_46, param_47);
int xray = int(floor(p0.x * 0.0625f));
int last_xray = int(floor(p1.x * 0.0625f));
if (p0.y > p1.y)
{
int tmp = xray;
xray = last_xray;
last_xray = tmp;
}
for (int y = y0; y < y1; y++)
{
float tile_y0 = float(y * 16);
int xbackdrop = max((xray + 1), bbox.x);
bool _1322 = !is_stroke;
bool _1332;
if (_1322)
{
_1332 = min(p0.y, p1.y) < tile_y0;
}
else
{
_1332 = _1322;
}
bool _1339;
if (_1332)
{
_1339 = xbackdrop < bbox.z;
}
else
{
_1339 = _1332;
}
if (_1339)
{
int backdrop = (p1.y < p0.y) ? 1 : (-1);
TileRef param_48 = path.tiles;
uint param_49 = uint(base + xbackdrop);
TileRef tile_ref = Tile_index(param_48, param_49);
uint tile_el = tile_ref.offset >> uint(2);
uint _1369;
_143.InterlockedAdd((tile_el + 1u) * 4 + 12, uint(backdrop), _1369);
}
int next_xray = last_xray;
if (y < (y1 - 1))
{
float tile_y1 = float((y + 1) * 16);
float x_edge = lerp(p0.x, p1.x, (tile_y1 - p0.y) / dy);
next_xray = int(floor(x_edge * 0.0625f));
}
int min_xray = min(xray, next_xray);
int max_xray = max(xray, next_xray);
int xx0 = min(int(floor(xc - c)), min_xray);
int xx1 = max(int(ceil(xc + c)), (max_xray + 1));
xx0 = clamp(xx0, x0, x1);
xx1 = clamp(xx1, x0, x1);
for (int x = xx0; x < xx1; x++)
{
float tile_x0 = float(x * 16);
TileRef _1449 = { path.tiles.offset };
TileRef param_50 = _1449;
uint param_51 = uint(base + x);
TileRef tile_ref_1 = Tile_index(param_50, param_51);
uint tile_el_1 = tile_ref_1.offset >> uint(2);
uint old = 0u;
uint _1465;
_143.InterlockedExchange(tile_el_1 * 4 + 12, tile_offset, _1465);
old = _1465;
tile_seg.origin = p0;
tile_seg._vector = p1 - p0;
float y_edge = 0.0f;
if (!is_stroke)
{
y_edge = lerp(p0.y, p1.y, (tile_x0 - p0.x) / dx);
if (min(p0.x, p1.x) < tile_x0)
{
float2 p = float2(tile_x0, y_edge);
if (p0.x > p1.x)
{
tile_seg._vector = p - p0;
}
else
{
tile_seg.origin = p;
tile_seg._vector = p1 - p;
}
if (tile_seg._vector.x == 0.0f)
{
tile_seg._vector.x = sign(p1.x - p0.x) * 9.999999717180685365747194737196e-10f;
}
}
if ((x <= min_xray) || (max_xray < x))
{
y_edge = 1000000000.0f;
}
}
tile_seg.y_edge = y_edge;
tile_seg.next.offset = old;
if (mem_ok)
{
TileSegRef _1550 = { tile_offset };
Alloc param_52 = tile_alloc;
TileSegRef param_53 = _1550;
TileSeg param_54 = tile_seg;
TileSeg_write(param_52, param_53, param_54);
}
tile_offset += 24u;
}
xc += b;
base += stride;
xray = next_xray;
}
n_out++;
target += v_step;
p0 = p1;
continue;
}
else
{
break;
}
}
val_sum += params_1.val;
qp0 = qp2_1;
}
break;
}
}
}
[numthreads(32, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,718 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Alloc
{
uint offset;
};
struct PathCubicRef
{
uint offset;
};
struct PathCubic
{
float2 p0;
float2 p1;
float2 p2;
float2 p3;
uint path_ix;
uint trans_ix;
float2 stroke;
};
struct PathSegRef
{
uint offset;
};
struct PathSegTag
{
uint tag;
uint flags;
};
struct TileRef
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct TileSegRef
{
uint offset;
};
struct TileSeg
{
float2 origin;
float2 vector;
float y_edge;
TileSegRef next;
};
struct SubdivResult
{
float val;
float a0;
float a2;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(32u, 1u, 1u);
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_143)
{
uint _149 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, 0u, memory_order_relaxed);
return (_149 & dep_stage) == 0u;
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_143)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_143.memory[offset];
return v;
}
static inline __attribute__((always_inline))
PathSegTag PathSeg_tag(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_143);
return PathSegTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref, device Memory& v_143)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_143);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_143);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_143);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7, v_143);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9, v_143);
Alloc param_10 = a;
uint param_11 = ix + 5u;
uint raw5 = read_mem(param_10, param_11, v_143);
Alloc param_12 = a;
uint param_13 = ix + 6u;
uint raw6 = read_mem(param_12, param_13, v_143);
Alloc param_14 = a;
uint param_15 = ix + 7u;
uint raw7 = read_mem(param_14, param_15, v_143);
Alloc param_16 = a;
uint param_17 = ix + 8u;
uint raw8 = read_mem(param_16, param_17, v_143);
Alloc param_18 = a;
uint param_19 = ix + 9u;
uint raw9 = read_mem(param_18, param_19, v_143);
Alloc param_20 = a;
uint param_21 = ix + 10u;
uint raw10 = read_mem(param_20, param_21, v_143);
Alloc param_22 = a;
uint param_23 = ix + 11u;
uint raw11 = read_mem(param_22, param_23, v_143);
PathCubic s;
s.p0 = float2(as_type<float>(raw0), as_type<float>(raw1));
s.p1 = float2(as_type<float>(raw2), as_type<float>(raw3));
s.p2 = float2(as_type<float>(raw4), as_type<float>(raw5));
s.p3 = float2(as_type<float>(raw6), as_type<float>(raw7));
s.path_ix = raw8;
s.trans_ix = raw9;
s.stroke = float2(as_type<float>(raw10), as_type<float>(raw11));
return s;
}
static inline __attribute__((always_inline))
PathCubic PathSeg_Cubic_read(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143)
{
Alloc param = a;
PathCubicRef param_1 = PathCubicRef{ ref.offset + 4u };
return PathCubic_read(param, param_1, v_143);
}
static inline __attribute__((always_inline))
float2 eval_cubic(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float2& p3, thread const float& t)
{
float mt = 1.0 - t;
return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0)) + (((p2 * (mt * 3.0)) + (p3 * t)) * t)) * t);
}
static inline __attribute__((always_inline))
float approx_parabola_integral(thread const float& x)
{
return x * rsqrt(sqrt(0.3300000131130218505859375 + (0.201511204242706298828125 + ((0.25 * x) * x))));
}
static inline __attribute__((always_inline))
SubdivResult estimate_subdiv(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float& sqrt_tol)
{
float2 d01 = p1 - p0;
float2 d12 = p2 - p1;
float2 dd = d01 - d12;
float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x);
float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross;
float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross;
float scale = abs(_cross / (length(dd) * (x2 - x0)));
float param = x0;
float a0 = approx_parabola_integral(param);
float param_1 = x2;
float a2 = approx_parabola_integral(param_1);
float val = 0.0;
if (scale < 1000000000.0)
{
float da = abs(a2 - a0);
float sqrt_scale = sqrt(scale);
if (sign(x0) == sign(x2))
{
val = da * sqrt_scale;
}
else
{
float xmin = sqrt_tol / sqrt_scale;
float param_2 = xmin;
val = (sqrt_tol * da) / approx_parabola_integral(param_2);
}
}
return SubdivResult{ val, a0, a2 };
}
static inline __attribute__((always_inline))
uint fill_mode_from_flags(thread const uint& flags)
{
return flags & 1u;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_143)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_143);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_143);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_143);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
return s;
}
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
float approx_parabola_inv_integral(thread const float& x)
{
return x * sqrt(0.61000001430511474609375 + (0.1520999968051910400390625 + ((0.25 * x) * x)));
}
static inline __attribute__((always_inline))
float2 eval_quad(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float& t)
{
float mt = 1.0 - t;
return (p0 * (mt * mt)) + (((p1 * (mt * 2.0)) + (p2 * t)) * t);
}
static inline __attribute__((always_inline))
uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_143)
{
uint _158 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.mem_offset, size, memory_order_relaxed);
uint offset = _158;
if ((offset + size) > mem_size)
{
uint _168 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, stage, memory_order_relaxed);
offset = 0u;
}
return offset;
}
static inline __attribute__((always_inline))
TileRef Tile_index(thread const TileRef& ref, thread const uint& index)
{
return TileRef{ ref.offset + (index * 8u) };
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_143)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_143.memory[offset] = val;
}
static inline __attribute__((always_inline))
void TileSeg_write(thread const Alloc& a, thread const TileSegRef& ref, thread const TileSeg& s, device Memory& v_143)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.origin.x);
write_mem(param, param_1, param_2, v_143);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.origin.y);
write_mem(param_3, param_4, param_5, v_143);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.vector.x);
write_mem(param_6, param_7, param_8, v_143);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.vector.y);
write_mem(param_9, param_10, param_11, v_143);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.y_edge);
write_mem(param_12, param_13, param_14, v_143);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.next.offset;
write_mem(param_15, param_16, param_17, v_143);
}
kernel void main0(device Memory& v_143 [[buffer(0)]], const device ConfigBuf& _711 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
bool mem_ok = true;
uint param = 7u;
bool _694 = check_deps(param, v_143);
if (!_694)
{
return;
}
uint element_ix = gl_GlobalInvocationID.x;
PathSegRef ref = PathSegRef{ _711.conf.pathseg_alloc.offset + (element_ix * 52u) };
PathSegTag tag = PathSegTag{ 0u, 0u };
if (element_ix < _711.conf.n_pathseg)
{
Alloc param_1;
param_1.offset = _711.conf.pathseg_alloc.offset;
PathSegRef param_2 = ref;
tag = PathSeg_tag(param_1, param_2, v_143);
}
switch (tag.tag)
{
case 1u:
{
Alloc param_3;
param_3.offset = _711.conf.pathseg_alloc.offset;
PathSegRef param_4 = ref;
PathCubic cubic = PathSeg_Cubic_read(param_3, param_4, v_143);
float2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3;
float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);
uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u);
n_quads = min(n_quads, 16u);
float val = 0.0;
float2 qp0 = cubic.p0;
float _step = 1.0 / float(n_quads);
spvUnsafeArray<SubdivResult, 16> keep_params;
for (uint i = 0u; i < n_quads; i++)
{
float t = float(i + 1u) * _step;
float2 param_5 = cubic.p0;
float2 param_6 = cubic.p1;
float2 param_7 = cubic.p2;
float2 param_8 = cubic.p3;
float param_9 = t;
float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9);
float2 param_10 = cubic.p0;
float2 param_11 = cubic.p1;
float2 param_12 = cubic.p2;
float2 param_13 = cubic.p3;
float param_14 = t - (0.5 * _step);
float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14);
qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5);
float2 param_15 = qp0;
float2 param_16 = qp1;
float2 param_17 = qp2;
float param_18 = 0.4743416607379913330078125;
SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18);
keep_params[i] = params;
val += params.val;
qp0 = qp2;
}
uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u);
uint param_19 = tag.flags;
bool is_stroke = fill_mode_from_flags(param_19) == 1u;
uint path_ix = cubic.path_ix;
Alloc param_20;
param_20.offset = _711.conf.tile_alloc.offset;
PathRef param_21 = PathRef{ _711.conf.tile_alloc.offset + (path_ix * 12u) };
Path path = Path_read(param_20, param_21, v_143);
uint param_22 = path.tiles.offset;
uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_24 = true;
Alloc path_alloc = new_alloc(param_22, param_23, param_24);
int4 bbox = int4(path.bbox);
float2 p0 = cubic.p0;
qp0 = cubic.p0;
float v_step = val / float(n);
int n_out = 1;
float val_sum = 0.0;
float2 p1;
float _1143;
TileSeg tile_seg;
for (uint i_1 = 0u; i_1 < n_quads; i_1++)
{
float t_1 = float(i_1 + 1u) * _step;
float2 param_25 = cubic.p0;
float2 param_26 = cubic.p1;
float2 param_27 = cubic.p2;
float2 param_28 = cubic.p3;
float param_29 = t_1;
float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29);
float2 param_30 = cubic.p0;
float2 param_31 = cubic.p1;
float2 param_32 = cubic.p2;
float2 param_33 = cubic.p3;
float param_34 = t_1 - (0.5 * _step);
float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34);
qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5);
SubdivResult params_1 = keep_params[i_1];
float param_35 = params_1.a0;
float u0 = approx_parabola_inv_integral(param_35);
float param_36 = params_1.a2;
float u2 = approx_parabola_inv_integral(param_36);
float uscale = 1.0 / (u2 - u0);
float target = float(n_out) * v_step;
for (;;)
{
bool _1036 = uint(n_out) == n;
bool _1046;
if (!_1036)
{
_1046 = target < (val_sum + params_1.val);
}
else
{
_1046 = _1036;
}
if (_1046)
{
if (uint(n_out) == n)
{
p1 = cubic.p3;
}
else
{
float u = (target - val_sum) / params_1.val;
float a = mix(params_1.a0, params_1.a2, u);
float param_37 = a;
float au = approx_parabola_inv_integral(param_37);
float t_2 = (au - u0) * uscale;
float2 param_38 = qp0;
float2 param_39 = qp1_1;
float2 param_40 = qp2_1;
float param_41 = t_2;
p1 = eval_quad(param_38, param_39, param_40, param_41);
}
float xmin = fast::min(p0.x, p1.x) - cubic.stroke.x;
float xmax = fast::max(p0.x, p1.x) + cubic.stroke.x;
float ymin = fast::min(p0.y, p1.y) - cubic.stroke.y;
float ymax = fast::max(p0.y, p1.y) + cubic.stroke.y;
float dx = p1.x - p0.x;
float dy = p1.y - p0.y;
if (abs(dy) < 9.999999717180685365747194737196e-10)
{
_1143 = 1000000000.0;
}
else
{
_1143 = dx / dy;
}
float invslope = _1143;
float c = (cubic.stroke.x + (abs(invslope) * (8.0 + cubic.stroke.y))) * 0.0625;
float b = invslope;
float a_1 = (p0.x - ((p0.y - 8.0) * b)) * 0.0625;
int x0 = int(floor(xmin * 0.0625));
int x1 = int(floor(xmax * 0.0625) + 1.0);
int y0 = int(floor(ymin * 0.0625));
int y1 = int(floor(ymax * 0.0625) + 1.0);
x0 = clamp(x0, bbox.x, bbox.z);
y0 = clamp(y0, bbox.y, bbox.w);
x1 = clamp(x1, bbox.x, bbox.z);
y1 = clamp(y1, bbox.y, bbox.w);
float xc = a_1 + (b * float(y0));
int stride = bbox.z - bbox.x;
int base = ((y0 - bbox.y) * stride) - bbox.x;
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
uint malloc_size = n_tile_alloc * 24u;
uint param_42 = malloc_size;
uint param_43 = _711.conf.mem_size;
uint param_44 = 4u;
uint _1265 = malloc_stage(param_42, param_43, param_44, v_143);
uint tile_offset = _1265;
if (tile_offset == 0u)
{
mem_ok = false;
}
uint param_45 = tile_offset;
uint param_46 = malloc_size;
bool param_47 = true;
Alloc tile_alloc = new_alloc(param_45, param_46, param_47);
int xray = int(floor(p0.x * 0.0625));
int last_xray = int(floor(p1.x * 0.0625));
if (p0.y > p1.y)
{
int tmp = xray;
xray = last_xray;
last_xray = tmp;
}
for (int y = y0; y < y1; y++)
{
float tile_y0 = float(y * 16);
int xbackdrop = max((xray + 1), bbox.x);
bool _1322 = !is_stroke;
bool _1332;
if (_1322)
{
_1332 = fast::min(p0.y, p1.y) < tile_y0;
}
else
{
_1332 = _1322;
}
bool _1339;
if (_1332)
{
_1339 = xbackdrop < bbox.z;
}
else
{
_1339 = _1332;
}
if (_1339)
{
int backdrop = (p1.y < p0.y) ? 1 : (-1);
TileRef param_48 = path.tiles;
uint param_49 = uint(base + xbackdrop);
TileRef tile_ref = Tile_index(param_48, param_49);
uint tile_el = tile_ref.offset >> uint(2);
uint _1369 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.memory[tile_el + 1u], uint(backdrop), memory_order_relaxed);
}
int next_xray = last_xray;
if (y < (y1 - 1))
{
float tile_y1 = float((y + 1) * 16);
float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy);
next_xray = int(floor(x_edge * 0.0625));
}
int min_xray = min(xray, next_xray);
int max_xray = max(xray, next_xray);
int xx0 = min(int(floor(xc - c)), min_xray);
int xx1 = max(int(ceil(xc + c)), (max_xray + 1));
xx0 = clamp(xx0, x0, x1);
xx1 = clamp(xx1, x0, x1);
for (int x = xx0; x < xx1; x++)
{
float tile_x0 = float(x * 16);
TileRef param_50 = TileRef{ path.tiles.offset };
uint param_51 = uint(base + x);
TileRef tile_ref_1 = Tile_index(param_50, param_51);
uint tile_el_1 = tile_ref_1.offset >> uint(2);
uint old = 0u;
uint _1465 = atomic_exchange_explicit((device atomic_uint*)&v_143.memory[tile_el_1], tile_offset, memory_order_relaxed);
old = _1465;
tile_seg.origin = p0;
tile_seg.vector = p1 - p0;
float y_edge = 0.0;
if (!is_stroke)
{
y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);
if (fast::min(p0.x, p1.x) < tile_x0)
{
float2 p = float2(tile_x0, y_edge);
if (p0.x > p1.x)
{
tile_seg.vector = p - p0;
}
else
{
tile_seg.origin = p;
tile_seg.vector = p1 - p;
}
if (tile_seg.vector.x == 0.0)
{
tile_seg.vector.x = sign(p1.x - p0.x) * 9.999999717180685365747194737196e-10;
}
}
if ((x <= min_xray) || (max_xray < x))
{
y_edge = 1000000000.0;
}
}
tile_seg.y_edge = y_edge;
tile_seg.next.offset = old;
if (mem_ok)
{
Alloc param_52 = tile_alloc;
TileSegRef param_53 = TileSegRef{ tile_offset };
TileSeg param_54 = tile_seg;
TileSeg_write(param_52, param_53, param_54, v_143);
}
tile_offset += 24u;
}
xc += b;
base += stride;
xray = next_xray;
}
n_out++;
target += v_step;
p0 = p1;
continue;
}
else
{
break;
}
}
val_sum += params_1.val;
qp0 = qp2_1;
}
break;
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,662 +0,0 @@
struct Alloc
{
uint offset;
};
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct TransformSegRef
{
uint offset;
};
struct TransformSeg
{
float4 mat;
float2 translate;
};
struct PathCubicRef
{
uint offset;
};
struct PathCubic
{
float2 p0;
float2 p1;
float2 p2;
float2 p3;
uint path_ix;
uint trans_ix;
float2 stroke;
};
struct PathSegRef
{
uint offset;
};
struct Monoid
{
float4 bbox;
uint flags;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const TagMonoid _135 = { 0u, 0u, 0u, 0u, 0u };
static const Monoid _567 = { 0.0f.xxxx, 0u };
RWByteAddressBuffer _111 : register(u0, space0);
ByteAddressBuffer _574 : register(t2, space0);
ByteAddressBuffer _639 : register(t1, space0);
ByteAddressBuffer _710 : register(t3, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared TagMonoid sh_tag[256];
groupshared Monoid sh_scratch[256];
TagMonoid reduce_tag(uint tag_word)
{
uint point_count = tag_word & 50529027u;
TagMonoid c;
c.pathseg_ix = uint(int(countbits((point_count * 7u) & 67372036u)));
c.linewidth_ix = uint(int(countbits(tag_word & 1077952576u)));
c.path_ix = uint(int(countbits(tag_word & 269488144u)));
c.trans_ix = uint(int(countbits(tag_word & 538976288u)));
uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u);
uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u));
a += (a >> uint(8));
a += (a >> uint(16));
c.pathseg_offset = a & 255u;
return c;
}
TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
TagMonoid tag_monoid_identity()
{
return _135;
}
float2 read_f32_point(uint ix)
{
float x = asfloat(_574.Load(ix * 4 + 0));
float y = asfloat(_574.Load((ix + 1u) * 4 + 0));
return float2(x, y);
}
float2 read_i16_point(uint ix)
{
uint raw = _574.Load(ix * 4 + 0);
float x = float(int(raw << uint(16)) >> 16);
float y = float(int(raw) >> 16);
return float2(x, y);
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _111.Load(offset * 4 + 12);
return v;
}
TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9);
Alloc param_10 = a;
uint param_11 = ix + 5u;
uint raw5 = read_mem(param_10, param_11);
TransformSeg s;
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.translate = float2(asfloat(raw4), asfloat(raw5));
return s;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_111.Store(offset * 4 + 12, val);
}
void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.p0.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.p0.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.p1.x);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.p1.y);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.p2.x);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = asuint(s.p2.y);
write_mem(param_15, param_16, param_17);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = asuint(s.p3.x);
write_mem(param_18, param_19, param_20);
Alloc param_21 = a;
uint param_22 = ix + 7u;
uint param_23 = asuint(s.p3.y);
write_mem(param_21, param_22, param_23);
Alloc param_24 = a;
uint param_25 = ix + 8u;
uint param_26 = s.path_ix;
write_mem(param_24, param_25, param_26);
Alloc param_27 = a;
uint param_28 = ix + 9u;
uint param_29 = s.trans_ix;
write_mem(param_27, param_28, param_29);
Alloc param_30 = a;
uint param_31 = ix + 10u;
uint param_32 = asuint(s.stroke.x);
write_mem(param_30, param_31, param_32);
Alloc param_33 = a;
uint param_34 = ix + 11u;
uint param_35 = asuint(s.stroke.y);
write_mem(param_33, param_34, param_35);
}
void PathSeg_Cubic_write(Alloc a, PathSegRef ref, uint flags, PathCubic s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2);
PathCubicRef _458 = { ref.offset + 4u };
Alloc param_3 = a;
PathCubicRef param_4 = _458;
PathCubic param_5 = s;
PathCubic_write(param_3, param_4, param_5);
}
Monoid combine_monoid(Monoid a, Monoid b)
{
Monoid c;
c.bbox = b.bbox;
bool _472 = (a.flags & 1u) == 0u;
bool _480;
if (_472)
{
_480 = b.bbox.z <= b.bbox.x;
}
else
{
_480 = _472;
}
bool _488;
if (_480)
{
_488 = b.bbox.w <= b.bbox.y;
}
else
{
_488 = _480;
}
if (_488)
{
c.bbox = a.bbox;
}
else
{
bool _498 = (a.flags & 1u) == 0u;
bool _505;
if (_498)
{
_505 = (b.flags & 2u) == 0u;
}
else
{
_505 = _498;
}
bool _522;
if (_505)
{
bool _512 = a.bbox.z > a.bbox.x;
bool _521;
if (!_512)
{
_521 = a.bbox.w > a.bbox.y;
}
else
{
_521 = _512;
}
_522 = _521;
}
else
{
_522 = _505;
}
if (_522)
{
float4 _529 = c.bbox;
float2 _531 = min(a.bbox.xy, _529.xy);
c.bbox.x = _531.x;
c.bbox.y = _531.y;
float4 _540 = c.bbox;
float2 _542 = max(a.bbox.zw, _540.zw);
c.bbox.z = _542.x;
c.bbox.w = _542.y;
}
}
c.flags = (a.flags & 2u) | b.flags;
c.flags |= ((a.flags & 1u) << uint(1));
return c;
}
Monoid monoid_identity()
{
return _567;
}
uint round_down(float x)
{
return uint(max(0.0f, floor(x) + 32768.0f));
}
uint round_up(float x)
{
return uint(min(65535.0f, ceil(x) + 32768.0f));
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = _574.Load(((_639.Load(96) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint param = tag_word;
TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i))
{
TagMonoid other = sh_tag[gl_LocalInvocationID.x - (1u << i)];
TagMonoid param_1 = other;
TagMonoid param_2 = local_tm;
local_tm = combine_tag_monoid(param_1, param_2);
}
GroupMemoryBarrierWithGroupSync();
sh_tag[gl_LocalInvocationID.x] = local_tm;
}
GroupMemoryBarrierWithGroupSync();
TagMonoid tm = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
TagMonoid _716;
_716.trans_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 0);
_716.linewidth_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 4);
_716.pathseg_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 8);
_716.path_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 12);
_716.pathseg_offset = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 16);
tm.trans_ix = _716.trans_ix;
tm.linewidth_ix = _716.linewidth_ix;
tm.pathseg_ix = _716.pathseg_ix;
tm.path_ix = _716.path_ix;
tm.pathseg_offset = _716.pathseg_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
TagMonoid param_3 = tm;
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4);
}
uint ps_ix = (_639.Load(100) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(92) >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix;
uint trans_ix = tm.trans_ix;
TransformSegRef _771 = { _639.Load(40) + (trans_ix * 24u) };
TransformSegRef trans_ref = _771;
PathSegRef _780 = { _639.Load(32) + (tm.pathseg_ix * 52u) };
PathSegRef ps_ref = _780;
float linewidth[4];
uint save_trans_ix[4];
float2 p0;
float2 p1;
float2 p2;
float2 p3;
Alloc param_13;
Monoid local[4];
PathCubic cubic;
Alloc param_15;
for (uint i_1 = 0u; i_1 < 4u; i_1++)
{
linewidth[i_1] = asfloat(_574.Load(lw_ix * 4 + 0));
save_trans_ix[i_1] = trans_ix;
uint tag_byte = tag_word >> (i_1 * 8u);
uint seg_type = tag_byte & 3u;
if (seg_type != 0u)
{
if ((tag_byte & 8u) != 0u)
{
uint param_5 = ps_ix;
p0 = read_f32_point(param_5);
uint param_6 = ps_ix + 2u;
p1 = read_f32_point(param_6);
if (seg_type >= 2u)
{
uint param_7 = ps_ix + 4u;
p2 = read_f32_point(param_7);
if (seg_type == 3u)
{
uint param_8 = ps_ix + 6u;
p3 = read_f32_point(param_8);
}
}
}
else
{
uint param_9 = ps_ix;
p0 = read_i16_point(param_9);
uint param_10 = ps_ix + 1u;
p1 = read_i16_point(param_10);
if (seg_type >= 2u)
{
uint param_11 = ps_ix + 2u;
p2 = read_i16_point(param_11);
if (seg_type == 3u)
{
uint param_12 = ps_ix + 3u;
p3 = read_i16_point(param_12);
}
}
}
Alloc _876;
_876.offset = _639.Load(40);
param_13.offset = _876.offset;
TransformSegRef param_14 = trans_ref;
TransformSeg transform = TransformSeg_read(param_13, param_14);
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
p1 = ((transform.mat.xy * p1.x) + (transform.mat.zw * p1.y)) + transform.translate;
float4 bbox = float4(min(p0, p1), max(p0, p1));
if (seg_type >= 2u)
{
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _946 = bbox;
float2 _949 = min(_946.xy, p2);
bbox.x = _949.x;
bbox.y = _949.y;
float4 _954 = bbox;
float2 _957 = max(_954.zw, p2);
bbox.z = _957.x;
bbox.w = _957.y;
if (seg_type == 3u)
{
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _982 = bbox;
float2 _985 = min(_982.xy, p3);
bbox.x = _985.x;
bbox.y = _985.y;
float4 _990 = bbox;
float2 _993 = max(_990.zw, p3);
bbox.z = _993.x;
bbox.w = _993.y;
}
else
{
p3 = p2;
p2 = lerp(p1, p2, 0.3333333432674407958984375f.xx);
p1 = lerp(p1, p0, 0.3333333432674407958984375f.xx);
}
}
else
{
p3 = p1;
p2 = lerp(p3, p0, 0.3333333432674407958984375f.xx);
p1 = lerp(p0, p3, 0.3333333432674407958984375f.xx);
}
float2 stroke = 0.0f.xx;
if (linewidth[i_1] >= 0.0f)
{
stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5f * linewidth[i_1]);
bbox += float4(-stroke, stroke);
}
local[i_1].bbox = bbox;
local[i_1].flags = 0u;
cubic.p0 = p0;
cubic.p1 = p1;
cubic.p2 = p2;
cubic.p3 = p3;
cubic.path_ix = tm.path_ix;
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
cubic.stroke = stroke;
uint fill_mode = uint(linewidth[i_1] >= 0.0f);
Alloc _1088;
_1088.offset = _639.Load(32);
param_15.offset = _1088.offset;
PathSegRef param_16 = ps_ref;
uint param_17 = fill_mode;
PathCubic param_18 = cubic;
PathSeg_Cubic_write(param_15, param_16, param_17, param_18);
ps_ref.offset += 52u;
uint n_points = (tag_byte & 3u) + ((tag_byte >> uint(2)) & 1u);
uint n_words = n_points + (n_points & (((tag_byte >> uint(3)) & 1u) * 15u));
ps_ix += n_words;
}
else
{
local[i_1].bbox = 0.0f.xxxx;
uint is_path = (tag_byte >> uint(4)) & 1u;
local[i_1].flags = is_path;
tm.path_ix += is_path;
trans_ix += ((tag_byte >> uint(5)) & 1u);
trans_ref.offset += (((tag_byte >> uint(5)) & 1u) * 24u);
lw_ix += ((tag_byte >> uint(6)) & 1u);
}
}
Monoid agg = local[0];
for (uint i_2 = 1u; i_2 < 4u; i_2++)
{
Monoid param_19 = agg;
Monoid param_20 = local[i_2];
agg = combine_monoid(param_19, param_20);
local[i_2] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_3 = 0u; i_3 < 8u; i_3++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_3))
{
Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - (1u << i_3)];
Monoid param_21 = other_1;
Monoid param_22 = agg;
agg = combine_monoid(param_21, param_22);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
uint path_ix = save_path_ix;
uint bbox_out_ix = (_639.Load(44) >> uint(2)) + (path_ix * 6u);
Monoid row = monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_4 = 0u; i_4 < 4u; i_4++)
{
Monoid param_23 = row;
Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false;
bool _1263 = i_4 == 3u;
bool _1269;
if (_1263)
{
_1269 = gl_LocalInvocationID.x == 255u;
}
else
{
_1269 = _1263;
}
if (_1269)
{
do_atomic = true;
}
if ((m.flags & 1u) != 0u)
{
_111.Store((bbox_out_ix + 4u) * 4 + 12, asuint(linewidth[i_4]));
_111.Store((bbox_out_ix + 5u) * 4 + 12, save_trans_ix[i_4]);
if ((m.flags & 2u) == 0u)
{
do_atomic = true;
}
else
{
float param_25 = m.bbox.x;
_111.Store(bbox_out_ix * 4 + 12, round_down(param_25));
float param_26 = m.bbox.y;
_111.Store((bbox_out_ix + 1u) * 4 + 12, round_down(param_26));
float param_27 = m.bbox.z;
_111.Store((bbox_out_ix + 2u) * 4 + 12, round_up(param_27));
float param_28 = m.bbox.w;
_111.Store((bbox_out_ix + 3u) * 4 + 12, round_up(param_28));
bbox_out_ix += 6u;
do_atomic = false;
}
}
if (do_atomic)
{
bool _1334 = m.bbox.z > m.bbox.x;
bool _1343;
if (!_1334)
{
_1343 = m.bbox.w > m.bbox.y;
}
else
{
_1343 = _1334;
}
if (_1343)
{
float param_29 = m.bbox.x;
uint _1352;
_111.InterlockedMin(bbox_out_ix * 4 + 12, round_down(param_29), _1352);
float param_30 = m.bbox.y;
uint _1360;
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 12, round_down(param_30), _1360);
float param_31 = m.bbox.z;
uint _1368;
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 12, round_up(param_31), _1368);
float param_32 = m.bbox.w;
uint _1376;
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 12, round_up(param_32), _1376);
}
bbox_out_ix += 6u;
}
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,719 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Alloc
{
uint offset;
};
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct TransformSegRef
{
uint offset;
};
struct TransformSeg
{
float4 mat;
float2 translate;
};
struct PathCubicRef
{
uint offset;
};
struct PathCubic
{
float2 p0;
float2 p1;
float2 p2;
float2 p3;
uint path_ix;
uint trans_ix;
float2 stroke;
};
struct PathSegRef
{
uint offset;
};
struct Monoid
{
float4 bbox;
uint flags;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
struct SceneBuf
{
uint scene[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct TagMonoid_1
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct ParentBuf
{
TagMonoid_1 parent[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
TagMonoid reduce_tag(thread const uint& tag_word)
{
uint point_count = tag_word & 50529027u;
TagMonoid c;
c.pathseg_ix = uint(int(popcount((point_count * 7u) & 67372036u)));
c.linewidth_ix = uint(int(popcount(tag_word & 1077952576u)));
c.path_ix = uint(int(popcount(tag_word & 269488144u)));
c.trans_ix = uint(int(popcount(tag_word & 538976288u)));
uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u);
uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u));
a += (a >> uint(8));
a += (a >> uint(16));
c.pathseg_offset = a & 255u;
return c;
}
static inline __attribute__((always_inline))
TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
static inline __attribute__((always_inline))
TagMonoid tag_monoid_identity()
{
return TagMonoid{ 0u, 0u, 0u, 0u, 0u };
}
static inline __attribute__((always_inline))
float2 read_f32_point(thread const uint& ix, const device SceneBuf& v_574)
{
float x = as_type<float>(v_574.scene[ix]);
float y = as_type<float>(v_574.scene[ix + 1u]);
return float2(x, y);
}
static inline __attribute__((always_inline))
float2 read_i16_point(thread const uint& ix, const device SceneBuf& v_574)
{
uint raw = v_574.scene[ix];
float x = float(int(raw << uint(16)) >> 16);
float y = float(int(raw) >> 16);
return float2(x, y);
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_111)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_111.memory[offset];
return v;
}
static inline __attribute__((always_inline))
TransformSeg TransformSeg_read(thread const Alloc& a, thread const TransformSegRef& ref, device Memory& v_111)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_111);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_111);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_111);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7, v_111);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9, v_111);
Alloc param_10 = a;
uint param_11 = ix + 5u;
uint raw5 = read_mem(param_10, param_11, v_111);
TransformSeg s;
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
return s;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_111)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_111.memory[offset] = val;
}
static inline __attribute__((always_inline))
void PathCubic_write(thread const Alloc& a, thread const PathCubicRef& ref, thread const PathCubic& s, device Memory& v_111)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.p0.x);
write_mem(param, param_1, param_2, v_111);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.p0.y);
write_mem(param_3, param_4, param_5, v_111);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.p1.x);
write_mem(param_6, param_7, param_8, v_111);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.p1.y);
write_mem(param_9, param_10, param_11, v_111);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.p2.x);
write_mem(param_12, param_13, param_14, v_111);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = as_type<uint>(s.p2.y);
write_mem(param_15, param_16, param_17, v_111);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = as_type<uint>(s.p3.x);
write_mem(param_18, param_19, param_20, v_111);
Alloc param_21 = a;
uint param_22 = ix + 7u;
uint param_23 = as_type<uint>(s.p3.y);
write_mem(param_21, param_22, param_23, v_111);
Alloc param_24 = a;
uint param_25 = ix + 8u;
uint param_26 = s.path_ix;
write_mem(param_24, param_25, param_26, v_111);
Alloc param_27 = a;
uint param_28 = ix + 9u;
uint param_29 = s.trans_ix;
write_mem(param_27, param_28, param_29, v_111);
Alloc param_30 = a;
uint param_31 = ix + 10u;
uint param_32 = as_type<uint>(s.stroke.x);
write_mem(param_30, param_31, param_32, v_111);
Alloc param_33 = a;
uint param_34 = ix + 11u;
uint param_35 = as_type<uint>(s.stroke.y);
write_mem(param_33, param_34, param_35, v_111);
}
static inline __attribute__((always_inline))
void PathSeg_Cubic_write(thread const Alloc& a, thread const PathSegRef& ref, thread const uint& flags, thread const PathCubic& s, device Memory& v_111)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2, v_111);
Alloc param_3 = a;
PathCubicRef param_4 = PathCubicRef{ ref.offset + 4u };
PathCubic param_5 = s;
PathCubic_write(param_3, param_4, param_5, v_111);
}
static inline __attribute__((always_inline))
Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b)
{
Monoid c;
c.bbox = b.bbox;
bool _472 = (a.flags & 1u) == 0u;
bool _480;
if (_472)
{
_480 = b.bbox.z <= b.bbox.x;
}
else
{
_480 = _472;
}
bool _488;
if (_480)
{
_488 = b.bbox.w <= b.bbox.y;
}
else
{
_488 = _480;
}
if (_488)
{
c.bbox = a.bbox;
}
else
{
bool _498 = (a.flags & 1u) == 0u;
bool _505;
if (_498)
{
_505 = (b.flags & 2u) == 0u;
}
else
{
_505 = _498;
}
bool _522;
if (_505)
{
bool _512 = a.bbox.z > a.bbox.x;
bool _521;
if (!_512)
{
_521 = a.bbox.w > a.bbox.y;
}
else
{
_521 = _512;
}
_522 = _521;
}
else
{
_522 = _505;
}
if (_522)
{
float4 _529 = c.bbox;
float2 _531 = fast::min(a.bbox.xy, _529.xy);
c.bbox.x = _531.x;
c.bbox.y = _531.y;
float4 _540 = c.bbox;
float2 _542 = fast::max(a.bbox.zw, _540.zw);
c.bbox.z = _542.x;
c.bbox.w = _542.y;
}
}
c.flags = (a.flags & 2u) | b.flags;
c.flags |= ((a.flags & 1u) << uint(1));
return c;
}
static inline __attribute__((always_inline))
Monoid monoid_identity()
{
return Monoid{ float4(0.0), 0u };
}
static inline __attribute__((always_inline))
uint round_down(thread const float& x)
{
return uint(fast::max(0.0, floor(x) + 32768.0));
}
static inline __attribute__((always_inline))
uint round_up(thread const float& x)
{
return uint(fast::min(65535.0, ceil(x) + 32768.0));
}
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _710 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup TagMonoid sh_tag[256];
threadgroup Monoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = v_574.scene[(_639.conf.pathtag_offset >> uint(2)) + (ix >> uint(2))];
uint param = tag_word;
TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i))
{
TagMonoid other = sh_tag[gl_LocalInvocationID.x - (1u << i)];
TagMonoid param_1 = other;
TagMonoid param_2 = local_tm;
local_tm = combine_tag_monoid(param_1, param_2);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_tag[gl_LocalInvocationID.x] = local_tm;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
TagMonoid tm = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _713 = gl_WorkGroupID.x - 1u;
tm.trans_ix = _710.parent[_713].trans_ix;
tm.linewidth_ix = _710.parent[_713].linewidth_ix;
tm.pathseg_ix = _710.parent[_713].pathseg_ix;
tm.path_ix = _710.parent[_713].path_ix;
tm.pathseg_offset = _710.parent[_713].pathseg_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
TagMonoid param_3 = tm;
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4);
}
uint ps_ix = (_639.conf.pathseg_offset >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.conf.linewidth_offset >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix;
uint trans_ix = tm.trans_ix;
TransformSegRef trans_ref = TransformSegRef{ _639.conf.trans_alloc.offset + (trans_ix * 24u) };
PathSegRef ps_ref = PathSegRef{ _639.conf.pathseg_alloc.offset + (tm.pathseg_ix * 52u) };
spvUnsafeArray<float, 4> linewidth;
spvUnsafeArray<uint, 4> save_trans_ix;
float2 p0;
float2 p1;
float2 p2;
float2 p3;
Alloc param_13;
spvUnsafeArray<Monoid, 4> local;
PathCubic cubic;
Alloc param_15;
for (uint i_1 = 0u; i_1 < 4u; i_1++)
{
linewidth[i_1] = as_type<float>(v_574.scene[lw_ix]);
save_trans_ix[i_1] = trans_ix;
uint tag_byte = tag_word >> (i_1 * 8u);
uint seg_type = tag_byte & 3u;
if (seg_type != 0u)
{
if ((tag_byte & 8u) != 0u)
{
uint param_5 = ps_ix;
p0 = read_f32_point(param_5, v_574);
uint param_6 = ps_ix + 2u;
p1 = read_f32_point(param_6, v_574);
if (seg_type >= 2u)
{
uint param_7 = ps_ix + 4u;
p2 = read_f32_point(param_7, v_574);
if (seg_type == 3u)
{
uint param_8 = ps_ix + 6u;
p3 = read_f32_point(param_8, v_574);
}
}
}
else
{
uint param_9 = ps_ix;
p0 = read_i16_point(param_9, v_574);
uint param_10 = ps_ix + 1u;
p1 = read_i16_point(param_10, v_574);
if (seg_type >= 2u)
{
uint param_11 = ps_ix + 2u;
p2 = read_i16_point(param_11, v_574);
if (seg_type == 3u)
{
uint param_12 = ps_ix + 3u;
p3 = read_i16_point(param_12, v_574);
}
}
}
param_13.offset = _639.conf.trans_alloc.offset;
TransformSegRef param_14 = trans_ref;
TransformSeg transform = TransformSeg_read(param_13, param_14, v_111);
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
p1 = ((transform.mat.xy * p1.x) + (transform.mat.zw * p1.y)) + transform.translate;
float4 bbox = float4(fast::min(p0, p1), fast::max(p0, p1));
if (seg_type >= 2u)
{
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _946 = bbox;
float2 _949 = fast::min(_946.xy, p2);
bbox.x = _949.x;
bbox.y = _949.y;
float4 _954 = bbox;
float2 _957 = fast::max(_954.zw, p2);
bbox.z = _957.x;
bbox.w = _957.y;
if (seg_type == 3u)
{
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _982 = bbox;
float2 _985 = fast::min(_982.xy, p3);
bbox.x = _985.x;
bbox.y = _985.y;
float4 _990 = bbox;
float2 _993 = fast::max(_990.zw, p3);
bbox.z = _993.x;
bbox.w = _993.y;
}
else
{
p3 = p2;
p2 = mix(p1, p2, float2(0.3333333432674407958984375));
p1 = mix(p1, p0, float2(0.3333333432674407958984375));
}
}
else
{
p3 = p1;
p2 = mix(p3, p0, float2(0.3333333432674407958984375));
p1 = mix(p0, p3, float2(0.3333333432674407958984375));
}
float2 stroke = float2(0.0);
if (linewidth[i_1] >= 0.0)
{
stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5 * linewidth[i_1]);
bbox += float4(-stroke, stroke);
}
local[i_1].bbox = bbox;
local[i_1].flags = 0u;
cubic.p0 = p0;
cubic.p1 = p1;
cubic.p2 = p2;
cubic.p3 = p3;
cubic.path_ix = tm.path_ix;
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
cubic.stroke = stroke;
uint fill_mode = uint(linewidth[i_1] >= 0.0);
param_15.offset = _639.conf.pathseg_alloc.offset;
PathSegRef param_16 = ps_ref;
uint param_17 = fill_mode;
PathCubic param_18 = cubic;
PathSeg_Cubic_write(param_15, param_16, param_17, param_18, v_111);
ps_ref.offset += 52u;
uint n_points = (tag_byte & 3u) + ((tag_byte >> uint(2)) & 1u);
uint n_words = n_points + (n_points & (((tag_byte >> uint(3)) & 1u) * 15u));
ps_ix += n_words;
}
else
{
local[i_1].bbox = float4(0.0);
uint is_path = (tag_byte >> uint(4)) & 1u;
local[i_1].flags = is_path;
tm.path_ix += is_path;
trans_ix += ((tag_byte >> uint(5)) & 1u);
trans_ref.offset += (((tag_byte >> uint(5)) & 1u) * 24u);
lw_ix += ((tag_byte >> uint(6)) & 1u);
}
}
Monoid agg = local[0];
for (uint i_2 = 1u; i_2 < 4u; i_2++)
{
Monoid param_19 = agg;
Monoid param_20 = local[i_2];
agg = combine_monoid(param_19, param_20);
local[i_2] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_3 = 0u; i_3 < 8u; i_3++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_3))
{
Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - (1u << i_3)];
Monoid param_21 = other_1;
Monoid param_22 = agg;
agg = combine_monoid(param_21, param_22);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint path_ix = save_path_ix;
uint bbox_out_ix = (_639.conf.path_bbox_alloc.offset >> uint(2)) + (path_ix * 6u);
Monoid row = monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_4 = 0u; i_4 < 4u; i_4++)
{
Monoid param_23 = row;
Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false;
bool _1263 = i_4 == 3u;
bool _1269;
if (_1263)
{
_1269 = gl_LocalInvocationID.x == 255u;
}
else
{
_1269 = _1263;
}
if (_1269)
{
do_atomic = true;
}
if ((m.flags & 1u) != 0u)
{
v_111.memory[bbox_out_ix + 4u] = as_type<uint>(linewidth[i_4]);
v_111.memory[bbox_out_ix + 5u] = save_trans_ix[i_4];
if ((m.flags & 2u) == 0u)
{
do_atomic = true;
}
else
{
float param_25 = m.bbox.x;
v_111.memory[bbox_out_ix] = round_down(param_25);
float param_26 = m.bbox.y;
v_111.memory[bbox_out_ix + 1u] = round_down(param_26);
float param_27 = m.bbox.z;
v_111.memory[bbox_out_ix + 2u] = round_up(param_27);
float param_28 = m.bbox.w;
v_111.memory[bbox_out_ix + 3u] = round_up(param_28);
bbox_out_ix += 6u;
do_atomic = false;
}
}
if (do_atomic)
{
bool _1334 = m.bbox.z > m.bbox.x;
bool _1343;
if (!_1334)
{
_1343 = m.bbox.w > m.bbox.y;
}
else
{
_1343 = _1334;
}
if (_1343)
{
float param_29 = m.bbox.x;
uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
float param_30 = m.bbox.y;
uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
float param_31 = m.bbox.z;
uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
float param_32 = m.bbox.w;
uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
}
bbox_out_ix += 6u;
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,139 +0,0 @@
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u);
ByteAddressBuffer _139 : register(t1, space0);
ByteAddressBuffer _151 : register(t2, space0);
RWByteAddressBuffer _238 : register(u3, space0);
RWByteAddressBuffer _258 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared TagMonoid sh_scratch[128];
TagMonoid reduce_tag(uint tag_word)
{
uint point_count = tag_word & 50529027u;
TagMonoid c;
c.pathseg_ix = uint(int(countbits((point_count * 7u) & 67372036u)));
c.linewidth_ix = uint(int(countbits(tag_word & 1077952576u)));
c.path_ix = uint(int(countbits(tag_word & 269488144u)));
c.trans_ix = uint(int(countbits(tag_word & 538976288u)));
uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u);
uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u));
a += (a >> uint(8));
a += (a >> uint(16));
c.pathseg_offset = a & 255u;
return c;
}
TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.Load(96) >> uint(2)) + ix;
uint tag_word = _151.Load(scene_ix * 4 + 0);
uint param = tag_word;
TagMonoid agg = reduce_tag(param);
for (uint i = 1u; i < 2u; i++)
{
tag_word = _151.Load((scene_ix + i) * 4 + 0);
uint param_1 = tag_word;
TagMonoid param_2 = agg;
TagMonoid param_3 = reduce_tag(param_1);
agg = combine_tag_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 7u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if ((gl_LocalInvocationID.x + (1u << i_1)) < 128u)
{
TagMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
TagMonoid param_4 = agg;
TagMonoid param_5 = other;
agg = combine_tag_monoid(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
_238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
_238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
_238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
_238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
}
}
[numthreads(128, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,156 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
struct TagMonoid_1
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct OutBuf
{
TagMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(128u, 1u, 1u);
static inline __attribute__((always_inline))
TagMonoid reduce_tag(thread const uint& tag_word)
{
uint point_count = tag_word & 50529027u;
TagMonoid c;
c.pathseg_ix = uint(int(popcount((point_count * 7u) & 67372036u)));
c.linewidth_ix = uint(int(popcount(tag_word & 1077952576u)));
c.path_ix = uint(int(popcount(tag_word & 269488144u)));
c.trans_ix = uint(int(popcount(tag_word & 538976288u)));
uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u);
uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u));
a += (a >> uint(8));
a += (a >> uint(16));
c.pathseg_offset = a & 255u;
return c;
}
static inline __attribute__((always_inline))
TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _151 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup TagMonoid sh_scratch[128];
uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix;
uint tag_word = _151.scene[scene_ix];
uint param = tag_word;
TagMonoid agg = reduce_tag(param);
for (uint i = 1u; i < 2u; i++)
{
tag_word = _151.scene[scene_ix + i];
uint param_1 = tag_word;
TagMonoid param_2 = agg;
TagMonoid param_3 = reduce_tag(param_1);
agg = combine_tag_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 7u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((gl_LocalInvocationID.x + (1u << i_1)) < 128u)
{
TagMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
TagMonoid param_4 = agg;
TagMonoid param_5 = other;
agg = combine_tag_monoid(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
_238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
_238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
_238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,115 +0,0 @@
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const TagMonoid _18 = { 0u, 0u, 0u, 0u, 0u };
RWByteAddressBuffer _78 : register(u0, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared TagMonoid sh_scratch[256];
TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
TagMonoid tag_monoid_identity()
{
return _18;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TagMonoid _82;
_82.trans_ix = _78.Load(ix * 20 + 0);
_82.linewidth_ix = _78.Load(ix * 20 + 4);
_82.pathseg_ix = _78.Load(ix * 20 + 8);
_82.path_ix = _78.Load(ix * 20 + 12);
_82.pathseg_offset = _78.Load(ix * 20 + 16);
TagMonoid local[8];
local[0].trans_ix = _82.trans_ix;
local[0].linewidth_ix = _82.linewidth_ix;
local[0].pathseg_ix = _82.pathseg_ix;
local[0].path_ix = _82.path_ix;
local[0].pathseg_offset = _82.pathseg_offset;
TagMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
TagMonoid param = local[i - 1u];
TagMonoid _115;
_115.trans_ix = _78.Load((ix + i) * 20 + 0);
_115.linewidth_ix = _78.Load((ix + i) * 20 + 4);
_115.pathseg_ix = _78.Load((ix + i) * 20 + 8);
_115.path_ix = _78.Load((ix + i) * 20 + 12);
_115.pathseg_offset = _78.Load((ix + i) * 20 + 16);
param_1.trans_ix = _115.trans_ix;
param_1.linewidth_ix = _115.linewidth_ix;
param_1.pathseg_ix = _115.pathseg_ix;
param_1.path_ix = _115.path_ix;
param_1.pathseg_offset = _115.pathseg_offset;
local[i] = combine_tag_monoid(param, param_1);
}
TagMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
TagMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
TagMonoid param_2 = other;
TagMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
TagMonoid row = tag_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
TagMonoid param_4 = row;
TagMonoid param_5 = local[i_2];
TagMonoid m = combine_tag_monoid(param_4, param_5);
uint _210 = ix + i_2;
_78.Store(_210 * 20 + 0, m.trans_ix);
_78.Store(_210 * 20 + 4, m.linewidth_ix);
_78.Store(_210 * 20 + 8, m.pathseg_ix);
_78.Store(_210 * 20 + 12, m.path_ix);
_78.Store(_210 * 20 + 16, m.pathseg_offset);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,146 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct TagMonoid_1
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct DataBuf
{
TagMonoid_1 data[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
static inline __attribute__((always_inline))
TagMonoid tag_monoid_identity()
{
return TagMonoid{ 0u, 0u, 0u, 0u, 0u };
}
kernel void main0(device DataBuf& _78 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup TagMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
spvUnsafeArray<TagMonoid, 8> local;
local[0].trans_ix = _78.data[ix].trans_ix;
local[0].linewidth_ix = _78.data[ix].linewidth_ix;
local[0].pathseg_ix = _78.data[ix].pathseg_ix;
local[0].path_ix = _78.data[ix].path_ix;
local[0].pathseg_offset = _78.data[ix].pathseg_offset;
TagMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
uint _109 = ix + i;
TagMonoid param = local[i - 1u];
param_1.trans_ix = _78.data[_109].trans_ix;
param_1.linewidth_ix = _78.data[_109].linewidth_ix;
param_1.pathseg_ix = _78.data[_109].pathseg_ix;
param_1.path_ix = _78.data[_109].path_ix;
param_1.pathseg_offset = _78.data[_109].pathseg_offset;
local[i] = combine_tag_monoid(param, param_1);
}
TagMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
TagMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
TagMonoid param_2 = other;
TagMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
TagMonoid row = tag_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
TagMonoid param_4 = row;
TagMonoid param_5 = local[i_2];
TagMonoid m = combine_tag_monoid(param_4, param_5);
uint _210 = ix + i_2;
_78.data[_210].trans_ix = m.trans_ix;
_78.data[_210].linewidth_ix = m.linewidth_ix;
_78.data[_210].pathseg_ix = m.pathseg_ix;
_78.data[_210].path_ix = m.path_ix;
_78.data[_210].pathseg_offset = m.pathseg_offset;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,236 +0,0 @@
struct Alloc
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _53 : register(u0, space0);
ByteAddressBuffer _148 : register(t1, space0);
ByteAddressBuffer _232 : register(t2, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared uint sh_tile_count[256];
groupshared uint sh_tile_offset;
bool check_deps(uint dep_stage)
{
uint _60;
_53.InterlockedOr(4, 0u, _60);
return (_60 & dep_stage) == 0u;
}
float4 load_draw_bbox(uint draw_ix)
{
uint base = (_148.Load(68) >> uint(2)) + (4u * draw_ix);
float x0 = asfloat(_53.Load(base * 4 + 12));
float y0 = asfloat(_53.Load((base + 1u) * 4 + 12));
float x1 = asfloat(_53.Load((base + 2u) * 4 + 12));
float y1 = asfloat(_53.Load((base + 3u) * 4 + 12));
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
uint malloc_stage(uint size, uint mem_size, uint stage)
{
uint _70;
_53.InterlockedAdd(0, size, _70);
uint offset = _70;
if ((offset + size) > mem_size)
{
uint _80;
_53.InterlockedOr(4, stage, _80);
offset = 0u;
}
return offset;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_53.Store(offset * 4 + 12, val);
}
void Path_write(Alloc a, PathRef ref, Path s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.bbox.x | (s.bbox.y << uint(16));
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = s.bbox.z | (s.bbox.w << uint(16));
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = s.tiles.offset;
write_mem(param_6, param_7, param_8);
}
void comp_main()
{
uint param = 1u;
bool _192 = check_deps(param);
if (!_192)
{
return;
}
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef _216 = { _148.Load(20) + (element_ix * 12u) };
PathRef path_ref = _216;
uint drawtag_base = _148.Load(104) >> uint(2);
uint drawtag = 0u;
if (element_ix < _148.Load(4))
{
drawtag = _232.Load((drawtag_base + element_ix) * 4 + 0);
}
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
if ((drawtag != 0u) && (drawtag != 37u))
{
uint param_1 = element_ix;
float4 bbox = load_draw_bbox(param_1);
x0 = int(floor(bbox.x * 0.0625f));
y0 = int(floor(bbox.y * 0.0625f));
x1 = int(ceil(bbox.z * 0.0625f));
y1 = int(ceil(bbox.w * 0.0625f));
}
x0 = clamp(x0, 0, int(_148.Load(12)));
y0 = clamp(y0, 0, int(_148.Load(16)));
x1 = clamp(x1, 0, int(_148.Load(12)));
y1 = clamp(y1, 0, int(_148.Load(16)));
Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0));
sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if (th_ix >= (1u << i))
{
total_tile_count += sh_tile_count[th_ix - (1u << i)];
}
GroupMemoryBarrierWithGroupSync();
sh_tile_count[th_ix] = total_tile_count;
}
if (th_ix == 255u)
{
uint param_2 = total_tile_count * 8u;
uint param_3 = _148.Load(0);
uint param_4 = 2u;
uint _370 = malloc_stage(param_2, param_3, param_4);
sh_tile_offset = _370;
}
GroupMemoryBarrierWithGroupSync();
uint offset_start = sh_tile_offset;
if (offset_start == 0u)
{
return;
}
if (element_ix < _148.Load(4))
{
uint _387;
if (th_ix > 0u)
{
_387 = sh_tile_count[th_ix - 1u];
}
else
{
_387 = 0u;
}
uint tile_subix = _387;
TileRef _400 = { offset_start + (8u * tile_subix) };
path.tiles = _400;
Alloc _406;
_406.offset = _148.Load(20);
Alloc param_5;
param_5.offset = _406.offset;
PathRef param_6 = path_ref;
Path param_7 = path;
Path_write(param_5, param_6, param_7);
}
uint total_count = sh_tile_count[255] * 2u;
uint start_ix = offset_start >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{
_53.Store((start_ix + i_1) * 4 + 12, 0u);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,247 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct Alloc
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_53)
{
uint _60 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, 0u, memory_order_relaxed);
return (_60 & dep_stage) == 0u;
}
static inline __attribute__((always_inline))
float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_53, const device ConfigBuf& v_148)
{
uint base = (v_148.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
float x0 = as_type<float>(v_53.memory[base]);
float y0 = as_type<float>(v_53.memory[base + 1u]);
float x1 = as_type<float>(v_53.memory[base + 2u]);
float y1 = as_type<float>(v_53.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
static inline __attribute__((always_inline))
uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_53)
{
uint _70 = atomic_fetch_add_explicit((device atomic_uint*)&v_53.mem_offset, size, memory_order_relaxed);
uint offset = _70;
if ((offset + size) > mem_size)
{
uint _80 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, stage, memory_order_relaxed);
offset = 0u;
}
return offset;
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_53)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_53.memory[offset] = val;
}
static inline __attribute__((always_inline))
void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_53)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.bbox.x | (s.bbox.y << uint(16));
write_mem(param, param_1, param_2, v_53);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = s.bbox.z | (s.bbox.w << uint(16));
write_mem(param_3, param_4, param_5, v_53);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = s.tiles.offset;
write_mem(param_6, param_7, param_8, v_53);
}
kernel void main0(device Memory& v_53 [[buffer(0)]], const device ConfigBuf& v_148 [[buffer(1)]], const device SceneBuf& _232 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup uint sh_tile_count[256];
threadgroup uint sh_tile_offset;
uint param = 1u;
bool _192 = check_deps(param, v_53);
if (!_192)
{
return;
}
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef{ v_148.conf.tile_alloc.offset + (element_ix * 12u) };
uint drawtag_base = v_148.conf.drawtag_offset >> uint(2);
uint drawtag = 0u;
if (element_ix < v_148.conf.n_elements)
{
drawtag = _232.scene[drawtag_base + element_ix];
}
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
if ((drawtag != 0u) && (drawtag != 37u))
{
uint param_1 = element_ix;
float4 bbox = load_draw_bbox(param_1, v_53, v_148);
x0 = int(floor(bbox.x * 0.0625));
y0 = int(floor(bbox.y * 0.0625));
x1 = int(ceil(bbox.z * 0.0625));
y1 = int(ceil(bbox.w * 0.0625));
}
x0 = clamp(x0, 0, int(v_148.conf.width_in_tiles));
y0 = clamp(y0, 0, int(v_148.conf.height_in_tiles));
x1 = clamp(x1, 0, int(v_148.conf.width_in_tiles));
y1 = clamp(y1, 0, int(v_148.conf.height_in_tiles));
Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0));
sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th_ix >= (1u << i))
{
total_tile_count += sh_tile_count[th_ix - (1u << i)];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_tile_count[th_ix] = total_tile_count;
}
if (th_ix == 255u)
{
uint param_2 = total_tile_count * 8u;
uint param_3 = v_148.conf.mem_size;
uint param_4 = 2u;
uint _370 = malloc_stage(param_2, param_3, param_4, v_53);
sh_tile_offset = _370;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint offset_start = sh_tile_offset;
if (offset_start == 0u)
{
return;
}
if (element_ix < v_148.conf.n_elements)
{
uint _387;
if (th_ix > 0u)
{
_387 = sh_tile_count[th_ix - 1u];
}
else
{
_387 = 0u;
}
uint tile_subix = _387;
path.tiles = TileRef{ offset_start + (8u * tile_subix) };
Alloc param_5;
param_5.offset = v_148.conf.tile_alloc.offset;
PathRef param_6 = path_ref;
Path param_7 = path;
Path_write(param_5, param_6, param_7, v_53);
}
uint total_count = sh_tile_count[255] * 2u;
uint start_ix = offset_start >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{
v_53.memory[start_ix + i_1] = 0u;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,235 +0,0 @@
struct Alloc
{
uint offset;
};
struct TransformRef
{
uint offset;
};
struct Transform
{
float4 mat;
float2 translate;
};
struct TransformSegRef
{
uint offset;
};
struct TransformSeg
{
float4 mat;
float2 translate;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Transform _225 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
RWByteAddressBuffer _71 : register(u0, space0);
ByteAddressBuffer _97 : register(t2, space0);
ByteAddressBuffer _279 : register(t1, space0);
ByteAddressBuffer _377 : register(t3, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Transform sh_scratch[256];
Transform Transform_read(TransformRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _97.Load((ix + 0u) * 4 + 0);
uint raw1 = _97.Load((ix + 1u) * 4 + 0);
uint raw2 = _97.Load((ix + 2u) * 4 + 0);
uint raw3 = _97.Load((ix + 3u) * 4 + 0);
uint raw4 = _97.Load((ix + 4u) * 4 + 0);
uint raw5 = _97.Load((ix + 5u) * 4 + 0);
Transform s;
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.translate = float2(asfloat(raw4), asfloat(raw5));
return s;
}
TransformRef Transform_index(TransformRef ref, uint index)
{
TransformRef _85 = { ref.offset + (index * 24u) };
return _85;
}
Transform combine_monoid(Transform a, Transform b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
Transform monoid_identity()
{
return _225;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_71.Store(offset * 4 + 12, val);
}
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.mat.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.mat.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.mat.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.mat.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.translate.x);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = asuint(s.translate.y);
write_mem(param_15, param_16, param_17);
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _286 = { _279.Load(88) + (ix * 24u) };
TransformRef ref = _286;
TransformRef param = ref;
Transform agg = Transform_read(param);
Transform local[8];
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
TransformRef param_1 = ref;
uint param_2 = i;
TransformRef param_3 = Transform_index(param_1, param_2);
Transform param_4 = agg;
Transform param_5 = Transform_read(param_3);
agg = combine_monoid(param_4, param_5);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
Transform param_6 = other;
Transform param_7 = agg;
agg = combine_monoid(param_6, param_7);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
Transform row = monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
Transform _383;
_383.mat = asfloat(_377.Load4((gl_WorkGroupID.x - 1u) * 32 + 0));
_383.translate = asfloat(_377.Load2((gl_WorkGroupID.x - 1u) * 32 + 16));
row.mat = _383.mat;
row.translate = _383.translate;
}
if (gl_LocalInvocationID.x > 0u)
{
Transform param_8 = row;
Transform param_9 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_monoid(param_8, param_9);
}
Alloc param_12;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
Transform param_10 = row;
Transform param_11 = local[i_2];
Transform m = combine_monoid(param_10, param_11);
TransformSeg _423 = { m.mat, m.translate };
TransformSeg transform = _423;
TransformSegRef _433 = { _279.Load(40) + ((ix + i_2) * 24u) };
TransformSegRef trans_ref = _433;
Alloc _437;
_437.offset = _279.Load(40);
param_12.offset = _437.offset;
TransformSegRef param_13 = trans_ref;
TransformSeg param_14 = transform;
TransformSeg_write(param_12, param_13, param_14);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,289 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Alloc
{
uint offset;
};
struct TransformRef
{
uint offset;
};
struct Transform
{
float4 mat;
float2 translate;
};
struct TransformSegRef
{
uint offset;
};
struct TransformSeg
{
float4 mat;
float2 translate;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
struct SceneBuf
{
uint scene[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Transform_1
{
float4 mat;
float2 translate;
char _m0_final_padding[8];
};
struct ParentBuf
{
Transform_1 parent[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_97)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_97.scene[ix + 0u];
uint raw1 = v_97.scene[ix + 1u];
uint raw2 = v_97.scene[ix + 2u];
uint raw3 = v_97.scene[ix + 3u];
uint raw4 = v_97.scene[ix + 4u];
uint raw5 = v_97.scene[ix + 5u];
Transform s;
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
return s;
}
static inline __attribute__((always_inline))
TransformRef Transform_index(thread const TransformRef& ref, thread const uint& index)
{
return TransformRef{ ref.offset + (index * 24u) };
}
static inline __attribute__((always_inline))
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
static inline __attribute__((always_inline))
Transform monoid_identity()
{
return Transform{ float4(1.0, 0.0, 0.0, 1.0), float2(0.0) };
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_71)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_71.memory[offset] = val;
}
static inline __attribute__((always_inline))
void TransformSeg_write(thread const Alloc& a, thread const TransformSegRef& ref, thread const TransformSeg& s, device Memory& v_71)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.mat.x);
write_mem(param, param_1, param_2, v_71);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.mat.y);
write_mem(param_3, param_4, param_5, v_71);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.mat.z);
write_mem(param_6, param_7, param_8, v_71);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.mat.w);
write_mem(param_9, param_10, param_11, v_71);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.translate.x);
write_mem(param_12, param_13, param_14, v_71);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = as_type<uint>(s.translate.y);
write_mem(param_15, param_16, param_17, v_71);
}
kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _279 [[buffer(1)]], const device SceneBuf& v_97 [[buffer(2)]], const device ParentBuf& _377 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Transform sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef ref = TransformRef{ _279.conf.trans_offset + (ix * 24u) };
TransformRef param = ref;
Transform agg = Transform_read(param, v_97);
spvUnsafeArray<Transform, 8> local;
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
TransformRef param_1 = ref;
uint param_2 = i;
TransformRef param_3 = Transform_index(param_1, param_2);
Transform param_4 = agg;
Transform param_5 = Transform_read(param_3, v_97);
agg = combine_monoid(param_4, param_5);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
Transform param_6 = other;
Transform param_7 = agg;
agg = combine_monoid(param_6, param_7);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
Transform row = monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _380 = gl_WorkGroupID.x - 1u;
row.mat = _377.parent[_380].mat;
row.translate = _377.parent[_380].translate;
}
if (gl_LocalInvocationID.x > 0u)
{
Transform param_8 = row;
Transform param_9 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_monoid(param_8, param_9);
}
Alloc param_12;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
Transform param_10 = row;
Transform param_11 = local[i_2];
Transform m = combine_monoid(param_10, param_11);
TransformSeg transform = TransformSeg{ m.mat, m.translate };
TransformSegRef trans_ref = TransformSegRef{ _279.conf.trans_alloc.offset + ((ix + i_2) * 24u) };
param_12.offset = _279.conf.trans_alloc.offset;
TransformSegRef param_13 = trans_ref;
TransformSeg param_14 = transform;
TransformSeg_write(param_12, param_13, param_14, v_71);
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,141 +0,0 @@
struct TransformRef
{
uint offset;
};
struct Transform
{
float4 mat;
float2 translate;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
ByteAddressBuffer _49 : register(t2, space0);
ByteAddressBuffer _161 : register(t1, space0);
RWByteAddressBuffer _250 : register(u3, space0);
RWByteAddressBuffer _266 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Transform sh_scratch[256];
Transform Transform_read(TransformRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _49.Load((ix + 0u) * 4 + 0);
uint raw1 = _49.Load((ix + 1u) * 4 + 0);
uint raw2 = _49.Load((ix + 2u) * 4 + 0);
uint raw3 = _49.Load((ix + 3u) * 4 + 0);
uint raw4 = _49.Load((ix + 4u) * 4 + 0);
uint raw5 = _49.Load((ix + 5u) * 4 + 0);
Transform s;
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.translate = float2(asfloat(raw4), asfloat(raw5));
return s;
}
TransformRef Transform_index(TransformRef ref, uint index)
{
TransformRef _37 = { ref.offset + (index * 24u) };
return _37;
}
Transform combine_monoid(Transform a, Transform b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _168 = { _161.Load(88) + (ix * 24u) };
TransformRef ref = _168;
TransformRef param = ref;
Transform agg = Transform_read(param);
for (uint i = 1u; i < 8u; i++)
{
TransformRef param_1 = ref;
uint param_2 = i;
TransformRef param_3 = Transform_index(param_1, param_2);
Transform param_4 = agg;
Transform param_5 = Transform_read(param_3);
agg = combine_monoid(param_4, param_5);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{
Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
Transform param_6 = agg;
Transform param_7 = other;
agg = combine_monoid(param_6, param_7);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_250.Store4(gl_WorkGroupID.x * 32 + 0, asuint(agg.mat));
_250.Store2(gl_WorkGroupID.x * 32 + 16, asuint(agg.translate));
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,155 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct TransformRef
{
uint offset;
};
struct Transform
{
float4 mat;
float2 translate;
};
struct SceneBuf
{
uint scene[1];
};
struct Alloc
{
uint offset;
};
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Transform_1
{
float4 mat;
float2 translate;
char _m0_final_padding[8];
};
struct OutBuf
{
Transform_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_49)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_49.scene[ix + 0u];
uint raw1 = v_49.scene[ix + 1u];
uint raw2 = v_49.scene[ix + 2u];
uint raw3 = v_49.scene[ix + 3u];
uint raw4 = v_49.scene[ix + 4u];
uint raw5 = v_49.scene[ix + 5u];
Transform s;
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
return s;
}
static inline __attribute__((always_inline))
TransformRef Transform_index(thread const TransformRef& ref, thread const uint& index)
{
return TransformRef{ ref.offset + (index * 24u) };
}
static inline __attribute__((always_inline))
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
kernel void main0(const device ConfigBuf& _161 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], device OutBuf& _250 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Transform sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef ref = TransformRef{ _161.conf.trans_offset + (ix * 24u) };
TransformRef param = ref;
Transform agg = Transform_read(param, v_49);
for (uint i = 1u; i < 8u; i++)
{
TransformRef param_1 = ref;
uint param_2 = i;
TransformRef param_3 = Transform_index(param_1, param_2);
Transform param_4 = agg;
Transform param_5 = Transform_read(param_3, v_49);
agg = combine_monoid(param_4, param_5);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{
Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
Transform param_6 = agg;
Transform param_7 = other;
agg = combine_monoid(param_6, param_7);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_250.outbuf[gl_WorkGroupID.x].mat = agg.mat;
_250.outbuf[gl_WorkGroupID.x].translate = agg.translate;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,94 +0,0 @@
struct Transform
{
float4 mat;
float2 translate;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Transform _23 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
RWByteAddressBuffer _89 : register(u0, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Transform sh_scratch[256];
Transform combine_monoid(Transform a, Transform b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
Transform monoid_identity()
{
return _23;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
Transform _93;
_93.mat = asfloat(_89.Load4(ix * 32 + 0));
_93.translate = asfloat(_89.Load2(ix * 32 + 16));
Transform local[8];
local[0].mat = _93.mat;
local[0].translate = _93.translate;
Transform param_1;
for (uint i = 1u; i < 8u; i++)
{
Transform param = local[i - 1u];
Transform _119;
_119.mat = asfloat(_89.Load4((ix + i) * 32 + 0));
_119.translate = asfloat(_89.Load2((ix + i) * 32 + 16));
param_1.mat = _119.mat;
param_1.translate = _119.translate;
local[i] = combine_monoid(param, param_1);
}
Transform agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
Transform param_2 = other;
Transform param_3 = agg;
agg = combine_monoid(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
Transform row = monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
Transform param_4 = row;
Transform param_5 = local[i_2];
Transform m = combine_monoid(param_4, param_5);
uint _208 = ix + i_2;
_89.Store4(_208 * 32 + 0, asuint(m.mat));
_89.Store2(_208 * 32 + 16, asuint(m.translate));
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,129 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Transform
{
float4 mat;
float2 translate;
};
struct Transform_1
{
float4 mat;
float2 translate;
char _m0_final_padding[8];
};
struct DataBuf
{
Transform_1 data[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
static inline __attribute__((always_inline))
Transform monoid_identity()
{
return Transform{ float4(1.0, 0.0, 0.0, 1.0), float2(0.0) };
}
kernel void main0(device DataBuf& _89 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup Transform sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
spvUnsafeArray<Transform, 8> local;
local[0].mat = _89.data[ix].mat;
local[0].translate = _89.data[ix].translate;
Transform param_1;
for (uint i = 1u; i < 8u; i++)
{
uint _113 = ix + i;
Transform param = local[i - 1u];
param_1.mat = _89.data[_113].mat;
param_1.translate = _89.data[_113].translate;
local[i] = combine_monoid(param, param_1);
}
Transform agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
Transform param_2 = other;
Transform param_3 = agg;
agg = combine_monoid(param_2, param_3);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
Transform row = monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
Transform param_4 = row;
Transform param_5 = local[i_2];
Transform m = combine_monoid(param_4, param_5);
uint _208 = ix + i_2;
_89.data[_208].mat = m.mat;
_89.data[_208].translate = m.translate;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,26 +0,0 @@
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
ByteAddressBuffer _19 : register(t0);
RWByteAddressBuffer _32 : register(u1);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
void comp_main()
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _19.Load(0))
{
_32.Store(ix * 4 + 0, _19.Load(4));
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,27 +0,0 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct ConfigBuf
{
uint size;
uint value;
};
struct TargetBuf
{
uint data[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
kernel void main0(const device ConfigBuf& _19 [[buffer(0)]], device TargetBuf& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _19.size)
{
_32.data[ix] = _19.value;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,39 +0,0 @@
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _56 : register(u0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
void comp_main()
{
uint rng = gl_GlobalInvocationID.x + 1u;
for (uint i = 0u; i < 100u; i++)
{
rng ^= (rng << uint(13));
rng ^= (rng >> uint(17));
rng ^= (rng << uint(5));
uint bucket = rng % 65536u;
if (bucket != 0u)
{
uint _61;
_56.InterlockedAdd(0, 2u, _61);
uint alloc = _61 + 65536u;
uint _67;
_56.InterlockedExchange(bucket * 4 + 0, alloc, _67);
uint old = _67;
_56.Store(alloc * 4 + 0, old);
_56.Store((alloc + 1u) * 4 + 0, gl_GlobalInvocationID.x);
}
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,36 +0,0 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct MemBuf
{
uint mem[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
kernel void main0(device MemBuf& _56 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint rng = gl_GlobalInvocationID.x + 1u;
for (uint i = 0u; i < 100u; i++)
{
rng ^= (rng << uint(13));
rng ^= (rng >> uint(17));
rng ^= (rng << uint(5));
uint bucket = rng % 65536u;
if (bucket != 0u)
{
uint _61 = atomic_fetch_add_explicit((device atomic_uint*)&_56.mem[0], 2u, memory_order_relaxed);
uint alloc = _61 + 65536u;
uint _67 = atomic_exchange_explicit((device atomic_uint*)&_56.mem[bucket], alloc, memory_order_relaxed);
uint old = _67;
_56.mem[alloc] = old;
_56.mem[alloc + 1u] = gl_GlobalInvocationID.x;
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,54 +0,0 @@
struct Element
{
uint data;
uint flag;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer data_buf : register(u0);
RWByteAddressBuffer control_buf : register(u1);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
uint permute_flag_ix(uint data_ix)
{
return (data_ix * 419u) & 65535u;
}
void comp_main()
{
uint _76;
data_buf.InterlockedExchange(gl_GlobalInvocationID.x * 8 + 0, 1u, _76);
DeviceMemoryBarrier();
uint param = gl_GlobalInvocationID.x;
uint write_flag_ix = permute_flag_ix(param);
uint _77;
data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _77);
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
uint param_1 = read_ix;
uint read_flag_ix = permute_flag_ix(param_1);
uint _58;
data_buf.InterlockedAdd(read_flag_ix * 8 + 4, 0, _58);
uint flag = _58;
DeviceMemoryBarrier();
uint _62;
data_buf.InterlockedAdd(read_ix * 8 + 0, 0, _62);
uint data = _62;
if (flag > data)
{
uint _73;
control_buf.InterlockedAdd(0, 1u, _73);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,54 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct Element
{
uint data;
uint flag;
};
struct DataBuf
{
Element data[1];
};
struct ControlBuf
{
uint failures;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
uint permute_flag_ix(thread const uint& data_ix)
{
return (data_ix * 419u) & 65535u;
}
kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed);
threadgroup_barrier(mem_flags::mem_device);
uint param = gl_GlobalInvocationID.x;
uint write_flag_ix = permute_flag_ix(param);
atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed);
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
uint param_1 = read_ix;
uint read_flag_ix = permute_flag_ix(param_1);
uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed);
uint flag = _58;
threadgroup_barrier(mem_flags::mem_device);
uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed);
uint data = _62;
if (flag > data)
{
uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed);
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -1,225 +0,0 @@
struct Monoid
{
uint element;
};
struct State
{
uint flag;
Monoid aggregate;
Monoid prefix;
};
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
static const Monoid _185 = { 0u };
globallycoherent RWByteAddressBuffer _43 : register(u2);
ByteAddressBuffer _67 : register(t0);
RWByteAddressBuffer _372 : register(u1);
static uint3 gl_LocalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
};
groupshared uint sh_part_ix;
groupshared Monoid sh_scratch[512];
groupshared uint sh_flag;
groupshared Monoid sh_prefix;
Monoid combine_monoid(Monoid a, Monoid b)
{
Monoid _22 = { a.element + b.element };
return _22;
}
void comp_main()
{
if (gl_LocalInvocationID.x == 0u)
{
uint _47;
_43.InterlockedAdd(0, 1u, _47);
sh_part_ix = _47;
}
GroupMemoryBarrierWithGroupSync();
uint part_ix = sh_part_ix;
uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u);
Monoid _71;
_71.element = _67.Load(ix * 4 + 0);
Monoid local[16];
local[0].element = _71.element;
Monoid param_1;
for (uint i = 1u; i < 16u; i++)
{
Monoid param = local[i - 1u];
Monoid _94;
_94.element = _67.Load((ix + i) * 4 + 0);
param_1.element = _94.element;
local[i] = combine_monoid(param, param_1);
}
Monoid agg = local[15];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
Monoid param_2 = other;
Monoid param_3 = agg;
agg = combine_monoid(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 511u)
{
_43.Store(part_ix * 12 + 8, agg.element);
if (part_ix == 0u)
{
_43.Store(12, agg.element);
}
}
DeviceMemoryBarrier();
if (gl_LocalInvocationID.x == 511u)
{
uint flag = 1u;
if (part_ix == 0u)
{
flag = 2u;
}
_43.Store(part_ix * 12 + 4, flag);
}
Monoid exclusive = _185;
if (part_ix != 0u)
{
uint look_back_ix = part_ix - 1u;
uint their_ix = 0u;
Monoid their_prefix;
Monoid their_agg;
Monoid m;
while (true)
{
if (gl_LocalInvocationID.x == 511u)
{
sh_flag = _43.Load(look_back_ix * 12 + 4);
}
GroupMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
uint flag_1 = sh_flag;
GroupMemoryBarrierWithGroupSync();
if (flag_1 == 2u)
{
if (gl_LocalInvocationID.x == 511u)
{
Monoid _223;
_223.element = _43.Load(look_back_ix * 12 + 12);
their_prefix.element = _223.element;
Monoid param_4 = their_prefix;
Monoid param_5 = exclusive;
exclusive = combine_monoid(param_4, param_5);
}
break;
}
else
{
if (flag_1 == 1u)
{
if (gl_LocalInvocationID.x == 511u)
{
Monoid _245;
_245.element = _43.Load(look_back_ix * 12 + 8);
their_agg.element = _245.element;
Monoid param_6 = their_agg;
Monoid param_7 = exclusive;
exclusive = combine_monoid(param_6, param_7);
}
look_back_ix--;
their_ix = 0u;
continue;
}
}
if (gl_LocalInvocationID.x == 511u)
{
Monoid _267;
_267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0);
m.element = _267.element;
if (their_ix == 0u)
{
their_agg = m;
}
else
{
Monoid param_8 = their_agg;
Monoid param_9 = m;
their_agg = combine_monoid(param_8, param_9);
}
their_ix++;
if (their_ix == 8192u)
{
Monoid param_10 = their_agg;
Monoid param_11 = exclusive;
exclusive = combine_monoid(param_10, param_11);
if (look_back_ix == 0u)
{
sh_flag = 2u;
}
else
{
look_back_ix--;
their_ix = 0u;
}
}
}
GroupMemoryBarrierWithGroupSync();
flag_1 = sh_flag;
GroupMemoryBarrierWithGroupSync();
if (flag_1 == 2u)
{
break;
}
}
if (gl_LocalInvocationID.x == 511u)
{
Monoid param_12 = exclusive;
Monoid param_13 = agg;
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
sh_prefix = exclusive;
_43.Store(part_ix * 12 + 12, inclusive_prefix.element);
}
DeviceMemoryBarrier();
if (gl_LocalInvocationID.x == 511u)
{
_43.Store(part_ix * 12 + 4, 2u);
}
}
GroupMemoryBarrierWithGroupSync();
if (part_ix != 0u)
{
exclusive = sh_prefix;
}
Monoid row = exclusive;
if (gl_LocalInvocationID.x > 0u)
{
Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u];
Monoid param_14 = row;
Monoid param_15 = other_1;
row = combine_monoid(param_14, param_15);
}
for (uint i_2 = 0u; i_2 < 16u; i_2++)
{
Monoid param_16 = row;
Monoid param_17 = local[i_2];
Monoid m_1 = combine_monoid(param_16, param_17);
_372.Store((ix + i_2) * 4 + 0, m_1.element);
}
}
[numthreads(512, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
comp_main();
}

View file

@ -1,264 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Monoid
{
uint element;
};
struct Monoid_1
{
uint element;
};
struct State
{
uint flag;
Monoid_1 aggregate;
Monoid_1 prefix;
};
struct StateBuf
{
uint part_counter;
State state[1];
};
struct InBuf
{
Monoid_1 inbuf[1];
};
struct OutBuf
{
Monoid_1 outbuf[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
static inline __attribute__((always_inline))
Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b)
{
return Monoid{ a.element + b.element };
}
kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_part_ix;
threadgroup Monoid sh_scratch[512];
threadgroup uint sh_flag;
threadgroup Monoid sh_prefix;
if (gl_LocalInvocationID.x == 0u)
{
uint _47 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_43.part_counter, 1u, memory_order_relaxed);
sh_part_ix = _47;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint part_ix = sh_part_ix;
uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u);
spvUnsafeArray<Monoid, 16> local;
local[0].element = _67.inbuf[ix].element;
Monoid param_1;
for (uint i = 1u; i < 16u; i++)
{
Monoid param = local[i - 1u];
param_1.element = _67.inbuf[ix + i].element;
local[i] = combine_monoid(param, param_1);
}
Monoid agg = local[15];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 9u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
Monoid param_2 = other;
Monoid param_3 = agg;
agg = combine_monoid(param_2, param_3);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 511u)
{
_43.state[part_ix].aggregate.element = agg.element;
if (part_ix == 0u)
{
_43.state[0].prefix.element = agg.element;
}
}
threadgroup_barrier(mem_flags::mem_device);
if (gl_LocalInvocationID.x == 511u)
{
uint flag = 1u;
if (part_ix == 0u)
{
flag = 2u;
}
_43.state[part_ix].flag = flag;
}
Monoid exclusive = Monoid{ 0u };
if (part_ix != 0u)
{
uint look_back_ix = part_ix - 1u;
uint their_ix = 0u;
Monoid their_prefix;
Monoid their_agg;
Monoid m;
while (true)
{
if (gl_LocalInvocationID.x == 511u)
{
sh_flag = _43.state[look_back_ix].flag;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device);
uint flag_1 = sh_flag;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (flag_1 == 2u)
{
if (gl_LocalInvocationID.x == 511u)
{
their_prefix.element = _43.state[look_back_ix].prefix.element;
Monoid param_4 = their_prefix;
Monoid param_5 = exclusive;
exclusive = combine_monoid(param_4, param_5);
}
break;
}
else
{
if (flag_1 == 1u)
{
if (gl_LocalInvocationID.x == 511u)
{
their_agg.element = _43.state[look_back_ix].aggregate.element;
Monoid param_6 = their_agg;
Monoid param_7 = exclusive;
exclusive = combine_monoid(param_6, param_7);
}
look_back_ix--;
their_ix = 0u;
continue;
}
}
if (gl_LocalInvocationID.x == 511u)
{
m.element = _67.inbuf[(look_back_ix * 8192u) + their_ix].element;
if (their_ix == 0u)
{
their_agg = m;
}
else
{
Monoid param_8 = their_agg;
Monoid param_9 = m;
their_agg = combine_monoid(param_8, param_9);
}
their_ix++;
if (their_ix == 8192u)
{
Monoid param_10 = their_agg;
Monoid param_11 = exclusive;
exclusive = combine_monoid(param_10, param_11);
if (look_back_ix == 0u)
{
sh_flag = 2u;
}
else
{
look_back_ix--;
their_ix = 0u;
}
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
flag_1 = sh_flag;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (flag_1 == 2u)
{
break;
}
}
if (gl_LocalInvocationID.x == 511u)
{
Monoid param_12 = exclusive;
Monoid param_13 = agg;
Monoid inclusive_prefix = combine_monoid(param_12, param_13);
sh_prefix = exclusive;
_43.state[part_ix].prefix.element = inclusive_prefix.element;
}
threadgroup_barrier(mem_flags::mem_device);
if (gl_LocalInvocationID.x == 511u)
{
_43.state[part_ix].flag = 2u;
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if (part_ix != 0u)
{
exclusive = sh_prefix;
}
Monoid row = exclusive;
if (gl_LocalInvocationID.x > 0u)
{
Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u];
Monoid param_14 = row;
Monoid param_15 = other_1;
row = combine_monoid(param_14, param_15);
}
for (uint i_2 = 0u; i_2 < 16u; i_2++)
{
Monoid param_16 = row;
Monoid param_17 = local[i_2];
Monoid m_1 = combine_monoid(param_16, param_17);
_372.outbuf[ix + i_2].element = m_1.element;
}
}

Some files were not shown because too many files have changed in this diff Show more