Remove generated shaders from dev branch

This commit is contained in:
Raph Levien 2022-07-13 11:24:19 -07:00
parent b77df99159
commit 64e6268059
122 changed files with 1 additions and 18684 deletions

1
.gitignore vendored
View file

@ -2,3 +2,4 @@
**/*.rs.bk
.ninja_deps
.ninja_log
**/shader/gen

Binary file not shown.

View file

@ -1,62 +0,0 @@
static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u);
RWByteAddressBuffer _57 : register(u0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
float mod(float x, float y)
{
return x - y * floor(x / y);
}
float2 mod(float2 x, float2 y)
{
return x - y * floor(x / y);
}
float3 mod(float3 x, float3 y)
{
return x - y * floor(x / y);
}
float4 mod(float4 x, float4 y)
{
return x - y * floor(x / y);
}
uint collatz_iterations(inout uint n)
{
uint i = 0u;
while (n != 1u)
{
if (mod(float(n), 2.0f) == 0.0f)
{
n /= 2u;
}
else
{
n = (3u * n) + 1u;
}
i++;
}
return i;
}
void comp_main()
{
uint index = gl_GlobalInvocationID.x;
uint param = _57.Load(index * 4 + 0);
uint _65 = collatz_iterations(param);
_57.Store(index * 4 + 0, _65);
}
[numthreads(1, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,48 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct PrimeIndices
{
uint indices[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
template<typename Tx, typename Ty>
inline Tx mod(Tx x, Ty y)
{
return x - y * floor(x / y);
}
static inline __attribute__((always_inline))
uint collatz_iterations(thread uint& n)
{
uint i = 0u;
while (n != 1u)
{
if (mod(float(n), 2.0) == 0.0)
{
n /= 2u;
}
else
{
n = (3u * n) + 1u;
}
i++;
}
return i;
}
kernel void main0(device PrimeIndices& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint index = gl_GlobalInvocationID.x;
uint param = _57.indices[index];
uint _65 = collatz_iterations(param);
_57.indices[index] = _65;
}

Binary file not shown.

Binary file not shown.

View file

@ -1,244 +0,0 @@
struct Alloc
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _67 : register(u0, space0);
ByteAddressBuffer _166 : register(t1, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
static uint gl_LocalInvocationIndex;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
uint gl_LocalInvocationIndex : SV_GroupIndex;
};
groupshared uint sh_row_width[256];
groupshared Alloc sh_row_alloc[256];
groupshared uint sh_row_count[256];
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _67.Load(offset * 4 + 8);
return v;
}
Path Path_read(Alloc a, PathRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _134 = { raw2 };
s.tiles = _134;
return s;
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_67.Store(offset * 4 + 8, val);
}
void comp_main()
{
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
bool mem_ok = _67.Load(4) == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _166.Load(0))
{
PathRef _180 = { _166.Load(16) + (element_ix * 12u) };
PathRef path_ref = _180;
Alloc _185;
_185.offset = _166.Load(16);
Alloc param;
param.offset = _185.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u;
bool _216;
if (_210)
{
_216 = path.bbox.y > 0u;
}
else
{
_216 = _210;
}
if (_216)
{
row_count = 0u;
}
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
{
_269 = th_ix >= (1u << i);
}
else
{
_269 = _262;
}
if (_269)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.y == 0u)
{
sh_row_count[th_ix] = row_count;
}
}
GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255];
uint _348;
for (uint row = th_ix; row < total_rows; row += 256u)
{
uint el_ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = el_ix + (128u >> i_1);
if (row >= sh_row_count[probe - 1u])
{
el_ix = probe;
}
}
uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_348 = sh_row_count[el_ix - 1u];
}
else
{
_348 = 0u;
}
uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11);
}
}
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
comp_main();
}

View file

@ -1,247 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Alloc
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_67.memory[offset];
return v;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_67);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_67);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_67);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
return s;
}
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_67.memory[offset] = val;
}
kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256];
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
bool mem_ok = v_67.mem_error == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _166.conf.n_elements)
{
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param;
param.offset = _166.conf.tile_alloc.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1, v_67);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u;
bool _216;
if (_210)
{
_216 = path.bbox.y > 0u;
}
else
{
_216 = _210;
}
if (_216)
{
row_count = 0u;
}
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
{
_269 = th_ix >= (1u << i);
}
else
{
_269 = _262;
}
if (_269)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.y == 0u)
{
sh_row_count[th_ix] = row_count;
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255];
uint _348;
for (uint row = th_ix; row < total_rows; row += 256u)
{
uint el_ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = el_ix + (128u >> i_1);
if (row >= sh_row_count[probe - 1u])
{
el_ix = probe;
}
}
uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_348 = sh_row_count[el_ix - 1u];
}
else
{
_348 = 0u;
}
uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6, v_67);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8, v_67);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11, v_67);
}
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,244 +0,0 @@
struct Alloc
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u);
RWByteAddressBuffer _67 : register(u0, space0);
ByteAddressBuffer _166 : register(t1, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
static uint gl_LocalInvocationIndex;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
uint gl_LocalInvocationIndex : SV_GroupIndex;
};
groupshared uint sh_row_width[256];
groupshared Alloc sh_row_alloc[256];
groupshared uint sh_row_count[256];
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _67.Load(offset * 4 + 8);
return v;
}
Path Path_read(Alloc a, PathRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _134 = { raw2 };
s.tiles = _134;
return s;
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_67.Store(offset * 4 + 8, val);
}
void comp_main()
{
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
bool mem_ok = _67.Load(4) == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _166.Load(0))
{
PathRef _180 = { _166.Load(16) + (element_ix * 12u) };
PathRef path_ref = _180;
Alloc _185;
_185.offset = _166.Load(16);
Alloc param;
param.offset = _185.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u;
bool _216;
if (_210)
{
_216 = path.bbox.y > 0u;
}
else
{
_216 = _210;
}
if (_216)
{
row_count = 0u;
}
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
{
_269 = th_ix >= (1u << i);
}
else
{
_269 = _262;
}
if (_269)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.y == 0u)
{
sh_row_count[th_ix] = row_count;
}
}
GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255];
uint _348;
for (uint row = th_ix; row < total_rows; row += 1024u)
{
uint el_ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = el_ix + (128u >> i_1);
if (row >= sh_row_count[probe - 1u])
{
el_ix = probe;
}
}
uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_348 = sh_row_count[el_ix - 1u];
}
else
{
_348 = 0u;
}
uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11);
}
}
}
}
[numthreads(256, 4, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
comp_main();
}

View file

@ -1,247 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Alloc
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 4u, 1u);
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_67.memory[offset];
return v;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_67);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_67);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_67);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
return s;
}
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_67.memory[offset] = val;
}
kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256];
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
bool mem_ok = v_67.mem_error == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _166.conf.n_elements)
{
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param;
param.offset = _166.conf.tile_alloc.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1, v_67);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u;
bool _216;
if (_210)
{
_216 = path.bbox.y > 0u;
}
else
{
_216 = _210;
}
if (_216)
{
row_count = 0u;
}
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
}
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
{
_269 = th_ix >= (1u << i);
}
else
{
_269 = _262;
}
if (_269)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.y == 0u)
{
sh_row_count[th_ix] = row_count;
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255];
uint _348;
for (uint row = th_ix; row < total_rows; row += 1024u)
{
uint el_ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = el_ix + (128u >> i_1);
if (row >= sh_row_count[probe - 1u])
{
el_ix = probe;
}
}
uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_348 = sh_row_count[el_ix - 1u];
}
else
{
_348 = 0u;
}
uint seq_ix = row - _348;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6, v_67);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8, v_67);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11, v_67);
}
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,66 +0,0 @@
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
ByteAddressBuffer _21 : register(t1, space0);
RWByteAddressBuffer _45 : register(u0, space0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
void comp_main()
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _21.Load(76))
{
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix);
_45.Store(out_ix * 4 + 8, 65535u);
_45.Store((out_ix + 1u) * 4 + 8, 65535u);
_45.Store((out_ix + 2u) * 4 + 8, 0u);
_45.Store((out_ix + 3u) * 4 + 8, 0u);
}
}
[numthreads(512, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,68 +0,0 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
kernel void main0(device Memory& _45 [[buffer(0)]], const device ConfigBuf& _21 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _21.conf.n_path)
{
uint out_ix = (_21.conf.path_bbox_alloc.offset >> uint(2)) + (6u * ix);
_45.memory[out_ix] = 65535u;
_45.memory[out_ix + 1u] = 65535u;
_45.memory[out_ix + 2u] = 0u;
_45.memory[out_ix + 3u] = 0u;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,342 +0,0 @@
struct Alloc
{
uint offset;
};
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct BinInstanceRef
{
uint offset;
};
struct BinInstance
{
uint element_ix;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _81 : register(u0, space0);
ByteAddressBuffer _156 : register(t1, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
};
groupshared uint bitmaps[8][256];
groupshared bool sh_alloc_failed;
groupshared uint count[8][256];
groupshared Alloc sh_chunk_alloc[256];
DrawMonoid load_draw_monoid(uint element_ix)
{
uint base = (_156.Load(44) >> uint(2)) + (4u * element_ix);
uint path_ix = _81.Load(base * 4 + 8);
uint clip_ix = _81.Load((base + 1u) * 4 + 8);
uint scene_offset = _81.Load((base + 2u) * 4 + 8);
uint info_offset = _81.Load((base + 3u) * 4 + 8);
DrawMonoid _190 = { path_ix, clip_ix, scene_offset, info_offset };
return _190;
}
float4 load_clip_bbox(uint clip_ix)
{
uint base = (_156.Load(60) >> uint(2)) + (4u * clip_ix);
float x0 = asfloat(_81.Load(base * 4 + 8));
float y0 = asfloat(_81.Load((base + 1u) * 4 + 8));
float x1 = asfloat(_81.Load((base + 2u) * 4 + 8));
float y1 = asfloat(_81.Load((base + 3u) * 4 + 8));
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_156.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_81.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_81.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_81.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_81.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
float4 bbox_intersect(float4 a, float4 b)
{
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
}
void store_draw_bbox(uint draw_ix, float4 bbox)
{
uint base = (_156.Load(64) >> uint(2)) + (4u * draw_ix);
_81.Store(base * 4 + 8, asuint(bbox.x));
_81.Store((base + 1u) * 4 + 8, asuint(bbox.y));
_81.Store((base + 2u) * 4 + 8, asuint(bbox.z));
_81.Store((base + 3u) * 4 + 8, asuint(bbox.w));
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
MallocResult malloc(uint size)
{
uint _87;
_81.InterlockedAdd(0, size, _87);
uint offset = _87;
uint _94;
_81.GetDimensions(_94);
_94 = (_94 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_94) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _116;
_81.InterlockedMax(4, 1u, _116);
return r;
}
return r;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_81.Store(offset * 4 + 8, val);
}
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.element_ix;
write_mem(param, param_1, param_2);
}
void comp_main()
{
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++)
{
bitmaps[i][gl_LocalInvocationID.x] = 0u;
}
if (gl_LocalInvocationID.x == 0u)
{
sh_alloc_failed = false;
}
GroupMemoryBarrierWithGroupSync();
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
if (element_ix < _156.Load(0))
{
uint param = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param);
uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u)
{
uint param_1 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_1);
}
uint param_2 = path_ix;
float4 path_bbox = load_path_bbox(param_2);
float4 param_3 = path_bbox;
float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_3, param_4);
float4 _417 = bbox;
float4 _419 = bbox;
float2 _421 = max(_417.xy, _419.zw);
bbox.z = _421.x;
bbox.w = _421.y;
uint param_5 = element_ix;
float4 param_6 = bbox;
store_draw_bbox(param_5, param_6);
x0 = int(floor(bbox.x * 0.00390625f));
y0 = int(floor(bbox.y * 0.00390625f));
x1 = int(ceil(bbox.z * 0.00390625f));
y1 = int(ceil(bbox.w * 0.00390625f));
}
uint width_in_bins = ((_156.Load(8) + 16u) - 1u) / 16u;
uint height_in_bins = ((_156.Load(12) + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
y1 = clamp(y1, y0, int(height_in_bins));
if (x0 == x1)
{
y1 = y0;
}
int x = x0;
int y = y0;
uint my_slice = gl_LocalInvocationID.x / 32u;
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1)
{
uint _523;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _523);
x++;
if (x == x1)
{
x = x0;
y++;
}
}
GroupMemoryBarrierWithGroupSync();
uint element_count = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count;
}
uint param_7 = 0u;
uint param_8 = 0u;
bool param_9 = true;
Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
if (element_count != 0u)
{
uint param_10 = element_count * 4u;
MallocResult _573 = malloc(param_10);
MallocResult chunk = _573;
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed)
{
sh_alloc_failed = true;
}
}
uint out_ix = (_156.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _603;
_603.offset = _156.Load(20);
Alloc param_11;
param_11.offset = _603.offset;
uint param_12 = out_ix;
uint param_13 = element_count;
write_mem(param_11, param_12, param_13);
Alloc _615;
_615.offset = _156.Load(20);
Alloc param_14;
param_14.offset = _615.offset;
uint param_15 = out_ix + 1u;
uint param_16 = chunk_alloc.offset;
write_mem(param_14, param_15, param_16);
GroupMemoryBarrierWithGroupSync();
bool _630;
if (!sh_alloc_failed)
{
_630 = _81.Load(4) != 0u;
}
else
{
_630 = sh_alloc_failed;
}
if (_630)
{
return;
}
x = x0;
y = y0;
while (y < y1)
{
uint bin_ix = (uint(y) * width_in_bins) + uint(x);
uint out_mask = bitmaps[my_slice][bin_ix];
if ((out_mask & my_mask) != 0u)
{
uint idx = uint(int(countbits(out_mask & (my_mask - 1u))));
if (my_slice > 0u)
{
idx += count[my_slice - 1u][bin_ix];
}
Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u);
BinInstanceRef _692 = { out_offset };
BinInstance _694 = { element_ix };
Alloc param_17 = out_alloc;
BinInstanceRef param_18 = _692;
BinInstance param_19 = _694;
BinInstance_write(param_17, param_18, param_19);
}
x++;
if (x == x1)
{
x = x0;
y++;
}
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
comp_main();
}

View file

@ -1,347 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct Alloc
{
uint offset;
};
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct BinInstanceRef
{
uint offset;
};
struct BinInstance
{
uint element_ix;
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{
uint base = (v_156.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix);
uint path_ix = v_81.memory[base];
uint clip_ix = v_81.memory[base + 1u];
uint scene_offset = v_81.memory[base + 2u];
uint info_offset = v_81.memory[base + 3u];
return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset };
}
static inline __attribute__((always_inline))
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{
uint base = (v_156.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
float x0 = as_type<float>(v_81.memory[base]);
float y0 = as_type<float>(v_81.memory[base + 1u]);
float x1 = as_type<float>(v_81.memory[base + 2u]);
float y1 = as_type<float>(v_81.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{
uint base = (v_156.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_81.memory[base]) - 32768.0;
float bbox_t = float(v_81.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_81.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_81.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
float4 bbox_intersect(thread const float4& a, thread const float4& b)
{
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
}
static inline __attribute__((always_inline))
void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
{
uint base = (v_156.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
v_81.memory[base] = as_type<uint>(bbox.x);
v_81.memory[base + 1u] = as_type<uint>(bbox.y);
v_81.memory[base + 2u] = as_type<uint>(bbox.z);
v_81.memory[base + 3u] = as_type<uint>(bbox.w);
}
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_81, constant uint& v_81BufferSize)
{
uint _87 = atomic_fetch_add_explicit((device atomic_uint*)&v_81.mem_offset, size, memory_order_relaxed);
uint offset = _87;
MallocResult r;
r.failed = (offset + size) > uint(int((v_81BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _116 = atomic_fetch_max_explicit((device atomic_uint*)&v_81.mem_error, 1u, memory_order_relaxed);
return r;
}
return r;
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_81, constant uint& v_81BufferSize)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_81.memory[offset] = val;
}
static inline __attribute__((always_inline))
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_81, constant uint& v_81BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.element_ix;
write_mem(param, param_1, param_2, v_81, v_81BufferSize);
}
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_81 [[buffer(0)]], const device ConfigBuf& v_156 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint bitmaps[8][256];
threadgroup short sh_alloc_failed;
threadgroup uint count[8][256];
threadgroup Alloc sh_chunk_alloc[256];
constant uint& v_81BufferSize = spvBufferSizeConstants[0];
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++)
{
bitmaps[i][gl_LocalInvocationID.x] = 0u;
}
if (gl_LocalInvocationID.x == 0u)
{
sh_alloc_failed = short(false);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
if (element_ix < v_156.conf.n_elements)
{
uint param = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param, v_81, v_81BufferSize, v_156);
uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u)
{
uint param_1 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_1, v_81, v_81BufferSize, v_156);
}
uint param_2 = path_ix;
float4 path_bbox = load_path_bbox(param_2, v_81, v_81BufferSize, v_156);
float4 param_3 = path_bbox;
float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_3, param_4);
float4 _417 = bbox;
float4 _419 = bbox;
float2 _421 = fast::max(_417.xy, _419.zw);
bbox.z = _421.x;
bbox.w = _421.y;
uint param_5 = element_ix;
float4 param_6 = bbox;
store_draw_bbox(param_5, param_6, v_81, v_81BufferSize, v_156);
x0 = int(floor(bbox.x * 0.00390625));
y0 = int(floor(bbox.y * 0.00390625));
x1 = int(ceil(bbox.z * 0.00390625));
y1 = int(ceil(bbox.w * 0.00390625));
}
uint width_in_bins = ((v_156.conf.width_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((v_156.conf.height_in_tiles + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
y1 = clamp(y1, y0, int(height_in_bins));
if (x0 == x1)
{
y1 = y0;
}
int x = x0;
int y = y0;
uint my_slice = gl_LocalInvocationID.x / 32u;
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1)
{
uint _523 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
x++;
if (x == x1)
{
x = x0;
y++;
}
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint element_count = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count;
}
uint param_7 = 0u;
uint param_8 = 0u;
bool param_9 = true;
Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
if (element_count != 0u)
{
uint param_10 = element_count * 4u;
MallocResult _573 = malloc(param_10, v_81, v_81BufferSize);
MallocResult chunk = _573;
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed)
{
sh_alloc_failed = short(true);
}
}
uint out_ix = (v_156.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_11;
param_11.offset = v_156.conf.bin_alloc.offset;
uint param_12 = out_ix;
uint param_13 = element_count;
write_mem(param_11, param_12, param_13, v_81, v_81BufferSize);
Alloc param_14;
param_14.offset = v_156.conf.bin_alloc.offset;
uint param_15 = out_ix + 1u;
uint param_16 = chunk_alloc.offset;
write_mem(param_14, param_15, param_16, v_81, v_81BufferSize);
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _630;
if (!bool(sh_alloc_failed))
{
_630 = v_81.mem_error != 0u;
}
else
{
_630 = bool(sh_alloc_failed);
}
if (_630)
{
return;
}
x = x0;
y = y0;
while (y < y1)
{
uint bin_ix = (uint(y) * width_in_bins) + uint(x);
uint out_mask = bitmaps[my_slice][bin_ix];
if ((out_mask & my_mask) != 0u)
{
uint idx = uint(int(popcount(out_mask & (my_mask - 1u))));
if (my_slice > 0u)
{
idx += count[my_slice - 1u][bin_ix];
}
Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u);
Alloc param_17 = out_alloc;
BinInstanceRef param_18 = BinInstanceRef{ out_offset };
BinInstance param_19 = BinInstance{ element_ix };
BinInstance_write(param_17, param_18, param_19, v_81, v_81BufferSize);
}
x++;
if (x == x1)
{
x = x0;
y++;
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,371 +0,0 @@
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _393 = { 0u, 0u };
ByteAddressBuffer _80 : register(t1, space0);
RWByteAddressBuffer _96 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Bic sh_bic[510];
groupshared uint sh_stack[256];
groupshared float4 sh_stack_bbox[256];
groupshared uint sh_link[256];
groupshared float4 sh_bbox[256];
Bic load_bic(uint ix)
{
uint base = (_80.Load(52) >> uint(2)) + (2u * ix);
Bic _286 = { _96.Load(base * 4 + 8), _96.Load((base + 1u) * 4 + 8) };
return _286;
}
Bic bic_combine(Bic x, Bic y)
{
uint m = min(x.b, y.a);
Bic _72 = { (x.a + y.a) - m, (x.b + y.b) - m };
return _72;
}
ClipEl load_clip_el(uint ix)
{
uint base = (_80.Load(56) >> uint(2)) + (5u * ix);
uint parent_ix = _96.Load(base * 4 + 8);
float x0 = asfloat(_96.Load((base + 1u) * 4 + 8));
float y0 = asfloat(_96.Load((base + 2u) * 4 + 8));
float x1 = asfloat(_96.Load((base + 3u) * 4 + 8));
float y1 = asfloat(_96.Load((base + 4u) * 4 + 8));
float4 bbox = float4(x0, y0, x1, y1);
ClipEl _335 = { parent_ix, bbox };
return _335;
}
float4 bbox_intersect(float4 a, float4 b)
{
return float4(max(a.xy, b.xy), min(a.zw, b.zw));
}
uint load_path_ix(uint ix)
{
if (ix < _80.Load(80))
{
return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8);
}
else
{
return 2147483648u;
}
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_80.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_96.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_96.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_96.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_96.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
uint search_link(inout Bic bic)
{
uint ix = gl_LocalInvocationID.x;
uint j = 0u;
while (j < 8u)
{
uint base = 512u - (2u << (8u - j));
if (((ix >> j) & 1u) != 0u)
{
Bic param = sh_bic[(base + (ix >> j)) - 1u];
Bic param_1 = bic;
Bic test = bic_combine(param, param_1);
if (test.b > 0u)
{
break;
}
bic = test;
ix -= (1u << j);
}
j++;
}
if (ix > 0u)
{
while (j > 0u)
{
j--;
uint base_1 = 512u - (2u << (8u - j));
Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u];
Bic param_3 = bic;
Bic test_1 = bic_combine(param_2, param_3);
if (test_1.b == 0u)
{
bic = test_1;
ix -= (1u << j);
}
}
}
if (ix > 0u)
{
return ix - 1u;
}
else
{
return 4294967295u - bic.a;
}
}
void store_clip_bbox(uint ix, float4 bbox)
{
uint base = (_80.Load(60) >> uint(2)) + (4u * ix);
_96.Store(base * 4 + 8, asuint(bbox.x));
_96.Store((base + 1u) * 4 + 8, asuint(bbox.y));
_96.Store((base + 2u) * 4 + 8, asuint(bbox.z));
_96.Store((base + 3u) * 4 + 8, asuint(bbox.w));
}
void comp_main()
{
uint th = gl_LocalInvocationID.x;
Bic bic = _393;
if (th < gl_WorkGroupID.x)
{
uint param = th;
bic = load_bic(param);
}
sh_bic[th] = bic;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[th + (1u << i)];
Bic param_1 = bic;
Bic param_2 = other;
bic = bic_combine(param_1, param_2);
}
GroupMemoryBarrierWithGroupSync();
sh_bic[th] = bic;
}
GroupMemoryBarrierWithGroupSync();
uint stack_size = sh_bic[0].b;
uint sp = 255u - th;
uint ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = ix + (128u >> i_1);
if (sp < sh_bic[probe].b)
{
ix = probe;
}
}
uint b = sh_bic[ix].b;
float4 bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
if (sp < b)
{
uint param_3 = (((ix * 256u) + b) - sp) - 1u;
ClipEl el = load_clip_el(param_3);
sh_stack[th] = el.parent_ix;
bbox = el.bbox;
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
sh_stack_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
if (th >= (1u << i_2))
{
float4 param_4 = sh_stack_bbox[th - (1u << i_2)];
float4 param_5 = bbox;
bbox = bbox_intersect(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
}
sh_stack_bbox[th] = bbox;
uint param_6 = gl_GlobalInvocationID.x;
uint inp = load_path_ix(param_6);
bool is_push = int(inp) >= 0;
Bic _559 = { 1u - uint(is_push), uint(is_push) };
bic = _559;
sh_bic[th] = bic;
if (is_push)
{
uint param_7 = inp;
bbox = load_path_bbox(param_7);
}
else
{
bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
}
uint inbase = 0u;
for (uint i_3 = 0u; i_3 < 7u; i_3++)
{
uint outbase = 512u - (1u << (8u - i_3));
GroupMemoryBarrierWithGroupSync();
if (th < (1u << (7u - i_3)))
{
Bic param_8 = sh_bic[inbase + (th * 2u)];
Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u];
sh_bic[outbase + th] = bic_combine(param_8, param_9);
}
inbase = outbase;
}
GroupMemoryBarrierWithGroupSync();
bic = _393;
Bic param_10 = bic;
uint _618 = search_link(param_10);
bic = param_10;
uint link = _618;
sh_link[th] = link;
GroupMemoryBarrierWithGroupSync();
uint grandparent;
if (int(link) >= 0)
{
grandparent = sh_link[link];
}
else
{
grandparent = link - 1u;
}
uint parent;
if (int(link) >= 0)
{
parent = (gl_WorkGroupID.x * 256u) + link;
}
else
{
if (int(link + stack_size) >= 0)
{
parent = sh_stack[256u + link];
}
else
{
parent = 4294967295u;
}
}
for (uint i_4 = 0u; i_4 < 8u; i_4++)
{
if (i_4 != 0u)
{
sh_link[th] = link;
}
sh_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
if (int(link) >= 0)
{
float4 param_11 = sh_bbox[link];
float4 param_12 = bbox;
bbox = bbox_intersect(param_11, param_12);
link = sh_link[link];
}
GroupMemoryBarrierWithGroupSync();
}
if (int(link + stack_size) >= 0)
{
float4 param_13 = sh_stack_bbox[256u + link];
float4 param_14 = bbox;
bbox = bbox_intersect(param_13, param_14);
}
sh_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
uint path_ix = inp;
bool _717 = !is_push;
bool _725;
if (_717)
{
_725 = gl_GlobalInvocationID.x < _80.Load(80);
}
else
{
_725 = _717;
}
if (_725)
{
uint param_15 = parent;
path_ix = load_path_ix(param_15);
uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (4u * (~inp));
_96.Store(drawmonoid_out_base * 4 + 8, path_ix);
if (int(grandparent) >= 0)
{
bbox = sh_bbox[grandparent];
}
else
{
if (int(grandparent + stack_size) >= 0)
{
bbox = sh_stack_bbox[256u + grandparent];
}
else
{
bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f);
}
}
}
uint param_16 = gl_GlobalInvocationID.x;
float4 param_17 = bbox;
store_clip_bbox(param_16, param_17);
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,370 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Bic load_bic(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix);
return Bic{ v_96.memory[base], v_96.memory[base + 1u] };
}
static inline __attribute__((always_inline))
Bic bic_combine(thread const Bic& x, thread const Bic& y)
{
uint m = min(x.b, y.a);
return Bic{ (x.a + y.a) - m, (x.b + y.b) - m };
}
static inline __attribute__((always_inline))
ClipEl load_clip_el(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix);
uint parent_ix = v_96.memory[base];
float x0 = as_type<float>(v_96.memory[base + 1u]);
float y0 = as_type<float>(v_96.memory[base + 2u]);
float x1 = as_type<float>(v_96.memory[base + 3u]);
float y1 = as_type<float>(v_96.memory[base + 4u]);
float4 bbox = float4(x0, y0, x1, y1);
return ClipEl{ parent_ix, bbox };
}
static inline __attribute__((always_inline))
float4 bbox_intersect(thread const float4& a, thread const float4& b)
{
return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw));
}
static inline __attribute__((always_inline))
uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96)
{
if (ix < v_80.conf.n_clip)
{
return v_96.memory[(v_80.conf.clip_alloc.offset >> uint(2)) + ix];
}
else
{
return 2147483648u;
}
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_96.memory[base]) - 32768.0;
float bbox_t = float(v_96.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_96.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_96.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
uint search_link(thread Bic& bic, thread uint3& gl_LocalInvocationID, threadgroup Bic (&sh_bic)[510])
{
uint ix = gl_LocalInvocationID.x;
uint j = 0u;
while (j < 8u)
{
uint base = 512u - (2u << (8u - j));
if (((ix >> j) & 1u) != 0u)
{
Bic param = sh_bic[(base + (ix >> j)) - 1u];
Bic param_1 = bic;
Bic test = bic_combine(param, param_1);
if (test.b > 0u)
{
break;
}
bic = test;
ix -= (1u << j);
}
j++;
}
if (ix > 0u)
{
while (j > 0u)
{
j--;
uint base_1 = 512u - (2u << (8u - j));
Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u];
Bic param_3 = bic;
Bic test_1 = bic_combine(param_2, param_3);
if (test_1.b == 0u)
{
bic = test_1;
ix -= (1u << j);
}
}
}
if (ix > 0u)
{
return ix - 1u;
}
else
{
return 4294967295u - bic.a;
}
}
static inline __attribute__((always_inline))
void store_clip_bbox(thread const uint& ix, thread const float4& bbox, const device ConfigBuf& v_80, device Memory& v_96)
{
uint base = (v_80.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * ix);
v_96.memory[base] = as_type<uint>(bbox.x);
v_96.memory[base + 1u] = as_type<uint>(bbox.y);
v_96.memory[base + 2u] = as_type<uint>(bbox.z);
v_96.memory[base + 3u] = as_type<uint>(bbox.w);
}
kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_80 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup Bic sh_bic[510];
threadgroup uint sh_stack[256];
threadgroup float4 sh_stack_bbox[256];
threadgroup uint sh_link[256];
threadgroup float4 sh_bbox[256];
uint th = gl_LocalInvocationID.x;
Bic bic = Bic{ 0u, 0u };
if (th < gl_WorkGroupID.x)
{
uint param = th;
bic = load_bic(param, v_80, v_96);
}
sh_bic[th] = bic;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[th + (1u << i)];
Bic param_1 = bic;
Bic param_2 = other;
bic = bic_combine(param_1, param_2);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_bic[th] = bic;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint stack_size = sh_bic[0].b;
uint sp = 255u - th;
uint ix = 0u;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
uint probe = ix + (128u >> i_1);
if (sp < sh_bic[probe].b)
{
ix = probe;
}
}
uint b = sh_bic[ix].b;
float4 bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
if (sp < b)
{
uint param_3 = (((ix * 256u) + b) - sp) - 1u;
ClipEl el = load_clip_el(param_3, v_80, v_96);
sh_stack[th] = el.parent_ix;
bbox = el.bbox;
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
sh_stack_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th >= (1u << i_2))
{
float4 param_4 = sh_stack_bbox[th - (1u << i_2)];
float4 param_5 = bbox;
bbox = bbox_intersect(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
}
sh_stack_bbox[th] = bbox;
uint param_6 = gl_GlobalInvocationID.x;
uint inp = load_path_ix(param_6, v_80, v_96);
bool is_push = int(inp) >= 0;
bic = Bic{ 1u - uint(is_push), uint(is_push) };
sh_bic[th] = bic;
if (is_push)
{
uint param_7 = inp;
bbox = load_path_bbox(param_7, v_80, v_96);
}
else
{
bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
}
uint inbase = 0u;
for (uint i_3 = 0u; i_3 < 7u; i_3++)
{
uint outbase = 512u - (1u << (8u - i_3));
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th < (1u << (7u - i_3)))
{
Bic param_8 = sh_bic[inbase + (th * 2u)];
Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u];
sh_bic[outbase + th] = bic_combine(param_8, param_9);
}
inbase = outbase;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
bic = Bic{ 0u, 0u };
Bic param_10 = bic;
uint _618 = search_link(param_10, gl_LocalInvocationID, sh_bic);
bic = param_10;
uint link = _618;
sh_link[th] = link;
threadgroup_barrier(mem_flags::mem_threadgroup);
uint grandparent;
if (int(link) >= 0)
{
grandparent = sh_link[link];
}
else
{
grandparent = link - 1u;
}
uint parent;
if (int(link) >= 0)
{
parent = (gl_WorkGroupID.x * 256u) + link;
}
else
{
if (int(link + stack_size) >= 0)
{
parent = sh_stack[256u + link];
}
else
{
parent = 4294967295u;
}
}
for (uint i_4 = 0u; i_4 < 8u; i_4++)
{
if (i_4 != 0u)
{
sh_link[th] = link;
}
sh_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (int(link) >= 0)
{
float4 param_11 = sh_bbox[link];
float4 param_12 = bbox;
bbox = bbox_intersect(param_11, param_12);
link = sh_link[link];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
}
if (int(link + stack_size) >= 0)
{
float4 param_13 = sh_stack_bbox[256u + link];
float4 param_14 = bbox;
bbox = bbox_intersect(param_13, param_14);
}
sh_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
uint path_ix = inp;
bool _717 = !is_push;
bool _725;
if (_717)
{
_725 = gl_GlobalInvocationID.x < v_80.conf.n_clip;
}
else
{
_725 = _717;
}
if (_725)
{
uint param_15 = parent;
path_ix = load_path_ix(param_15, v_80, v_96);
uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * (~inp));
v_96.memory[drawmonoid_out_base] = path_ix;
if (int(grandparent) >= 0)
{
bbox = sh_bbox[grandparent];
}
else
{
if (int(grandparent + stack_size) >= 0)
{
bbox = sh_stack_bbox[256u + grandparent];
}
else
{
bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
}
}
}
uint param_16 = gl_GlobalInvocationID.x;
float4 param_17 = bbox;
store_clip_bbox(param_16, param_17, v_80, v_96);
}

Binary file not shown.

Binary file not shown.

View file

@ -1,181 +0,0 @@
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _267 = { 0u, 0u };
ByteAddressBuffer _64 : register(t1, space0);
RWByteAddressBuffer _80 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Bic sh_bic[256];
groupshared uint sh_parent[256];
groupshared uint sh_path_ix[256];
groupshared float4 sh_bbox[256];
Bic bic_combine(Bic x, Bic y)
{
uint m = min(x.b, y.a);
Bic _56 = { (x.a + y.a) - m, (x.b + y.b) - m };
return _56;
}
void store_bic(uint ix, Bic bic)
{
uint base = (_64.Load(52) >> uint(2)) + (2u * ix);
_80.Store(base * 4 + 8, bic.a);
_80.Store((base + 1u) * 4 + 8, bic.b);
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_64.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_80.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_80.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_80.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_80.Load((base + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
void store_clip_el(uint ix, ClipEl el)
{
uint base = (_64.Load(56) >> uint(2)) + (5u * ix);
_80.Store(base * 4 + 8, el.parent_ix);
_80.Store((base + 1u) * 4 + 8, asuint(el.bbox.x));
_80.Store((base + 2u) * 4 + 8, asuint(el.bbox.y));
_80.Store((base + 3u) * 4 + 8, asuint(el.bbox.z));
_80.Store((base + 4u) * 4 + 8, asuint(el.bbox.w));
}
void comp_main()
{
uint th = gl_LocalInvocationID.x;
uint inp = _80.Load(((_64.Load(48) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 8);
bool is_push = int(inp) >= 0;
Bic _207 = { 1u - uint(is_push), uint(is_push) };
Bic bic = _207;
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
Bic param = bic;
Bic param_1 = other;
bic = bic_combine(param, param_1);
}
GroupMemoryBarrierWithGroupSync();
sh_bic[th] = bic;
}
if (th == 0u)
{
uint param_2 = gl_WorkGroupID.x;
Bic param_3 = bic;
store_bic(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
uint size = sh_bic[0].b;
bic = _267;
if ((th + 1u) < 256u)
{
bic = sh_bic[th + 1u];
}
bool _283;
if (is_push)
{
_283 = bic.a == 0u;
}
else
{
_283 = is_push;
}
if (_283)
{
uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th;
sh_path_ix[local_ix] = inp;
}
GroupMemoryBarrierWithGroupSync();
float4 bbox;
if (th < size)
{
uint path_ix = sh_path_ix[th];
uint param_4 = path_ix;
bbox = load_path_bbox(param_4);
}
if (th < size)
{
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
ClipEl _331 = { parent_ix, bbox };
ClipEl el = _331;
uint param_5 = gl_GlobalInvocationID.x;
ClipEl param_6 = el;
store_clip_el(param_5, param_6);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,177 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct Bic
{
uint a;
uint b;
};
struct ClipEl
{
uint parent_ix;
float4 bbox;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Bic bic_combine(thread const Bic& x, thread const Bic& y)
{
uint m = min(x.b, y.a);
return Bic{ (x.a + y.a) - m, (x.b + y.b) - m };
}
static inline __attribute__((always_inline))
void store_bic(thread const uint& ix, thread const Bic& bic, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix);
v_80.memory[base] = bic.a;
v_80.memory[base + 1u] = bic.b;
}
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_80.memory[base]) - 32768.0;
float bbox_t = float(v_80.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_80.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_80.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
static inline __attribute__((always_inline))
void store_clip_el(thread const uint& ix, thread const ClipEl& el, const device ConfigBuf& v_64, device Memory& v_80)
{
uint base = (v_64.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix);
v_80.memory[base] = el.parent_ix;
v_80.memory[base + 1u] = as_type<uint>(el.bbox.x);
v_80.memory[base + 2u] = as_type<uint>(el.bbox.y);
v_80.memory[base + 3u] = as_type<uint>(el.bbox.z);
v_80.memory[base + 4u] = as_type<uint>(el.bbox.w);
}
kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_64 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Bic sh_bic[256];
threadgroup uint sh_parent[256];
threadgroup uint sh_path_ix[256];
threadgroup float4 sh_bbox[256];
uint th = gl_LocalInvocationID.x;
uint inp = v_80.memory[(v_64.conf.clip_alloc.offset >> uint(2)) + gl_GlobalInvocationID.x];
bool is_push = int(inp) >= 0;
Bic bic = Bic{ 1u - uint(is_push), uint(is_push) };
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((th + (1u << i)) < 256u)
{
Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)];
Bic param = bic;
Bic param_1 = other;
bic = bic_combine(param, param_1);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_bic[th] = bic;
}
if (th == 0u)
{
uint param_2 = gl_WorkGroupID.x;
Bic param_3 = bic;
store_bic(param_2, param_3, v_64, v_80);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint size = sh_bic[0].b;
bic = Bic{ 0u, 0u };
if ((th + 1u) < 256u)
{
bic = sh_bic[th + 1u];
}
bool _283;
if (is_push)
{
_283 = bic.a == 0u;
}
else
{
_283 = is_push;
}
if (_283)
{
uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th;
sh_path_ix[local_ix] = inp;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
float4 bbox;
if (th < size)
{
uint path_ix = sh_path_ix[th];
uint param_4 = path_ix;
bbox = load_path_bbox(param_4, v_64, v_80);
}
if (th < size)
{
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
ClipEl el = ClipEl{ parent_ix, bbox };
uint param_5 = gl_GlobalInvocationID.x;
ClipEl param_6 = el;
store_clip_el(param_5, param_6, v_64, v_80);
}
}

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -1,268 +0,0 @@
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _23 = { 0u, 0u, 0u, 0u };
ByteAddressBuffer _93 : register(t1, space0);
ByteAddressBuffer _103 : register(t2, space0);
ByteAddressBuffer _203 : register(t3, space0);
RWByteAddressBuffer _285 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[256];
DrawMonoid map_tag(uint tag_word)
{
uint has_path = uint(tag_word != 0u);
DrawMonoid _76 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u };
return _76;
}
DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
DrawMonoid draw_monoid_identity()
{
return _23;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _93.Load(100) >> uint(2);
uint tag_word = _103.Load((drawtag_base + ix) * 4 + 0);
uint param = tag_word;
DrawMonoid agg = map_tag(param);
DrawMonoid local[8];
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
tag_word = _103.Load(((drawtag_base + ix) + i) * 4 + 0);
uint param_1 = tag_word;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_4 = other;
DrawMonoid param_5 = agg;
agg = combine_draw_monoid(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
DrawMonoid row = draw_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
DrawMonoid _209;
_209.path_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 0);
_209.clip_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 4);
_209.scene_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 8);
_209.info_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 12);
row.path_ix = _209.path_ix;
row.clip_ix = _209.clip_ix;
row.scene_offset = _209.scene_offset;
row.info_offset = _209.info_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_6 = row;
DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_draw_monoid(param_6, param_7);
}
uint drawdata_base = _93.Load(104) >> uint(2);
uint drawinfo_base = _93.Load(68) >> uint(2);
uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_93.Load(44) >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _93.Load(48) >> uint(2);
float4 mat;
float2 translate;
float2 p0;
float2 p1;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid m = row;
if (i_2 > 0u)
{
DrawMonoid param_8 = m;
DrawMonoid param_9 = local[i_2 - 1u];
m = combine_draw_monoid(param_8, param_9);
}
_285.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix);
_285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix);
_285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset);
_285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset);
uint dd = drawdata_base + (m.scene_offset >> uint(2));
uint di = drawinfo_base + (m.info_offset >> uint(2));
tag_word = _103.Load(((drawtag_base + ix) + i_2) * 4 + 0);
if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u))
{
uint bbox_offset = (_93.Load(40) >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_285.Load(bbox_offset * 4 + 8)) - 32768.0f;
float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 8));
uint fill_mode = uint(linewidth >= 0.0f);
if (((linewidth >= 0.0f) || (tag_word == 276u)) || (tag_word == 732u))
{
uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 8);
uint t = (_93.Load(36) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_285.Load(t * 4 + 8), _285.Load((t + 1u) * 4 + 8), _285.Load((t + 2u) * 4 + 8), _285.Load((t + 3u) * 4 + 8)));
if ((tag_word == 276u) || (tag_word == 732u))
{
translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 8), _285.Load((t + 5u) * 4 + 8)));
}
}
if (linewidth >= 0.0f)
{
linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z)));
}
switch (tag_word)
{
case 68u:
case 72u:
{
_285.Store(di * 4 + 8, asuint(linewidth));
break;
}
case 276u:
{
_285.Store(di * 4 + 8, asuint(linewidth));
p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0)));
p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0)));
p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate;
float2 dxy = p1 - p0;
float scale = 1.0f / ((dxy.x * dxy.x) + (dxy.y * dxy.y));
float line_x = dxy.x * scale;
float line_y = dxy.y * scale;
float line_c = -((p0.x * line_x) + (p0.y * line_y));
_285.Store((di + 1u) * 4 + 8, asuint(line_x));
_285.Store((di + 2u) * 4 + 8, asuint(line_y));
_285.Store((di + 3u) * 4 + 8, asuint(line_c));
break;
}
case 732u:
{
p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0)));
p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0)));
float r0 = asfloat(_103.Load((dd + 5u) * 4 + 0));
float r1 = asfloat(_103.Load((dd + 6u) * 4 + 0));
float inv_det = 1.0f / ((mat.x * mat.w) - (mat.y * mat.z));
float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det;
float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y);
inv_tr += p0;
float2 center1 = p1 - p0;
float rr = r1 / (r1 - r0);
float rainv = rr / ((r1 * r1) - dot(center1, center1));
float2 c1 = center1 * rainv;
float ra = rr * rainv;
float roff = rr - 1.0f;
_285.Store(di * 4 + 8, asuint(linewidth));
_285.Store((di + 1u) * 4 + 8, asuint(inv_mat.x));
_285.Store((di + 2u) * 4 + 8, asuint(inv_mat.y));
_285.Store((di + 3u) * 4 + 8, asuint(inv_mat.z));
_285.Store((di + 4u) * 4 + 8, asuint(inv_mat.w));
_285.Store((di + 5u) * 4 + 8, asuint(inv_tr.x));
_285.Store((di + 6u) * 4 + 8, asuint(inv_tr.y));
_285.Store((di + 7u) * 4 + 8, asuint(c1.x));
_285.Store((di + 8u) * 4 + 8, asuint(c1.y));
_285.Store((di + 9u) * 4 + 8, asuint(ra));
_285.Store((di + 10u) * 4 + 8, asuint(roff));
break;
}
case 5u:
{
break;
}
}
}
if ((tag_word == 5u) || (tag_word == 37u))
{
uint path_ix = ~(out_ix + i_2);
if (tag_word == 5u)
{
path_ix = m.path_ix;
}
_285.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix);
}
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,316 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct ParentBuf
{
DrawMonoid_1 parent[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
uint has_path = uint(tag_word != 0u);
return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u };
}
static inline __attribute__((always_inline))
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid draw_monoid_identity()
{
return DrawMonoid{ 0u, 0u, 0u, 0u };
}
kernel void main0(device Memory& _285 [[buffer(0)]], const device ConfigBuf& _93 [[buffer(1)]], const device SceneBuf& _103 [[buffer(2)]], const device ParentBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _93.conf.drawtag_offset >> uint(2);
uint tag_word = _103.scene[drawtag_base + ix];
uint param = tag_word;
DrawMonoid agg = map_tag(param);
spvUnsafeArray<DrawMonoid, 8> local;
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
tag_word = _103.scene[(drawtag_base + ix) + i];
uint param_1 = tag_word;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_4 = other;
DrawMonoid param_5 = agg;
agg = combine_draw_monoid(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = draw_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _206 = gl_WorkGroupID.x - 1u;
row.path_ix = _203.parent[_206].path_ix;
row.clip_ix = _203.parent[_206].clip_ix;
row.scene_offset = _203.parent[_206].scene_offset;
row.info_offset = _203.parent[_206].info_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
DrawMonoid param_6 = row;
DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_draw_monoid(param_6, param_7);
}
uint drawdata_base = _93.conf.drawdata_offset >> uint(2);
uint drawinfo_base = _93.conf.drawinfo_alloc.offset >> uint(2);
uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_93.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _93.conf.clip_alloc.offset >> uint(2);
float4 mat;
float2 translate;
float2 p0;
float2 p1;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid m = row;
if (i_2 > 0u)
{
DrawMonoid param_8 = m;
DrawMonoid param_9 = local[i_2 - 1u];
m = combine_draw_monoid(param_8, param_9);
}
_285.memory[out_base + (i_2 * 4u)] = m.path_ix;
_285.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix;
_285.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset;
_285.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset;
uint dd = drawdata_base + (m.scene_offset >> uint(2));
uint di = drawinfo_base + (m.info_offset >> uint(2));
tag_word = _103.scene[(drawtag_base + ix) + i_2];
if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u))
{
uint bbox_offset = (_93.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_285.memory[bbox_offset]) - 32768.0;
float bbox_t = float(_285.memory[bbox_offset + 1u]) - 32768.0;
float bbox_r = float(_285.memory[bbox_offset + 2u]) - 32768.0;
float bbox_b = float(_285.memory[bbox_offset + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = as_type<float>(_285.memory[bbox_offset + 4u]);
uint fill_mode = uint(linewidth >= 0.0);
if (((linewidth >= 0.0) || (tag_word == 276u)) || (tag_word == 732u))
{
uint trans_ix = _285.memory[bbox_offset + 5u];
uint t = (_93.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
mat = as_type<float4>(uint4(_285.memory[t], _285.memory[t + 1u], _285.memory[t + 2u], _285.memory[t + 3u]));
if ((tag_word == 276u) || (tag_word == 732u))
{
translate = as_type<float2>(uint2(_285.memory[t + 4u], _285.memory[t + 5u]));
}
}
if (linewidth >= 0.0)
{
linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z)));
}
switch (tag_word)
{
case 68u:
case 72u:
{
_285.memory[di] = as_type<uint>(linewidth);
break;
}
case 276u:
{
_285.memory[di] = as_type<uint>(linewidth);
p0 = as_type<float2>(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u]));
p1 = as_type<float2>(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u]));
p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate;
float2 dxy = p1 - p0;
float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y));
float line_x = dxy.x * scale;
float line_y = dxy.y * scale;
float line_c = -((p0.x * line_x) + (p0.y * line_y));
_285.memory[di + 1u] = as_type<uint>(line_x);
_285.memory[di + 2u] = as_type<uint>(line_y);
_285.memory[di + 3u] = as_type<uint>(line_c);
break;
}
case 732u:
{
p0 = as_type<float2>(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u]));
p1 = as_type<float2>(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u]));
float r0 = as_type<float>(_103.scene[dd + 5u]);
float r1 = as_type<float>(_103.scene[dd + 6u]);
float inv_det = 1.0 / ((mat.x * mat.w) - (mat.y * mat.z));
float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det;
float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y);
inv_tr += p0;
float2 center1 = p1 - p0;
float rr = r1 / (r1 - r0);
float rainv = rr / ((r1 * r1) - dot(center1, center1));
float2 c1 = center1 * rainv;
float ra = rr * rainv;
float roff = rr - 1.0;
_285.memory[di] = as_type<uint>(linewidth);
_285.memory[di + 1u] = as_type<uint>(inv_mat.x);
_285.memory[di + 2u] = as_type<uint>(inv_mat.y);
_285.memory[di + 3u] = as_type<uint>(inv_mat.z);
_285.memory[di + 4u] = as_type<uint>(inv_mat.w);
_285.memory[di + 5u] = as_type<uint>(inv_tr.x);
_285.memory[di + 6u] = as_type<uint>(inv_tr.y);
_285.memory[di + 7u] = as_type<uint>(c1.x);
_285.memory[di + 8u] = as_type<uint>(c1.y);
_285.memory[di + 9u] = as_type<uint>(ra);
_285.memory[di + 10u] = as_type<uint>(roff);
break;
}
case 5u:
{
break;
}
}
}
if ((tag_word == 5u) || (tag_word == 37u))
{
uint path_ix = ~(out_ix + i_2);
if (tag_word == 5u)
{
path_ix = m.path_ix;
}
_285.memory[clip_out_base + m.clip_ix] = path_ix;
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,126 +0,0 @@
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
ByteAddressBuffer _87 : register(t1, space0);
ByteAddressBuffer _97 : register(t2, space0);
RWByteAddressBuffer _188 : register(u3, space0);
RWByteAddressBuffer _206 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[256];
DrawMonoid map_tag(uint tag_word)
{
uint has_path = uint(tag_word != 0u);
DrawMonoid _70 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u };
return _70;
}
DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _87.Load(100) >> uint(2);
uint tag_word = _97.Load((drawtag_base + ix) * 4 + 0);
uint param = tag_word;
DrawMonoid agg = map_tag(param);
for (uint i = 1u; i < 8u; i++)
{
uint tag_word_1 = _97.Load(((drawtag_base + ix) + i) * 4 + 0);
uint param_1 = tag_word_1;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_4 = agg;
DrawMonoid param_5 = other;
agg = combine_draw_monoid(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_188.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix);
_188.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix);
_188.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset);
_188.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,140 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct OutBuf
{
DrawMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid map_tag(thread const uint& tag_word)
{
uint has_path = uint(tag_word != 0u);
return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u };
}
static inline __attribute__((always_inline))
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
kernel void main0(const device ConfigBuf& _87 [[buffer(1)]], const device SceneBuf& _97 [[buffer(2)]], device OutBuf& _188 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _87.conf.drawtag_offset >> uint(2);
uint tag_word = _97.scene[drawtag_base + ix];
uint param = tag_word;
DrawMonoid agg = map_tag(param);
for (uint i = 1u; i < 8u; i++)
{
uint tag_word_1 = _97.scene[(drawtag_base + ix) + i];
uint param_1 = tag_word_1;
DrawMonoid param_2 = agg;
DrawMonoid param_3 = map_tag(param_1);
agg = combine_draw_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
DrawMonoid param_4 = agg;
DrawMonoid param_5 = other;
agg = combine_draw_monoid(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_188.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_188.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix;
_188.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset;
_188.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,108 +0,0 @@
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const DrawMonoid _18 = { 0u, 0u, 0u, 0u };
RWByteAddressBuffer _71 : register(u0, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared DrawMonoid sh_scratch[256];
DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
DrawMonoid draw_monoid_identity()
{
return _18;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
DrawMonoid _75;
_75.path_ix = _71.Load(ix * 16 + 0);
_75.clip_ix = _71.Load(ix * 16 + 4);
_75.scene_offset = _71.Load(ix * 16 + 8);
_75.info_offset = _71.Load(ix * 16 + 12);
DrawMonoid local[8];
local[0].path_ix = _75.path_ix;
local[0].clip_ix = _75.clip_ix;
local[0].scene_offset = _75.scene_offset;
local[0].info_offset = _75.info_offset;
DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
DrawMonoid param = local[i - 1u];
DrawMonoid _106;
_106.path_ix = _71.Load((ix + i) * 16 + 0);
_106.clip_ix = _71.Load((ix + i) * 16 + 4);
_106.scene_offset = _71.Load((ix + i) * 16 + 8);
_106.info_offset = _71.Load((ix + i) * 16 + 12);
param_1.path_ix = _106.path_ix;
param_1.clip_ix = _106.clip_ix;
param_1.scene_offset = _106.scene_offset;
param_1.info_offset = _106.info_offset;
local[i] = combine_draw_monoid(param, param_1);
}
DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other;
DrawMonoid param_3 = agg;
agg = combine_draw_monoid(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
DrawMonoid row = draw_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_draw_monoid(param_4, param_5);
uint _199 = ix + i_2;
_71.Store(_199 * 16 + 0, m.path_ix);
_71.Store(_199 * 16 + 4, m.clip_ix);
_71.Store(_199 * 16 + 8, m.scene_offset);
_71.Store(_199 * 16 + 12, m.info_offset);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,140 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct DrawMonoid
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct DrawMonoid_1
{
uint path_ix;
uint clip_ix;
uint scene_offset;
uint info_offset;
};
struct DataBuf
{
DrawMonoid_1 data[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
{
DrawMonoid c;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
return c;
}
static inline __attribute__((always_inline))
DrawMonoid draw_monoid_identity()
{
return DrawMonoid{ 0u, 0u, 0u, 0u };
}
kernel void main0(device DataBuf& _71 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup DrawMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
spvUnsafeArray<DrawMonoid, 8> local;
local[0].path_ix = _71.data[ix].path_ix;
local[0].clip_ix = _71.data[ix].clip_ix;
local[0].scene_offset = _71.data[ix].scene_offset;
local[0].info_offset = _71.data[ix].info_offset;
DrawMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
uint _100 = ix + i;
DrawMonoid param = local[i - 1u];
param_1.path_ix = _71.data[_100].path_ix;
param_1.clip_ix = _71.data[_100].clip_ix;
param_1.scene_offset = _71.data[_100].scene_offset;
param_1.info_offset = _71.data[_100].info_offset;
local[i] = combine_draw_monoid(param, param_1);
}
DrawMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
DrawMonoid param_2 = other;
DrawMonoid param_3 = agg;
agg = combine_draw_monoid(param_2, param_3);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
DrawMonoid row = draw_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
DrawMonoid param_4 = row;
DrawMonoid param_5 = local[i_2];
DrawMonoid m = combine_draw_monoid(param_4, param_5);
uint _199 = ix + i_2;
_71.data[_199].path_ix = m.path_ix;
_71.data[_199].clip_ix = m.clip_ix;
_71.data[_199].scene_offset = m.scene_offset;
_71.data[_199].info_offset = m.info_offset;
}
}

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -1,673 +0,0 @@
struct Alloc
{
uint offset;
};
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathCubicRef
{
uint offset;
};
struct PathCubic
{
float2 p0;
float2 p1;
float2 p2;
float2 p3;
uint path_ix;
uint trans_ix;
float2 stroke;
};
struct PathSegRef
{
uint offset;
};
struct PathSegTag
{
uint tag;
uint flags;
};
struct TileRef
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct TileSegRef
{
uint offset;
};
struct TileSeg
{
float2 origin;
float2 _vector;
float y_edge;
TileSegRef next;
};
struct SubdivResult
{
float val;
float a0;
float a2;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
static const PathSegTag _721 = { 0u, 0u };
RWByteAddressBuffer _136 : register(u0, space0);
ByteAddressBuffer _710 : register(t1, space0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _136.Load(offset * 4 + 8);
return v;
}
PathSegTag PathSeg_tag(Alloc a, PathSegRef ref)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
PathSegTag _367 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _367;
}
PathCubic PathCubic_read(Alloc a, PathCubicRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9);
Alloc param_10 = a;
uint param_11 = ix + 5u;
uint raw5 = read_mem(param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 6u;
uint raw6 = read_mem(param_12, param_13);
Alloc param_14 = a;
uint param_15 = ix + 7u;
uint raw7 = read_mem(param_14, param_15);
Alloc param_16 = a;
uint param_17 = ix + 8u;
uint raw8 = read_mem(param_16, param_17);
Alloc param_18 = a;
uint param_19 = ix + 9u;
uint raw9 = read_mem(param_18, param_19);
Alloc param_20 = a;
uint param_21 = ix + 10u;
uint raw10 = read_mem(param_20, param_21);
Alloc param_22 = a;
uint param_23 = ix + 11u;
uint raw11 = read_mem(param_22, param_23);
PathCubic s;
s.p0 = float2(asfloat(raw0), asfloat(raw1));
s.p1 = float2(asfloat(raw2), asfloat(raw3));
s.p2 = float2(asfloat(raw4), asfloat(raw5));
s.p3 = float2(asfloat(raw6), asfloat(raw7));
s.path_ix = raw8;
s.trans_ix = raw9;
s.stroke = float2(asfloat(raw10), asfloat(raw11));
return s;
}
PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref)
{
PathCubicRef _373 = { ref.offset + 4u };
Alloc param = a;
PathCubicRef param_1 = _373;
return PathCubic_read(param, param_1);
}
float2 eval_cubic(float2 p0, float2 p1, float2 p2, float2 p3, float t)
{
float mt = 1.0f - t;
return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0f)) + (((p2 * (mt * 3.0f)) + (p3 * t)) * t)) * t);
}
float approx_parabola_integral(float x)
{
return x * rsqrt(sqrt(0.3300000131130218505859375f + (0.201511204242706298828125f + ((0.25f * x) * x))));
}
SubdivResult estimate_subdiv(float2 p0, float2 p1, float2 p2, float sqrt_tol)
{
float2 d01 = p1 - p0;
float2 d12 = p2 - p1;
float2 dd = d01 - d12;
float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x);
float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross;
float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross;
float scale = abs(_cross / (length(dd) * (x2 - x0)));
float param = x0;
float a0 = approx_parabola_integral(param);
float param_1 = x2;
float a2 = approx_parabola_integral(param_1);
float val = 0.0f;
if (scale < 1000000000.0f)
{
float da = abs(a2 - a0);
float sqrt_scale = sqrt(scale);
if (sign(x0) == sign(x2))
{
val = da * sqrt_scale;
}
else
{
float xmin = sqrt_tol / sqrt_scale;
float param_2 = xmin;
val = (sqrt_tol * da) / approx_parabola_integral(param_2);
}
}
SubdivResult _695 = { val, a0, a2 };
return _695;
}
uint fill_mode_from_flags(uint flags)
{
return flags & 1u;
}
Path Path_read(Alloc a, PathRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _427 = { raw2 };
s.tiles = _427;
return s;
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
float approx_parabola_inv_integral(float x)
{
return x * sqrt(0.61000001430511474609375f + (0.1520999968051910400390625f + ((0.25f * x) * x)));
}
float2 eval_quad(float2 p0, float2 p1, float2 p2, float t)
{
float mt = 1.0f - t;
return (p0 * (mt * mt)) + (((p1 * (mt * 2.0f)) + (p2 * t)) * t);
}
MallocResult malloc(uint size)
{
uint _142;
_136.InterlockedAdd(0, size, _142);
uint offset = _142;
uint _149;
_136.GetDimensions(_149);
_149 = (_149 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_149) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _171;
_136.InterlockedMax(4, 1u, _171);
return r;
}
return r;
}
TileRef Tile_index(TileRef ref, uint index)
{
TileRef _385 = { ref.offset + (index * 8u) };
return _385;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_136.Store(offset * 4 + 8, val);
}
void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.origin.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.origin.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s._vector.x);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s._vector.y);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.y_edge);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.next.offset;
write_mem(param_15, param_16, param_17);
}
void comp_main()
{
uint element_ix = gl_GlobalInvocationID.x;
PathSegRef _718 = { _710.Load(28) + (element_ix * 52u) };
PathSegRef ref = _718;
PathSegTag tag = _721;
if (element_ix < _710.Load(4))
{
Alloc _731;
_731.offset = _710.Load(28);
Alloc param;
param.offset = _731.offset;
PathSegRef param_1 = ref;
tag = PathSeg_tag(param, param_1);
}
bool mem_ok = _136.Load(4) == 0u;
switch (tag.tag)
{
case 1u:
{
Alloc _748;
_748.offset = _710.Load(28);
Alloc param_2;
param_2.offset = _748.offset;
PathSegRef param_3 = ref;
PathCubic cubic = PathSeg_Cubic_read(param_2, param_3);
float2 err_v = (((cubic.p2 - cubic.p1) * 3.0f) + cubic.p0) - cubic.p3;
float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);
uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875f, 0.16666667163372039794921875f))), 1u);
n_quads = min(n_quads, 16u);
float val = 0.0f;
float2 qp0 = cubic.p0;
float _step = 1.0f / float(n_quads);
SubdivResult keep_params[16];
for (uint i = 0u; i < n_quads; i++)
{
float t = float(i + 1u) * _step;
float2 param_4 = cubic.p0;
float2 param_5 = cubic.p1;
float2 param_6 = cubic.p2;
float2 param_7 = cubic.p3;
float param_8 = t;
float2 qp2 = eval_cubic(param_4, param_5, param_6, param_7, param_8);
float2 param_9 = cubic.p0;
float2 param_10 = cubic.p1;
float2 param_11 = cubic.p2;
float2 param_12 = cubic.p3;
float param_13 = t - (0.5f * _step);
float2 qp1 = eval_cubic(param_9, param_10, param_11, param_12, param_13);
qp1 = (qp1 * 2.0f) - ((qp0 + qp2) * 0.5f);
float2 param_14 = qp0;
float2 param_15 = qp1;
float2 param_16 = qp2;
float param_17 = 0.4743416607379913330078125f;
SubdivResult params = estimate_subdiv(param_14, param_15, param_16, param_17);
keep_params[i] = params;
val += params.val;
qp0 = qp2;
}
uint n = max(uint(ceil((val * 0.5f) / 0.4743416607379913330078125f)), 1u);
uint param_18 = tag.flags;
bool is_stroke = fill_mode_from_flags(param_18) == 1u;
uint path_ix = cubic.path_ix;
PathRef _904 = { _710.Load(16) + (path_ix * 12u) };
Alloc _907;
_907.offset = _710.Load(16);
Alloc param_19;
param_19.offset = _907.offset;
PathRef param_20 = _904;
Path path = Path_read(param_19, param_20);
uint param_21 = path.tiles.offset;
uint param_22 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_23 = mem_ok;
Alloc path_alloc = new_alloc(param_21, param_22, param_23);
int4 bbox = int4(path.bbox);
float2 p0 = cubic.p0;
qp0 = cubic.p0;
float v_step = val / float(n);
int n_out = 1;
float val_sum = 0.0f;
float2 p1;
float _1147;
TileSeg tile_seg;
for (uint i_1 = 0u; i_1 < n_quads; i_1++)
{
float t_1 = float(i_1 + 1u) * _step;
float2 param_24 = cubic.p0;
float2 param_25 = cubic.p1;
float2 param_26 = cubic.p2;
float2 param_27 = cubic.p3;
float param_28 = t_1;
float2 qp2_1 = eval_cubic(param_24, param_25, param_26, param_27, param_28);
float2 param_29 = cubic.p0;
float2 param_30 = cubic.p1;
float2 param_31 = cubic.p2;
float2 param_32 = cubic.p3;
float param_33 = t_1 - (0.5f * _step);
float2 qp1_1 = eval_cubic(param_29, param_30, param_31, param_32, param_33);
qp1_1 = (qp1_1 * 2.0f) - ((qp0 + qp2_1) * 0.5f);
SubdivResult params_1 = keep_params[i_1];
float param_34 = params_1.a0;
float u0 = approx_parabola_inv_integral(param_34);
float param_35 = params_1.a2;
float u2 = approx_parabola_inv_integral(param_35);
float uscale = 1.0f / (u2 - u0);
float target = float(n_out) * v_step;
for (;;)
{
bool _1040 = uint(n_out) == n;
bool _1050;
if (!_1040)
{
_1050 = target < (val_sum + params_1.val);
}
else
{
_1050 = _1040;
}
if (_1050)
{
if (uint(n_out) == n)
{
p1 = cubic.p3;
}
else
{
float u = (target - val_sum) / params_1.val;
float a = lerp(params_1.a0, params_1.a2, u);
float param_36 = a;
float au = approx_parabola_inv_integral(param_36);
float t_2 = (au - u0) * uscale;
float2 param_37 = qp0;
float2 param_38 = qp1_1;
float2 param_39 = qp2_1;
float param_40 = t_2;
p1 = eval_quad(param_37, param_38, param_39, param_40);
}
float xmin = min(p0.x, p1.x) - cubic.stroke.x;
float xmax = max(p0.x, p1.x) + cubic.stroke.x;
float ymin = min(p0.y, p1.y) - cubic.stroke.y;
float ymax = max(p0.y, p1.y) + cubic.stroke.y;
float dx = p1.x - p0.x;
float dy = p1.y - p0.y;
if (abs(dy) < 9.999999717180685365747194737196e-10f)
{
_1147 = 1000000000.0f;
}
else
{
_1147 = dx / dy;
}
float invslope = _1147;
float c = (cubic.stroke.x + (abs(invslope) * (8.0f + cubic.stroke.y))) * 0.0625f;
float b = invslope;
float a_1 = (p0.x - ((p0.y - 8.0f) * b)) * 0.0625f;
int x0 = int(floor(xmin * 0.0625f));
int x1 = int(floor(xmax * 0.0625f) + 1.0f);
int y0 = int(floor(ymin * 0.0625f));
int y1 = int(floor(ymax * 0.0625f) + 1.0f);
x0 = clamp(x0, bbox.x, bbox.z);
y0 = clamp(y0, bbox.y, bbox.w);
x1 = clamp(x1, bbox.x, bbox.z);
y1 = clamp(y1, bbox.y, bbox.w);
float xc = a_1 + (b * float(y0));
int stride = bbox.z - bbox.x;
int base = ((y0 - bbox.y) * stride) - bbox.x;
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
uint param_41 = n_tile_alloc * 24u;
MallocResult _1263 = malloc(param_41);
MallocResult tile_alloc = _1263;
if (tile_alloc.failed || (!mem_ok))
{
return;
}
uint tile_offset = tile_alloc.alloc.offset;
int xray = int(floor(p0.x * 0.0625f));
int last_xray = int(floor(p1.x * 0.0625f));
if (p0.y > p1.y)
{
int tmp = xray;
xray = last_xray;
last_xray = tmp;
}
for (int y = y0; y < y1; y++)
{
float tile_y0 = float(y * 16);
int xbackdrop = max((xray + 1), bbox.x);
bool _1319 = !is_stroke;
bool _1329;
if (_1319)
{
_1329 = min(p0.y, p1.y) < tile_y0;
}
else
{
_1329 = _1319;
}
bool _1336;
if (_1329)
{
_1336 = xbackdrop < bbox.z;
}
else
{
_1336 = _1329;
}
if (_1336)
{
int backdrop = (p1.y < p0.y) ? 1 : (-1);
TileRef param_42 = path.tiles;
uint param_43 = uint(base + xbackdrop);
TileRef tile_ref = Tile_index(param_42, param_43);
uint tile_el = tile_ref.offset >> uint(2);
Alloc param_44 = path_alloc;
uint param_45 = tile_el + 1u;
if (touch_mem(param_44, param_45))
{
uint _1374;
_136.InterlockedAdd((tile_el + 1u) * 4 + 8, uint(backdrop), _1374);
}
}
int next_xray = last_xray;
if (y < (y1 - 1))
{
float tile_y1 = float((y + 1) * 16);
float x_edge = lerp(p0.x, p1.x, (tile_y1 - p0.y) / dy);
next_xray = int(floor(x_edge * 0.0625f));
}
int min_xray = min(xray, next_xray);
int max_xray = max(xray, next_xray);
int xx0 = min(int(floor(xc - c)), min_xray);
int xx1 = max(int(ceil(xc + c)), (max_xray + 1));
xx0 = clamp(xx0, x0, x1);
xx1 = clamp(xx1, x0, x1);
for (int x = xx0; x < xx1; x++)
{
float tile_x0 = float(x * 16);
TileRef _1454 = { path.tiles.offset };
TileRef param_46 = _1454;
uint param_47 = uint(base + x);
TileRef tile_ref_1 = Tile_index(param_46, param_47);
uint tile_el_1 = tile_ref_1.offset >> uint(2);
uint old = 0u;
Alloc param_48 = path_alloc;
uint param_49 = tile_el_1;
if (touch_mem(param_48, param_49))
{
uint _1477;
_136.InterlockedExchange(tile_el_1 * 4 + 8, tile_offset, _1477);
old = _1477;
}
tile_seg.origin = p0;
tile_seg._vector = p1 - p0;
float y_edge = 0.0f;
if (!is_stroke)
{
y_edge = lerp(p0.y, p1.y, (tile_x0 - p0.x) / dx);
if (min(p0.x, p1.x) < tile_x0)
{
float2 p = float2(tile_x0, y_edge);
if (p0.x > p1.x)
{
tile_seg._vector = p - p0;
}
else
{
tile_seg.origin = p;
tile_seg._vector = p1 - p;
}
if (tile_seg._vector.x == 0.0f)
{
tile_seg._vector.x = sign(p1.x - p0.x) * 9.999999717180685365747194737196e-10f;
}
}
if ((x <= min_xray) || (max_xray < x))
{
y_edge = 1000000000.0f;
}
}
tile_seg.y_edge = y_edge;
tile_seg.next.offset = old;
TileSegRef _1559 = { tile_offset };
Alloc param_50 = tile_alloc.alloc;
TileSegRef param_51 = _1559;
TileSeg param_52 = tile_seg;
TileSeg_write(param_50, param_51, param_52);
tile_offset += 24u;
}
xc += b;
base += stride;
xray = next_xray;
}
n_out++;
target += v_step;
p0 = p1;
continue;
}
else
{
break;
}
}
val_sum += params_1.val;
qp0 = qp2_1;
}
break;
}
}
}
[numthreads(32, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,717 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Alloc
{
uint offset;
};
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathCubicRef
{
uint offset;
};
struct PathCubic
{
float2 p0;
float2 p1;
float2 p2;
float2 p3;
uint path_ix;
uint trans_ix;
float2 stroke;
};
struct PathSegRef
{
uint offset;
};
struct PathSegTag
{
uint tag;
uint flags;
};
struct TileRef
{
uint offset;
};
struct PathRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct TileSegRef
{
uint offset;
};
struct TileSeg
{
float2 origin;
float2 vector;
float y_edge;
TileSegRef next;
};
struct SubdivResult
{
float val;
float a0;
float a2;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(32u, 1u, 1u);
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_136, constant uint& v_136BufferSize)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_136.memory[offset];
return v;
}
static inline __attribute__((always_inline))
PathSegTag PathSeg_tag(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_136, v_136BufferSize);
return PathSegTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_136, v_136BufferSize);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_136, v_136BufferSize);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_136, v_136BufferSize);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7, v_136, v_136BufferSize);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9, v_136, v_136BufferSize);
Alloc param_10 = a;
uint param_11 = ix + 5u;
uint raw5 = read_mem(param_10, param_11, v_136, v_136BufferSize);
Alloc param_12 = a;
uint param_13 = ix + 6u;
uint raw6 = read_mem(param_12, param_13, v_136, v_136BufferSize);
Alloc param_14 = a;
uint param_15 = ix + 7u;
uint raw7 = read_mem(param_14, param_15, v_136, v_136BufferSize);
Alloc param_16 = a;
uint param_17 = ix + 8u;
uint raw8 = read_mem(param_16, param_17, v_136, v_136BufferSize);
Alloc param_18 = a;
uint param_19 = ix + 9u;
uint raw9 = read_mem(param_18, param_19, v_136, v_136BufferSize);
Alloc param_20 = a;
uint param_21 = ix + 10u;
uint raw10 = read_mem(param_20, param_21, v_136, v_136BufferSize);
Alloc param_22 = a;
uint param_23 = ix + 11u;
uint raw11 = read_mem(param_22, param_23, v_136, v_136BufferSize);
PathCubic s;
s.p0 = float2(as_type<float>(raw0), as_type<float>(raw1));
s.p1 = float2(as_type<float>(raw2), as_type<float>(raw3));
s.p2 = float2(as_type<float>(raw4), as_type<float>(raw5));
s.p3 = float2(as_type<float>(raw6), as_type<float>(raw7));
s.path_ix = raw8;
s.trans_ix = raw9;
s.stroke = float2(as_type<float>(raw10), as_type<float>(raw11));
return s;
}
static inline __attribute__((always_inline))
PathCubic PathSeg_Cubic_read(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
{
Alloc param = a;
PathCubicRef param_1 = PathCubicRef{ ref.offset + 4u };
return PathCubic_read(param, param_1, v_136, v_136BufferSize);
}
static inline __attribute__((always_inline))
float2 eval_cubic(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float2& p3, thread const float& t)
{
float mt = 1.0 - t;
return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0)) + (((p2 * (mt * 3.0)) + (p3 * t)) * t)) * t);
}
static inline __attribute__((always_inline))
float approx_parabola_integral(thread const float& x)
{
return x * rsqrt(sqrt(0.3300000131130218505859375 + (0.201511204242706298828125 + ((0.25 * x) * x))));
}
static inline __attribute__((always_inline))
SubdivResult estimate_subdiv(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float& sqrt_tol)
{
float2 d01 = p1 - p0;
float2 d12 = p2 - p1;
float2 dd = d01 - d12;
float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x);
float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross;
float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross;
float scale = abs(_cross / (length(dd) * (x2 - x0)));
float param = x0;
float a0 = approx_parabola_integral(param);
float param_1 = x2;
float a2 = approx_parabola_integral(param_1);
float val = 0.0;
if (scale < 1000000000.0)
{
float da = abs(a2 - a0);
float sqrt_scale = sqrt(scale);
if (sign(x0) == sign(x2))
{
val = da * sqrt_scale;
}
else
{
float xmin = sqrt_tol / sqrt_scale;
float param_2 = xmin;
val = (sqrt_tol * da) / approx_parabola_integral(param_2);
}
}
return SubdivResult{ val, a0, a2 };
}
static inline __attribute__((always_inline))
uint fill_mode_from_flags(thread const uint& flags)
{
return flags & 1u;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_136, v_136BufferSize);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_136, v_136BufferSize);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_136, v_136BufferSize);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
return s;
}
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
float approx_parabola_inv_integral(thread const float& x)
{
return x * sqrt(0.61000001430511474609375 + (0.1520999968051910400390625 + ((0.25 * x) * x)));
}
static inline __attribute__((always_inline))
float2 eval_quad(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float& t)
{
float mt = 1.0 - t;
return (p0 * (mt * mt)) + (((p1 * (mt * 2.0)) + (p2 * t)) * t);
}
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_136, constant uint& v_136BufferSize)
{
uint _142 = atomic_fetch_add_explicit((device atomic_uint*)&v_136.mem_offset, size, memory_order_relaxed);
uint offset = _142;
MallocResult r;
r.failed = (offset + size) > uint(int((v_136BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _171 = atomic_fetch_max_explicit((device atomic_uint*)&v_136.mem_error, 1u, memory_order_relaxed);
return r;
}
return r;
}
static inline __attribute__((always_inline))
TileRef Tile_index(thread const TileRef& ref, thread const uint& index)
{
return TileRef{ ref.offset + (index * 8u) };
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_136, constant uint& v_136BufferSize)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_136.memory[offset] = val;
}
static inline __attribute__((always_inline))
void TileSeg_write(thread const Alloc& a, thread const TileSegRef& ref, thread const TileSeg& s, device Memory& v_136, constant uint& v_136BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.origin.x);
write_mem(param, param_1, param_2, v_136, v_136BufferSize);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.origin.y);
write_mem(param_3, param_4, param_5, v_136, v_136BufferSize);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.vector.x);
write_mem(param_6, param_7, param_8, v_136, v_136BufferSize);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.vector.y);
write_mem(param_9, param_10, param_11, v_136, v_136BufferSize);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.y_edge);
write_mem(param_12, param_13, param_14, v_136, v_136BufferSize);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.next.offset;
write_mem(param_15, param_16, param_17, v_136, v_136BufferSize);
}
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_136 [[buffer(0)]], const device ConfigBuf& _710 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
constant uint& v_136BufferSize = spvBufferSizeConstants[0];
uint element_ix = gl_GlobalInvocationID.x;
PathSegRef ref = PathSegRef{ _710.conf.pathseg_alloc.offset + (element_ix * 52u) };
PathSegTag tag = PathSegTag{ 0u, 0u };
if (element_ix < _710.conf.n_pathseg)
{
Alloc param;
param.offset = _710.conf.pathseg_alloc.offset;
PathSegRef param_1 = ref;
tag = PathSeg_tag(param, param_1, v_136, v_136BufferSize);
}
bool mem_ok = v_136.mem_error == 0u;
switch (tag.tag)
{
case 1u:
{
Alloc param_2;
param_2.offset = _710.conf.pathseg_alloc.offset;
PathSegRef param_3 = ref;
PathCubic cubic = PathSeg_Cubic_read(param_2, param_3, v_136, v_136BufferSize);
float2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3;
float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);
uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u);
n_quads = min(n_quads, 16u);
float val = 0.0;
float2 qp0 = cubic.p0;
float _step = 1.0 / float(n_quads);
spvUnsafeArray<SubdivResult, 16> keep_params;
for (uint i = 0u; i < n_quads; i++)
{
float t = float(i + 1u) * _step;
float2 param_4 = cubic.p0;
float2 param_5 = cubic.p1;
float2 param_6 = cubic.p2;
float2 param_7 = cubic.p3;
float param_8 = t;
float2 qp2 = eval_cubic(param_4, param_5, param_6, param_7, param_8);
float2 param_9 = cubic.p0;
float2 param_10 = cubic.p1;
float2 param_11 = cubic.p2;
float2 param_12 = cubic.p3;
float param_13 = t - (0.5 * _step);
float2 qp1 = eval_cubic(param_9, param_10, param_11, param_12, param_13);
qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5);
float2 param_14 = qp0;
float2 param_15 = qp1;
float2 param_16 = qp2;
float param_17 = 0.4743416607379913330078125;
SubdivResult params = estimate_subdiv(param_14, param_15, param_16, param_17);
keep_params[i] = params;
val += params.val;
qp0 = qp2;
}
uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u);
uint param_18 = tag.flags;
bool is_stroke = fill_mode_from_flags(param_18) == 1u;
uint path_ix = cubic.path_ix;
Alloc param_19;
param_19.offset = _710.conf.tile_alloc.offset;
PathRef param_20 = PathRef{ _710.conf.tile_alloc.offset + (path_ix * 12u) };
Path path = Path_read(param_19, param_20, v_136, v_136BufferSize);
uint param_21 = path.tiles.offset;
uint param_22 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_23 = mem_ok;
Alloc path_alloc = new_alloc(param_21, param_22, param_23);
int4 bbox = int4(path.bbox);
float2 p0 = cubic.p0;
qp0 = cubic.p0;
float v_step = val / float(n);
int n_out = 1;
float val_sum = 0.0;
float2 p1;
float _1147;
TileSeg tile_seg;
for (uint i_1 = 0u; i_1 < n_quads; i_1++)
{
float t_1 = float(i_1 + 1u) * _step;
float2 param_24 = cubic.p0;
float2 param_25 = cubic.p1;
float2 param_26 = cubic.p2;
float2 param_27 = cubic.p3;
float param_28 = t_1;
float2 qp2_1 = eval_cubic(param_24, param_25, param_26, param_27, param_28);
float2 param_29 = cubic.p0;
float2 param_30 = cubic.p1;
float2 param_31 = cubic.p2;
float2 param_32 = cubic.p3;
float param_33 = t_1 - (0.5 * _step);
float2 qp1_1 = eval_cubic(param_29, param_30, param_31, param_32, param_33);
qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5);
SubdivResult params_1 = keep_params[i_1];
float param_34 = params_1.a0;
float u0 = approx_parabola_inv_integral(param_34);
float param_35 = params_1.a2;
float u2 = approx_parabola_inv_integral(param_35);
float uscale = 1.0 / (u2 - u0);
float target = float(n_out) * v_step;
for (;;)
{
bool _1040 = uint(n_out) == n;
bool _1050;
if (!_1040)
{
_1050 = target < (val_sum + params_1.val);
}
else
{
_1050 = _1040;
}
if (_1050)
{
if (uint(n_out) == n)
{
p1 = cubic.p3;
}
else
{
float u = (target - val_sum) / params_1.val;
float a = mix(params_1.a0, params_1.a2, u);
float param_36 = a;
float au = approx_parabola_inv_integral(param_36);
float t_2 = (au - u0) * uscale;
float2 param_37 = qp0;
float2 param_38 = qp1_1;
float2 param_39 = qp2_1;
float param_40 = t_2;
p1 = eval_quad(param_37, param_38, param_39, param_40);
}
float xmin = fast::min(p0.x, p1.x) - cubic.stroke.x;
float xmax = fast::max(p0.x, p1.x) + cubic.stroke.x;
float ymin = fast::min(p0.y, p1.y) - cubic.stroke.y;
float ymax = fast::max(p0.y, p1.y) + cubic.stroke.y;
float dx = p1.x - p0.x;
float dy = p1.y - p0.y;
if (abs(dy) < 9.999999717180685365747194737196e-10)
{
_1147 = 1000000000.0;
}
else
{
_1147 = dx / dy;
}
float invslope = _1147;
float c = (cubic.stroke.x + (abs(invslope) * (8.0 + cubic.stroke.y))) * 0.0625;
float b = invslope;
float a_1 = (p0.x - ((p0.y - 8.0) * b)) * 0.0625;
int x0 = int(floor(xmin * 0.0625));
int x1 = int(floor(xmax * 0.0625) + 1.0);
int y0 = int(floor(ymin * 0.0625));
int y1 = int(floor(ymax * 0.0625) + 1.0);
x0 = clamp(x0, bbox.x, bbox.z);
y0 = clamp(y0, bbox.y, bbox.w);
x1 = clamp(x1, bbox.x, bbox.z);
y1 = clamp(y1, bbox.y, bbox.w);
float xc = a_1 + (b * float(y0));
int stride = bbox.z - bbox.x;
int base = ((y0 - bbox.y) * stride) - bbox.x;
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
uint param_41 = n_tile_alloc * 24u;
MallocResult _1263 = malloc(param_41, v_136, v_136BufferSize);
MallocResult tile_alloc = _1263;
if (tile_alloc.failed || (!mem_ok))
{
return;
}
uint tile_offset = tile_alloc.alloc.offset;
int xray = int(floor(p0.x * 0.0625));
int last_xray = int(floor(p1.x * 0.0625));
if (p0.y > p1.y)
{
int tmp = xray;
xray = last_xray;
last_xray = tmp;
}
for (int y = y0; y < y1; y++)
{
float tile_y0 = float(y * 16);
int xbackdrop = max((xray + 1), bbox.x);
bool _1319 = !is_stroke;
bool _1329;
if (_1319)
{
_1329 = fast::min(p0.y, p1.y) < tile_y0;
}
else
{
_1329 = _1319;
}
bool _1336;
if (_1329)
{
_1336 = xbackdrop < bbox.z;
}
else
{
_1336 = _1329;
}
if (_1336)
{
int backdrop = (p1.y < p0.y) ? 1 : (-1);
TileRef param_42 = path.tiles;
uint param_43 = uint(base + xbackdrop);
TileRef tile_ref = Tile_index(param_42, param_43);
uint tile_el = tile_ref.offset >> uint(2);
Alloc param_44 = path_alloc;
uint param_45 = tile_el + 1u;
if (touch_mem(param_44, param_45))
{
uint _1374 = atomic_fetch_add_explicit((device atomic_uint*)&v_136.memory[tile_el + 1u], uint(backdrop), memory_order_relaxed);
}
}
int next_xray = last_xray;
if (y < (y1 - 1))
{
float tile_y1 = float((y + 1) * 16);
float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy);
next_xray = int(floor(x_edge * 0.0625));
}
int min_xray = min(xray, next_xray);
int max_xray = max(xray, next_xray);
int xx0 = min(int(floor(xc - c)), min_xray);
int xx1 = max(int(ceil(xc + c)), (max_xray + 1));
xx0 = clamp(xx0, x0, x1);
xx1 = clamp(xx1, x0, x1);
for (int x = xx0; x < xx1; x++)
{
float tile_x0 = float(x * 16);
TileRef param_46 = TileRef{ path.tiles.offset };
uint param_47 = uint(base + x);
TileRef tile_ref_1 = Tile_index(param_46, param_47);
uint tile_el_1 = tile_ref_1.offset >> uint(2);
uint old = 0u;
Alloc param_48 = path_alloc;
uint param_49 = tile_el_1;
if (touch_mem(param_48, param_49))
{
uint _1477 = atomic_exchange_explicit((device atomic_uint*)&v_136.memory[tile_el_1], tile_offset, memory_order_relaxed);
old = _1477;
}
tile_seg.origin = p0;
tile_seg.vector = p1 - p0;
float y_edge = 0.0;
if (!is_stroke)
{
y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);
if (fast::min(p0.x, p1.x) < tile_x0)
{
float2 p = float2(tile_x0, y_edge);
if (p0.x > p1.x)
{
tile_seg.vector = p - p0;
}
else
{
tile_seg.origin = p;
tile_seg.vector = p1 - p;
}
if (tile_seg.vector.x == 0.0)
{
tile_seg.vector.x = sign(p1.x - p0.x) * 9.999999717180685365747194737196e-10;
}
}
if ((x <= min_xray) || (max_xray < x))
{
y_edge = 1000000000.0;
}
}
tile_seg.y_edge = y_edge;
tile_seg.next.offset = old;
Alloc param_50 = tile_alloc.alloc;
TileSegRef param_51 = TileSegRef{ tile_offset };
TileSeg param_52 = tile_seg;
TileSeg_write(param_50, param_51, param_52, v_136, v_136BufferSize);
tile_offset += 24u;
}
xc += b;
base += stride;
xray = next_xray;
}
n_out++;
target += v_step;
p0 = p1;
continue;
}
else
{
break;
}
}
val_sum += params_1.val;
qp0 = qp2_1;
}
break;
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,661 +0,0 @@
struct Alloc
{
uint offset;
};
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct TransformSegRef
{
uint offset;
};
struct TransformSeg
{
float4 mat;
float2 translate;
};
struct PathCubicRef
{
uint offset;
};
struct PathCubic
{
float2 p0;
float2 p1;
float2 p2;
float2 p3;
uint path_ix;
uint trans_ix;
float2 stroke;
};
struct PathSegRef
{
uint offset;
};
struct Monoid
{
float4 bbox;
uint flags;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const TagMonoid _135 = { 0u, 0u, 0u, 0u, 0u };
static const Monoid _567 = { 0.0f.xxxx, 0u };
RWByteAddressBuffer _111 : register(u0, space0);
ByteAddressBuffer _574 : register(t2, space0);
ByteAddressBuffer _639 : register(t1, space0);
ByteAddressBuffer _710 : register(t3, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared TagMonoid sh_tag[256];
groupshared Monoid sh_scratch[256];
TagMonoid reduce_tag(uint tag_word)
{
uint point_count = tag_word & 50529027u;
TagMonoid c;
c.pathseg_ix = uint(int(countbits((point_count * 7u) & 67372036u)));
c.linewidth_ix = uint(int(countbits(tag_word & 1077952576u)));
c.path_ix = uint(int(countbits(tag_word & 269488144u)));
c.trans_ix = uint(int(countbits(tag_word & 538976288u)));
uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u);
uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u));
a += (a >> uint(8));
a += (a >> uint(16));
c.pathseg_offset = a & 255u;
return c;
}
TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
TagMonoid tag_monoid_identity()
{
return _135;
}
float2 read_f32_point(uint ix)
{
float x = asfloat(_574.Load(ix * 4 + 0));
float y = asfloat(_574.Load((ix + 1u) * 4 + 0));
return float2(x, y);
}
float2 read_i16_point(uint ix)
{
uint raw = _574.Load(ix * 4 + 0);
float x = float(int(raw << uint(16)) >> 16);
float y = float(int(raw) >> 16);
return float2(x, y);
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
uint read_mem(Alloc alloc, uint offset)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = _111.Load(offset * 4 + 8);
return v;
}
TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9);
Alloc param_10 = a;
uint param_11 = ix + 5u;
uint raw5 = read_mem(param_10, param_11);
TransformSeg s;
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.translate = float2(asfloat(raw4), asfloat(raw5));
return s;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_111.Store(offset * 4 + 8, val);
}
void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.p0.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.p0.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.p1.x);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.p1.y);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.p2.x);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = asuint(s.p2.y);
write_mem(param_15, param_16, param_17);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = asuint(s.p3.x);
write_mem(param_18, param_19, param_20);
Alloc param_21 = a;
uint param_22 = ix + 7u;
uint param_23 = asuint(s.p3.y);
write_mem(param_21, param_22, param_23);
Alloc param_24 = a;
uint param_25 = ix + 8u;
uint param_26 = s.path_ix;
write_mem(param_24, param_25, param_26);
Alloc param_27 = a;
uint param_28 = ix + 9u;
uint param_29 = s.trans_ix;
write_mem(param_27, param_28, param_29);
Alloc param_30 = a;
uint param_31 = ix + 10u;
uint param_32 = asuint(s.stroke.x);
write_mem(param_30, param_31, param_32);
Alloc param_33 = a;
uint param_34 = ix + 11u;
uint param_35 = asuint(s.stroke.y);
write_mem(param_33, param_34, param_35);
}
void PathSeg_Cubic_write(Alloc a, PathSegRef ref, uint flags, PathCubic s)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2);
PathCubicRef _458 = { ref.offset + 4u };
Alloc param_3 = a;
PathCubicRef param_4 = _458;
PathCubic param_5 = s;
PathCubic_write(param_3, param_4, param_5);
}
Monoid combine_monoid(Monoid a, Monoid b)
{
Monoid c;
c.bbox = b.bbox;
bool _472 = (a.flags & 1u) == 0u;
bool _480;
if (_472)
{
_480 = b.bbox.z <= b.bbox.x;
}
else
{
_480 = _472;
}
bool _488;
if (_480)
{
_488 = b.bbox.w <= b.bbox.y;
}
else
{
_488 = _480;
}
if (_488)
{
c.bbox = a.bbox;
}
else
{
bool _498 = (a.flags & 1u) == 0u;
bool _505;
if (_498)
{
_505 = (b.flags & 2u) == 0u;
}
else
{
_505 = _498;
}
bool _522;
if (_505)
{
bool _512 = a.bbox.z > a.bbox.x;
bool _521;
if (!_512)
{
_521 = a.bbox.w > a.bbox.y;
}
else
{
_521 = _512;
}
_522 = _521;
}
else
{
_522 = _505;
}
if (_522)
{
float4 _529 = c.bbox;
float2 _531 = min(a.bbox.xy, _529.xy);
c.bbox.x = _531.x;
c.bbox.y = _531.y;
float4 _540 = c.bbox;
float2 _542 = max(a.bbox.zw, _540.zw);
c.bbox.z = _542.x;
c.bbox.w = _542.y;
}
}
c.flags = (a.flags & 2u) | b.flags;
c.flags |= ((a.flags & 1u) << uint(1));
return c;
}
Monoid monoid_identity()
{
return _567;
}
uint round_down(float x)
{
return uint(max(0.0f, floor(x) + 32768.0f));
}
uint round_up(float x)
{
return uint(min(65535.0f, ceil(x) + 32768.0f));
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = _574.Load(((_639.Load(92) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint param = tag_word;
TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i))
{
TagMonoid other = sh_tag[gl_LocalInvocationID.x - (1u << i)];
TagMonoid param_1 = other;
TagMonoid param_2 = local_tm;
local_tm = combine_tag_monoid(param_1, param_2);
}
GroupMemoryBarrierWithGroupSync();
sh_tag[gl_LocalInvocationID.x] = local_tm;
}
GroupMemoryBarrierWithGroupSync();
TagMonoid tm = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
TagMonoid _716;
_716.trans_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 0);
_716.linewidth_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 4);
_716.pathseg_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 8);
_716.path_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 12);
_716.pathseg_offset = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 16);
tm.trans_ix = _716.trans_ix;
tm.linewidth_ix = _716.linewidth_ix;
tm.pathseg_ix = _716.pathseg_ix;
tm.path_ix = _716.path_ix;
tm.pathseg_offset = _716.pathseg_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
TagMonoid param_3 = tm;
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4);
}
uint ps_ix = (_639.Load(96) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(88) >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix;
uint trans_ix = tm.trans_ix;
TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) };
TransformSegRef trans_ref = _771;
PathSegRef _781 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
PathSegRef ps_ref = _781;
float linewidth[4];
uint save_trans_ix[4];
float2 p0;
float2 p1;
float2 p2;
float2 p3;
Alloc param_13;
Monoid local[4];
PathCubic cubic;
Alloc param_15;
for (uint i_1 = 0u; i_1 < 4u; i_1++)
{
linewidth[i_1] = asfloat(_574.Load(lw_ix * 4 + 0));
save_trans_ix[i_1] = trans_ix;
uint tag_byte = tag_word >> (i_1 * 8u);
uint seg_type = tag_byte & 3u;
if (seg_type != 0u)
{
if ((tag_byte & 8u) != 0u)
{
uint param_5 = ps_ix;
p0 = read_f32_point(param_5);
uint param_6 = ps_ix + 2u;
p1 = read_f32_point(param_6);
if (seg_type >= 2u)
{
uint param_7 = ps_ix + 4u;
p2 = read_f32_point(param_7);
if (seg_type == 3u)
{
uint param_8 = ps_ix + 6u;
p3 = read_f32_point(param_8);
}
}
}
else
{
uint param_9 = ps_ix;
p0 = read_i16_point(param_9);
uint param_10 = ps_ix + 1u;
p1 = read_i16_point(param_10);
if (seg_type >= 2u)
{
uint param_11 = ps_ix + 2u;
p2 = read_i16_point(param_11);
if (seg_type == 3u)
{
uint param_12 = ps_ix + 3u;
p3 = read_i16_point(param_12);
}
}
}
Alloc _877;
_877.offset = _639.Load(36);
param_13.offset = _877.offset;
TransformSegRef param_14 = trans_ref;
TransformSeg transform = TransformSeg_read(param_13, param_14);
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
p1 = ((transform.mat.xy * p1.x) + (transform.mat.zw * p1.y)) + transform.translate;
float4 bbox = float4(min(p0, p1), max(p0, p1));
if (seg_type >= 2u)
{
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _947 = bbox;
float2 _950 = min(_947.xy, p2);
bbox.x = _950.x;
bbox.y = _950.y;
float4 _955 = bbox;
float2 _958 = max(_955.zw, p2);
bbox.z = _958.x;
bbox.w = _958.y;
if (seg_type == 3u)
{
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _983 = bbox;
float2 _986 = min(_983.xy, p3);
bbox.x = _986.x;
bbox.y = _986.y;
float4 _991 = bbox;
float2 _994 = max(_991.zw, p3);
bbox.z = _994.x;
bbox.w = _994.y;
}
else
{
p3 = p2;
p2 = lerp(p1, p2, 0.3333333432674407958984375f.xx);
p1 = lerp(p1, p0, 0.3333333432674407958984375f.xx);
}
}
else
{
p3 = p1;
p2 = lerp(p3, p0, 0.3333333432674407958984375f.xx);
p1 = lerp(p0, p3, 0.3333333432674407958984375f.xx);
}
float2 stroke = 0.0f.xx;
if (linewidth[i_1] >= 0.0f)
{
stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5f * linewidth[i_1]);
bbox += float4(-stroke, stroke);
}
local[i_1].bbox = bbox;
local[i_1].flags = 0u;
cubic.p0 = p0;
cubic.p1 = p1;
cubic.p2 = p2;
cubic.p3 = p3;
cubic.path_ix = tm.path_ix;
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
cubic.stroke = stroke;
uint fill_mode = uint(linewidth[i_1] >= 0.0f);
Alloc _1089;
_1089.offset = _639.Load(28);
param_15.offset = _1089.offset;
PathSegRef param_16 = ps_ref;
uint param_17 = fill_mode;
PathCubic param_18 = cubic;
PathSeg_Cubic_write(param_15, param_16, param_17, param_18);
ps_ref.offset += 52u;
uint n_points = (tag_byte & 3u) + ((tag_byte >> uint(2)) & 1u);
uint n_words = n_points + (n_points & (((tag_byte >> uint(3)) & 1u) * 15u));
ps_ix += n_words;
}
else
{
local[i_1].bbox = 0.0f.xxxx;
uint is_path = (tag_byte >> uint(4)) & 1u;
local[i_1].flags = is_path;
tm.path_ix += is_path;
trans_ix += ((tag_byte >> uint(5)) & 1u);
trans_ref.offset += (((tag_byte >> uint(5)) & 1u) * 24u);
lw_ix += ((tag_byte >> uint(6)) & 1u);
}
}
Monoid agg = local[0];
for (uint i_2 = 1u; i_2 < 4u; i_2++)
{
Monoid param_19 = agg;
Monoid param_20 = local[i_2];
agg = combine_monoid(param_19, param_20);
local[i_2] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_3 = 0u; i_3 < 8u; i_3++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_3))
{
Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - (1u << i_3)];
Monoid param_21 = other_1;
Monoid param_22 = agg;
agg = combine_monoid(param_21, param_22);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
uint path_ix = save_path_ix;
uint bbox_out_ix = (_639.Load(40) >> uint(2)) + (path_ix * 6u);
Monoid row = monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_4 = 0u; i_4 < 4u; i_4++)
{
Monoid param_23 = row;
Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false;
bool _1264 = i_4 == 3u;
bool _1270;
if (_1264)
{
_1270 = gl_LocalInvocationID.x == 255u;
}
else
{
_1270 = _1264;
}
if (_1270)
{
do_atomic = true;
}
if ((m.flags & 1u) != 0u)
{
_111.Store((bbox_out_ix + 4u) * 4 + 8, asuint(linewidth[i_4]));
_111.Store((bbox_out_ix + 5u) * 4 + 8, save_trans_ix[i_4]);
if ((m.flags & 2u) == 0u)
{
do_atomic = true;
}
else
{
float param_25 = m.bbox.x;
_111.Store(bbox_out_ix * 4 + 8, round_down(param_25));
float param_26 = m.bbox.y;
_111.Store((bbox_out_ix + 1u) * 4 + 8, round_down(param_26));
float param_27 = m.bbox.z;
_111.Store((bbox_out_ix + 2u) * 4 + 8, round_up(param_27));
float param_28 = m.bbox.w;
_111.Store((bbox_out_ix + 3u) * 4 + 8, round_up(param_28));
bbox_out_ix += 6u;
do_atomic = false;
}
}
if (do_atomic)
{
bool _1335 = m.bbox.z > m.bbox.x;
bool _1344;
if (!_1335)
{
_1344 = m.bbox.w > m.bbox.y;
}
else
{
_1344 = _1335;
}
if (_1344)
{
float param_29 = m.bbox.x;
uint _1353;
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353);
float param_30 = m.bbox.y;
uint _1361;
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361);
float param_31 = m.bbox.z;
uint _1369;
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369);
float param_32 = m.bbox.w;
uint _1377;
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377);
}
bbox_out_ix += 6u;
}
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,717 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Alloc
{
uint offset;
};
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct TransformSegRef
{
uint offset;
};
struct TransformSeg
{
float4 mat;
float2 translate;
};
struct PathCubicRef
{
uint offset;
};
struct PathCubic
{
float2 p0;
float2 p1;
float2 p2;
float2 p3;
uint path_ix;
uint trans_ix;
float2 stroke;
};
struct PathSegRef
{
uint offset;
};
struct Monoid
{
float4 bbox;
uint flags;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
struct SceneBuf
{
uint scene[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct TagMonoid_1
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct ParentBuf
{
TagMonoid_1 parent[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
TagMonoid reduce_tag(thread const uint& tag_word)
{
uint point_count = tag_word & 50529027u;
TagMonoid c;
c.pathseg_ix = uint(int(popcount((point_count * 7u) & 67372036u)));
c.linewidth_ix = uint(int(popcount(tag_word & 1077952576u)));
c.path_ix = uint(int(popcount(tag_word & 269488144u)));
c.trans_ix = uint(int(popcount(tag_word & 538976288u)));
uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u);
uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u));
a += (a >> uint(8));
a += (a >> uint(16));
c.pathseg_offset = a & 255u;
return c;
}
static inline __attribute__((always_inline))
TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
static inline __attribute__((always_inline))
TagMonoid tag_monoid_identity()
{
return TagMonoid{ 0u, 0u, 0u, 0u, 0u };
}
static inline __attribute__((always_inline))
float2 read_f32_point(thread const uint& ix, const device SceneBuf& v_574)
{
float x = as_type<float>(v_574.scene[ix]);
float y = as_type<float>(v_574.scene[ix + 1u]);
return float2(x, y);
}
static inline __attribute__((always_inline))
float2 read_i16_point(thread const uint& ix, const device SceneBuf& v_574)
{
uint raw = v_574.scene[ix];
float x = float(int(raw << uint(16)) >> 16);
float y = float(int(raw) >> 16);
return float2(x, y);
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_111)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return 0u;
}
uint v = v_111.memory[offset];
return v;
}
static inline __attribute__((always_inline))
TransformSeg TransformSeg_read(thread const Alloc& a, thread const TransformSegRef& ref, device Memory& v_111)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_111);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_111);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_111);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7, v_111);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9, v_111);
Alloc param_10 = a;
uint param_11 = ix + 5u;
uint raw5 = read_mem(param_10, param_11, v_111);
TransformSeg s;
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
return s;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_111)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_111.memory[offset] = val;
}
static inline __attribute__((always_inline))
void PathCubic_write(thread const Alloc& a, thread const PathCubicRef& ref, thread const PathCubic& s, device Memory& v_111)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.p0.x);
write_mem(param, param_1, param_2, v_111);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.p0.y);
write_mem(param_3, param_4, param_5, v_111);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.p1.x);
write_mem(param_6, param_7, param_8, v_111);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.p1.y);
write_mem(param_9, param_10, param_11, v_111);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.p2.x);
write_mem(param_12, param_13, param_14, v_111);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = as_type<uint>(s.p2.y);
write_mem(param_15, param_16, param_17, v_111);
Alloc param_18 = a;
uint param_19 = ix + 6u;
uint param_20 = as_type<uint>(s.p3.x);
write_mem(param_18, param_19, param_20, v_111);
Alloc param_21 = a;
uint param_22 = ix + 7u;
uint param_23 = as_type<uint>(s.p3.y);
write_mem(param_21, param_22, param_23, v_111);
Alloc param_24 = a;
uint param_25 = ix + 8u;
uint param_26 = s.path_ix;
write_mem(param_24, param_25, param_26, v_111);
Alloc param_27 = a;
uint param_28 = ix + 9u;
uint param_29 = s.trans_ix;
write_mem(param_27, param_28, param_29, v_111);
Alloc param_30 = a;
uint param_31 = ix + 10u;
uint param_32 = as_type<uint>(s.stroke.x);
write_mem(param_30, param_31, param_32, v_111);
Alloc param_33 = a;
uint param_34 = ix + 11u;
uint param_35 = as_type<uint>(s.stroke.y);
write_mem(param_33, param_34, param_35, v_111);
}
static inline __attribute__((always_inline))
void PathSeg_Cubic_write(thread const Alloc& a, thread const PathSegRef& ref, thread const uint& flags, thread const PathCubic& s, device Memory& v_111)
{
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint param_2 = (flags << uint(16)) | 1u;
write_mem(param, param_1, param_2, v_111);
Alloc param_3 = a;
PathCubicRef param_4 = PathCubicRef{ ref.offset + 4u };
PathCubic param_5 = s;
PathCubic_write(param_3, param_4, param_5, v_111);
}
static inline __attribute__((always_inline))
Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b)
{
Monoid c;
c.bbox = b.bbox;
bool _472 = (a.flags & 1u) == 0u;
bool _480;
if (_472)
{
_480 = b.bbox.z <= b.bbox.x;
}
else
{
_480 = _472;
}
bool _488;
if (_480)
{
_488 = b.bbox.w <= b.bbox.y;
}
else
{
_488 = _480;
}
if (_488)
{
c.bbox = a.bbox;
}
else
{
bool _498 = (a.flags & 1u) == 0u;
bool _505;
if (_498)
{
_505 = (b.flags & 2u) == 0u;
}
else
{
_505 = _498;
}
bool _522;
if (_505)
{
bool _512 = a.bbox.z > a.bbox.x;
bool _521;
if (!_512)
{
_521 = a.bbox.w > a.bbox.y;
}
else
{
_521 = _512;
}
_522 = _521;
}
else
{
_522 = _505;
}
if (_522)
{
float4 _529 = c.bbox;
float2 _531 = fast::min(a.bbox.xy, _529.xy);
c.bbox.x = _531.x;
c.bbox.y = _531.y;
float4 _540 = c.bbox;
float2 _542 = fast::max(a.bbox.zw, _540.zw);
c.bbox.z = _542.x;
c.bbox.w = _542.y;
}
}
c.flags = (a.flags & 2u) | b.flags;
c.flags |= ((a.flags & 1u) << uint(1));
return c;
}
static inline __attribute__((always_inline))
Monoid monoid_identity()
{
return Monoid{ float4(0.0), 0u };
}
static inline __attribute__((always_inline))
uint round_down(thread const float& x)
{
return uint(fast::max(0.0, floor(x) + 32768.0));
}
static inline __attribute__((always_inline))
uint round_up(thread const float& x)
{
return uint(fast::min(65535.0, ceil(x) + 32768.0));
}
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _710 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup TagMonoid sh_tag[256];
threadgroup Monoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = v_574.scene[(_639.conf.pathtag_offset >> uint(2)) + (ix >> uint(2))];
uint param = tag_word;
TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i))
{
TagMonoid other = sh_tag[gl_LocalInvocationID.x - (1u << i)];
TagMonoid param_1 = other;
TagMonoid param_2 = local_tm;
local_tm = combine_tag_monoid(param_1, param_2);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_tag[gl_LocalInvocationID.x] = local_tm;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
TagMonoid tm = tag_monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _713 = gl_WorkGroupID.x - 1u;
tm.trans_ix = _710.parent[_713].trans_ix;
tm.linewidth_ix = _710.parent[_713].linewidth_ix;
tm.pathseg_ix = _710.parent[_713].pathseg_ix;
tm.path_ix = _710.parent[_713].path_ix;
tm.pathseg_offset = _710.parent[_713].pathseg_offset;
}
if (gl_LocalInvocationID.x > 0u)
{
TagMonoid param_3 = tm;
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4);
}
uint ps_ix = (_639.conf.pathseg_offset >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.conf.linewidth_offset >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix;
uint trans_ix = tm.trans_ix;
TransformSegRef trans_ref = TransformSegRef{ _639.conf.trans_alloc.offset + (trans_ix * 24u) };
PathSegRef ps_ref = PathSegRef{ _639.conf.pathseg_alloc.offset + (tm.pathseg_ix * 52u) };
spvUnsafeArray<float, 4> linewidth;
spvUnsafeArray<uint, 4> save_trans_ix;
float2 p0;
float2 p1;
float2 p2;
float2 p3;
Alloc param_13;
spvUnsafeArray<Monoid, 4> local;
PathCubic cubic;
Alloc param_15;
for (uint i_1 = 0u; i_1 < 4u; i_1++)
{
linewidth[i_1] = as_type<float>(v_574.scene[lw_ix]);
save_trans_ix[i_1] = trans_ix;
uint tag_byte = tag_word >> (i_1 * 8u);
uint seg_type = tag_byte & 3u;
if (seg_type != 0u)
{
if ((tag_byte & 8u) != 0u)
{
uint param_5 = ps_ix;
p0 = read_f32_point(param_5, v_574);
uint param_6 = ps_ix + 2u;
p1 = read_f32_point(param_6, v_574);
if (seg_type >= 2u)
{
uint param_7 = ps_ix + 4u;
p2 = read_f32_point(param_7, v_574);
if (seg_type == 3u)
{
uint param_8 = ps_ix + 6u;
p3 = read_f32_point(param_8, v_574);
}
}
}
else
{
uint param_9 = ps_ix;
p0 = read_i16_point(param_9, v_574);
uint param_10 = ps_ix + 1u;
p1 = read_i16_point(param_10, v_574);
if (seg_type >= 2u)
{
uint param_11 = ps_ix + 2u;
p2 = read_i16_point(param_11, v_574);
if (seg_type == 3u)
{
uint param_12 = ps_ix + 3u;
p3 = read_i16_point(param_12, v_574);
}
}
}
param_13.offset = _639.conf.trans_alloc.offset;
TransformSegRef param_14 = trans_ref;
TransformSeg transform = TransformSeg_read(param_13, param_14, v_111);
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
p1 = ((transform.mat.xy * p1.x) + (transform.mat.zw * p1.y)) + transform.translate;
float4 bbox = float4(fast::min(p0, p1), fast::max(p0, p1));
if (seg_type >= 2u)
{
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _947 = bbox;
float2 _950 = fast::min(_947.xy, p2);
bbox.x = _950.x;
bbox.y = _950.y;
float4 _955 = bbox;
float2 _958 = fast::max(_955.zw, p2);
bbox.z = _958.x;
bbox.w = _958.y;
if (seg_type == 3u)
{
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _983 = bbox;
float2 _986 = fast::min(_983.xy, p3);
bbox.x = _986.x;
bbox.y = _986.y;
float4 _991 = bbox;
float2 _994 = fast::max(_991.zw, p3);
bbox.z = _994.x;
bbox.w = _994.y;
}
else
{
p3 = p2;
p2 = mix(p1, p2, float2(0.3333333432674407958984375));
p1 = mix(p1, p0, float2(0.3333333432674407958984375));
}
}
else
{
p3 = p1;
p2 = mix(p3, p0, float2(0.3333333432674407958984375));
p1 = mix(p0, p3, float2(0.3333333432674407958984375));
}
float2 stroke = float2(0.0);
if (linewidth[i_1] >= 0.0)
{
stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5 * linewidth[i_1]);
bbox += float4(-stroke, stroke);
}
local[i_1].bbox = bbox;
local[i_1].flags = 0u;
cubic.p0 = p0;
cubic.p1 = p1;
cubic.p2 = p2;
cubic.p3 = p3;
cubic.path_ix = tm.path_ix;
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
cubic.stroke = stroke;
uint fill_mode = uint(linewidth[i_1] >= 0.0);
param_15.offset = _639.conf.pathseg_alloc.offset;
PathSegRef param_16 = ps_ref;
uint param_17 = fill_mode;
PathCubic param_18 = cubic;
PathSeg_Cubic_write(param_15, param_16, param_17, param_18, v_111);
ps_ref.offset += 52u;
uint n_points = (tag_byte & 3u) + ((tag_byte >> uint(2)) & 1u);
uint n_words = n_points + (n_points & (((tag_byte >> uint(3)) & 1u) * 15u));
ps_ix += n_words;
}
else
{
local[i_1].bbox = float4(0.0);
uint is_path = (tag_byte >> uint(4)) & 1u;
local[i_1].flags = is_path;
tm.path_ix += is_path;
trans_ix += ((tag_byte >> uint(5)) & 1u);
trans_ref.offset += (((tag_byte >> uint(5)) & 1u) * 24u);
lw_ix += ((tag_byte >> uint(6)) & 1u);
}
}
Monoid agg = local[0];
for (uint i_2 = 1u; i_2 < 4u; i_2++)
{
Monoid param_19 = agg;
Monoid param_20 = local[i_2];
agg = combine_monoid(param_19, param_20);
local[i_2] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_3 = 0u; i_3 < 8u; i_3++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_3))
{
Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - (1u << i_3)];
Monoid param_21 = other_1;
Monoid param_22 = agg;
agg = combine_monoid(param_21, param_22);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint path_ix = save_path_ix;
uint bbox_out_ix = (_639.conf.path_bbox_alloc.offset >> uint(2)) + (path_ix * 6u);
Monoid row = monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_4 = 0u; i_4 < 4u; i_4++)
{
Monoid param_23 = row;
Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false;
bool _1264 = i_4 == 3u;
bool _1270;
if (_1264)
{
_1270 = gl_LocalInvocationID.x == 255u;
}
else
{
_1270 = _1264;
}
if (_1270)
{
do_atomic = true;
}
if ((m.flags & 1u) != 0u)
{
v_111.memory[bbox_out_ix + 4u] = as_type<uint>(linewidth[i_4]);
v_111.memory[bbox_out_ix + 5u] = save_trans_ix[i_4];
if ((m.flags & 2u) == 0u)
{
do_atomic = true;
}
else
{
float param_25 = m.bbox.x;
v_111.memory[bbox_out_ix] = round_down(param_25);
float param_26 = m.bbox.y;
v_111.memory[bbox_out_ix + 1u] = round_down(param_26);
float param_27 = m.bbox.z;
v_111.memory[bbox_out_ix + 2u] = round_up(param_27);
float param_28 = m.bbox.w;
v_111.memory[bbox_out_ix + 3u] = round_up(param_28);
bbox_out_ix += 6u;
do_atomic = false;
}
}
if (do_atomic)
{
bool _1335 = m.bbox.z > m.bbox.x;
bool _1344;
if (!_1335)
{
_1344 = m.bbox.w > m.bbox.y;
}
else
{
_1344 = _1335;
}
if (_1344)
{
float param_29 = m.bbox.x;
uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
float param_30 = m.bbox.y;
uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
float param_31 = m.bbox.z;
uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
float param_32 = m.bbox.w;
uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
}
bbox_out_ix += 6u;
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,138 +0,0 @@
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u);
ByteAddressBuffer _139 : register(t1, space0);
ByteAddressBuffer _151 : register(t2, space0);
RWByteAddressBuffer _238 : register(u3, space0);
RWByteAddressBuffer _258 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared TagMonoid sh_scratch[128];
TagMonoid reduce_tag(uint tag_word)
{
uint point_count = tag_word & 50529027u;
TagMonoid c;
c.pathseg_ix = uint(int(countbits((point_count * 7u) & 67372036u)));
c.linewidth_ix = uint(int(countbits(tag_word & 1077952576u)));
c.path_ix = uint(int(countbits(tag_word & 269488144u)));
c.trans_ix = uint(int(countbits(tag_word & 538976288u)));
uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u);
uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u));
a += (a >> uint(8));
a += (a >> uint(16));
c.pathseg_offset = a & 255u;
return c;
}
TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.Load(92) >> uint(2)) + ix;
uint tag_word = _151.Load(scene_ix * 4 + 0);
uint param = tag_word;
TagMonoid agg = reduce_tag(param);
for (uint i = 1u; i < 2u; i++)
{
tag_word = _151.Load((scene_ix + i) * 4 + 0);
uint param_1 = tag_word;
TagMonoid param_2 = agg;
TagMonoid param_3 = reduce_tag(param_1);
agg = combine_tag_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 7u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if ((gl_LocalInvocationID.x + (1u << i_1)) < 128u)
{
TagMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
TagMonoid param_4 = agg;
TagMonoid param_5 = other;
agg = combine_tag_monoid(param_4, param_5);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
_238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
_238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
_238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
_238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
}
}
[numthreads(128, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,154 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
struct TagMonoid_1
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct OutBuf
{
TagMonoid_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(128u, 1u, 1u);
static inline __attribute__((always_inline))
TagMonoid reduce_tag(thread const uint& tag_word)
{
uint point_count = tag_word & 50529027u;
TagMonoid c;
c.pathseg_ix = uint(int(popcount((point_count * 7u) & 67372036u)));
c.linewidth_ix = uint(int(popcount(tag_word & 1077952576u)));
c.path_ix = uint(int(popcount(tag_word & 269488144u)));
c.trans_ix = uint(int(popcount(tag_word & 538976288u)));
uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u);
uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u));
a += (a >> uint(8));
a += (a >> uint(16));
c.pathseg_offset = a & 255u;
return c;
}
static inline __attribute__((always_inline))
TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _151 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup TagMonoid sh_scratch[128];
uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix;
uint tag_word = _151.scene[scene_ix];
uint param = tag_word;
TagMonoid agg = reduce_tag(param);
for (uint i = 1u; i < 2u; i++)
{
tag_word = _151.scene[scene_ix + i];
uint param_1 = tag_word;
TagMonoid param_2 = agg;
TagMonoid param_3 = reduce_tag(param_1);
agg = combine_tag_monoid(param_2, param_3);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 7u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((gl_LocalInvocationID.x + (1u << i_1)) < 128u)
{
TagMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
TagMonoid param_4 = agg;
TagMonoid param_5 = other;
agg = combine_tag_monoid(param_4, param_5);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
_238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
_238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
_238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
_238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,115 +0,0 @@
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const TagMonoid _18 = { 0u, 0u, 0u, 0u, 0u };
RWByteAddressBuffer _78 : register(u0, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared TagMonoid sh_scratch[256];
TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
TagMonoid tag_monoid_identity()
{
return _18;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TagMonoid _82;
_82.trans_ix = _78.Load(ix * 20 + 0);
_82.linewidth_ix = _78.Load(ix * 20 + 4);
_82.pathseg_ix = _78.Load(ix * 20 + 8);
_82.path_ix = _78.Load(ix * 20 + 12);
_82.pathseg_offset = _78.Load(ix * 20 + 16);
TagMonoid local[8];
local[0].trans_ix = _82.trans_ix;
local[0].linewidth_ix = _82.linewidth_ix;
local[0].pathseg_ix = _82.pathseg_ix;
local[0].path_ix = _82.path_ix;
local[0].pathseg_offset = _82.pathseg_offset;
TagMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
TagMonoid param = local[i - 1u];
TagMonoid _115;
_115.trans_ix = _78.Load((ix + i) * 20 + 0);
_115.linewidth_ix = _78.Load((ix + i) * 20 + 4);
_115.pathseg_ix = _78.Load((ix + i) * 20 + 8);
_115.path_ix = _78.Load((ix + i) * 20 + 12);
_115.pathseg_offset = _78.Load((ix + i) * 20 + 16);
param_1.trans_ix = _115.trans_ix;
param_1.linewidth_ix = _115.linewidth_ix;
param_1.pathseg_ix = _115.pathseg_ix;
param_1.path_ix = _115.path_ix;
param_1.pathseg_offset = _115.pathseg_offset;
local[i] = combine_tag_monoid(param, param_1);
}
TagMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
TagMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
TagMonoid param_2 = other;
TagMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
TagMonoid row = tag_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
TagMonoid param_4 = row;
TagMonoid param_5 = local[i_2];
TagMonoid m = combine_tag_monoid(param_4, param_5);
uint _210 = ix + i_2;
_78.Store(_210 * 20 + 0, m.trans_ix);
_78.Store(_210 * 20 + 4, m.linewidth_ix);
_78.Store(_210 * 20 + 8, m.pathseg_ix);
_78.Store(_210 * 20 + 12, m.path_ix);
_78.Store(_210 * 20 + 16, m.pathseg_offset);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,146 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct TagMonoid
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct TagMonoid_1
{
uint trans_ix;
uint linewidth_ix;
uint pathseg_ix;
uint path_ix;
uint pathseg_offset;
};
struct DataBuf
{
TagMonoid_1 data[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b)
{
TagMonoid c;
c.trans_ix = a.trans_ix + b.trans_ix;
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.path_ix = a.path_ix + b.path_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
return c;
}
static inline __attribute__((always_inline))
TagMonoid tag_monoid_identity()
{
return TagMonoid{ 0u, 0u, 0u, 0u, 0u };
}
kernel void main0(device DataBuf& _78 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup TagMonoid sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
spvUnsafeArray<TagMonoid, 8> local;
local[0].trans_ix = _78.data[ix].trans_ix;
local[0].linewidth_ix = _78.data[ix].linewidth_ix;
local[0].pathseg_ix = _78.data[ix].pathseg_ix;
local[0].path_ix = _78.data[ix].path_ix;
local[0].pathseg_offset = _78.data[ix].pathseg_offset;
TagMonoid param_1;
for (uint i = 1u; i < 8u; i++)
{
uint _109 = ix + i;
TagMonoid param = local[i - 1u];
param_1.trans_ix = _78.data[_109].trans_ix;
param_1.linewidth_ix = _78.data[_109].linewidth_ix;
param_1.pathseg_ix = _78.data[_109].pathseg_ix;
param_1.path_ix = _78.data[_109].path_ix;
param_1.pathseg_offset = _78.data[_109].pathseg_offset;
local[i] = combine_tag_monoid(param, param_1);
}
TagMonoid agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
TagMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
TagMonoid param_2 = other;
TagMonoid param_3 = agg;
agg = combine_tag_monoid(param_2, param_3);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
TagMonoid row = tag_monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
TagMonoid param_4 = row;
TagMonoid param_5 = local[i_2];
TagMonoid m = combine_tag_monoid(param_4, param_5);
uint _210 = ix + i_2;
_78.data[_210].trans_ix = m.trans_ix;
_78.data[_210].linewidth_ix = m.linewidth_ix;
_78.data[_210].pathseg_ix = m.pathseg_ix;
_78.data[_210].path_ix = m.path_ix;
_78.data[_210].pathseg_offset = m.pathseg_offset;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,264 +0,0 @@
struct Alloc
{
uint offset;
};
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _70 : register(u0, space0);
ByteAddressBuffer _181 : register(t1, space0);
ByteAddressBuffer _257 : register(t2, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared uint sh_tile_count[256];
groupshared MallocResult sh_tile_alloc;
float4 load_draw_bbox(uint draw_ix)
{
uint base = (_181.Load(64) >> uint(2)) + (4u * draw_ix);
float x0 = asfloat(_70.Load(base * 4 + 8));
float y0 = asfloat(_70.Load((base + 1u) * 4 + 8));
float x1 = asfloat(_70.Load((base + 2u) * 4 + 8));
float y1 = asfloat(_70.Load((base + 3u) * 4 + 8));
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
MallocResult malloc(uint size)
{
uint _76;
_70.InterlockedAdd(0, size, _76);
uint offset = _76;
uint _83;
_70.GetDimensions(_83);
_83 = (_83 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_83) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _105;
_70.InterlockedMax(4, 1u, _105);
return r;
}
return r;
}
Alloc slice_mem(Alloc a, uint offset, uint size)
{
Alloc _131 = { a.offset + offset };
return _131;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_70.Store(offset * 4 + 8, val);
}
void Path_write(Alloc a, PathRef ref, Path s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.bbox.x | (s.bbox.y << uint(16));
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = s.bbox.z | (s.bbox.w << uint(16));
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = s.tiles.offset;
write_mem(param_6, param_7, param_8);
}
void comp_main()
{
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef _241 = { _181.Load(16) + (element_ix * 12u) };
PathRef path_ref = _241;
uint drawtag_base = _181.Load(100) >> uint(2);
uint drawtag = 0u;
if (element_ix < _181.Load(0))
{
drawtag = _257.Load((drawtag_base + element_ix) * 4 + 0);
}
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
if ((drawtag != 0u) && (drawtag != 37u))
{
uint param = element_ix;
float4 bbox = load_draw_bbox(param);
x0 = int(floor(bbox.x * 0.0625f));
y0 = int(floor(bbox.y * 0.0625f));
x1 = int(ceil(bbox.z * 0.0625f));
y1 = int(ceil(bbox.w * 0.0625f));
}
x0 = clamp(x0, 0, int(_181.Load(8)));
y0 = clamp(y0, 0, int(_181.Load(12)));
x1 = clamp(x1, 0, int(_181.Load(8)));
y1 = clamp(y1, 0, int(_181.Load(12)));
Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0));
sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count;
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
if (th_ix >= (1u << i))
{
total_tile_count += sh_tile_count[th_ix - (1u << i)];
}
GroupMemoryBarrierWithGroupSync();
sh_tile_count[th_ix] = total_tile_count;
}
if (th_ix == 255u)
{
uint param_1 = total_tile_count * 8u;
MallocResult _392 = malloc(param_1);
sh_tile_alloc = _392;
}
GroupMemoryBarrierWithGroupSync();
MallocResult alloc_start = sh_tile_alloc;
bool _403;
if (!alloc_start.failed)
{
_403 = _70.Load(4) != 0u;
}
else
{
_403 = alloc_start.failed;
}
if (_403)
{
return;
}
if (element_ix < _181.Load(0))
{
uint _416;
if (th_ix > 0u)
{
_416 = sh_tile_count[th_ix - 1u];
}
else
{
_416 = 0u;
}
uint tile_subix = _416;
Alloc param_2 = alloc_start.alloc;
uint param_3 = 8u * tile_subix;
uint param_4 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
TileRef _438 = { tiles_alloc.offset };
path.tiles = _438;
Alloc _444;
_444.offset = _181.Load(16);
Alloc param_5;
param_5.offset = _444.offset;
PathRef param_6 = path_ref;
Path param_7 = path;
Path_write(param_5, param_6, param_7);
}
uint total_count = sh_tile_count[255] * 2u;
uint start_ix = alloc_start.alloc.offset >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{
Alloc param_8 = alloc_start.alloc;
uint param_9 = start_ix + i_1;
uint param_10 = 0u;
write_mem(param_8, param_9, param_10);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,273 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct Alloc
{
uint offset;
};
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathRef
{
uint offset;
};
struct TileRef
{
uint offset;
};
struct Path
{
uint4 bbox;
TileRef tiles;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct SceneBuf
{
uint scene[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_70, constant uint& v_70BufferSize, const device ConfigBuf& v_181)
{
uint base = (v_181.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
float x0 = as_type<float>(v_70.memory[base]);
float y0 = as_type<float>(v_70.memory[base + 1u]);
float x1 = as_type<float>(v_70.memory[base + 2u]);
float y1 = as_type<float>(v_70.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
{
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_70, constant uint& v_70BufferSize)
{
uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&v_70.mem_offset, size, memory_order_relaxed);
uint offset = _76;
MallocResult r;
r.failed = (offset + size) > uint(int((v_70BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{
uint _105 = atomic_fetch_max_explicit((device atomic_uint*)&v_70.mem_error, 1u, memory_order_relaxed);
return r;
}
return r;
}
static inline __attribute__((always_inline))
Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size)
{
return Alloc{ a.offset + offset };
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_70, constant uint& v_70BufferSize)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_70.memory[offset] = val;
}
static inline __attribute__((always_inline))
void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_70, constant uint& v_70BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.bbox.x | (s.bbox.y << uint(16));
write_mem(param, param_1, param_2, v_70, v_70BufferSize);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = s.bbox.z | (s.bbox.w << uint(16));
write_mem(param_3, param_4, param_5, v_70, v_70BufferSize);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = s.tiles.offset;
write_mem(param_6, param_7, param_8, v_70, v_70BufferSize);
}
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_70 [[buffer(0)]], const device ConfigBuf& v_181 [[buffer(1)]], const device SceneBuf& _257 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup uint sh_tile_count[256];
threadgroup MallocResult sh_tile_alloc;
constant uint& v_70BufferSize = spvBufferSizeConstants[0];
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef{ v_181.conf.tile_alloc.offset + (element_ix * 12u) };
uint drawtag_base = v_181.conf.drawtag_offset >> uint(2);
uint drawtag = 0u;
if (element_ix < v_181.conf.n_elements)
{
drawtag = _257.scene[drawtag_base + element_ix];
}
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
if ((drawtag != 0u) && (drawtag != 37u))
{
uint param = element_ix;
float4 bbox = load_draw_bbox(param, v_70, v_70BufferSize, v_181);
x0 = int(floor(bbox.x * 0.0625));
y0 = int(floor(bbox.y * 0.0625));
x1 = int(ceil(bbox.z * 0.0625));
y1 = int(ceil(bbox.w * 0.0625));
}
x0 = clamp(x0, 0, int(v_181.conf.width_in_tiles));
y0 = clamp(y0, 0, int(v_181.conf.height_in_tiles));
x1 = clamp(x1, 0, int(v_181.conf.width_in_tiles));
y1 = clamp(y1, 0, int(v_181.conf.height_in_tiles));
Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0));
sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count;
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (th_ix >= (1u << i))
{
total_tile_count += sh_tile_count[th_ix - (1u << i)];
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_tile_count[th_ix] = total_tile_count;
}
if (th_ix == 255u)
{
uint param_1 = total_tile_count * 8u;
MallocResult _392 = malloc(param_1, v_70, v_70BufferSize);
sh_tile_alloc = _392;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
MallocResult alloc_start = sh_tile_alloc;
bool _403;
if (!alloc_start.failed)
{
_403 = v_70.mem_error != 0u;
}
else
{
_403 = alloc_start.failed;
}
if (_403)
{
return;
}
if (element_ix < v_181.conf.n_elements)
{
uint _416;
if (th_ix > 0u)
{
_416 = sh_tile_count[th_ix - 1u];
}
else
{
_416 = 0u;
}
uint tile_subix = _416;
Alloc param_2 = alloc_start.alloc;
uint param_3 = 8u * tile_subix;
uint param_4 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
path.tiles = TileRef{ tiles_alloc.offset };
Alloc param_5;
param_5.offset = v_181.conf.tile_alloc.offset;
PathRef param_6 = path_ref;
Path param_7 = path;
Path_write(param_5, param_6, param_7, v_70, v_70BufferSize);
}
uint total_count = sh_tile_count[255] * 2u;
uint start_ix = alloc_start.alloc.offset >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{
Alloc param_8 = alloc_start.alloc;
uint param_9 = start_ix + i_1;
uint param_10 = 0u;
write_mem(param_8, param_9, param_10, v_70, v_70BufferSize);
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,234 +0,0 @@
struct Alloc
{
uint offset;
};
struct TransformRef
{
uint offset;
};
struct Transform
{
float4 mat;
float2 translate;
};
struct TransformSegRef
{
uint offset;
};
struct TransformSeg
{
float4 mat;
float2 translate;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Transform _224 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
RWByteAddressBuffer _71 : register(u0, space0);
ByteAddressBuffer _96 : register(t2, space0);
ByteAddressBuffer _278 : register(t1, space0);
ByteAddressBuffer _376 : register(t3, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Transform sh_scratch[256];
Transform Transform_read(TransformRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _96.Load((ix + 0u) * 4 + 0);
uint raw1 = _96.Load((ix + 1u) * 4 + 0);
uint raw2 = _96.Load((ix + 2u) * 4 + 0);
uint raw3 = _96.Load((ix + 3u) * 4 + 0);
uint raw4 = _96.Load((ix + 4u) * 4 + 0);
uint raw5 = _96.Load((ix + 5u) * 4 + 0);
Transform s;
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.translate = float2(asfloat(raw4), asfloat(raw5));
return s;
}
TransformRef Transform_index(TransformRef ref, uint index)
{
TransformRef _85 = { ref.offset + (index * 24u) };
return _85;
}
Transform combine_monoid(Transform a, Transform b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
Transform monoid_identity()
{
return _224;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
}
void write_mem(Alloc alloc, uint offset, uint val)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
_71.Store(offset * 4 + 8, val);
}
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = asuint(s.mat.x);
write_mem(param, param_1, param_2);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = asuint(s.mat.y);
write_mem(param_3, param_4, param_5);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = asuint(s.mat.z);
write_mem(param_6, param_7, param_8);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = asuint(s.mat.w);
write_mem(param_9, param_10, param_11);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = asuint(s.translate.x);
write_mem(param_12, param_13, param_14);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = asuint(s.translate.y);
write_mem(param_15, param_16, param_17);
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _285 = { _278.Load(84) + (ix * 24u) };
TransformRef ref = _285;
TransformRef param = ref;
Transform agg = Transform_read(param);
Transform local[8];
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
TransformRef param_1 = ref;
uint param_2 = i;
TransformRef param_3 = Transform_index(param_1, param_2);
Transform param_4 = agg;
Transform param_5 = Transform_read(param_3);
agg = combine_monoid(param_4, param_5);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
Transform param_6 = other;
Transform param_7 = agg;
agg = combine_monoid(param_6, param_7);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
Transform row = monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
Transform _382;
_382.mat = asfloat(_376.Load4((gl_WorkGroupID.x - 1u) * 32 + 0));
_382.translate = asfloat(_376.Load2((gl_WorkGroupID.x - 1u) * 32 + 16));
row.mat = _382.mat;
row.translate = _382.translate;
}
if (gl_LocalInvocationID.x > 0u)
{
Transform param_8 = row;
Transform param_9 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_monoid(param_8, param_9);
}
Alloc param_12;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
Transform param_10 = row;
Transform param_11 = local[i_2];
Transform m = combine_monoid(param_10, param_11);
TransformSeg _422 = { m.mat, m.translate };
TransformSeg transform = _422;
TransformSegRef _432 = { _278.Load(36) + ((ix + i_2) * 24u) };
TransformSegRef trans_ref = _432;
Alloc _436;
_436.offset = _278.Load(36);
param_12.offset = _436.offset;
TransformSegRef param_13 = trans_ref;
TransformSeg param_14 = transform;
TransformSeg_write(param_12, param_13, param_14);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,287 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Alloc
{
uint offset;
};
struct TransformRef
{
uint offset;
};
struct Transform
{
float4 mat;
float2 translate;
};
struct TransformSegRef
{
uint offset;
};
struct TransformSeg
{
float4 mat;
float2 translate;
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
struct SceneBuf
{
uint scene[1];
};
struct Alloc_1
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc_1 tile_alloc;
Alloc_1 bin_alloc;
Alloc_1 ptcl_alloc;
Alloc_1 pathseg_alloc;
Alloc_1 anno_alloc;
Alloc_1 trans_alloc;
Alloc_1 path_bbox_alloc;
Alloc_1 drawmonoid_alloc;
Alloc_1 clip_alloc;
Alloc_1 clip_bic_alloc;
Alloc_1 clip_stack_alloc;
Alloc_1 clip_bbox_alloc;
Alloc_1 draw_bbox_alloc;
Alloc_1 drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Transform_1
{
float4 mat;
float2 translate;
char _m0_final_padding[8];
};
struct ParentBuf
{
Transform_1 parent[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_96)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_96.scene[ix + 0u];
uint raw1 = v_96.scene[ix + 1u];
uint raw2 = v_96.scene[ix + 2u];
uint raw3 = v_96.scene[ix + 3u];
uint raw4 = v_96.scene[ix + 4u];
uint raw5 = v_96.scene[ix + 5u];
Transform s;
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
return s;
}
static inline __attribute__((always_inline))
TransformRef Transform_index(thread const TransformRef& ref, thread const uint& index)
{
return TransformRef{ ref.offset + (index * 24u) };
}
static inline __attribute__((always_inline))
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
static inline __attribute__((always_inline))
Transform monoid_identity()
{
return Transform{ float4(1.0, 0.0, 0.0, 1.0), float2(0.0) };
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
return true;
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_71)
{
Alloc param = alloc;
uint param_1 = offset;
if (!touch_mem(param, param_1))
{
return;
}
v_71.memory[offset] = val;
}
static inline __attribute__((always_inline))
void TransformSeg_write(thread const Alloc& a, thread const TransformSegRef& ref, thread const TransformSeg& s, device Memory& v_71)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.mat.x);
write_mem(param, param_1, param_2, v_71);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.mat.y);
write_mem(param_3, param_4, param_5, v_71);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.mat.z);
write_mem(param_6, param_7, param_8, v_71);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.mat.w);
write_mem(param_9, param_10, param_11, v_71);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.translate.x);
write_mem(param_12, param_13, param_14, v_71);
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = as_type<uint>(s.translate.y);
write_mem(param_15, param_16, param_17, v_71);
}
kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _278 [[buffer(1)]], const device SceneBuf& v_96 [[buffer(2)]], const device ParentBuf& _376 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Transform sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef ref = TransformRef{ _278.conf.trans_offset + (ix * 24u) };
TransformRef param = ref;
Transform agg = Transform_read(param, v_96);
spvUnsafeArray<Transform, 8> local;
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
{
TransformRef param_1 = ref;
uint param_2 = i;
TransformRef param_3 = Transform_index(param_1, param_2);
Transform param_4 = agg;
Transform param_5 = Transform_read(param_3, v_96);
agg = combine_monoid(param_4, param_5);
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
Transform param_6 = other;
Transform param_7 = agg;
agg = combine_monoid(param_6, param_7);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
Transform row = monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _379 = gl_WorkGroupID.x - 1u;
row.mat = _376.parent[_379].mat;
row.translate = _376.parent[_379].translate;
}
if (gl_LocalInvocationID.x > 0u)
{
Transform param_8 = row;
Transform param_9 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_monoid(param_8, param_9);
}
Alloc param_12;
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
Transform param_10 = row;
Transform param_11 = local[i_2];
Transform m = combine_monoid(param_10, param_11);
TransformSeg transform = TransformSeg{ m.mat, m.translate };
TransformSegRef trans_ref = TransformSegRef{ _278.conf.trans_alloc.offset + ((ix + i_2) * 24u) };
param_12.offset = _278.conf.trans_alloc.offset;
TransformSegRef param_13 = trans_ref;
TransformSeg param_14 = transform;
TransformSeg_write(param_12, param_13, param_14, v_71);
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,140 +0,0 @@
struct TransformRef
{
uint offset;
};
struct Transform
{
float4 mat;
float2 translate;
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
ByteAddressBuffer _49 : register(t2, space0);
ByteAddressBuffer _161 : register(t1, space0);
RWByteAddressBuffer _250 : register(u3, space0);
RWByteAddressBuffer _266 : register(u0, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_WorkGroupID : SV_GroupID;
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Transform sh_scratch[256];
Transform Transform_read(TransformRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _49.Load((ix + 0u) * 4 + 0);
uint raw1 = _49.Load((ix + 1u) * 4 + 0);
uint raw2 = _49.Load((ix + 2u) * 4 + 0);
uint raw3 = _49.Load((ix + 3u) * 4 + 0);
uint raw4 = _49.Load((ix + 4u) * 4 + 0);
uint raw5 = _49.Load((ix + 5u) * 4 + 0);
Transform s;
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.translate = float2(asfloat(raw4), asfloat(raw5));
return s;
}
TransformRef Transform_index(TransformRef ref, uint index)
{
TransformRef _37 = { ref.offset + (index * 24u) };
return _37;
}
Transform combine_monoid(Transform a, Transform b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _168 = { _161.Load(84) + (ix * 24u) };
TransformRef ref = _168;
TransformRef param = ref;
Transform agg = Transform_read(param);
for (uint i = 1u; i < 8u; i++)
{
TransformRef param_1 = ref;
uint param_2 = i;
TransformRef param_3 = Transform_index(param_1, param_2);
Transform param_4 = agg;
Transform param_5 = Transform_read(param_3);
agg = combine_monoid(param_4, param_5);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{
Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
Transform param_6 = agg;
Transform param_7 = other;
agg = combine_monoid(param_6, param_7);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_250.Store4(gl_WorkGroupID.x * 32 + 0, asuint(agg.mat));
_250.Store2(gl_WorkGroupID.x * 32 + 16, asuint(agg.translate));
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_WorkGroupID = stage_input.gl_WorkGroupID;
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,153 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct TransformRef
{
uint offset;
};
struct Transform
{
float4 mat;
float2 translate;
};
struct SceneBuf
{
uint scene[1];
};
struct Alloc
{
uint offset;
};
struct Config
{
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
Alloc path_bbox_alloc;
Alloc drawmonoid_alloc;
Alloc clip_alloc;
Alloc clip_bic_alloc;
Alloc clip_stack_alloc;
Alloc clip_bbox_alloc;
Alloc draw_bbox_alloc;
Alloc drawinfo_alloc;
uint n_trans;
uint n_path;
uint n_clip;
uint trans_offset;
uint linewidth_offset;
uint pathtag_offset;
uint pathseg_offset;
uint drawtag_offset;
uint drawdata_offset;
};
struct ConfigBuf
{
Config conf;
};
struct Transform_1
{
float4 mat;
float2 translate;
char _m0_final_padding[8];
};
struct OutBuf
{
Transform_1 outbuf[1];
};
struct Memory
{
uint mem_offset;
uint mem_error;
uint memory[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_49)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_49.scene[ix + 0u];
uint raw1 = v_49.scene[ix + 1u];
uint raw2 = v_49.scene[ix + 2u];
uint raw3 = v_49.scene[ix + 3u];
uint raw4 = v_49.scene[ix + 4u];
uint raw5 = v_49.scene[ix + 5u];
Transform s;
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
return s;
}
static inline __attribute__((always_inline))
TransformRef Transform_index(thread const TransformRef& ref, thread const uint& index)
{
return TransformRef{ ref.offset + (index * 24u) };
}
static inline __attribute__((always_inline))
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
kernel void main0(const device ConfigBuf& _161 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], device OutBuf& _250 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Transform sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef ref = TransformRef{ _161.conf.trans_offset + (ix * 24u) };
TransformRef param = ref;
Transform agg = Transform_read(param, v_49);
for (uint i = 1u; i < 8u; i++)
{
TransformRef param_1 = ref;
uint param_2 = i;
TransformRef param_3 = Transform_index(param_1, param_2);
Transform param_4 = agg;
Transform param_5 = Transform_read(param_3, v_49);
agg = combine_monoid(param_4, param_5);
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
{
Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
Transform param_6 = agg;
Transform param_7 = other;
agg = combine_monoid(param_6, param_7);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
if (gl_LocalInvocationID.x == 0u)
{
_250.outbuf[gl_WorkGroupID.x].mat = agg.mat;
_250.outbuf[gl_WorkGroupID.x].translate = agg.translate;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,94 +0,0 @@
struct Transform
{
float4 mat;
float2 translate;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Transform _23 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
RWByteAddressBuffer _89 : register(u0, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_LocalInvocationID : SV_GroupThreadID;
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
groupshared Transform sh_scratch[256];
Transform combine_monoid(Transform a, Transform b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
Transform monoid_identity()
{
return _23;
}
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
Transform _93;
_93.mat = asfloat(_89.Load4(ix * 32 + 0));
_93.translate = asfloat(_89.Load2(ix * 32 + 16));
Transform local[8];
local[0].mat = _93.mat;
local[0].translate = _93.translate;
Transform param_1;
for (uint i = 1u; i < 8u; i++)
{
Transform param = local[i - 1u];
Transform _119;
_119.mat = asfloat(_89.Load4((ix + i) * 32 + 0));
_119.translate = asfloat(_89.Load2((ix + i) * 32 + 16));
param_1.mat = _119.mat;
param_1.translate = _119.translate;
local[i] = combine_monoid(param, param_1);
}
Transform agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
GroupMemoryBarrierWithGroupSync();
if (gl_LocalInvocationID.x >= (1u << i_1))
{
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
Transform param_2 = other;
Transform param_3 = agg;
agg = combine_monoid(param_2, param_3);
}
GroupMemoryBarrierWithGroupSync();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
GroupMemoryBarrierWithGroupSync();
Transform row = monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
Transform param_4 = row;
Transform param_5 = local[i_2];
Transform m = combine_monoid(param_4, param_5);
uint _208 = ix + i_2;
_89.Store4(_208 * 32 + 0, asuint(m.mat));
_89.Store2(_208 * 32 + 16, asuint(m.translate));
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,129 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];
thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}
device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}
constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}
threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};
struct Transform
{
float4 mat;
float2 translate;
};
struct Transform_1
{
float4 mat;
float2 translate;
char _m0_final_padding[8];
};
struct DataBuf
{
Transform_1 data[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
{
Transform c;
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
return c;
}
static inline __attribute__((always_inline))
Transform monoid_identity()
{
return Transform{ float4(1.0, 0.0, 0.0, 1.0), float2(0.0) };
}
kernel void main0(device DataBuf& _89 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup Transform sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
spvUnsafeArray<Transform, 8> local;
local[0].mat = _89.data[ix].mat;
local[0].translate = _89.data[ix].translate;
Transform param_1;
for (uint i = 1u; i < 8u; i++)
{
uint _113 = ix + i;
Transform param = local[i - 1u];
param_1.mat = _89.data[_113].mat;
param_1.translate = _89.data[_113].translate;
local[i] = combine_monoid(param, param_1);
}
Transform agg = local[7];
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i_1 = 0u; i_1 < 8u; i_1++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
if (gl_LocalInvocationID.x >= (1u << i_1))
{
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
Transform param_2 = other;
Transform param_3 = agg;
agg = combine_monoid(param_2, param_3);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
sh_scratch[gl_LocalInvocationID.x] = agg;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
Transform row = monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
row = sh_scratch[gl_LocalInvocationID.x - 1u];
}
for (uint i_2 = 0u; i_2 < 8u; i_2++)
{
Transform param_4 = row;
Transform param_5 = local[i_2];
Transform m = combine_monoid(param_4, param_5);
uint _208 = ix + i_2;
_89.data[_208].mat = m.mat;
_89.data[_208].translate = m.translate;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,26 +0,0 @@
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
ByteAddressBuffer _19 : register(t0);
RWByteAddressBuffer _32 : register(u1);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
void comp_main()
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _19.Load(0))
{
_32.Store(ix * 4 + 0, _19.Load(4));
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,27 +0,0 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct ConfigBuf
{
uint size;
uint value;
};
struct TargetBuf
{
uint data[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
kernel void main0(const device ConfigBuf& _19 [[buffer(0)]], device TargetBuf& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _19.size)
{
_32.data[ix] = _19.value;
}
}

Binary file not shown.

View file

@ -1,43 +0,0 @@
static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u);
RWByteAddressBuffer _53 : register(u1);
ByteAddressBuffer _59 : register(t0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
uint collatz_iterations(inout uint n)
{
uint i = 0u;
while (n != 1u)
{
if ((n % 2u) == 0u)
{
n /= 2u;
}
else
{
n = (3u * n) + 1u;
}
i++;
}
return i;
}
void comp_main()
{
uint index = gl_GlobalInvocationID.x;
uint param = _59.Load(index * 4 + 0);
uint _65 = collatz_iterations(param);
_53.Store(index * 4 + 0, _65);
}
[numthreads(1, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,46 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct OutBuf
{
uint out_buf[1];
};
struct InBuf
{
uint in_buf[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
static inline __attribute__((always_inline))
uint collatz_iterations(thread uint& n)
{
uint i = 0u;
while (n != 1u)
{
if ((n % 2u) == 0u)
{
n /= 2u;
}
else
{
n = (3u * n) + 1u;
}
i++;
}
return i;
}
kernel void main0(device OutBuf& _53 [[buffer(0)]], const device InBuf& _59 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint index = gl_GlobalInvocationID.x;
uint param = _59.in_buf[index];
uint _65 = collatz_iterations(param);
_53.out_buf[index] = _65;
}

Binary file not shown.

Binary file not shown.

View file

@ -1,39 +0,0 @@
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _56 : register(u0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
void comp_main()
{
uint rng = gl_GlobalInvocationID.x + 1u;
for (uint i = 0u; i < 100u; i++)
{
rng ^= (rng << uint(13));
rng ^= (rng >> uint(17));
rng ^= (rng << uint(5));
uint bucket = rng % 65536u;
if (bucket != 0u)
{
uint _61;
_56.InterlockedAdd(0, 2u, _61);
uint alloc = _61 + 65536u;
uint _67;
_56.InterlockedExchange(bucket * 4 + 0, alloc, _67);
uint old = _67;
_56.Store(alloc * 4 + 0, old);
_56.Store((alloc + 1u) * 4 + 0, gl_GlobalInvocationID.x);
}
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,36 +0,0 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct MemBuf
{
uint mem[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
kernel void main0(device MemBuf& _56 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint rng = gl_GlobalInvocationID.x + 1u;
for (uint i = 0u; i < 100u; i++)
{
rng ^= (rng << uint(13));
rng ^= (rng >> uint(17));
rng ^= (rng << uint(5));
uint bucket = rng % 65536u;
if (bucket != 0u)
{
uint _61 = atomic_fetch_add_explicit((device atomic_uint*)&_56.mem[0], 2u, memory_order_relaxed);
uint alloc = _61 + 65536u;
uint _67 = atomic_exchange_explicit((device atomic_uint*)&_56.mem[bucket], alloc, memory_order_relaxed);
uint old = _67;
_56.mem[alloc] = old;
_56.mem[alloc + 1u] = gl_GlobalInvocationID.x;
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -1,54 +0,0 @@
struct Element
{
uint data;
uint flag;
};
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer data_buf : register(u0);
RWByteAddressBuffer control_buf : register(u1);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
{
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
uint permute_flag_ix(uint data_ix)
{
return (data_ix * 419u) & 65535u;
}
void comp_main()
{
uint _76;
data_buf.InterlockedExchange(gl_GlobalInvocationID.x * 8 + 0, 1u, _76);
DeviceMemoryBarrier();
uint param = gl_GlobalInvocationID.x;
uint write_flag_ix = permute_flag_ix(param);
uint _77;
data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _77);
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
uint param_1 = read_ix;
uint read_flag_ix = permute_flag_ix(param_1);
uint _58;
data_buf.InterlockedAdd(read_flag_ix * 8 + 4, 0, _58);
uint flag = _58;
DeviceMemoryBarrier();
uint _62;
data_buf.InterlockedAdd(read_ix * 8 + 0, 0, _62);
uint data = _62;
if (flag > data)
{
uint _73;
control_buf.InterlockedAdd(0, 1u, _73);
}
}
[numthreads(256, 1, 1)]
void main(SPIRV_Cross_Input stage_input)
{
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
comp_main();
}

View file

@ -1,54 +0,0 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
struct Element
{
uint data;
uint flag;
};
struct DataBuf
{
Element data[1];
};
struct ControlBuf
{
uint failures;
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
uint permute_flag_ix(thread const uint& data_ix)
{
return (data_ix * 419u) & 65535u;
}
kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed);
threadgroup_barrier(mem_flags::mem_device);
uint param = gl_GlobalInvocationID.x;
uint write_flag_ix = permute_flag_ix(param);
atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed);
uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u;
uint param_1 = read_ix;
uint read_flag_ix = permute_flag_ix(param_1);
uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed);
uint flag = _58;
threadgroup_barrier(mem_flags::mem_device);
uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed);
uint data = _62;
if (flag > data)
{
uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed);
}
}

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show more