commit compiled shaders

This commit is contained in:
Commit by GitHub Action 2022-07-14 14:28:45 +00:00
parent d529d3b0e8
commit bbdd4432f5
68 changed files with 1994 additions and 2094 deletions

Binary file not shown.

View file

@ -21,6 +21,7 @@ struct Path
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -52,8 +53,8 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _67 : register(u0, space0);
ByteAddressBuffer _166 : register(t1, space0);
RWByteAddressBuffer _59 : register(u0, space0);
ByteAddressBuffer _181 : register(t1, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
@ -69,6 +70,13 @@ groupshared uint sh_row_width[256];
groupshared Alloc sh_row_alloc[256];
groupshared uint sh_row_count[256];
bool check_deps(uint dep_stage)
{
uint _65;
_59.InterlockedOr(4, 0u, _65);
return (_65 & dep_stage) == 0u;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
@ -82,7 +90,7 @@ uint read_mem(Alloc alloc, uint offset)
{
return 0u;
}
uint v = _67.Load(offset * 4 + 8);
uint v = _59.Load(offset * 4 + 12);
return v;
}
@ -100,8 +108,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _134 = { raw2 };
s.tiles = _134;
TileRef _146 = { raw2 };
s.tiles = _146;
return s;
}
@ -120,47 +128,52 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_67.Store(offset * 4 + 8, val);
_59.Store(offset * 4 + 12, val);
}
void comp_main()
{
uint param = 7u;
bool _154 = check_deps(param);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
bool mem_ok = _67.Load(4) == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _166.Load(0))
if (element_ix < _181.Load(4))
{
PathRef _180 = { _166.Load(16) + (element_ix * 12u) };
PathRef path_ref = _180;
Alloc _185;
_185.offset = _166.Load(16);
Alloc param;
param.offset = _185.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1);
PathRef _195 = { _181.Load(20) + (element_ix * 12u) };
PathRef path_ref = _195;
Alloc _200;
_200.offset = _181.Load(20);
Alloc param_1;
param_1.offset = _200.offset;
PathRef param_2 = path_ref;
Path path = Path_read(param_1, param_2);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u;
bool _216;
if (_210)
bool _225 = row_count == 1u;
bool _231;
if (_225)
{
_216 = path.bbox.y > 0u;
_231 = path.bbox.y > 0u;
}
else
{
_216 = _210;
_231 = _225;
}
if (_216)
if (_231)
{
row_count = 0u;
}
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
uint param_3 = path.tiles.offset;
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_5 = true;
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
@ -168,17 +181,17 @@ void comp_main()
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
bool _276 = gl_LocalInvocationID.y == 0u;
bool _283;
if (_276)
{
_269 = th_ix >= (1u << i);
_283 = th_ix >= (1u << i);
}
else
{
_269 = _262;
_283 = _276;
}
if (_269)
if (_283)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
@ -190,7 +203,7 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255];
uint _348;
uint _360;
for (uint row = th_ix; row < total_rows; row += 256u)
{
uint el_ix = 0u;
@ -203,32 +216,32 @@ void comp_main()
}
}
uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok)
if (width > 0u)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_348 = sh_row_count[el_ix - 1u];
_360 = sh_row_count[el_ix - 1u];
}
else
{
_348 = 0u;
_360 = 0u;
}
uint seq_ix = row - _348;
uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6);
Alloc param_6 = tiles_alloc;
uint param_7 = tile_el_ix;
uint sum = read_mem(param_6, param_7);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11);
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
sum += read_mem(param_8, param_9);
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
uint param_12 = sum;
write_mem(param_10, param_11, param_12);
}
}
}

View file

@ -1,7 +1,9 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
@ -30,6 +32,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -40,6 +43,7 @@ struct Alloc_1
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -76,6 +80,13 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_59)
{
uint _65 = atomic_fetch_or_explicit((device atomic_uint*)&v_59.mem_error, 0u, memory_order_relaxed);
return (_65 & dep_stage) == 0u;
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
@ -83,7 +94,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67)
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59)
{
Alloc param = alloc;
uint param_1 = offset;
@ -91,23 +102,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{
return 0u;
}
uint v = v_67.memory[offset];
uint v = v_59.memory[offset];
return v;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67)
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_67);
uint raw0 = read_mem(param, param_1, v_59);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_67);
uint raw1 = read_mem(param_2, param_3, v_59);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_67);
uint raw2 = read_mem(param_4, param_5, v_59);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
@ -123,7 +134,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67)
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59)
{
Alloc param = alloc;
uint param_1 = offset;
@ -131,47 +142,52 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_67.memory[offset] = val;
v_59.memory[offset] = val;
}
kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256];
uint param = 7u;
bool _154 = check_deps(param, v_59);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
bool mem_ok = v_67.mem_error == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _166.conf.n_elements)
if (element_ix < _181.conf.n_elements)
{
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param;
param.offset = _166.conf.tile_alloc.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1, v_67);
PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param_1;
param_1.offset = _181.conf.tile_alloc.offset;
PathRef param_2 = path_ref;
Path path = Path_read(param_1, param_2, v_59);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u;
bool _216;
if (_210)
bool _225 = row_count == 1u;
bool _231;
if (_225)
{
_216 = path.bbox.y > 0u;
_231 = path.bbox.y > 0u;
}
else
{
_216 = _210;
_231 = _225;
}
if (_216)
if (_231)
{
row_count = 0u;
}
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
uint param_3 = path.tiles.offset;
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_5 = true;
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
@ -179,17 +195,17 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
bool _276 = gl_LocalInvocationID.y == 0u;
bool _283;
if (_276)
{
_269 = th_ix >= (1u << i);
_283 = th_ix >= (1u << i);
}
else
{
_269 = _262;
_283 = _276;
}
if (_269)
if (_283)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
@ -201,7 +217,7 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255];
uint _348;
uint _360;
for (uint row = th_ix; row < total_rows; row += 256u)
{
uint el_ix = 0u;
@ -214,32 +230,32 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
}
}
uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok)
if (width > 0u)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_348 = sh_row_count[el_ix - 1u];
_360 = sh_row_count[el_ix - 1u];
}
else
{
_348 = 0u;
_360 = 0u;
}
uint seq_ix = row - _348;
uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6, v_67);
Alloc param_6 = tiles_alloc;
uint param_7 = tile_el_ix;
uint sum = read_mem(param_6, param_7, v_59);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8, v_67);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11, v_67);
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
sum += read_mem(param_8, param_9, v_59);
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
uint param_12 = sum;
write_mem(param_10, param_11, param_12, v_59);
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -21,6 +21,7 @@ struct Path
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -52,8 +53,8 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u);
RWByteAddressBuffer _67 : register(u0, space0);
ByteAddressBuffer _166 : register(t1, space0);
RWByteAddressBuffer _59 : register(u0, space0);
ByteAddressBuffer _181 : register(t1, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
@ -69,6 +70,13 @@ groupshared uint sh_row_width[256];
groupshared Alloc sh_row_alloc[256];
groupshared uint sh_row_count[256];
bool check_deps(uint dep_stage)
{
uint _65;
_59.InterlockedOr(4, 0u, _65);
return (_65 & dep_stage) == 0u;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
@ -82,7 +90,7 @@ uint read_mem(Alloc alloc, uint offset)
{
return 0u;
}
uint v = _67.Load(offset * 4 + 8);
uint v = _59.Load(offset * 4 + 12);
return v;
}
@ -100,8 +108,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _134 = { raw2 };
s.tiles = _134;
TileRef _146 = { raw2 };
s.tiles = _146;
return s;
}
@ -120,47 +128,52 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_67.Store(offset * 4 + 8, val);
_59.Store(offset * 4 + 12, val);
}
void comp_main()
{
uint param = 7u;
bool _154 = check_deps(param);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
bool mem_ok = _67.Load(4) == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _166.Load(0))
if (element_ix < _181.Load(4))
{
PathRef _180 = { _166.Load(16) + (element_ix * 12u) };
PathRef path_ref = _180;
Alloc _185;
_185.offset = _166.Load(16);
Alloc param;
param.offset = _185.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1);
PathRef _195 = { _181.Load(20) + (element_ix * 12u) };
PathRef path_ref = _195;
Alloc _200;
_200.offset = _181.Load(20);
Alloc param_1;
param_1.offset = _200.offset;
PathRef param_2 = path_ref;
Path path = Path_read(param_1, param_2);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u;
bool _216;
if (_210)
bool _225 = row_count == 1u;
bool _231;
if (_225)
{
_216 = path.bbox.y > 0u;
_231 = path.bbox.y > 0u;
}
else
{
_216 = _210;
_231 = _225;
}
if (_216)
if (_231)
{
row_count = 0u;
}
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
uint param_3 = path.tiles.offset;
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_5 = true;
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
@ -168,17 +181,17 @@ void comp_main()
for (uint i = 0u; i < 8u; i++)
{
GroupMemoryBarrierWithGroupSync();
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
bool _276 = gl_LocalInvocationID.y == 0u;
bool _283;
if (_276)
{
_269 = th_ix >= (1u << i);
_283 = th_ix >= (1u << i);
}
else
{
_269 = _262;
_283 = _276;
}
if (_269)
if (_283)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
@ -190,7 +203,7 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255];
uint _348;
uint _360;
for (uint row = th_ix; row < total_rows; row += 1024u)
{
uint el_ix = 0u;
@ -203,32 +216,32 @@ void comp_main()
}
}
uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok)
if (width > 0u)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_348 = sh_row_count[el_ix - 1u];
_360 = sh_row_count[el_ix - 1u];
}
else
{
_348 = 0u;
_360 = 0u;
}
uint seq_ix = row - _348;
uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6);
Alloc param_6 = tiles_alloc;
uint param_7 = tile_el_ix;
uint sum = read_mem(param_6, param_7);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11);
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
sum += read_mem(param_8, param_9);
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
uint param_12 = sum;
write_mem(param_10, param_11, param_12);
}
}
}

View file

@ -1,7 +1,9 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
@ -30,6 +32,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -40,6 +43,7 @@ struct Alloc_1
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -76,6 +80,13 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 4u, 1u);
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_59)
{
uint _65 = atomic_fetch_or_explicit((device atomic_uint*)&v_59.mem_error, 0u, memory_order_relaxed);
return (_65 & dep_stage) == 0u;
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
@ -83,7 +94,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67)
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59)
{
Alloc param = alloc;
uint param_1 = offset;
@ -91,23 +102,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{
return 0u;
}
uint v = v_67.memory[offset];
uint v = v_59.memory[offset];
return v;
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67)
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_67);
uint raw0 = read_mem(param, param_1, v_59);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_67);
uint raw1 = read_mem(param_2, param_3, v_59);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_67);
uint raw2 = read_mem(param_4, param_5, v_59);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
@ -123,7 +134,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67)
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59)
{
Alloc param = alloc;
uint param_1 = offset;
@ -131,47 +142,52 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_67.memory[offset] = val;
v_59.memory[offset] = val;
}
kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256];
uint param = 7u;
bool _154 = check_deps(param, v_59);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u;
bool mem_ok = v_67.mem_error == 0u;
if (gl_LocalInvocationID.y == 0u)
{
if (element_ix < _166.conf.n_elements)
if (element_ix < _181.conf.n_elements)
{
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param;
param.offset = _166.conf.tile_alloc.offset;
PathRef param_1 = path_ref;
Path path = Path_read(param, param_1, v_67);
PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param_1;
param_1.offset = _181.conf.tile_alloc.offset;
PathRef param_2 = path_ref;
Path path = Path_read(param_1, param_2, v_59);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u;
bool _216;
if (_210)
bool _225 = row_count == 1u;
bool _231;
if (_225)
{
_216 = path.bbox.y > 0u;
_231 = path.bbox.y > 0u;
}
else
{
_216 = _210;
_231 = _225;
}
if (_216)
if (_231)
{
row_count = 0u;
}
uint param_2 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok;
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
uint param_3 = path.tiles.offset;
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_5 = true;
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc;
}
sh_row_count[th_ix] = row_count;
@ -179,17 +195,17 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
for (uint i = 0u; i < 8u; i++)
{
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _262 = gl_LocalInvocationID.y == 0u;
bool _269;
if (_262)
bool _276 = gl_LocalInvocationID.y == 0u;
bool _283;
if (_276)
{
_269 = th_ix >= (1u << i);
_283 = th_ix >= (1u << i);
}
else
{
_269 = _262;
_283 = _276;
}
if (_269)
if (_283)
{
row_count += sh_row_count[th_ix - (1u << i)];
}
@ -201,7 +217,7 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255];
uint _348;
uint _360;
for (uint row = th_ix; row < total_rows; row += 1024u)
{
uint el_ix = 0u;
@ -214,32 +230,32 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
}
}
uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok)
if (width > 0u)
{
Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u)
{
_348 = sh_row_count[el_ix - 1u];
_360 = sh_row_count[el_ix - 1u];
}
else
{
_348 = 0u;
_360 = 0u;
}
uint seq_ix = row - _348;
uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc;
uint param_6 = tile_el_ix;
uint sum = read_mem(param_5, param_6, v_67);
Alloc param_6 = tiles_alloc;
uint param_7 = tile_el_ix;
uint sum = read_mem(param_6, param_7, v_59);
for (uint x = 1u; x < width; x++)
{
tile_el_ix += 2u;
Alloc param_7 = tiles_alloc;
uint param_8 = tile_el_ix;
sum += read_mem(param_7, param_8, v_67);
Alloc param_9 = tiles_alloc;
uint param_10 = tile_el_ix;
uint param_11 = sum;
write_mem(param_9, param_10, param_11, v_67);
Alloc param_8 = tiles_alloc;
uint param_9 = tile_el_ix;
sum += read_mem(param_8, param_9, v_59);
Alloc param_10 = tiles_alloc;
uint param_11 = tile_el_ix;
uint param_12 = sum;
write_mem(param_10, param_11, param_12, v_59);
}
}
}

Binary file not shown.

Binary file not shown.

View file

@ -5,6 +5,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -48,13 +49,13 @@ struct SPIRV_Cross_Input
void comp_main()
{
uint ix = gl_GlobalInvocationID.x;
if (ix < _21.Load(76))
if (ix < _21.Load(80))
{
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix);
_45.Store(out_ix * 4 + 8, 65535u);
_45.Store((out_ix + 1u) * 4 + 8, 65535u);
_45.Store((out_ix + 2u) * 4 + 8, 0u);
_45.Store((out_ix + 3u) * 4 + 8, 0u);
uint out_ix = (_21.Load(44) >> uint(2)) + (6u * ix);
_45.Store(out_ix * 4 + 12, 65535u);
_45.Store((out_ix + 1u) * 4 + 12, 65535u);
_45.Store((out_ix + 2u) * 4 + 12, 0u);
_45.Store((out_ix + 3u) * 4 + 12, 0u);
}
}

View file

@ -10,6 +10,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -48,6 +49,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};

Binary file not shown.

Binary file not shown.

View file

@ -3,22 +3,6 @@ struct Alloc
uint offset;
};
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct BinInstanceRef
{
uint offset;
};
struct BinInstance
{
uint element_ix;
};
struct DrawMonoid
{
uint path_ix;
@ -29,6 +13,7 @@ struct DrawMonoid
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -60,8 +45,8 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _81 : register(u0, space0);
ByteAddressBuffer _156 : register(t1, space0);
RWByteAddressBuffer _57 : register(u0, space0);
ByteAddressBuffer _101 : register(t1, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -72,39 +57,38 @@ struct SPIRV_Cross_Input
};
groupshared uint bitmaps[8][256];
groupshared bool sh_alloc_failed;
groupshared uint count[8][256];
groupshared Alloc sh_chunk_alloc[256];
groupshared uint sh_chunk_offset[256];
DrawMonoid load_draw_monoid(uint element_ix)
{
uint base = (_156.Load(44) >> uint(2)) + (4u * element_ix);
uint path_ix = _81.Load(base * 4 + 8);
uint clip_ix = _81.Load((base + 1u) * 4 + 8);
uint scene_offset = _81.Load((base + 2u) * 4 + 8);
uint info_offset = _81.Load((base + 3u) * 4 + 8);
DrawMonoid _190 = { path_ix, clip_ix, scene_offset, info_offset };
return _190;
uint base = (_101.Load(48) >> uint(2)) + (4u * element_ix);
uint path_ix = _57.Load(base * 4 + 12);
uint clip_ix = _57.Load((base + 1u) * 4 + 12);
uint scene_offset = _57.Load((base + 2u) * 4 + 12);
uint info_offset = _57.Load((base + 3u) * 4 + 12);
DrawMonoid _136 = { path_ix, clip_ix, scene_offset, info_offset };
return _136;
}
float4 load_clip_bbox(uint clip_ix)
{
uint base = (_156.Load(60) >> uint(2)) + (4u * clip_ix);
float x0 = asfloat(_81.Load(base * 4 + 8));
float y0 = asfloat(_81.Load((base + 1u) * 4 + 8));
float x1 = asfloat(_81.Load((base + 2u) * 4 + 8));
float y1 = asfloat(_81.Load((base + 3u) * 4 + 8));
uint base = (_101.Load(64) >> uint(2)) + (4u * clip_ix);
float x0 = asfloat(_57.Load(base * 4 + 12));
float y0 = asfloat(_57.Load((base + 1u) * 4 + 12));
float x1 = asfloat(_57.Load((base + 2u) * 4 + 12));
float y1 = asfloat(_57.Load((base + 3u) * 4 + 12));
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
float4 load_path_bbox(uint path_ix)
{
uint base = (_156.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_81.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_81.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_81.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_81.Load((base + 3u) * 4 + 8)) - 32768.0f;
uint base = (_101.Load(44) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_57.Load(base * 4 + 12)) - 32768.0f;
float bbox_t = float(_57.Load((base + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_57.Load((base + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_57.Load((base + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
@ -116,41 +100,25 @@ float4 bbox_intersect(float4 a, float4 b)
void store_draw_bbox(uint draw_ix, float4 bbox)
{
uint base = (_156.Load(64) >> uint(2)) + (4u * draw_ix);
_81.Store(base * 4 + 8, asuint(bbox.x));
_81.Store((base + 1u) * 4 + 8, asuint(bbox.y));
_81.Store((base + 2u) * 4 + 8, asuint(bbox.z));
_81.Store((base + 3u) * 4 + 8, asuint(bbox.w));
uint base = (_101.Load(68) >> uint(2)) + (4u * draw_ix);
_57.Store(base * 4 + 12, asuint(bbox.x));
_57.Store((base + 1u) * 4 + 12, asuint(bbox.y));
_57.Store((base + 2u) * 4 + 12, asuint(bbox.z));
_57.Store((base + 3u) * 4 + 12, asuint(bbox.w));
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
uint malloc_stage(uint size, uint mem_size, uint stage)
{
Alloc a;
a.offset = offset;
return a;
}
MallocResult malloc(uint size)
{
uint _87;
_81.InterlockedAdd(0, size, _87);
uint offset = _87;
uint _94;
_81.GetDimensions(_94);
_94 = (_94 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_94) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
uint _65;
_57.InterlockedAdd(0, size, _65);
uint offset = _65;
if ((offset + size) > mem_size)
{
uint _116;
_81.InterlockedMax(4, 1u, _116);
return r;
uint _76;
_57.InterlockedOr(4, stage, _76);
offset = 0u;
}
return r;
return offset;
}
bool touch_mem(Alloc alloc, uint offset)
@ -166,16 +134,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_81.Store(offset * 4 + 8, val);
}
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.element_ix;
write_mem(param, param_1, param_2);
_57.Store(offset * 4 + 12, val);
}
void comp_main()
@ -185,17 +144,12 @@ void comp_main()
{
bitmaps[i][gl_LocalInvocationID.x] = 0u;
}
if (gl_LocalInvocationID.x == 0u)
{
sh_alloc_failed = false;
}
GroupMemoryBarrierWithGroupSync();
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
if (element_ix < _156.Load(0))
if (element_ix < _101.Load(4))
{
uint param = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param);
@ -212,11 +166,11 @@ void comp_main()
float4 param_3 = path_bbox;
float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_3, param_4);
float4 _417 = bbox;
float4 _419 = bbox;
float2 _421 = max(_417.xy, _419.zw);
bbox.z = _421.x;
bbox.w = _421.y;
float4 _354 = bbox;
float4 _356 = bbox;
float2 _358 = max(_354.xy, _356.zw);
bbox.z = _358.x;
bbox.w = _358.y;
uint param_5 = element_ix;
float4 param_6 = bbox;
store_draw_bbox(param_5, param_6);
@ -225,8 +179,8 @@ void comp_main()
x1 = int(ceil(bbox.z * 0.00390625f));
y1 = int(ceil(bbox.w * 0.00390625f));
}
uint width_in_bins = ((_156.Load(8) + 16u) - 1u) / 16u;
uint height_in_bins = ((_156.Load(12) + 16u) - 1u) / 16u;
uint width_in_bins = ((_101.Load(12) + 16u) - 1u) / 16u;
uint height_in_bins = ((_101.Load(16) + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
@ -241,8 +195,8 @@ void comp_main()
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1)
{
uint _523;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _523);
uint _460;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _460);
x++;
if (x == x1)
{
@ -257,51 +211,32 @@ void comp_main()
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count;
}
uint param_7 = 0u;
uint param_8 = 0u;
bool param_9 = true;
Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
uint chunk_offset = 0u;
if (element_count != 0u)
{
uint param_10 = element_count * 4u;
MallocResult _573 = malloc(param_10);
MallocResult chunk = _573;
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed)
{
sh_alloc_failed = true;
}
uint param_7 = element_count * 4u;
uint param_8 = _101.Load(0);
uint param_9 = 1u;
uint _510 = malloc_stage(param_7, param_8, param_9);
chunk_offset = _510;
sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset;
}
uint out_ix = (_156.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _603;
_603.offset = _156.Load(20);
Alloc param_11;
param_11.offset = _603.offset;
uint param_12 = out_ix;
uint param_13 = element_count;
write_mem(param_11, param_12, param_13);
Alloc _615;
_615.offset = _156.Load(20);
Alloc param_14;
param_14.offset = _615.offset;
uint param_15 = out_ix + 1u;
uint param_16 = chunk_alloc.offset;
write_mem(param_14, param_15, param_16);
uint out_ix = (_101.Load(24) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _532;
_532.offset = _101.Load(24);
Alloc param_10;
param_10.offset = _532.offset;
uint param_11 = out_ix;
uint param_12 = element_count;
write_mem(param_10, param_11, param_12);
Alloc _544;
_544.offset = _101.Load(24);
Alloc param_13;
param_13.offset = _544.offset;
uint param_14 = out_ix + 1u;
uint param_15 = chunk_offset;
write_mem(param_13, param_14, param_15);
GroupMemoryBarrierWithGroupSync();
bool _630;
if (!sh_alloc_failed)
{
_630 = _81.Load(4) != 0u;
}
else
{
_630 = sh_alloc_failed;
}
if (_630)
{
return;
}
x = x0;
y = y0;
while (y < y1)
@ -315,14 +250,11 @@ void comp_main()
{
idx += count[my_slice - 1u][bin_ix];
}
Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u);
BinInstanceRef _692 = { out_offset };
BinInstance _694 = { element_ix };
Alloc param_17 = out_alloc;
BinInstanceRef param_18 = _692;
BinInstance param_19 = _694;
BinInstance_write(param_17, param_18, param_19);
uint chunk_offset_1 = sh_chunk_offset[bin_ix];
if (chunk_offset_1 != 0u)
{
_57.Store(((chunk_offset_1 >> uint(2)) + idx) * 4 + 12, element_ix);
}
}
x++;
if (x == x1)

View file

@ -12,22 +12,6 @@ struct Alloc
uint offset;
};
// Return value of malloc(): the Alloc it produced plus a flag that malloc()
// sets when the requested range extends past the usable buffer size.
struct MallocResult
{
Alloc alloc;
bool failed;
};
// Handle to a BinInstance slot. BinInstance_write shifts `offset` right by 2
// before indexing the uint memory array (presumably a byte offset -> word index).
struct BinInstanceRef
{
uint offset;
};
// Payload stored for one bin entry: a single word holding an element index
// (BinInstance_write stores exactly this field).
struct BinInstance
{
uint element_ix;
};
struct DrawMonoid
{
uint path_ix;
@ -40,6 +24,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -50,6 +35,7 @@ struct Alloc_1
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -87,36 +73,36 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Reads the 4-word draw monoid record for `element_ix` from the memory buffer
// and returns it as DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset }.
// The record starts at word (drawmonoid_alloc.offset >> 2) + 4 * element_ix.
//
// NOTE(review): two versions of the signature and of the body appear back to
// back below -- one using v_81 / v_156 with an extra v_81BufferSize parameter,
// one using v_57 / v_101 without it. This looks like the before/after sides of
// a diff rendered without +/- markers; the loads themselves are the same.
static inline __attribute__((always_inline))
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_57, const device ConfigBuf& v_101)
{
// Variant reading through v_156 / v_81.
uint base = (v_156.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix);
uint path_ix = v_81.memory[base];
uint clip_ix = v_81.memory[base + 1u];
uint scene_offset = v_81.memory[base + 2u];
uint info_offset = v_81.memory[base + 3u];
// Variant reading through v_101 / v_57.
uint base = (v_101.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix);
uint path_ix = v_57.memory[base];
uint clip_ix = v_57.memory[base + 1u];
uint scene_offset = v_57.memory[base + 2u];
uint info_offset = v_57.memory[base + 3u];
return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset };
}
// Loads the clip bounding box for `clip_ix` as float4(x0, y0, x1, y1).
// Four consecutive words starting at (clip_bbox_alloc.offset >> 2) + 4 * clip_ix
// are reinterpreted bit-for-bit as floats via as_type<float>.
//
// NOTE(review): the signature and the loads appear twice (v_81 / v_156 with
// v_81BufferSize, then v_57 / v_101 without it); this looks like both sides of
// a diff shown without +/- markers.
static inline __attribute__((always_inline))
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_57, const device ConfigBuf& v_101)
{
// Variant reading through v_156 / v_81.
uint base = (v_156.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
float x0 = as_type<float>(v_81.memory[base]);
float y0 = as_type<float>(v_81.memory[base + 1u]);
float x1 = as_type<float>(v_81.memory[base + 2u]);
float y1 = as_type<float>(v_81.memory[base + 3u]);
// Variant reading through v_101 / v_57.
uint base = (v_101.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
float x0 = as_type<float>(v_57.memory[base]);
float y0 = as_type<float>(v_57.memory[base + 1u]);
float x1 = as_type<float>(v_57.memory[base + 2u]);
float y1 = as_type<float>(v_57.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
// Loads the bounding box of path `path_ix` as float4(left, top, right, bottom).
// Path records are addressed with a stride of 6 words from
// (path_bbox_alloc.offset >> 2); only the first four words are read here.
// Each word is converted from uint to float and then offset by -32768.0.
//
// NOTE(review): the signature and the loads appear twice (v_81 / v_156 with
// v_81BufferSize, then v_57 / v_101 without it); this looks like both sides of
// a diff shown without +/- markers.
static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_57, const device ConfigBuf& v_101)
{
// Variant reading through v_156 / v_81.
uint base = (v_156.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_81.memory[base]) - 32768.0;
float bbox_t = float(v_81.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_81.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_81.memory[base + 3u]) - 32768.0;
// Variant reading through v_101 / v_57.
uint base = (v_101.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_57.memory[base]) - 32768.0;
float bbox_t = float(v_57.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_57.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_57.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
@ -128,40 +114,26 @@ float4 bbox_intersect(thread const float4& a, thread const float4& b)
}
// Writes `bbox` for draw object `draw_ix` into the memory buffer as four words
// (x, y, z, w), each stored as the raw bit pattern of the float (as_type<uint>).
// The destination starts at word (draw_bbox_alloc.offset >> 2) + 4 * draw_ix.
//
// NOTE(review): the signature and the stores appear twice (v_81 / v_156 with
// v_81BufferSize, then v_57 / v_101 without it); this looks like both sides of
// a diff shown without +/- markers.
static inline __attribute__((always_inline))
void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_57, const device ConfigBuf& v_101)
{
// Variant writing through v_156 / v_81.
uint base = (v_156.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
v_81.memory[base] = as_type<uint>(bbox.x);
v_81.memory[base + 1u] = as_type<uint>(bbox.y);
v_81.memory[base + 2u] = as_type<uint>(bbox.z);
v_81.memory[base + 3u] = as_type<uint>(bbox.w);
// Variant writing through v_101 / v_57.
uint base = (v_101.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
v_57.memory[base] = as_type<uint>(bbox.x);
v_57.memory[base + 1u] = as_type<uint>(bbox.y);
v_57.memory[base + 2u] = as_type<uint>(bbox.z);
v_57.memory[base + 3u] = as_type<uint>(bbox.w);
}
// NOTE(review): this region interleaves three definitions with no +/- markers:
//   * new_alloc(offset, size, mem_ok) -- builds an Alloc holding only `offset`
//     (`size` and `mem_ok` are not used in the visible body);
//   * malloc(size, v_81, v_81BufferSize) -- returns a MallocResult;
//   * malloc_stage(size, mem_size, stage, v_57) -- returns a plain uint offset.
// The v_81-based lines seem to belong to new_alloc/malloc and the v_57-based
// lines to malloc_stage; confirm against the un-rendered sources.
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_57)
{
// new_alloc body: wrap the offset in an Alloc.
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_81, constant uint& v_81BufferSize)
{
// malloc body: bump mem_offset atomically (relaxed) by `size`; the value
// returned by the fetch-add is the start of the new allocation.
uint _87 = atomic_fetch_add_explicit((device atomic_uint*)&v_81.mem_offset, size, memory_order_relaxed);
uint offset = _87;
MallocResult r;
// Fails when the end of the allocation exceeds (v_81BufferSize - 8) rounded
// down to a multiple of 4.
r.failed = (offset + size) > uint(int((v_81BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
// malloc_stage body: same atomic bump of mem_offset, but the limit is the
// caller-supplied `mem_size`.
uint _65 = atomic_fetch_add_explicit((device atomic_uint*)&v_57.mem_offset, size, memory_order_relaxed);
uint offset = _65;
if ((offset + size) > mem_size)
{
// malloc failure path: raise mem_error to at least 1 via atomic max.
uint _116 = atomic_fetch_max_explicit((device atomic_uint*)&v_81.mem_error, 1u, memory_order_relaxed);
return r;
// malloc_stage failure path: OR the `stage` bits into mem_error and report
// the failure to the caller by returning offset 0.
uint _76 = atomic_fetch_or_explicit((device atomic_uint*)&v_57.mem_error, stage, memory_order_relaxed);
offset = 0u;
}
return r;
return offset;
}
static inline __attribute__((always_inline))
@ -171,7 +143,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_81, constant uint& v_81BufferSize)
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_57)
{
Alloc param = alloc;
uint param_1 = offset;
@ -179,73 +151,56 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_81.memory[offset] = val;
v_57.memory[offset] = val;
}
// Stores the single field of a BinInstance (`s.element_ix`) into the memory
// buffer through write_mem. The target word index is ref.offset >> 2; the
// Alloc `a` is forwarded unchanged to write_mem.
static inline __attribute__((always_inline))
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_81, constant uint& v_81BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
// "+ 0u" is the field's word position within the record (first and only field).
uint param_1 = ix + 0u;
uint param_2 = s.element_ix;
write_mem(param, param_1, param_2, v_81, v_81BufferSize);
}
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_81 [[buffer(0)]], const device ConfigBuf& v_156 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
kernel void main0(device Memory& v_57 [[buffer(0)]], const device ConfigBuf& v_101 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
threadgroup uint bitmaps[8][256];
threadgroup short sh_alloc_failed;
threadgroup uint count[8][256];
threadgroup Alloc sh_chunk_alloc[256];
constant uint& v_81BufferSize = spvBufferSizeConstants[0];
threadgroup uint sh_chunk_offset[256];
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++)
{
bitmaps[i][gl_LocalInvocationID.x] = 0u;
}
if (gl_LocalInvocationID.x == 0u)
{
sh_alloc_failed = short(false);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
int x0 = 0;
int y0 = 0;
int x1 = 0;
int y1 = 0;
if (element_ix < v_156.conf.n_elements)
if (element_ix < v_101.conf.n_elements)
{
uint param = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param, v_81, v_81BufferSize, v_156);
DrawMonoid draw_monoid = load_draw_monoid(param, v_57, v_101);
uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u)
{
uint param_1 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_1, v_81, v_81BufferSize, v_156);
clip_bbox = load_clip_bbox(param_1, v_57, v_101);
}
uint param_2 = path_ix;
float4 path_bbox = load_path_bbox(param_2, v_81, v_81BufferSize, v_156);
float4 path_bbox = load_path_bbox(param_2, v_57, v_101);
float4 param_3 = path_bbox;
float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_3, param_4);
float4 _417 = bbox;
float4 _419 = bbox;
float2 _421 = fast::max(_417.xy, _419.zw);
bbox.z = _421.x;
bbox.w = _421.y;
float4 _354 = bbox;
float4 _356 = bbox;
float2 _358 = fast::max(_354.xy, _356.zw);
bbox.z = _358.x;
bbox.w = _358.y;
uint param_5 = element_ix;
float4 param_6 = bbox;
store_draw_bbox(param_5, param_6, v_81, v_81BufferSize, v_156);
store_draw_bbox(param_5, param_6, v_57, v_101);
x0 = int(floor(bbox.x * 0.00390625));
y0 = int(floor(bbox.y * 0.00390625));
x1 = int(ceil(bbox.z * 0.00390625));
y1 = int(ceil(bbox.w * 0.00390625));
}
uint width_in_bins = ((v_156.conf.width_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((v_156.conf.height_in_tiles + 16u) - 1u) / 16u;
uint width_in_bins = ((v_101.conf.width_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((v_101.conf.height_in_tiles + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
@ -260,7 +215,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1)
{
uint _523 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
uint _460 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
x++;
if (x == x1)
{
@ -275,47 +230,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count;
}
uint param_7 = 0u;
uint param_8 = 0u;
bool param_9 = true;
Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
uint chunk_offset = 0u;
if (element_count != 0u)
{
uint param_10 = element_count * 4u;
MallocResult _573 = malloc(param_10, v_81, v_81BufferSize);
MallocResult chunk = _573;
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed)
{
sh_alloc_failed = short(true);
}
uint param_7 = element_count * 4u;
uint param_8 = v_101.conf.mem_size;
uint param_9 = 1u;
uint _510 = malloc_stage(param_7, param_8, param_9, v_57);
chunk_offset = _510;
sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset;
}
uint out_ix = (v_156.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_11;
param_11.offset = v_156.conf.bin_alloc.offset;
uint param_12 = out_ix;
uint param_13 = element_count;
write_mem(param_11, param_12, param_13, v_81, v_81BufferSize);
Alloc param_14;
param_14.offset = v_156.conf.bin_alloc.offset;
uint param_15 = out_ix + 1u;
uint param_16 = chunk_alloc.offset;
write_mem(param_14, param_15, param_16, v_81, v_81BufferSize);
uint out_ix = (v_101.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_10;
param_10.offset = v_101.conf.bin_alloc.offset;
uint param_11 = out_ix;
uint param_12 = element_count;
write_mem(param_10, param_11, param_12, v_57);
Alloc param_13;
param_13.offset = v_101.conf.bin_alloc.offset;
uint param_14 = out_ix + 1u;
uint param_15 = chunk_offset;
write_mem(param_13, param_14, param_15, v_57);
threadgroup_barrier(mem_flags::mem_threadgroup);
bool _630;
if (!bool(sh_alloc_failed))
{
_630 = v_81.mem_error != 0u;
}
else
{
_630 = bool(sh_alloc_failed);
}
if (_630)
{
return;
}
x = x0;
y = y0;
while (y < y1)
@ -329,12 +265,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
{
idx += count[my_slice - 1u][bin_ix];
}
Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u);
Alloc param_17 = out_alloc;
BinInstanceRef param_18 = BinInstanceRef{ out_offset };
BinInstance param_19 = BinInstance{ element_ix };
BinInstance_write(param_17, param_18, param_19, v_81, v_81BufferSize);
uint chunk_offset_1 = sh_chunk_offset[bin_ix];
if (chunk_offset_1 != 0u)
{
v_57.memory[(chunk_offset_1 >> uint(2)) + idx] = element_ix;
}
}
x++;
if (x == x1)

Binary file not shown.

Binary file not shown.

View file

@ -17,6 +17,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -48,7 +49,7 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _393 = { 0u, 0u };
static const Bic _394 = { 0u, 0u };
ByteAddressBuffer _80 : register(t1, space0);
RWByteAddressBuffer _96 : register(u0, space0);
@ -71,9 +72,9 @@ groupshared float4 sh_bbox[256];
// Loads the Bic at index `ix`: two consecutive words read from buffer _96.
// The base word index is (value loaded from config buffer _80) >> 2, plus 2 * ix.
//
// NOTE(review): two versions of the body appear back to back (a diff rendered
// without +/- markers). They differ only in the config byte offset (52 vs 56)
// and in the constant added to every _96 byte address (8 vs 12) -- presumably
// one extra leading word in each buffer layout; confirm against the sources.
Bic load_bic(uint ix)
{
// Variant using config offset 52 and a +8 address bias.
uint base = (_80.Load(52) >> uint(2)) + (2u * ix);
Bic _286 = { _96.Load(base * 4 + 8), _96.Load((base + 1u) * 4 + 8) };
return _286;
// Variant using config offset 56 and a +12 address bias.
uint base = (_80.Load(56) >> uint(2)) + (2u * ix);
Bic _287 = { _96.Load(base * 4 + 12), _96.Load((base + 1u) * 4 + 12) };
return _287;
}
Bic bic_combine(Bic x, Bic y)
@ -85,15 +86,15 @@ Bic bic_combine(Bic x, Bic y)
// Loads the ClipEl at index `ix` from buffer _96. Each element occupies five
// words: parent_ix followed by four words reinterpreted as floats (asfloat)
// that form the bbox float4(x0, y0, x1, y1).
//
// NOTE(review): two versions of the body appear back to back (a diff rendered
// without +/- markers). They differ in the config byte offset (56 vs 60), the
// constant added to each _96 byte address (8 vs 12), and the generated
// temporary name (_335 vs _336).
ClipEl load_clip_el(uint ix)
{
// Variant using config offset 56 and a +8 address bias.
uint base = (_80.Load(56) >> uint(2)) + (5u * ix);
uint parent_ix = _96.Load(base * 4 + 8);
float x0 = asfloat(_96.Load((base + 1u) * 4 + 8));
float y0 = asfloat(_96.Load((base + 2u) * 4 + 8));
float x1 = asfloat(_96.Load((base + 3u) * 4 + 8));
float y1 = asfloat(_96.Load((base + 4u) * 4 + 8));
// Variant using config offset 60 and a +12 address bias.
uint base = (_80.Load(60) >> uint(2)) + (5u * ix);
uint parent_ix = _96.Load(base * 4 + 12);
float x0 = asfloat(_96.Load((base + 1u) * 4 + 12));
float y0 = asfloat(_96.Load((base + 2u) * 4 + 12));
float x1 = asfloat(_96.Load((base + 3u) * 4 + 12));
float y1 = asfloat(_96.Load((base + 4u) * 4 + 12));
float4 bbox = float4(x0, y0, x1, y1);
ClipEl _335 = { parent_ix, bbox };
return _335;
ClipEl _336 = { parent_ix, bbox };
return _336;
}
float4 bbox_intersect(float4 a, float4 b)
@ -103,9 +104,9 @@ float4 bbox_intersect(float4 a, float4 b)
uint load_path_ix(uint ix)
{
if (ix < _80.Load(80))
if (ix < _80.Load(84))
{
return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8);
return _96.Load(((_80.Load(52) >> uint(2)) + ix) * 4 + 12);
}
else
{
@ -115,11 +116,11 @@ uint load_path_ix(uint ix)
// Loads the bounding box of path `path_ix` from buffer _96 as
// float4(left, top, right, bottom). Path records use a stride of 6 words; only
// the first four are read here. Each word is converted from uint to float and
// then offset by -32768.0f.
//
// NOTE(review): two versions of the loads appear back to back (a diff rendered
// without +/- markers), differing in the config byte offset (40 vs 44) and in
// the constant added to each _96 byte address (8 vs 12).
float4 load_path_bbox(uint path_ix)
{
// Variant using config offset 40 and a +8 address bias.
uint base = (_80.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_96.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_96.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_96.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_96.Load((base + 3u) * 4 + 8)) - 32768.0f;
// Variant using config offset 44 and a +12 address bias.
uint base = (_80.Load(44) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_96.Load(base * 4 + 12)) - 32768.0f;
float bbox_t = float(_96.Load((base + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_96.Load((base + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_96.Load((base + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
@ -173,17 +174,17 @@ uint search_link(inout Bic bic)
// Stores `bbox` for clip index `ix` into buffer _96 as four consecutive words
// (x, y, z, w), each written as the raw bit pattern of the float (asuint).
//
// NOTE(review): two versions of the stores appear back to back (a diff rendered
// without +/- markers), differing in the config byte offset (60 vs 64) and in
// the constant added to each _96 byte address (8 vs 12).
void store_clip_bbox(uint ix, float4 bbox)
{
// Variant using config offset 60 and a +8 address bias.
uint base = (_80.Load(60) >> uint(2)) + (4u * ix);
_96.Store(base * 4 + 8, asuint(bbox.x));
_96.Store((base + 1u) * 4 + 8, asuint(bbox.y));
_96.Store((base + 2u) * 4 + 8, asuint(bbox.z));
_96.Store((base + 3u) * 4 + 8, asuint(bbox.w));
// Variant using config offset 64 and a +12 address bias.
uint base = (_80.Load(64) >> uint(2)) + (4u * ix);
_96.Store(base * 4 + 12, asuint(bbox.x));
_96.Store((base + 1u) * 4 + 12, asuint(bbox.y));
_96.Store((base + 2u) * 4 + 12, asuint(bbox.z));
_96.Store((base + 3u) * 4 + 12, asuint(bbox.w));
}
void comp_main()
{
uint th = gl_LocalInvocationID.x;
Bic bic = _393;
Bic bic = _394;
if (th < gl_WorkGroupID.x)
{
uint param = th;
@ -240,8 +241,8 @@ void comp_main()
uint param_6 = gl_GlobalInvocationID.x;
uint inp = load_path_ix(param_6);
bool is_push = int(inp) >= 0;
Bic _559 = { 1u - uint(is_push), uint(is_push) };
bic = _559;
Bic _560 = { 1u - uint(is_push), uint(is_push) };
bic = _560;
sh_bic[th] = bic;
if (is_push)
{
@ -266,11 +267,11 @@ void comp_main()
inbase = outbase;
}
GroupMemoryBarrierWithGroupSync();
bic = _393;
bic = _394;
Bic param_10 = bic;
uint _618 = search_link(param_10);
uint _619 = search_link(param_10);
bic = param_10;
uint link = _618;
uint link = _619;
sh_link[th] = link;
GroupMemoryBarrierWithGroupSync();
uint grandparent;
@ -324,22 +325,22 @@ void comp_main()
sh_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync();
uint path_ix = inp;
bool _717 = !is_push;
bool _725;
if (_717)
bool _718 = !is_push;
bool _726;
if (_718)
{
_725 = gl_GlobalInvocationID.x < _80.Load(80);
_726 = gl_GlobalInvocationID.x < _80.Load(84);
}
else
{
_725 = _717;
_726 = _718;
}
if (_725)
if (_726)
{
uint param_15 = parent;
path_ix = load_path_ix(param_15);
uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (4u * (~inp));
_96.Store(drawmonoid_out_base * 4 + 8, path_ix);
uint drawmonoid_out_base = (_80.Load(48) >> uint(2)) + (4u * (~inp));
_96.Store(drawmonoid_out_base * 4 + 12, path_ix);
if (int(grandparent) >= 0)
{
bbox = sh_bbox[grandparent];

View file

@ -24,6 +24,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -62,6 +63,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -275,9 +277,9 @@ kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_8
threadgroup_barrier(mem_flags::mem_threadgroup);
bic = Bic{ 0u, 0u };
Bic param_10 = bic;
uint _618 = search_link(param_10, gl_LocalInvocationID, sh_bic);
uint _619 = search_link(param_10, gl_LocalInvocationID, sh_bic);
bic = param_10;
uint link = _618;
uint link = _619;
sh_link[th] = link;
threadgroup_barrier(mem_flags::mem_threadgroup);
uint grandparent;
@ -331,17 +333,17 @@ kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_8
sh_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup);
uint path_ix = inp;
bool _717 = !is_push;
bool _725;
if (_717)
bool _718 = !is_push;
bool _726;
if (_718)
{
_725 = gl_GlobalInvocationID.x < v_80.conf.n_clip;
_726 = gl_GlobalInvocationID.x < v_80.conf.n_clip;
}
else
{
_725 = _717;
_726 = _718;
}
if (_725)
if (_726)
{
uint param_15 = parent;
path_ix = load_path_ix(param_15, v_80, v_96);

Binary file not shown.

Binary file not shown.

View file

@ -17,6 +17,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -48,7 +49,7 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _267 = { 0u, 0u };
static const Bic _268 = { 0u, 0u };
ByteAddressBuffer _64 : register(t1, space0);
RWByteAddressBuffer _80 : register(u0, space0);
@ -77,39 +78,39 @@ Bic bic_combine(Bic x, Bic y)
// Stores `bic` at index `ix` in buffer _80 as two consecutive words (a, then b).
// The base word index is (value loaded from config buffer _64) >> 2, plus 2 * ix.
//
// NOTE(review): two versions of the body appear back to back (a diff rendered
// without +/- markers), differing in the config byte offset (52 vs 56) and in
// the constant added to each _80 byte address (8 vs 12).
void store_bic(uint ix, Bic bic)
{
// Variant using config offset 52 and a +8 address bias.
uint base = (_64.Load(52) >> uint(2)) + (2u * ix);
_80.Store(base * 4 + 8, bic.a);
_80.Store((base + 1u) * 4 + 8, bic.b);
// Variant using config offset 56 and a +12 address bias.
uint base = (_64.Load(56) >> uint(2)) + (2u * ix);
_80.Store(base * 4 + 12, bic.a);
_80.Store((base + 1u) * 4 + 12, bic.b);
}
// Loads the bounding box of path `path_ix` from buffer _80 as
// float4(left, top, right, bottom). Path records use a stride of 6 words; only
// the first four are read here. Each word is converted from uint to float and
// then offset by -32768.0f.
//
// NOTE(review): two versions of the loads appear back to back (a diff rendered
// without +/- markers), differing in the config byte offset (40 vs 44) and in
// the constant added to each _80 byte address (8 vs 12).
float4 load_path_bbox(uint path_ix)
{
// Variant using config offset 40 and a +8 address bias.
uint base = (_64.Load(40) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_80.Load(base * 4 + 8)) - 32768.0f;
float bbox_t = float(_80.Load((base + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_80.Load((base + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_80.Load((base + 3u) * 4 + 8)) - 32768.0f;
// Variant using config offset 44 and a +12 address bias.
uint base = (_64.Load(44) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_80.Load(base * 4 + 12)) - 32768.0f;
float bbox_t = float(_80.Load((base + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_80.Load((base + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_80.Load((base + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox;
}
// Stores ClipEl `el` at index `ix` in buffer _80 as five consecutive words:
// parent_ix, then the four bbox components (x, y, z, w) written as the raw bit
// patterns of the floats (asuint).
//
// NOTE(review): two versions of the stores appear back to back (a diff rendered
// without +/- markers), differing in the config byte offset (56 vs 60) and in
// the constant added to each _80 byte address (8 vs 12).
void store_clip_el(uint ix, ClipEl el)
{
// Variant using config offset 56 and a +8 address bias.
uint base = (_64.Load(56) >> uint(2)) + (5u * ix);
_80.Store(base * 4 + 8, el.parent_ix);
_80.Store((base + 1u) * 4 + 8, asuint(el.bbox.x));
_80.Store((base + 2u) * 4 + 8, asuint(el.bbox.y));
_80.Store((base + 3u) * 4 + 8, asuint(el.bbox.z));
_80.Store((base + 4u) * 4 + 8, asuint(el.bbox.w));
// Variant using config offset 60 and a +12 address bias.
uint base = (_64.Load(60) >> uint(2)) + (5u * ix);
_80.Store(base * 4 + 12, el.parent_ix);
_80.Store((base + 1u) * 4 + 12, asuint(el.bbox.x));
_80.Store((base + 2u) * 4 + 12, asuint(el.bbox.y));
_80.Store((base + 3u) * 4 + 12, asuint(el.bbox.z));
_80.Store((base + 4u) * 4 + 12, asuint(el.bbox.w));
}
void comp_main()
{
uint th = gl_LocalInvocationID.x;
uint inp = _80.Load(((_64.Load(48) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 8);
uint inp = _80.Load(((_64.Load(52) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 12);
bool is_push = int(inp) >= 0;
Bic _207 = { 1u - uint(is_push), uint(is_push) };
Bic bic = _207;
Bic _208 = { 1u - uint(is_push), uint(is_push) };
Bic bic = _208;
sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0u; i < 8u; i++)
{
@ -132,21 +133,21 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
uint size = sh_bic[0].b;
bic = _267;
bic = _268;
if ((th + 1u) < 256u)
{
bic = sh_bic[th + 1u];
}
bool _283;
bool _284;
if (is_push)
{
_283 = bic.a == 0u;
_284 = bic.a == 0u;
}
else
{
_283 = is_push;
_284 = is_push;
}
if (_283)
if (_284)
{
uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th;
@ -163,8 +164,8 @@ void comp_main()
if (th < size)
{
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
ClipEl _331 = { parent_ix, bbox };
ClipEl el = _331;
ClipEl _332 = { parent_ix, bbox };
ClipEl el = _332;
uint param_5 = gl_GlobalInvocationID.x;
ClipEl param_6 = el;
store_clip_el(param_5, param_6);

View file

@ -24,6 +24,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -62,6 +63,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -142,16 +144,16 @@ kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_6
{
bic = sh_bic[th + 1u];
}
bool _283;
bool _284;
if (is_push)
{
_283 = bic.a == 0u;
_284 = bic.a == 0u;
}
else
{
_283 = is_push;
_284 = is_push;
}
if (_283)
if (_284)
{
uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th;

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -13,6 +13,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -88,7 +89,7 @@ DrawMonoid draw_monoid_identity()
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _93.Load(100) >> uint(2);
uint drawtag_base = _93.Load(104) >> uint(2);
uint tag_word = _103.Load((drawtag_base + ix) * 4 + 0);
uint param = tag_word;
DrawMonoid agg = map_tag(param);
@ -137,11 +138,11 @@ void comp_main()
DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_draw_monoid(param_6, param_7);
}
uint drawdata_base = _93.Load(104) >> uint(2);
uint drawinfo_base = _93.Load(68) >> uint(2);
uint drawdata_base = _93.Load(108) >> uint(2);
uint drawinfo_base = _93.Load(72) >> uint(2);
uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_93.Load(44) >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _93.Load(48) >> uint(2);
uint out_base = (_93.Load(48) >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _93.Load(52) >> uint(2);
float4 mat;
float2 translate;
float2 p0;
@ -155,31 +156,31 @@ void comp_main()
DrawMonoid param_9 = local[i_2 - 1u];
m = combine_draw_monoid(param_8, param_9);
}
_285.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix);
_285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix);
_285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset);
_285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset);
_285.Store((out_base + (i_2 * 4u)) * 4 + 12, m.path_ix);
_285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 12, m.clip_ix);
_285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 12, m.scene_offset);
_285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 12, m.info_offset);
uint dd = drawdata_base + (m.scene_offset >> uint(2));
uint di = drawinfo_base + (m.info_offset >> uint(2));
tag_word = _103.Load(((drawtag_base + ix) + i_2) * 4 + 0);
if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u))
{
uint bbox_offset = (_93.Load(40) >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_285.Load(bbox_offset * 4 + 8)) - 32768.0f;
float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
uint bbox_offset = (_93.Load(44) >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_285.Load(bbox_offset * 4 + 12)) - 32768.0f;
float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 8));
float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 12));
uint fill_mode = uint(linewidth >= 0.0f);
if (((linewidth >= 0.0f) || (tag_word == 276u)) || (tag_word == 732u))
{
uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 8);
uint t = (_93.Load(36) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_285.Load(t * 4 + 8), _285.Load((t + 1u) * 4 + 8), _285.Load((t + 2u) * 4 + 8), _285.Load((t + 3u) * 4 + 8)));
uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 12);
uint t = (_93.Load(40) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_285.Load(t * 4 + 12), _285.Load((t + 1u) * 4 + 12), _285.Load((t + 2u) * 4 + 12), _285.Load((t + 3u) * 4 + 12)));
if ((tag_word == 276u) || (tag_word == 732u))
{
translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 8), _285.Load((t + 5u) * 4 + 8)));
translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 12), _285.Load((t + 5u) * 4 + 12)));
}
}
if (linewidth >= 0.0f)
@ -191,12 +192,12 @@ void comp_main()
case 68u:
case 72u:
{
_285.Store(di * 4 + 8, asuint(linewidth));
_285.Store(di * 4 + 12, asuint(linewidth));
break;
}
case 276u:
{
_285.Store(di * 4 + 8, asuint(linewidth));
_285.Store(di * 4 + 12, asuint(linewidth));
p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0)));
p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0)));
p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
@ -206,9 +207,9 @@ void comp_main()
float line_x = dxy.x * scale;
float line_y = dxy.y * scale;
float line_c = -((p0.x * line_x) + (p0.y * line_y));
_285.Store((di + 1u) * 4 + 8, asuint(line_x));
_285.Store((di + 2u) * 4 + 8, asuint(line_y));
_285.Store((di + 3u) * 4 + 8, asuint(line_c));
_285.Store((di + 1u) * 4 + 12, asuint(line_x));
_285.Store((di + 2u) * 4 + 12, asuint(line_y));
_285.Store((di + 3u) * 4 + 12, asuint(line_c));
break;
}
case 732u:
@ -227,17 +228,17 @@ void comp_main()
float2 c1 = center1 * rainv;
float ra = rr * rainv;
float roff = rr - 1.0f;
_285.Store(di * 4 + 8, asuint(linewidth));
_285.Store((di + 1u) * 4 + 8, asuint(inv_mat.x));
_285.Store((di + 2u) * 4 + 8, asuint(inv_mat.y));
_285.Store((di + 3u) * 4 + 8, asuint(inv_mat.z));
_285.Store((di + 4u) * 4 + 8, asuint(inv_mat.w));
_285.Store((di + 5u) * 4 + 8, asuint(inv_tr.x));
_285.Store((di + 6u) * 4 + 8, asuint(inv_tr.y));
_285.Store((di + 7u) * 4 + 8, asuint(c1.x));
_285.Store((di + 8u) * 4 + 8, asuint(c1.y));
_285.Store((di + 9u) * 4 + 8, asuint(ra));
_285.Store((di + 10u) * 4 + 8, asuint(roff));
_285.Store(di * 4 + 12, asuint(linewidth));
_285.Store((di + 1u) * 4 + 12, asuint(inv_mat.x));
_285.Store((di + 2u) * 4 + 12, asuint(inv_mat.y));
_285.Store((di + 3u) * 4 + 12, asuint(inv_mat.z));
_285.Store((di + 4u) * 4 + 12, asuint(inv_mat.w));
_285.Store((di + 5u) * 4 + 12, asuint(inv_tr.x));
_285.Store((di + 6u) * 4 + 12, asuint(inv_tr.y));
_285.Store((di + 7u) * 4 + 12, asuint(c1.x));
_285.Store((di + 8u) * 4 + 12, asuint(c1.y));
_285.Store((di + 9u) * 4 + 12, asuint(ra));
_285.Store((di + 10u) * 4 + 12, asuint(roff));
break;
}
case 5u:
@ -253,7 +254,7 @@ void comp_main()
{
path_ix = m.path_ix;
}
_285.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix);
_285.Store((clip_out_base + m.clip_ix) * 4 + 12, path_ix);
}
}
}

View file

@ -59,6 +59,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -115,6 +116,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};

Binary file not shown.

Binary file not shown.

View file

@ -13,6 +13,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -81,7 +82,7 @@ DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _87.Load(100) >> uint(2);
uint drawtag_base = _87.Load(104) >> uint(2);
uint tag_word = _97.Load((drawtag_base + ix) * 4 + 0);
uint param = tag_word;
DrawMonoid agg = map_tag(param);

View file

@ -20,6 +20,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -76,6 +77,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};

Binary file not shown.

Binary file not shown.

View file

@ -130,6 +130,7 @@ struct TileSeg
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -163,9 +164,10 @@ static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);
RWByteAddressBuffer _297 : register(u0, space0);
ByteAddressBuffer _1681 : register(t1, space0);
RWTexture2D<unorm float4> image_atlas : register(u3, space0);
RWTexture2D<unorm float4> gradients : register(u4, space0);
RWTexture2D<unorm float4> image : register(u2, space0);
RWByteAddressBuffer _2506 : register(u2, space0);
RWTexture2D<unorm float4> image_atlas : register(u4, space0);
RWTexture2D<unorm float4> gradients : register(u5, space0);
RWTexture2D<unorm float4> image : register(u3, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -206,7 +208,7 @@ uint read_mem(Alloc alloc, uint offset)
{
return 0u;
}
uint v = _297.Load(offset * 4 + 8);
uint v = _297.Load(offset * 4 + 12);
return v;
}
@ -989,9 +991,9 @@ CmdJump Cmd_Jump_read(Alloc a, CmdRef ref)
void comp_main()
{
uint tile_ix = (gl_WorkGroupID.y * _1681.Load(8)) + gl_WorkGroupID.x;
uint tile_ix = (gl_WorkGroupID.y * _1681.Load(12)) + gl_WorkGroupID.x;
Alloc _1696;
_1696.offset = _1681.Load(24);
_1696.offset = _1681.Load(28);
Alloc param;
param.offset = _1696.offset;
uint param_1 = tile_ix * 1024u;
@ -999,7 +1001,7 @@ void comp_main()
Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef _1705 = { cmd_alloc.offset };
CmdRef cmd_ref = _1705;
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 8);
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 12);
cmd_ref.offset += 4u;
uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
float2 xy = float2(xy_uint);
@ -1009,14 +1011,13 @@ void comp_main()
rgba[i] = 0.0f.xxxx;
}
uint clip_depth = 0u;
bool mem_ok = _297.Load(4) == 0u;
float df[8];
TileSegRef tile_seg_ref;
float area[8];
uint blend_stack[4][8];
uint base_ix_1;
uint bg_rgba;
while (mem_ok)
while (true)
{
Alloc param_3 = cmd_alloc;
CmdRef param_4 = cmd_ref;
@ -1036,13 +1037,13 @@ void comp_main()
{
df[k] = 1000000000.0f;
}
TileSegRef _1810 = { stroke.tile_ref };
tile_seg_ref = _1810;
TileSegRef _1805 = { stroke.tile_ref };
tile_seg_ref = _1805;
do
{
uint param_7 = tile_seg_ref.offset;
uint param_8 = 24u;
bool param_9 = mem_ok;
bool param_9 = true;
Alloc param_10 = new_alloc(param_7, param_8, param_9);
TileSegRef param_11 = tile_seg_ref;
TileSeg seg = TileSeg_read(param_10, param_11);
@ -1073,13 +1074,13 @@ void comp_main()
{
area[k_3] = float(fill.backdrop);
}
TileSegRef _1930 = { fill.tile_ref };
tile_seg_ref = _1930;
TileSegRef _1924 = { fill.tile_ref };
tile_seg_ref = _1924;
do
{
uint param_15 = tile_seg_ref.offset;
uint param_16 = 24u;
bool param_17 = mem_ok;
bool param_17 = true;
Alloc param_18 = new_alloc(param_15, param_16, param_17);
TileSegRef param_19 = tile_seg_ref;
TileSeg seg_1 = TileSeg_read(param_18, param_19);
@ -1163,10 +1164,10 @@ void comp_main()
int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba = gradients[int2(x, int(lin.index))];
float3 param_29 = fg_rgba.xyz;
float3 _2264 = fromsRGB(param_29);
fg_rgba.x = _2264.x;
fg_rgba.y = _2264.y;
fg_rgba.z = _2264.z;
float3 _2257 = fromsRGB(param_29);
fg_rgba.x = _2257.x;
fg_rgba.y = _2257.y;
fg_rgba.z = _2257.z;
float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1;
}
@ -1189,10 +1190,10 @@ void comp_main()
int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))];
float3 param_33 = fg_rgba_1.xyz;
float3 _2374 = fromsRGB(param_33);
fg_rgba_1.x = _2374.x;
fg_rgba_1.y = _2374.y;
fg_rgba_1.z = _2374.z;
float3 _2367 = fromsRGB(param_33);
fg_rgba_1.x = _2367.x;
fg_rgba_1.y = _2367.y;
fg_rgba_1.z = _2367.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2;
}
@ -1206,9 +1207,9 @@ void comp_main()
CmdImage fill_img = Cmd_Image_read(param_34, param_35);
uint2 param_36 = xy_uint;
CmdImage param_37 = fill_img;
float4 _2417[8];
fillImage(_2417, param_36, param_37);
float4 img[8] = _2417;
float4 _2410[8];
fillImage(_2410, param_36, param_37);
float4 img[8] = _2410;
for (uint k_11 = 0u; k_11 < 8u; k_11++)
{
float4 fg_k_3 = img[k_11] * area[k_11];
@ -1224,8 +1225,8 @@ void comp_main()
for (uint k_12 = 0u; k_12 < 8u; k_12++)
{
float4 param_38 = float4(rgba[k_12]);
uint _2479 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2479;
uint _2472 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2472;
rgba[k_12] = 0.0f.xxxx;
}
}
@ -1235,8 +1236,8 @@ void comp_main()
for (uint k_13 = 0u; k_13 < 8u; k_13++)
{
float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39);
_297.Store((base_ix + k_13) * 4 + 8, _2522);
uint _2519 = packsRGB(param_39);
_2506.Store((base_ix + k_13) * 4 + 0, _2519);
rgba[k_13] = 0.0f.xxxx;
}
}
@ -1262,7 +1263,7 @@ void comp_main()
}
else
{
bg_rgba = _297.Load((base_ix_1 + k_14) * 4 + 8);
bg_rgba = _2506.Load((base_ix_1 + k_14) * 4 + 0);
}
uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42);
@ -1279,8 +1280,8 @@ void comp_main()
{
Alloc param_46 = cmd_alloc;
CmdRef param_47 = cmd_ref;
CmdRef _2621 = { Cmd_Jump_read(param_46, param_47).new_ref };
cmd_ref = _2621;
CmdRef _2618 = { Cmd_Jump_read(param_46, param_47).new_ref };
cmd_ref = _2618;
cmd_alloc.offset = cmd_ref.offset;
break;
}

View file

@ -178,6 +178,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -188,6 +189,7 @@ struct Alloc_1
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -222,6 +224,11 @@ struct ConfigBuf
Config conf;
};
struct BlendBuf
{
uint blend_mem[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u);
static inline __attribute__((always_inline))
@ -1047,7 +1054,7 @@ CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Me
return CmdJump_read(param, param_1, v_297);
}
kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], device BlendBuf& _2506 [[buffer(2)]], texture2d<float, access::write> image [[texture(3)]], texture2d<float> image_atlas [[texture(4)]], texture2d<float> gradients [[texture(5)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x;
Alloc param;
@ -1066,14 +1073,13 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
rgba[i] = float4(0.0);
}
uint clip_depth = 0u;
bool mem_ok = v_297.mem_error == 0u;
spvUnsafeArray<float, 8> df;
TileSegRef tile_seg_ref;
spvUnsafeArray<float, 8> area;
spvUnsafeArray<spvUnsafeArray<uint, 8>, 4> blend_stack;
uint base_ix_1;
uint bg_rgba;
while (mem_ok)
while (true)
{
Alloc param_3 = cmd_alloc;
CmdRef param_4 = cmd_ref;
@ -1098,7 +1104,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
{
uint param_7 = tile_seg_ref.offset;
uint param_8 = 24u;
bool param_9 = mem_ok;
bool param_9 = true;
Alloc param_10 = new_alloc(param_7, param_8, param_9);
TileSegRef param_11 = tile_seg_ref;
TileSeg seg = TileSeg_read(param_10, param_11, v_297);
@ -1134,7 +1140,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
{
uint param_15 = tile_seg_ref.offset;
uint param_16 = 24u;
bool param_17 = mem_ok;
bool param_17 = true;
Alloc param_18 = new_alloc(param_15, param_16, param_17);
TileSegRef param_19 = tile_seg_ref;
TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297);
@ -1218,10 +1224,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
float3 param_29 = fg_rgba.xyz;
float3 _2264 = fromsRGB(param_29);
fg_rgba.x = _2264.x;
fg_rgba.y = _2264.y;
fg_rgba.z = _2264.z;
float3 _2257 = fromsRGB(param_29);
fg_rgba.x = _2257.x;
fg_rgba.y = _2257.y;
fg_rgba.z = _2257.z;
float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;
}
@ -1244,10 +1250,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0));
float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index))));
float3 param_33 = fg_rgba_1.xyz;
float3 _2374 = fromsRGB(param_33);
fg_rgba_1.x = _2374.x;
fg_rgba_1.y = _2374.y;
fg_rgba_1.z = _2374.z;
float3 _2367 = fromsRGB(param_33);
fg_rgba_1.x = _2367.x;
fg_rgba_1.y = _2367.y;
fg_rgba_1.z = _2367.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2;
}
@ -1278,8 +1284,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
for (uint k_12 = 0u; k_12 < 8u; k_12++)
{
float4 param_38 = float4(rgba[k_12]);
uint _2479 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2479;
uint _2472 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2472;
rgba[k_12] = float4(0.0);
}
}
@ -1289,8 +1295,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
for (uint k_13 = 0u; k_13 < 8u; k_13++)
{
float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39);
v_297.memory[base_ix + k_13] = _2522;
uint _2519 = packsRGB(param_39);
_2506.blend_mem[base_ix + k_13] = _2519;
rgba[k_13] = float4(0.0);
}
}
@ -1316,7 +1322,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
}
else
{
bg_rgba = v_297.memory[base_ix_1 + k_14];
bg_rgba = _2506.blend_mem[base_ix_1 + k_14];
}
uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42);

Binary file not shown.

Binary file not shown.

View file

@ -130,6 +130,7 @@ struct TileSeg
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -163,9 +164,10 @@ static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);
RWByteAddressBuffer _297 : register(u0, space0);
ByteAddressBuffer _1681 : register(t1, space0);
RWTexture2D<unorm float4> image_atlas : register(u3, space0);
RWTexture2D<unorm float4> gradients : register(u4, space0);
RWTexture2D<unorm float> image : register(u2, space0);
RWByteAddressBuffer _2506 : register(u2, space0);
RWTexture2D<unorm float4> image_atlas : register(u4, space0);
RWTexture2D<unorm float4> gradients : register(u5, space0);
RWTexture2D<unorm float> image : register(u3, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -206,7 +208,7 @@ uint read_mem(Alloc alloc, uint offset)
{
return 0u;
}
uint v = _297.Load(offset * 4 + 8);
uint v = _297.Load(offset * 4 + 12);
return v;
}
@ -989,9 +991,9 @@ CmdJump Cmd_Jump_read(Alloc a, CmdRef ref)
void comp_main()
{
uint tile_ix = (gl_WorkGroupID.y * _1681.Load(8)) + gl_WorkGroupID.x;
uint tile_ix = (gl_WorkGroupID.y * _1681.Load(12)) + gl_WorkGroupID.x;
Alloc _1696;
_1696.offset = _1681.Load(24);
_1696.offset = _1681.Load(28);
Alloc param;
param.offset = _1696.offset;
uint param_1 = tile_ix * 1024u;
@ -999,7 +1001,7 @@ void comp_main()
Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef _1705 = { cmd_alloc.offset };
CmdRef cmd_ref = _1705;
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 8);
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 12);
cmd_ref.offset += 4u;
uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
float2 xy = float2(xy_uint);
@ -1009,14 +1011,13 @@ void comp_main()
rgba[i] = 0.0f.xxxx;
}
uint clip_depth = 0u;
bool mem_ok = _297.Load(4) == 0u;
float df[8];
TileSegRef tile_seg_ref;
float area[8];
uint blend_stack[4][8];
uint base_ix_1;
uint bg_rgba;
while (mem_ok)
while (true)
{
Alloc param_3 = cmd_alloc;
CmdRef param_4 = cmd_ref;
@ -1036,13 +1037,13 @@ void comp_main()
{
df[k] = 1000000000.0f;
}
TileSegRef _1810 = { stroke.tile_ref };
tile_seg_ref = _1810;
TileSegRef _1805 = { stroke.tile_ref };
tile_seg_ref = _1805;
do
{
uint param_7 = tile_seg_ref.offset;
uint param_8 = 24u;
bool param_9 = mem_ok;
bool param_9 = true;
Alloc param_10 = new_alloc(param_7, param_8, param_9);
TileSegRef param_11 = tile_seg_ref;
TileSeg seg = TileSeg_read(param_10, param_11);
@ -1073,13 +1074,13 @@ void comp_main()
{
area[k_3] = float(fill.backdrop);
}
TileSegRef _1930 = { fill.tile_ref };
tile_seg_ref = _1930;
TileSegRef _1924 = { fill.tile_ref };
tile_seg_ref = _1924;
do
{
uint param_15 = tile_seg_ref.offset;
uint param_16 = 24u;
bool param_17 = mem_ok;
bool param_17 = true;
Alloc param_18 = new_alloc(param_15, param_16, param_17);
TileSegRef param_19 = tile_seg_ref;
TileSeg seg_1 = TileSeg_read(param_18, param_19);
@ -1163,10 +1164,10 @@ void comp_main()
int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba = gradients[int2(x, int(lin.index))];
float3 param_29 = fg_rgba.xyz;
float3 _2264 = fromsRGB(param_29);
fg_rgba.x = _2264.x;
fg_rgba.y = _2264.y;
fg_rgba.z = _2264.z;
float3 _2257 = fromsRGB(param_29);
fg_rgba.x = _2257.x;
fg_rgba.y = _2257.y;
fg_rgba.z = _2257.z;
float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1;
}
@ -1189,10 +1190,10 @@ void comp_main()
int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))];
float3 param_33 = fg_rgba_1.xyz;
float3 _2374 = fromsRGB(param_33);
fg_rgba_1.x = _2374.x;
fg_rgba_1.y = _2374.y;
fg_rgba_1.z = _2374.z;
float3 _2367 = fromsRGB(param_33);
fg_rgba_1.x = _2367.x;
fg_rgba_1.y = _2367.y;
fg_rgba_1.z = _2367.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2;
}
@ -1206,9 +1207,9 @@ void comp_main()
CmdImage fill_img = Cmd_Image_read(param_34, param_35);
uint2 param_36 = xy_uint;
CmdImage param_37 = fill_img;
float4 _2417[8];
fillImage(_2417, param_36, param_37);
float4 img[8] = _2417;
float4 _2410[8];
fillImage(_2410, param_36, param_37);
float4 img[8] = _2410;
for (uint k_11 = 0u; k_11 < 8u; k_11++)
{
float4 fg_k_3 = img[k_11] * area[k_11];
@ -1224,8 +1225,8 @@ void comp_main()
for (uint k_12 = 0u; k_12 < 8u; k_12++)
{
float4 param_38 = float4(rgba[k_12]);
uint _2479 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2479;
uint _2472 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2472;
rgba[k_12] = 0.0f.xxxx;
}
}
@ -1235,8 +1236,8 @@ void comp_main()
for (uint k_13 = 0u; k_13 < 8u; k_13++)
{
float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39);
_297.Store((base_ix + k_13) * 4 + 8, _2522);
uint _2519 = packsRGB(param_39);
_2506.Store((base_ix + k_13) * 4 + 0, _2519);
rgba[k_13] = 0.0f.xxxx;
}
}
@ -1262,7 +1263,7 @@ void comp_main()
}
else
{
bg_rgba = _297.Load((base_ix_1 + k_14) * 4 + 8);
bg_rgba = _2506.Load((base_ix_1 + k_14) * 4 + 0);
}
uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42);
@ -1279,8 +1280,8 @@ void comp_main()
{
Alloc param_46 = cmd_alloc;
CmdRef param_47 = cmd_ref;
CmdRef _2621 = { Cmd_Jump_read(param_46, param_47).new_ref };
cmd_ref = _2621;
CmdRef _2618 = { Cmd_Jump_read(param_46, param_47).new_ref };
cmd_ref = _2618;
cmd_alloc.offset = cmd_ref.offset;
break;
}

View file

@ -178,6 +178,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -188,6 +189,7 @@ struct Alloc_1
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -222,6 +224,11 @@ struct ConfigBuf
Config conf;
};
struct BlendBuf
{
uint blend_mem[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u);
static inline __attribute__((always_inline))
@ -1047,7 +1054,7 @@ CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Me
return CmdJump_read(param, param_1, v_297);
}
kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], device BlendBuf& _2506 [[buffer(2)]], texture2d<float, access::write> image [[texture(3)]], texture2d<float> image_atlas [[texture(4)]], texture2d<float> gradients [[texture(5)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{
uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x;
Alloc param;
@ -1066,14 +1073,13 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
rgba[i] = float4(0.0);
}
uint clip_depth = 0u;
bool mem_ok = v_297.mem_error == 0u;
spvUnsafeArray<float, 8> df;
TileSegRef tile_seg_ref;
spvUnsafeArray<float, 8> area;
spvUnsafeArray<spvUnsafeArray<uint, 8>, 4> blend_stack;
uint base_ix_1;
uint bg_rgba;
while (mem_ok)
while (true)
{
Alloc param_3 = cmd_alloc;
CmdRef param_4 = cmd_ref;
@ -1098,7 +1104,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
{
uint param_7 = tile_seg_ref.offset;
uint param_8 = 24u;
bool param_9 = mem_ok;
bool param_9 = true;
Alloc param_10 = new_alloc(param_7, param_8, param_9);
TileSegRef param_11 = tile_seg_ref;
TileSeg seg = TileSeg_read(param_10, param_11, v_297);
@ -1134,7 +1140,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
{
uint param_15 = tile_seg_ref.offset;
uint param_16 = 24u;
bool param_17 = mem_ok;
bool param_17 = true;
Alloc param_18 = new_alloc(param_15, param_16, param_17);
TileSegRef param_19 = tile_seg_ref;
TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297);
@ -1218,10 +1224,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
float3 param_29 = fg_rgba.xyz;
float3 _2264 = fromsRGB(param_29);
fg_rgba.x = _2264.x;
fg_rgba.y = _2264.y;
fg_rgba.z = _2264.z;
float3 _2257 = fromsRGB(param_29);
fg_rgba.x = _2257.x;
fg_rgba.y = _2257.y;
fg_rgba.z = _2257.z;
float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;
}
@ -1244,10 +1250,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0));
float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index))));
float3 param_33 = fg_rgba_1.xyz;
float3 _2374 = fromsRGB(param_33);
fg_rgba_1.x = _2374.x;
fg_rgba_1.y = _2374.y;
fg_rgba_1.z = _2374.z;
float3 _2367 = fromsRGB(param_33);
fg_rgba_1.x = _2367.x;
fg_rgba_1.y = _2367.y;
fg_rgba_1.z = _2367.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2;
}
@ -1278,8 +1284,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
for (uint k_12 = 0u; k_12 < 8u; k_12++)
{
float4 param_38 = float4(rgba[k_12]);
uint _2479 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2479;
uint _2472 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2472;
rgba[k_12] = float4(0.0);
}
}
@ -1289,8 +1295,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
for (uint k_13 = 0u; k_13 < 8u; k_13++)
{
float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39);
v_297.memory[base_ix + k_13] = _2522;
uint _2519 = packsRGB(param_39);
_2506.blend_mem[base_ix + k_13] = _2519;
rgba[k_13] = float4(0.0);
}
}
@ -1316,7 +1322,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
}
else
{
bg_rgba = v_297.memory[base_ix_1 + k_14];
bg_rgba = _2506.blend_mem[base_ix_1 + k_14];
}
uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42);

Binary file not shown.

Binary file not shown.

View file

@ -3,12 +3,6 @@ struct Alloc
uint offset;
};
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathCubicRef
{
uint offset;
@ -74,6 +68,7 @@ struct SubdivResult
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -105,10 +100,10 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
static const PathSegTag _721 = { 0u, 0u };
static const PathSegTag _722 = { 0u, 0u };
RWByteAddressBuffer _136 : register(u0, space0);
ByteAddressBuffer _710 : register(t1, space0);
RWByteAddressBuffer _143 : register(u0, space0);
ByteAddressBuffer _711 : register(t1, space0);
static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input
@ -116,6 +111,15 @@ struct SPIRV_Cross_Input
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
};
static bool mem_ok;
bool check_deps(uint dep_stage)
{
uint _149;
_143.InterlockedOr(4, 0u, _149);
return (_149 & dep_stage) == 0u;
}
bool touch_mem(Alloc alloc, uint offset)
{
return true;
@ -129,7 +133,7 @@ uint read_mem(Alloc alloc, uint offset)
{
return 0u;
}
uint v = _136.Load(offset * 4 + 8);
uint v = _143.Load(offset * 4 + 12);
return v;
}
@ -138,8 +142,8 @@ PathSegTag PathSeg_tag(Alloc a, PathSegRef ref)
Alloc param = a;
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1);
PathSegTag _367 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _367;
PathSegTag _362 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _362;
}
PathCubic PathCubic_read(Alloc a, PathCubicRef ref)
@ -194,9 +198,9 @@ PathCubic PathCubic_read(Alloc a, PathCubicRef ref)
PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref)
{
PathCubicRef _373 = { ref.offset + 4u };
PathCubicRef _368 = { ref.offset + 4u };
Alloc param = a;
PathCubicRef param_1 = _373;
PathCubicRef param_1 = _368;
return PathCubic_read(param, param_1);
}
@ -240,8 +244,8 @@ SubdivResult estimate_subdiv(float2 p0, float2 p1, float2 p2, float sqrt_tol)
val = (sqrt_tol * da) / approx_parabola_integral(param_2);
}
}
SubdivResult _695 = { val, a0, a2 };
return _695;
SubdivResult _690 = { val, a0, a2 };
return _690;
}
uint fill_mode_from_flags(uint flags)
@ -263,12 +267,12 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _427 = { raw2 };
s.tiles = _427;
TileRef _422 = { raw2 };
s.tiles = _422;
return s;
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
Alloc new_alloc(uint offset, uint size, bool mem_ok_1)
{
Alloc a;
a.offset = offset;
@ -286,33 +290,24 @@ float2 eval_quad(float2 p0, float2 p1, float2 p2, float t)
return (p0 * (mt * mt)) + (((p1 * (mt * 2.0f)) + (p2 * t)) * t);
}
MallocResult malloc(uint size)
uint malloc_stage(uint size, uint mem_size, uint stage)
{
uint _142;
_136.InterlockedAdd(0, size, _142);
uint offset = _142;
uint _149;
_136.GetDimensions(_149);
_149 = (_149 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_149) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
uint _158;
_143.InterlockedAdd(0, size, _158);
uint offset = _158;
if ((offset + size) > mem_size)
{
uint _171;
_136.InterlockedMax(4, 1u, _171);
return r;
uint _168;
_143.InterlockedOr(4, stage, _168);
offset = 0u;
}
return r;
return offset;
}
TileRef Tile_index(TileRef ref, uint index)
{
TileRef _385 = { ref.offset + (index * 8u) };
return _385;
TileRef _380 = { ref.offset + (index * 8u) };
return _380;
}
void write_mem(Alloc alloc, uint offset, uint val)
@ -323,7 +318,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_136.Store(offset * 4 + 8, val);
_143.Store(offset * 4 + 12, val);
}
void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s)
@ -357,30 +352,36 @@ void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s)
void comp_main()
{
uint element_ix = gl_GlobalInvocationID.x;
PathSegRef _718 = { _710.Load(28) + (element_ix * 52u) };
PathSegRef ref = _718;
PathSegTag tag = _721;
if (element_ix < _710.Load(4))
mem_ok = true;
uint param = 7u;
bool _694 = check_deps(param);
if (!_694)
{
Alloc _731;
_731.offset = _710.Load(28);
Alloc param;
param.offset = _731.offset;
PathSegRef param_1 = ref;
tag = PathSeg_tag(param, param_1);
return;
}
uint element_ix = gl_GlobalInvocationID.x;
PathSegRef _719 = { _711.Load(32) + (element_ix * 52u) };
PathSegRef ref = _719;
PathSegTag tag = _722;
if (element_ix < _711.Load(8))
{
Alloc _732;
_732.offset = _711.Load(32);
Alloc param_1;
param_1.offset = _732.offset;
PathSegRef param_2 = ref;
tag = PathSeg_tag(param_1, param_2);
}
bool mem_ok = _136.Load(4) == 0u;
switch (tag.tag)
{
case 1u:
{
Alloc _748;
_748.offset = _710.Load(28);
Alloc param_2;
param_2.offset = _748.offset;
PathSegRef param_3 = ref;
PathCubic cubic = PathSeg_Cubic_read(param_2, param_3);
Alloc _745;
_745.offset = _711.Load(32);
Alloc param_3;
param_3.offset = _745.offset;
PathSegRef param_4 = ref;
PathCubic cubic = PathSeg_Cubic_read(param_3, param_4);
float2 err_v = (((cubic.p2 - cubic.p1) * 3.0f) + cubic.p0) - cubic.p3;
float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);
uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875f, 0.16666667163372039794921875f))), 1u);
@ -392,43 +393,43 @@ void comp_main()
for (uint i = 0u; i < n_quads; i++)
{
float t = float(i + 1u) * _step;
float2 param_4 = cubic.p0;
float2 param_5 = cubic.p1;
float2 param_6 = cubic.p2;
float2 param_7 = cubic.p3;
float param_8 = t;
float2 qp2 = eval_cubic(param_4, param_5, param_6, param_7, param_8);
float2 param_9 = cubic.p0;
float2 param_10 = cubic.p1;
float2 param_11 = cubic.p2;
float2 param_12 = cubic.p3;
float param_13 = t - (0.5f * _step);
float2 qp1 = eval_cubic(param_9, param_10, param_11, param_12, param_13);
float2 param_5 = cubic.p0;
float2 param_6 = cubic.p1;
float2 param_7 = cubic.p2;
float2 param_8 = cubic.p3;
float param_9 = t;
float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9);
float2 param_10 = cubic.p0;
float2 param_11 = cubic.p1;
float2 param_12 = cubic.p2;
float2 param_13 = cubic.p3;
float param_14 = t - (0.5f * _step);
float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14);
qp1 = (qp1 * 2.0f) - ((qp0 + qp2) * 0.5f);
float2 param_14 = qp0;
float2 param_15 = qp1;
float2 param_16 = qp2;
float param_17 = 0.4743416607379913330078125f;
SubdivResult params = estimate_subdiv(param_14, param_15, param_16, param_17);
float2 param_15 = qp0;
float2 param_16 = qp1;
float2 param_17 = qp2;
float param_18 = 0.4743416607379913330078125f;
SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18);
keep_params[i] = params;
val += params.val;
qp0 = qp2;
}
uint n = max(uint(ceil((val * 0.5f) / 0.4743416607379913330078125f)), 1u);
uint param_18 = tag.flags;
bool is_stroke = fill_mode_from_flags(param_18) == 1u;
uint param_19 = tag.flags;
bool is_stroke = fill_mode_from_flags(param_19) == 1u;
uint path_ix = cubic.path_ix;
PathRef _904 = { _710.Load(16) + (path_ix * 12u) };
Alloc _907;
_907.offset = _710.Load(16);
Alloc param_19;
param_19.offset = _907.offset;
PathRef param_20 = _904;
Path path = Path_read(param_19, param_20);
uint param_21 = path.tiles.offset;
uint param_22 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_23 = mem_ok;
Alloc path_alloc = new_alloc(param_21, param_22, param_23);
PathRef _901 = { _711.Load(20) + (path_ix * 12u) };
Alloc _904;
_904.offset = _711.Load(20);
Alloc param_20;
param_20.offset = _904.offset;
PathRef param_21 = _901;
Path path = Path_read(param_20, param_21);
uint param_22 = path.tiles.offset;
uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_24 = true;
Alloc path_alloc = new_alloc(param_22, param_23, param_24);
int4 bbox = int4(path.bbox);
float2 p0 = cubic.p0;
qp0 = cubic.p0;
@ -436,44 +437,44 @@ void comp_main()
int n_out = 1;
float val_sum = 0.0f;
float2 p1;
float _1147;
float _1143;
TileSeg tile_seg;
for (uint i_1 = 0u; i_1 < n_quads; i_1++)
{
float t_1 = float(i_1 + 1u) * _step;
float2 param_24 = cubic.p0;
float2 param_25 = cubic.p1;
float2 param_26 = cubic.p2;
float2 param_27 = cubic.p3;
float param_28 = t_1;
float2 qp2_1 = eval_cubic(param_24, param_25, param_26, param_27, param_28);
float2 param_29 = cubic.p0;
float2 param_30 = cubic.p1;
float2 param_31 = cubic.p2;
float2 param_32 = cubic.p3;
float param_33 = t_1 - (0.5f * _step);
float2 qp1_1 = eval_cubic(param_29, param_30, param_31, param_32, param_33);
float2 param_25 = cubic.p0;
float2 param_26 = cubic.p1;
float2 param_27 = cubic.p2;
float2 param_28 = cubic.p3;
float param_29 = t_1;
float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29);
float2 param_30 = cubic.p0;
float2 param_31 = cubic.p1;
float2 param_32 = cubic.p2;
float2 param_33 = cubic.p3;
float param_34 = t_1 - (0.5f * _step);
float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34);
qp1_1 = (qp1_1 * 2.0f) - ((qp0 + qp2_1) * 0.5f);
SubdivResult params_1 = keep_params[i_1];
float param_34 = params_1.a0;
float u0 = approx_parabola_inv_integral(param_34);
float param_35 = params_1.a2;
float u2 = approx_parabola_inv_integral(param_35);
float param_35 = params_1.a0;
float u0 = approx_parabola_inv_integral(param_35);
float param_36 = params_1.a2;
float u2 = approx_parabola_inv_integral(param_36);
float uscale = 1.0f / (u2 - u0);
float target = float(n_out) * v_step;
for (;;)
{
bool _1040 = uint(n_out) == n;
bool _1050;
if (!_1040)
bool _1036 = uint(n_out) == n;
bool _1046;
if (!_1036)
{
_1050 = target < (val_sum + params_1.val);
_1046 = target < (val_sum + params_1.val);
}
else
{
_1050 = _1040;
_1046 = _1036;
}
if (_1050)
if (_1046)
{
if (uint(n_out) == n)
{
@ -483,14 +484,14 @@ void comp_main()
{
float u = (target - val_sum) / params_1.val;
float a = lerp(params_1.a0, params_1.a2, u);
float param_36 = a;
float au = approx_parabola_inv_integral(param_36);
float param_37 = a;
float au = approx_parabola_inv_integral(param_37);
float t_2 = (au - u0) * uscale;
float2 param_37 = qp0;
float2 param_38 = qp1_1;
float2 param_39 = qp2_1;
float param_40 = t_2;
p1 = eval_quad(param_37, param_38, param_39, param_40);
float2 param_38 = qp0;
float2 param_39 = qp1_1;
float2 param_40 = qp2_1;
float param_41 = t_2;
p1 = eval_quad(param_38, param_39, param_40, param_41);
}
float xmin = min(p0.x, p1.x) - cubic.stroke.x;
float xmax = max(p0.x, p1.x) + cubic.stroke.x;
@ -500,13 +501,13 @@ void comp_main()
float dy = p1.y - p0.y;
if (abs(dy) < 9.999999717180685365747194737196e-10f)
{
_1147 = 1000000000.0f;
_1143 = 1000000000.0f;
}
else
{
_1147 = dx / dy;
_1143 = dx / dy;
}
float invslope = _1147;
float invslope = _1143;
float c = (cubic.stroke.x + (abs(invslope) * (8.0f + cubic.stroke.y))) * 0.0625f;
float b = invslope;
float a_1 = (p0.x - ((p0.y - 8.0f) * b)) * 0.0625f;
@ -522,14 +523,20 @@ void comp_main()
int stride = bbox.z - bbox.x;
int base = ((y0 - bbox.y) * stride) - bbox.x;
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
uint param_41 = n_tile_alloc * 24u;
MallocResult _1263 = malloc(param_41);
MallocResult tile_alloc = _1263;
if (tile_alloc.failed || (!mem_ok))
uint malloc_size = n_tile_alloc * 24u;
uint param_42 = malloc_size;
uint param_43 = _711.Load(0);
uint param_44 = 4u;
uint _1265 = malloc_stage(param_42, param_43, param_44);
uint tile_offset = _1265;
if (tile_offset == 0u)
{
return;
mem_ok = false;
}
uint tile_offset = tile_alloc.alloc.offset;
uint param_45 = tile_offset;
uint param_46 = malloc_size;
bool param_47 = true;
Alloc tile_alloc = new_alloc(param_45, param_46, param_47);
int xray = int(floor(p0.x * 0.0625f));
int last_xray = int(floor(p1.x * 0.0625f));
if (p0.y > p1.y)
@ -542,39 +549,34 @@ void comp_main()
{
float tile_y0 = float(y * 16);
int xbackdrop = max((xray + 1), bbox.x);
bool _1319 = !is_stroke;
bool _1329;
if (_1319)
bool _1322 = !is_stroke;
bool _1332;
if (_1322)
{
_1329 = min(p0.y, p1.y) < tile_y0;
_1332 = min(p0.y, p1.y) < tile_y0;
}
else
{
_1329 = _1319;
_1332 = _1322;
}
bool _1336;
if (_1329)
bool _1339;
if (_1332)
{
_1336 = xbackdrop < bbox.z;
_1339 = xbackdrop < bbox.z;
}
else
{
_1336 = _1329;
_1339 = _1332;
}
if (_1336)
if (_1339)
{
int backdrop = (p1.y < p0.y) ? 1 : (-1);
TileRef param_42 = path.tiles;
uint param_43 = uint(base + xbackdrop);
TileRef tile_ref = Tile_index(param_42, param_43);
TileRef param_48 = path.tiles;
uint param_49 = uint(base + xbackdrop);
TileRef tile_ref = Tile_index(param_48, param_49);
uint tile_el = tile_ref.offset >> uint(2);
Alloc param_44 = path_alloc;
uint param_45 = tile_el + 1u;
if (touch_mem(param_44, param_45))
{
uint _1374;
_136.InterlockedAdd((tile_el + 1u) * 4 + 8, uint(backdrop), _1374);
}
uint _1369;
_143.InterlockedAdd((tile_el + 1u) * 4 + 12, uint(backdrop), _1369);
}
int next_xray = last_xray;
if (y < (y1 - 1))
@ -592,20 +594,15 @@ void comp_main()
for (int x = xx0; x < xx1; x++)
{
float tile_x0 = float(x * 16);
TileRef _1454 = { path.tiles.offset };
TileRef param_46 = _1454;
uint param_47 = uint(base + x);
TileRef tile_ref_1 = Tile_index(param_46, param_47);
TileRef _1449 = { path.tiles.offset };
TileRef param_50 = _1449;
uint param_51 = uint(base + x);
TileRef tile_ref_1 = Tile_index(param_50, param_51);
uint tile_el_1 = tile_ref_1.offset >> uint(2);
uint old = 0u;
Alloc param_48 = path_alloc;
uint param_49 = tile_el_1;
if (touch_mem(param_48, param_49))
{
uint _1477;
_136.InterlockedExchange(tile_el_1 * 4 + 8, tile_offset, _1477);
old = _1477;
}
uint _1465;
_143.InterlockedExchange(tile_el_1 * 4 + 12, tile_offset, _1465);
old = _1465;
tile_seg.origin = p0;
tile_seg._vector = p1 - p0;
float y_edge = 0.0f;
@ -636,11 +633,14 @@ void comp_main()
}
tile_seg.y_edge = y_edge;
tile_seg.next.offset = old;
TileSegRef _1559 = { tile_offset };
Alloc param_50 = tile_alloc.alloc;
TileSegRef param_51 = _1559;
TileSeg param_52 = tile_seg;
TileSeg_write(param_50, param_51, param_52);
if (mem_ok)
{
TileSegRef _1550 = { tile_offset };
Alloc param_52 = tile_alloc;
TileSegRef param_53 = _1550;
TileSeg param_54 = tile_seg;
TileSeg_write(param_52, param_53, param_54);
}
tile_offset += 24u;
}
xc += b;

View file

@ -51,12 +51,6 @@ struct Alloc
uint offset;
};
// Value returned by malloc() in this file: the Alloc handle for the request plus a
// flag that malloc() sets when offset + size exceeds the usable buffer size.
// NOTE(review): the enclosing hunk shrinks from 12 to 6 lines, so this struct appears
// to be deleted by the commit (malloc_stage returns a plain uint instead) - confirm.
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathCubicRef
{
uint offset;
@ -124,6 +118,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -134,6 +129,7 @@ struct Alloc_1
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -170,6 +166,13 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(32u, 1u, 1u);
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_143)
{
    // OR-ing with 0 leaves mem_error unchanged; the atomic is used purely to
    // fetch the current value of the word that malloc_stage ORs stage bits into.
    device atomic_uint* error_word = (device atomic_uint*)&v_143.mem_error;
    const uint error_bits = atomic_fetch_or_explicit(error_word, 0u, memory_order_relaxed);

    // True only when none of the bits selected by dep_stage is set in mem_error.
    const uint blocking_bits = error_bits & dep_stage;
    return blocking_bits == 0u;
}
static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{
@ -177,7 +180,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
}
static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_136, constant uint& v_136BufferSize)
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_143)
{
Alloc param = alloc;
uint param_1 = offset;
@ -185,59 +188,59 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{
return 0u;
}
uint v = v_136.memory[offset];
uint v = v_143.memory[offset];
return v;
}
// Reads one 32-bit word through read_mem at word index ref.offset >> 2 and splits it
// into a PathSegTag: the low 16 bits become the first member, the high 16 bits the
// second (presumably `tag` and `flags`, the members main0 reads - struct not visible here).
// NOTE(review): the signature and the read_mem call each appear twice (v_136 + buffer
// size vs. v_143). This looks like a diff rendered without +/- markers, with the
// pre-change line directly followed by its post-change replacement.
static inline __attribute__((always_inline))
PathSegTag PathSeg_tag(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
PathSegTag PathSeg_tag(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143)
{
Alloc param = a;
// ref.offset is shifted right by 2 to turn it into an index of 32-bit words.
uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_136, v_136BufferSize);
uint tag_and_flags = read_mem(param, param_1, v_143);
return PathSegTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
}
static inline __attribute__((always_inline))
PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref, device Memory& v_143)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_136, v_136BufferSize);
uint raw0 = read_mem(param, param_1, v_143);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_136, v_136BufferSize);
uint raw1 = read_mem(param_2, param_3, v_143);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_136, v_136BufferSize);
uint raw2 = read_mem(param_4, param_5, v_143);
Alloc param_6 = a;
uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7, v_136, v_136BufferSize);
uint raw3 = read_mem(param_6, param_7, v_143);
Alloc param_8 = a;
uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9, v_136, v_136BufferSize);
uint raw4 = read_mem(param_8, param_9, v_143);
Alloc param_10 = a;
uint param_11 = ix + 5u;
uint raw5 = read_mem(param_10, param_11, v_136, v_136BufferSize);
uint raw5 = read_mem(param_10, param_11, v_143);
Alloc param_12 = a;
uint param_13 = ix + 6u;
uint raw6 = read_mem(param_12, param_13, v_136, v_136BufferSize);
uint raw6 = read_mem(param_12, param_13, v_143);
Alloc param_14 = a;
uint param_15 = ix + 7u;
uint raw7 = read_mem(param_14, param_15, v_136, v_136BufferSize);
uint raw7 = read_mem(param_14, param_15, v_143);
Alloc param_16 = a;
uint param_17 = ix + 8u;
uint raw8 = read_mem(param_16, param_17, v_136, v_136BufferSize);
uint raw8 = read_mem(param_16, param_17, v_143);
Alloc param_18 = a;
uint param_19 = ix + 9u;
uint raw9 = read_mem(param_18, param_19, v_136, v_136BufferSize);
uint raw9 = read_mem(param_18, param_19, v_143);
Alloc param_20 = a;
uint param_21 = ix + 10u;
uint raw10 = read_mem(param_20, param_21, v_136, v_136BufferSize);
uint raw10 = read_mem(param_20, param_21, v_143);
Alloc param_22 = a;
uint param_23 = ix + 11u;
uint raw11 = read_mem(param_22, param_23, v_136, v_136BufferSize);
uint raw11 = read_mem(param_22, param_23, v_143);
PathCubic s;
s.p0 = float2(as_type<float>(raw0), as_type<float>(raw1));
s.p1 = float2(as_type<float>(raw2), as_type<float>(raw3));
@ -250,11 +253,11 @@ PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref,
}
// Reads the PathCubic payload of a path segment by forwarding to PathCubic_read with
// a reference 4 bytes past ref.offset (PathSeg_tag reads the word at ref.offset itself).
// NOTE(review): the signature and the return statement each appear twice (v_136 + buffer
// size vs. v_143). This looks like a diff rendered without +/- markers, with the
// pre-change line directly followed by its post-change replacement.
static inline __attribute__((always_inline))
PathCubic PathSeg_Cubic_read(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
PathCubic PathSeg_Cubic_read(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143)
{
Alloc param = a;
PathCubicRef param_1 = PathCubicRef{ ref.offset + 4u };
return PathCubic_read(param, param_1, v_136, v_136BufferSize);
return PathCubic_read(param, param_1, v_143);
}
static inline __attribute__((always_inline))
@ -310,18 +313,18 @@ uint fill_mode_from_flags(thread const uint& flags)
}
static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_143)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_136, v_136BufferSize);
uint raw0 = read_mem(param, param_1, v_143);
Alloc param_2 = a;
uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_136, v_136BufferSize);
uint raw1 = read_mem(param_2, param_3, v_143);
Alloc param_4 = a;
uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_136, v_136BufferSize);
uint raw2 = read_mem(param_4, param_5, v_143);
Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 };
@ -350,22 +353,16 @@ float2 eval_quad(thread const float2& p0, thread const float2& p1, thread const
}
// Bump allocator over the shared Memory buffer, shown in two interleaved revisions.
// NOTE(review): this span looks like a diff rendered without +/- markers. Lines that
// use v_136 / MallocResult / `r` belong to the old malloc(); lines that use v_143 /
// mem_size / stage belong to the new malloc_stage().
//
// Old malloc(size): atomically adds size to mem_offset, marks the result as failed when
//   offset + size exceeds a limit derived from v_136BufferSize, and on failure raises
//   mem_error to at least 1 via atomic max.
// New malloc_stage(size, mem_size, stage): atomically adds size to mem_offset and returns
//   the previous mem_offset value; when offset + size > mem_size it ORs `stage` into
//   mem_error and returns 0 instead (main0 treats a 0 result as failure).
// NOTE(review): offset + size is an unsigned 32-bit sum; if it can wrap, the bounds
// check would pass incorrectly - confirm the sizes involved are bounded.
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_136, constant uint& v_136BufferSize)
uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_143)
{
uint _142 = atomic_fetch_add_explicit((device atomic_uint*)&v_136.mem_offset, size, memory_order_relaxed);
uint offset = _142;
MallocResult r;
r.failed = (offset + size) > uint(int((v_136BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
// fetch_add returns the value mem_offset held before this request was added.
uint _158 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.mem_offset, size, memory_order_relaxed);
uint offset = _158;
if ((offset + size) > mem_size)
{
uint _171 = atomic_fetch_max_explicit((device atomic_uint*)&v_136.mem_error, 1u, memory_order_relaxed);
return r;
// Record the failing stage as a bit in mem_error; check_deps tests these bits.
uint _168 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, stage, memory_order_relaxed);
offset = 0u;
}
return r;
return offset;
}
static inline __attribute__((always_inline))
@ -375,7 +372,7 @@ TileRef Tile_index(thread const TileRef& ref, thread const uint& index)
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_136, constant uint& v_136BufferSize)
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_143)
{
Alloc param = alloc;
uint param_1 = offset;
@ -383,61 +380,66 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_136.memory[offset] = val;
v_143.memory[offset] = val;
}
// Serializes a TileSeg into six consecutive 32-bit words via write_mem, starting at
// word index ref.offset >> 2. Word order: origin.x, origin.y, vector.x, vector.y,
// y_edge (each float bit-cast with as_type<uint>), then next.offset.
// NOTE(review): the signature and every write_mem call appear twice (v_136 + buffer
// size vs. v_143). This looks like a diff rendered without +/- markers, with the
// pre-change line directly followed by its post-change replacement.
static inline __attribute__((always_inline))
void TileSeg_write(thread const Alloc& a, thread const TileSegRef& ref, thread const TileSeg& s, device Memory& v_136, constant uint& v_136BufferSize)
void TileSeg_write(thread const Alloc& a, thread const TileSegRef& ref, thread const TileSeg& s, device Memory& v_143)
{
// ref.offset is shifted right by 2 to turn it into an index of 32-bit words.
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.origin.x);
write_mem(param, param_1, param_2, v_136, v_136BufferSize);
write_mem(param, param_1, param_2, v_143);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.origin.y);
write_mem(param_3, param_4, param_5, v_136, v_136BufferSize);
write_mem(param_3, param_4, param_5, v_143);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.vector.x);
write_mem(param_6, param_7, param_8, v_136, v_136BufferSize);
write_mem(param_6, param_7, param_8, v_143);
Alloc param_9 = a;
uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.vector.y);
write_mem(param_9, param_10, param_11, v_136, v_136BufferSize);
write_mem(param_9, param_10, param_11, v_143);
Alloc param_12 = a;
uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.y_edge);
write_mem(param_12, param_13, param_14, v_136, v_136BufferSize);
write_mem(param_12, param_13, param_14, v_143);
// The last word is stored as-is; main0 fills next.offset with the value its atomic exchange returned.
Alloc param_15 = a;
uint param_16 = ix + 5u;
uint param_17 = s.next.offset;
write_mem(param_15, param_16, param_17, v_136, v_136BufferSize);
write_mem(param_15, param_16, param_17, v_143);
}
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_136 [[buffer(0)]], const device ConfigBuf& _710 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
kernel void main0(device Memory& v_143 [[buffer(0)]], const device ConfigBuf& _711 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
constant uint& v_136BufferSize = spvBufferSizeConstants[0];
uint element_ix = gl_GlobalInvocationID.x;
PathSegRef ref = PathSegRef{ _710.conf.pathseg_alloc.offset + (element_ix * 52u) };
PathSegTag tag = PathSegTag{ 0u, 0u };
if (element_ix < _710.conf.n_pathseg)
bool mem_ok = true;
uint param = 7u;
bool _694 = check_deps(param, v_143);
if (!_694)
{
Alloc param;
param.offset = _710.conf.pathseg_alloc.offset;
PathSegRef param_1 = ref;
tag = PathSeg_tag(param, param_1, v_136, v_136BufferSize);
return;
}
uint element_ix = gl_GlobalInvocationID.x;
PathSegRef ref = PathSegRef{ _711.conf.pathseg_alloc.offset + (element_ix * 52u) };
PathSegTag tag = PathSegTag{ 0u, 0u };
if (element_ix < _711.conf.n_pathseg)
{
Alloc param_1;
param_1.offset = _711.conf.pathseg_alloc.offset;
PathSegRef param_2 = ref;
tag = PathSeg_tag(param_1, param_2, v_143);
}
bool mem_ok = v_136.mem_error == 0u;
switch (tag.tag)
{
case 1u:
{
Alloc param_2;
param_2.offset = _710.conf.pathseg_alloc.offset;
PathSegRef param_3 = ref;
PathCubic cubic = PathSeg_Cubic_read(param_2, param_3, v_136, v_136BufferSize);
Alloc param_3;
param_3.offset = _711.conf.pathseg_alloc.offset;
PathSegRef param_4 = ref;
PathCubic cubic = PathSeg_Cubic_read(param_3, param_4, v_143);
float2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3;
float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);
uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u);
@ -449,40 +451,40 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
for (uint i = 0u; i < n_quads; i++)
{
float t = float(i + 1u) * _step;
float2 param_4 = cubic.p0;
float2 param_5 = cubic.p1;
float2 param_6 = cubic.p2;
float2 param_7 = cubic.p3;
float param_8 = t;
float2 qp2 = eval_cubic(param_4, param_5, param_6, param_7, param_8);
float2 param_9 = cubic.p0;
float2 param_10 = cubic.p1;
float2 param_11 = cubic.p2;
float2 param_12 = cubic.p3;
float param_13 = t - (0.5 * _step);
float2 qp1 = eval_cubic(param_9, param_10, param_11, param_12, param_13);
float2 param_5 = cubic.p0;
float2 param_6 = cubic.p1;
float2 param_7 = cubic.p2;
float2 param_8 = cubic.p3;
float param_9 = t;
float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9);
float2 param_10 = cubic.p0;
float2 param_11 = cubic.p1;
float2 param_12 = cubic.p2;
float2 param_13 = cubic.p3;
float param_14 = t - (0.5 * _step);
float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14);
qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5);
float2 param_14 = qp0;
float2 param_15 = qp1;
float2 param_16 = qp2;
float param_17 = 0.4743416607379913330078125;
SubdivResult params = estimate_subdiv(param_14, param_15, param_16, param_17);
float2 param_15 = qp0;
float2 param_16 = qp1;
float2 param_17 = qp2;
float param_18 = 0.4743416607379913330078125;
SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18);
keep_params[i] = params;
val += params.val;
qp0 = qp2;
}
uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u);
uint param_18 = tag.flags;
bool is_stroke = fill_mode_from_flags(param_18) == 1u;
uint param_19 = tag.flags;
bool is_stroke = fill_mode_from_flags(param_19) == 1u;
uint path_ix = cubic.path_ix;
Alloc param_19;
param_19.offset = _710.conf.tile_alloc.offset;
PathRef param_20 = PathRef{ _710.conf.tile_alloc.offset + (path_ix * 12u) };
Path path = Path_read(param_19, param_20, v_136, v_136BufferSize);
uint param_21 = path.tiles.offset;
uint param_22 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_23 = mem_ok;
Alloc path_alloc = new_alloc(param_21, param_22, param_23);
Alloc param_20;
param_20.offset = _711.conf.tile_alloc.offset;
PathRef param_21 = PathRef{ _711.conf.tile_alloc.offset + (path_ix * 12u) };
Path path = Path_read(param_20, param_21, v_143);
uint param_22 = path.tiles.offset;
uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_24 = true;
Alloc path_alloc = new_alloc(param_22, param_23, param_24);
int4 bbox = int4(path.bbox);
float2 p0 = cubic.p0;
qp0 = cubic.p0;
@ -490,44 +492,44 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
int n_out = 1;
float val_sum = 0.0;
float2 p1;
float _1147;
float _1143;
TileSeg tile_seg;
for (uint i_1 = 0u; i_1 < n_quads; i_1++)
{
float t_1 = float(i_1 + 1u) * _step;
float2 param_24 = cubic.p0;
float2 param_25 = cubic.p1;
float2 param_26 = cubic.p2;
float2 param_27 = cubic.p3;
float param_28 = t_1;
float2 qp2_1 = eval_cubic(param_24, param_25, param_26, param_27, param_28);
float2 param_29 = cubic.p0;
float2 param_30 = cubic.p1;
float2 param_31 = cubic.p2;
float2 param_32 = cubic.p3;
float param_33 = t_1 - (0.5 * _step);
float2 qp1_1 = eval_cubic(param_29, param_30, param_31, param_32, param_33);
float2 param_25 = cubic.p0;
float2 param_26 = cubic.p1;
float2 param_27 = cubic.p2;
float2 param_28 = cubic.p3;
float param_29 = t_1;
float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29);
float2 param_30 = cubic.p0;
float2 param_31 = cubic.p1;
float2 param_32 = cubic.p2;
float2 param_33 = cubic.p3;
float param_34 = t_1 - (0.5 * _step);
float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34);
qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5);
SubdivResult params_1 = keep_params[i_1];
float param_34 = params_1.a0;
float u0 = approx_parabola_inv_integral(param_34);
float param_35 = params_1.a2;
float u2 = approx_parabola_inv_integral(param_35);
float param_35 = params_1.a0;
float u0 = approx_parabola_inv_integral(param_35);
float param_36 = params_1.a2;
float u2 = approx_parabola_inv_integral(param_36);
float uscale = 1.0 / (u2 - u0);
float target = float(n_out) * v_step;
for (;;)
{
bool _1040 = uint(n_out) == n;
bool _1050;
if (!_1040)
bool _1036 = uint(n_out) == n;
bool _1046;
if (!_1036)
{
_1050 = target < (val_sum + params_1.val);
_1046 = target < (val_sum + params_1.val);
}
else
{
_1050 = _1040;
_1046 = _1036;
}
if (_1050)
if (_1046)
{
if (uint(n_out) == n)
{
@ -537,14 +539,14 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
{
float u = (target - val_sum) / params_1.val;
float a = mix(params_1.a0, params_1.a2, u);
float param_36 = a;
float au = approx_parabola_inv_integral(param_36);
float param_37 = a;
float au = approx_parabola_inv_integral(param_37);
float t_2 = (au - u0) * uscale;
float2 param_37 = qp0;
float2 param_38 = qp1_1;
float2 param_39 = qp2_1;
float param_40 = t_2;
p1 = eval_quad(param_37, param_38, param_39, param_40);
float2 param_38 = qp0;
float2 param_39 = qp1_1;
float2 param_40 = qp2_1;
float param_41 = t_2;
p1 = eval_quad(param_38, param_39, param_40, param_41);
}
float xmin = fast::min(p0.x, p1.x) - cubic.stroke.x;
float xmax = fast::max(p0.x, p1.x) + cubic.stroke.x;
@ -554,13 +556,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
float dy = p1.y - p0.y;
if (abs(dy) < 9.999999717180685365747194737196e-10)
{
_1147 = 1000000000.0;
_1143 = 1000000000.0;
}
else
{
_1147 = dx / dy;
_1143 = dx / dy;
}
float invslope = _1147;
float invslope = _1143;
float c = (cubic.stroke.x + (abs(invslope) * (8.0 + cubic.stroke.y))) * 0.0625;
float b = invslope;
float a_1 = (p0.x - ((p0.y - 8.0) * b)) * 0.0625;
@ -576,14 +578,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
int stride = bbox.z - bbox.x;
int base = ((y0 - bbox.y) * stride) - bbox.x;
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
uint param_41 = n_tile_alloc * 24u;
MallocResult _1263 = malloc(param_41, v_136, v_136BufferSize);
MallocResult tile_alloc = _1263;
if (tile_alloc.failed || (!mem_ok))
uint malloc_size = n_tile_alloc * 24u;
uint param_42 = malloc_size;
uint param_43 = _711.conf.mem_size;
uint param_44 = 4u;
uint _1265 = malloc_stage(param_42, param_43, param_44, v_143);
uint tile_offset = _1265;
if (tile_offset == 0u)
{
return;
mem_ok = false;
}
uint tile_offset = tile_alloc.alloc.offset;
uint param_45 = tile_offset;
uint param_46 = malloc_size;
bool param_47 = true;
Alloc tile_alloc = new_alloc(param_45, param_46, param_47);
int xray = int(floor(p0.x * 0.0625));
int last_xray = int(floor(p1.x * 0.0625));
if (p0.y > p1.y)
@ -596,38 +604,33 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
{
float tile_y0 = float(y * 16);
int xbackdrop = max((xray + 1), bbox.x);
bool _1319 = !is_stroke;
bool _1329;
if (_1319)
bool _1322 = !is_stroke;
bool _1332;
if (_1322)
{
_1329 = fast::min(p0.y, p1.y) < tile_y0;
_1332 = fast::min(p0.y, p1.y) < tile_y0;
}
else
{
_1329 = _1319;
_1332 = _1322;
}
bool _1336;
if (_1329)
bool _1339;
if (_1332)
{
_1336 = xbackdrop < bbox.z;
_1339 = xbackdrop < bbox.z;
}
else
{
_1336 = _1329;
_1339 = _1332;
}
if (_1336)
if (_1339)
{
int backdrop = (p1.y < p0.y) ? 1 : (-1);
TileRef param_42 = path.tiles;
uint param_43 = uint(base + xbackdrop);
TileRef tile_ref = Tile_index(param_42, param_43);
TileRef param_48 = path.tiles;
uint param_49 = uint(base + xbackdrop);
TileRef tile_ref = Tile_index(param_48, param_49);
uint tile_el = tile_ref.offset >> uint(2);
Alloc param_44 = path_alloc;
uint param_45 = tile_el + 1u;
if (touch_mem(param_44, param_45))
{
uint _1374 = atomic_fetch_add_explicit((device atomic_uint*)&v_136.memory[tile_el + 1u], uint(backdrop), memory_order_relaxed);
}
uint _1369 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.memory[tile_el + 1u], uint(backdrop), memory_order_relaxed);
}
int next_xray = last_xray;
if (y < (y1 - 1))
@ -645,18 +648,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
for (int x = xx0; x < xx1; x++)
{
float tile_x0 = float(x * 16);
TileRef param_46 = TileRef{ path.tiles.offset };
uint param_47 = uint(base + x);
TileRef tile_ref_1 = Tile_index(param_46, param_47);
TileRef param_50 = TileRef{ path.tiles.offset };
uint param_51 = uint(base + x);
TileRef tile_ref_1 = Tile_index(param_50, param_51);
uint tile_el_1 = tile_ref_1.offset >> uint(2);
uint old = 0u;
Alloc param_48 = path_alloc;
uint param_49 = tile_el_1;
if (touch_mem(param_48, param_49))
{
uint _1477 = atomic_exchange_explicit((device atomic_uint*)&v_136.memory[tile_el_1], tile_offset, memory_order_relaxed);
old = _1477;
}
uint _1465 = atomic_exchange_explicit((device atomic_uint*)&v_143.memory[tile_el_1], tile_offset, memory_order_relaxed);
old = _1465;
tile_seg.origin = p0;
tile_seg.vector = p1 - p0;
float y_edge = 0.0;
@ -687,10 +685,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
}
tile_seg.y_edge = y_edge;
tile_seg.next.offset = old;
Alloc param_50 = tile_alloc.alloc;
TileSegRef param_51 = TileSegRef{ tile_offset };
TileSeg param_52 = tile_seg;
TileSeg_write(param_50, param_51, param_52, v_136, v_136BufferSize);
if (mem_ok)
{
Alloc param_52 = tile_alloc;
TileSegRef param_53 = TileSegRef{ tile_offset };
TileSeg param_54 = tile_seg;
TileSeg_write(param_52, param_53, param_54, v_143);
}
tile_offset += 24u;
}
xc += b;

Binary file not shown.

Binary file not shown.

View file

@ -52,6 +52,7 @@ struct Monoid
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -164,7 +165,7 @@ uint read_mem(Alloc alloc, uint offset)
{
return 0u;
}
uint v = _111.Load(offset * 4 + 8);
uint v = _111.Load(offset * 4 + 12);
return v;
}
@ -203,7 +204,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_111.Store(offset * 4 + 8, val);
_111.Store(offset * 4 + 12, val);
}
void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s)
@ -365,7 +366,7 @@ uint round_up(float x)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = _574.Load(((_639.Load(92) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint tag_word = _574.Load(((_639.Load(96) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint param = tag_word;
TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm;
@ -404,14 +405,14 @@ void comp_main()
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4);
}
uint ps_ix = (_639.Load(96) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(88) >> uint(2)) + tm.linewidth_ix;
uint ps_ix = (_639.Load(100) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(92) >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix;
uint trans_ix = tm.trans_ix;
TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) };
TransformSegRef _771 = { _639.Load(40) + (trans_ix * 24u) };
TransformSegRef trans_ref = _771;
PathSegRef _781 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
PathSegRef ps_ref = _781;
PathSegRef _780 = { _639.Load(32) + (tm.pathseg_ix * 52u) };
PathSegRef ps_ref = _780;
float linewidth[4];
uint save_trans_ix[4];
float2 p0;
@ -464,9 +465,9 @@ void comp_main()
}
}
}
Alloc _877;
_877.offset = _639.Load(36);
param_13.offset = _877.offset;
Alloc _876;
_876.offset = _639.Load(40);
param_13.offset = _876.offset;
TransformSegRef param_14 = trans_ref;
TransformSeg transform = TransformSeg_read(param_13, param_14);
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
@ -475,25 +476,25 @@ void comp_main()
if (seg_type >= 2u)
{
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _947 = bbox;
float2 _950 = min(_947.xy, p2);
bbox.x = _950.x;
bbox.y = _950.y;
float4 _955 = bbox;
float2 _958 = max(_955.zw, p2);
bbox.z = _958.x;
bbox.w = _958.y;
float4 _946 = bbox;
float2 _949 = min(_946.xy, p2);
bbox.x = _949.x;
bbox.y = _949.y;
float4 _954 = bbox;
float2 _957 = max(_954.zw, p2);
bbox.z = _957.x;
bbox.w = _957.y;
if (seg_type == 3u)
{
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _983 = bbox;
float2 _986 = min(_983.xy, p3);
bbox.x = _986.x;
bbox.y = _986.y;
float4 _991 = bbox;
float2 _994 = max(_991.zw, p3);
bbox.z = _994.x;
bbox.w = _994.y;
float4 _982 = bbox;
float2 _985 = min(_982.xy, p3);
bbox.x = _985.x;
bbox.y = _985.y;
float4 _990 = bbox;
float2 _993 = max(_990.zw, p3);
bbox.z = _993.x;
bbox.w = _993.y;
}
else
{
@ -524,9 +525,9 @@ void comp_main()
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
cubic.stroke = stroke;
uint fill_mode = uint(linewidth[i_1] >= 0.0f);
Alloc _1089;
_1089.offset = _639.Load(28);
param_15.offset = _1089.offset;
Alloc _1088;
_1088.offset = _639.Load(32);
param_15.offset = _1088.offset;
PathSegRef param_16 = ps_ref;
uint param_17 = fill_mode;
PathCubic param_18 = cubic;
@ -571,7 +572,7 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
uint path_ix = save_path_ix;
uint bbox_out_ix = (_639.Load(40) >> uint(2)) + (path_ix * 6u);
uint bbox_out_ix = (_639.Load(44) >> uint(2)) + (path_ix * 6u);
Monoid row = monoid_identity();
if (gl_LocalInvocationID.x > 0u)
{
@ -583,24 +584,24 @@ void comp_main()
Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false;
bool _1264 = i_4 == 3u;
bool _1270;
if (_1264)
bool _1263 = i_4 == 3u;
bool _1269;
if (_1263)
{
_1270 = gl_LocalInvocationID.x == 255u;
_1269 = gl_LocalInvocationID.x == 255u;
}
else
{
_1270 = _1264;
_1269 = _1263;
}
if (_1270)
if (_1269)
{
do_atomic = true;
}
if ((m.flags & 1u) != 0u)
{
_111.Store((bbox_out_ix + 4u) * 4 + 8, asuint(linewidth[i_4]));
_111.Store((bbox_out_ix + 5u) * 4 + 8, save_trans_ix[i_4]);
_111.Store((bbox_out_ix + 4u) * 4 + 12, asuint(linewidth[i_4]));
_111.Store((bbox_out_ix + 5u) * 4 + 12, save_trans_ix[i_4]);
if ((m.flags & 2u) == 0u)
{
do_atomic = true;
@ -608,43 +609,43 @@ void comp_main()
else
{
float param_25 = m.bbox.x;
_111.Store(bbox_out_ix * 4 + 8, round_down(param_25));
_111.Store(bbox_out_ix * 4 + 12, round_down(param_25));
float param_26 = m.bbox.y;
_111.Store((bbox_out_ix + 1u) * 4 + 8, round_down(param_26));
_111.Store((bbox_out_ix + 1u) * 4 + 12, round_down(param_26));
float param_27 = m.bbox.z;
_111.Store((bbox_out_ix + 2u) * 4 + 8, round_up(param_27));
_111.Store((bbox_out_ix + 2u) * 4 + 12, round_up(param_27));
float param_28 = m.bbox.w;
_111.Store((bbox_out_ix + 3u) * 4 + 8, round_up(param_28));
_111.Store((bbox_out_ix + 3u) * 4 + 12, round_up(param_28));
bbox_out_ix += 6u;
do_atomic = false;
}
}
if (do_atomic)
{
bool _1335 = m.bbox.z > m.bbox.x;
bool _1344;
if (!_1335)
bool _1334 = m.bbox.z > m.bbox.x;
bool _1343;
if (!_1334)
{
_1344 = m.bbox.w > m.bbox.y;
_1343 = m.bbox.w > m.bbox.y;
}
else
{
_1344 = _1335;
_1343 = _1334;
}
if (_1344)
if (_1343)
{
float param_29 = m.bbox.x;
uint _1353;
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353);
uint _1352;
_111.InterlockedMin(bbox_out_ix * 4 + 12, round_down(param_29), _1352);
float param_30 = m.bbox.y;
uint _1361;
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361);
uint _1360;
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 12, round_down(param_30), _1360);
float param_31 = m.bbox.z;
uint _1369;
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369);
uint _1368;
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 12, round_up(param_31), _1368);
float param_32 = m.bbox.w;
uint _1377;
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377);
uint _1376;
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 12, round_up(param_32), _1376);
}
bbox_out_ix += 6u;
}

View file

@ -102,6 +102,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -117,6 +118,7 @@ struct Alloc_1
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -545,25 +547,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
if (seg_type >= 2u)
{
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _947 = bbox;
float2 _950 = fast::min(_947.xy, p2);
bbox.x = _950.x;
bbox.y = _950.y;
float4 _955 = bbox;
float2 _958 = fast::max(_955.zw, p2);
bbox.z = _958.x;
bbox.w = _958.y;
float4 _946 = bbox;
float2 _949 = fast::min(_946.xy, p2);
bbox.x = _949.x;
bbox.y = _949.y;
float4 _954 = bbox;
float2 _957 = fast::max(_954.zw, p2);
bbox.z = _957.x;
bbox.w = _957.y;
if (seg_type == 3u)
{
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _983 = bbox;
float2 _986 = fast::min(_983.xy, p3);
bbox.x = _986.x;
bbox.y = _986.y;
float4 _991 = bbox;
float2 _994 = fast::max(_991.zw, p3);
bbox.z = _994.x;
bbox.w = _994.y;
float4 _982 = bbox;
float2 _985 = fast::min(_982.xy, p3);
bbox.x = _985.x;
bbox.y = _985.y;
float4 _990 = bbox;
float2 _993 = fast::max(_990.zw, p3);
bbox.z = _993.x;
bbox.w = _993.y;
}
else
{
@ -651,17 +653,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false;
bool _1264 = i_4 == 3u;
bool _1270;
if (_1264)
bool _1263 = i_4 == 3u;
bool _1269;
if (_1263)
{
_1270 = gl_LocalInvocationID.x == 255u;
_1269 = gl_LocalInvocationID.x == 255u;
}
else
{
_1270 = _1264;
_1269 = _1263;
}
if (_1270)
if (_1269)
{
do_atomic = true;
}
@ -689,26 +691,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
}
if (do_atomic)
{
bool _1335 = m.bbox.z > m.bbox.x;
bool _1344;
if (!_1335)
bool _1334 = m.bbox.z > m.bbox.x;
bool _1343;
if (!_1334)
{
_1344 = m.bbox.w > m.bbox.y;
_1343 = m.bbox.w > m.bbox.y;
}
else
{
_1344 = _1335;
_1343 = _1334;
}
if (_1344)
if (_1343)
{
float param_29 = m.bbox.x;
uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
float param_30 = m.bbox.y;
uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
float param_31 = m.bbox.z;
uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
float param_32 = m.bbox.w;
uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
}
bbox_out_ix += 6u;
}

Binary file not shown.

Binary file not shown.

View file

@ -14,6 +14,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -92,7 +93,7 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.Load(92) >> uint(2)) + ix;
uint scene_ix = (_139.Load(96) >> uint(2)) + ix;
uint tag_word = _151.Load(scene_ix * 4 + 0);
uint param = tag_word;
TagMonoid agg = reduce_tag(param);

View file

@ -21,6 +21,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -78,6 +79,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};

Binary file not shown.

Binary file not shown.

View file

@ -3,12 +3,6 @@ struct Alloc
uint offset;
};
// Value returned by malloc() in this file: the Alloc handle for the request plus a
// flag that malloc() sets when offset + size exceeds the usable buffer size.
// NOTE(review): the enclosing hunk shrinks from 12 to 6 lines, so this struct appears
// to be deleted by the commit (malloc_stage returns a plain uint instead) - confirm.
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathRef
{
uint offset;
@ -27,6 +21,7 @@ struct Path
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -58,9 +53,9 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _70 : register(u0, space0);
ByteAddressBuffer _181 : register(t1, space0);
ByteAddressBuffer _257 : register(t2, space0);
RWByteAddressBuffer _53 : register(u0, space0);
ByteAddressBuffer _148 : register(t1, space0);
ByteAddressBuffer _232 : register(t2, space0);
static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID;
@ -71,53 +66,38 @@ struct SPIRV_Cross_Input
};
groupshared uint sh_tile_count[256];
groupshared MallocResult sh_tile_alloc;
groupshared uint sh_tile_offset;
bool check_deps(uint dep_stage)
{
uint _60;
_53.InterlockedOr(4, 0u, _60);
return (_60 & dep_stage) == 0u;
}
float4 load_draw_bbox(uint draw_ix)
{
uint base = (_181.Load(64) >> uint(2)) + (4u * draw_ix);
float x0 = asfloat(_70.Load(base * 4 + 8));
float y0 = asfloat(_70.Load((base + 1u) * 4 + 8));
float x1 = asfloat(_70.Load((base + 2u) * 4 + 8));
float y1 = asfloat(_70.Load((base + 3u) * 4 + 8));
uint base = (_148.Load(68) >> uint(2)) + (4u * draw_ix);
float x0 = asfloat(_53.Load(base * 4 + 12));
float y0 = asfloat(_53.Load((base + 1u) * 4 + 12));
float x1 = asfloat(_53.Load((base + 2u) * 4 + 12));
float y1 = asfloat(_53.Load((base + 3u) * 4 + 12));
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
Alloc new_alloc(uint offset, uint size, bool mem_ok)
uint malloc_stage(uint size, uint mem_size, uint stage)
{
Alloc a;
a.offset = offset;
return a;
}
MallocResult malloc(uint size)
{
uint _76;
_70.InterlockedAdd(0, size, _76);
uint offset = _76;
uint _83;
_70.GetDimensions(_83);
_83 = (_83 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_83) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
uint _70;
_53.InterlockedAdd(0, size, _70);
uint offset = _70;
if ((offset + size) > mem_size)
{
uint _105;
_70.InterlockedMax(4, 1u, _105);
return r;
uint _80;
_53.InterlockedOr(4, stage, _80);
offset = 0u;
}
return r;
}
Alloc slice_mem(Alloc a, uint offset, uint size)
{
Alloc _131 = { a.offset + offset };
return _131;
return offset;
}
bool touch_mem(Alloc alloc, uint offset)
@ -133,7 +113,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_70.Store(offset * 4 + 8, val);
_53.Store(offset * 4 + 12, val);
}
void Path_write(Alloc a, PathRef ref, Path s)
@ -155,15 +135,21 @@ void Path_write(Alloc a, PathRef ref, Path s)
void comp_main()
{
uint param = 1u;
bool _192 = check_deps(param);
if (!_192)
{
return;
}
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef _241 = { _181.Load(16) + (element_ix * 12u) };
PathRef path_ref = _241;
uint drawtag_base = _181.Load(100) >> uint(2);
PathRef _216 = { _148.Load(20) + (element_ix * 12u) };
PathRef path_ref = _216;
uint drawtag_base = _148.Load(104) >> uint(2);
uint drawtag = 0u;
if (element_ix < _181.Load(0))
if (element_ix < _148.Load(4))
{
drawtag = _257.Load((drawtag_base + element_ix) * 4 + 0);
drawtag = _232.Load((drawtag_base + element_ix) * 4 + 0);
}
int x0 = 0;
int y0 = 0;
@ -171,17 +157,17 @@ void comp_main()
int y1 = 0;
if ((drawtag != 0u) && (drawtag != 37u))
{
uint param = element_ix;
float4 bbox = load_draw_bbox(param);
uint param_1 = element_ix;
float4 bbox = load_draw_bbox(param_1);
x0 = int(floor(bbox.x * 0.0625f));
y0 = int(floor(bbox.y * 0.0625f));
x1 = int(ceil(bbox.z * 0.0625f));
y1 = int(ceil(bbox.w * 0.0625f));
}
x0 = clamp(x0, 0, int(_181.Load(8)));
y0 = clamp(y0, 0, int(_181.Load(12)));
x1 = clamp(x1, 0, int(_181.Load(8)));
y1 = clamp(y1, 0, int(_181.Load(12)));
x0 = clamp(x0, 0, int(_148.Load(12)));
y0 = clamp(y0, 0, int(_148.Load(16)));
x1 = clamp(x1, 0, int(_148.Load(12)));
y1 = clamp(y1, 0, int(_148.Load(16)));
Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0));
@ -199,59 +185,45 @@ void comp_main()
}
if (th_ix == 255u)
{
uint param_1 = total_tile_count * 8u;
MallocResult _392 = malloc(param_1);
sh_tile_alloc = _392;
uint param_2 = total_tile_count * 8u;
uint param_3 = _148.Load(0);
uint param_4 = 2u;
uint _370 = malloc_stage(param_2, param_3, param_4);
sh_tile_offset = _370;
}
GroupMemoryBarrierWithGroupSync();
MallocResult alloc_start = sh_tile_alloc;
bool _403;
if (!alloc_start.failed)
{
_403 = _70.Load(4) != 0u;
}
else
{
_403 = alloc_start.failed;
}
if (_403)
uint offset_start = sh_tile_offset;
if (offset_start == 0u)
{
return;
}
if (element_ix < _181.Load(0))
if (element_ix < _148.Load(4))
{
uint _416;
uint _387;
if (th_ix > 0u)
{
_416 = sh_tile_count[th_ix - 1u];
_387 = sh_tile_count[th_ix - 1u];
}
else
{
_416 = 0u;
_387 = 0u;
}
uint tile_subix = _416;
Alloc param_2 = alloc_start.alloc;
uint param_3 = 8u * tile_subix;
uint param_4 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
TileRef _438 = { tiles_alloc.offset };
path.tiles = _438;
Alloc _444;
_444.offset = _181.Load(16);
uint tile_subix = _387;
TileRef _400 = { offset_start + (8u * tile_subix) };
path.tiles = _400;
Alloc _406;
_406.offset = _148.Load(20);
Alloc param_5;
param_5.offset = _444.offset;
param_5.offset = _406.offset;
PathRef param_6 = path_ref;
Path param_7 = path;
Path_write(param_5, param_6, param_7);
}
uint total_count = sh_tile_count[255] * 2u;
uint start_ix = alloc_start.alloc.offset >> uint(2);
uint start_ix = offset_start >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{
Alloc param_8 = alloc_start.alloc;
uint param_9 = start_ix + i_1;
uint param_10 = 0u;
write_mem(param_8, param_9, param_10);
_53.Store((start_ix + i_1) * 4 + 12, 0u);
}
}

View file

@ -12,12 +12,6 @@ struct Alloc
uint offset;
};
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathRef
{
uint offset;
@ -38,6 +32,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -48,6 +43,7 @@ struct Alloc_1
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -90,48 +86,35 @@ struct SceneBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_70, constant uint& v_70BufferSize, const device ConfigBuf& v_181)
bool check_deps(thread const uint& dep_stage, device Memory& v_53)
{
uint base = (v_181.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
float x0 = as_type<float>(v_70.memory[base]);
float y0 = as_type<float>(v_70.memory[base + 1u]);
float x1 = as_type<float>(v_70.memory[base + 2u]);
float y1 = as_type<float>(v_70.memory[base + 3u]);
uint _60 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, 0u, memory_order_relaxed);
return (_60 & dep_stage) == 0u;
}
static inline __attribute__((always_inline))
float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_53, const device ConfigBuf& v_148)
{
uint base = (v_148.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
float x0 = as_type<float>(v_53.memory[base]);
float y0 = as_type<float>(v_53.memory[base + 1u]);
float x1 = as_type<float>(v_53.memory[base + 2u]);
float y1 = as_type<float>(v_53.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1);
return bbox;
}
static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_53)
{
Alloc a;
a.offset = offset;
return a;
}
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_70, constant uint& v_70BufferSize)
{
uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&v_70.mem_offset, size, memory_order_relaxed);
uint offset = _76;
MallocResult r;
r.failed = (offset + size) > uint(int((v_70BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
uint _70 = atomic_fetch_add_explicit((device atomic_uint*)&v_53.mem_offset, size, memory_order_relaxed);
uint offset = _70;
if ((offset + size) > mem_size)
{
uint _105 = atomic_fetch_max_explicit((device atomic_uint*)&v_70.mem_error, 1u, memory_order_relaxed);
return r;
uint _80 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, stage, memory_order_relaxed);
offset = 0u;
}
return r;
}
static inline __attribute__((always_inline))
Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size)
{
return Alloc{ a.offset + offset };
return offset;
}
static inline __attribute__((always_inline))
@ -141,7 +124,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
}
static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_70, constant uint& v_70BufferSize)
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_53)
{
Alloc param = alloc;
uint param_1 = offset;
@ -149,40 +132,45 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{
return;
}
v_70.memory[offset] = val;
v_53.memory[offset] = val;
}
static inline __attribute__((always_inline))
void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_70, constant uint& v_70BufferSize)
void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_53)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.bbox.x | (s.bbox.y << uint(16));
write_mem(param, param_1, param_2, v_70, v_70BufferSize);
write_mem(param, param_1, param_2, v_53);
Alloc param_3 = a;
uint param_4 = ix + 1u;
uint param_5 = s.bbox.z | (s.bbox.w << uint(16));
write_mem(param_3, param_4, param_5, v_70, v_70BufferSize);
write_mem(param_3, param_4, param_5, v_53);
Alloc param_6 = a;
uint param_7 = ix + 2u;
uint param_8 = s.tiles.offset;
write_mem(param_6, param_7, param_8, v_70, v_70BufferSize);
write_mem(param_6, param_7, param_8, v_53);
}
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_70 [[buffer(0)]], const device ConfigBuf& v_181 [[buffer(1)]], const device SceneBuf& _257 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
kernel void main0(device Memory& v_53 [[buffer(0)]], const device ConfigBuf& v_148 [[buffer(1)]], const device SceneBuf& _232 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
threadgroup uint sh_tile_count[256];
threadgroup MallocResult sh_tile_alloc;
constant uint& v_70BufferSize = spvBufferSizeConstants[0];
threadgroup uint sh_tile_offset;
uint param = 1u;
bool _192 = check_deps(param, v_53);
if (!_192)
{
return;
}
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef{ v_181.conf.tile_alloc.offset + (element_ix * 12u) };
uint drawtag_base = v_181.conf.drawtag_offset >> uint(2);
PathRef path_ref = PathRef{ v_148.conf.tile_alloc.offset + (element_ix * 12u) };
uint drawtag_base = v_148.conf.drawtag_offset >> uint(2);
uint drawtag = 0u;
if (element_ix < v_181.conf.n_elements)
if (element_ix < v_148.conf.n_elements)
{
drawtag = _257.scene[drawtag_base + element_ix];
drawtag = _232.scene[drawtag_base + element_ix];
}
int x0 = 0;
int y0 = 0;
@ -190,17 +178,17 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
int y1 = 0;
if ((drawtag != 0u) && (drawtag != 37u))
{
uint param = element_ix;
float4 bbox = load_draw_bbox(param, v_70, v_70BufferSize, v_181);
uint param_1 = element_ix;
float4 bbox = load_draw_bbox(param_1, v_53, v_148);
x0 = int(floor(bbox.x * 0.0625));
y0 = int(floor(bbox.y * 0.0625));
x1 = int(ceil(bbox.z * 0.0625));
y1 = int(ceil(bbox.w * 0.0625));
}
x0 = clamp(x0, 0, int(v_181.conf.width_in_tiles));
y0 = clamp(y0, 0, int(v_181.conf.height_in_tiles));
x1 = clamp(x1, 0, int(v_181.conf.width_in_tiles));
y1 = clamp(y1, 0, int(v_181.conf.height_in_tiles));
x0 = clamp(x0, 0, int(v_148.conf.width_in_tiles));
y0 = clamp(y0, 0, int(v_148.conf.height_in_tiles));
x1 = clamp(x1, 0, int(v_148.conf.width_in_tiles));
y1 = clamp(y1, 0, int(v_148.conf.height_in_tiles));
Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0));
@ -218,56 +206,42 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
}
if (th_ix == 255u)
{
uint param_1 = total_tile_count * 8u;
MallocResult _392 = malloc(param_1, v_70, v_70BufferSize);
sh_tile_alloc = _392;
uint param_2 = total_tile_count * 8u;
uint param_3 = v_148.conf.mem_size;
uint param_4 = 2u;
uint _370 = malloc_stage(param_2, param_3, param_4, v_53);
sh_tile_offset = _370;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
MallocResult alloc_start = sh_tile_alloc;
bool _403;
if (!alloc_start.failed)
{
_403 = v_70.mem_error != 0u;
}
else
{
_403 = alloc_start.failed;
}
if (_403)
uint offset_start = sh_tile_offset;
if (offset_start == 0u)
{
return;
}
if (element_ix < v_181.conf.n_elements)
if (element_ix < v_148.conf.n_elements)
{
uint _416;
uint _387;
if (th_ix > 0u)
{
_416 = sh_tile_count[th_ix - 1u];
_387 = sh_tile_count[th_ix - 1u];
}
else
{
_416 = 0u;
_387 = 0u;
}
uint tile_subix = _416;
Alloc param_2 = alloc_start.alloc;
uint param_3 = 8u * tile_subix;
uint param_4 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
path.tiles = TileRef{ tiles_alloc.offset };
uint tile_subix = _387;
path.tiles = TileRef{ offset_start + (8u * tile_subix) };
Alloc param_5;
param_5.offset = v_181.conf.tile_alloc.offset;
param_5.offset = v_148.conf.tile_alloc.offset;
PathRef param_6 = path_ref;
Path param_7 = path;
Path_write(param_5, param_6, param_7, v_70, v_70BufferSize);
Path_write(param_5, param_6, param_7, v_53);
}
uint total_count = sh_tile_count[255] * 2u;
uint start_ix = alloc_start.alloc.offset >> uint(2);
uint start_ix = offset_start >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{
Alloc param_8 = alloc_start.alloc;
uint param_9 = start_ix + i_1;
uint param_10 = 0u;
write_mem(param_8, param_9, param_10, v_70, v_70BufferSize);
v_53.memory[start_ix + i_1] = 0u;
}
}

Binary file not shown.

Binary file not shown.

View file

@ -27,6 +27,7 @@ struct TransformSeg
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -58,12 +59,12 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Transform _224 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
static const Transform _225 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
RWByteAddressBuffer _71 : register(u0, space0);
ByteAddressBuffer _96 : register(t2, space0);
ByteAddressBuffer _278 : register(t1, space0);
ByteAddressBuffer _376 : register(t3, space0);
ByteAddressBuffer _97 : register(t2, space0);
ByteAddressBuffer _279 : register(t1, space0);
ByteAddressBuffer _377 : register(t3, space0);
static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID;
@ -80,12 +81,12 @@ groupshared Transform sh_scratch[256];
Transform Transform_read(TransformRef ref)
{
uint ix = ref.offset >> uint(2);
uint raw0 = _96.Load((ix + 0u) * 4 + 0);
uint raw1 = _96.Load((ix + 1u) * 4 + 0);
uint raw2 = _96.Load((ix + 2u) * 4 + 0);
uint raw3 = _96.Load((ix + 3u) * 4 + 0);
uint raw4 = _96.Load((ix + 4u) * 4 + 0);
uint raw5 = _96.Load((ix + 5u) * 4 + 0);
uint raw0 = _97.Load((ix + 0u) * 4 + 0);
uint raw1 = _97.Load((ix + 1u) * 4 + 0);
uint raw2 = _97.Load((ix + 2u) * 4 + 0);
uint raw3 = _97.Load((ix + 3u) * 4 + 0);
uint raw4 = _97.Load((ix + 4u) * 4 + 0);
uint raw5 = _97.Load((ix + 5u) * 4 + 0);
Transform s;
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.translate = float2(asfloat(raw4), asfloat(raw5));
@ -108,7 +109,7 @@ Transform combine_monoid(Transform a, Transform b)
Transform monoid_identity()
{
return _224;
return _225;
}
bool touch_mem(Alloc alloc, uint offset)
@ -124,7 +125,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{
return;
}
_71.Store(offset * 4 + 8, val);
_71.Store(offset * 4 + 12, val);
}
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
@ -159,8 +160,8 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _285 = { _278.Load(84) + (ix * 24u) };
TransformRef ref = _285;
TransformRef _286 = { _279.Load(88) + (ix * 24u) };
TransformRef ref = _286;
TransformRef param = ref;
Transform agg = Transform_read(param);
Transform local[8];
@ -193,11 +194,11 @@ void comp_main()
Transform row = monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
Transform _382;
_382.mat = asfloat(_376.Load4((gl_WorkGroupID.x - 1u) * 32 + 0));
_382.translate = asfloat(_376.Load2((gl_WorkGroupID.x - 1u) * 32 + 16));
row.mat = _382.mat;
row.translate = _382.translate;
Transform _383;
_383.mat = asfloat(_377.Load4((gl_WorkGroupID.x - 1u) * 32 + 0));
_383.translate = asfloat(_377.Load2((gl_WorkGroupID.x - 1u) * 32 + 16));
row.mat = _383.mat;
row.translate = _383.translate;
}
if (gl_LocalInvocationID.x > 0u)
{
@ -211,13 +212,13 @@ void comp_main()
Transform param_10 = row;
Transform param_11 = local[i_2];
Transform m = combine_monoid(param_10, param_11);
TransformSeg _422 = { m.mat, m.translate };
TransformSeg transform = _422;
TransformSegRef _432 = { _278.Load(36) + ((ix + i_2) * 24u) };
TransformSegRef trans_ref = _432;
Alloc _436;
_436.offset = _278.Load(36);
param_12.offset = _436.offset;
TransformSeg _423 = { m.mat, m.translate };
TransformSeg transform = _423;
TransformSegRef _433 = { _279.Load(40) + ((ix + i_2) * 24u) };
TransformSegRef trans_ref = _433;
Alloc _437;
_437.offset = _279.Load(40);
param_12.offset = _437.offset;
TransformSegRef param_13 = trans_ref;
TransformSeg param_14 = transform;
TransformSeg_write(param_12, param_13, param_14);

View file

@ -75,6 +75,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};
@ -90,6 +91,7 @@ struct Alloc_1
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -139,15 +141,15 @@ struct ParentBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline))
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_96)
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_97)
{
uint ix = ref.offset >> uint(2);
uint raw0 = v_96.scene[ix + 0u];
uint raw1 = v_96.scene[ix + 1u];
uint raw2 = v_96.scene[ix + 2u];
uint raw3 = v_96.scene[ix + 3u];
uint raw4 = v_96.scene[ix + 4u];
uint raw5 = v_96.scene[ix + 5u];
uint raw0 = v_97.scene[ix + 0u];
uint raw1 = v_97.scene[ix + 1u];
uint raw2 = v_97.scene[ix + 2u];
uint raw3 = v_97.scene[ix + 3u];
uint raw4 = v_97.scene[ix + 4u];
uint raw5 = v_97.scene[ix + 5u];
Transform s;
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
@ -223,13 +225,13 @@ void TransformSeg_write(thread const Alloc& a, thread const TransformSegRef& ref
write_mem(param_15, param_16, param_17, v_71);
}
kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _278 [[buffer(1)]], const device SceneBuf& v_96 [[buffer(2)]], const device ParentBuf& _376 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _279 [[buffer(1)]], const device SceneBuf& v_97 [[buffer(2)]], const device ParentBuf& _377 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{
threadgroup Transform sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef ref = TransformRef{ _278.conf.trans_offset + (ix * 24u) };
TransformRef ref = TransformRef{ _279.conf.trans_offset + (ix * 24u) };
TransformRef param = ref;
Transform agg = Transform_read(param, v_96);
Transform agg = Transform_read(param, v_97);
spvUnsafeArray<Transform, 8> local;
local[0] = agg;
for (uint i = 1u; i < 8u; i++)
@ -238,7 +240,7 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
uint param_2 = i;
TransformRef param_3 = Transform_index(param_1, param_2);
Transform param_4 = agg;
Transform param_5 = Transform_read(param_3, v_96);
Transform param_5 = Transform_read(param_3, v_97);
agg = combine_monoid(param_4, param_5);
local[i] = agg;
}
@ -260,9 +262,9 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
Transform row = monoid_identity();
if (gl_WorkGroupID.x > 0u)
{
uint _379 = gl_WorkGroupID.x - 1u;
row.mat = _376.parent[_379].mat;
row.translate = _376.parent[_379].translate;
uint _380 = gl_WorkGroupID.x - 1u;
row.mat = _377.parent[_380].mat;
row.translate = _377.parent[_380].translate;
}
if (gl_LocalInvocationID.x > 0u)
{
@ -277,8 +279,8 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
Transform param_11 = local[i_2];
Transform m = combine_monoid(param_10, param_11);
TransformSeg transform = TransformSeg{ m.mat, m.translate };
TransformSegRef trans_ref = TransformSegRef{ _278.conf.trans_alloc.offset + ((ix + i_2) * 24u) };
param_12.offset = _278.conf.trans_alloc.offset;
TransformSegRef trans_ref = TransformSegRef{ _279.conf.trans_alloc.offset + ((ix + i_2) * 24u) };
param_12.offset = _279.conf.trans_alloc.offset;
TransformSegRef param_13 = trans_ref;
TransformSeg param_14 = transform;
TransformSeg_write(param_12, param_13, param_14, v_71);

Binary file not shown.

Binary file not shown.

View file

@ -16,6 +16,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -96,7 +97,7 @@ Transform combine_monoid(Transform a, Transform b)
void comp_main()
{
uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _168 = { _161.Load(84) + (ix * 24u) };
TransformRef _168 = { _161.Load(88) + (ix * 24u) };
TransformRef ref = _168;
TransformRef param = ref;
Transform agg = Transform_read(param);

View file

@ -28,6 +28,7 @@ struct Alloc
struct Config
{
uint mem_size;
uint n_elements;
uint n_pathseg;
uint width_in_tiles;
@ -78,6 +79,7 @@ struct Memory
{
uint mem_offset;
uint mem_error;
uint blend_offset;
uint memory[1];
};

Binary file not shown.