commit compiled shaders

This commit is contained in:
Commit by GitHub Action 2022-07-14 14:28:45 +00:00
parent d529d3b0e8
commit bbdd4432f5
68 changed files with 1994 additions and 2094 deletions

Binary file not shown.

View file

@ -21,6 +21,7 @@ struct Path
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -52,8 +53,8 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _67 : register(u0, space0); RWByteAddressBuffer _59 : register(u0, space0);
ByteAddressBuffer _166 : register(t1, space0); ByteAddressBuffer _181 : register(t1, space0);
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID; static uint3 gl_GlobalInvocationID;
@ -69,6 +70,13 @@ groupshared uint sh_row_width[256];
groupshared Alloc sh_row_alloc[256]; groupshared Alloc sh_row_alloc[256];
groupshared uint sh_row_count[256]; groupshared uint sh_row_count[256];
// Tests the word at byte offset 4 of buffer `_59` against the `dep_stage` bit mask.
// Returns true only when none of the bits selected by `dep_stage` are set there.
bool check_deps(uint dep_stage)
{
    // OR-ing in zero does not modify the stored word; the interlocked call is
    // used purely to obtain its current value through an atomic operation.
    uint status_word;
    _59.InterlockedOr(4, 0u, status_word);
    bool none_flagged = (status_word & dep_stage) == 0u;
    return none_flagged;
}
bool touch_mem(Alloc alloc, uint offset) bool touch_mem(Alloc alloc, uint offset)
{ {
return true; return true;
@ -82,7 +90,7 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _67.Load(offset * 4 + 8); uint v = _59.Load(offset * 4 + 12);
return v; return v;
} }
@ -100,8 +108,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5); uint raw2 = read_mem(param_4, param_5);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _134 = { raw2 }; TileRef _146 = { raw2 };
s.tiles = _134; s.tiles = _146;
return s; return s;
} }
@ -120,47 +128,52 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_67.Store(offset * 4 + 8, val); _59.Store(offset * 4 + 12, val);
} }
void comp_main() void comp_main()
{ {
uint param = 7u;
bool _154 = check_deps(param);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex; uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u; uint row_count = 0u;
bool mem_ok = _67.Load(4) == 0u;
if (gl_LocalInvocationID.y == 0u) if (gl_LocalInvocationID.y == 0u)
{ {
if (element_ix < _166.Load(0)) if (element_ix < _181.Load(4))
{ {
PathRef _180 = { _166.Load(16) + (element_ix * 12u) }; PathRef _195 = { _181.Load(20) + (element_ix * 12u) };
PathRef path_ref = _180; PathRef path_ref = _195;
Alloc _185; Alloc _200;
_185.offset = _166.Load(16); _200.offset = _181.Load(20);
Alloc param; Alloc param_1;
param.offset = _185.offset; param_1.offset = _200.offset;
PathRef param_1 = path_ref; PathRef param_2 = path_ref;
Path path = Path_read(param, param_1); Path path = Path_read(param_1, param_2);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x; sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y; row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u; bool _225 = row_count == 1u;
bool _216; bool _231;
if (_210) if (_225)
{ {
_216 = path.bbox.y > 0u; _231 = path.bbox.y > 0u;
} }
else else
{ {
_216 = _210; _231 = _225;
} }
if (_216) if (_231)
{ {
row_count = 0u; row_count = 0u;
} }
uint param_2 = path.tiles.offset; uint param_3 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok; bool param_5 = true;
Alloc path_alloc = new_alloc(param_2, param_3, param_4); Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc; sh_row_alloc[th_ix] = path_alloc;
} }
sh_row_count[th_ix] = row_count; sh_row_count[th_ix] = row_count;
@ -168,17 +181,17 @@ void comp_main()
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
bool _262 = gl_LocalInvocationID.y == 0u; bool _276 = gl_LocalInvocationID.y == 0u;
bool _269; bool _283;
if (_262) if (_276)
{ {
_269 = th_ix >= (1u << i); _283 = th_ix >= (1u << i);
} }
else else
{ {
_269 = _262; _283 = _276;
} }
if (_269) if (_283)
{ {
row_count += sh_row_count[th_ix - (1u << i)]; row_count += sh_row_count[th_ix - (1u << i)];
} }
@ -190,7 +203,7 @@ void comp_main()
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255]; uint total_rows = sh_row_count[255];
uint _348; uint _360;
for (uint row = th_ix; row < total_rows; row += 256u) for (uint row = th_ix; row < total_rows; row += 256u)
{ {
uint el_ix = 0u; uint el_ix = 0u;
@ -203,32 +216,32 @@ void comp_main()
} }
} }
uint width = sh_row_width[el_ix]; uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok) if (width > 0u)
{ {
Alloc tiles_alloc = sh_row_alloc[el_ix]; Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u) if (el_ix > 0u)
{ {
_348 = sh_row_count[el_ix - 1u]; _360 = sh_row_count[el_ix - 1u];
} }
else else
{ {
_348 = 0u; _360 = 0u;
} }
uint seq_ix = row - _348; uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc; Alloc param_6 = tiles_alloc;
uint param_6 = tile_el_ix; uint param_7 = tile_el_ix;
uint sum = read_mem(param_5, param_6); uint sum = read_mem(param_6, param_7);
for (uint x = 1u; x < width; x++) for (uint x = 1u; x < width; x++)
{ {
tile_el_ix += 2u; tile_el_ix += 2u;
Alloc param_7 = tiles_alloc; Alloc param_8 = tiles_alloc;
uint param_8 = tile_el_ix; uint param_9 = tile_el_ix;
sum += read_mem(param_7, param_8); sum += read_mem(param_8, param_9);
Alloc param_9 = tiles_alloc; Alloc param_10 = tiles_alloc;
uint param_10 = tile_el_ix; uint param_11 = tile_el_ix;
uint param_11 = sum; uint param_12 = sum;
write_mem(param_9, param_10, param_11); write_mem(param_10, param_11, param_12);
} }
} }
} }

View file

@ -1,7 +1,9 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes" #pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib> #include <metal_stdlib>
#include <simd/simd.h> #include <simd/simd.h>
#include <metal_atomic>
using namespace metal; using namespace metal;
@ -30,6 +32,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -40,6 +43,7 @@ struct Alloc_1
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -76,6 +80,13 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
// Tests `v_59.mem_error` against the `dep_stage` bit mask and returns true only
// when none of the selected bits are set.
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_59)
{
    // A relaxed atomic fetch-or with zero yields the current value without changing it.
    device atomic_uint* error_word = (device atomic_uint*)&v_59.mem_error;
    uint status_word = atomic_fetch_or_explicit(error_word, 0u, memory_order_relaxed);
    return (status_word & dep_stage) == 0u;
}
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset) bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{ {
@ -83,7 +94,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67) uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -91,23 +102,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{ {
return 0u; return 0u;
} }
uint v = v_67.memory[offset]; uint v = v_59.memory[offset];
return v; return v;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67) Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_67); uint raw0 = read_mem(param, param_1, v_59);
Alloc param_2 = a; Alloc param_2 = a;
uint param_3 = ix + 1u; uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_67); uint raw1 = read_mem(param_2, param_3, v_59);
Alloc param_4 = a; Alloc param_4 = a;
uint param_5 = ix + 2u; uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_67); uint raw2 = read_mem(param_4, param_5, v_59);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 }; s.tiles = TileRef{ raw2 };
@ -123,7 +134,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67) void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -131,47 +142,52 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_67.memory[offset] = val; v_59.memory[offset] = val;
} }
kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{ {
threadgroup uint sh_row_width[256]; threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256]; threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256]; threadgroup uint sh_row_count[256];
uint param = 7u;
bool _154 = check_deps(param, v_59);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex; uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u; uint row_count = 0u;
bool mem_ok = v_67.mem_error == 0u;
if (gl_LocalInvocationID.y == 0u) if (gl_LocalInvocationID.y == 0u)
{ {
if (element_ix < _166.conf.n_elements) if (element_ix < _181.conf.n_elements)
{ {
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) }; PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param; Alloc param_1;
param.offset = _166.conf.tile_alloc.offset; param_1.offset = _181.conf.tile_alloc.offset;
PathRef param_1 = path_ref; PathRef param_2 = path_ref;
Path path = Path_read(param, param_1, v_67); Path path = Path_read(param_1, param_2, v_59);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x; sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y; row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u; bool _225 = row_count == 1u;
bool _216; bool _231;
if (_210) if (_225)
{ {
_216 = path.bbox.y > 0u; _231 = path.bbox.y > 0u;
} }
else else
{ {
_216 = _210; _231 = _225;
} }
if (_216) if (_231)
{ {
row_count = 0u; row_count = 0u;
} }
uint param_2 = path.tiles.offset; uint param_3 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok; bool param_5 = true;
Alloc path_alloc = new_alloc(param_2, param_3, param_4); Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc; sh_row_alloc[th_ix] = path_alloc;
} }
sh_row_count[th_ix] = row_count; sh_row_count[th_ix] = row_count;
@ -179,17 +195,17 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
bool _262 = gl_LocalInvocationID.y == 0u; bool _276 = gl_LocalInvocationID.y == 0u;
bool _269; bool _283;
if (_262) if (_276)
{ {
_269 = th_ix >= (1u << i); _283 = th_ix >= (1u << i);
} }
else else
{ {
_269 = _262; _283 = _276;
} }
if (_269) if (_283)
{ {
row_count += sh_row_count[th_ix - (1u << i)]; row_count += sh_row_count[th_ix - (1u << i)];
} }
@ -201,7 +217,7 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255]; uint total_rows = sh_row_count[255];
uint _348; uint _360;
for (uint row = th_ix; row < total_rows; row += 256u) for (uint row = th_ix; row < total_rows; row += 256u)
{ {
uint el_ix = 0u; uint el_ix = 0u;
@ -214,32 +230,32 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
} }
} }
uint width = sh_row_width[el_ix]; uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok) if (width > 0u)
{ {
Alloc tiles_alloc = sh_row_alloc[el_ix]; Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u) if (el_ix > 0u)
{ {
_348 = sh_row_count[el_ix - 1u]; _360 = sh_row_count[el_ix - 1u];
} }
else else
{ {
_348 = 0u; _360 = 0u;
} }
uint seq_ix = row - _348; uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc; Alloc param_6 = tiles_alloc;
uint param_6 = tile_el_ix; uint param_7 = tile_el_ix;
uint sum = read_mem(param_5, param_6, v_67); uint sum = read_mem(param_6, param_7, v_59);
for (uint x = 1u; x < width; x++) for (uint x = 1u; x < width; x++)
{ {
tile_el_ix += 2u; tile_el_ix += 2u;
Alloc param_7 = tiles_alloc; Alloc param_8 = tiles_alloc;
uint param_8 = tile_el_ix; uint param_9 = tile_el_ix;
sum += read_mem(param_7, param_8, v_67); sum += read_mem(param_8, param_9, v_59);
Alloc param_9 = tiles_alloc; Alloc param_10 = tiles_alloc;
uint param_10 = tile_el_ix; uint param_11 = tile_el_ix;
uint param_11 = sum; uint param_12 = sum;
write_mem(param_9, param_10, param_11, v_67); write_mem(param_10, param_11, param_12, v_59);
} }
} }
} }

Binary file not shown.

Binary file not shown.

View file

@ -21,6 +21,7 @@ struct Path
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -52,8 +53,8 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u);
RWByteAddressBuffer _67 : register(u0, space0); RWByteAddressBuffer _59 : register(u0, space0);
ByteAddressBuffer _166 : register(t1, space0); ByteAddressBuffer _181 : register(t1, space0);
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID; static uint3 gl_GlobalInvocationID;
@ -69,6 +70,13 @@ groupshared uint sh_row_width[256];
groupshared Alloc sh_row_alloc[256]; groupshared Alloc sh_row_alloc[256];
groupshared uint sh_row_count[256]; groupshared uint sh_row_count[256];
// Tests the word at byte offset 4 of buffer `_59` against the `dep_stage` bit mask.
// Returns true only when none of the bits selected by `dep_stage` are set there.
bool check_deps(uint dep_stage)
{
    // OR-ing in zero does not modify the stored word; the interlocked call is
    // used purely to obtain its current value through an atomic operation.
    uint status_word;
    _59.InterlockedOr(4, 0u, status_word);
    bool none_flagged = (status_word & dep_stage) == 0u;
    return none_flagged;
}
bool touch_mem(Alloc alloc, uint offset) bool touch_mem(Alloc alloc, uint offset)
{ {
return true; return true;
@ -82,7 +90,7 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _67.Load(offset * 4 + 8); uint v = _59.Load(offset * 4 + 12);
return v; return v;
} }
@ -100,8 +108,8 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5); uint raw2 = read_mem(param_4, param_5);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _134 = { raw2 }; TileRef _146 = { raw2 };
s.tiles = _134; s.tiles = _146;
return s; return s;
} }
@ -120,47 +128,52 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_67.Store(offset * 4 + 8, val); _59.Store(offset * 4 + 12, val);
} }
void comp_main() void comp_main()
{ {
uint param = 7u;
bool _154 = check_deps(param);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex; uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u; uint row_count = 0u;
bool mem_ok = _67.Load(4) == 0u;
if (gl_LocalInvocationID.y == 0u) if (gl_LocalInvocationID.y == 0u)
{ {
if (element_ix < _166.Load(0)) if (element_ix < _181.Load(4))
{ {
PathRef _180 = { _166.Load(16) + (element_ix * 12u) }; PathRef _195 = { _181.Load(20) + (element_ix * 12u) };
PathRef path_ref = _180; PathRef path_ref = _195;
Alloc _185; Alloc _200;
_185.offset = _166.Load(16); _200.offset = _181.Load(20);
Alloc param; Alloc param_1;
param.offset = _185.offset; param_1.offset = _200.offset;
PathRef param_1 = path_ref; PathRef param_2 = path_ref;
Path path = Path_read(param, param_1); Path path = Path_read(param_1, param_2);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x; sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y; row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u; bool _225 = row_count == 1u;
bool _216; bool _231;
if (_210) if (_225)
{ {
_216 = path.bbox.y > 0u; _231 = path.bbox.y > 0u;
} }
else else
{ {
_216 = _210; _231 = _225;
} }
if (_216) if (_231)
{ {
row_count = 0u; row_count = 0u;
} }
uint param_2 = path.tiles.offset; uint param_3 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok; bool param_5 = true;
Alloc path_alloc = new_alloc(param_2, param_3, param_4); Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc; sh_row_alloc[th_ix] = path_alloc;
} }
sh_row_count[th_ix] = row_count; sh_row_count[th_ix] = row_count;
@ -168,17 +181,17 @@ void comp_main()
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
bool _262 = gl_LocalInvocationID.y == 0u; bool _276 = gl_LocalInvocationID.y == 0u;
bool _269; bool _283;
if (_262) if (_276)
{ {
_269 = th_ix >= (1u << i); _283 = th_ix >= (1u << i);
} }
else else
{ {
_269 = _262; _283 = _276;
} }
if (_269) if (_283)
{ {
row_count += sh_row_count[th_ix - (1u << i)]; row_count += sh_row_count[th_ix - (1u << i)];
} }
@ -190,7 +203,7 @@ void comp_main()
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
uint total_rows = sh_row_count[255]; uint total_rows = sh_row_count[255];
uint _348; uint _360;
for (uint row = th_ix; row < total_rows; row += 1024u) for (uint row = th_ix; row < total_rows; row += 1024u)
{ {
uint el_ix = 0u; uint el_ix = 0u;
@ -203,32 +216,32 @@ void comp_main()
} }
} }
uint width = sh_row_width[el_ix]; uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok) if (width > 0u)
{ {
Alloc tiles_alloc = sh_row_alloc[el_ix]; Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u) if (el_ix > 0u)
{ {
_348 = sh_row_count[el_ix - 1u]; _360 = sh_row_count[el_ix - 1u];
} }
else else
{ {
_348 = 0u; _360 = 0u;
} }
uint seq_ix = row - _348; uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc; Alloc param_6 = tiles_alloc;
uint param_6 = tile_el_ix; uint param_7 = tile_el_ix;
uint sum = read_mem(param_5, param_6); uint sum = read_mem(param_6, param_7);
for (uint x = 1u; x < width; x++) for (uint x = 1u; x < width; x++)
{ {
tile_el_ix += 2u; tile_el_ix += 2u;
Alloc param_7 = tiles_alloc; Alloc param_8 = tiles_alloc;
uint param_8 = tile_el_ix; uint param_9 = tile_el_ix;
sum += read_mem(param_7, param_8); sum += read_mem(param_8, param_9);
Alloc param_9 = tiles_alloc; Alloc param_10 = tiles_alloc;
uint param_10 = tile_el_ix; uint param_11 = tile_el_ix;
uint param_11 = sum; uint param_12 = sum;
write_mem(param_9, param_10, param_11); write_mem(param_10, param_11, param_12);
} }
} }
} }

View file

@ -1,7 +1,9 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes" #pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib> #include <metal_stdlib>
#include <simd/simd.h> #include <simd/simd.h>
#include <metal_atomic>
using namespace metal; using namespace metal;
@ -30,6 +32,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -40,6 +43,7 @@ struct Alloc_1
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -76,6 +80,13 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 4u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 4u, 1u);
// Tests `v_59.mem_error` against the `dep_stage` bit mask and returns true only
// when none of the selected bits are set.
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_59)
{
    // A relaxed atomic fetch-or with zero yields the current value without changing it.
    device atomic_uint* error_word = (device atomic_uint*)&v_59.mem_error;
    uint status_word = atomic_fetch_or_explicit(error_word, 0u, memory_order_relaxed);
    return (status_word & dep_stage) == 0u;
}
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset) bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{ {
@ -83,7 +94,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67) uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -91,23 +102,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{ {
return 0u; return 0u;
} }
uint v = v_67.memory[offset]; uint v = v_59.memory[offset];
return v; return v;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67) Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_67); uint raw0 = read_mem(param, param_1, v_59);
Alloc param_2 = a; Alloc param_2 = a;
uint param_3 = ix + 1u; uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_67); uint raw1 = read_mem(param_2, param_3, v_59);
Alloc param_4 = a; Alloc param_4 = a;
uint param_5 = ix + 2u; uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_67); uint raw2 = read_mem(param_4, param_5, v_59);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 }; s.tiles = TileRef{ raw2 };
@ -123,7 +134,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67) void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -131,47 +142,52 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_67.memory[offset] = val; v_59.memory[offset] = val;
} }
kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{ {
threadgroup uint sh_row_width[256]; threadgroup uint sh_row_width[256];
threadgroup Alloc sh_row_alloc[256]; threadgroup Alloc sh_row_alloc[256];
threadgroup uint sh_row_count[256]; threadgroup uint sh_row_count[256];
uint param = 7u;
bool _154 = check_deps(param, v_59);
if (!_154)
{
return;
}
uint th_ix = gl_LocalInvocationIndex; uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
uint row_count = 0u; uint row_count = 0u;
bool mem_ok = v_67.mem_error == 0u;
if (gl_LocalInvocationID.y == 0u) if (gl_LocalInvocationID.y == 0u)
{ {
if (element_ix < _166.conf.n_elements) if (element_ix < _181.conf.n_elements)
{ {
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) }; PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) };
Alloc param; Alloc param_1;
param.offset = _166.conf.tile_alloc.offset; param_1.offset = _181.conf.tile_alloc.offset;
PathRef param_1 = path_ref; PathRef param_2 = path_ref;
Path path = Path_read(param, param_1, v_67); Path path = Path_read(param_1, param_2, v_59);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x; sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y; row_count = path.bbox.w - path.bbox.y;
bool _210 = row_count == 1u; bool _225 = row_count == 1u;
bool _216; bool _231;
if (_210) if (_225)
{ {
_216 = path.bbox.y > 0u; _231 = path.bbox.y > 0u;
} }
else else
{ {
_216 = _210; _231 = _225;
} }
if (_216) if (_231)
{ {
row_count = 0u; row_count = 0u;
} }
uint param_2 = path.tiles.offset; uint param_3 = path.tiles.offset;
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_4 = mem_ok; bool param_5 = true;
Alloc path_alloc = new_alloc(param_2, param_3, param_4); Alloc path_alloc = new_alloc(param_3, param_4, param_5);
sh_row_alloc[th_ix] = path_alloc; sh_row_alloc[th_ix] = path_alloc;
} }
sh_row_count[th_ix] = row_count; sh_row_count[th_ix] = row_count;
@ -179,17 +195,17 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
bool _262 = gl_LocalInvocationID.y == 0u; bool _276 = gl_LocalInvocationID.y == 0u;
bool _269; bool _283;
if (_262) if (_276)
{ {
_269 = th_ix >= (1u << i); _283 = th_ix >= (1u << i);
} }
else else
{ {
_269 = _262; _283 = _276;
} }
if (_269) if (_283)
{ {
row_count += sh_row_count[th_ix - (1u << i)]; row_count += sh_row_count[th_ix - (1u << i)];
} }
@ -201,7 +217,7 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint total_rows = sh_row_count[255]; uint total_rows = sh_row_count[255];
uint _348; uint _360;
for (uint row = th_ix; row < total_rows; row += 1024u) for (uint row = th_ix; row < total_rows; row += 1024u)
{ {
uint el_ix = 0u; uint el_ix = 0u;
@ -214,32 +230,32 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
} }
} }
uint width = sh_row_width[el_ix]; uint width = sh_row_width[el_ix];
if ((width > 0u) && mem_ok) if (width > 0u)
{ {
Alloc tiles_alloc = sh_row_alloc[el_ix]; Alloc tiles_alloc = sh_row_alloc[el_ix];
if (el_ix > 0u) if (el_ix > 0u)
{ {
_348 = sh_row_count[el_ix - 1u]; _360 = sh_row_count[el_ix - 1u];
} }
else else
{ {
_348 = 0u; _360 = 0u;
} }
uint seq_ix = row - _348; uint seq_ix = row - _360;
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
Alloc param_5 = tiles_alloc; Alloc param_6 = tiles_alloc;
uint param_6 = tile_el_ix; uint param_7 = tile_el_ix;
uint sum = read_mem(param_5, param_6, v_67); uint sum = read_mem(param_6, param_7, v_59);
for (uint x = 1u; x < width; x++) for (uint x = 1u; x < width; x++)
{ {
tile_el_ix += 2u; tile_el_ix += 2u;
Alloc param_7 = tiles_alloc; Alloc param_8 = tiles_alloc;
uint param_8 = tile_el_ix; uint param_9 = tile_el_ix;
sum += read_mem(param_7, param_8, v_67); sum += read_mem(param_8, param_9, v_59);
Alloc param_9 = tiles_alloc; Alloc param_10 = tiles_alloc;
uint param_10 = tile_el_ix; uint param_11 = tile_el_ix;
uint param_11 = sum; uint param_12 = sum;
write_mem(param_9, param_10, param_11, v_67); write_mem(param_10, param_11, param_12, v_59);
} }
} }
} }

Binary file not shown.

Binary file not shown.

View file

@ -5,6 +5,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -48,13 +49,13 @@ struct SPIRV_Cross_Input
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x; uint ix = gl_GlobalInvocationID.x;
if (ix < _21.Load(76)) if (ix < _21.Load(80))
{ {
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix); uint out_ix = (_21.Load(44) >> uint(2)) + (6u * ix);
_45.Store(out_ix * 4 + 8, 65535u); _45.Store(out_ix * 4 + 12, 65535u);
_45.Store((out_ix + 1u) * 4 + 8, 65535u); _45.Store((out_ix + 1u) * 4 + 12, 65535u);
_45.Store((out_ix + 2u) * 4 + 8, 0u); _45.Store((out_ix + 2u) * 4 + 12, 0u);
_45.Store((out_ix + 3u) * 4 + 8, 0u); _45.Store((out_ix + 3u) * 4 + 12, 0u);
} }
} }

View file

@ -10,6 +10,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -48,6 +49,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };

Binary file not shown.

Binary file not shown.

View file

@ -3,22 +3,6 @@ struct Alloc
uint offset; uint offset;
}; };
// Pairs an Alloc with a boolean named `failed`.
// NOTE(review): presumably the outcome of an allocation attempt — confirm against the code that fills it in.
struct MallocResult
{
Alloc alloc;
bool failed;
};
// Wrapper around a single uint `offset`.
// NOTE(review): presumably locates a BinInstance inside a buffer — confirm units (bytes vs. words) with the reader code.
struct BinInstanceRef
{
uint offset;
};
// Record holding a single uint `element_ix`.
// NOTE(review): presumably an index into the element list — confirm against the code that writes it.
struct BinInstance
{
uint element_ix;
};
struct DrawMonoid struct DrawMonoid
{ {
uint path_ix; uint path_ix;
@ -29,6 +13,7 @@ struct DrawMonoid
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -60,8 +45,8 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _81 : register(u0, space0); RWByteAddressBuffer _57 : register(u0, space0);
ByteAddressBuffer _156 : register(t1, space0); ByteAddressBuffer _101 : register(t1, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -72,39 +57,38 @@ struct SPIRV_Cross_Input
}; };
groupshared uint bitmaps[8][256]; groupshared uint bitmaps[8][256];
groupshared bool sh_alloc_failed;
groupshared uint count[8][256]; groupshared uint count[8][256];
groupshared Alloc sh_chunk_alloc[256]; groupshared uint sh_chunk_offset[256];
DrawMonoid load_draw_monoid(uint element_ix) DrawMonoid load_draw_monoid(uint element_ix)
{ {
uint base = (_156.Load(44) >> uint(2)) + (4u * element_ix); uint base = (_101.Load(48) >> uint(2)) + (4u * element_ix);
uint path_ix = _81.Load(base * 4 + 8); uint path_ix = _57.Load(base * 4 + 12);
uint clip_ix = _81.Load((base + 1u) * 4 + 8); uint clip_ix = _57.Load((base + 1u) * 4 + 12);
uint scene_offset = _81.Load((base + 2u) * 4 + 8); uint scene_offset = _57.Load((base + 2u) * 4 + 12);
uint info_offset = _81.Load((base + 3u) * 4 + 8); uint info_offset = _57.Load((base + 3u) * 4 + 12);
DrawMonoid _190 = { path_ix, clip_ix, scene_offset, info_offset }; DrawMonoid _136 = { path_ix, clip_ix, scene_offset, info_offset };
return _190; return _136;
} }
float4 load_clip_bbox(uint clip_ix) float4 load_clip_bbox(uint clip_ix)
{ {
uint base = (_156.Load(60) >> uint(2)) + (4u * clip_ix); uint base = (_101.Load(64) >> uint(2)) + (4u * clip_ix);
float x0 = asfloat(_81.Load(base * 4 + 8)); float x0 = asfloat(_57.Load(base * 4 + 12));
float y0 = asfloat(_81.Load((base + 1u) * 4 + 8)); float y0 = asfloat(_57.Load((base + 1u) * 4 + 12));
float x1 = asfloat(_81.Load((base + 2u) * 4 + 8)); float x1 = asfloat(_57.Load((base + 2u) * 4 + 12));
float y1 = asfloat(_81.Load((base + 3u) * 4 + 8)); float y1 = asfloat(_57.Load((base + 3u) * 4 + 12));
float4 bbox = float4(x0, y0, x1, y1); float4 bbox = float4(x0, y0, x1, y1);
return bbox; return bbox;
} }
float4 load_path_bbox(uint path_ix) float4 load_path_bbox(uint path_ix)
{ {
uint base = (_156.Load(40) >> uint(2)) + (6u * path_ix); uint base = (_101.Load(44) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_81.Load(base * 4 + 8)) - 32768.0f; float bbox_l = float(_57.Load(base * 4 + 12)) - 32768.0f;
float bbox_t = float(_81.Load((base + 1u) * 4 + 8)) - 32768.0f; float bbox_t = float(_57.Load((base + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_81.Load((base + 2u) * 4 + 8)) - 32768.0f; float bbox_r = float(_57.Load((base + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_81.Load((base + 3u) * 4 + 8)) - 32768.0f; float bbox_b = float(_57.Load((base + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox; return bbox;
} }
@ -116,41 +100,25 @@ float4 bbox_intersect(float4 a, float4 b)
void store_draw_bbox(uint draw_ix, float4 bbox) void store_draw_bbox(uint draw_ix, float4 bbox)
{ {
uint base = (_156.Load(64) >> uint(2)) + (4u * draw_ix); uint base = (_101.Load(68) >> uint(2)) + (4u * draw_ix);
_81.Store(base * 4 + 8, asuint(bbox.x)); _57.Store(base * 4 + 12, asuint(bbox.x));
_81.Store((base + 1u) * 4 + 8, asuint(bbox.y)); _57.Store((base + 1u) * 4 + 12, asuint(bbox.y));
_81.Store((base + 2u) * 4 + 8, asuint(bbox.z)); _57.Store((base + 2u) * 4 + 12, asuint(bbox.z));
_81.Store((base + 3u) * 4 + 8, asuint(bbox.w)); _57.Store((base + 3u) * 4 + 12, asuint(bbox.w));
} }
Alloc new_alloc(uint offset, uint size, bool mem_ok) uint malloc_stage(uint size, uint mem_size, uint stage)
{ {
Alloc a; uint _65;
a.offset = offset; _57.InterlockedAdd(0, size, _65);
return a; uint offset = _65;
} if ((offset + size) > mem_size)
MallocResult malloc(uint size)
{
uint _87;
_81.InterlockedAdd(0, size, _87);
uint offset = _87;
uint _94;
_81.GetDimensions(_94);
_94 = (_94 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_94) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{ {
uint _116; uint _76;
_81.InterlockedMax(4, 1u, _116); _57.InterlockedOr(4, stage, _76);
return r; offset = 0u;
} }
return r; return offset;
} }
bool touch_mem(Alloc alloc, uint offset) bool touch_mem(Alloc alloc, uint offset)
@ -166,16 +134,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_81.Store(offset * 4 + 8, val); _57.Store(offset * 4 + 12, val);
}
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.element_ix;
write_mem(param, param_1, param_2);
} }
void comp_main() void comp_main()
@ -185,17 +144,12 @@ void comp_main()
{ {
bitmaps[i][gl_LocalInvocationID.x] = 0u; bitmaps[i][gl_LocalInvocationID.x] = 0u;
} }
if (gl_LocalInvocationID.x == 0u)
{
sh_alloc_failed = false;
}
GroupMemoryBarrierWithGroupSync();
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
int x0 = 0; int x0 = 0;
int y0 = 0; int y0 = 0;
int x1 = 0; int x1 = 0;
int y1 = 0; int y1 = 0;
if (element_ix < _156.Load(0)) if (element_ix < _101.Load(4))
{ {
uint param = element_ix; uint param = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param); DrawMonoid draw_monoid = load_draw_monoid(param);
@ -212,11 +166,11 @@ void comp_main()
float4 param_3 = path_bbox; float4 param_3 = path_bbox;
float4 param_4 = clip_bbox; float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_3, param_4); float4 bbox = bbox_intersect(param_3, param_4);
float4 _417 = bbox; float4 _354 = bbox;
float4 _419 = bbox; float4 _356 = bbox;
float2 _421 = max(_417.xy, _419.zw); float2 _358 = max(_354.xy, _356.zw);
bbox.z = _421.x; bbox.z = _358.x;
bbox.w = _421.y; bbox.w = _358.y;
uint param_5 = element_ix; uint param_5 = element_ix;
float4 param_6 = bbox; float4 param_6 = bbox;
store_draw_bbox(param_5, param_6); store_draw_bbox(param_5, param_6);
@ -225,8 +179,8 @@ void comp_main()
x1 = int(ceil(bbox.z * 0.00390625f)); x1 = int(ceil(bbox.z * 0.00390625f));
y1 = int(ceil(bbox.w * 0.00390625f)); y1 = int(ceil(bbox.w * 0.00390625f));
} }
uint width_in_bins = ((_156.Load(8) + 16u) - 1u) / 16u; uint width_in_bins = ((_101.Load(12) + 16u) - 1u) / 16u;
uint height_in_bins = ((_156.Load(12) + 16u) - 1u) / 16u; uint height_in_bins = ((_101.Load(16) + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins)); x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins)); x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins)); y0 = clamp(y0, 0, int(height_in_bins));
@ -241,8 +195,8 @@ void comp_main()
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1) while (y < y1)
{ {
uint _523; uint _460;
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _523); InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _460);
x++; x++;
if (x == x1) if (x == x1)
{ {
@ -257,51 +211,32 @@ void comp_main()
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x]))); element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count; count[i_1][gl_LocalInvocationID.x] = element_count;
} }
uint param_7 = 0u; uint chunk_offset = 0u;
uint param_8 = 0u;
bool param_9 = true;
Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
if (element_count != 0u) if (element_count != 0u)
{ {
uint param_10 = element_count * 4u; uint param_7 = element_count * 4u;
MallocResult _573 = malloc(param_10); uint param_8 = _101.Load(0);
MallocResult chunk = _573; uint param_9 = 1u;
chunk_alloc = chunk.alloc; uint _510 = malloc_stage(param_7, param_8, param_9);
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; chunk_offset = _510;
if (chunk.failed) sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset;
{
sh_alloc_failed = true;
}
} }
uint out_ix = (_156.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); uint out_ix = (_101.Load(24) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc _603; Alloc _532;
_603.offset = _156.Load(20); _532.offset = _101.Load(24);
Alloc param_11; Alloc param_10;
param_11.offset = _603.offset; param_10.offset = _532.offset;
uint param_12 = out_ix; uint param_11 = out_ix;
uint param_13 = element_count; uint param_12 = element_count;
write_mem(param_11, param_12, param_13); write_mem(param_10, param_11, param_12);
Alloc _615; Alloc _544;
_615.offset = _156.Load(20); _544.offset = _101.Load(24);
Alloc param_14; Alloc param_13;
param_14.offset = _615.offset; param_13.offset = _544.offset;
uint param_15 = out_ix + 1u; uint param_14 = out_ix + 1u;
uint param_16 = chunk_alloc.offset; uint param_15 = chunk_offset;
write_mem(param_14, param_15, param_16); write_mem(param_13, param_14, param_15);
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
bool _630;
if (!sh_alloc_failed)
{
_630 = _81.Load(4) != 0u;
}
else
{
_630 = sh_alloc_failed;
}
if (_630)
{
return;
}
x = x0; x = x0;
y = y0; y = y0;
while (y < y1) while (y < y1)
@ -315,14 +250,11 @@ void comp_main()
{ {
idx += count[my_slice - 1u][bin_ix]; idx += count[my_slice - 1u][bin_ix];
} }
Alloc out_alloc = sh_chunk_alloc[bin_ix]; uint chunk_offset_1 = sh_chunk_offset[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u); if (chunk_offset_1 != 0u)
BinInstanceRef _692 = { out_offset }; {
BinInstance _694 = { element_ix }; _57.Store(((chunk_offset_1 >> uint(2)) + idx) * 4 + 12, element_ix);
Alloc param_17 = out_alloc; }
BinInstanceRef param_18 = _692;
BinInstance param_19 = _694;
BinInstance_write(param_17, param_18, param_19);
} }
x++; x++;
if (x == x1) if (x == x1)

View file

@ -12,22 +12,6 @@ struct Alloc
uint offset; uint offset;
}; };
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct BinInstanceRef
{
uint offset;
};
struct BinInstance
{
uint element_ix;
};
struct DrawMonoid struct DrawMonoid
{ {
uint path_ix; uint path_ix;
@ -40,6 +24,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -50,6 +35,7 @@ struct Alloc_1
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -87,36 +73,36 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156) DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_57, const device ConfigBuf& v_101)
{ {
uint base = (v_156.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix); uint base = (v_101.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix);
uint path_ix = v_81.memory[base]; uint path_ix = v_57.memory[base];
uint clip_ix = v_81.memory[base + 1u]; uint clip_ix = v_57.memory[base + 1u];
uint scene_offset = v_81.memory[base + 2u]; uint scene_offset = v_57.memory[base + 2u];
uint info_offset = v_81.memory[base + 3u]; uint info_offset = v_57.memory[base + 3u];
return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset }; return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset };
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156) float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_57, const device ConfigBuf& v_101)
{ {
uint base = (v_156.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix); uint base = (v_101.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
float x0 = as_type<float>(v_81.memory[base]); float x0 = as_type<float>(v_57.memory[base]);
float y0 = as_type<float>(v_81.memory[base + 1u]); float y0 = as_type<float>(v_57.memory[base + 1u]);
float x1 = as_type<float>(v_81.memory[base + 2u]); float x1 = as_type<float>(v_57.memory[base + 2u]);
float y1 = as_type<float>(v_81.memory[base + 3u]); float y1 = as_type<float>(v_57.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1); float4 bbox = float4(x0, y0, x1, y1);
return bbox; return bbox;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156) float4 load_path_bbox(thread const uint& path_ix, device Memory& v_57, const device ConfigBuf& v_101)
{ {
uint base = (v_156.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix); uint base = (v_101.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
float bbox_l = float(v_81.memory[base]) - 32768.0; float bbox_l = float(v_57.memory[base]) - 32768.0;
float bbox_t = float(v_81.memory[base + 1u]) - 32768.0; float bbox_t = float(v_57.memory[base + 1u]) - 32768.0;
float bbox_r = float(v_81.memory[base + 2u]) - 32768.0; float bbox_r = float(v_57.memory[base + 2u]) - 32768.0;
float bbox_b = float(v_81.memory[base + 3u]) - 32768.0; float bbox_b = float(v_57.memory[base + 3u]) - 32768.0;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox; return bbox;
} }
@ -128,40 +114,26 @@ float4 bbox_intersect(thread const float4& a, thread const float4& b)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156) void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_57, const device ConfigBuf& v_101)
{ {
uint base = (v_156.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix); uint base = (v_101.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
v_81.memory[base] = as_type<uint>(bbox.x); v_57.memory[base] = as_type<uint>(bbox.x);
v_81.memory[base + 1u] = as_type<uint>(bbox.y); v_57.memory[base + 1u] = as_type<uint>(bbox.y);
v_81.memory[base + 2u] = as_type<uint>(bbox.z); v_57.memory[base + 2u] = as_type<uint>(bbox.z);
v_81.memory[base + 3u] = as_type<uint>(bbox.w); v_57.memory[base + 3u] = as_type<uint>(bbox.w);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_57)
{ {
Alloc a; uint _65 = atomic_fetch_add_explicit((device atomic_uint*)&v_57.mem_offset, size, memory_order_relaxed);
a.offset = offset; uint offset = _65;
return a; if ((offset + size) > mem_size)
}
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_81, constant uint& v_81BufferSize)
{
uint _87 = atomic_fetch_add_explicit((device atomic_uint*)&v_81.mem_offset, size, memory_order_relaxed);
uint offset = _87;
MallocResult r;
r.failed = (offset + size) > uint(int((v_81BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{ {
uint _116 = atomic_fetch_max_explicit((device atomic_uint*)&v_81.mem_error, 1u, memory_order_relaxed); uint _76 = atomic_fetch_or_explicit((device atomic_uint*)&v_57.mem_error, stage, memory_order_relaxed);
return r; offset = 0u;
} }
return r; return offset;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
@ -171,7 +143,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_81, constant uint& v_81BufferSize) void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_57)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -179,73 +151,56 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_81.memory[offset] = val; v_57.memory[offset] = val;
} }
static inline __attribute__((always_inline)) kernel void main0(device Memory& v_57 [[buffer(0)]], const device ConfigBuf& v_101 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_81, constant uint& v_81BufferSize)
{
uint ix = ref.offset >> uint(2);
Alloc param = a;
uint param_1 = ix + 0u;
uint param_2 = s.element_ix;
write_mem(param, param_1, param_2, v_81, v_81BufferSize);
}
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_81 [[buffer(0)]], const device ConfigBuf& v_156 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{ {
threadgroup uint bitmaps[8][256]; threadgroup uint bitmaps[8][256];
threadgroup short sh_alloc_failed;
threadgroup uint count[8][256]; threadgroup uint count[8][256];
threadgroup Alloc sh_chunk_alloc[256]; threadgroup uint sh_chunk_offset[256];
constant uint& v_81BufferSize = spvBufferSizeConstants[0];
uint my_partition = gl_WorkGroupID.x; uint my_partition = gl_WorkGroupID.x;
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
bitmaps[i][gl_LocalInvocationID.x] = 0u; bitmaps[i][gl_LocalInvocationID.x] = 0u;
} }
if (gl_LocalInvocationID.x == 0u)
{
sh_alloc_failed = short(false);
}
threadgroup_barrier(mem_flags::mem_threadgroup);
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
int x0 = 0; int x0 = 0;
int y0 = 0; int y0 = 0;
int x1 = 0; int x1 = 0;
int y1 = 0; int y1 = 0;
if (element_ix < v_156.conf.n_elements) if (element_ix < v_101.conf.n_elements)
{ {
uint param = element_ix; uint param = element_ix;
DrawMonoid draw_monoid = load_draw_monoid(param, v_81, v_81BufferSize, v_156); DrawMonoid draw_monoid = load_draw_monoid(param, v_57, v_101);
uint path_ix = draw_monoid.path_ix; uint path_ix = draw_monoid.path_ix;
float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
uint clip_ix = draw_monoid.clip_ix; uint clip_ix = draw_monoid.clip_ix;
if (clip_ix > 0u) if (clip_ix > 0u)
{ {
uint param_1 = clip_ix - 1u; uint param_1 = clip_ix - 1u;
clip_bbox = load_clip_bbox(param_1, v_81, v_81BufferSize, v_156); clip_bbox = load_clip_bbox(param_1, v_57, v_101);
} }
uint param_2 = path_ix; uint param_2 = path_ix;
float4 path_bbox = load_path_bbox(param_2, v_81, v_81BufferSize, v_156); float4 path_bbox = load_path_bbox(param_2, v_57, v_101);
float4 param_3 = path_bbox; float4 param_3 = path_bbox;
float4 param_4 = clip_bbox; float4 param_4 = clip_bbox;
float4 bbox = bbox_intersect(param_3, param_4); float4 bbox = bbox_intersect(param_3, param_4);
float4 _417 = bbox; float4 _354 = bbox;
float4 _419 = bbox; float4 _356 = bbox;
float2 _421 = fast::max(_417.xy, _419.zw); float2 _358 = fast::max(_354.xy, _356.zw);
bbox.z = _421.x; bbox.z = _358.x;
bbox.w = _421.y; bbox.w = _358.y;
uint param_5 = element_ix; uint param_5 = element_ix;
float4 param_6 = bbox; float4 param_6 = bbox;
store_draw_bbox(param_5, param_6, v_81, v_81BufferSize, v_156); store_draw_bbox(param_5, param_6, v_57, v_101);
x0 = int(floor(bbox.x * 0.00390625)); x0 = int(floor(bbox.x * 0.00390625));
y0 = int(floor(bbox.y * 0.00390625)); y0 = int(floor(bbox.y * 0.00390625));
x1 = int(ceil(bbox.z * 0.00390625)); x1 = int(ceil(bbox.z * 0.00390625));
y1 = int(ceil(bbox.w * 0.00390625)); y1 = int(ceil(bbox.w * 0.00390625));
} }
uint width_in_bins = ((v_156.conf.width_in_tiles + 16u) - 1u) / 16u; uint width_in_bins = ((v_101.conf.width_in_tiles + 16u) - 1u) / 16u;
uint height_in_bins = ((v_156.conf.height_in_tiles + 16u) - 1u) / 16u; uint height_in_bins = ((v_101.conf.height_in_tiles + 16u) - 1u) / 16u;
x0 = clamp(x0, 0, int(width_in_bins)); x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins)); x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins)); y0 = clamp(y0, 0, int(height_in_bins));
@ -260,7 +215,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
while (y < y1) while (y < y1)
{ {
uint _523 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed); uint _460 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
x++; x++;
if (x == x1) if (x == x1)
{ {
@ -275,47 +230,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x]))); element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
count[i_1][gl_LocalInvocationID.x] = element_count; count[i_1][gl_LocalInvocationID.x] = element_count;
} }
uint param_7 = 0u; uint chunk_offset = 0u;
uint param_8 = 0u;
bool param_9 = true;
Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
if (element_count != 0u) if (element_count != 0u)
{ {
uint param_10 = element_count * 4u; uint param_7 = element_count * 4u;
MallocResult _573 = malloc(param_10, v_81, v_81BufferSize); uint param_8 = v_101.conf.mem_size;
MallocResult chunk = _573; uint param_9 = 1u;
chunk_alloc = chunk.alloc; uint _510 = malloc_stage(param_7, param_8, param_9, v_57);
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc; chunk_offset = _510;
if (chunk.failed) sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset;
{
sh_alloc_failed = short(true);
}
} }
uint out_ix = (v_156.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); uint out_ix = (v_101.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
Alloc param_11; Alloc param_10;
param_11.offset = v_156.conf.bin_alloc.offset; param_10.offset = v_101.conf.bin_alloc.offset;
uint param_12 = out_ix; uint param_11 = out_ix;
uint param_13 = element_count; uint param_12 = element_count;
write_mem(param_11, param_12, param_13, v_81, v_81BufferSize); write_mem(param_10, param_11, param_12, v_57);
Alloc param_14; Alloc param_13;
param_14.offset = v_156.conf.bin_alloc.offset; param_13.offset = v_101.conf.bin_alloc.offset;
uint param_15 = out_ix + 1u; uint param_14 = out_ix + 1u;
uint param_16 = chunk_alloc.offset; uint param_15 = chunk_offset;
write_mem(param_14, param_15, param_16, v_81, v_81BufferSize); write_mem(param_13, param_14, param_15, v_57);
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
bool _630;
if (!bool(sh_alloc_failed))
{
_630 = v_81.mem_error != 0u;
}
else
{
_630 = bool(sh_alloc_failed);
}
if (_630)
{
return;
}
x = x0; x = x0;
y = y0; y = y0;
while (y < y1) while (y < y1)
@ -329,12 +265,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
{ {
idx += count[my_slice - 1u][bin_ix]; idx += count[my_slice - 1u][bin_ix];
} }
Alloc out_alloc = sh_chunk_alloc[bin_ix]; uint chunk_offset_1 = sh_chunk_offset[bin_ix];
uint out_offset = out_alloc.offset + (idx * 4u); if (chunk_offset_1 != 0u)
Alloc param_17 = out_alloc; {
BinInstanceRef param_18 = BinInstanceRef{ out_offset }; v_57.memory[(chunk_offset_1 >> uint(2)) + idx] = element_ix;
BinInstance param_19 = BinInstance{ element_ix }; }
BinInstance_write(param_17, param_18, param_19, v_81, v_81BufferSize);
} }
x++; x++;
if (x == x1) if (x == x1)

Binary file not shown.

Binary file not shown.

View file

@ -17,6 +17,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -48,7 +49,7 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _393 = { 0u, 0u }; static const Bic _394 = { 0u, 0u };
ByteAddressBuffer _80 : register(t1, space0); ByteAddressBuffer _80 : register(t1, space0);
RWByteAddressBuffer _96 : register(u0, space0); RWByteAddressBuffer _96 : register(u0, space0);
@ -71,9 +72,9 @@ groupshared float4 sh_bbox[256];
Bic load_bic(uint ix) Bic load_bic(uint ix)
{ {
uint base = (_80.Load(52) >> uint(2)) + (2u * ix); uint base = (_80.Load(56) >> uint(2)) + (2u * ix);
Bic _286 = { _96.Load(base * 4 + 8), _96.Load((base + 1u) * 4 + 8) }; Bic _287 = { _96.Load(base * 4 + 12), _96.Load((base + 1u) * 4 + 12) };
return _286; return _287;
} }
Bic bic_combine(Bic x, Bic y) Bic bic_combine(Bic x, Bic y)
@ -85,15 +86,15 @@ Bic bic_combine(Bic x, Bic y)
ClipEl load_clip_el(uint ix) ClipEl load_clip_el(uint ix)
{ {
uint base = (_80.Load(56) >> uint(2)) + (5u * ix); uint base = (_80.Load(60) >> uint(2)) + (5u * ix);
uint parent_ix = _96.Load(base * 4 + 8); uint parent_ix = _96.Load(base * 4 + 12);
float x0 = asfloat(_96.Load((base + 1u) * 4 + 8)); float x0 = asfloat(_96.Load((base + 1u) * 4 + 12));
float y0 = asfloat(_96.Load((base + 2u) * 4 + 8)); float y0 = asfloat(_96.Load((base + 2u) * 4 + 12));
float x1 = asfloat(_96.Load((base + 3u) * 4 + 8)); float x1 = asfloat(_96.Load((base + 3u) * 4 + 12));
float y1 = asfloat(_96.Load((base + 4u) * 4 + 8)); float y1 = asfloat(_96.Load((base + 4u) * 4 + 12));
float4 bbox = float4(x0, y0, x1, y1); float4 bbox = float4(x0, y0, x1, y1);
ClipEl _335 = { parent_ix, bbox }; ClipEl _336 = { parent_ix, bbox };
return _335; return _336;
} }
float4 bbox_intersect(float4 a, float4 b) float4 bbox_intersect(float4 a, float4 b)
@ -103,9 +104,9 @@ float4 bbox_intersect(float4 a, float4 b)
uint load_path_ix(uint ix) uint load_path_ix(uint ix)
{ {
if (ix < _80.Load(80)) if (ix < _80.Load(84))
{ {
return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8); return _96.Load(((_80.Load(52) >> uint(2)) + ix) * 4 + 12);
} }
else else
{ {
@ -115,11 +116,11 @@ uint load_path_ix(uint ix)
float4 load_path_bbox(uint path_ix) float4 load_path_bbox(uint path_ix)
{ {
uint base = (_80.Load(40) >> uint(2)) + (6u * path_ix); uint base = (_80.Load(44) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_96.Load(base * 4 + 8)) - 32768.0f; float bbox_l = float(_96.Load(base * 4 + 12)) - 32768.0f;
float bbox_t = float(_96.Load((base + 1u) * 4 + 8)) - 32768.0f; float bbox_t = float(_96.Load((base + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_96.Load((base + 2u) * 4 + 8)) - 32768.0f; float bbox_r = float(_96.Load((base + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_96.Load((base + 3u) * 4 + 8)) - 32768.0f; float bbox_b = float(_96.Load((base + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox; return bbox;
} }
@ -173,17 +174,17 @@ uint search_link(inout Bic bic)
void store_clip_bbox(uint ix, float4 bbox) void store_clip_bbox(uint ix, float4 bbox)
{ {
uint base = (_80.Load(60) >> uint(2)) + (4u * ix); uint base = (_80.Load(64) >> uint(2)) + (4u * ix);
_96.Store(base * 4 + 8, asuint(bbox.x)); _96.Store(base * 4 + 12, asuint(bbox.x));
_96.Store((base + 1u) * 4 + 8, asuint(bbox.y)); _96.Store((base + 1u) * 4 + 12, asuint(bbox.y));
_96.Store((base + 2u) * 4 + 8, asuint(bbox.z)); _96.Store((base + 2u) * 4 + 12, asuint(bbox.z));
_96.Store((base + 3u) * 4 + 8, asuint(bbox.w)); _96.Store((base + 3u) * 4 + 12, asuint(bbox.w));
} }
void comp_main() void comp_main()
{ {
uint th = gl_LocalInvocationID.x; uint th = gl_LocalInvocationID.x;
Bic bic = _393; Bic bic = _394;
if (th < gl_WorkGroupID.x) if (th < gl_WorkGroupID.x)
{ {
uint param = th; uint param = th;
@ -240,8 +241,8 @@ void comp_main()
uint param_6 = gl_GlobalInvocationID.x; uint param_6 = gl_GlobalInvocationID.x;
uint inp = load_path_ix(param_6); uint inp = load_path_ix(param_6);
bool is_push = int(inp) >= 0; bool is_push = int(inp) >= 0;
Bic _559 = { 1u - uint(is_push), uint(is_push) }; Bic _560 = { 1u - uint(is_push), uint(is_push) };
bic = _559; bic = _560;
sh_bic[th] = bic; sh_bic[th] = bic;
if (is_push) if (is_push)
{ {
@ -266,11 +267,11 @@ void comp_main()
inbase = outbase; inbase = outbase;
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
bic = _393; bic = _394;
Bic param_10 = bic; Bic param_10 = bic;
uint _618 = search_link(param_10); uint _619 = search_link(param_10);
bic = param_10; bic = param_10;
uint link = _618; uint link = _619;
sh_link[th] = link; sh_link[th] = link;
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
uint grandparent; uint grandparent;
@ -324,22 +325,22 @@ void comp_main()
sh_bbox[th] = bbox; sh_bbox[th] = bbox;
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
uint path_ix = inp; uint path_ix = inp;
bool _717 = !is_push; bool _718 = !is_push;
bool _725; bool _726;
if (_717) if (_718)
{ {
_725 = gl_GlobalInvocationID.x < _80.Load(80); _726 = gl_GlobalInvocationID.x < _80.Load(84);
} }
else else
{ {
_725 = _717; _726 = _718;
} }
if (_725) if (_726)
{ {
uint param_15 = parent; uint param_15 = parent;
path_ix = load_path_ix(param_15); path_ix = load_path_ix(param_15);
uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (4u * (~inp)); uint drawmonoid_out_base = (_80.Load(48) >> uint(2)) + (4u * (~inp));
_96.Store(drawmonoid_out_base * 4 + 8, path_ix); _96.Store(drawmonoid_out_base * 4 + 12, path_ix);
if (int(grandparent) >= 0) if (int(grandparent) >= 0)
{ {
bbox = sh_bbox[grandparent]; bbox = sh_bbox[grandparent];

View file

@ -24,6 +24,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -62,6 +63,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -275,9 +277,9 @@ kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_8
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
bic = Bic{ 0u, 0u }; bic = Bic{ 0u, 0u };
Bic param_10 = bic; Bic param_10 = bic;
uint _618 = search_link(param_10, gl_LocalInvocationID, sh_bic); uint _619 = search_link(param_10, gl_LocalInvocationID, sh_bic);
bic = param_10; bic = param_10;
uint link = _618; uint link = _619;
sh_link[th] = link; sh_link[th] = link;
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint grandparent; uint grandparent;
@ -331,17 +333,17 @@ kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_8
sh_bbox[th] = bbox; sh_bbox[th] = bbox;
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
uint path_ix = inp; uint path_ix = inp;
bool _717 = !is_push; bool _718 = !is_push;
bool _725; bool _726;
if (_717) if (_718)
{ {
_725 = gl_GlobalInvocationID.x < v_80.conf.n_clip; _726 = gl_GlobalInvocationID.x < v_80.conf.n_clip;
} }
else else
{ {
_725 = _717; _726 = _718;
} }
if (_725) if (_726)
{ {
uint param_15 = parent; uint param_15 = parent;
path_ix = load_path_ix(param_15, v_80, v_96); path_ix = load_path_ix(param_15, v_80, v_96);

Binary file not shown.

Binary file not shown.

View file

@ -17,6 +17,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -48,7 +49,7 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Bic _267 = { 0u, 0u }; static const Bic _268 = { 0u, 0u };
ByteAddressBuffer _64 : register(t1, space0); ByteAddressBuffer _64 : register(t1, space0);
RWByteAddressBuffer _80 : register(u0, space0); RWByteAddressBuffer _80 : register(u0, space0);
@ -77,39 +78,39 @@ Bic bic_combine(Bic x, Bic y)
void store_bic(uint ix, Bic bic) void store_bic(uint ix, Bic bic)
{ {
uint base = (_64.Load(52) >> uint(2)) + (2u * ix); uint base = (_64.Load(56) >> uint(2)) + (2u * ix);
_80.Store(base * 4 + 8, bic.a); _80.Store(base * 4 + 12, bic.a);
_80.Store((base + 1u) * 4 + 8, bic.b); _80.Store((base + 1u) * 4 + 12, bic.b);
} }
float4 load_path_bbox(uint path_ix) float4 load_path_bbox(uint path_ix)
{ {
uint base = (_64.Load(40) >> uint(2)) + (6u * path_ix); uint base = (_64.Load(44) >> uint(2)) + (6u * path_ix);
float bbox_l = float(_80.Load(base * 4 + 8)) - 32768.0f; float bbox_l = float(_80.Load(base * 4 + 12)) - 32768.0f;
float bbox_t = float(_80.Load((base + 1u) * 4 + 8)) - 32768.0f; float bbox_t = float(_80.Load((base + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_80.Load((base + 2u) * 4 + 8)) - 32768.0f; float bbox_r = float(_80.Load((base + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_80.Load((base + 3u) * 4 + 8)) - 32768.0f; float bbox_b = float(_80.Load((base + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
return bbox; return bbox;
} }
void store_clip_el(uint ix, ClipEl el) void store_clip_el(uint ix, ClipEl el)
{ {
uint base = (_64.Load(56) >> uint(2)) + (5u * ix); uint base = (_64.Load(60) >> uint(2)) + (5u * ix);
_80.Store(base * 4 + 8, el.parent_ix); _80.Store(base * 4 + 12, el.parent_ix);
_80.Store((base + 1u) * 4 + 8, asuint(el.bbox.x)); _80.Store((base + 1u) * 4 + 12, asuint(el.bbox.x));
_80.Store((base + 2u) * 4 + 8, asuint(el.bbox.y)); _80.Store((base + 2u) * 4 + 12, asuint(el.bbox.y));
_80.Store((base + 3u) * 4 + 8, asuint(el.bbox.z)); _80.Store((base + 3u) * 4 + 12, asuint(el.bbox.z));
_80.Store((base + 4u) * 4 + 8, asuint(el.bbox.w)); _80.Store((base + 4u) * 4 + 12, asuint(el.bbox.w));
} }
void comp_main() void comp_main()
{ {
uint th = gl_LocalInvocationID.x; uint th = gl_LocalInvocationID.x;
uint inp = _80.Load(((_64.Load(48) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 8); uint inp = _80.Load(((_64.Load(52) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 12);
bool is_push = int(inp) >= 0; bool is_push = int(inp) >= 0;
Bic _207 = { 1u - uint(is_push), uint(is_push) }; Bic _208 = { 1u - uint(is_push), uint(is_push) };
Bic bic = _207; Bic bic = _208;
sh_bic[gl_LocalInvocationID.x] = bic; sh_bic[gl_LocalInvocationID.x] = bic;
for (uint i = 0u; i < 8u; i++) for (uint i = 0u; i < 8u; i++)
{ {
@ -132,21 +133,21 @@ void comp_main()
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
uint size = sh_bic[0].b; uint size = sh_bic[0].b;
bic = _267; bic = _268;
if ((th + 1u) < 256u) if ((th + 1u) < 256u)
{ {
bic = sh_bic[th + 1u]; bic = sh_bic[th + 1u];
} }
bool _283; bool _284;
if (is_push) if (is_push)
{ {
_283 = bic.a == 0u; _284 = bic.a == 0u;
} }
else else
{ {
_283 = is_push; _284 = is_push;
} }
if (_283) if (_284)
{ {
uint local_ix = (size - bic.b) - 1u; uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th; sh_parent[local_ix] = th;
@ -163,8 +164,8 @@ void comp_main()
if (th < size) if (th < size)
{ {
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u); uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
ClipEl _331 = { parent_ix, bbox }; ClipEl _332 = { parent_ix, bbox };
ClipEl el = _331; ClipEl el = _332;
uint param_5 = gl_GlobalInvocationID.x; uint param_5 = gl_GlobalInvocationID.x;
ClipEl param_6 = el; ClipEl param_6 = el;
store_clip_el(param_5, param_6); store_clip_el(param_5, param_6);

View file

@ -24,6 +24,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -62,6 +63,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -142,16 +144,16 @@ kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_6
{ {
bic = sh_bic[th + 1u]; bic = sh_bic[th + 1u];
} }
bool _283; bool _284;
if (is_push) if (is_push)
{ {
_283 = bic.a == 0u; _284 = bic.a == 0u;
} }
else else
{ {
_283 = is_push; _284 = is_push;
} }
if (_283) if (_284)
{ {
uint local_ix = (size - bic.b) - 1u; uint local_ix = (size - bic.b) - 1u;
sh_parent[local_ix] = th; sh_parent[local_ix] = th;

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

Binary file not shown.

Binary file not shown.

View file

@ -13,6 +13,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -88,7 +89,7 @@ DrawMonoid draw_monoid_identity()
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _93.Load(100) >> uint(2); uint drawtag_base = _93.Load(104) >> uint(2);
uint tag_word = _103.Load((drawtag_base + ix) * 4 + 0); uint tag_word = _103.Load((drawtag_base + ix) * 4 + 0);
uint param = tag_word; uint param = tag_word;
DrawMonoid agg = map_tag(param); DrawMonoid agg = map_tag(param);
@ -137,11 +138,11 @@ void comp_main()
DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
row = combine_draw_monoid(param_6, param_7); row = combine_draw_monoid(param_6, param_7);
} }
uint drawdata_base = _93.Load(104) >> uint(2); uint drawdata_base = _93.Load(108) >> uint(2);
uint drawinfo_base = _93.Load(68) >> uint(2); uint drawinfo_base = _93.Load(72) >> uint(2);
uint out_ix = gl_GlobalInvocationID.x * 8u; uint out_ix = gl_GlobalInvocationID.x * 8u;
uint out_base = (_93.Load(44) >> uint(2)) + (out_ix * 4u); uint out_base = (_93.Load(48) >> uint(2)) + (out_ix * 4u);
uint clip_out_base = _93.Load(48) >> uint(2); uint clip_out_base = _93.Load(52) >> uint(2);
float4 mat; float4 mat;
float2 translate; float2 translate;
float2 p0; float2 p0;
@ -155,31 +156,31 @@ void comp_main()
DrawMonoid param_9 = local[i_2 - 1u]; DrawMonoid param_9 = local[i_2 - 1u];
m = combine_draw_monoid(param_8, param_9); m = combine_draw_monoid(param_8, param_9);
} }
_285.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix); _285.Store((out_base + (i_2 * 4u)) * 4 + 12, m.path_ix);
_285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix); _285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 12, m.clip_ix);
_285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset); _285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 12, m.scene_offset);
_285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset); _285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 12, m.info_offset);
uint dd = drawdata_base + (m.scene_offset >> uint(2)); uint dd = drawdata_base + (m.scene_offset >> uint(2));
uint di = drawinfo_base + (m.info_offset >> uint(2)); uint di = drawinfo_base + (m.info_offset >> uint(2));
tag_word = _103.Load(((drawtag_base + ix) + i_2) * 4 + 0); tag_word = _103.Load(((drawtag_base + ix) + i_2) * 4 + 0);
if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u)) if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u))
{ {
uint bbox_offset = (_93.Load(40) >> uint(2)) + (6u * m.path_ix); uint bbox_offset = (_93.Load(44) >> uint(2)) + (6u * m.path_ix);
float bbox_l = float(_285.Load(bbox_offset * 4 + 8)) - 32768.0f; float bbox_l = float(_285.Load(bbox_offset * 4 + 12)) - 32768.0f;
float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 12)) - 32768.0f;
float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 12)) - 32768.0f;
float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 12)) - 32768.0f;
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 8)); float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 12));
uint fill_mode = uint(linewidth >= 0.0f); uint fill_mode = uint(linewidth >= 0.0f);
if (((linewidth >= 0.0f) || (tag_word == 276u)) || (tag_word == 732u)) if (((linewidth >= 0.0f) || (tag_word == 276u)) || (tag_word == 732u))
{ {
uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 8); uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 12);
uint t = (_93.Load(36) >> uint(2)) + (6u * trans_ix); uint t = (_93.Load(40) >> uint(2)) + (6u * trans_ix);
mat = asfloat(uint4(_285.Load(t * 4 + 8), _285.Load((t + 1u) * 4 + 8), _285.Load((t + 2u) * 4 + 8), _285.Load((t + 3u) * 4 + 8))); mat = asfloat(uint4(_285.Load(t * 4 + 12), _285.Load((t + 1u) * 4 + 12), _285.Load((t + 2u) * 4 + 12), _285.Load((t + 3u) * 4 + 12)));
if ((tag_word == 276u) || (tag_word == 732u)) if ((tag_word == 276u) || (tag_word == 732u))
{ {
translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 8), _285.Load((t + 5u) * 4 + 8))); translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 12), _285.Load((t + 5u) * 4 + 12)));
} }
} }
if (linewidth >= 0.0f) if (linewidth >= 0.0f)
@ -191,12 +192,12 @@ void comp_main()
case 68u: case 68u:
case 72u: case 72u:
{ {
_285.Store(di * 4 + 8, asuint(linewidth)); _285.Store(di * 4 + 12, asuint(linewidth));
break; break;
} }
case 276u: case 276u:
{ {
_285.Store(di * 4 + 8, asuint(linewidth)); _285.Store(di * 4 + 12, asuint(linewidth));
p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0))); p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0)));
p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0))); p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0)));
p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
@ -206,9 +207,9 @@ void comp_main()
float line_x = dxy.x * scale; float line_x = dxy.x * scale;
float line_y = dxy.y * scale; float line_y = dxy.y * scale;
float line_c = -((p0.x * line_x) + (p0.y * line_y)); float line_c = -((p0.x * line_x) + (p0.y * line_y));
_285.Store((di + 1u) * 4 + 8, asuint(line_x)); _285.Store((di + 1u) * 4 + 12, asuint(line_x));
_285.Store((di + 2u) * 4 + 8, asuint(line_y)); _285.Store((di + 2u) * 4 + 12, asuint(line_y));
_285.Store((di + 3u) * 4 + 8, asuint(line_c)); _285.Store((di + 3u) * 4 + 12, asuint(line_c));
break; break;
} }
case 732u: case 732u:
@ -227,17 +228,17 @@ void comp_main()
float2 c1 = center1 * rainv; float2 c1 = center1 * rainv;
float ra = rr * rainv; float ra = rr * rainv;
float roff = rr - 1.0f; float roff = rr - 1.0f;
_285.Store(di * 4 + 8, asuint(linewidth)); _285.Store(di * 4 + 12, asuint(linewidth));
_285.Store((di + 1u) * 4 + 8, asuint(inv_mat.x)); _285.Store((di + 1u) * 4 + 12, asuint(inv_mat.x));
_285.Store((di + 2u) * 4 + 8, asuint(inv_mat.y)); _285.Store((di + 2u) * 4 + 12, asuint(inv_mat.y));
_285.Store((di + 3u) * 4 + 8, asuint(inv_mat.z)); _285.Store((di + 3u) * 4 + 12, asuint(inv_mat.z));
_285.Store((di + 4u) * 4 + 8, asuint(inv_mat.w)); _285.Store((di + 4u) * 4 + 12, asuint(inv_mat.w));
_285.Store((di + 5u) * 4 + 8, asuint(inv_tr.x)); _285.Store((di + 5u) * 4 + 12, asuint(inv_tr.x));
_285.Store((di + 6u) * 4 + 8, asuint(inv_tr.y)); _285.Store((di + 6u) * 4 + 12, asuint(inv_tr.y));
_285.Store((di + 7u) * 4 + 8, asuint(c1.x)); _285.Store((di + 7u) * 4 + 12, asuint(c1.x));
_285.Store((di + 8u) * 4 + 8, asuint(c1.y)); _285.Store((di + 8u) * 4 + 12, asuint(c1.y));
_285.Store((di + 9u) * 4 + 8, asuint(ra)); _285.Store((di + 9u) * 4 + 12, asuint(ra));
_285.Store((di + 10u) * 4 + 8, asuint(roff)); _285.Store((di + 10u) * 4 + 12, asuint(roff));
break; break;
} }
case 5u: case 5u:
@ -253,7 +254,7 @@ void comp_main()
{ {
path_ix = m.path_ix; path_ix = m.path_ix;
} }
_285.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); _285.Store((clip_out_base + m.clip_ix) * 4 + 12, path_ix);
} }
} }
} }

View file

@ -59,6 +59,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -115,6 +116,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };

Binary file not shown.

Binary file not shown.

View file

@ -13,6 +13,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -81,7 +82,7 @@ DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
uint drawtag_base = _87.Load(100) >> uint(2); uint drawtag_base = _87.Load(104) >> uint(2);
uint tag_word = _97.Load((drawtag_base + ix) * 4 + 0); uint tag_word = _97.Load((drawtag_base + ix) * 4 + 0);
uint param = tag_word; uint param = tag_word;
DrawMonoid agg = map_tag(param); DrawMonoid agg = map_tag(param);

View file

@ -20,6 +20,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -76,6 +77,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };

Binary file not shown.

Binary file not shown.

View file

@ -130,6 +130,7 @@ struct TileSeg
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -163,9 +164,10 @@ static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);
RWByteAddressBuffer _297 : register(u0, space0); RWByteAddressBuffer _297 : register(u0, space0);
ByteAddressBuffer _1681 : register(t1, space0); ByteAddressBuffer _1681 : register(t1, space0);
RWTexture2D<unorm float4> image_atlas : register(u3, space0); RWByteAddressBuffer _2506 : register(u2, space0);
RWTexture2D<unorm float4> gradients : register(u4, space0); RWTexture2D<unorm float4> image_atlas : register(u4, space0);
RWTexture2D<unorm float4> image : register(u2, space0); RWTexture2D<unorm float4> gradients : register(u5, space0);
RWTexture2D<unorm float4> image : register(u3, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -206,7 +208,7 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _297.Load(offset * 4 + 8); uint v = _297.Load(offset * 4 + 12);
return v; return v;
} }
@ -989,9 +991,9 @@ CmdJump Cmd_Jump_read(Alloc a, CmdRef ref)
void comp_main() void comp_main()
{ {
uint tile_ix = (gl_WorkGroupID.y * _1681.Load(8)) + gl_WorkGroupID.x; uint tile_ix = (gl_WorkGroupID.y * _1681.Load(12)) + gl_WorkGroupID.x;
Alloc _1696; Alloc _1696;
_1696.offset = _1681.Load(24); _1696.offset = _1681.Load(28);
Alloc param; Alloc param;
param.offset = _1696.offset; param.offset = _1696.offset;
uint param_1 = tile_ix * 1024u; uint param_1 = tile_ix * 1024u;
@ -999,7 +1001,7 @@ void comp_main()
Alloc cmd_alloc = slice_mem(param, param_1, param_2); Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef _1705 = { cmd_alloc.offset }; CmdRef _1705 = { cmd_alloc.offset };
CmdRef cmd_ref = _1705; CmdRef cmd_ref = _1705;
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 8); uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 12);
cmd_ref.offset += 4u; cmd_ref.offset += 4u;
uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
float2 xy = float2(xy_uint); float2 xy = float2(xy_uint);
@ -1009,14 +1011,13 @@ void comp_main()
rgba[i] = 0.0f.xxxx; rgba[i] = 0.0f.xxxx;
} }
uint clip_depth = 0u; uint clip_depth = 0u;
bool mem_ok = _297.Load(4) == 0u;
float df[8]; float df[8];
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
float area[8]; float area[8];
uint blend_stack[4][8]; uint blend_stack[4][8];
uint base_ix_1; uint base_ix_1;
uint bg_rgba; uint bg_rgba;
while (mem_ok) while (true)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
CmdRef param_4 = cmd_ref; CmdRef param_4 = cmd_ref;
@ -1036,13 +1037,13 @@ void comp_main()
{ {
df[k] = 1000000000.0f; df[k] = 1000000000.0f;
} }
TileSegRef _1810 = { stroke.tile_ref }; TileSegRef _1805 = { stroke.tile_ref };
tile_seg_ref = _1810; tile_seg_ref = _1805;
do do
{ {
uint param_7 = tile_seg_ref.offset; uint param_7 = tile_seg_ref.offset;
uint param_8 = 24u; uint param_8 = 24u;
bool param_9 = mem_ok; bool param_9 = true;
Alloc param_10 = new_alloc(param_7, param_8, param_9); Alloc param_10 = new_alloc(param_7, param_8, param_9);
TileSegRef param_11 = tile_seg_ref; TileSegRef param_11 = tile_seg_ref;
TileSeg seg = TileSeg_read(param_10, param_11); TileSeg seg = TileSeg_read(param_10, param_11);
@ -1073,13 +1074,13 @@ void comp_main()
{ {
area[k_3] = float(fill.backdrop); area[k_3] = float(fill.backdrop);
} }
TileSegRef _1930 = { fill.tile_ref }; TileSegRef _1924 = { fill.tile_ref };
tile_seg_ref = _1930; tile_seg_ref = _1924;
do do
{ {
uint param_15 = tile_seg_ref.offset; uint param_15 = tile_seg_ref.offset;
uint param_16 = 24u; uint param_16 = 24u;
bool param_17 = mem_ok; bool param_17 = true;
Alloc param_18 = new_alloc(param_15, param_16, param_17); Alloc param_18 = new_alloc(param_15, param_16, param_17);
TileSegRef param_19 = tile_seg_ref; TileSegRef param_19 = tile_seg_ref;
TileSeg seg_1 = TileSeg_read(param_18, param_19); TileSeg seg_1 = TileSeg_read(param_18, param_19);
@ -1163,10 +1164,10 @@ void comp_main()
int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba = gradients[int2(x, int(lin.index))]; float4 fg_rgba = gradients[int2(x, int(lin.index))];
float3 param_29 = fg_rgba.xyz; float3 param_29 = fg_rgba.xyz;
float3 _2264 = fromsRGB(param_29); float3 _2257 = fromsRGB(param_29);
fg_rgba.x = _2264.x; fg_rgba.x = _2257.x;
fg_rgba.y = _2264.y; fg_rgba.y = _2257.y;
fg_rgba.z = _2264.z; fg_rgba.z = _2257.z;
float4 fg_k_1 = fg_rgba * area[k_9]; float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1;
} }
@ -1189,10 +1190,10 @@ void comp_main()
int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))];
float3 param_33 = fg_rgba_1.xyz; float3 param_33 = fg_rgba_1.xyz;
float3 _2374 = fromsRGB(param_33); float3 _2367 = fromsRGB(param_33);
fg_rgba_1.x = _2374.x; fg_rgba_1.x = _2367.x;
fg_rgba_1.y = _2374.y; fg_rgba_1.y = _2367.y;
fg_rgba_1.z = _2374.z; fg_rgba_1.z = _2367.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10]; float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2;
} }
@ -1206,9 +1207,9 @@ void comp_main()
CmdImage fill_img = Cmd_Image_read(param_34, param_35); CmdImage fill_img = Cmd_Image_read(param_34, param_35);
uint2 param_36 = xy_uint; uint2 param_36 = xy_uint;
CmdImage param_37 = fill_img; CmdImage param_37 = fill_img;
float4 _2417[8]; float4 _2410[8];
fillImage(_2417, param_36, param_37); fillImage(_2410, param_36, param_37);
float4 img[8] = _2417; float4 img[8] = _2410;
for (uint k_11 = 0u; k_11 < 8u; k_11++) for (uint k_11 = 0u; k_11 < 8u; k_11++)
{ {
float4 fg_k_3 = img[k_11] * area[k_11]; float4 fg_k_3 = img[k_11] * area[k_11];
@ -1224,8 +1225,8 @@ void comp_main()
for (uint k_12 = 0u; k_12 < 8u; k_12++) for (uint k_12 = 0u; k_12 < 8u; k_12++)
{ {
float4 param_38 = float4(rgba[k_12]); float4 param_38 = float4(rgba[k_12]);
uint _2479 = packsRGB(param_38); uint _2472 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2479; blend_stack[clip_depth][k_12] = _2472;
rgba[k_12] = 0.0f.xxxx; rgba[k_12] = 0.0f.xxxx;
} }
} }
@ -1235,8 +1236,8 @@ void comp_main()
for (uint k_13 = 0u; k_13 < 8u; k_13++) for (uint k_13 = 0u; k_13 < 8u; k_13++)
{ {
float4 param_39 = float4(rgba[k_13]); float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39); uint _2519 = packsRGB(param_39);
_297.Store((base_ix + k_13) * 4 + 8, _2522); _2506.Store((base_ix + k_13) * 4 + 0, _2519);
rgba[k_13] = 0.0f.xxxx; rgba[k_13] = 0.0f.xxxx;
} }
} }
@ -1262,7 +1263,7 @@ void comp_main()
} }
else else
{ {
bg_rgba = _297.Load((base_ix_1 + k_14) * 4 + 8); bg_rgba = _2506.Load((base_ix_1 + k_14) * 4 + 0);
} }
uint param_42 = bg_rgba; uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42); float4 bg = unpacksRGB(param_42);
@ -1279,8 +1280,8 @@ void comp_main()
{ {
Alloc param_46 = cmd_alloc; Alloc param_46 = cmd_alloc;
CmdRef param_47 = cmd_ref; CmdRef param_47 = cmd_ref;
CmdRef _2621 = { Cmd_Jump_read(param_46, param_47).new_ref }; CmdRef _2618 = { Cmd_Jump_read(param_46, param_47).new_ref };
cmd_ref = _2621; cmd_ref = _2618;
cmd_alloc.offset = cmd_ref.offset; cmd_alloc.offset = cmd_ref.offset;
break; break;
} }

View file

@ -178,6 +178,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -188,6 +189,7 @@ struct Alloc_1
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -222,6 +224,11 @@ struct ConfigBuf
Config conf; Config conf;
}; };
struct BlendBuf
{
uint blend_mem[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u);
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
@ -1047,7 +1054,7 @@ CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Me
return CmdJump_read(param, param_1, v_297); return CmdJump_read(param, param_1, v_297);
} }
kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], device BlendBuf& _2506 [[buffer(2)]], texture2d<float, access::write> image [[texture(3)]], texture2d<float> image_atlas [[texture(4)]], texture2d<float> gradients [[texture(5)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{ {
uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x; uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x;
Alloc param; Alloc param;
@ -1066,14 +1073,13 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
rgba[i] = float4(0.0); rgba[i] = float4(0.0);
} }
uint clip_depth = 0u; uint clip_depth = 0u;
bool mem_ok = v_297.mem_error == 0u;
spvUnsafeArray<float, 8> df; spvUnsafeArray<float, 8> df;
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
spvUnsafeArray<float, 8> area; spvUnsafeArray<float, 8> area;
spvUnsafeArray<spvUnsafeArray<uint, 8>, 4> blend_stack; spvUnsafeArray<spvUnsafeArray<uint, 8>, 4> blend_stack;
uint base_ix_1; uint base_ix_1;
uint bg_rgba; uint bg_rgba;
while (mem_ok) while (true)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
CmdRef param_4 = cmd_ref; CmdRef param_4 = cmd_ref;
@ -1098,7 +1104,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
{ {
uint param_7 = tile_seg_ref.offset; uint param_7 = tile_seg_ref.offset;
uint param_8 = 24u; uint param_8 = 24u;
bool param_9 = mem_ok; bool param_9 = true;
Alloc param_10 = new_alloc(param_7, param_8, param_9); Alloc param_10 = new_alloc(param_7, param_8, param_9);
TileSegRef param_11 = tile_seg_ref; TileSegRef param_11 = tile_seg_ref;
TileSeg seg = TileSeg_read(param_10, param_11, v_297); TileSeg seg = TileSeg_read(param_10, param_11, v_297);
@ -1134,7 +1140,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
{ {
uint param_15 = tile_seg_ref.offset; uint param_15 = tile_seg_ref.offset;
uint param_16 = 24u; uint param_16 = 24u;
bool param_17 = mem_ok; bool param_17 = true;
Alloc param_18 = new_alloc(param_15, param_16, param_17); Alloc param_18 = new_alloc(param_15, param_16, param_17);
TileSegRef param_19 = tile_seg_ref; TileSegRef param_19 = tile_seg_ref;
TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297); TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297);
@ -1218,10 +1224,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
float3 param_29 = fg_rgba.xyz; float3 param_29 = fg_rgba.xyz;
float3 _2264 = fromsRGB(param_29); float3 _2257 = fromsRGB(param_29);
fg_rgba.x = _2264.x; fg_rgba.x = _2257.x;
fg_rgba.y = _2264.y; fg_rgba.y = _2257.y;
fg_rgba.z = _2264.z; fg_rgba.z = _2257.z;
float4 fg_k_1 = fg_rgba * area[k_9]; float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;
} }
@ -1244,10 +1250,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0));
float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index))));
float3 param_33 = fg_rgba_1.xyz; float3 param_33 = fg_rgba_1.xyz;
float3 _2374 = fromsRGB(param_33); float3 _2367 = fromsRGB(param_33);
fg_rgba_1.x = _2374.x; fg_rgba_1.x = _2367.x;
fg_rgba_1.y = _2374.y; fg_rgba_1.y = _2367.y;
fg_rgba_1.z = _2374.z; fg_rgba_1.z = _2367.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10]; float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2;
} }
@ -1278,8 +1284,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
for (uint k_12 = 0u; k_12 < 8u; k_12++) for (uint k_12 = 0u; k_12 < 8u; k_12++)
{ {
float4 param_38 = float4(rgba[k_12]); float4 param_38 = float4(rgba[k_12]);
uint _2479 = packsRGB(param_38); uint _2472 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2479; blend_stack[clip_depth][k_12] = _2472;
rgba[k_12] = float4(0.0); rgba[k_12] = float4(0.0);
} }
} }
@ -1289,8 +1295,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
for (uint k_13 = 0u; k_13 < 8u; k_13++) for (uint k_13 = 0u; k_13 < 8u; k_13++)
{ {
float4 param_39 = float4(rgba[k_13]); float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39); uint _2519 = packsRGB(param_39);
v_297.memory[base_ix + k_13] = _2522; _2506.blend_mem[base_ix + k_13] = _2519;
rgba[k_13] = float4(0.0); rgba[k_13] = float4(0.0);
} }
} }
@ -1316,7 +1322,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
} }
else else
{ {
bg_rgba = v_297.memory[base_ix_1 + k_14]; bg_rgba = _2506.blend_mem[base_ix_1 + k_14];
} }
uint param_42 = bg_rgba; uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42); float4 bg = unpacksRGB(param_42);

Binary file not shown.

Binary file not shown.

View file

@ -130,6 +130,7 @@ struct TileSeg
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -163,9 +164,10 @@ static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);
RWByteAddressBuffer _297 : register(u0, space0); RWByteAddressBuffer _297 : register(u0, space0);
ByteAddressBuffer _1681 : register(t1, space0); ByteAddressBuffer _1681 : register(t1, space0);
RWTexture2D<unorm float4> image_atlas : register(u3, space0); RWByteAddressBuffer _2506 : register(u2, space0);
RWTexture2D<unorm float4> gradients : register(u4, space0); RWTexture2D<unorm float4> image_atlas : register(u4, space0);
RWTexture2D<unorm float> image : register(u2, space0); RWTexture2D<unorm float4> gradients : register(u5, space0);
RWTexture2D<unorm float> image : register(u3, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -206,7 +208,7 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _297.Load(offset * 4 + 8); uint v = _297.Load(offset * 4 + 12);
return v; return v;
} }
@ -989,9 +991,9 @@ CmdJump Cmd_Jump_read(Alloc a, CmdRef ref)
void comp_main() void comp_main()
{ {
uint tile_ix = (gl_WorkGroupID.y * _1681.Load(8)) + gl_WorkGroupID.x; uint tile_ix = (gl_WorkGroupID.y * _1681.Load(12)) + gl_WorkGroupID.x;
Alloc _1696; Alloc _1696;
_1696.offset = _1681.Load(24); _1696.offset = _1681.Load(28);
Alloc param; Alloc param;
param.offset = _1696.offset; param.offset = _1696.offset;
uint param_1 = tile_ix * 1024u; uint param_1 = tile_ix * 1024u;
@ -999,7 +1001,7 @@ void comp_main()
Alloc cmd_alloc = slice_mem(param, param_1, param_2); Alloc cmd_alloc = slice_mem(param, param_1, param_2);
CmdRef _1705 = { cmd_alloc.offset }; CmdRef _1705 = { cmd_alloc.offset };
CmdRef cmd_ref = _1705; CmdRef cmd_ref = _1705;
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 8); uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 12);
cmd_ref.offset += 4u; cmd_ref.offset += 4u;
uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
float2 xy = float2(xy_uint); float2 xy = float2(xy_uint);
@ -1009,14 +1011,13 @@ void comp_main()
rgba[i] = 0.0f.xxxx; rgba[i] = 0.0f.xxxx;
} }
uint clip_depth = 0u; uint clip_depth = 0u;
bool mem_ok = _297.Load(4) == 0u;
float df[8]; float df[8];
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
float area[8]; float area[8];
uint blend_stack[4][8]; uint blend_stack[4][8];
uint base_ix_1; uint base_ix_1;
uint bg_rgba; uint bg_rgba;
while (mem_ok) while (true)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
CmdRef param_4 = cmd_ref; CmdRef param_4 = cmd_ref;
@ -1036,13 +1037,13 @@ void comp_main()
{ {
df[k] = 1000000000.0f; df[k] = 1000000000.0f;
} }
TileSegRef _1810 = { stroke.tile_ref }; TileSegRef _1805 = { stroke.tile_ref };
tile_seg_ref = _1810; tile_seg_ref = _1805;
do do
{ {
uint param_7 = tile_seg_ref.offset; uint param_7 = tile_seg_ref.offset;
uint param_8 = 24u; uint param_8 = 24u;
bool param_9 = mem_ok; bool param_9 = true;
Alloc param_10 = new_alloc(param_7, param_8, param_9); Alloc param_10 = new_alloc(param_7, param_8, param_9);
TileSegRef param_11 = tile_seg_ref; TileSegRef param_11 = tile_seg_ref;
TileSeg seg = TileSeg_read(param_10, param_11); TileSeg seg = TileSeg_read(param_10, param_11);
@ -1073,13 +1074,13 @@ void comp_main()
{ {
area[k_3] = float(fill.backdrop); area[k_3] = float(fill.backdrop);
} }
TileSegRef _1930 = { fill.tile_ref }; TileSegRef _1924 = { fill.tile_ref };
tile_seg_ref = _1930; tile_seg_ref = _1924;
do do
{ {
uint param_15 = tile_seg_ref.offset; uint param_15 = tile_seg_ref.offset;
uint param_16 = 24u; uint param_16 = 24u;
bool param_17 = mem_ok; bool param_17 = true;
Alloc param_18 = new_alloc(param_15, param_16, param_17); Alloc param_18 = new_alloc(param_15, param_16, param_17);
TileSegRef param_19 = tile_seg_ref; TileSegRef param_19 = tile_seg_ref;
TileSeg seg_1 = TileSeg_read(param_18, param_19); TileSeg seg_1 = TileSeg_read(param_18, param_19);
@ -1163,10 +1164,10 @@ void comp_main()
int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba = gradients[int2(x, int(lin.index))]; float4 fg_rgba = gradients[int2(x, int(lin.index))];
float3 param_29 = fg_rgba.xyz; float3 param_29 = fg_rgba.xyz;
float3 _2264 = fromsRGB(param_29); float3 _2257 = fromsRGB(param_29);
fg_rgba.x = _2264.x; fg_rgba.x = _2257.x;
fg_rgba.y = _2264.y; fg_rgba.y = _2257.y;
fg_rgba.z = _2264.z; fg_rgba.z = _2257.z;
float4 fg_k_1 = fg_rgba * area[k_9]; float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1;
} }
@ -1189,10 +1190,10 @@ void comp_main()
int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f));
float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))];
float3 param_33 = fg_rgba_1.xyz; float3 param_33 = fg_rgba_1.xyz;
float3 _2374 = fromsRGB(param_33); float3 _2367 = fromsRGB(param_33);
fg_rgba_1.x = _2374.x; fg_rgba_1.x = _2367.x;
fg_rgba_1.y = _2374.y; fg_rgba_1.y = _2367.y;
fg_rgba_1.z = _2374.z; fg_rgba_1.z = _2367.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10]; float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2;
} }
@ -1206,9 +1207,9 @@ void comp_main()
CmdImage fill_img = Cmd_Image_read(param_34, param_35); CmdImage fill_img = Cmd_Image_read(param_34, param_35);
uint2 param_36 = xy_uint; uint2 param_36 = xy_uint;
CmdImage param_37 = fill_img; CmdImage param_37 = fill_img;
float4 _2417[8]; float4 _2410[8];
fillImage(_2417, param_36, param_37); fillImage(_2410, param_36, param_37);
float4 img[8] = _2417; float4 img[8] = _2410;
for (uint k_11 = 0u; k_11 < 8u; k_11++) for (uint k_11 = 0u; k_11 < 8u; k_11++)
{ {
float4 fg_k_3 = img[k_11] * area[k_11]; float4 fg_k_3 = img[k_11] * area[k_11];
@ -1224,8 +1225,8 @@ void comp_main()
for (uint k_12 = 0u; k_12 < 8u; k_12++) for (uint k_12 = 0u; k_12 < 8u; k_12++)
{ {
float4 param_38 = float4(rgba[k_12]); float4 param_38 = float4(rgba[k_12]);
uint _2479 = packsRGB(param_38); uint _2472 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2479; blend_stack[clip_depth][k_12] = _2472;
rgba[k_12] = 0.0f.xxxx; rgba[k_12] = 0.0f.xxxx;
} }
} }
@ -1235,8 +1236,8 @@ void comp_main()
for (uint k_13 = 0u; k_13 < 8u; k_13++) for (uint k_13 = 0u; k_13 < 8u; k_13++)
{ {
float4 param_39 = float4(rgba[k_13]); float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39); uint _2519 = packsRGB(param_39);
_297.Store((base_ix + k_13) * 4 + 8, _2522); _2506.Store((base_ix + k_13) * 4 + 0, _2519);
rgba[k_13] = 0.0f.xxxx; rgba[k_13] = 0.0f.xxxx;
} }
} }
@ -1262,7 +1263,7 @@ void comp_main()
} }
else else
{ {
bg_rgba = _297.Load((base_ix_1 + k_14) * 4 + 8); bg_rgba = _2506.Load((base_ix_1 + k_14) * 4 + 0);
} }
uint param_42 = bg_rgba; uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42); float4 bg = unpacksRGB(param_42);
@ -1279,8 +1280,8 @@ void comp_main()
{ {
Alloc param_46 = cmd_alloc; Alloc param_46 = cmd_alloc;
CmdRef param_47 = cmd_ref; CmdRef param_47 = cmd_ref;
CmdRef _2621 = { Cmd_Jump_read(param_46, param_47).new_ref }; CmdRef _2618 = { Cmd_Jump_read(param_46, param_47).new_ref };
cmd_ref = _2621; cmd_ref = _2618;
cmd_alloc.offset = cmd_ref.offset; cmd_alloc.offset = cmd_ref.offset;
break; break;
} }

View file

@ -178,6 +178,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -188,6 +189,7 @@ struct Alloc_1
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -222,6 +224,11 @@ struct ConfigBuf
Config conf; Config conf;
}; };
struct BlendBuf
{
uint blend_mem[1];
};
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u);
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
@ -1047,7 +1054,7 @@ CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Me
return CmdJump_read(param, param_1, v_297); return CmdJump_read(param, param_1, v_297);
} }
kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], device BlendBuf& _2506 [[buffer(2)]], texture2d<float, access::write> image [[texture(3)]], texture2d<float> image_atlas [[texture(4)]], texture2d<float> gradients [[texture(5)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
{ {
uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x; uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x;
Alloc param; Alloc param;
@ -1066,14 +1073,13 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
rgba[i] = float4(0.0); rgba[i] = float4(0.0);
} }
uint clip_depth = 0u; uint clip_depth = 0u;
bool mem_ok = v_297.mem_error == 0u;
spvUnsafeArray<float, 8> df; spvUnsafeArray<float, 8> df;
TileSegRef tile_seg_ref; TileSegRef tile_seg_ref;
spvUnsafeArray<float, 8> area; spvUnsafeArray<float, 8> area;
spvUnsafeArray<spvUnsafeArray<uint, 8>, 4> blend_stack; spvUnsafeArray<spvUnsafeArray<uint, 8>, 4> blend_stack;
uint base_ix_1; uint base_ix_1;
uint bg_rgba; uint bg_rgba;
while (mem_ok) while (true)
{ {
Alloc param_3 = cmd_alloc; Alloc param_3 = cmd_alloc;
CmdRef param_4 = cmd_ref; CmdRef param_4 = cmd_ref;
@ -1098,7 +1104,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
{ {
uint param_7 = tile_seg_ref.offset; uint param_7 = tile_seg_ref.offset;
uint param_8 = 24u; uint param_8 = 24u;
bool param_9 = mem_ok; bool param_9 = true;
Alloc param_10 = new_alloc(param_7, param_8, param_9); Alloc param_10 = new_alloc(param_7, param_8, param_9);
TileSegRef param_11 = tile_seg_ref; TileSegRef param_11 = tile_seg_ref;
TileSeg seg = TileSeg_read(param_10, param_11, v_297); TileSeg seg = TileSeg_read(param_10, param_11, v_297);
@ -1134,7 +1140,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
{ {
uint param_15 = tile_seg_ref.offset; uint param_15 = tile_seg_ref.offset;
uint param_16 = 24u; uint param_16 = 24u;
bool param_17 = mem_ok; bool param_17 = true;
Alloc param_18 = new_alloc(param_15, param_16, param_17); Alloc param_18 = new_alloc(param_15, param_16, param_17);
TileSegRef param_19 = tile_seg_ref; TileSegRef param_19 = tile_seg_ref;
TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297); TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297);
@ -1218,10 +1224,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
float3 param_29 = fg_rgba.xyz; float3 param_29 = fg_rgba.xyz;
float3 _2264 = fromsRGB(param_29); float3 _2257 = fromsRGB(param_29);
fg_rgba.x = _2264.x; fg_rgba.x = _2257.x;
fg_rgba.y = _2264.y; fg_rgba.y = _2257.y;
fg_rgba.z = _2264.z; fg_rgba.z = _2257.z;
float4 fg_k_1 = fg_rgba * area[k_9]; float4 fg_k_1 = fg_rgba * area[k_9];
rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;
} }
@ -1244,10 +1250,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0));
float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index))));
float3 param_33 = fg_rgba_1.xyz; float3 param_33 = fg_rgba_1.xyz;
float3 _2374 = fromsRGB(param_33); float3 _2367 = fromsRGB(param_33);
fg_rgba_1.x = _2374.x; fg_rgba_1.x = _2367.x;
fg_rgba_1.y = _2374.y; fg_rgba_1.y = _2367.y;
fg_rgba_1.z = _2374.z; fg_rgba_1.z = _2367.z;
float4 fg_k_2 = fg_rgba_1 * area[k_10]; float4 fg_k_2 = fg_rgba_1 * area[k_10];
rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2;
} }
@ -1278,8 +1284,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
for (uint k_12 = 0u; k_12 < 8u; k_12++) for (uint k_12 = 0u; k_12 < 8u; k_12++)
{ {
float4 param_38 = float4(rgba[k_12]); float4 param_38 = float4(rgba[k_12]);
uint _2479 = packsRGB(param_38); uint _2472 = packsRGB(param_38);
blend_stack[clip_depth][k_12] = _2479; blend_stack[clip_depth][k_12] = _2472;
rgba[k_12] = float4(0.0); rgba[k_12] = float4(0.0);
} }
} }
@ -1289,8 +1295,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
for (uint k_13 = 0u; k_13 < 8u; k_13++) for (uint k_13 = 0u; k_13 < 8u; k_13++)
{ {
float4 param_39 = float4(rgba[k_13]); float4 param_39 = float4(rgba[k_13]);
uint _2522 = packsRGB(param_39); uint _2519 = packsRGB(param_39);
v_297.memory[base_ix + k_13] = _2522; _2506.blend_mem[base_ix + k_13] = _2519;
rgba[k_13] = float4(0.0); rgba[k_13] = float4(0.0);
} }
} }
@ -1316,7 +1322,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
} }
else else
{ {
bg_rgba = v_297.memory[base_ix_1 + k_14]; bg_rgba = _2506.blend_mem[base_ix_1 + k_14];
} }
uint param_42 = bg_rgba; uint param_42 = bg_rgba;
float4 bg = unpacksRGB(param_42); float4 bg = unpacksRGB(param_42);

Binary file not shown.

Binary file not shown.

View file

@ -3,12 +3,6 @@ struct Alloc
uint offset; uint offset;
}; };
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathCubicRef struct PathCubicRef
{ {
uint offset; uint offset;
@ -74,6 +68,7 @@ struct SubdivResult
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -105,10 +100,10 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
static const PathSegTag _721 = { 0u, 0u }; static const PathSegTag _722 = { 0u, 0u };
RWByteAddressBuffer _136 : register(u0, space0); RWByteAddressBuffer _143 : register(u0, space0);
ByteAddressBuffer _710 : register(t1, space0); ByteAddressBuffer _711 : register(t1, space0);
static uint3 gl_GlobalInvocationID; static uint3 gl_GlobalInvocationID;
struct SPIRV_Cross_Input struct SPIRV_Cross_Input
@ -116,6 +111,15 @@ struct SPIRV_Cross_Input
uint3 gl_GlobalInvocationID : SV_DispatchThreadID; uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
}; };
static bool mem_ok;
bool check_deps(uint dep_stage)
{
uint _149;
_143.InterlockedOr(4, 0u, _149);
return (_149 & dep_stage) == 0u;
}
bool touch_mem(Alloc alloc, uint offset) bool touch_mem(Alloc alloc, uint offset)
{ {
return true; return true;
@ -129,7 +133,7 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _136.Load(offset * 4 + 8); uint v = _143.Load(offset * 4 + 12);
return v; return v;
} }
@ -138,8 +142,8 @@ PathSegTag PathSeg_tag(Alloc a, PathSegRef ref)
Alloc param = a; Alloc param = a;
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1); uint tag_and_flags = read_mem(param, param_1);
PathSegTag _367 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; PathSegTag _362 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
return _367; return _362;
} }
PathCubic PathCubic_read(Alloc a, PathCubicRef ref) PathCubic PathCubic_read(Alloc a, PathCubicRef ref)
@ -194,9 +198,9 @@ PathCubic PathCubic_read(Alloc a, PathCubicRef ref)
PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref) PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref)
{ {
PathCubicRef _373 = { ref.offset + 4u }; PathCubicRef _368 = { ref.offset + 4u };
Alloc param = a; Alloc param = a;
PathCubicRef param_1 = _373; PathCubicRef param_1 = _368;
return PathCubic_read(param, param_1); return PathCubic_read(param, param_1);
} }
@ -240,8 +244,8 @@ SubdivResult estimate_subdiv(float2 p0, float2 p1, float2 p2, float sqrt_tol)
val = (sqrt_tol * da) / approx_parabola_integral(param_2); val = (sqrt_tol * da) / approx_parabola_integral(param_2);
} }
} }
SubdivResult _695 = { val, a0, a2 }; SubdivResult _690 = { val, a0, a2 };
return _695; return _690;
} }
uint fill_mode_from_flags(uint flags) uint fill_mode_from_flags(uint flags)
@ -263,12 +267,12 @@ Path Path_read(Alloc a, PathRef ref)
uint raw2 = read_mem(param_4, param_5); uint raw2 = read_mem(param_4, param_5);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
TileRef _427 = { raw2 }; TileRef _422 = { raw2 };
s.tiles = _427; s.tiles = _422;
return s; return s;
} }
Alloc new_alloc(uint offset, uint size, bool mem_ok) Alloc new_alloc(uint offset, uint size, bool mem_ok_1)
{ {
Alloc a; Alloc a;
a.offset = offset; a.offset = offset;
@ -286,33 +290,24 @@ float2 eval_quad(float2 p0, float2 p1, float2 p2, float t)
return (p0 * (mt * mt)) + (((p1 * (mt * 2.0f)) + (p2 * t)) * t); return (p0 * (mt * mt)) + (((p1 * (mt * 2.0f)) + (p2 * t)) * t);
} }
MallocResult malloc(uint size) uint malloc_stage(uint size, uint mem_size, uint stage)
{ {
uint _142; uint _158;
_136.InterlockedAdd(0, size, _142); _143.InterlockedAdd(0, size, _158);
uint offset = _142; uint offset = _158;
uint _149; if ((offset + size) > mem_size)
_136.GetDimensions(_149);
_149 = (_149 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_149) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{ {
uint _171; uint _168;
_136.InterlockedMax(4, 1u, _171); _143.InterlockedOr(4, stage, _168);
return r; offset = 0u;
} }
return r; return offset;
} }
TileRef Tile_index(TileRef ref, uint index) TileRef Tile_index(TileRef ref, uint index)
{ {
TileRef _385 = { ref.offset + (index * 8u) }; TileRef _380 = { ref.offset + (index * 8u) };
return _385; return _380;
} }
void write_mem(Alloc alloc, uint offset, uint val) void write_mem(Alloc alloc, uint offset, uint val)
@ -323,7 +318,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_136.Store(offset * 4 + 8, val); _143.Store(offset * 4 + 12, val);
} }
void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s) void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s)
@ -357,30 +352,36 @@ void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s)
void comp_main() void comp_main()
{ {
uint element_ix = gl_GlobalInvocationID.x; mem_ok = true;
PathSegRef _718 = { _710.Load(28) + (element_ix * 52u) }; uint param = 7u;
PathSegRef ref = _718; bool _694 = check_deps(param);
PathSegTag tag = _721; if (!_694)
if (element_ix < _710.Load(4))
{ {
Alloc _731; return;
_731.offset = _710.Load(28); }
Alloc param; uint element_ix = gl_GlobalInvocationID.x;
param.offset = _731.offset; PathSegRef _719 = { _711.Load(32) + (element_ix * 52u) };
PathSegRef param_1 = ref; PathSegRef ref = _719;
tag = PathSeg_tag(param, param_1); PathSegTag tag = _722;
if (element_ix < _711.Load(8))
{
Alloc _732;
_732.offset = _711.Load(32);
Alloc param_1;
param_1.offset = _732.offset;
PathSegRef param_2 = ref;
tag = PathSeg_tag(param_1, param_2);
} }
bool mem_ok = _136.Load(4) == 0u;
switch (tag.tag) switch (tag.tag)
{ {
case 1u: case 1u:
{ {
Alloc _748; Alloc _745;
_748.offset = _710.Load(28); _745.offset = _711.Load(32);
Alloc param_2; Alloc param_3;
param_2.offset = _748.offset; param_3.offset = _745.offset;
PathSegRef param_3 = ref; PathSegRef param_4 = ref;
PathCubic cubic = PathSeg_Cubic_read(param_2, param_3); PathCubic cubic = PathSeg_Cubic_read(param_3, param_4);
float2 err_v = (((cubic.p2 - cubic.p1) * 3.0f) + cubic.p0) - cubic.p3; float2 err_v = (((cubic.p2 - cubic.p1) * 3.0f) + cubic.p0) - cubic.p3;
float err = (err_v.x * err_v.x) + (err_v.y * err_v.y); float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);
uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875f, 0.16666667163372039794921875f))), 1u); uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875f, 0.16666667163372039794921875f))), 1u);
@ -392,43 +393,43 @@ void comp_main()
for (uint i = 0u; i < n_quads; i++) for (uint i = 0u; i < n_quads; i++)
{ {
float t = float(i + 1u) * _step; float t = float(i + 1u) * _step;
float2 param_4 = cubic.p0; float2 param_5 = cubic.p0;
float2 param_5 = cubic.p1; float2 param_6 = cubic.p1;
float2 param_6 = cubic.p2; float2 param_7 = cubic.p2;
float2 param_7 = cubic.p3; float2 param_8 = cubic.p3;
float param_8 = t; float param_9 = t;
float2 qp2 = eval_cubic(param_4, param_5, param_6, param_7, param_8); float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9);
float2 param_9 = cubic.p0; float2 param_10 = cubic.p0;
float2 param_10 = cubic.p1; float2 param_11 = cubic.p1;
float2 param_11 = cubic.p2; float2 param_12 = cubic.p2;
float2 param_12 = cubic.p3; float2 param_13 = cubic.p3;
float param_13 = t - (0.5f * _step); float param_14 = t - (0.5f * _step);
float2 qp1 = eval_cubic(param_9, param_10, param_11, param_12, param_13); float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14);
qp1 = (qp1 * 2.0f) - ((qp0 + qp2) * 0.5f); qp1 = (qp1 * 2.0f) - ((qp0 + qp2) * 0.5f);
float2 param_14 = qp0; float2 param_15 = qp0;
float2 param_15 = qp1; float2 param_16 = qp1;
float2 param_16 = qp2; float2 param_17 = qp2;
float param_17 = 0.4743416607379913330078125f; float param_18 = 0.4743416607379913330078125f;
SubdivResult params = estimate_subdiv(param_14, param_15, param_16, param_17); SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18);
keep_params[i] = params; keep_params[i] = params;
val += params.val; val += params.val;
qp0 = qp2; qp0 = qp2;
} }
uint n = max(uint(ceil((val * 0.5f) / 0.4743416607379913330078125f)), 1u); uint n = max(uint(ceil((val * 0.5f) / 0.4743416607379913330078125f)), 1u);
uint param_18 = tag.flags; uint param_19 = tag.flags;
bool is_stroke = fill_mode_from_flags(param_18) == 1u; bool is_stroke = fill_mode_from_flags(param_19) == 1u;
uint path_ix = cubic.path_ix; uint path_ix = cubic.path_ix;
PathRef _904 = { _710.Load(16) + (path_ix * 12u) }; PathRef _901 = { _711.Load(20) + (path_ix * 12u) };
Alloc _907; Alloc _904;
_907.offset = _710.Load(16); _904.offset = _711.Load(20);
Alloc param_19; Alloc param_20;
param_19.offset = _907.offset; param_20.offset = _904.offset;
PathRef param_20 = _904; PathRef param_21 = _901;
Path path = Path_read(param_19, param_20); Path path = Path_read(param_20, param_21);
uint param_21 = path.tiles.offset; uint param_22 = path.tiles.offset;
uint param_22 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_23 = mem_ok; bool param_24 = true;
Alloc path_alloc = new_alloc(param_21, param_22, param_23); Alloc path_alloc = new_alloc(param_22, param_23, param_24);
int4 bbox = int4(path.bbox); int4 bbox = int4(path.bbox);
float2 p0 = cubic.p0; float2 p0 = cubic.p0;
qp0 = cubic.p0; qp0 = cubic.p0;
@ -436,44 +437,44 @@ void comp_main()
int n_out = 1; int n_out = 1;
float val_sum = 0.0f; float val_sum = 0.0f;
float2 p1; float2 p1;
float _1147; float _1143;
TileSeg tile_seg; TileSeg tile_seg;
for (uint i_1 = 0u; i_1 < n_quads; i_1++) for (uint i_1 = 0u; i_1 < n_quads; i_1++)
{ {
float t_1 = float(i_1 + 1u) * _step; float t_1 = float(i_1 + 1u) * _step;
float2 param_24 = cubic.p0; float2 param_25 = cubic.p0;
float2 param_25 = cubic.p1; float2 param_26 = cubic.p1;
float2 param_26 = cubic.p2; float2 param_27 = cubic.p2;
float2 param_27 = cubic.p3; float2 param_28 = cubic.p3;
float param_28 = t_1; float param_29 = t_1;
float2 qp2_1 = eval_cubic(param_24, param_25, param_26, param_27, param_28); float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29);
float2 param_29 = cubic.p0; float2 param_30 = cubic.p0;
float2 param_30 = cubic.p1; float2 param_31 = cubic.p1;
float2 param_31 = cubic.p2; float2 param_32 = cubic.p2;
float2 param_32 = cubic.p3; float2 param_33 = cubic.p3;
float param_33 = t_1 - (0.5f * _step); float param_34 = t_1 - (0.5f * _step);
float2 qp1_1 = eval_cubic(param_29, param_30, param_31, param_32, param_33); float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34);
qp1_1 = (qp1_1 * 2.0f) - ((qp0 + qp2_1) * 0.5f); qp1_1 = (qp1_1 * 2.0f) - ((qp0 + qp2_1) * 0.5f);
SubdivResult params_1 = keep_params[i_1]; SubdivResult params_1 = keep_params[i_1];
float param_34 = params_1.a0; float param_35 = params_1.a0;
float u0 = approx_parabola_inv_integral(param_34); float u0 = approx_parabola_inv_integral(param_35);
float param_35 = params_1.a2; float param_36 = params_1.a2;
float u2 = approx_parabola_inv_integral(param_35); float u2 = approx_parabola_inv_integral(param_36);
float uscale = 1.0f / (u2 - u0); float uscale = 1.0f / (u2 - u0);
float target = float(n_out) * v_step; float target = float(n_out) * v_step;
for (;;) for (;;)
{ {
bool _1040 = uint(n_out) == n; bool _1036 = uint(n_out) == n;
bool _1050; bool _1046;
if (!_1040) if (!_1036)
{ {
_1050 = target < (val_sum + params_1.val); _1046 = target < (val_sum + params_1.val);
} }
else else
{ {
_1050 = _1040; _1046 = _1036;
} }
if (_1050) if (_1046)
{ {
if (uint(n_out) == n) if (uint(n_out) == n)
{ {
@ -483,14 +484,14 @@ void comp_main()
{ {
float u = (target - val_sum) / params_1.val; float u = (target - val_sum) / params_1.val;
float a = lerp(params_1.a0, params_1.a2, u); float a = lerp(params_1.a0, params_1.a2, u);
float param_36 = a; float param_37 = a;
float au = approx_parabola_inv_integral(param_36); float au = approx_parabola_inv_integral(param_37);
float t_2 = (au - u0) * uscale; float t_2 = (au - u0) * uscale;
float2 param_37 = qp0; float2 param_38 = qp0;
float2 param_38 = qp1_1; float2 param_39 = qp1_1;
float2 param_39 = qp2_1; float2 param_40 = qp2_1;
float param_40 = t_2; float param_41 = t_2;
p1 = eval_quad(param_37, param_38, param_39, param_40); p1 = eval_quad(param_38, param_39, param_40, param_41);
} }
float xmin = min(p0.x, p1.x) - cubic.stroke.x; float xmin = min(p0.x, p1.x) - cubic.stroke.x;
float xmax = max(p0.x, p1.x) + cubic.stroke.x; float xmax = max(p0.x, p1.x) + cubic.stroke.x;
@ -500,13 +501,13 @@ void comp_main()
float dy = p1.y - p0.y; float dy = p1.y - p0.y;
if (abs(dy) < 9.999999717180685365747194737196e-10f) if (abs(dy) < 9.999999717180685365747194737196e-10f)
{ {
_1147 = 1000000000.0f; _1143 = 1000000000.0f;
} }
else else
{ {
_1147 = dx / dy; _1143 = dx / dy;
} }
float invslope = _1147; float invslope = _1143;
float c = (cubic.stroke.x + (abs(invslope) * (8.0f + cubic.stroke.y))) * 0.0625f; float c = (cubic.stroke.x + (abs(invslope) * (8.0f + cubic.stroke.y))) * 0.0625f;
float b = invslope; float b = invslope;
float a_1 = (p0.x - ((p0.y - 8.0f) * b)) * 0.0625f; float a_1 = (p0.x - ((p0.y - 8.0f) * b)) * 0.0625f;
@ -522,14 +523,20 @@ void comp_main()
int stride = bbox.z - bbox.x; int stride = bbox.z - bbox.x;
int base = ((y0 - bbox.y) * stride) - bbox.x; int base = ((y0 - bbox.y) * stride) - bbox.x;
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
uint param_41 = n_tile_alloc * 24u; uint malloc_size = n_tile_alloc * 24u;
MallocResult _1263 = malloc(param_41); uint param_42 = malloc_size;
MallocResult tile_alloc = _1263; uint param_43 = _711.Load(0);
if (tile_alloc.failed || (!mem_ok)) uint param_44 = 4u;
uint _1265 = malloc_stage(param_42, param_43, param_44);
uint tile_offset = _1265;
if (tile_offset == 0u)
{ {
return; mem_ok = false;
} }
uint tile_offset = tile_alloc.alloc.offset; uint param_45 = tile_offset;
uint param_46 = malloc_size;
bool param_47 = true;
Alloc tile_alloc = new_alloc(param_45, param_46, param_47);
int xray = int(floor(p0.x * 0.0625f)); int xray = int(floor(p0.x * 0.0625f));
int last_xray = int(floor(p1.x * 0.0625f)); int last_xray = int(floor(p1.x * 0.0625f));
if (p0.y > p1.y) if (p0.y > p1.y)
@ -542,39 +549,34 @@ void comp_main()
{ {
float tile_y0 = float(y * 16); float tile_y0 = float(y * 16);
int xbackdrop = max((xray + 1), bbox.x); int xbackdrop = max((xray + 1), bbox.x);
bool _1319 = !is_stroke; bool _1322 = !is_stroke;
bool _1329; bool _1332;
if (_1319) if (_1322)
{ {
_1329 = min(p0.y, p1.y) < tile_y0; _1332 = min(p0.y, p1.y) < tile_y0;
} }
else else
{ {
_1329 = _1319; _1332 = _1322;
} }
bool _1336; bool _1339;
if (_1329) if (_1332)
{ {
_1336 = xbackdrop < bbox.z; _1339 = xbackdrop < bbox.z;
} }
else else
{ {
_1336 = _1329; _1339 = _1332;
} }
if (_1336) if (_1339)
{ {
int backdrop = (p1.y < p0.y) ? 1 : (-1); int backdrop = (p1.y < p0.y) ? 1 : (-1);
TileRef param_42 = path.tiles; TileRef param_48 = path.tiles;
uint param_43 = uint(base + xbackdrop); uint param_49 = uint(base + xbackdrop);
TileRef tile_ref = Tile_index(param_42, param_43); TileRef tile_ref = Tile_index(param_48, param_49);
uint tile_el = tile_ref.offset >> uint(2); uint tile_el = tile_ref.offset >> uint(2);
Alloc param_44 = path_alloc; uint _1369;
uint param_45 = tile_el + 1u; _143.InterlockedAdd((tile_el + 1u) * 4 + 12, uint(backdrop), _1369);
if (touch_mem(param_44, param_45))
{
uint _1374;
_136.InterlockedAdd((tile_el + 1u) * 4 + 8, uint(backdrop), _1374);
}
} }
int next_xray = last_xray; int next_xray = last_xray;
if (y < (y1 - 1)) if (y < (y1 - 1))
@ -592,20 +594,15 @@ void comp_main()
for (int x = xx0; x < xx1; x++) for (int x = xx0; x < xx1; x++)
{ {
float tile_x0 = float(x * 16); float tile_x0 = float(x * 16);
TileRef _1454 = { path.tiles.offset }; TileRef _1449 = { path.tiles.offset };
TileRef param_46 = _1454; TileRef param_50 = _1449;
uint param_47 = uint(base + x); uint param_51 = uint(base + x);
TileRef tile_ref_1 = Tile_index(param_46, param_47); TileRef tile_ref_1 = Tile_index(param_50, param_51);
uint tile_el_1 = tile_ref_1.offset >> uint(2); uint tile_el_1 = tile_ref_1.offset >> uint(2);
uint old = 0u; uint old = 0u;
Alloc param_48 = path_alloc; uint _1465;
uint param_49 = tile_el_1; _143.InterlockedExchange(tile_el_1 * 4 + 12, tile_offset, _1465);
if (touch_mem(param_48, param_49)) old = _1465;
{
uint _1477;
_136.InterlockedExchange(tile_el_1 * 4 + 8, tile_offset, _1477);
old = _1477;
}
tile_seg.origin = p0; tile_seg.origin = p0;
tile_seg._vector = p1 - p0; tile_seg._vector = p1 - p0;
float y_edge = 0.0f; float y_edge = 0.0f;
@ -636,11 +633,14 @@ void comp_main()
} }
tile_seg.y_edge = y_edge; tile_seg.y_edge = y_edge;
tile_seg.next.offset = old; tile_seg.next.offset = old;
TileSegRef _1559 = { tile_offset }; if (mem_ok)
Alloc param_50 = tile_alloc.alloc; {
TileSegRef param_51 = _1559; TileSegRef _1550 = { tile_offset };
TileSeg param_52 = tile_seg; Alloc param_52 = tile_alloc;
TileSeg_write(param_50, param_51, param_52); TileSegRef param_53 = _1550;
TileSeg param_54 = tile_seg;
TileSeg_write(param_52, param_53, param_54);
}
tile_offset += 24u; tile_offset += 24u;
} }
xc += b; xc += b;

View file

@ -51,12 +51,6 @@ struct Alloc
uint offset; uint offset;
}; };
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathCubicRef struct PathCubicRef
{ {
uint offset; uint offset;
@ -124,6 +118,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -134,6 +129,7 @@ struct Alloc_1
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -170,6 +166,13 @@ struct ConfigBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(32u, 1u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(32u, 1u, 1u);
static inline __attribute__((always_inline))
bool check_deps(thread const uint& dep_stage, device Memory& v_143)
{
uint _149 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, 0u, memory_order_relaxed);
return (_149 & dep_stage) == 0u;
}
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
bool touch_mem(thread const Alloc& alloc, thread const uint& offset) bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
{ {
@ -177,7 +180,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_136, constant uint& v_136BufferSize) uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_143)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -185,59 +188,59 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
{ {
return 0u; return 0u;
} }
uint v = v_136.memory[offset]; uint v = v_143.memory[offset];
return v; return v;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
PathSegTag PathSeg_tag(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_136, constant uint& v_136BufferSize) PathSegTag PathSeg_tag(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143)
{ {
Alloc param = a; Alloc param = a;
uint param_1 = ref.offset >> uint(2); uint param_1 = ref.offset >> uint(2);
uint tag_and_flags = read_mem(param, param_1, v_136, v_136BufferSize); uint tag_and_flags = read_mem(param, param_1, v_143);
return PathSegTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; return PathSegTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref, device Memory& v_136, constant uint& v_136BufferSize) PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref, device Memory& v_143)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_136, v_136BufferSize); uint raw0 = read_mem(param, param_1, v_143);
Alloc param_2 = a; Alloc param_2 = a;
uint param_3 = ix + 1u; uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_136, v_136BufferSize); uint raw1 = read_mem(param_2, param_3, v_143);
Alloc param_4 = a; Alloc param_4 = a;
uint param_5 = ix + 2u; uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_136, v_136BufferSize); uint raw2 = read_mem(param_4, param_5, v_143);
Alloc param_6 = a; Alloc param_6 = a;
uint param_7 = ix + 3u; uint param_7 = ix + 3u;
uint raw3 = read_mem(param_6, param_7, v_136, v_136BufferSize); uint raw3 = read_mem(param_6, param_7, v_143);
Alloc param_8 = a; Alloc param_8 = a;
uint param_9 = ix + 4u; uint param_9 = ix + 4u;
uint raw4 = read_mem(param_8, param_9, v_136, v_136BufferSize); uint raw4 = read_mem(param_8, param_9, v_143);
Alloc param_10 = a; Alloc param_10 = a;
uint param_11 = ix + 5u; uint param_11 = ix + 5u;
uint raw5 = read_mem(param_10, param_11, v_136, v_136BufferSize); uint raw5 = read_mem(param_10, param_11, v_143);
Alloc param_12 = a; Alloc param_12 = a;
uint param_13 = ix + 6u; uint param_13 = ix + 6u;
uint raw6 = read_mem(param_12, param_13, v_136, v_136BufferSize); uint raw6 = read_mem(param_12, param_13, v_143);
Alloc param_14 = a; Alloc param_14 = a;
uint param_15 = ix + 7u; uint param_15 = ix + 7u;
uint raw7 = read_mem(param_14, param_15, v_136, v_136BufferSize); uint raw7 = read_mem(param_14, param_15, v_143);
Alloc param_16 = a; Alloc param_16 = a;
uint param_17 = ix + 8u; uint param_17 = ix + 8u;
uint raw8 = read_mem(param_16, param_17, v_136, v_136BufferSize); uint raw8 = read_mem(param_16, param_17, v_143);
Alloc param_18 = a; Alloc param_18 = a;
uint param_19 = ix + 9u; uint param_19 = ix + 9u;
uint raw9 = read_mem(param_18, param_19, v_136, v_136BufferSize); uint raw9 = read_mem(param_18, param_19, v_143);
Alloc param_20 = a; Alloc param_20 = a;
uint param_21 = ix + 10u; uint param_21 = ix + 10u;
uint raw10 = read_mem(param_20, param_21, v_136, v_136BufferSize); uint raw10 = read_mem(param_20, param_21, v_143);
Alloc param_22 = a; Alloc param_22 = a;
uint param_23 = ix + 11u; uint param_23 = ix + 11u;
uint raw11 = read_mem(param_22, param_23, v_136, v_136BufferSize); uint raw11 = read_mem(param_22, param_23, v_143);
PathCubic s; PathCubic s;
s.p0 = float2(as_type<float>(raw0), as_type<float>(raw1)); s.p0 = float2(as_type<float>(raw0), as_type<float>(raw1));
s.p1 = float2(as_type<float>(raw2), as_type<float>(raw3)); s.p1 = float2(as_type<float>(raw2), as_type<float>(raw3));
@ -250,11 +253,11 @@ PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref,
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
PathCubic PathSeg_Cubic_read(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_136, constant uint& v_136BufferSize) PathCubic PathSeg_Cubic_read(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143)
{ {
Alloc param = a; Alloc param = a;
PathCubicRef param_1 = PathCubicRef{ ref.offset + 4u }; PathCubicRef param_1 = PathCubicRef{ ref.offset + 4u };
return PathCubic_read(param, param_1, v_136, v_136BufferSize); return PathCubic_read(param, param_1, v_143);
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
@ -310,18 +313,18 @@ uint fill_mode_from_flags(thread const uint& flags)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_136, constant uint& v_136BufferSize) Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_143)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint raw0 = read_mem(param, param_1, v_136, v_136BufferSize); uint raw0 = read_mem(param, param_1, v_143);
Alloc param_2 = a; Alloc param_2 = a;
uint param_3 = ix + 1u; uint param_3 = ix + 1u;
uint raw1 = read_mem(param_2, param_3, v_136, v_136BufferSize); uint raw1 = read_mem(param_2, param_3, v_143);
Alloc param_4 = a; Alloc param_4 = a;
uint param_5 = ix + 2u; uint param_5 = ix + 2u;
uint raw2 = read_mem(param_4, param_5, v_136, v_136BufferSize); uint raw2 = read_mem(param_4, param_5, v_143);
Path s; Path s;
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
s.tiles = TileRef{ raw2 }; s.tiles = TileRef{ raw2 };
@ -350,22 +353,16 @@ float2 eval_quad(thread const float2& p0, thread const float2& p1, thread const
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_136, constant uint& v_136BufferSize) uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_143)
{ {
uint _142 = atomic_fetch_add_explicit((device atomic_uint*)&v_136.mem_offset, size, memory_order_relaxed); uint _158 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.mem_offset, size, memory_order_relaxed);
uint offset = _142; uint offset = _158;
MallocResult r; if ((offset + size) > mem_size)
r.failed = (offset + size) > uint(int((v_136BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{ {
uint _171 = atomic_fetch_max_explicit((device atomic_uint*)&v_136.mem_error, 1u, memory_order_relaxed); uint _168 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, stage, memory_order_relaxed);
return r; offset = 0u;
} }
return r; return offset;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
@ -375,7 +372,7 @@ TileRef Tile_index(thread const TileRef& ref, thread const uint& index)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_136, constant uint& v_136BufferSize) void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_143)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -383,61 +380,66 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_136.memory[offset] = val; v_143.memory[offset] = val;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void TileSeg_write(thread const Alloc& a, thread const TileSegRef& ref, thread const TileSeg& s, device Memory& v_136, constant uint& v_136BufferSize) void TileSeg_write(thread const Alloc& a, thread const TileSegRef& ref, thread const TileSeg& s, device Memory& v_143)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint param_2 = as_type<uint>(s.origin.x); uint param_2 = as_type<uint>(s.origin.x);
write_mem(param, param_1, param_2, v_136, v_136BufferSize); write_mem(param, param_1, param_2, v_143);
Alloc param_3 = a; Alloc param_3 = a;
uint param_4 = ix + 1u; uint param_4 = ix + 1u;
uint param_5 = as_type<uint>(s.origin.y); uint param_5 = as_type<uint>(s.origin.y);
write_mem(param_3, param_4, param_5, v_136, v_136BufferSize); write_mem(param_3, param_4, param_5, v_143);
Alloc param_6 = a; Alloc param_6 = a;
uint param_7 = ix + 2u; uint param_7 = ix + 2u;
uint param_8 = as_type<uint>(s.vector.x); uint param_8 = as_type<uint>(s.vector.x);
write_mem(param_6, param_7, param_8, v_136, v_136BufferSize); write_mem(param_6, param_7, param_8, v_143);
Alloc param_9 = a; Alloc param_9 = a;
uint param_10 = ix + 3u; uint param_10 = ix + 3u;
uint param_11 = as_type<uint>(s.vector.y); uint param_11 = as_type<uint>(s.vector.y);
write_mem(param_9, param_10, param_11, v_136, v_136BufferSize); write_mem(param_9, param_10, param_11, v_143);
Alloc param_12 = a; Alloc param_12 = a;
uint param_13 = ix + 4u; uint param_13 = ix + 4u;
uint param_14 = as_type<uint>(s.y_edge); uint param_14 = as_type<uint>(s.y_edge);
write_mem(param_12, param_13, param_14, v_136, v_136BufferSize); write_mem(param_12, param_13, param_14, v_143);
Alloc param_15 = a; Alloc param_15 = a;
uint param_16 = ix + 5u; uint param_16 = ix + 5u;
uint param_17 = s.next.offset; uint param_17 = s.next.offset;
write_mem(param_15, param_16, param_17, v_136, v_136BufferSize); write_mem(param_15, param_16, param_17, v_143);
} }
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_136 [[buffer(0)]], const device ConfigBuf& _710 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) kernel void main0(device Memory& v_143 [[buffer(0)]], const device ConfigBuf& _711 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{ {
constant uint& v_136BufferSize = spvBufferSizeConstants[0]; bool mem_ok = true;
uint element_ix = gl_GlobalInvocationID.x; uint param = 7u;
PathSegRef ref = PathSegRef{ _710.conf.pathseg_alloc.offset + (element_ix * 52u) }; bool _694 = check_deps(param, v_143);
PathSegTag tag = PathSegTag{ 0u, 0u }; if (!_694)
if (element_ix < _710.conf.n_pathseg)
{ {
Alloc param; return;
param.offset = _710.conf.pathseg_alloc.offset; }
PathSegRef param_1 = ref; uint element_ix = gl_GlobalInvocationID.x;
tag = PathSeg_tag(param, param_1, v_136, v_136BufferSize); PathSegRef ref = PathSegRef{ _711.conf.pathseg_alloc.offset + (element_ix * 52u) };
PathSegTag tag = PathSegTag{ 0u, 0u };
if (element_ix < _711.conf.n_pathseg)
{
Alloc param_1;
param_1.offset = _711.conf.pathseg_alloc.offset;
PathSegRef param_2 = ref;
tag = PathSeg_tag(param_1, param_2, v_143);
} }
bool mem_ok = v_136.mem_error == 0u;
switch (tag.tag) switch (tag.tag)
{ {
case 1u: case 1u:
{ {
Alloc param_2; Alloc param_3;
param_2.offset = _710.conf.pathseg_alloc.offset; param_3.offset = _711.conf.pathseg_alloc.offset;
PathSegRef param_3 = ref; PathSegRef param_4 = ref;
PathCubic cubic = PathSeg_Cubic_read(param_2, param_3, v_136, v_136BufferSize); PathCubic cubic = PathSeg_Cubic_read(param_3, param_4, v_143);
float2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3; float2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3;
float err = (err_v.x * err_v.x) + (err_v.y * err_v.y); float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);
uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u); uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u);
@ -449,40 +451,40 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
for (uint i = 0u; i < n_quads; i++) for (uint i = 0u; i < n_quads; i++)
{ {
float t = float(i + 1u) * _step; float t = float(i + 1u) * _step;
float2 param_4 = cubic.p0; float2 param_5 = cubic.p0;
float2 param_5 = cubic.p1; float2 param_6 = cubic.p1;
float2 param_6 = cubic.p2; float2 param_7 = cubic.p2;
float2 param_7 = cubic.p3; float2 param_8 = cubic.p3;
float param_8 = t; float param_9 = t;
float2 qp2 = eval_cubic(param_4, param_5, param_6, param_7, param_8); float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9);
float2 param_9 = cubic.p0; float2 param_10 = cubic.p0;
float2 param_10 = cubic.p1; float2 param_11 = cubic.p1;
float2 param_11 = cubic.p2; float2 param_12 = cubic.p2;
float2 param_12 = cubic.p3; float2 param_13 = cubic.p3;
float param_13 = t - (0.5 * _step); float param_14 = t - (0.5 * _step);
float2 qp1 = eval_cubic(param_9, param_10, param_11, param_12, param_13); float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14);
qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5); qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5);
float2 param_14 = qp0; float2 param_15 = qp0;
float2 param_15 = qp1; float2 param_16 = qp1;
float2 param_16 = qp2; float2 param_17 = qp2;
float param_17 = 0.4743416607379913330078125; float param_18 = 0.4743416607379913330078125;
SubdivResult params = estimate_subdiv(param_14, param_15, param_16, param_17); SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18);
keep_params[i] = params; keep_params[i] = params;
val += params.val; val += params.val;
qp0 = qp2; qp0 = qp2;
} }
uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u); uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u);
uint param_18 = tag.flags; uint param_19 = tag.flags;
bool is_stroke = fill_mode_from_flags(param_18) == 1u; bool is_stroke = fill_mode_from_flags(param_19) == 1u;
uint path_ix = cubic.path_ix; uint path_ix = cubic.path_ix;
Alloc param_19; Alloc param_20;
param_19.offset = _710.conf.tile_alloc.offset; param_20.offset = _711.conf.tile_alloc.offset;
PathRef param_20 = PathRef{ _710.conf.tile_alloc.offset + (path_ix * 12u) }; PathRef param_21 = PathRef{ _711.conf.tile_alloc.offset + (path_ix * 12u) };
Path path = Path_read(param_19, param_20, v_136, v_136BufferSize); Path path = Path_read(param_20, param_21, v_143);
uint param_21 = path.tiles.offset; uint param_22 = path.tiles.offset;
uint param_22 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
bool param_23 = mem_ok; bool param_24 = true;
Alloc path_alloc = new_alloc(param_21, param_22, param_23); Alloc path_alloc = new_alloc(param_22, param_23, param_24);
int4 bbox = int4(path.bbox); int4 bbox = int4(path.bbox);
float2 p0 = cubic.p0; float2 p0 = cubic.p0;
qp0 = cubic.p0; qp0 = cubic.p0;
@ -490,44 +492,44 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
int n_out = 1; int n_out = 1;
float val_sum = 0.0; float val_sum = 0.0;
float2 p1; float2 p1;
float _1147; float _1143;
TileSeg tile_seg; TileSeg tile_seg;
for (uint i_1 = 0u; i_1 < n_quads; i_1++) for (uint i_1 = 0u; i_1 < n_quads; i_1++)
{ {
float t_1 = float(i_1 + 1u) * _step; float t_1 = float(i_1 + 1u) * _step;
float2 param_24 = cubic.p0; float2 param_25 = cubic.p0;
float2 param_25 = cubic.p1; float2 param_26 = cubic.p1;
float2 param_26 = cubic.p2; float2 param_27 = cubic.p2;
float2 param_27 = cubic.p3; float2 param_28 = cubic.p3;
float param_28 = t_1; float param_29 = t_1;
float2 qp2_1 = eval_cubic(param_24, param_25, param_26, param_27, param_28); float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29);
float2 param_29 = cubic.p0; float2 param_30 = cubic.p0;
float2 param_30 = cubic.p1; float2 param_31 = cubic.p1;
float2 param_31 = cubic.p2; float2 param_32 = cubic.p2;
float2 param_32 = cubic.p3; float2 param_33 = cubic.p3;
float param_33 = t_1 - (0.5 * _step); float param_34 = t_1 - (0.5 * _step);
float2 qp1_1 = eval_cubic(param_29, param_30, param_31, param_32, param_33); float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34);
qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5); qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5);
SubdivResult params_1 = keep_params[i_1]; SubdivResult params_1 = keep_params[i_1];
float param_34 = params_1.a0; float param_35 = params_1.a0;
float u0 = approx_parabola_inv_integral(param_34); float u0 = approx_parabola_inv_integral(param_35);
float param_35 = params_1.a2; float param_36 = params_1.a2;
float u2 = approx_parabola_inv_integral(param_35); float u2 = approx_parabola_inv_integral(param_36);
float uscale = 1.0 / (u2 - u0); float uscale = 1.0 / (u2 - u0);
float target = float(n_out) * v_step; float target = float(n_out) * v_step;
for (;;) for (;;)
{ {
bool _1040 = uint(n_out) == n; bool _1036 = uint(n_out) == n;
bool _1050; bool _1046;
if (!_1040) if (!_1036)
{ {
_1050 = target < (val_sum + params_1.val); _1046 = target < (val_sum + params_1.val);
} }
else else
{ {
_1050 = _1040; _1046 = _1036;
} }
if (_1050) if (_1046)
{ {
if (uint(n_out) == n) if (uint(n_out) == n)
{ {
@ -537,14 +539,14 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
{ {
float u = (target - val_sum) / params_1.val; float u = (target - val_sum) / params_1.val;
float a = mix(params_1.a0, params_1.a2, u); float a = mix(params_1.a0, params_1.a2, u);
float param_36 = a; float param_37 = a;
float au = approx_parabola_inv_integral(param_36); float au = approx_parabola_inv_integral(param_37);
float t_2 = (au - u0) * uscale; float t_2 = (au - u0) * uscale;
float2 param_37 = qp0; float2 param_38 = qp0;
float2 param_38 = qp1_1; float2 param_39 = qp1_1;
float2 param_39 = qp2_1; float2 param_40 = qp2_1;
float param_40 = t_2; float param_41 = t_2;
p1 = eval_quad(param_37, param_38, param_39, param_40); p1 = eval_quad(param_38, param_39, param_40, param_41);
} }
float xmin = fast::min(p0.x, p1.x) - cubic.stroke.x; float xmin = fast::min(p0.x, p1.x) - cubic.stroke.x;
float xmax = fast::max(p0.x, p1.x) + cubic.stroke.x; float xmax = fast::max(p0.x, p1.x) + cubic.stroke.x;
@ -554,13 +556,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
float dy = p1.y - p0.y; float dy = p1.y - p0.y;
if (abs(dy) < 9.999999717180685365747194737196e-10) if (abs(dy) < 9.999999717180685365747194737196e-10)
{ {
_1147 = 1000000000.0; _1143 = 1000000000.0;
} }
else else
{ {
_1147 = dx / dy; _1143 = dx / dy;
} }
float invslope = _1147; float invslope = _1143;
float c = (cubic.stroke.x + (abs(invslope) * (8.0 + cubic.stroke.y))) * 0.0625; float c = (cubic.stroke.x + (abs(invslope) * (8.0 + cubic.stroke.y))) * 0.0625;
float b = invslope; float b = invslope;
float a_1 = (p0.x - ((p0.y - 8.0) * b)) * 0.0625; float a_1 = (p0.x - ((p0.y - 8.0) * b)) * 0.0625;
@ -576,14 +578,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
int stride = bbox.z - bbox.x; int stride = bbox.z - bbox.x;
int base = ((y0 - bbox.y) * stride) - bbox.x; int base = ((y0 - bbox.y) * stride) - bbox.x;
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
uint param_41 = n_tile_alloc * 24u; uint malloc_size = n_tile_alloc * 24u;
MallocResult _1263 = malloc(param_41, v_136, v_136BufferSize); uint param_42 = malloc_size;
MallocResult tile_alloc = _1263; uint param_43 = _711.conf.mem_size;
if (tile_alloc.failed || (!mem_ok)) uint param_44 = 4u;
uint _1265 = malloc_stage(param_42, param_43, param_44, v_143);
uint tile_offset = _1265;
if (tile_offset == 0u)
{ {
return; mem_ok = false;
} }
uint tile_offset = tile_alloc.alloc.offset; uint param_45 = tile_offset;
uint param_46 = malloc_size;
bool param_47 = true;
Alloc tile_alloc = new_alloc(param_45, param_46, param_47);
int xray = int(floor(p0.x * 0.0625)); int xray = int(floor(p0.x * 0.0625));
int last_xray = int(floor(p1.x * 0.0625)); int last_xray = int(floor(p1.x * 0.0625));
if (p0.y > p1.y) if (p0.y > p1.y)
@ -596,38 +604,33 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
{ {
float tile_y0 = float(y * 16); float tile_y0 = float(y * 16);
int xbackdrop = max((xray + 1), bbox.x); int xbackdrop = max((xray + 1), bbox.x);
bool _1319 = !is_stroke; bool _1322 = !is_stroke;
bool _1329; bool _1332;
if (_1319) if (_1322)
{ {
_1329 = fast::min(p0.y, p1.y) < tile_y0; _1332 = fast::min(p0.y, p1.y) < tile_y0;
} }
else else
{ {
_1329 = _1319; _1332 = _1322;
} }
bool _1336; bool _1339;
if (_1329) if (_1332)
{ {
_1336 = xbackdrop < bbox.z; _1339 = xbackdrop < bbox.z;
} }
else else
{ {
_1336 = _1329; _1339 = _1332;
} }
if (_1336) if (_1339)
{ {
int backdrop = (p1.y < p0.y) ? 1 : (-1); int backdrop = (p1.y < p0.y) ? 1 : (-1);
TileRef param_42 = path.tiles; TileRef param_48 = path.tiles;
uint param_43 = uint(base + xbackdrop); uint param_49 = uint(base + xbackdrop);
TileRef tile_ref = Tile_index(param_42, param_43); TileRef tile_ref = Tile_index(param_48, param_49);
uint tile_el = tile_ref.offset >> uint(2); uint tile_el = tile_ref.offset >> uint(2);
Alloc param_44 = path_alloc; uint _1369 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.memory[tile_el + 1u], uint(backdrop), memory_order_relaxed);
uint param_45 = tile_el + 1u;
if (touch_mem(param_44, param_45))
{
uint _1374 = atomic_fetch_add_explicit((device atomic_uint*)&v_136.memory[tile_el + 1u], uint(backdrop), memory_order_relaxed);
}
} }
int next_xray = last_xray; int next_xray = last_xray;
if (y < (y1 - 1)) if (y < (y1 - 1))
@ -645,18 +648,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
for (int x = xx0; x < xx1; x++) for (int x = xx0; x < xx1; x++)
{ {
float tile_x0 = float(x * 16); float tile_x0 = float(x * 16);
TileRef param_46 = TileRef{ path.tiles.offset }; TileRef param_50 = TileRef{ path.tiles.offset };
uint param_47 = uint(base + x); uint param_51 = uint(base + x);
TileRef tile_ref_1 = Tile_index(param_46, param_47); TileRef tile_ref_1 = Tile_index(param_50, param_51);
uint tile_el_1 = tile_ref_1.offset >> uint(2); uint tile_el_1 = tile_ref_1.offset >> uint(2);
uint old = 0u; uint old = 0u;
Alloc param_48 = path_alloc; uint _1465 = atomic_exchange_explicit((device atomic_uint*)&v_143.memory[tile_el_1], tile_offset, memory_order_relaxed);
uint param_49 = tile_el_1; old = _1465;
if (touch_mem(param_48, param_49))
{
uint _1477 = atomic_exchange_explicit((device atomic_uint*)&v_136.memory[tile_el_1], tile_offset, memory_order_relaxed);
old = _1477;
}
tile_seg.origin = p0; tile_seg.origin = p0;
tile_seg.vector = p1 - p0; tile_seg.vector = p1 - p0;
float y_edge = 0.0; float y_edge = 0.0;
@ -687,10 +685,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
} }
tile_seg.y_edge = y_edge; tile_seg.y_edge = y_edge;
tile_seg.next.offset = old; tile_seg.next.offset = old;
Alloc param_50 = tile_alloc.alloc; if (mem_ok)
TileSegRef param_51 = TileSegRef{ tile_offset }; {
TileSeg param_52 = tile_seg; Alloc param_52 = tile_alloc;
TileSeg_write(param_50, param_51, param_52, v_136, v_136BufferSize); TileSegRef param_53 = TileSegRef{ tile_offset };
TileSeg param_54 = tile_seg;
TileSeg_write(param_52, param_53, param_54, v_143);
}
tile_offset += 24u; tile_offset += 24u;
} }
xc += b; xc += b;

Binary file not shown.

Binary file not shown.

View file

@ -52,6 +52,7 @@ struct Monoid
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -164,7 +165,7 @@ uint read_mem(Alloc alloc, uint offset)
{ {
return 0u; return 0u;
} }
uint v = _111.Load(offset * 4 + 8); uint v = _111.Load(offset * 4 + 12);
return v; return v;
} }
@ -203,7 +204,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_111.Store(offset * 4 + 8, val); _111.Store(offset * 4 + 12, val);
} }
void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s) void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s)
@ -365,7 +366,7 @@ uint round_up(float x)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 4u; uint ix = gl_GlobalInvocationID.x * 4u;
uint tag_word = _574.Load(((_639.Load(92) >> uint(2)) + (ix >> uint(2))) * 4 + 0); uint tag_word = _574.Load(((_639.Load(96) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
uint param = tag_word; uint param = tag_word;
TagMonoid local_tm = reduce_tag(param); TagMonoid local_tm = reduce_tag(param);
sh_tag[gl_LocalInvocationID.x] = local_tm; sh_tag[gl_LocalInvocationID.x] = local_tm;
@ -404,14 +405,14 @@ void comp_main()
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
tm = combine_tag_monoid(param_3, param_4); tm = combine_tag_monoid(param_3, param_4);
} }
uint ps_ix = (_639.Load(96) >> uint(2)) + tm.pathseg_offset; uint ps_ix = (_639.Load(100) >> uint(2)) + tm.pathseg_offset;
uint lw_ix = (_639.Load(88) >> uint(2)) + tm.linewidth_ix; uint lw_ix = (_639.Load(92) >> uint(2)) + tm.linewidth_ix;
uint save_path_ix = tm.path_ix; uint save_path_ix = tm.path_ix;
uint trans_ix = tm.trans_ix; uint trans_ix = tm.trans_ix;
TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) }; TransformSegRef _771 = { _639.Load(40) + (trans_ix * 24u) };
TransformSegRef trans_ref = _771; TransformSegRef trans_ref = _771;
PathSegRef _781 = { _639.Load(28) + (tm.pathseg_ix * 52u) }; PathSegRef _780 = { _639.Load(32) + (tm.pathseg_ix * 52u) };
PathSegRef ps_ref = _781; PathSegRef ps_ref = _780;
float linewidth[4]; float linewidth[4];
uint save_trans_ix[4]; uint save_trans_ix[4];
float2 p0; float2 p0;
@ -464,9 +465,9 @@ void comp_main()
} }
} }
} }
Alloc _877; Alloc _876;
_877.offset = _639.Load(36); _876.offset = _639.Load(40);
param_13.offset = _877.offset; param_13.offset = _876.offset;
TransformSegRef param_14 = trans_ref; TransformSegRef param_14 = trans_ref;
TransformSeg transform = TransformSeg_read(param_13, param_14); TransformSeg transform = TransformSeg_read(param_13, param_14);
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
@ -475,25 +476,25 @@ void comp_main()
if (seg_type >= 2u) if (seg_type >= 2u)
{ {
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _947 = bbox; float4 _946 = bbox;
float2 _950 = min(_947.xy, p2); float2 _949 = min(_946.xy, p2);
bbox.x = _950.x; bbox.x = _949.x;
bbox.y = _950.y; bbox.y = _949.y;
float4 _955 = bbox; float4 _954 = bbox;
float2 _958 = max(_955.zw, p2); float2 _957 = max(_954.zw, p2);
bbox.z = _958.x; bbox.z = _957.x;
bbox.w = _958.y; bbox.w = _957.y;
if (seg_type == 3u) if (seg_type == 3u)
{ {
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _983 = bbox; float4 _982 = bbox;
float2 _986 = min(_983.xy, p3); float2 _985 = min(_982.xy, p3);
bbox.x = _986.x; bbox.x = _985.x;
bbox.y = _986.y; bbox.y = _985.y;
float4 _991 = bbox; float4 _990 = bbox;
float2 _994 = max(_991.zw, p3); float2 _993 = max(_990.zw, p3);
bbox.z = _994.x; bbox.z = _993.x;
bbox.w = _994.y; bbox.w = _993.y;
} }
else else
{ {
@ -524,9 +525,9 @@ void comp_main()
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
cubic.stroke = stroke; cubic.stroke = stroke;
uint fill_mode = uint(linewidth[i_1] >= 0.0f); uint fill_mode = uint(linewidth[i_1] >= 0.0f);
Alloc _1089; Alloc _1088;
_1089.offset = _639.Load(28); _1088.offset = _639.Load(32);
param_15.offset = _1089.offset; param_15.offset = _1088.offset;
PathSegRef param_16 = ps_ref; PathSegRef param_16 = ps_ref;
uint param_17 = fill_mode; uint param_17 = fill_mode;
PathCubic param_18 = cubic; PathCubic param_18 = cubic;
@ -571,7 +572,7 @@ void comp_main()
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
uint path_ix = save_path_ix; uint path_ix = save_path_ix;
uint bbox_out_ix = (_639.Load(40) >> uint(2)) + (path_ix * 6u); uint bbox_out_ix = (_639.Load(44) >> uint(2)) + (path_ix * 6u);
Monoid row = monoid_identity(); Monoid row = monoid_identity();
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
@ -583,24 +584,24 @@ void comp_main()
Monoid param_24 = local[i_4]; Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24); Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false; bool do_atomic = false;
bool _1264 = i_4 == 3u; bool _1263 = i_4 == 3u;
bool _1270; bool _1269;
if (_1264) if (_1263)
{ {
_1270 = gl_LocalInvocationID.x == 255u; _1269 = gl_LocalInvocationID.x == 255u;
} }
else else
{ {
_1270 = _1264; _1269 = _1263;
} }
if (_1270) if (_1269)
{ {
do_atomic = true; do_atomic = true;
} }
if ((m.flags & 1u) != 0u) if ((m.flags & 1u) != 0u)
{ {
_111.Store((bbox_out_ix + 4u) * 4 + 8, asuint(linewidth[i_4])); _111.Store((bbox_out_ix + 4u) * 4 + 12, asuint(linewidth[i_4]));
_111.Store((bbox_out_ix + 5u) * 4 + 8, save_trans_ix[i_4]); _111.Store((bbox_out_ix + 5u) * 4 + 12, save_trans_ix[i_4]);
if ((m.flags & 2u) == 0u) if ((m.flags & 2u) == 0u)
{ {
do_atomic = true; do_atomic = true;
@ -608,43 +609,43 @@ void comp_main()
else else
{ {
float param_25 = m.bbox.x; float param_25 = m.bbox.x;
_111.Store(bbox_out_ix * 4 + 8, round_down(param_25)); _111.Store(bbox_out_ix * 4 + 12, round_down(param_25));
float param_26 = m.bbox.y; float param_26 = m.bbox.y;
_111.Store((bbox_out_ix + 1u) * 4 + 8, round_down(param_26)); _111.Store((bbox_out_ix + 1u) * 4 + 12, round_down(param_26));
float param_27 = m.bbox.z; float param_27 = m.bbox.z;
_111.Store((bbox_out_ix + 2u) * 4 + 8, round_up(param_27)); _111.Store((bbox_out_ix + 2u) * 4 + 12, round_up(param_27));
float param_28 = m.bbox.w; float param_28 = m.bbox.w;
_111.Store((bbox_out_ix + 3u) * 4 + 8, round_up(param_28)); _111.Store((bbox_out_ix + 3u) * 4 + 12, round_up(param_28));
bbox_out_ix += 6u; bbox_out_ix += 6u;
do_atomic = false; do_atomic = false;
} }
} }
if (do_atomic) if (do_atomic)
{ {
bool _1335 = m.bbox.z > m.bbox.x; bool _1334 = m.bbox.z > m.bbox.x;
bool _1344; bool _1343;
if (!_1335) if (!_1334)
{ {
_1344 = m.bbox.w > m.bbox.y; _1343 = m.bbox.w > m.bbox.y;
} }
else else
{ {
_1344 = _1335; _1343 = _1334;
} }
if (_1344) if (_1343)
{ {
float param_29 = m.bbox.x; float param_29 = m.bbox.x;
uint _1353; uint _1352;
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353); _111.InterlockedMin(bbox_out_ix * 4 + 12, round_down(param_29), _1352);
float param_30 = m.bbox.y; float param_30 = m.bbox.y;
uint _1361; uint _1360;
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361); _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 12, round_down(param_30), _1360);
float param_31 = m.bbox.z; float param_31 = m.bbox.z;
uint _1369; uint _1368;
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369); _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 12, round_up(param_31), _1368);
float param_32 = m.bbox.w; float param_32 = m.bbox.w;
uint _1377; uint _1376;
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377); _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 12, round_up(param_32), _1376);
} }
bbox_out_ix += 6u; bbox_out_ix += 6u;
} }

View file

@ -102,6 +102,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -117,6 +118,7 @@ struct Alloc_1
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -545,25 +547,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
if (seg_type >= 2u) if (seg_type >= 2u)
{ {
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
float4 _947 = bbox; float4 _946 = bbox;
float2 _950 = fast::min(_947.xy, p2); float2 _949 = fast::min(_946.xy, p2);
bbox.x = _950.x; bbox.x = _949.x;
bbox.y = _950.y; bbox.y = _949.y;
float4 _955 = bbox; float4 _954 = bbox;
float2 _958 = fast::max(_955.zw, p2); float2 _957 = fast::max(_954.zw, p2);
bbox.z = _958.x; bbox.z = _957.x;
bbox.w = _958.y; bbox.w = _957.y;
if (seg_type == 3u) if (seg_type == 3u)
{ {
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
float4 _983 = bbox; float4 _982 = bbox;
float2 _986 = fast::min(_983.xy, p3); float2 _985 = fast::min(_982.xy, p3);
bbox.x = _986.x; bbox.x = _985.x;
bbox.y = _986.y; bbox.y = _985.y;
float4 _991 = bbox; float4 _990 = bbox;
float2 _994 = fast::max(_991.zw, p3); float2 _993 = fast::max(_990.zw, p3);
bbox.z = _994.x; bbox.z = _993.x;
bbox.w = _994.y; bbox.w = _993.y;
} }
else else
{ {
@ -651,17 +653,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
Monoid param_24 = local[i_4]; Monoid param_24 = local[i_4];
Monoid m = combine_monoid(param_23, param_24); Monoid m = combine_monoid(param_23, param_24);
bool do_atomic = false; bool do_atomic = false;
bool _1264 = i_4 == 3u; bool _1263 = i_4 == 3u;
bool _1270; bool _1269;
if (_1264) if (_1263)
{ {
_1270 = gl_LocalInvocationID.x == 255u; _1269 = gl_LocalInvocationID.x == 255u;
} }
else else
{ {
_1270 = _1264; _1269 = _1263;
} }
if (_1270) if (_1269)
{ {
do_atomic = true; do_atomic = true;
} }
@ -689,26 +691,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
} }
if (do_atomic) if (do_atomic)
{ {
bool _1335 = m.bbox.z > m.bbox.x; bool _1334 = m.bbox.z > m.bbox.x;
bool _1344; bool _1343;
if (!_1335) if (!_1334)
{ {
_1344 = m.bbox.w > m.bbox.y; _1343 = m.bbox.w > m.bbox.y;
} }
else else
{ {
_1344 = _1335; _1343 = _1334;
} }
if (_1344) if (_1343)
{ {
float param_29 = m.bbox.x; float param_29 = m.bbox.x;
uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
float param_30 = m.bbox.y; float param_30 = m.bbox.y;
uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
float param_31 = m.bbox.z; float param_31 = m.bbox.z;
uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
float param_32 = m.bbox.w; float param_32 = m.bbox.w;
uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
} }
bbox_out_ix += 6u; bbox_out_ix += 6u;
} }

Binary file not shown.

Binary file not shown.

View file

@ -14,6 +14,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -92,7 +93,7 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 2u; uint ix = gl_GlobalInvocationID.x * 2u;
uint scene_ix = (_139.Load(92) >> uint(2)) + ix; uint scene_ix = (_139.Load(96) >> uint(2)) + ix;
uint tag_word = _151.Load(scene_ix * 4 + 0); uint tag_word = _151.Load(scene_ix * 4 + 0);
uint param = tag_word; uint param = tag_word;
TagMonoid agg = reduce_tag(param); TagMonoid agg = reduce_tag(param);

View file

@ -21,6 +21,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -78,6 +79,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };

Binary file not shown.

Binary file not shown.

View file

@ -3,12 +3,6 @@ struct Alloc
uint offset; uint offset;
}; };
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathRef struct PathRef
{ {
uint offset; uint offset;
@ -27,6 +21,7 @@ struct Path
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -58,9 +53,9 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
RWByteAddressBuffer _70 : register(u0, space0); RWByteAddressBuffer _53 : register(u0, space0);
ByteAddressBuffer _181 : register(t1, space0); ByteAddressBuffer _148 : register(t1, space0);
ByteAddressBuffer _257 : register(t2, space0); ByteAddressBuffer _232 : register(t2, space0);
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
static uint3 gl_GlobalInvocationID; static uint3 gl_GlobalInvocationID;
@ -71,53 +66,38 @@ struct SPIRV_Cross_Input
}; };
groupshared uint sh_tile_count[256]; groupshared uint sh_tile_count[256];
groupshared MallocResult sh_tile_alloc; groupshared uint sh_tile_offset;
bool check_deps(uint dep_stage)
{
uint _60;
_53.InterlockedOr(4, 0u, _60);
return (_60 & dep_stage) == 0u;
}
float4 load_draw_bbox(uint draw_ix) float4 load_draw_bbox(uint draw_ix)
{ {
uint base = (_181.Load(64) >> uint(2)) + (4u * draw_ix); uint base = (_148.Load(68) >> uint(2)) + (4u * draw_ix);
float x0 = asfloat(_70.Load(base * 4 + 8)); float x0 = asfloat(_53.Load(base * 4 + 12));
float y0 = asfloat(_70.Load((base + 1u) * 4 + 8)); float y0 = asfloat(_53.Load((base + 1u) * 4 + 12));
float x1 = asfloat(_70.Load((base + 2u) * 4 + 8)); float x1 = asfloat(_53.Load((base + 2u) * 4 + 12));
float y1 = asfloat(_70.Load((base + 3u) * 4 + 8)); float y1 = asfloat(_53.Load((base + 3u) * 4 + 12));
float4 bbox = float4(x0, y0, x1, y1); float4 bbox = float4(x0, y0, x1, y1);
return bbox; return bbox;
} }
Alloc new_alloc(uint offset, uint size, bool mem_ok) uint malloc_stage(uint size, uint mem_size, uint stage)
{ {
Alloc a; uint _70;
a.offset = offset; _53.InterlockedAdd(0, size, _70);
return a; uint offset = _70;
} if ((offset + size) > mem_size)
MallocResult malloc(uint size)
{
uint _76;
_70.InterlockedAdd(0, size, _76);
uint offset = _76;
uint _83;
_70.GetDimensions(_83);
_83 = (_83 - 8) / 4;
MallocResult r;
r.failed = (offset + size) > uint(int(_83) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{ {
uint _105; uint _80;
_70.InterlockedMax(4, 1u, _105); _53.InterlockedOr(4, stage, _80);
return r; offset = 0u;
} }
return r; return offset;
}
Alloc slice_mem(Alloc a, uint offset, uint size)
{
Alloc _131 = { a.offset + offset };
return _131;
} }
bool touch_mem(Alloc alloc, uint offset) bool touch_mem(Alloc alloc, uint offset)
@ -133,7 +113,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_70.Store(offset * 4 + 8, val); _53.Store(offset * 4 + 12, val);
} }
void Path_write(Alloc a, PathRef ref, Path s) void Path_write(Alloc a, PathRef ref, Path s)
@ -155,15 +135,21 @@ void Path_write(Alloc a, PathRef ref, Path s)
void comp_main() void comp_main()
{ {
uint param = 1u;
bool _192 = check_deps(param);
if (!_192)
{
return;
}
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
PathRef _241 = { _181.Load(16) + (element_ix * 12u) }; PathRef _216 = { _148.Load(20) + (element_ix * 12u) };
PathRef path_ref = _241; PathRef path_ref = _216;
uint drawtag_base = _181.Load(100) >> uint(2); uint drawtag_base = _148.Load(104) >> uint(2);
uint drawtag = 0u; uint drawtag = 0u;
if (element_ix < _181.Load(0)) if (element_ix < _148.Load(4))
{ {
drawtag = _257.Load((drawtag_base + element_ix) * 4 + 0); drawtag = _232.Load((drawtag_base + element_ix) * 4 + 0);
} }
int x0 = 0; int x0 = 0;
int y0 = 0; int y0 = 0;
@ -171,17 +157,17 @@ void comp_main()
int y1 = 0; int y1 = 0;
if ((drawtag != 0u) && (drawtag != 37u)) if ((drawtag != 0u) && (drawtag != 37u))
{ {
uint param = element_ix; uint param_1 = element_ix;
float4 bbox = load_draw_bbox(param); float4 bbox = load_draw_bbox(param_1);
x0 = int(floor(bbox.x * 0.0625f)); x0 = int(floor(bbox.x * 0.0625f));
y0 = int(floor(bbox.y * 0.0625f)); y0 = int(floor(bbox.y * 0.0625f));
x1 = int(ceil(bbox.z * 0.0625f)); x1 = int(ceil(bbox.z * 0.0625f));
y1 = int(ceil(bbox.w * 0.0625f)); y1 = int(ceil(bbox.w * 0.0625f));
} }
x0 = clamp(x0, 0, int(_181.Load(8))); x0 = clamp(x0, 0, int(_148.Load(12)));
y0 = clamp(y0, 0, int(_181.Load(12))); y0 = clamp(y0, 0, int(_148.Load(16)));
x1 = clamp(x1, 0, int(_181.Load(8))); x1 = clamp(x1, 0, int(_148.Load(12)));
y1 = clamp(y1, 0, int(_181.Load(12))); y1 = clamp(y1, 0, int(_148.Load(16)));
Path path; Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0)); uint tile_count = uint((x1 - x0) * (y1 - y0));
@ -199,59 +185,45 @@ void comp_main()
} }
if (th_ix == 255u) if (th_ix == 255u)
{ {
uint param_1 = total_tile_count * 8u; uint param_2 = total_tile_count * 8u;
MallocResult _392 = malloc(param_1); uint param_3 = _148.Load(0);
sh_tile_alloc = _392; uint param_4 = 2u;
uint _370 = malloc_stage(param_2, param_3, param_4);
sh_tile_offset = _370;
} }
GroupMemoryBarrierWithGroupSync(); GroupMemoryBarrierWithGroupSync();
MallocResult alloc_start = sh_tile_alloc; uint offset_start = sh_tile_offset;
bool _403; if (offset_start == 0u)
if (!alloc_start.failed)
{
_403 = _70.Load(4) != 0u;
}
else
{
_403 = alloc_start.failed;
}
if (_403)
{ {
return; return;
} }
if (element_ix < _181.Load(0)) if (element_ix < _148.Load(4))
{ {
uint _416; uint _387;
if (th_ix > 0u) if (th_ix > 0u)
{ {
_416 = sh_tile_count[th_ix - 1u]; _387 = sh_tile_count[th_ix - 1u];
} }
else else
{ {
_416 = 0u; _387 = 0u;
} }
uint tile_subix = _416; uint tile_subix = _387;
Alloc param_2 = alloc_start.alloc; TileRef _400 = { offset_start + (8u * tile_subix) };
uint param_3 = 8u * tile_subix; path.tiles = _400;
uint param_4 = 8u * tile_count; Alloc _406;
Alloc tiles_alloc = slice_mem(param_2, param_3, param_4); _406.offset = _148.Load(20);
TileRef _438 = { tiles_alloc.offset };
path.tiles = _438;
Alloc _444;
_444.offset = _181.Load(16);
Alloc param_5; Alloc param_5;
param_5.offset = _444.offset; param_5.offset = _406.offset;
PathRef param_6 = path_ref; PathRef param_6 = path_ref;
Path param_7 = path; Path param_7 = path;
Path_write(param_5, param_6, param_7); Path_write(param_5, param_6, param_7);
} }
uint total_count = sh_tile_count[255] * 2u; uint total_count = sh_tile_count[255] * 2u;
uint start_ix = alloc_start.alloc.offset >> uint(2); uint start_ix = offset_start >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u) for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{ {
Alloc param_8 = alloc_start.alloc; _53.Store((start_ix + i_1) * 4 + 12, 0u);
uint param_9 = start_ix + i_1;
uint param_10 = 0u;
write_mem(param_8, param_9, param_10);
} }
} }

View file

@ -12,12 +12,6 @@ struct Alloc
uint offset; uint offset;
}; };
struct MallocResult
{
Alloc alloc;
bool failed;
};
struct PathRef struct PathRef
{ {
uint offset; uint offset;
@ -38,6 +32,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -48,6 +43,7 @@ struct Alloc_1
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -90,48 +86,35 @@ struct SceneBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_70, constant uint& v_70BufferSize, const device ConfigBuf& v_181) bool check_deps(thread const uint& dep_stage, device Memory& v_53)
{ {
uint base = (v_181.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix); uint _60 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, 0u, memory_order_relaxed);
float x0 = as_type<float>(v_70.memory[base]); return (_60 & dep_stage) == 0u;
float y0 = as_type<float>(v_70.memory[base + 1u]); }
float x1 = as_type<float>(v_70.memory[base + 2u]);
float y1 = as_type<float>(v_70.memory[base + 3u]); static inline __attribute__((always_inline))
float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_53, const device ConfigBuf& v_148)
{
uint base = (v_148.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
float x0 = as_type<float>(v_53.memory[base]);
float y0 = as_type<float>(v_53.memory[base + 1u]);
float x1 = as_type<float>(v_53.memory[base + 2u]);
float y1 = as_type<float>(v_53.memory[base + 3u]);
float4 bbox = float4(x0, y0, x1, y1); float4 bbox = float4(x0, y0, x1, y1);
return bbox; return bbox;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_53)
{ {
Alloc a; uint _70 = atomic_fetch_add_explicit((device atomic_uint*)&v_53.mem_offset, size, memory_order_relaxed);
a.offset = offset; uint offset = _70;
return a; if ((offset + size) > mem_size)
}
static inline __attribute__((always_inline))
MallocResult malloc(thread const uint& size, device Memory& v_70, constant uint& v_70BufferSize)
{
uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&v_70.mem_offset, size, memory_order_relaxed);
uint offset = _76;
MallocResult r;
r.failed = (offset + size) > uint(int((v_70BufferSize - 8) / 4) * 4);
uint param = offset;
uint param_1 = size;
bool param_2 = !r.failed;
r.alloc = new_alloc(param, param_1, param_2);
if (r.failed)
{ {
uint _105 = atomic_fetch_max_explicit((device atomic_uint*)&v_70.mem_error, 1u, memory_order_relaxed); uint _80 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, stage, memory_order_relaxed);
return r; offset = 0u;
} }
return r; return offset;
}
static inline __attribute__((always_inline))
Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size)
{
return Alloc{ a.offset + offset };
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
@ -141,7 +124,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_70, constant uint& v_70BufferSize) void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_53)
{ {
Alloc param = alloc; Alloc param = alloc;
uint param_1 = offset; uint param_1 = offset;
@ -149,40 +132,45 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
{ {
return; return;
} }
v_70.memory[offset] = val; v_53.memory[offset] = val;
} }
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_70, constant uint& v_70BufferSize) void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_53)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
Alloc param = a; Alloc param = a;
uint param_1 = ix + 0u; uint param_1 = ix + 0u;
uint param_2 = s.bbox.x | (s.bbox.y << uint(16)); uint param_2 = s.bbox.x | (s.bbox.y << uint(16));
write_mem(param, param_1, param_2, v_70, v_70BufferSize); write_mem(param, param_1, param_2, v_53);
Alloc param_3 = a; Alloc param_3 = a;
uint param_4 = ix + 1u; uint param_4 = ix + 1u;
uint param_5 = s.bbox.z | (s.bbox.w << uint(16)); uint param_5 = s.bbox.z | (s.bbox.w << uint(16));
write_mem(param_3, param_4, param_5, v_70, v_70BufferSize); write_mem(param_3, param_4, param_5, v_53);
Alloc param_6 = a; Alloc param_6 = a;
uint param_7 = ix + 2u; uint param_7 = ix + 2u;
uint param_8 = s.tiles.offset; uint param_8 = s.tiles.offset;
write_mem(param_6, param_7, param_8, v_70, v_70BufferSize); write_mem(param_6, param_7, param_8, v_53);
} }
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_70 [[buffer(0)]], const device ConfigBuf& v_181 [[buffer(1)]], const device SceneBuf& _257 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) kernel void main0(device Memory& v_53 [[buffer(0)]], const device ConfigBuf& v_148 [[buffer(1)]], const device SceneBuf& _232 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{ {
threadgroup uint sh_tile_count[256]; threadgroup uint sh_tile_count[256];
threadgroup MallocResult sh_tile_alloc; threadgroup uint sh_tile_offset;
constant uint& v_70BufferSize = spvBufferSizeConstants[0]; uint param = 1u;
bool _192 = check_deps(param, v_53);
if (!_192)
{
return;
}
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef{ v_181.conf.tile_alloc.offset + (element_ix * 12u) }; PathRef path_ref = PathRef{ v_148.conf.tile_alloc.offset + (element_ix * 12u) };
uint drawtag_base = v_181.conf.drawtag_offset >> uint(2); uint drawtag_base = v_148.conf.drawtag_offset >> uint(2);
uint drawtag = 0u; uint drawtag = 0u;
if (element_ix < v_181.conf.n_elements) if (element_ix < v_148.conf.n_elements)
{ {
drawtag = _257.scene[drawtag_base + element_ix]; drawtag = _232.scene[drawtag_base + element_ix];
} }
int x0 = 0; int x0 = 0;
int y0 = 0; int y0 = 0;
@ -190,17 +178,17 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
int y1 = 0; int y1 = 0;
if ((drawtag != 0u) && (drawtag != 37u)) if ((drawtag != 0u) && (drawtag != 37u))
{ {
uint param = element_ix; uint param_1 = element_ix;
float4 bbox = load_draw_bbox(param, v_70, v_70BufferSize, v_181); float4 bbox = load_draw_bbox(param_1, v_53, v_148);
x0 = int(floor(bbox.x * 0.0625)); x0 = int(floor(bbox.x * 0.0625));
y0 = int(floor(bbox.y * 0.0625)); y0 = int(floor(bbox.y * 0.0625));
x1 = int(ceil(bbox.z * 0.0625)); x1 = int(ceil(bbox.z * 0.0625));
y1 = int(ceil(bbox.w * 0.0625)); y1 = int(ceil(bbox.w * 0.0625));
} }
x0 = clamp(x0, 0, int(v_181.conf.width_in_tiles)); x0 = clamp(x0, 0, int(v_148.conf.width_in_tiles));
y0 = clamp(y0, 0, int(v_181.conf.height_in_tiles)); y0 = clamp(y0, 0, int(v_148.conf.height_in_tiles));
x1 = clamp(x1, 0, int(v_181.conf.width_in_tiles)); x1 = clamp(x1, 0, int(v_148.conf.width_in_tiles));
y1 = clamp(y1, 0, int(v_181.conf.height_in_tiles)); y1 = clamp(y1, 0, int(v_148.conf.height_in_tiles));
Path path; Path path;
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
uint tile_count = uint((x1 - x0) * (y1 - y0)); uint tile_count = uint((x1 - x0) * (y1 - y0));
@ -218,56 +206,42 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
} }
if (th_ix == 255u) if (th_ix == 255u)
{ {
uint param_1 = total_tile_count * 8u; uint param_2 = total_tile_count * 8u;
MallocResult _392 = malloc(param_1, v_70, v_70BufferSize); uint param_3 = v_148.conf.mem_size;
sh_tile_alloc = _392; uint param_4 = 2u;
uint _370 = malloc_stage(param_2, param_3, param_4, v_53);
sh_tile_offset = _370;
} }
threadgroup_barrier(mem_flags::mem_threadgroup); threadgroup_barrier(mem_flags::mem_threadgroup);
MallocResult alloc_start = sh_tile_alloc; uint offset_start = sh_tile_offset;
bool _403; if (offset_start == 0u)
if (!alloc_start.failed)
{
_403 = v_70.mem_error != 0u;
}
else
{
_403 = alloc_start.failed;
}
if (_403)
{ {
return; return;
} }
if (element_ix < v_181.conf.n_elements) if (element_ix < v_148.conf.n_elements)
{ {
uint _416; uint _387;
if (th_ix > 0u) if (th_ix > 0u)
{ {
_416 = sh_tile_count[th_ix - 1u]; _387 = sh_tile_count[th_ix - 1u];
} }
else else
{ {
_416 = 0u; _387 = 0u;
} }
uint tile_subix = _416; uint tile_subix = _387;
Alloc param_2 = alloc_start.alloc; path.tiles = TileRef{ offset_start + (8u * tile_subix) };
uint param_3 = 8u * tile_subix;
uint param_4 = 8u * tile_count;
Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
path.tiles = TileRef{ tiles_alloc.offset };
Alloc param_5; Alloc param_5;
param_5.offset = v_181.conf.tile_alloc.offset; param_5.offset = v_148.conf.tile_alloc.offset;
PathRef param_6 = path_ref; PathRef param_6 = path_ref;
Path param_7 = path; Path param_7 = path;
Path_write(param_5, param_6, param_7, v_70, v_70BufferSize); Path_write(param_5, param_6, param_7, v_53);
} }
uint total_count = sh_tile_count[255] * 2u; uint total_count = sh_tile_count[255] * 2u;
uint start_ix = alloc_start.alloc.offset >> uint(2); uint start_ix = offset_start >> uint(2);
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u) for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
{ {
Alloc param_8 = alloc_start.alloc; v_53.memory[start_ix + i_1] = 0u;
uint param_9 = start_ix + i_1;
uint param_10 = 0u;
write_mem(param_8, param_9, param_10, v_70, v_70BufferSize);
} }
} }

Binary file not shown.

Binary file not shown.

View file

@ -27,6 +27,7 @@ struct TransformSeg
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -58,12 +59,12 @@ struct Config
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
static const Transform _224 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx }; static const Transform _225 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
RWByteAddressBuffer _71 : register(u0, space0); RWByteAddressBuffer _71 : register(u0, space0);
ByteAddressBuffer _96 : register(t2, space0); ByteAddressBuffer _97 : register(t2, space0);
ByteAddressBuffer _278 : register(t1, space0); ByteAddressBuffer _279 : register(t1, space0);
ByteAddressBuffer _376 : register(t3, space0); ByteAddressBuffer _377 : register(t3, space0);
static uint3 gl_WorkGroupID; static uint3 gl_WorkGroupID;
static uint3 gl_LocalInvocationID; static uint3 gl_LocalInvocationID;
@ -80,12 +81,12 @@ groupshared Transform sh_scratch[256];
Transform Transform_read(TransformRef ref) Transform Transform_read(TransformRef ref)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
uint raw0 = _96.Load((ix + 0u) * 4 + 0); uint raw0 = _97.Load((ix + 0u) * 4 + 0);
uint raw1 = _96.Load((ix + 1u) * 4 + 0); uint raw1 = _97.Load((ix + 1u) * 4 + 0);
uint raw2 = _96.Load((ix + 2u) * 4 + 0); uint raw2 = _97.Load((ix + 2u) * 4 + 0);
uint raw3 = _96.Load((ix + 3u) * 4 + 0); uint raw3 = _97.Load((ix + 3u) * 4 + 0);
uint raw4 = _96.Load((ix + 4u) * 4 + 0); uint raw4 = _97.Load((ix + 4u) * 4 + 0);
uint raw5 = _96.Load((ix + 5u) * 4 + 0); uint raw5 = _97.Load((ix + 5u) * 4 + 0);
Transform s; Transform s;
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
s.translate = float2(asfloat(raw4), asfloat(raw5)); s.translate = float2(asfloat(raw4), asfloat(raw5));
@ -108,7 +109,7 @@ Transform combine_monoid(Transform a, Transform b)
Transform monoid_identity() Transform monoid_identity()
{ {
return _224; return _225;
} }
bool touch_mem(Alloc alloc, uint offset) bool touch_mem(Alloc alloc, uint offset)
@ -124,7 +125,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
{ {
return; return;
} }
_71.Store(offset * 4 + 8, val); _71.Store(offset * 4 + 12, val);
} }
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
@ -159,8 +160,8 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _285 = { _278.Load(84) + (ix * 24u) }; TransformRef _286 = { _279.Load(88) + (ix * 24u) };
TransformRef ref = _285; TransformRef ref = _286;
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param); Transform agg = Transform_read(param);
Transform local[8]; Transform local[8];
@ -193,11 +194,11 @@ void comp_main()
Transform row = monoid_identity(); Transform row = monoid_identity();
if (gl_WorkGroupID.x > 0u) if (gl_WorkGroupID.x > 0u)
{ {
Transform _382; Transform _383;
_382.mat = asfloat(_376.Load4((gl_WorkGroupID.x - 1u) * 32 + 0)); _383.mat = asfloat(_377.Load4((gl_WorkGroupID.x - 1u) * 32 + 0));
_382.translate = asfloat(_376.Load2((gl_WorkGroupID.x - 1u) * 32 + 16)); _383.translate = asfloat(_377.Load2((gl_WorkGroupID.x - 1u) * 32 + 16));
row.mat = _382.mat; row.mat = _383.mat;
row.translate = _382.translate; row.translate = _383.translate;
} }
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
@ -211,13 +212,13 @@ void comp_main()
Transform param_10 = row; Transform param_10 = row;
Transform param_11 = local[i_2]; Transform param_11 = local[i_2];
Transform m = combine_monoid(param_10, param_11); Transform m = combine_monoid(param_10, param_11);
TransformSeg _422 = { m.mat, m.translate }; TransformSeg _423 = { m.mat, m.translate };
TransformSeg transform = _422; TransformSeg transform = _423;
TransformSegRef _432 = { _278.Load(36) + ((ix + i_2) * 24u) }; TransformSegRef _433 = { _279.Load(40) + ((ix + i_2) * 24u) };
TransformSegRef trans_ref = _432; TransformSegRef trans_ref = _433;
Alloc _436; Alloc _437;
_436.offset = _278.Load(36); _437.offset = _279.Load(40);
param_12.offset = _436.offset; param_12.offset = _437.offset;
TransformSegRef param_13 = trans_ref; TransformSegRef param_13 = trans_ref;
TransformSeg param_14 = transform; TransformSeg param_14 = transform;
TransformSeg_write(param_12, param_13, param_14); TransformSeg_write(param_12, param_13, param_14);

View file

@ -75,6 +75,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };
@ -90,6 +91,7 @@ struct Alloc_1
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -139,15 +141,15 @@ struct ParentBuf
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
static inline __attribute__((always_inline)) static inline __attribute__((always_inline))
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_96) Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_97)
{ {
uint ix = ref.offset >> uint(2); uint ix = ref.offset >> uint(2);
uint raw0 = v_96.scene[ix + 0u]; uint raw0 = v_97.scene[ix + 0u];
uint raw1 = v_96.scene[ix + 1u]; uint raw1 = v_97.scene[ix + 1u];
uint raw2 = v_96.scene[ix + 2u]; uint raw2 = v_97.scene[ix + 2u];
uint raw3 = v_96.scene[ix + 3u]; uint raw3 = v_97.scene[ix + 3u];
uint raw4 = v_96.scene[ix + 4u]; uint raw4 = v_97.scene[ix + 4u];
uint raw5 = v_96.scene[ix + 5u]; uint raw5 = v_97.scene[ix + 5u];
Transform s; Transform s;
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3)); s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5)); s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
@ -223,13 +225,13 @@ void TransformSeg_write(thread const Alloc& a, thread const TransformSegRef& ref
write_mem(param_15, param_16, param_17, v_71); write_mem(param_15, param_16, param_17, v_71);
} }
kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _278 [[buffer(1)]], const device SceneBuf& v_96 [[buffer(2)]], const device ParentBuf& _376 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _279 [[buffer(1)]], const device SceneBuf& v_97 [[buffer(2)]], const device ParentBuf& _377 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
{ {
threadgroup Transform sh_scratch[256]; threadgroup Transform sh_scratch[256];
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef ref = TransformRef{ _278.conf.trans_offset + (ix * 24u) }; TransformRef ref = TransformRef{ _279.conf.trans_offset + (ix * 24u) };
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param, v_96); Transform agg = Transform_read(param, v_97);
spvUnsafeArray<Transform, 8> local; spvUnsafeArray<Transform, 8> local;
local[0] = agg; local[0] = agg;
for (uint i = 1u; i < 8u; i++) for (uint i = 1u; i < 8u; i++)
@ -238,7 +240,7 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
uint param_2 = i; uint param_2 = i;
TransformRef param_3 = Transform_index(param_1, param_2); TransformRef param_3 = Transform_index(param_1, param_2);
Transform param_4 = agg; Transform param_4 = agg;
Transform param_5 = Transform_read(param_3, v_96); Transform param_5 = Transform_read(param_3, v_97);
agg = combine_monoid(param_4, param_5); agg = combine_monoid(param_4, param_5);
local[i] = agg; local[i] = agg;
} }
@ -260,9 +262,9 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
Transform row = monoid_identity(); Transform row = monoid_identity();
if (gl_WorkGroupID.x > 0u) if (gl_WorkGroupID.x > 0u)
{ {
uint _379 = gl_WorkGroupID.x - 1u; uint _380 = gl_WorkGroupID.x - 1u;
row.mat = _376.parent[_379].mat; row.mat = _377.parent[_380].mat;
row.translate = _376.parent[_379].translate; row.translate = _377.parent[_380].translate;
} }
if (gl_LocalInvocationID.x > 0u) if (gl_LocalInvocationID.x > 0u)
{ {
@ -277,8 +279,8 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
Transform param_11 = local[i_2]; Transform param_11 = local[i_2];
Transform m = combine_monoid(param_10, param_11); Transform m = combine_monoid(param_10, param_11);
TransformSeg transform = TransformSeg{ m.mat, m.translate }; TransformSeg transform = TransformSeg{ m.mat, m.translate };
TransformSegRef trans_ref = TransformSegRef{ _278.conf.trans_alloc.offset + ((ix + i_2) * 24u) }; TransformSegRef trans_ref = TransformSegRef{ _279.conf.trans_alloc.offset + ((ix + i_2) * 24u) };
param_12.offset = _278.conf.trans_alloc.offset; param_12.offset = _279.conf.trans_alloc.offset;
TransformSegRef param_13 = trans_ref; TransformSegRef param_13 = trans_ref;
TransformSeg param_14 = transform; TransformSeg param_14 = transform;
TransformSeg_write(param_12, param_13, param_14, v_71); TransformSeg_write(param_12, param_13, param_14, v_71);

Binary file not shown.

Binary file not shown.

View file

@ -16,6 +16,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -96,7 +97,7 @@ Transform combine_monoid(Transform a, Transform b)
void comp_main() void comp_main()
{ {
uint ix = gl_GlobalInvocationID.x * 8u; uint ix = gl_GlobalInvocationID.x * 8u;
TransformRef _168 = { _161.Load(84) + (ix * 24u) }; TransformRef _168 = { _161.Load(88) + (ix * 24u) };
TransformRef ref = _168; TransformRef ref = _168;
TransformRef param = ref; TransformRef param = ref;
Transform agg = Transform_read(param); Transform agg = Transform_read(param);

View file

@ -28,6 +28,7 @@ struct Alloc
struct Config struct Config
{ {
uint mem_size;
uint n_elements; uint n_elements;
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;
@ -78,6 +79,7 @@ struct Memory
{ {
uint mem_offset; uint mem_offset;
uint mem_error; uint mem_error;
uint blend_offset;
uint memory[1]; uint memory[1];
}; };

Binary file not shown.