diff --git a/piet-gpu-hal/examples/shader/gen/collatz.dxil b/piet-gpu-hal/examples/shader/gen/collatz.dxil deleted file mode 100644 index a03f96a..0000000 Binary files a/piet-gpu-hal/examples/shader/gen/collatz.dxil and /dev/null differ diff --git a/piet-gpu-hal/examples/shader/gen/collatz.hlsl b/piet-gpu-hal/examples/shader/gen/collatz.hlsl deleted file mode 100644 index 762f06d..0000000 --- a/piet-gpu-hal/examples/shader/gen/collatz.hlsl +++ /dev/null @@ -1,62 +0,0 @@ -static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); - -RWByteAddressBuffer _57 : register(u0); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -float mod(float x, float y) -{ - return x - y * floor(x / y); -} - -float2 mod(float2 x, float2 y) -{ - return x - y * floor(x / y); -} - -float3 mod(float3 x, float3 y) -{ - return x - y * floor(x / y); -} - -float4 mod(float4 x, float4 y) -{ - return x - y * floor(x / y); -} - -uint collatz_iterations(inout uint n) -{ - uint i = 0u; - while (n != 1u) - { - if (mod(float(n), 2.0f) == 0.0f) - { - n /= 2u; - } - else - { - n = (3u * n) + 1u; - } - i++; - } - return i; -} - -void comp_main() -{ - uint index = gl_GlobalInvocationID.x; - uint param = _57.Load(index * 4 + 0); - uint _65 = collatz_iterations(param); - _57.Store(index * 4 + 0, _65); -} - -[numthreads(1, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu-hal/examples/shader/gen/collatz.msl b/piet-gpu-hal/examples/shader/gen/collatz.msl deleted file mode 100644 index 1b75efe..0000000 --- a/piet-gpu-hal/examples/shader/gen/collatz.msl +++ /dev/null @@ -1,48 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -inline Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} - -struct PrimeIndices -{ - uint indices[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); - -static inline __attribute__((always_inline)) -uint collatz_iterations(thread uint& n) -{ - uint i = 0u; - while (n != 1u) - { - if (mod(float(n), 2.0) == 0.0) - { - n /= 2u; - } - else - { - n = (3u * n) + 1u; - } - i++; - } - return i; -} - -kernel void main0(device PrimeIndices& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - uint index = gl_GlobalInvocationID.x; - uint param = _57.indices[index]; - uint _65 = collatz_iterations(param); - _57.indices[index] = _65; -} - diff --git a/piet-gpu-hal/examples/shader/gen/collatz.spv b/piet-gpu-hal/examples/shader/gen/collatz.spv deleted file mode 100644 index 886797e..0000000 Binary files a/piet-gpu-hal/examples/shader/gen/collatz.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/backdrop.dxil b/piet-gpu/shader/gen/backdrop.dxil deleted file mode 100644 index 50f5bad..0000000 Binary files a/piet-gpu/shader/gen/backdrop.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/backdrop.hlsl b/piet-gpu/shader/gen/backdrop.hlsl deleted file mode 100644 index 2ed8898..0000000 --- a/piet-gpu/shader/gen/backdrop.hlsl +++ /dev/null @@ -1,257 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -RWByteAddressBuffer _59 : register(u0, space0); -ByteAddressBuffer _181 : register(t1, space0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -static uint gl_LocalInvocationIndex; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; - uint gl_LocalInvocationIndex : SV_GroupIndex; -}; - -groupshared uint sh_row_width[256]; -groupshared Alloc sh_row_alloc[256]; -groupshared uint sh_row_count[256]; - -bool check_deps(uint dep_stage) -{ - uint _65; - _59.InterlockedOr(4, 0u, _65); - return (_65 & dep_stage) == 0u; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _59.Load(offset * 4 + 12); - return v; -} - -Path Path_read(Alloc a, PathRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _146 = { raw2 }; - s.tiles = _146; - return s; -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _59.Store(offset * 4 + 12, val); -} - -void comp_main() -{ - uint param = 7u; - bool _154 = check_deps(param); - if (!_154) - { - return; - } - uint th_ix = gl_LocalInvocationIndex; - uint element_ix = gl_GlobalInvocationID.x; - uint row_count = 0u; - if (gl_LocalInvocationID.y == 0u) - { - if (element_ix < _181.Load(4)) - { - PathRef _195 = { _181.Load(20) + (element_ix * 12u) }; - PathRef path_ref = _195; - Alloc _200; - _200.offset = _181.Load(20); - Alloc param_1; - param_1.offset = _200.offset; - PathRef param_2 = path_ref; - Path path = Path_read(param_1, param_2); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _225 = row_count == 1u; - bool _231; - if (_225) - { - _231 = path.bbox.y > 0u; - } - else - { - _231 = _225; - } - if (_231) - { - row_count = 0u; - } - uint param_3 = path.tiles.offset; - uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_5 = true; - Alloc path_alloc = new_alloc(param_3, param_4, param_5); - sh_row_alloc[th_ix] = path_alloc; - } - sh_row_count[th_ix] = row_count; - } - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - bool _276 = gl_LocalInvocationID.y == 0u; - bool _283; - if (_276) - { - _283 = th_ix >= (1u << i); - } - else - { - _283 = _276; - } - if (_283) - { - row_count += sh_row_count[th_ix - (1u << i)]; - } - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.y == 0u) - { - sh_row_count[th_ix] = row_count; - } - } - GroupMemoryBarrierWithGroupSync(); - uint total_rows = sh_row_count[255]; - uint _360; - for (uint row = th_ix; row < total_rows; row += 256u) - { - uint el_ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = el_ix + (128u >> i_1); - if (row >= sh_row_count[probe - 1u]) - { - el_ix = probe; - } - } - uint width = sh_row_width[el_ix]; - if (width > 0u) - { - Alloc tiles_alloc = sh_row_alloc[el_ix]; - if (el_ix > 0u) - { - _360 = sh_row_count[el_ix - 1u]; - } - else - { - _360 = 0u; - } - uint seq_ix = row - _360; - uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_6 = tiles_alloc; - uint param_7 = tile_el_ix; - uint sum = read_mem(param_6, param_7); - for (uint x = 1u; x < width; x++) - { - tile_el_ix += 2u; - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - sum += read_mem(param_8, param_9); - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - uint param_12 = sum; - write_mem(param_10, param_11, param_12); - } - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; - comp_main(); -} diff --git a/piet-gpu/shader/gen/backdrop.msl b/piet-gpu/shader/gen/backdrop.msl deleted file mode 100644 index 3726dff..0000000 --- a/piet-gpu/shader/gen/backdrop.msl +++ /dev/null @@ -1,263 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct Alloc -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -bool check_deps(thread const uint& dep_stage, device Memory& v_59) -{ - uint _65 = atomic_fetch_or_explicit((device atomic_uint*)&v_59.mem_error, 0u, memory_order_relaxed); - return (_65 & dep_stage) == 0u; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_59.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_59); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_59); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_59); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - s.tiles = TileRef{ raw2 }; - return s; -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_59.memory[offset] = val; -} - -kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint sh_row_width[256]; - threadgroup Alloc sh_row_alloc[256]; - threadgroup uint sh_row_count[256]; - uint param = 7u; - bool _154 = check_deps(param, v_59); - if (!_154) - { - return; - } - uint th_ix = gl_LocalInvocationIndex; - uint element_ix = gl_GlobalInvocationID.x; - uint row_count = 0u; - if (gl_LocalInvocationID.y == 0u) - { - if (element_ix < _181.conf.n_elements) - { - PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) }; - Alloc param_1; - param_1.offset = _181.conf.tile_alloc.offset; - PathRef param_2 = path_ref; - Path path = Path_read(param_1, param_2, v_59); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _225 = row_count == 1u; - bool _231; - if (_225) - { - _231 = path.bbox.y > 0u; - } - else - { - _231 = _225; - } - if (_231) - { - row_count = 0u; - } - uint param_3 = path.tiles.offset; - uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_5 = true; - Alloc path_alloc = new_alloc(param_3, param_4, param_5); - sh_row_alloc[th_ix] = path_alloc; - } - sh_row_count[th_ix] = row_count; - } - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - bool _276 = gl_LocalInvocationID.y == 0u; - bool _283; - if (_276) - { - _283 = th_ix >= (1u << i); - } - else - { - _283 = _276; - } - if (_283) - { - row_count += sh_row_count[th_ix - (1u << i)]; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.y == 0u) - { - sh_row_count[th_ix] = row_count; - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint total_rows = sh_row_count[255]; - uint _360; - for (uint row = th_ix; row < total_rows; row += 256u) - { - uint el_ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = el_ix + (128u >> i_1); - if (row >= sh_row_count[probe - 1u]) - { - el_ix = probe; - } - } - uint width = sh_row_width[el_ix]; - if (width > 0u) - { - Alloc tiles_alloc = sh_row_alloc[el_ix]; - if (el_ix > 0u) - { - _360 = sh_row_count[el_ix - 1u]; - } - else - { - _360 = 0u; - } - uint seq_ix = row - _360; - uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_6 = tiles_alloc; - uint param_7 = tile_el_ix; - uint sum = read_mem(param_6, param_7, v_59); - for (uint x = 1u; x < width; x++) - { - tile_el_ix += 2u; - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - sum += read_mem(param_8, param_9, v_59); - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - uint param_12 = sum; - write_mem(param_10, param_11, param_12, v_59); - } - } - } -} - diff --git a/piet-gpu/shader/gen/backdrop.spv b/piet-gpu/shader/gen/backdrop.spv deleted file mode 100644 index b8a74ea..0000000 Binary files a/piet-gpu/shader/gen/backdrop.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/backdrop_lg.dxil b/piet-gpu/shader/gen/backdrop_lg.dxil deleted file mode 100644 index 06bacaf..0000000 Binary files a/piet-gpu/shader/gen/backdrop_lg.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/backdrop_lg.hlsl b/piet-gpu/shader/gen/backdrop_lg.hlsl deleted file mode 100644 index e547762..0000000 --- a/piet-gpu/shader/gen/backdrop_lg.hlsl +++ /dev/null @@ -1,257 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u); - -RWByteAddressBuffer _59 : register(u0, space0); -ByteAddressBuffer _181 : register(t1, space0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -static uint gl_LocalInvocationIndex; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; - uint gl_LocalInvocationIndex : SV_GroupIndex; -}; - -groupshared uint sh_row_width[256]; -groupshared Alloc sh_row_alloc[256]; -groupshared uint sh_row_count[256]; - -bool check_deps(uint dep_stage) -{ - uint _65; - _59.InterlockedOr(4, 0u, _65); - return (_65 & dep_stage) == 0u; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _59.Load(offset * 4 + 12); - return v; -} - -Path Path_read(Alloc a, PathRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _146 = { raw2 }; - s.tiles = _146; - return s; -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _59.Store(offset * 4 + 12, val); -} - -void comp_main() -{ - uint param = 7u; - bool _154 = check_deps(param); - if (!_154) - { - return; - } - uint th_ix = gl_LocalInvocationIndex; - uint element_ix = gl_GlobalInvocationID.x; - uint row_count = 0u; - if (gl_LocalInvocationID.y == 0u) - { - if (element_ix < _181.Load(4)) - { - PathRef _195 = { _181.Load(20) + (element_ix * 12u) }; - PathRef path_ref = _195; - Alloc _200; - _200.offset = _181.Load(20); - Alloc param_1; - param_1.offset = _200.offset; - PathRef param_2 = path_ref; - Path path = Path_read(param_1, param_2); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _225 = row_count == 1u; - bool _231; - if (_225) - { - _231 = path.bbox.y > 0u; - } - else - { - _231 = _225; - } - if (_231) - { - row_count = 0u; - } - uint param_3 = path.tiles.offset; - uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_5 = true; - Alloc path_alloc = new_alloc(param_3, param_4, param_5); - sh_row_alloc[th_ix] = path_alloc; - } - sh_row_count[th_ix] = row_count; - } - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - bool _276 = gl_LocalInvocationID.y == 0u; - bool _283; - if (_276) - { - _283 = th_ix >= (1u << i); - } - else - { - _283 = _276; - } - if (_283) - { - row_count += sh_row_count[th_ix - (1u << i)]; - } - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.y == 0u) - { - sh_row_count[th_ix] = row_count; - } - } - GroupMemoryBarrierWithGroupSync(); - uint total_rows = sh_row_count[255]; - uint _360; - for (uint row = th_ix; row < total_rows; row += 1024u) - { - uint el_ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = el_ix + (128u >> i_1); - if (row >= sh_row_count[probe - 1u]) - { - el_ix = probe; - } - } - uint width = sh_row_width[el_ix]; - if (width > 0u) - { - Alloc tiles_alloc = sh_row_alloc[el_ix]; - if (el_ix > 0u) - { - _360 = sh_row_count[el_ix - 1u]; - } - else - { - _360 = 0u; - } - uint seq_ix = row - _360; - uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_6 = tiles_alloc; - uint param_7 = tile_el_ix; - uint sum = read_mem(param_6, param_7); - for (uint x = 1u; x < width; x++) - { - tile_el_ix += 2u; - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - sum += read_mem(param_8, param_9); - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - uint param_12 = sum; - write_mem(param_10, param_11, param_12); - } - } - } -} - -[numthreads(256, 4, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; - comp_main(); -} diff --git a/piet-gpu/shader/gen/backdrop_lg.msl b/piet-gpu/shader/gen/backdrop_lg.msl deleted file mode 100644 index 68f0905..0000000 --- a/piet-gpu/shader/gen/backdrop_lg.msl +++ /dev/null @@ -1,263 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct Alloc -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 4u, 1u); - -static inline __attribute__((always_inline)) -bool check_deps(thread const uint& dep_stage, device Memory& v_59) -{ - uint _65 = atomic_fetch_or_explicit((device atomic_uint*)&v_59.mem_error, 0u, memory_order_relaxed); - return (_65 & dep_stage) == 0u; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_59.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_59); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_59); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_59); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - s.tiles = TileRef{ raw2 }; - return s; -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_59.memory[offset] = val; -} - -kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint sh_row_width[256]; - threadgroup Alloc sh_row_alloc[256]; - threadgroup uint sh_row_count[256]; - uint param = 7u; - bool _154 = check_deps(param, v_59); - if (!_154) - { - return; - } - uint th_ix = gl_LocalInvocationIndex; - uint element_ix = gl_GlobalInvocationID.x; - uint row_count = 0u; - if (gl_LocalInvocationID.y == 0u) - { - if (element_ix < _181.conf.n_elements) - { - PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) }; - Alloc param_1; - param_1.offset = _181.conf.tile_alloc.offset; - PathRef param_2 = path_ref; - Path path = Path_read(param_1, param_2, v_59); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _225 = row_count == 1u; - bool _231; - if (_225) - { - _231 = path.bbox.y > 0u; - } - else - { - _231 = _225; - } - if (_231) - { - row_count = 0u; - } - uint param_3 = path.tiles.offset; - uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_5 = true; - Alloc path_alloc = new_alloc(param_3, param_4, param_5); - sh_row_alloc[th_ix] = path_alloc; - } - sh_row_count[th_ix] = row_count; - } - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - bool _276 = gl_LocalInvocationID.y == 0u; - bool _283; - if (_276) - { - _283 = th_ix >= (1u << i); - } - else - { - _283 = _276; - } - if (_283) - { - row_count += sh_row_count[th_ix - (1u << i)]; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.y == 0u) - { - sh_row_count[th_ix] = row_count; - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint total_rows = sh_row_count[255]; - uint _360; - for (uint row = th_ix; row < total_rows; row += 1024u) - { - uint el_ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = el_ix + (128u >> i_1); - if (row >= sh_row_count[probe - 1u]) - { - el_ix = probe; - } - } - uint width = sh_row_width[el_ix]; - if (width > 0u) - { - Alloc tiles_alloc = sh_row_alloc[el_ix]; - if (el_ix > 0u) - { - _360 = sh_row_count[el_ix - 1u]; - } - else - { - _360 = 0u; - } - uint seq_ix = row - _360; - uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_6 = tiles_alloc; - uint param_7 = tile_el_ix; - uint sum = read_mem(param_6, param_7, v_59); - for (uint x = 1u; x < width; x++) - { - tile_el_ix += 2u; - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - sum += read_mem(param_8, param_9, v_59); - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - uint param_12 = sum; - write_mem(param_10, param_11, param_12, v_59); - } - } - } -} - diff --git a/piet-gpu/shader/gen/backdrop_lg.spv b/piet-gpu/shader/gen/backdrop_lg.spv deleted file mode 100644 index 2819ec5..0000000 Binary files a/piet-gpu/shader/gen/backdrop_lg.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/bbox_clear.dxil b/piet-gpu/shader/gen/bbox_clear.dxil deleted file mode 100644 index 82cfb03..0000000 Binary files a/piet-gpu/shader/gen/bbox_clear.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/bbox_clear.hlsl b/piet-gpu/shader/gen/bbox_clear.hlsl deleted file mode 100644 index 5d29894..0000000 --- a/piet-gpu/shader/gen/bbox_clear.hlsl +++ /dev/null @@ -1,67 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); - -ByteAddressBuffer _21 : register(t1, space0); -RWByteAddressBuffer _45 : register(u0, space0); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x; - if (ix < _21.Load(80)) - { - uint out_ix = (_21.Load(44) >> uint(2)) + (6u * ix); - _45.Store(out_ix * 4 + 12, 65535u); - _45.Store((out_ix + 1u) * 4 + 12, 65535u); - _45.Store((out_ix + 2u) * 4 + 12, 0u); - _45.Store((out_ix + 3u) * 4 + 12, 0u); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/bbox_clear.msl b/piet-gpu/shader/gen/bbox_clear.msl deleted file mode 100644 index 289fc9a..0000000 --- a/piet-gpu/shader/gen/bbox_clear.msl +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include - -using namespace metal; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -kernel void main0(device Memory& _45 [[buffer(0)]], const device ConfigBuf& _21 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - uint ix = gl_GlobalInvocationID.x; - if (ix < _21.conf.n_path) - { - uint out_ix = (_21.conf.path_bbox_alloc.offset >> uint(2)) + (6u * ix); - _45.memory[out_ix] = 65535u; - _45.memory[out_ix + 1u] = 65535u; - _45.memory[out_ix + 2u] = 0u; - _45.memory[out_ix + 3u] = 0u; - } -} - diff --git a/piet-gpu/shader/gen/bbox_clear.spv b/piet-gpu/shader/gen/bbox_clear.spv deleted file mode 100644 index f1ef371..0000000 Binary files a/piet-gpu/shader/gen/bbox_clear.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/binning.dxil b/piet-gpu/shader/gen/binning.dxil deleted file mode 100644 index 5c89a15..0000000 Binary files a/piet-gpu/shader/gen/binning.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/binning.hlsl b/piet-gpu/shader/gen/binning.hlsl deleted file mode 100644 index 7096371..0000000 --- a/piet-gpu/shader/gen/binning.hlsl +++ /dev/null @@ -1,274 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -RWByteAddressBuffer _57 : register(u0, space0); -ByteAddressBuffer _101 : register(t1, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -groupshared uint bitmaps[8][256]; -groupshared uint count[8][256]; -groupshared uint sh_chunk_offset[256]; - -DrawMonoid load_draw_monoid(uint element_ix) -{ - uint base = (_101.Load(48) >> uint(2)) + (4u * element_ix); - uint path_ix = _57.Load(base * 4 + 12); - uint clip_ix = _57.Load((base + 1u) * 4 + 12); - uint scene_offset = _57.Load((base + 2u) * 4 + 12); - uint info_offset = _57.Load((base + 3u) * 4 + 12); - DrawMonoid _136 = { path_ix, clip_ix, scene_offset, info_offset }; - return _136; -} - -float4 load_clip_bbox(uint clip_ix) -{ - uint base = (_101.Load(64) >> uint(2)) + (4u * clip_ix); - float x0 = asfloat(_57.Load(base * 4 + 12)); - float y0 = asfloat(_57.Load((base + 1u) * 4 + 12)); - float x1 = asfloat(_57.Load((base + 2u) * 4 + 12)); - float y1 = asfloat(_57.Load((base + 3u) * 4 + 12)); - float4 bbox = float4(x0, y0, x1, y1); - return bbox; -} - -float4 load_path_bbox(uint path_ix) -{ - uint base = (_101.Load(44) >> uint(2)) + (6u * path_ix); - float bbox_l = float(_57.Load(base * 4 + 12)) - 32768.0f; - float bbox_t = float(_57.Load((base + 1u) * 4 + 12)) - 32768.0f; - float bbox_r = float(_57.Load((base + 2u) * 4 + 12)) - 32768.0f; - float bbox_b = float(_57.Load((base + 3u) * 4 + 12)) - 32768.0f; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -float4 bbox_intersect(float4 a, float4 b) -{ - return float4(max(a.xy, b.xy), min(a.zw, b.zw)); -} - -void store_draw_bbox(uint draw_ix, float4 bbox) -{ - uint base = (_101.Load(68) >> uint(2)) + (4u * draw_ix); - _57.Store(base * 4 + 12, asuint(bbox.x)); - _57.Store((base + 1u) * 4 + 12, asuint(bbox.y)); - _57.Store((base + 2u) * 4 + 12, asuint(bbox.z)); - _57.Store((base + 3u) * 4 + 12, asuint(bbox.w)); -} - -uint malloc_stage(uint size, uint mem_size, uint stage) -{ - uint _65; - _57.InterlockedAdd(0, size, _65); - uint offset = _65; - if ((offset + size) > mem_size) - { - uint _76; - _57.InterlockedOr(4, stage, _76); - offset = 0u; - } - return offset; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _57.Store(offset * 4 + 12, val); -} - -void comp_main() -{ - uint my_partition = gl_WorkGroupID.x; - for (uint i = 0u; i < 8u; i++) - { - bitmaps[i][gl_LocalInvocationID.x] = 0u; - } - uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; - int x0 = 0; - int y0 = 0; - int x1 = 0; - int y1 = 0; - if (element_ix < _101.Load(4)) - { - uint param = element_ix; - DrawMonoid draw_monoid = load_draw_monoid(param); - uint path_ix = draw_monoid.path_ix; - float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - uint clip_ix = draw_monoid.clip_ix; - if (clip_ix > 0u) - { - uint param_1 = clip_ix - 1u; - clip_bbox = load_clip_bbox(param_1); - } - uint param_2 = path_ix; - float4 path_bbox = load_path_bbox(param_2); - float4 param_3 = path_bbox; - float4 param_4 = clip_bbox; - float4 bbox = bbox_intersect(param_3, param_4); - float4 _354 = bbox; - float4 _356 = bbox; - float2 _358 = max(_354.xy, _356.zw); - bbox.z = _358.x; - bbox.w = _358.y; - uint param_5 = element_ix; - float4 param_6 = bbox; - store_draw_bbox(param_5, param_6); - x0 = int(floor(bbox.x * 0.00390625f)); - y0 = int(floor(bbox.y * 0.00390625f)); - x1 = int(ceil(bbox.z * 0.00390625f)); - y1 = int(ceil(bbox.w * 0.00390625f)); - } - uint width_in_bins = ((_101.Load(12) + 16u) - 1u) / 16u; - uint height_in_bins = ((_101.Load(16) + 16u) - 1u) / 16u; - x0 = clamp(x0, 0, int(width_in_bins)); - x1 = clamp(x1, x0, int(width_in_bins)); - y0 = clamp(y0, 0, int(height_in_bins)); - y1 = clamp(y1, y0, int(height_in_bins)); - if (x0 == x1) - { - y1 = y0; - } - int x = x0; - int y = y0; - uint my_slice = gl_LocalInvocationID.x / 32u; - uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); - while (y < y1) - { - uint _460; - InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _460); - x++; - if (x == x1) - { - x = x0; - y++; - } - } - GroupMemoryBarrierWithGroupSync(); - uint element_count = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x]))); - count[i_1][gl_LocalInvocationID.x] = element_count; - } - uint chunk_offset = 0u; - if (element_count != 0u) - { - uint param_7 = element_count * 4u; - uint param_8 = _101.Load(0); - uint param_9 = 1u; - uint _510 = malloc_stage(param_7, param_8, param_9); - chunk_offset = _510; - sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset; - } - uint out_ix = (_101.Load(24) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); - Alloc _532; - _532.offset = _101.Load(24); - Alloc param_10; - param_10.offset = _532.offset; - uint param_11 = out_ix; - uint param_12 = element_count; - write_mem(param_10, param_11, param_12); - Alloc _544; - _544.offset = _101.Load(24); - Alloc param_13; - param_13.offset = _544.offset; - uint param_14 = out_ix + 1u; - uint param_15 = chunk_offset; - write_mem(param_13, param_14, param_15); - GroupMemoryBarrierWithGroupSync(); - x = x0; - y = y0; - while (y < y1) - { - uint bin_ix = (uint(y) * width_in_bins) + uint(x); - uint out_mask = bitmaps[my_slice][bin_ix]; - if ((out_mask & my_mask) != 0u) - { - uint idx = uint(int(countbits(out_mask & (my_mask - 1u)))); - if (my_slice > 0u) - { - idx += count[my_slice - 1u][bin_ix]; - } - uint chunk_offset_1 = sh_chunk_offset[bin_ix]; - if (chunk_offset_1 != 0u) - { - _57.Store(((chunk_offset_1 >> uint(2)) + idx) * 4 + 12, element_ix); - } - } - x++; - if (x == x1) - { - x = x0; - y++; - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/binning.msl b/piet-gpu/shader/gen/binning.msl deleted file mode 100644 index d3ef95c..0000000 --- a/piet-gpu/shader/gen/binning.msl +++ /dev/null @@ -1,282 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct Alloc -{ - uint offset; -}; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_57, const device ConfigBuf& v_101) -{ - uint base = (v_101.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix); - uint path_ix = v_57.memory[base]; - uint clip_ix = v_57.memory[base + 1u]; - uint scene_offset = v_57.memory[base + 2u]; - uint info_offset = v_57.memory[base + 3u]; - return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset }; -} - -static inline __attribute__((always_inline)) -float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_57, const device ConfigBuf& v_101) -{ - uint base = (v_101.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix); - float x0 = as_type(v_57.memory[base]); - float y0 = as_type(v_57.memory[base + 1u]); - float x1 = as_type(v_57.memory[base + 2u]); - float y1 = as_type(v_57.memory[base + 3u]); - float4 bbox = float4(x0, y0, x1, y1); - return bbox; -} - -static inline __attribute__((always_inline)) -float4 load_path_bbox(thread const uint& path_ix, device Memory& v_57, const device ConfigBuf& v_101) -{ - uint base = (v_101.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix); - float bbox_l = float(v_57.memory[base]) - 32768.0; - float bbox_t = float(v_57.memory[base + 1u]) - 32768.0; - float bbox_r = float(v_57.memory[base + 2u]) - 32768.0; - float bbox_b = float(v_57.memory[base + 3u]) - 32768.0; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -static inline __attribute__((always_inline)) -float4 bbox_intersect(thread const float4& a, thread const float4& b) -{ - return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw)); -} - -static inline __attribute__((always_inline)) -void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_57, const device ConfigBuf& v_101) -{ - uint base = (v_101.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix); - v_57.memory[base] = as_type(bbox.x); - v_57.memory[base + 1u] = as_type(bbox.y); - v_57.memory[base + 2u] = as_type(bbox.z); - v_57.memory[base + 3u] = as_type(bbox.w); -} - -static inline __attribute__((always_inline)) -uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_57) -{ - uint _65 = atomic_fetch_add_explicit((device atomic_uint*)&v_57.mem_offset, size, memory_order_relaxed); - uint offset = _65; - if ((offset + size) > mem_size) - { - uint _76 = atomic_fetch_or_explicit((device atomic_uint*)&v_57.mem_error, stage, memory_order_relaxed); - offset = 0u; - } - return offset; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_57) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_57.memory[offset] = val; -} - -kernel void main0(device Memory& v_57 [[buffer(0)]], const device ConfigBuf& v_101 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint bitmaps[8][256]; - threadgroup uint count[8][256]; - threadgroup uint sh_chunk_offset[256]; - uint my_partition = gl_WorkGroupID.x; - for (uint i = 0u; i < 8u; i++) - { - bitmaps[i][gl_LocalInvocationID.x] = 0u; - } - uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; - int x0 = 0; - int y0 = 0; - int x1 = 0; - int y1 = 0; - if (element_ix < v_101.conf.n_elements) - { - uint param = element_ix; - DrawMonoid draw_monoid = load_draw_monoid(param, v_57, v_101); - uint path_ix = draw_monoid.path_ix; - float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - uint clip_ix = draw_monoid.clip_ix; - if (clip_ix > 0u) - { - uint param_1 = clip_ix - 1u; - clip_bbox = load_clip_bbox(param_1, v_57, v_101); - } - uint param_2 = path_ix; - float4 path_bbox = load_path_bbox(param_2, v_57, v_101); - float4 param_3 = path_bbox; - float4 param_4 = clip_bbox; - float4 bbox = bbox_intersect(param_3, param_4); - float4 _354 = bbox; - float4 _356 = bbox; - float2 _358 = fast::max(_354.xy, _356.zw); - bbox.z = _358.x; - bbox.w = _358.y; - uint param_5 = element_ix; - float4 param_6 = bbox; - store_draw_bbox(param_5, param_6, v_57, v_101); - x0 = int(floor(bbox.x * 0.00390625)); - y0 = int(floor(bbox.y * 0.00390625)); - x1 = int(ceil(bbox.z * 0.00390625)); - y1 = int(ceil(bbox.w * 0.00390625)); - } - uint width_in_bins = ((v_101.conf.width_in_tiles + 16u) - 1u) / 16u; - uint height_in_bins = ((v_101.conf.height_in_tiles + 16u) - 1u) / 16u; - x0 = clamp(x0, 0, int(width_in_bins)); - x1 = clamp(x1, x0, int(width_in_bins)); - y0 = clamp(y0, 0, int(height_in_bins)); - y1 = clamp(y1, y0, int(height_in_bins)); - if (x0 == x1) - { - y1 = y0; - } - int x = x0; - int y = y0; - uint my_slice = gl_LocalInvocationID.x / 32u; - uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); - while (y < y1) - { - uint _460 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed); - x++; - if (x == x1) - { - x = x0; - y++; - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint element_count = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x]))); - count[i_1][gl_LocalInvocationID.x] = element_count; - } - uint chunk_offset = 0u; - if (element_count != 0u) - { - uint param_7 = element_count * 4u; - uint param_8 = v_101.conf.mem_size; - uint param_9 = 1u; - uint _510 = malloc_stage(param_7, param_8, param_9, v_57); - chunk_offset = _510; - sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset; - } - uint out_ix = (v_101.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); - Alloc param_10; - param_10.offset = v_101.conf.bin_alloc.offset; - uint param_11 = out_ix; - uint param_12 = element_count; - write_mem(param_10, param_11, param_12, v_57); - Alloc param_13; - param_13.offset = v_101.conf.bin_alloc.offset; - uint param_14 = out_ix + 1u; - uint param_15 = chunk_offset; - write_mem(param_13, param_14, param_15, v_57); - threadgroup_barrier(mem_flags::mem_threadgroup); - x = x0; - y = y0; - while (y < y1) - { - uint bin_ix = (uint(y) * width_in_bins) + uint(x); - uint out_mask = bitmaps[my_slice][bin_ix]; - if ((out_mask & my_mask) != 0u) - { - uint idx = uint(int(popcount(out_mask & (my_mask - 1u)))); - if (my_slice > 0u) - { - idx += count[my_slice - 1u][bin_ix]; - } - uint chunk_offset_1 = sh_chunk_offset[bin_ix]; - if (chunk_offset_1 != 0u) - { - v_57.memory[(chunk_offset_1 >> uint(2)) + idx] = element_ix; - } - } - x++; - if (x == x1) - { - x = x0; - y++; - } - } -} - diff --git a/piet-gpu/shader/gen/binning.spv b/piet-gpu/shader/gen/binning.spv deleted file mode 100644 index 1a5c2e1..0000000 Binary files a/piet-gpu/shader/gen/binning.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/clip_leaf.dxil b/piet-gpu/shader/gen/clip_leaf.dxil deleted file mode 100644 index d5123cb..0000000 Binary files a/piet-gpu/shader/gen/clip_leaf.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/clip_leaf.hlsl b/piet-gpu/shader/gen/clip_leaf.hlsl deleted file mode 100644 index 4eb9994..0000000 --- a/piet-gpu/shader/gen/clip_leaf.hlsl +++ /dev/null @@ -1,372 +0,0 @@ -struct Bic -{ - uint a; - uint b; -}; - -struct ClipEl -{ - uint parent_ix; - float4 bbox; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const Bic _394 = { 0u, 0u }; - -ByteAddressBuffer _80 : register(t1, space0); -RWByteAddressBuffer _96 : register(u0, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Bic sh_bic[510]; -groupshared uint sh_stack[256]; -groupshared float4 sh_stack_bbox[256]; -groupshared uint sh_link[256]; -groupshared float4 sh_bbox[256]; - -Bic load_bic(uint ix) -{ - uint base = (_80.Load(56) >> uint(2)) + (2u * ix); - Bic _287 = { _96.Load(base * 4 + 12), _96.Load((base + 1u) * 4 + 12) }; - return _287; -} - -Bic bic_combine(Bic x, Bic y) -{ - uint m = min(x.b, y.a); - Bic _72 = { (x.a + y.a) - m, (x.b + y.b) - m }; - return _72; -} - -ClipEl load_clip_el(uint ix) -{ - uint base = (_80.Load(60) >> uint(2)) + (5u * ix); - uint parent_ix = _96.Load(base * 4 + 12); - float x0 = asfloat(_96.Load((base + 1u) * 4 + 12)); - float y0 = asfloat(_96.Load((base + 2u) * 4 + 12)); - float x1 = asfloat(_96.Load((base + 3u) * 4 + 12)); - float y1 = asfloat(_96.Load((base + 4u) * 4 + 12)); - float4 bbox = float4(x0, y0, x1, y1); - ClipEl _336 = { parent_ix, bbox }; - return _336; -} - -float4 bbox_intersect(float4 a, float4 b) -{ - return float4(max(a.xy, b.xy), min(a.zw, b.zw)); -} - -uint load_path_ix(uint ix) -{ - if (ix < _80.Load(84)) - { - return _96.Load(((_80.Load(52) >> uint(2)) + ix) * 4 + 12); - } - else - { - return 2147483648u; - } -} - -float4 load_path_bbox(uint path_ix) -{ - uint base = (_80.Load(44) >> uint(2)) + (6u * path_ix); - float bbox_l = float(_96.Load(base * 4 + 12)) - 32768.0f; - float bbox_t = float(_96.Load((base + 1u) * 4 + 12)) - 32768.0f; - float bbox_r = float(_96.Load((base + 2u) * 4 + 12)) - 32768.0f; - float bbox_b = float(_96.Load((base + 3u) * 4 + 12)) - 32768.0f; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -uint search_link(inout Bic bic) -{ - uint ix = gl_LocalInvocationID.x; - uint j = 0u; - while (j < 8u) - { - uint base = 512u - (2u << (8u - j)); - if (((ix >> j) & 1u) != 0u) - { - Bic param = sh_bic[(base + (ix >> j)) - 1u]; - Bic param_1 = bic; - Bic test = bic_combine(param, param_1); - if (test.b > 0u) - { - break; - } - bic = test; - ix -= (1u << j); - } - j++; - } - if (ix > 0u) - { - while (j > 0u) - { - j--; - uint base_1 = 512u - (2u << (8u - j)); - Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u]; - Bic param_3 = bic; - Bic test_1 = bic_combine(param_2, param_3); - if (test_1.b == 0u) - { - bic = test_1; - ix -= (1u << j); - } - } - } - if (ix > 0u) - { - return ix - 1u; - } - else - { - return 4294967295u - bic.a; - } -} - -void store_clip_bbox(uint ix, float4 bbox) -{ - uint base = (_80.Load(64) >> uint(2)) + (4u * ix); - _96.Store(base * 4 + 12, asuint(bbox.x)); - _96.Store((base + 1u) * 4 + 12, asuint(bbox.y)); - _96.Store((base + 2u) * 4 + 12, asuint(bbox.z)); - _96.Store((base + 3u) * 4 + 12, asuint(bbox.w)); -} - -void comp_main() -{ - uint th = gl_LocalInvocationID.x; - Bic bic = _394; - if (th < gl_WorkGroupID.x) - { - uint param = th; - bic = load_bic(param); - } - sh_bic[th] = bic; - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - if ((th + (1u << i)) < 256u) - { - Bic other = sh_bic[th + (1u << i)]; - Bic param_1 = bic; - Bic param_2 = other; - bic = bic_combine(param_1, param_2); - } - GroupMemoryBarrierWithGroupSync(); - sh_bic[th] = bic; - } - GroupMemoryBarrierWithGroupSync(); - uint stack_size = sh_bic[0].b; - uint sp = 255u - th; - uint ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = ix + (128u >> i_1); - if (sp < sh_bic[probe].b) - { - ix = probe; - } - } - uint b = sh_bic[ix].b; - float4 bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - if (sp < b) - { - uint param_3 = (((ix * 256u) + b) - sp) - 1u; - ClipEl el = load_clip_el(param_3); - sh_stack[th] = el.parent_ix; - bbox = el.bbox; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - sh_stack_bbox[th] = bbox; - GroupMemoryBarrierWithGroupSync(); - if (th >= (1u << i_2)) - { - float4 param_4 = sh_stack_bbox[th - (1u << i_2)]; - float4 param_5 = bbox; - bbox = bbox_intersect(param_4, param_5); - } - GroupMemoryBarrierWithGroupSync(); - } - sh_stack_bbox[th] = bbox; - uint param_6 = gl_GlobalInvocationID.x; - uint inp = load_path_ix(param_6); - bool is_push = int(inp) >= 0; - Bic _560 = { 1u - uint(is_push), uint(is_push) }; - bic = _560; - sh_bic[th] = bic; - if (is_push) - { - uint param_7 = inp; - bbox = load_path_bbox(param_7); - } - else - { - bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - } - uint inbase = 0u; - for (uint i_3 = 0u; i_3 < 7u; i_3++) - { - uint outbase = 512u - (1u << (8u - i_3)); - GroupMemoryBarrierWithGroupSync(); - if (th < (1u << (7u - i_3))) - { - Bic param_8 = sh_bic[inbase + (th * 2u)]; - Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u]; - sh_bic[outbase + th] = bic_combine(param_8, param_9); - } - inbase = outbase; - } - GroupMemoryBarrierWithGroupSync(); - bic = _394; - Bic param_10 = bic; - uint _619 = search_link(param_10); - bic = param_10; - uint link = _619; - sh_link[th] = link; - GroupMemoryBarrierWithGroupSync(); - uint grandparent; - if (int(link) >= 0) - { - grandparent = sh_link[link]; - } - else - { - grandparent = link - 1u; - } - uint parent; - if (int(link) >= 0) - { - parent = (gl_WorkGroupID.x * 256u) + link; - } - else - { - if (int(link + stack_size) >= 0) - { - parent = sh_stack[256u + link]; - } - else - { - parent = 4294967295u; - } - } - for (uint i_4 = 0u; i_4 < 8u; i_4++) - { - if (i_4 != 0u) - { - sh_link[th] = link; - } - sh_bbox[th] = bbox; - GroupMemoryBarrierWithGroupSync(); - if (int(link) >= 0) - { - float4 param_11 = sh_bbox[link]; - float4 param_12 = bbox; - bbox = bbox_intersect(param_11, param_12); - link = sh_link[link]; - } - GroupMemoryBarrierWithGroupSync(); - } - if (int(link + stack_size) >= 0) - { - float4 param_13 = sh_stack_bbox[256u + link]; - float4 param_14 = bbox; - bbox = bbox_intersect(param_13, param_14); - } - sh_bbox[th] = bbox; - GroupMemoryBarrierWithGroupSync(); - uint path_ix = inp; - bool _718 = !is_push; - bool _726; - if (_718) - { - _726 = gl_GlobalInvocationID.x < _80.Load(84); - } - else - { - _726 = _718; - } - if (_726) - { - uint param_15 = parent; - path_ix = load_path_ix(param_15); - uint drawmonoid_out_base = (_80.Load(48) >> uint(2)) + (4u * (~inp)); - _96.Store(drawmonoid_out_base * 4 + 12, path_ix); - if (int(grandparent) >= 0) - { - bbox = sh_bbox[grandparent]; - } - else - { - if (int(grandparent + stack_size) >= 0) - { - bbox = sh_stack_bbox[256u + grandparent]; - } - else - { - bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - } - } - } - uint param_16 = gl_GlobalInvocationID.x; - float4 param_17 = bbox; - store_clip_bbox(param_16, param_17); -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/clip_leaf.msl b/piet-gpu/shader/gen/clip_leaf.msl deleted file mode 100644 index c9456e8..0000000 --- a/piet-gpu/shader/gen/clip_leaf.msl +++ /dev/null @@ -1,372 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct Bic -{ - uint a; - uint b; -}; - -struct ClipEl -{ - uint parent_ix; - float4 bbox; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -Bic load_bic(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96) -{ - uint base = (v_80.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix); - return Bic{ v_96.memory[base], v_96.memory[base + 1u] }; -} - -static inline __attribute__((always_inline)) -Bic bic_combine(thread const Bic& x, thread const Bic& y) -{ - uint m = min(x.b, y.a); - return Bic{ (x.a + y.a) - m, (x.b + y.b) - m }; -} - -static inline __attribute__((always_inline)) -ClipEl load_clip_el(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96) -{ - uint base = (v_80.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix); - uint parent_ix = v_96.memory[base]; - float x0 = as_type(v_96.memory[base + 1u]); - float y0 = as_type(v_96.memory[base + 2u]); - float x1 = as_type(v_96.memory[base + 3u]); - float y1 = as_type(v_96.memory[base + 4u]); - float4 bbox = float4(x0, y0, x1, y1); - return ClipEl{ parent_ix, bbox }; -} - -static inline __attribute__((always_inline)) -float4 bbox_intersect(thread const float4& a, thread const float4& b) -{ - return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw)); -} - -static inline __attribute__((always_inline)) -uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96) -{ - if (ix < v_80.conf.n_clip) - { - return v_96.memory[(v_80.conf.clip_alloc.offset >> uint(2)) + ix]; - } - else - { - return 2147483648u; - } -} - -static inline __attribute__((always_inline)) -float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96) -{ - uint base = (v_80.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix); - float bbox_l = float(v_96.memory[base]) - 32768.0; - float bbox_t = float(v_96.memory[base + 1u]) - 32768.0; - float bbox_r = float(v_96.memory[base + 2u]) - 32768.0; - float bbox_b = float(v_96.memory[base + 3u]) - 32768.0; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -static inline __attribute__((always_inline)) -uint search_link(thread Bic& bic, thread uint3& gl_LocalInvocationID, threadgroup Bic (&sh_bic)[510]) -{ - uint ix = gl_LocalInvocationID.x; - uint j = 0u; - while (j < 8u) - { - uint base = 512u - (2u << (8u - j)); - if (((ix >> j) & 1u) != 0u) - { - Bic param = sh_bic[(base + (ix >> j)) - 1u]; - Bic param_1 = bic; - Bic test = bic_combine(param, param_1); - if (test.b > 0u) - { - break; - } - bic = test; - ix -= (1u << j); - } - j++; - } - if (ix > 0u) - { - while (j > 0u) - { - j--; - uint base_1 = 512u - (2u << (8u - j)); - Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u]; - Bic param_3 = bic; - Bic test_1 = bic_combine(param_2, param_3); - if (test_1.b == 0u) - { - bic = test_1; - ix -= (1u << j); - } - } - } - if (ix > 0u) - { - return ix - 1u; - } - else - { - return 4294967295u - bic.a; - } -} - -static inline __attribute__((always_inline)) -void store_clip_bbox(thread const uint& ix, thread const float4& bbox, const device ConfigBuf& v_80, device Memory& v_96) -{ - uint base = (v_80.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * ix); - v_96.memory[base] = as_type(bbox.x); - v_96.memory[base + 1u] = as_type(bbox.y); - v_96.memory[base + 2u] = as_type(bbox.z); - v_96.memory[base + 3u] = as_type(bbox.w); -} - -kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_80 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - threadgroup Bic sh_bic[510]; - threadgroup uint sh_stack[256]; - threadgroup float4 sh_stack_bbox[256]; - threadgroup uint sh_link[256]; - threadgroup float4 sh_bbox[256]; - uint th = gl_LocalInvocationID.x; - Bic bic = Bic{ 0u, 0u }; - if (th < gl_WorkGroupID.x) - { - uint param = th; - bic = load_bic(param, v_80, v_96); - } - sh_bic[th] = bic; - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((th + (1u << i)) < 256u) - { - Bic other = sh_bic[th + (1u << i)]; - Bic param_1 = bic; - Bic param_2 = other; - bic = bic_combine(param_1, param_2); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_bic[th] = bic; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint stack_size = sh_bic[0].b; - uint sp = 255u - th; - uint ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = ix + (128u >> i_1); - if (sp < sh_bic[probe].b) - { - ix = probe; - } - } - uint b = sh_bic[ix].b; - float4 bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - if (sp < b) - { - uint param_3 = (((ix * 256u) + b) - sp) - 1u; - ClipEl el = load_clip_el(param_3, v_80, v_96); - sh_stack[th] = el.parent_ix; - bbox = el.bbox; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - sh_stack_bbox[th] = bbox; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (th >= (1u << i_2)) - { - float4 param_4 = sh_stack_bbox[th - (1u << i_2)]; - float4 param_5 = bbox; - bbox = bbox_intersect(param_4, param_5); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - } - sh_stack_bbox[th] = bbox; - uint param_6 = gl_GlobalInvocationID.x; - uint inp = load_path_ix(param_6, v_80, v_96); - bool is_push = int(inp) >= 0; - bic = Bic{ 1u - uint(is_push), uint(is_push) }; - sh_bic[th] = bic; - if (is_push) - { - uint param_7 = inp; - bbox = load_path_bbox(param_7, v_80, v_96); - } - else - { - bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - } - uint inbase = 0u; - for (uint i_3 = 0u; i_3 < 7u; i_3++) - { - uint outbase = 512u - (1u << (8u - i_3)); - threadgroup_barrier(mem_flags::mem_threadgroup); - if (th < (1u << (7u - i_3))) - { - Bic param_8 = sh_bic[inbase + (th * 2u)]; - Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u]; - sh_bic[outbase + th] = bic_combine(param_8, param_9); - } - inbase = outbase; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - bic = Bic{ 0u, 0u }; - Bic param_10 = bic; - uint _619 = search_link(param_10, gl_LocalInvocationID, sh_bic); - bic = param_10; - uint link = _619; - sh_link[th] = link; - threadgroup_barrier(mem_flags::mem_threadgroup); - uint grandparent; - if (int(link) >= 0) - { - grandparent = sh_link[link]; - } - else - { - grandparent = link - 1u; - } - uint parent; - if (int(link) >= 0) - { - parent = (gl_WorkGroupID.x * 256u) + link; - } - else - { - if (int(link + stack_size) >= 0) - { - parent = sh_stack[256u + link]; - } - else - { - parent = 4294967295u; - } - } - for (uint i_4 = 0u; i_4 < 8u; i_4++) - { - if (i_4 != 0u) - { - sh_link[th] = link; - } - sh_bbox[th] = bbox; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (int(link) >= 0) - { - float4 param_11 = sh_bbox[link]; - float4 param_12 = bbox; - bbox = bbox_intersect(param_11, param_12); - link = sh_link[link]; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - } - if (int(link + stack_size) >= 0) - { - float4 param_13 = sh_stack_bbox[256u + link]; - float4 param_14 = bbox; - bbox = bbox_intersect(param_13, param_14); - } - sh_bbox[th] = bbox; - threadgroup_barrier(mem_flags::mem_threadgroup); - uint path_ix = inp; - bool _718 = !is_push; - bool _726; - if (_718) - { - _726 = gl_GlobalInvocationID.x < v_80.conf.n_clip; - } - else - { - _726 = _718; - } - if (_726) - { - uint param_15 = parent; - path_ix = load_path_ix(param_15, v_80, v_96); - uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * (~inp)); - v_96.memory[drawmonoid_out_base] = path_ix; - if (int(grandparent) >= 0) - { - bbox = sh_bbox[grandparent]; - } - else - { - if (int(grandparent + stack_size) >= 0) - { - bbox = sh_stack_bbox[256u + grandparent]; - } - else - { - bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - } - } - } - uint param_16 = gl_GlobalInvocationID.x; - float4 param_17 = bbox; - store_clip_bbox(param_16, param_17, v_80, v_96); -} - diff --git a/piet-gpu/shader/gen/clip_leaf.spv b/piet-gpu/shader/gen/clip_leaf.spv deleted file mode 100644 index fe62632..0000000 Binary files a/piet-gpu/shader/gen/clip_leaf.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/clip_reduce.dxil b/piet-gpu/shader/gen/clip_reduce.dxil deleted file mode 100644 index 13ffb01..0000000 Binary files a/piet-gpu/shader/gen/clip_reduce.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/clip_reduce.hlsl b/piet-gpu/shader/gen/clip_reduce.hlsl deleted file mode 100644 index e031f84..0000000 --- a/piet-gpu/shader/gen/clip_reduce.hlsl +++ /dev/null @@ -1,182 +0,0 @@ -struct Bic -{ - uint a; - uint b; -}; - -struct ClipEl -{ - uint parent_ix; - float4 bbox; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const Bic _268 = { 0u, 0u }; - -ByteAddressBuffer _64 : register(t1, space0); -RWByteAddressBuffer _80 : register(u0, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Bic sh_bic[256]; -groupshared uint sh_parent[256]; -groupshared uint sh_path_ix[256]; -groupshared float4 sh_bbox[256]; - -Bic bic_combine(Bic x, Bic y) -{ - uint m = min(x.b, y.a); - Bic _56 = { (x.a + y.a) - m, (x.b + y.b) - m }; - return _56; -} - -void store_bic(uint ix, Bic bic) -{ - uint base = (_64.Load(56) >> uint(2)) + (2u * ix); - _80.Store(base * 4 + 12, bic.a); - _80.Store((base + 1u) * 4 + 12, bic.b); -} - -float4 load_path_bbox(uint path_ix) -{ - uint base = (_64.Load(44) >> uint(2)) + (6u * path_ix); - float bbox_l = float(_80.Load(base * 4 + 12)) - 32768.0f; - float bbox_t = float(_80.Load((base + 1u) * 4 + 12)) - 32768.0f; - float bbox_r = float(_80.Load((base + 2u) * 4 + 12)) - 32768.0f; - float bbox_b = float(_80.Load((base + 3u) * 4 + 12)) - 32768.0f; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -void store_clip_el(uint ix, ClipEl el) -{ - uint base = (_64.Load(60) >> uint(2)) + (5u * ix); - _80.Store(base * 4 + 12, el.parent_ix); - _80.Store((base + 1u) * 4 + 12, asuint(el.bbox.x)); - _80.Store((base + 2u) * 4 + 12, asuint(el.bbox.y)); - _80.Store((base + 3u) * 4 + 12, asuint(el.bbox.z)); - _80.Store((base + 4u) * 4 + 12, asuint(el.bbox.w)); -} - -void comp_main() -{ - uint th = gl_LocalInvocationID.x; - uint inp = _80.Load(((_64.Load(52) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 12); - bool is_push = int(inp) >= 0; - Bic _208 = { 1u - uint(is_push), uint(is_push) }; - Bic bic = _208; - sh_bic[gl_LocalInvocationID.x] = bic; - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - if ((th + (1u << i)) < 256u) - { - Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)]; - Bic param = bic; - Bic param_1 = other; - bic = bic_combine(param, param_1); - } - GroupMemoryBarrierWithGroupSync(); - sh_bic[th] = bic; - } - if (th == 0u) - { - uint param_2 = gl_WorkGroupID.x; - Bic param_3 = bic; - store_bic(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - uint size = sh_bic[0].b; - bic = _268; - if ((th + 1u) < 256u) - { - bic = sh_bic[th + 1u]; - } - bool _284; - if (is_push) - { - _284 = bic.a == 0u; - } - else - { - _284 = is_push; - } - if (_284) - { - uint local_ix = (size - bic.b) - 1u; - sh_parent[local_ix] = th; - sh_path_ix[local_ix] = inp; - } - GroupMemoryBarrierWithGroupSync(); - float4 bbox; - if (th < size) - { - uint path_ix = sh_path_ix[th]; - uint param_4 = path_ix; - bbox = load_path_bbox(param_4); - } - if (th < size) - { - uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u); - ClipEl _332 = { parent_ix, bbox }; - ClipEl el = _332; - uint param_5 = gl_GlobalInvocationID.x; - ClipEl param_6 = el; - store_clip_el(param_5, param_6); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/clip_reduce.msl b/piet-gpu/shader/gen/clip_reduce.msl deleted file mode 100644 index dd34e64..0000000 --- a/piet-gpu/shader/gen/clip_reduce.msl +++ /dev/null @@ -1,179 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct Bic -{ - uint a; - uint b; -}; - -struct ClipEl -{ - uint parent_ix; - float4 bbox; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -Bic bic_combine(thread const Bic& x, thread const Bic& y) -{ - uint m = min(x.b, y.a); - return Bic{ (x.a + y.a) - m, (x.b + y.b) - m }; -} - -static inline __attribute__((always_inline)) -void store_bic(thread const uint& ix, thread const Bic& bic, const device ConfigBuf& v_64, device Memory& v_80) -{ - uint base = (v_64.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix); - v_80.memory[base] = bic.a; - v_80.memory[base + 1u] = bic.b; -} - -static inline __attribute__((always_inline)) -float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80) -{ - uint base = (v_64.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix); - float bbox_l = float(v_80.memory[base]) - 32768.0; - float bbox_t = float(v_80.memory[base + 1u]) - 32768.0; - float bbox_r = float(v_80.memory[base + 2u]) - 32768.0; - float bbox_b = float(v_80.memory[base + 3u]) - 32768.0; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -static inline __attribute__((always_inline)) -void store_clip_el(thread const uint& ix, thread const ClipEl& el, const device ConfigBuf& v_64, device Memory& v_80) -{ - uint base = (v_64.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix); - v_80.memory[base] = el.parent_ix; - v_80.memory[base + 1u] = as_type(el.bbox.x); - v_80.memory[base + 2u] = as_type(el.bbox.y); - v_80.memory[base + 3u] = as_type(el.bbox.z); - v_80.memory[base + 4u] = as_type(el.bbox.w); -} - -kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_64 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup Bic sh_bic[256]; - threadgroup uint sh_parent[256]; - threadgroup uint sh_path_ix[256]; - threadgroup float4 sh_bbox[256]; - uint th = gl_LocalInvocationID.x; - uint inp = v_80.memory[(v_64.conf.clip_alloc.offset >> uint(2)) + gl_GlobalInvocationID.x]; - bool is_push = int(inp) >= 0; - Bic bic = Bic{ 1u - uint(is_push), uint(is_push) }; - sh_bic[gl_LocalInvocationID.x] = bic; - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((th + (1u << i)) < 256u) - { - Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)]; - Bic param = bic; - Bic param_1 = other; - bic = bic_combine(param, param_1); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_bic[th] = bic; - } - if (th == 0u) - { - uint param_2 = gl_WorkGroupID.x; - Bic param_3 = bic; - store_bic(param_2, param_3, v_64, v_80); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint size = sh_bic[0].b; - bic = Bic{ 0u, 0u }; - if ((th + 1u) < 256u) - { - bic = sh_bic[th + 1u]; - } - bool _284; - if (is_push) - { - _284 = bic.a == 0u; - } - else - { - _284 = is_push; - } - if (_284) - { - uint local_ix = (size - bic.b) - 1u; - sh_parent[local_ix] = th; - sh_path_ix[local_ix] = inp; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - float4 bbox; - if (th < size) - { - uint path_ix = sh_path_ix[th]; - uint param_4 = path_ix; - bbox = load_path_bbox(param_4, v_64, v_80); - } - if (th < size) - { - uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u); - ClipEl el = ClipEl{ parent_ix, bbox }; - uint param_5 = gl_GlobalInvocationID.x; - ClipEl param_6 = el; - store_clip_el(param_5, param_6, v_64, v_80); - } -} - diff --git a/piet-gpu/shader/gen/clip_reduce.spv b/piet-gpu/shader/gen/clip_reduce.spv deleted file mode 100644 index 40121e7..0000000 Binary files a/piet-gpu/shader/gen/clip_reduce.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil deleted file mode 100644 index 58e2da8..0000000 Binary files a/piet-gpu/shader/gen/coarse.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl deleted file mode 100644 index 673e879..0000000 --- a/piet-gpu/shader/gen/coarse.hlsl +++ /dev/null @@ -1,1246 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct BinInstanceRef -{ - uint offset; -}; - -struct BinInstance -{ - uint element_ix; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct Tile -{ - TileSegRef tile; - int backdrop; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -RWByteAddressBuffer _267 : register(u0, space0); -ByteAddressBuffer _891 : register(t1, space0); -ByteAddressBuffer _1390 : register(t2, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -static bool mem_ok; -groupshared uint sh_bitmaps[8][256]; -groupshared Alloc sh_part_elements[256]; -groupshared uint sh_part_count[256]; -groupshared uint sh_elements[256]; -groupshared uint sh_tile_stride[256]; -groupshared uint sh_tile_width[256]; -groupshared uint sh_tile_x0[256]; -groupshared uint sh_tile_y0[256]; -groupshared uint sh_tile_base[256]; -groupshared uint sh_tile_count[256]; - -bool check_deps(uint dep_stage) -{ - uint _273; - _267.InterlockedOr(4, 0u, _273); - return (_273 & dep_stage) == 0u; -} - -Alloc slice_mem(Alloc a, uint offset, uint size) -{ - Alloc _331 = { a.offset + offset }; - return _331; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _267.Load(offset * 4 + 12); - return v; -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok_1) -{ - Alloc a; - a.offset = offset; - return a; -} - -BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) -{ - BinInstanceRef _340 = { ref.offset + (index * 4u) }; - return _340; -} - -BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - BinInstance s; - s.element_ix = raw0; - return s; -} - -Path Path_read(Alloc a, PathRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _404 = { raw2 }; - s.tiles = _404; - return s; -} - -void write_tile_alloc(uint el_ix, Alloc a) -{ -} - -Alloc read_tile_alloc(uint el_ix, bool mem_ok_1) -{ - uint param = 0u; - uint param_1 = _891.Load(0); - bool param_2 = mem_ok_1; - return new_alloc(param, param_1, param_2); -} - -Tile Tile_read(Alloc a, TileRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - TileSegRef _429 = { raw0 }; - Tile s; - s.tile = _429; - s.backdrop = int(raw1); - return s; -} - -uint malloc_stage(uint size, uint mem_size, uint stage) -{ - uint _282; - _267.InterlockedAdd(0, size, _282); - uint offset = _282; - if ((offset + size) > mem_size) - { - uint _292; - _267.InterlockedOr(4, stage, _292); - offset = 0u; - } - return offset; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _267.Store(offset * 4 + 12, val); -} - -void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.new_ref; - write_mem(param, param_1, param_2); -} - -void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 11u; - write_mem(param, param_1, param_2); - CmdJumpRef _880 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdJumpRef param_4 = _880; - CmdJump param_5 = s; - CmdJump_write(param_3, param_4, param_5); -} - -void alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) -{ - if (cmd_ref.offset < cmd_limit) - { - return; - } - uint param = 1024u; - uint param_1 = _891.Load(0); - uint param_2 = 8u; - uint _915 = malloc_stage(param, param_1, param_2); - uint new_cmd = _915; - if (new_cmd == 0u) - { - mem_ok = false; - } - if (mem_ok) - { - CmdJump _926 = { new_cmd }; - CmdJump jump = _926; - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - CmdJump param_5 = jump; - Cmd_Jump_write(param_3, param_4, param_5); - } - uint param_6 = new_cmd; - uint param_7 = 1024u; - bool param_8 = true; - cmd_alloc = new_alloc(param_6, param_7, param_8); - CmdRef _940 = { new_cmd }; - cmd_ref = _940; - cmd_limit = (new_cmd + 1024u) - 144u; -} - -void CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = uint(s.backdrop); - write_mem(param_3, param_4, param_5); -} - -void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 1u; - write_mem(param, param_1, param_2); - CmdFillRef _737 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdFillRef param_4 = _737; - CmdFill param_5 = s; - CmdFill_write(param_3, param_4, param_5); -} - -void Cmd_Solid_write(Alloc a, CmdRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 3u; - write_mem(param, param_1, param_2); -} - -void CmdStroke_write(Alloc a, CmdStrokeRef ref, CmdStroke s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.half_width); - write_mem(param_3, param_4, param_5); -} - -void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 2u; - write_mem(param, param_1, param_2); - CmdStrokeRef _755 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdStrokeRef param_4 = _755; - CmdStroke param_5 = s; - CmdStroke_write(param_3, param_4, param_5); -} - -void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) -{ - if (linewidth < 0.0f) - { - if (tile.tile.offset != 0u) - { - CmdFill _960 = { tile.tile.offset, tile.backdrop }; - CmdFill cmd_fill = _960; - if (mem_ok) - { - Alloc param = alloc; - CmdRef param_1 = cmd_ref; - CmdFill param_2 = cmd_fill; - Cmd_Fill_write(param, param_1, param_2); - } - cmd_ref.offset += 12u; - } - else - { - if (mem_ok) - { - Alloc param_3 = alloc; - CmdRef param_4 = cmd_ref; - Cmd_Solid_write(param_3, param_4); - } - cmd_ref.offset += 4u; - } - } - else - { - CmdStroke _996 = { tile.tile.offset, 0.5f * linewidth }; - CmdStroke cmd_stroke = _996; - if (mem_ok) - { - Alloc param_5 = alloc; - CmdRef param_6 = cmd_ref; - CmdStroke param_7 = cmd_stroke; - Cmd_Stroke_write(param_5, param_6, param_7); - } - cmd_ref.offset += 12u; - } -} - -void CmdColor_write(Alloc a, CmdColorRef ref, CmdColor s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.rgba_color; - write_mem(param, param_1, param_2); -} - -void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 5u; - write_mem(param, param_1, param_2); - CmdColorRef _781 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdColorRef param_4 = _781; - CmdColor param_5 = s; - CmdColor_write(param_3, param_4, param_5); -} - -void CmdLinGrad_write(Alloc a, CmdLinGradRef ref, CmdLinGrad s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.line_x); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.line_y); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.line_c); - write_mem(param_9, param_10, param_11); -} - -void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 6u; - write_mem(param, param_1, param_2); - CmdLinGradRef _799 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdLinGradRef param_4 = _799; - CmdLinGrad param_5 = s; - CmdLinGrad_write(param_3, param_4, param_5); -} - -void CmdRadGrad_write(Alloc a, CmdRadGradRef ref, CmdRadGrad s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.mat.x); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.mat.y); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.mat.z); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.mat.w); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = asuint(s.xlat.x); - write_mem(param_15, param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = asuint(s.xlat.y); - write_mem(param_18, param_19, param_20); - Alloc param_21 = a; - uint param_22 = ix + 7u; - uint param_23 = asuint(s.c1.x); - write_mem(param_21, param_22, param_23); - Alloc param_24 = a; - uint param_25 = ix + 8u; - uint param_26 = asuint(s.c1.y); - write_mem(param_24, param_25, param_26); - Alloc param_27 = a; - uint param_28 = ix + 9u; - uint param_29 = asuint(s.ra); - write_mem(param_27, param_28, param_29); - Alloc param_30 = a; - uint param_31 = ix + 10u; - uint param_32 = asuint(s.roff); - write_mem(param_30, param_31, param_32); -} - -void Cmd_RadGrad_write(Alloc a, CmdRef ref, CmdRadGrad s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 7u; - write_mem(param, param_1, param_2); - CmdRadGradRef _817 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdRadGradRef param_4 = _817; - CmdRadGrad param_5 = s; - CmdRadGrad_write(param_3, param_4, param_5); -} - -void CmdImage_write(Alloc a, CmdImageRef ref, CmdImage s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_3, param_4, param_5); -} - -void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 8u; - write_mem(param, param_1, param_2); - CmdImageRef _835 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdImageRef param_4 = _835; - CmdImage param_5 = s; - CmdImage_write(param_3, param_4, param_5); -} - -void Cmd_BeginClip_write(Alloc a, CmdRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 9u; - write_mem(param, param_1, param_2); -} - -void CmdEndClip_write(Alloc a, CmdEndClipRef ref, CmdEndClip s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.blend; - write_mem(param, param_1, param_2); -} - -void Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 10u; - write_mem(param, param_1, param_2); - CmdEndClipRef _861 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdEndClipRef param_4 = _861; - CmdEndClip param_5 = s; - CmdEndClip_write(param_3, param_4, param_5); -} - -void Cmd_End_write(Alloc a, CmdRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 0u; - write_mem(param, param_1, param_2); -} - -void comp_main() -{ - mem_ok = true; - uint param = 7u; - bool _1012 = check_deps(param); - if (!_1012) - { - return; - } - uint width_in_bins = ((_891.Load(12) + 16u) - 1u) / 16u; - uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; - uint partition_ix = 0u; - uint n_partitions = ((_891.Load(4) + 256u) - 1u) / 256u; - uint th_ix = gl_LocalInvocationID.x; - uint bin_tile_x = 16u * gl_WorkGroupID.x; - uint bin_tile_y = 16u * gl_WorkGroupID.y; - uint tile_x = gl_LocalInvocationID.x % 16u; - uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _891.Load(12)) + bin_tile_x) + tile_x; - Alloc _1082; - _1082.offset = _891.Load(28); - Alloc param_1; - param_1.offset = _1082.offset; - uint param_2 = this_tile_ix * 1024u; - uint param_3 = 1024u; - Alloc cmd_alloc = slice_mem(param_1, param_2, param_3); - CmdRef _1091 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1091; - uint cmd_limit = (cmd_ref.offset + 1024u) - 144u; - uint clip_depth = 0u; - uint clip_zero_depth = 0u; - uint rd_ix = 0u; - uint wr_ix = 0u; - uint part_start_ix = 0u; - uint ready_ix = 0u; - Alloc param_4 = cmd_alloc; - uint param_5 = 0u; - uint param_6 = 8u; - Alloc scratch_alloc = slice_mem(param_4, param_5, param_6); - cmd_ref.offset += 4u; - uint render_blend_depth = 0u; - uint max_blend_depth = 0u; - uint drawmonoid_start = _891.Load(48) >> uint(2); - uint drawtag_start = _891.Load(104) >> uint(2); - uint drawdata_start = _891.Load(108) >> uint(2); - uint drawinfo_start = _891.Load(72) >> uint(2); - Alloc param_7; - Alloc param_9; - uint _1322; - uint element_ix; - Alloc param_18; - uint tile_count; - uint _1622; - float linewidth; - CmdLinGrad cmd_lin; - CmdRadGrad cmd_rad; - while (true) - { - for (uint i = 0u; i < 8u; i++) - { - sh_bitmaps[i][th_ix] = 0u; - } - bool _1374; - for (;;) - { - if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) - { - part_start_ix = ready_ix; - uint count = 0u; - bool _1174 = th_ix < 256u; - bool _1182; - if (_1174) - { - _1182 = (partition_ix + th_ix) < n_partitions; - } - else - { - _1182 = _1174; - } - if (_1182) - { - uint in_ix = (_891.Load(24) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - Alloc _1200; - _1200.offset = _891.Load(24); - param_7.offset = _1200.offset; - uint param_8 = in_ix; - count = read_mem(param_7, param_8); - Alloc _1211; - _1211.offset = _891.Load(24); - param_9.offset = _1211.offset; - uint param_10 = in_ix + 1u; - uint offset = read_mem(param_9, param_10); - uint param_11 = offset; - uint param_12 = count * 4u; - bool param_13 = true; - sh_part_elements[th_ix] = new_alloc(param_11, param_12, param_13); - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - if (th_ix < 256u) - { - sh_part_count[th_ix] = count; - } - GroupMemoryBarrierWithGroupSync(); - if (th_ix < 256u) - { - if (th_ix >= (1u << i_1)) - { - count += sh_part_count[th_ix - (1u << i_1)]; - } - } - GroupMemoryBarrierWithGroupSync(); - } - if (th_ix < 256u) - { - sh_part_count[th_ix] = part_start_ix + count; - } - GroupMemoryBarrierWithGroupSync(); - ready_ix = sh_part_count[255]; - partition_ix += 256u; - } - uint ix = rd_ix + th_ix; - if ((ix >= wr_ix) && (ix < ready_ix)) - { - uint part_ix = 0u; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - uint probe = part_ix + (128u >> i_2); - if (ix >= sh_part_count[probe - 1u]) - { - part_ix = probe; - } - } - if (part_ix > 0u) - { - _1322 = sh_part_count[part_ix - 1u]; - } - else - { - _1322 = part_start_ix; - } - ix -= _1322; - Alloc bin_alloc = sh_part_elements[part_ix]; - BinInstanceRef _1341 = { bin_alloc.offset }; - BinInstanceRef inst_ref = _1341; - BinInstanceRef param_14 = inst_ref; - uint param_15 = ix; - Alloc param_16 = bin_alloc; - BinInstanceRef param_17 = BinInstance_index(param_14, param_15); - BinInstance inst = BinInstance_read(param_16, param_17); - sh_elements[th_ix] = inst.element_ix; - } - GroupMemoryBarrierWithGroupSync(); - wr_ix = min((rd_ix + 256u), ready_ix); - bool _1364 = (wr_ix - rd_ix) < 256u; - if (_1364) - { - _1374 = (wr_ix < ready_ix) || (partition_ix < n_partitions); - } - else - { - _1374 = _1364; - } - if (_1374) - { - continue; - } - else - { - break; - } - } - uint tag = 0u; - if ((th_ix + rd_ix) < wr_ix) - { - element_ix = sh_elements[th_ix]; - tag = _1390.Load((drawtag_start + element_ix) * 4 + 0); - } - switch (tag) - { - case 68u: - case 72u: - case 276u: - case 732u: - case 5u: - case 37u: - { - uint drawmonoid_base = drawmonoid_start + (4u * element_ix); - uint path_ix = _267.Load(drawmonoid_base * 4 + 12); - PathRef _1415 = { _891.Load(20) + (path_ix * 12u) }; - Alloc _1418; - _1418.offset = _891.Load(20); - param_18.offset = _1418.offset; - PathRef param_19 = _1415; - Path path = Path_read(param_18, param_19); - uint stride = path.bbox.z - path.bbox.x; - sh_tile_stride[th_ix] = stride; - int dx = int(path.bbox.x) - int(bin_tile_x); - int dy = int(path.bbox.y) - int(bin_tile_y); - int x0 = clamp(dx, 0, 16); - int y0 = clamp(dy, 0, 16); - int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16); - int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 16); - sh_tile_width[th_ix] = uint(x1 - x0); - sh_tile_x0[th_ix] = uint(x0); - sh_tile_y0[th_ix] = uint(y0); - tile_count = uint(x1 - x0) * uint(y1 - y0); - uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u); - sh_tile_base[th_ix] = base; - uint param_20 = path.tiles.offset; - uint param_21 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_22 = true; - Alloc path_alloc = new_alloc(param_20, param_21, param_22); - uint param_23 = th_ix; - Alloc param_24 = path_alloc; - write_tile_alloc(param_23, param_24); - break; - } - default: - { - tile_count = 0u; - break; - } - } - sh_tile_count[th_ix] = tile_count; - for (uint i_3 = 0u; i_3 < 8u; i_3++) - { - GroupMemoryBarrierWithGroupSync(); - if (th_ix >= (1u << i_3)) - { - tile_count += sh_tile_count[th_ix - (1u << i_3)]; - } - GroupMemoryBarrierWithGroupSync(); - sh_tile_count[th_ix] = tile_count; - } - GroupMemoryBarrierWithGroupSync(); - uint total_tile_count = sh_tile_count[255]; - for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 256u) - { - uint el_ix = 0u; - for (uint i_4 = 0u; i_4 < 8u; i_4++) - { - uint probe_1 = el_ix + (128u >> i_4); - if (ix_1 >= sh_tile_count[probe_1 - 1u]) - { - el_ix = probe_1; - } - } - uint element_ix_1 = sh_elements[el_ix]; - uint tag_1 = _1390.Load((drawtag_start + element_ix_1) * 4 + 0); - if (el_ix > 0u) - { - _1622 = sh_tile_count[el_ix - 1u]; - } - else - { - _1622 = 0u; - } - uint seq_ix = ix_1 - _1622; - uint width = sh_tile_width[el_ix]; - uint x = sh_tile_x0[el_ix] + (seq_ix % width); - uint y = sh_tile_y0[el_ix] + (seq_ix / width); - bool include_tile = false; - uint param_25 = el_ix; - bool param_26 = true; - TileRef _1670 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Alloc param_27 = read_tile_alloc(param_25, param_26); - TileRef param_28 = _1670; - Tile tile = Tile_read(param_27, param_28); - bool is_clip = (tag_1 & 1u) != 0u; - bool is_blend = false; - if (is_clip) - { - uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); - uint scene_offset = _267.Load((drawmonoid_base_1 + 2u) * 4 + 12); - uint dd = drawdata_start + (scene_offset >> uint(2)); - uint blend = _1390.Load(dd * 4 + 0); - is_blend = blend != 32771u; - } - bool _1706 = tile.tile.offset != 0u; - bool _1715; - if (!_1706) - { - _1715 = (tile.backdrop == 0) == is_clip; - } - else - { - _1715 = _1706; - } - include_tile = _1715 || is_blend; - if (include_tile) - { - uint el_slice = el_ix / 32u; - uint el_mask = 1u << (el_ix & 31u); - uint _1737; - InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1737); - } - } - GroupMemoryBarrierWithGroupSync(); - uint slice_ix = 0u; - uint bitmap = sh_bitmaps[0][th_ix]; - while (true) - { - if (bitmap == 0u) - { - slice_ix++; - if (slice_ix == 8u) - { - break; - } - bitmap = sh_bitmaps[slice_ix][th_ix]; - if (bitmap == 0u) - { - continue; - } - } - uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap))); - uint element_ix_2 = sh_elements[element_ref_ix]; - bitmap &= (bitmap - 1u); - uint drawtag = _1390.Load((drawtag_start + element_ix_2) * 4 + 0); - if (clip_zero_depth == 0u) - { - uint param_29 = element_ref_ix; - bool param_30 = true; - TileRef _1812 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Alloc param_31 = read_tile_alloc(param_29, param_30); - TileRef param_32 = _1812; - Tile tile_1 = Tile_read(param_31, param_32); - uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); - uint scene_offset_1 = _267.Load((drawmonoid_base_2 + 2u) * 4 + 12); - uint info_offset = _267.Load((drawmonoid_base_2 + 3u) * 4 + 12); - uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2)); - uint di = drawinfo_start + (info_offset >> uint(2)); - switch (drawtag) - { - case 68u: - { - linewidth = asfloat(_267.Load(di * 4 + 12)); - Alloc param_33 = cmd_alloc; - CmdRef param_34 = cmd_ref; - uint param_35 = cmd_limit; - alloc_cmd(param_33, param_34, param_35); - cmd_alloc = param_33; - cmd_ref = param_34; - cmd_limit = param_35; - Alloc param_36 = cmd_alloc; - CmdRef param_37 = cmd_ref; - Tile param_38 = tile_1; - float param_39 = linewidth; - write_fill(param_36, param_37, param_38, param_39); - cmd_ref = param_37; - uint rgba = _1390.Load(dd_1 * 4 + 0); - if (mem_ok) - { - CmdColor _1882 = { rgba }; - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdColor param_42 = _1882; - Cmd_Color_write(param_40, param_41, param_42); - } - cmd_ref.offset += 8u; - break; - } - case 276u: - { - Alloc param_43 = cmd_alloc; - CmdRef param_44 = cmd_ref; - uint param_45 = cmd_limit; - alloc_cmd(param_43, param_44, param_45); - cmd_alloc = param_43; - cmd_ref = param_44; - cmd_limit = param_45; - linewidth = asfloat(_267.Load(di * 4 + 12)); - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - Tile param_48 = tile_1; - float param_49 = linewidth; - write_fill(param_46, param_47, param_48, param_49); - cmd_ref = param_47; - cmd_lin.index = _1390.Load(dd_1 * 4 + 0); - cmd_lin.line_x = asfloat(_267.Load((di + 1u) * 4 + 12)); - cmd_lin.line_y = asfloat(_267.Load((di + 2u) * 4 + 12)); - cmd_lin.line_c = asfloat(_267.Load((di + 3u) * 4 + 12)); - if (mem_ok) - { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - CmdLinGrad param_52 = cmd_lin; - Cmd_LinGrad_write(param_50, param_51, param_52); - } - cmd_ref.offset += 20u; - break; - } - case 732u: - { - Alloc param_53 = cmd_alloc; - CmdRef param_54 = cmd_ref; - uint param_55 = cmd_limit; - alloc_cmd(param_53, param_54, param_55); - cmd_alloc = param_53; - cmd_ref = param_54; - cmd_limit = param_55; - linewidth = asfloat(_267.Load(di * 4 + 12)); - Alloc param_56 = cmd_alloc; - CmdRef param_57 = cmd_ref; - Tile param_58 = tile_1; - float param_59 = linewidth; - write_fill(param_56, param_57, param_58, param_59); - cmd_ref = param_57; - cmd_rad.index = _1390.Load(dd_1 * 4 + 0); - cmd_rad.mat = asfloat(uint4(_267.Load((di + 1u) * 4 + 12), _267.Load((di + 2u) * 4 + 12), _267.Load((di + 3u) * 4 + 12), _267.Load((di + 4u) * 4 + 12))); - cmd_rad.xlat = asfloat(uint2(_267.Load((di + 5u) * 4 + 12), _267.Load((di + 6u) * 4 + 12))); - cmd_rad.c1 = asfloat(uint2(_267.Load((di + 7u) * 4 + 12), _267.Load((di + 8u) * 4 + 12))); - cmd_rad.ra = asfloat(_267.Load((di + 9u) * 4 + 12)); - cmd_rad.roff = asfloat(_267.Load((di + 10u) * 4 + 12)); - if (mem_ok) - { - Alloc param_60 = cmd_alloc; - CmdRef param_61 = cmd_ref; - CmdRadGrad param_62 = cmd_rad; - Cmd_RadGrad_write(param_60, param_61, param_62); - } - cmd_ref.offset += 48u; - break; - } - case 72u: - { - Alloc param_63 = cmd_alloc; - CmdRef param_64 = cmd_ref; - uint param_65 = cmd_limit; - alloc_cmd(param_63, param_64, param_65); - cmd_alloc = param_63; - cmd_ref = param_64; - cmd_limit = param_65; - linewidth = asfloat(_267.Load(di * 4 + 12)); - Alloc param_66 = cmd_alloc; - CmdRef param_67 = cmd_ref; - Tile param_68 = tile_1; - float param_69 = linewidth; - write_fill(param_66, param_67, param_68, param_69); - cmd_ref = param_67; - uint index = _1390.Load(dd_1 * 4 + 0); - uint raw1 = _1390.Load((dd_1 + 1u) * 4 + 0); - int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - if (mem_ok) - { - CmdImage _2106 = { index, offset_1 }; - Alloc param_70 = cmd_alloc; - CmdRef param_71 = cmd_ref; - CmdImage param_72 = _2106; - Cmd_Image_write(param_70, param_71, param_72); - } - cmd_ref.offset += 12u; - break; - } - case 5u: - { - bool _2120 = tile_1.tile.offset == 0u; - bool _2126; - if (_2120) - { - _2126 = tile_1.backdrop == 0; - } - else - { - _2126 = _2120; - } - if (_2126) - { - clip_zero_depth = clip_depth + 1u; - } - else - { - Alloc param_73 = cmd_alloc; - CmdRef param_74 = cmd_ref; - uint param_75 = cmd_limit; - alloc_cmd(param_73, param_74, param_75); - cmd_alloc = param_73; - cmd_ref = param_74; - cmd_limit = param_75; - if (mem_ok) - { - Alloc param_76 = cmd_alloc; - CmdRef param_77 = cmd_ref; - Cmd_BeginClip_write(param_76, param_77); - } - cmd_ref.offset += 4u; - render_blend_depth++; - max_blend_depth = max(max_blend_depth, render_blend_depth); - } - clip_depth++; - break; - } - case 37u: - { - clip_depth--; - Alloc param_78 = cmd_alloc; - CmdRef param_79 = cmd_ref; - Tile param_80 = tile_1; - float param_81 = -1.0f; - write_fill(param_78, param_79, param_80, param_81); - cmd_ref = param_79; - uint blend_1 = _1390.Load(dd_1 * 4 + 0); - if (mem_ok) - { - CmdEndClip _2182 = { blend_1 }; - Alloc param_82 = cmd_alloc; - CmdRef param_83 = cmd_ref; - CmdEndClip param_84 = _2182; - Cmd_EndClip_write(param_82, param_83, param_84); - } - cmd_ref.offset += 8u; - render_blend_depth--; - break; - } - } - } - else - { - switch (drawtag) - { - case 5u: - { - clip_depth++; - break; - } - case 37u: - { - if (clip_depth == clip_zero_depth) - { - clip_zero_depth = 0u; - } - clip_depth--; - break; - } - } - } - } - GroupMemoryBarrierWithGroupSync(); - rd_ix += 256u; - if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions)) - { - break; - } - } - bool _2231 = (bin_tile_x + tile_x) < _891.Load(12); - bool _2240; - if (_2231) - { - _2240 = (bin_tile_y + tile_y) < _891.Load(16); - } - else - { - _2240 = _2231; - } - if (_2240) - { - if (mem_ok) - { - Alloc param_85 = cmd_alloc; - CmdRef param_86 = cmd_ref; - Cmd_End_write(param_85, param_86); - } - if (max_blend_depth > 4u) - { - uint scratch_size = (((max_blend_depth * 16u) * 16u) * 1u) * 4u; - uint _2264; - _267.InterlockedAdd(8, scratch_size, _2264); - uint scratch = _2264; - Alloc param_87 = scratch_alloc; - uint param_88 = scratch_alloc.offset >> uint(2); - uint param_89 = scratch; - write_mem(param_87, param_88, param_89); - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl deleted file mode 100644 index 5df99b9..0000000 --- a/piet-gpu/shader/gen/coarse.msl +++ /dev/null @@ -1,1261 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -// Implementation of the GLSL findLSB() function -template -inline T spvFindLSB(T x) -{ - return select(ctz(x), T(-1), x == T(0)); -} - -struct Alloc -{ - uint offset; -}; - -struct BinInstanceRef -{ - uint offset; -}; - -struct BinInstance -{ - uint element_ix; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct Tile -{ - TileSegRef tile; - int backdrop; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -bool check_deps(thread const uint& dep_stage, device Memory& v_267) -{ - uint _273 = atomic_fetch_or_explicit((device atomic_uint*)&v_267.mem_error, 0u, memory_order_relaxed); - return (_273 & dep_stage) == 0u; -} - -static inline __attribute__((always_inline)) -Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size) -{ - return Alloc{ a.offset + offset }; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_267) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_267.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -BinInstanceRef BinInstance_index(thread const BinInstanceRef& ref, thread const uint& index) -{ - return BinInstanceRef{ ref.offset + (index * 4u) }; -} - -static inline __attribute__((always_inline)) -BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_267); - BinInstance s; - s.element_ix = raw0; - return s; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_267); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_267); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_267); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - s.tiles = TileRef{ raw2 }; - return s; -} - -static inline __attribute__((always_inline)) -void write_tile_alloc(thread const uint& el_ix, thread const Alloc& a) -{ -} - -static inline __attribute__((always_inline)) -Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, const device ConfigBuf& v_891) -{ - uint param = 0u; - uint param_1 = v_891.conf.mem_size; - bool param_2 = mem_ok; - return new_alloc(param, param_1, param_2); -} - -static inline __attribute__((always_inline)) -Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_267); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_267); - Tile s; - s.tile = TileSegRef{ raw0 }; - s.backdrop = int(raw1); - return s; -} - -static inline __attribute__((always_inline)) -uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_267) -{ - uint _282 = atomic_fetch_add_explicit((device atomic_uint*)&v_267.mem_offset, size, memory_order_relaxed); - uint offset = _282; - if ((offset + size) > mem_size) - { - uint _292 = atomic_fetch_or_explicit((device atomic_uint*)&v_267.mem_error, stage, memory_order_relaxed); - offset = 0u; - } - return offset; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_267) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_267.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.new_ref; - write_mem(param, param_1, param_2, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 11u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdJumpRef param_4 = CmdJumpRef{ ref.offset + 4u }; - CmdJump param_5 = s; - CmdJump_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, thread bool& mem_ok, device Memory& v_267, const device ConfigBuf& v_891) -{ - if (cmd_ref.offset < cmd_limit) - { - return; - } - uint param = 1024u; - uint param_1 = v_891.conf.mem_size; - uint param_2 = 8u; - uint _915 = malloc_stage(param, param_1, param_2, v_267); - uint new_cmd = _915; - if (new_cmd == 0u) - { - mem_ok = false; - } - if (mem_ok) - { - CmdJump jump = CmdJump{ new_cmd }; - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - CmdJump param_5 = jump; - Cmd_Jump_write(param_3, param_4, param_5, v_267); - } - uint param_6 = new_cmd; - uint param_7 = 1024u; - bool param_8 = true; - cmd_alloc = new_alloc(param_6, param_7, param_8); - cmd_ref = CmdRef{ new_cmd }; - cmd_limit = (new_cmd + 1024u) - 144u; -} - -static inline __attribute__((always_inline)) -void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = uint(s.backdrop); - write_mem(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 1u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdFillRef param_4 = CmdFillRef{ ref.offset + 4u }; - CmdFill param_5 = s; - CmdFill_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 3u; - write_mem(param, param_1, param_2, v_267); -} - -static inline __attribute__((always_inline)) -void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.half_width); - write_mem(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 2u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdStrokeRef param_4 = CmdStrokeRef{ ref.offset + 4u }; - CmdStroke param_5 = s; - CmdStroke_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Tile& tile, thread const float& linewidth, thread bool& mem_ok, device Memory& v_267) -{ - if (linewidth < 0.0) - { - if (tile.tile.offset != 0u) - { - CmdFill cmd_fill = CmdFill{ tile.tile.offset, tile.backdrop }; - if (mem_ok) - { - Alloc param = alloc; - CmdRef param_1 = cmd_ref; - CmdFill param_2 = cmd_fill; - Cmd_Fill_write(param, param_1, param_2, v_267); - } - cmd_ref.offset += 12u; - } - else - { - if (mem_ok) - { - Alloc param_3 = alloc; - CmdRef param_4 = cmd_ref; - Cmd_Solid_write(param_3, param_4, v_267); - } - cmd_ref.offset += 4u; - } - } - else - { - CmdStroke cmd_stroke = CmdStroke{ tile.tile.offset, 0.5 * linewidth }; - if (mem_ok) - { - Alloc param_5 = alloc; - CmdRef param_6 = cmd_ref; - CmdStroke param_7 = cmd_stroke; - Cmd_Stroke_write(param_5, param_6, param_7, v_267); - } - cmd_ref.offset += 12u; - } -} - -static inline __attribute__((always_inline)) -void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.rgba_color; - write_mem(param, param_1, param_2, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 5u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdColorRef param_4 = CmdColorRef{ ref.offset + 4u }; - CmdColor param_5 = s; - CmdColor_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.line_x); - write_mem(param_3, param_4, param_5, v_267); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.line_y); - write_mem(param_6, param_7, param_8, v_267); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.line_c); - write_mem(param_9, param_10, param_11, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 6u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdLinGradRef param_4 = CmdLinGradRef{ ref.offset + 4u }; - CmdLinGrad param_5 = s; - CmdLinGrad_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void CmdRadGrad_write(thread const Alloc& a, thread const CmdRadGradRef& ref, thread const CmdRadGrad& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.mat.x); - write_mem(param_3, param_4, param_5, v_267); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.mat.y); - write_mem(param_6, param_7, param_8, v_267); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.mat.z); - write_mem(param_9, param_10, param_11, v_267); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.mat.w); - write_mem(param_12, param_13, param_14, v_267); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = as_type(s.xlat.x); - write_mem(param_15, param_16, param_17, v_267); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = as_type(s.xlat.y); - write_mem(param_18, param_19, param_20, v_267); - Alloc param_21 = a; - uint param_22 = ix + 7u; - uint param_23 = as_type(s.c1.x); - write_mem(param_21, param_22, param_23, v_267); - Alloc param_24 = a; - uint param_25 = ix + 8u; - uint param_26 = as_type(s.c1.y); - write_mem(param_24, param_25, param_26, v_267); - Alloc param_27 = a; - uint param_28 = ix + 9u; - uint param_29 = as_type(s.ra); - write_mem(param_27, param_28, param_29, v_267); - Alloc param_30 = a; - uint param_31 = ix + 10u; - uint param_32 = as_type(s.roff); - write_mem(param_30, param_31, param_32, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_RadGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdRadGrad& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 7u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdRadGradRef param_4 = CmdRadGradRef{ ref.offset + 4u }; - CmdRadGrad param_5 = s; - CmdRadGrad_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 8u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u }; - CmdImage param_5 = s; - CmdImage_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 9u; - write_mem(param, param_1, param_2, v_267); -} - -static inline __attribute__((always_inline)) -void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.blend; - write_mem(param, param_1, param_2, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 10u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdEndClipRef param_4 = CmdEndClipRef{ ref.offset + 4u }; - CmdEndClip param_5 = s; - CmdEndClip_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 0u; - write_mem(param, param_1, param_2, v_267); -} - -kernel void main0(device Memory& v_267 [[buffer(0)]], const device ConfigBuf& v_891 [[buffer(1)]], const device SceneBuf& _1390 [[buffer(2)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint sh_bitmaps[8][256]; - threadgroup Alloc sh_part_elements[256]; - threadgroup uint sh_part_count[256]; - threadgroup uint sh_elements[256]; - threadgroup uint sh_tile_stride[256]; - threadgroup uint sh_tile_width[256]; - threadgroup uint sh_tile_x0[256]; - threadgroup uint sh_tile_y0[256]; - threadgroup uint sh_tile_base[256]; - threadgroup uint sh_tile_count[256]; - bool mem_ok = true; - uint param = 7u; - bool _1012 = check_deps(param, v_267); - if (!_1012) - { - return; - } - uint width_in_bins = ((v_891.conf.width_in_tiles + 16u) - 1u) / 16u; - uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; - uint partition_ix = 0u; - uint n_partitions = ((v_891.conf.n_elements + 256u) - 1u) / 256u; - uint th_ix = gl_LocalInvocationID.x; - uint bin_tile_x = 16u * gl_WorkGroupID.x; - uint bin_tile_y = 16u * gl_WorkGroupID.y; - uint tile_x = gl_LocalInvocationID.x % 16u; - uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * v_891.conf.width_in_tiles) + bin_tile_x) + tile_x; - Alloc param_1; - param_1.offset = v_891.conf.ptcl_alloc.offset; - uint param_2 = this_tile_ix * 1024u; - uint param_3 = 1024u; - Alloc cmd_alloc = slice_mem(param_1, param_2, param_3); - CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; - uint cmd_limit = (cmd_ref.offset + 1024u) - 144u; - uint clip_depth = 0u; - uint clip_zero_depth = 0u; - uint rd_ix = 0u; - uint wr_ix = 0u; - uint part_start_ix = 0u; - uint ready_ix = 0u; - Alloc param_4 = cmd_alloc; - uint param_5 = 0u; - uint param_6 = 8u; - Alloc scratch_alloc = slice_mem(param_4, param_5, param_6); - cmd_ref.offset += 4u; - uint render_blend_depth = 0u; - uint max_blend_depth = 0u; - uint drawmonoid_start = v_891.conf.drawmonoid_alloc.offset >> uint(2); - uint drawtag_start = v_891.conf.drawtag_offset >> uint(2); - uint drawdata_start = v_891.conf.drawdata_offset >> uint(2); - uint drawinfo_start = v_891.conf.drawinfo_alloc.offset >> uint(2); - Alloc param_7; - Alloc param_9; - uint _1322; - uint element_ix; - Alloc param_18; - uint tile_count; - uint _1622; - float linewidth; - CmdLinGrad cmd_lin; - CmdRadGrad cmd_rad; - while (true) - { - for (uint i = 0u; i < 8u; i++) - { - sh_bitmaps[i][th_ix] = 0u; - } - bool _1374; - for (;;) - { - if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) - { - part_start_ix = ready_ix; - uint count = 0u; - bool _1174 = th_ix < 256u; - bool _1182; - if (_1174) - { - _1182 = (partition_ix + th_ix) < n_partitions; - } - else - { - _1182 = _1174; - } - if (_1182) - { - uint in_ix = (v_891.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - param_7.offset = v_891.conf.bin_alloc.offset; - uint param_8 = in_ix; - count = read_mem(param_7, param_8, v_267); - param_9.offset = v_891.conf.bin_alloc.offset; - uint param_10 = in_ix + 1u; - uint offset = read_mem(param_9, param_10, v_267); - uint param_11 = offset; - uint param_12 = count * 4u; - bool param_13 = true; - sh_part_elements[th_ix] = new_alloc(param_11, param_12, param_13); - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - if (th_ix < 256u) - { - sh_part_count[th_ix] = count; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - if (th_ix < 256u) - { - if (th_ix >= (1u << i_1)) - { - count += sh_part_count[th_ix - (1u << i_1)]; - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - } - if (th_ix < 256u) - { - sh_part_count[th_ix] = part_start_ix + count; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - ready_ix = sh_part_count[255]; - partition_ix += 256u; - } - uint ix = rd_ix + th_ix; - if ((ix >= wr_ix) && (ix < ready_ix)) - { - uint part_ix = 0u; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - uint probe = part_ix + (128u >> i_2); - if (ix >= sh_part_count[probe - 1u]) - { - part_ix = probe; - } - } - if (part_ix > 0u) - { - _1322 = sh_part_count[part_ix - 1u]; - } - else - { - _1322 = part_start_ix; - } - ix -= _1322; - Alloc bin_alloc = sh_part_elements[part_ix]; - BinInstanceRef inst_ref = BinInstanceRef{ bin_alloc.offset }; - BinInstanceRef param_14 = inst_ref; - uint param_15 = ix; - Alloc param_16 = bin_alloc; - BinInstanceRef param_17 = BinInstance_index(param_14, param_15); - BinInstance inst = BinInstance_read(param_16, param_17, v_267); - sh_elements[th_ix] = inst.element_ix; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - wr_ix = min((rd_ix + 256u), ready_ix); - bool _1364 = (wr_ix - rd_ix) < 256u; - if (_1364) - { - _1374 = (wr_ix < ready_ix) || (partition_ix < n_partitions); - } - else - { - _1374 = _1364; - } - if (_1374) - { - continue; - } - else - { - break; - } - } - uint tag = 0u; - if ((th_ix + rd_ix) < wr_ix) - { - element_ix = sh_elements[th_ix]; - tag = _1390.scene[drawtag_start + element_ix]; - } - switch (tag) - { - case 68u: - case 72u: - case 276u: - case 732u: - case 5u: - case 37u: - { - uint drawmonoid_base = drawmonoid_start + (4u * element_ix); - uint path_ix = v_267.memory[drawmonoid_base]; - param_18.offset = v_891.conf.tile_alloc.offset; - PathRef param_19 = PathRef{ v_891.conf.tile_alloc.offset + (path_ix * 12u) }; - Path path = Path_read(param_18, param_19, v_267); - uint stride = path.bbox.z - path.bbox.x; - sh_tile_stride[th_ix] = stride; - int dx = int(path.bbox.x) - int(bin_tile_x); - int dy = int(path.bbox.y) - int(bin_tile_y); - int x0 = clamp(dx, 0, 16); - int y0 = clamp(dy, 0, 16); - int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16); - int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 16); - sh_tile_width[th_ix] = uint(x1 - x0); - sh_tile_x0[th_ix] = uint(x0); - sh_tile_y0[th_ix] = uint(y0); - tile_count = uint(x1 - x0) * uint(y1 - y0); - uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u); - sh_tile_base[th_ix] = base; - uint param_20 = path.tiles.offset; - uint param_21 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_22 = true; - Alloc path_alloc = new_alloc(param_20, param_21, param_22); - uint param_23 = th_ix; - Alloc param_24 = path_alloc; - write_tile_alloc(param_23, param_24); - break; - } - default: - { - tile_count = 0u; - break; - } - } - sh_tile_count[th_ix] = tile_count; - for (uint i_3 = 0u; i_3 < 8u; i_3++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (th_ix >= (1u << i_3)) - { - tile_count += sh_tile_count[th_ix - (1u << i_3)]; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_tile_count[th_ix] = tile_count; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint total_tile_count = sh_tile_count[255]; - for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 256u) - { - uint el_ix = 0u; - for (uint i_4 = 0u; i_4 < 8u; i_4++) - { - uint probe_1 = el_ix + (128u >> i_4); - if (ix_1 >= sh_tile_count[probe_1 - 1u]) - { - el_ix = probe_1; - } - } - uint element_ix_1 = sh_elements[el_ix]; - uint tag_1 = _1390.scene[drawtag_start + element_ix_1]; - if (el_ix > 0u) - { - _1622 = sh_tile_count[el_ix - 1u]; - } - else - { - _1622 = 0u; - } - uint seq_ix = ix_1 - _1622; - uint width = sh_tile_width[el_ix]; - uint x = sh_tile_x0[el_ix] + (seq_ix % width); - uint y = sh_tile_y0[el_ix] + (seq_ix / width); - bool include_tile = false; - uint param_25 = el_ix; - bool param_26 = true; - Alloc param_27 = read_tile_alloc(param_25, param_26, v_891); - TileRef param_28 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Tile tile = Tile_read(param_27, param_28, v_267); - bool is_clip = (tag_1 & 1u) != 0u; - bool is_blend = false; - if (is_clip) - { - uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); - uint scene_offset = v_267.memory[drawmonoid_base_1 + 2u]; - uint dd = drawdata_start + (scene_offset >> uint(2)); - uint blend = _1390.scene[dd]; - is_blend = blend != 32771u; - } - bool _1706 = tile.tile.offset != 0u; - bool _1715; - if (!_1706) - { - _1715 = (tile.backdrop == 0) == is_clip; - } - else - { - _1715 = _1706; - } - include_tile = _1715 || is_blend; - if (include_tile) - { - uint el_slice = el_ix / 32u; - uint el_mask = 1u << (el_ix & 31u); - uint _1737 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint slice_ix = 0u; - uint bitmap = sh_bitmaps[0][th_ix]; - while (true) - { - if (bitmap == 0u) - { - slice_ix++; - if (slice_ix == 8u) - { - break; - } - bitmap = sh_bitmaps[slice_ix][th_ix]; - if (bitmap == 0u) - { - continue; - } - } - uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap))); - uint element_ix_2 = sh_elements[element_ref_ix]; - bitmap &= (bitmap - 1u); - uint drawtag = _1390.scene[drawtag_start + element_ix_2]; - if (clip_zero_depth == 0u) - { - uint param_29 = element_ref_ix; - bool param_30 = true; - Alloc param_31 = read_tile_alloc(param_29, param_30, v_891); - TileRef param_32 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Tile tile_1 = Tile_read(param_31, param_32, v_267); - uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); - uint scene_offset_1 = v_267.memory[drawmonoid_base_2 + 2u]; - uint info_offset = v_267.memory[drawmonoid_base_2 + 3u]; - uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2)); - uint di = drawinfo_start + (info_offset >> uint(2)); - switch (drawtag) - { - case 68u: - { - linewidth = as_type(v_267.memory[di]); - Alloc param_33 = cmd_alloc; - CmdRef param_34 = cmd_ref; - uint param_35 = cmd_limit; - alloc_cmd(param_33, param_34, param_35, mem_ok, v_267, v_891); - cmd_alloc = param_33; - cmd_ref = param_34; - cmd_limit = param_35; - Alloc param_36 = cmd_alloc; - CmdRef param_37 = cmd_ref; - Tile param_38 = tile_1; - float param_39 = linewidth; - write_fill(param_36, param_37, param_38, param_39, mem_ok, v_267); - cmd_ref = param_37; - uint rgba = _1390.scene[dd_1]; - if (mem_ok) - { - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdColor param_42 = CmdColor{ rgba }; - Cmd_Color_write(param_40, param_41, param_42, v_267); - } - cmd_ref.offset += 8u; - break; - } - case 276u: - { - Alloc param_43 = cmd_alloc; - CmdRef param_44 = cmd_ref; - uint param_45 = cmd_limit; - alloc_cmd(param_43, param_44, param_45, mem_ok, v_267, v_891); - cmd_alloc = param_43; - cmd_ref = param_44; - cmd_limit = param_45; - linewidth = as_type(v_267.memory[di]); - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - Tile param_48 = tile_1; - float param_49 = linewidth; - write_fill(param_46, param_47, param_48, param_49, mem_ok, v_267); - cmd_ref = param_47; - cmd_lin.index = _1390.scene[dd_1]; - cmd_lin.line_x = as_type(v_267.memory[di + 1u]); - cmd_lin.line_y = as_type(v_267.memory[di + 2u]); - cmd_lin.line_c = as_type(v_267.memory[di + 3u]); - if (mem_ok) - { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - CmdLinGrad param_52 = cmd_lin; - Cmd_LinGrad_write(param_50, param_51, param_52, v_267); - } - cmd_ref.offset += 20u; - break; - } - case 732u: - { - Alloc param_53 = cmd_alloc; - CmdRef param_54 = cmd_ref; - uint param_55 = cmd_limit; - alloc_cmd(param_53, param_54, param_55, mem_ok, v_267, v_891); - cmd_alloc = param_53; - cmd_ref = param_54; - cmd_limit = param_55; - linewidth = as_type(v_267.memory[di]); - Alloc param_56 = cmd_alloc; - CmdRef param_57 = cmd_ref; - Tile param_58 = tile_1; - float param_59 = linewidth; - write_fill(param_56, param_57, param_58, param_59, mem_ok, v_267); - cmd_ref = param_57; - cmd_rad.index = _1390.scene[dd_1]; - cmd_rad.mat = as_type(uint4(v_267.memory[di + 1u], v_267.memory[di + 2u], v_267.memory[di + 3u], v_267.memory[di + 4u])); - cmd_rad.xlat = as_type(uint2(v_267.memory[di + 5u], v_267.memory[di + 6u])); - cmd_rad.c1 = as_type(uint2(v_267.memory[di + 7u], v_267.memory[di + 8u])); - cmd_rad.ra = as_type(v_267.memory[di + 9u]); - cmd_rad.roff = as_type(v_267.memory[di + 10u]); - if (mem_ok) - { - Alloc param_60 = cmd_alloc; - CmdRef param_61 = cmd_ref; - CmdRadGrad param_62 = cmd_rad; - Cmd_RadGrad_write(param_60, param_61, param_62, v_267); - } - cmd_ref.offset += 48u; - break; - } - case 72u: - { - Alloc param_63 = cmd_alloc; - CmdRef param_64 = cmd_ref; - uint param_65 = cmd_limit; - alloc_cmd(param_63, param_64, param_65, mem_ok, v_267, v_891); - cmd_alloc = param_63; - cmd_ref = param_64; - cmd_limit = param_65; - linewidth = as_type(v_267.memory[di]); - Alloc param_66 = cmd_alloc; - CmdRef param_67 = cmd_ref; - Tile param_68 = tile_1; - float param_69 = linewidth; - write_fill(param_66, param_67, param_68, param_69, mem_ok, v_267); - cmd_ref = param_67; - uint index = _1390.scene[dd_1]; - uint raw1 = _1390.scene[dd_1 + 1u]; - int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - if (mem_ok) - { - Alloc param_70 = cmd_alloc; - CmdRef param_71 = cmd_ref; - CmdImage param_72 = CmdImage{ index, offset_1 }; - Cmd_Image_write(param_70, param_71, param_72, v_267); - } - cmd_ref.offset += 12u; - break; - } - case 5u: - { - bool _2120 = tile_1.tile.offset == 0u; - bool _2126; - if (_2120) - { - _2126 = tile_1.backdrop == 0; - } - else - { - _2126 = _2120; - } - if (_2126) - { - clip_zero_depth = clip_depth + 1u; - } - else - { - Alloc param_73 = cmd_alloc; - CmdRef param_74 = cmd_ref; - uint param_75 = cmd_limit; - alloc_cmd(param_73, param_74, param_75, mem_ok, v_267, v_891); - cmd_alloc = param_73; - cmd_ref = param_74; - cmd_limit = param_75; - if (mem_ok) - { - Alloc param_76 = cmd_alloc; - CmdRef param_77 = cmd_ref; - Cmd_BeginClip_write(param_76, param_77, v_267); - } - cmd_ref.offset += 4u; - render_blend_depth++; - max_blend_depth = max(max_blend_depth, render_blend_depth); - } - clip_depth++; - break; - } - case 37u: - { - clip_depth--; - Alloc param_78 = cmd_alloc; - CmdRef param_79 = cmd_ref; - Tile param_80 = tile_1; - float param_81 = -1.0; - write_fill(param_78, param_79, param_80, param_81, mem_ok, v_267); - cmd_ref = param_79; - uint blend_1 = _1390.scene[dd_1]; - if (mem_ok) - { - Alloc param_82 = cmd_alloc; - CmdRef param_83 = cmd_ref; - CmdEndClip param_84 = CmdEndClip{ blend_1 }; - Cmd_EndClip_write(param_82, param_83, param_84, v_267); - } - cmd_ref.offset += 8u; - render_blend_depth--; - break; - } - } - } - else - { - switch (drawtag) - { - case 5u: - { - clip_depth++; - break; - } - case 37u: - { - if (clip_depth == clip_zero_depth) - { - clip_zero_depth = 0u; - } - clip_depth--; - break; - } - } - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - rd_ix += 256u; - if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions)) - { - break; - } - } - bool _2231 = (bin_tile_x + tile_x) < v_891.conf.width_in_tiles; - bool _2240; - if (_2231) - { - _2240 = (bin_tile_y + tile_y) < v_891.conf.height_in_tiles; - } - else - { - _2240 = _2231; - } - if (_2240) - { - if (mem_ok) - { - Alloc param_85 = cmd_alloc; - CmdRef param_86 = cmd_ref; - Cmd_End_write(param_85, param_86, v_267); - } - if (max_blend_depth > 4u) - { - uint scratch_size = (((max_blend_depth * 16u) * 16u) * 1u) * 4u; - uint _2264 = atomic_fetch_add_explicit((device atomic_uint*)&v_267.blend_offset, scratch_size, memory_order_relaxed); - uint scratch = _2264; - Alloc param_87 = scratch_alloc; - uint param_88 = scratch_alloc.offset >> uint(2); - uint param_89 = scratch; - write_mem(param_87, param_88, param_89, v_267); - } - } -} - diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv deleted file mode 100644 index 2417cf8..0000000 Binary files a/piet-gpu/shader/gen/coarse.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil deleted file mode 100644 index 97b006a..0000000 Binary files a/piet-gpu/shader/gen/draw_leaf.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/draw_leaf.hlsl b/piet-gpu/shader/gen/draw_leaf.hlsl deleted file mode 100644 index 789c9b3..0000000 --- a/piet-gpu/shader/gen/draw_leaf.hlsl +++ /dev/null @@ -1,269 +0,0 @@ -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const DrawMonoid _23 = { 0u, 0u, 0u, 0u }; - -ByteAddressBuffer _93 : register(t1, space0); -ByteAddressBuffer _103 : register(t2, space0); -ByteAddressBuffer _203 : register(t3, space0); -RWByteAddressBuffer _285 : register(u0, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared DrawMonoid sh_scratch[256]; - -DrawMonoid map_tag(uint tag_word) -{ - uint has_path = uint(tag_word != 0u); - DrawMonoid _76 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; - return _76; -} - -DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -DrawMonoid draw_monoid_identity() -{ - return _23; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _93.Load(104) >> uint(2); - uint tag_word = _103.Load((drawtag_base + ix) * 4 + 0); - uint param = tag_word; - DrawMonoid agg = map_tag(param); - DrawMonoid local[8]; - local[0] = agg; - for (uint i = 1u; i < 8u; i++) - { - tag_word = _103.Load(((drawtag_base + ix) + i) * 4 + 0); - uint param_1 = tag_word; - DrawMonoid param_2 = agg; - DrawMonoid param_3 = map_tag(param_1); - agg = combine_draw_monoid(param_2, param_3); - local[i] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - DrawMonoid param_4 = other; - DrawMonoid param_5 = agg; - agg = combine_draw_monoid(param_4, param_5); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - DrawMonoid row = draw_monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - DrawMonoid _209; - _209.path_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 0); - _209.clip_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 4); - _209.scene_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 8); - _209.info_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 12); - row.path_ix = _209.path_ix; - row.clip_ix = _209.clip_ix; - row.scene_offset = _209.scene_offset; - row.info_offset = _209.info_offset; - } - if (gl_LocalInvocationID.x > 0u) - { - DrawMonoid param_6 = row; - DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_draw_monoid(param_6, param_7); - } - uint drawdata_base = _93.Load(108) >> uint(2); - uint drawinfo_base = _93.Load(72) >> uint(2); - uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_93.Load(48) >> uint(2)) + (out_ix * 4u); - uint clip_out_base = _93.Load(52) >> uint(2); - float4 mat; - float2 translate; - float2 p0; - float2 p1; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - DrawMonoid m = row; - if (i_2 > 0u) - { - DrawMonoid param_8 = m; - DrawMonoid param_9 = local[i_2 - 1u]; - m = combine_draw_monoid(param_8, param_9); - } - _285.Store((out_base + (i_2 * 4u)) * 4 + 12, m.path_ix); - _285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 12, m.clip_ix); - _285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 12, m.scene_offset); - _285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 12, m.info_offset); - uint dd = drawdata_base + (m.scene_offset >> uint(2)); - uint di = drawinfo_base + (m.info_offset >> uint(2)); - tag_word = _103.Load(((drawtag_base + ix) + i_2) * 4 + 0); - if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u)) - { - uint bbox_offset = (_93.Load(44) >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_285.Load(bbox_offset * 4 + 12)) - 32768.0f; - float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 12)) - 32768.0f; - float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 12)) - 32768.0f; - float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 12)) - 32768.0f; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 12)); - uint fill_mode = uint(linewidth >= 0.0f); - if (((linewidth >= 0.0f) || (tag_word == 276u)) || (tag_word == 732u)) - { - uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 12); - uint t = (_93.Load(40) >> uint(2)) + (6u * trans_ix); - mat = asfloat(uint4(_285.Load(t * 4 + 12), _285.Load((t + 1u) * 4 + 12), _285.Load((t + 2u) * 4 + 12), _285.Load((t + 3u) * 4 + 12))); - if ((tag_word == 276u) || (tag_word == 732u)) - { - translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 12), _285.Load((t + 5u) * 4 + 12))); - } - } - if (linewidth >= 0.0f) - { - linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); - } - switch (tag_word) - { - case 68u: - case 72u: - { - _285.Store(di * 4 + 12, asuint(linewidth)); - break; - } - case 276u: - { - _285.Store(di * 4 + 12, asuint(linewidth)); - p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0))); - p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0))); - p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; - p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; - float2 dxy = p1 - p0; - float scale = 1.0f / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); - float line_x = dxy.x * scale; - float line_y = dxy.y * scale; - float line_c = -((p0.x * line_x) + (p0.y * line_y)); - _285.Store((di + 1u) * 4 + 12, asuint(line_x)); - _285.Store((di + 2u) * 4 + 12, asuint(line_y)); - _285.Store((di + 3u) * 4 + 12, asuint(line_c)); - break; - } - case 732u: - { - p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0))); - p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0))); - float r0 = asfloat(_103.Load((dd + 5u) * 4 + 0)); - float r1 = asfloat(_103.Load((dd + 6u) * 4 + 0)); - float inv_det = 1.0f / ((mat.x * mat.w) - (mat.y * mat.z)); - float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det; - float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y); - inv_tr += p0; - float2 center1 = p1 - p0; - float rr = r1 / (r1 - r0); - float rainv = rr / ((r1 * r1) - dot(center1, center1)); - float2 c1 = center1 * rainv; - float ra = rr * rainv; - float roff = rr - 1.0f; - _285.Store(di * 4 + 12, asuint(linewidth)); - _285.Store((di + 1u) * 4 + 12, asuint(inv_mat.x)); - _285.Store((di + 2u) * 4 + 12, asuint(inv_mat.y)); - _285.Store((di + 3u) * 4 + 12, asuint(inv_mat.z)); - _285.Store((di + 4u) * 4 + 12, asuint(inv_mat.w)); - _285.Store((di + 5u) * 4 + 12, asuint(inv_tr.x)); - _285.Store((di + 6u) * 4 + 12, asuint(inv_tr.y)); - _285.Store((di + 7u) * 4 + 12, asuint(c1.x)); - _285.Store((di + 8u) * 4 + 12, asuint(c1.y)); - _285.Store((di + 9u) * 4 + 12, asuint(ra)); - _285.Store((di + 10u) * 4 + 12, asuint(roff)); - break; - } - case 5u: - { - break; - } - } - } - if ((tag_word == 5u) || (tag_word == 37u)) - { - uint path_ix = ~(out_ix + i_2); - if (tag_word == 5u) - { - path_ix = m.path_ix; - } - _285.Store((clip_out_base + m.clip_ix) * 4 + 12, path_ix); - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/draw_leaf.msl b/piet-gpu/shader/gen/draw_leaf.msl deleted file mode 100644 index 2ec1911..0000000 --- a/piet-gpu/shader/gen/draw_leaf.msl +++ /dev/null @@ -1,318 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct DrawMonoid_1 -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct ParentBuf -{ - DrawMonoid_1 parent[1]; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -DrawMonoid map_tag(thread const uint& tag_word) -{ - uint has_path = uint(tag_word != 0u); - return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; -} - -static inline __attribute__((always_inline)) -DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -static inline __attribute__((always_inline)) -DrawMonoid draw_monoid_identity() -{ - return DrawMonoid{ 0u, 0u, 0u, 0u }; -} - -kernel void main0(device Memory& _285 [[buffer(0)]], const device ConfigBuf& _93 [[buffer(1)]], const device SceneBuf& _103 [[buffer(2)]], const device ParentBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup DrawMonoid sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _93.conf.drawtag_offset >> uint(2); - uint tag_word = _103.scene[drawtag_base + ix]; - uint param = tag_word; - DrawMonoid agg = map_tag(param); - spvUnsafeArray local; - local[0] = agg; - for (uint i = 1u; i < 8u; i++) - { - tag_word = _103.scene[(drawtag_base + ix) + i]; - uint param_1 = tag_word; - DrawMonoid param_2 = agg; - DrawMonoid param_3 = map_tag(param_1); - agg = combine_draw_monoid(param_2, param_3); - local[i] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - DrawMonoid param_4 = other; - DrawMonoid param_5 = agg; - agg = combine_draw_monoid(param_4, param_5); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - DrawMonoid row = draw_monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - uint _206 = gl_WorkGroupID.x - 1u; - row.path_ix = _203.parent[_206].path_ix; - row.clip_ix = _203.parent[_206].clip_ix; - row.scene_offset = _203.parent[_206].scene_offset; - row.info_offset = _203.parent[_206].info_offset; - } - if (gl_LocalInvocationID.x > 0u) - { - DrawMonoid param_6 = row; - DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_draw_monoid(param_6, param_7); - } - uint drawdata_base = _93.conf.drawdata_offset >> uint(2); - uint drawinfo_base = _93.conf.drawinfo_alloc.offset >> uint(2); - uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_93.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u); - uint clip_out_base = _93.conf.clip_alloc.offset >> uint(2); - float4 mat; - float2 translate; - float2 p0; - float2 p1; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - DrawMonoid m = row; - if (i_2 > 0u) - { - DrawMonoid param_8 = m; - DrawMonoid param_9 = local[i_2 - 1u]; - m = combine_draw_monoid(param_8, param_9); - } - _285.memory[out_base + (i_2 * 4u)] = m.path_ix; - _285.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix; - _285.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset; - _285.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset; - uint dd = drawdata_base + (m.scene_offset >> uint(2)); - uint di = drawinfo_base + (m.info_offset >> uint(2)); - tag_word = _103.scene[(drawtag_base + ix) + i_2]; - if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u)) - { - uint bbox_offset = (_93.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_285.memory[bbox_offset]) - 32768.0; - float bbox_t = float(_285.memory[bbox_offset + 1u]) - 32768.0; - float bbox_r = float(_285.memory[bbox_offset + 2u]) - 32768.0; - float bbox_b = float(_285.memory[bbox_offset + 3u]) - 32768.0; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = as_type(_285.memory[bbox_offset + 4u]); - uint fill_mode = uint(linewidth >= 0.0); - if (((linewidth >= 0.0) || (tag_word == 276u)) || (tag_word == 732u)) - { - uint trans_ix = _285.memory[bbox_offset + 5u]; - uint t = (_93.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); - mat = as_type(uint4(_285.memory[t], _285.memory[t + 1u], _285.memory[t + 2u], _285.memory[t + 3u])); - if ((tag_word == 276u) || (tag_word == 732u)) - { - translate = as_type(uint2(_285.memory[t + 4u], _285.memory[t + 5u])); - } - } - if (linewidth >= 0.0) - { - linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); - } - switch (tag_word) - { - case 68u: - case 72u: - { - _285.memory[di] = as_type(linewidth); - break; - } - case 276u: - { - _285.memory[di] = as_type(linewidth); - p0 = as_type(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u])); - p1 = as_type(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u])); - p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; - p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; - float2 dxy = p1 - p0; - float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); - float line_x = dxy.x * scale; - float line_y = dxy.y * scale; - float line_c = -((p0.x * line_x) + (p0.y * line_y)); - _285.memory[di + 1u] = as_type(line_x); - _285.memory[di + 2u] = as_type(line_y); - _285.memory[di + 3u] = as_type(line_c); - break; - } - case 732u: - { - p0 = as_type(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u])); - p1 = as_type(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u])); - float r0 = as_type(_103.scene[dd + 5u]); - float r1 = as_type(_103.scene[dd + 6u]); - float inv_det = 1.0 / ((mat.x * mat.w) - (mat.y * mat.z)); - float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det; - float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y); - inv_tr += p0; - float2 center1 = p1 - p0; - float rr = r1 / (r1 - r0); - float rainv = rr / ((r1 * r1) - dot(center1, center1)); - float2 c1 = center1 * rainv; - float ra = rr * rainv; - float roff = rr - 1.0; - _285.memory[di] = as_type(linewidth); - _285.memory[di + 1u] = as_type(inv_mat.x); - _285.memory[di + 2u] = as_type(inv_mat.y); - _285.memory[di + 3u] = as_type(inv_mat.z); - _285.memory[di + 4u] = as_type(inv_mat.w); - _285.memory[di + 5u] = as_type(inv_tr.x); - _285.memory[di + 6u] = as_type(inv_tr.y); - _285.memory[di + 7u] = as_type(c1.x); - _285.memory[di + 8u] = as_type(c1.y); - _285.memory[di + 9u] = as_type(ra); - _285.memory[di + 10u] = as_type(roff); - break; - } - case 5u: - { - break; - } - } - } - if ((tag_word == 5u) || (tag_word == 37u)) - { - uint path_ix = ~(out_ix + i_2); - if (tag_word == 5u) - { - path_ix = m.path_ix; - } - _285.memory[clip_out_base + m.clip_ix] = path_ix; - } - } -} - diff --git a/piet-gpu/shader/gen/draw_leaf.spv b/piet-gpu/shader/gen/draw_leaf.spv deleted file mode 100644 index f9feedc..0000000 Binary files a/piet-gpu/shader/gen/draw_leaf.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil deleted file mode 100644 index 5516a9a..0000000 Binary files a/piet-gpu/shader/gen/draw_reduce.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/draw_reduce.hlsl b/piet-gpu/shader/gen/draw_reduce.hlsl deleted file mode 100644 index 1a8f2b1..0000000 --- a/piet-gpu/shader/gen/draw_reduce.hlsl +++ /dev/null @@ -1,127 +0,0 @@ -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -ByteAddressBuffer _87 : register(t1, space0); -ByteAddressBuffer _97 : register(t2, space0); -RWByteAddressBuffer _188 : register(u3, space0); -RWByteAddressBuffer _206 : register(u0, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared DrawMonoid sh_scratch[256]; - -DrawMonoid map_tag(uint tag_word) -{ - uint has_path = uint(tag_word != 0u); - DrawMonoid _70 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; - return _70; -} - -DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _87.Load(104) >> uint(2); - uint tag_word = _97.Load((drawtag_base + ix) * 4 + 0); - uint param = tag_word; - DrawMonoid agg = map_tag(param); - for (uint i = 1u; i < 8u; i++) - { - uint tag_word_1 = _97.Load(((drawtag_base + ix) + i) * 4 + 0); - uint param_1 = tag_word_1; - DrawMonoid param_2 = agg; - DrawMonoid param_3 = map_tag(param_1); - agg = combine_draw_monoid(param_2, param_3); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - DrawMonoid param_4 = agg; - DrawMonoid param_5 = other; - agg = combine_draw_monoid(param_4, param_5); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _188.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix); - _188.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix); - _188.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset); - _188.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/draw_reduce.msl b/piet-gpu/shader/gen/draw_reduce.msl deleted file mode 100644 index b2510e3..0000000 --- a/piet-gpu/shader/gen/draw_reduce.msl +++ /dev/null @@ -1,142 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct DrawMonoid_1 -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct OutBuf -{ - DrawMonoid_1 outbuf[1]; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -DrawMonoid map_tag(thread const uint& tag_word) -{ - uint has_path = uint(tag_word != 0u); - return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; -} - -static inline __attribute__((always_inline)) -DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -kernel void main0(const device ConfigBuf& _87 [[buffer(1)]], const device SceneBuf& _97 [[buffer(2)]], device OutBuf& _188 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup DrawMonoid sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _87.conf.drawtag_offset >> uint(2); - uint tag_word = _97.scene[drawtag_base + ix]; - uint param = tag_word; - DrawMonoid agg = map_tag(param); - for (uint i = 1u; i < 8u; i++) - { - uint tag_word_1 = _97.scene[(drawtag_base + ix) + i]; - uint param_1 = tag_word_1; - DrawMonoid param_2 = agg; - DrawMonoid param_3 = map_tag(param_1); - agg = combine_draw_monoid(param_2, param_3); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - DrawMonoid param_4 = agg; - DrawMonoid param_5 = other; - agg = combine_draw_monoid(param_4, param_5); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _188.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; - _188.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; - _188.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset; - _188.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset; - } -} - diff --git a/piet-gpu/shader/gen/draw_reduce.spv b/piet-gpu/shader/gen/draw_reduce.spv deleted file mode 100644 index 2992677..0000000 Binary files a/piet-gpu/shader/gen/draw_reduce.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/draw_root.dxil b/piet-gpu/shader/gen/draw_root.dxil deleted file mode 100644 index 4ea23f7..0000000 Binary files a/piet-gpu/shader/gen/draw_root.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/draw_root.hlsl b/piet-gpu/shader/gen/draw_root.hlsl deleted file mode 100644 index b4cb7e4..0000000 --- a/piet-gpu/shader/gen/draw_root.hlsl +++ /dev/null @@ -1,108 +0,0 @@ -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const DrawMonoid _18 = { 0u, 0u, 0u, 0u }; - -RWByteAddressBuffer _71 : register(u0, space0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared DrawMonoid sh_scratch[256]; - -DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -DrawMonoid draw_monoid_identity() -{ - return _18; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - DrawMonoid _75; - _75.path_ix = _71.Load(ix * 16 + 0); - _75.clip_ix = _71.Load(ix * 16 + 4); - _75.scene_offset = _71.Load(ix * 16 + 8); - _75.info_offset = _71.Load(ix * 16 + 12); - DrawMonoid local[8]; - local[0].path_ix = _75.path_ix; - local[0].clip_ix = _75.clip_ix; - local[0].scene_offset = _75.scene_offset; - local[0].info_offset = _75.info_offset; - DrawMonoid param_1; - for (uint i = 1u; i < 8u; i++) - { - DrawMonoid param = local[i - 1u]; - DrawMonoid _106; - _106.path_ix = _71.Load((ix + i) * 16 + 0); - _106.clip_ix = _71.Load((ix + i) * 16 + 4); - _106.scene_offset = _71.Load((ix + i) * 16 + 8); - _106.info_offset = _71.Load((ix + i) * 16 + 12); - param_1.path_ix = _106.path_ix; - param_1.clip_ix = _106.clip_ix; - param_1.scene_offset = _106.scene_offset; - param_1.info_offset = _106.info_offset; - local[i] = combine_draw_monoid(param, param_1); - } - DrawMonoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - DrawMonoid param_2 = other; - DrawMonoid param_3 = agg; - agg = combine_draw_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - DrawMonoid row = draw_monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - DrawMonoid param_4 = row; - DrawMonoid param_5 = local[i_2]; - DrawMonoid m = combine_draw_monoid(param_4, param_5); - uint _199 = ix + i_2; - _71.Store(_199 * 16 + 0, m.path_ix); - _71.Store(_199 * 16 + 4, m.clip_ix); - _71.Store(_199 * 16 + 8, m.scene_offset); - _71.Store(_199 * 16 + 12, m.info_offset); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/draw_root.msl b/piet-gpu/shader/gen/draw_root.msl deleted file mode 100644 index 9ee8cfe..0000000 --- a/piet-gpu/shader/gen/draw_root.msl +++ /dev/null @@ -1,140 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct DrawMonoid_1 -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct DataBuf -{ - DrawMonoid_1 data[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -static inline __attribute__((always_inline)) -DrawMonoid draw_monoid_identity() -{ - return DrawMonoid{ 0u, 0u, 0u, 0u }; -} - -kernel void main0(device DataBuf& _71 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup DrawMonoid sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - spvUnsafeArray local; - local[0].path_ix = _71.data[ix].path_ix; - local[0].clip_ix = _71.data[ix].clip_ix; - local[0].scene_offset = _71.data[ix].scene_offset; - local[0].info_offset = _71.data[ix].info_offset; - DrawMonoid param_1; - for (uint i = 1u; i < 8u; i++) - { - uint _100 = ix + i; - DrawMonoid param = local[i - 1u]; - param_1.path_ix = _71.data[_100].path_ix; - param_1.clip_ix = _71.data[_100].clip_ix; - param_1.scene_offset = _71.data[_100].scene_offset; - param_1.info_offset = _71.data[_100].info_offset; - local[i] = combine_draw_monoid(param, param_1); - } - DrawMonoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - DrawMonoid param_2 = other; - DrawMonoid param_3 = agg; - agg = combine_draw_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - DrawMonoid row = draw_monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - DrawMonoid param_4 = row; - DrawMonoid param_5 = local[i_2]; - DrawMonoid m = combine_draw_monoid(param_4, param_5); - uint _199 = ix + i_2; - _71.data[_199].path_ix = m.path_ix; - _71.data[_199].clip_ix = m.clip_ix; - _71.data[_199].scene_offset = m.scene_offset; - _71.data[_199].info_offset = m.info_offset; - } -} - diff --git a/piet-gpu/shader/gen/draw_root.spv b/piet-gpu/shader/gen/draw_root.spv deleted file mode 100644 index e6a53e5..0000000 Binary files a/piet-gpu/shader/gen/draw_root.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/kernel4.dxil b/piet-gpu/shader/gen/kernel4.dxil deleted file mode 100644 index 33873b3..0000000 Binary files a/piet-gpu/shader/gen/kernel4.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl deleted file mode 100644 index 2e1f937..0000000 --- a/piet-gpu/shader/gen/kernel4.hlsl +++ /dev/null @@ -1,1304 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdAlphaRef -{ - uint offset; -}; - -struct CmdAlpha -{ - float alpha; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct CmdTag -{ - uint tag; - uint flags; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 _vector; - float y_edge; - TileSegRef next; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); - -RWByteAddressBuffer _297 : register(u0, space0); -ByteAddressBuffer _1681 : register(t1, space0); -RWByteAddressBuffer _2506 : register(u2, space0); -RWTexture2D image_atlas : register(u4, space0); -RWTexture2D gradients : register(u5, space0); -RWTexture2D image : register(u3, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -uint spvPackUnorm4x8(float4 value) -{ - uint4 Packed = uint4(round(saturate(value) * 255.0)); - return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); -} - -float4 spvUnpackUnorm4x8(uint value) -{ - uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); - return float4(Packed) / 255.0; -} - -Alloc slice_mem(Alloc a, uint offset, uint size) -{ - Alloc _310 = { a.offset + offset }; - return _310; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _297.Load(offset * 4 + 12); - return v; -} - -CmdTag Cmd_tag(Alloc a, CmdRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - CmdTag _669 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _669; -} - -CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdStroke s; - s.tile_ref = raw0; - s.half_width = asfloat(raw1); - return s; -} - -CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) -{ - CmdStrokeRef _685 = { ref.offset + 4u }; - Alloc param = a; - CmdStrokeRef param_1 = _685; - return CmdStroke_read(param, param_1); -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -TileSeg TileSeg_read(Alloc a, TileSegRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - TileSeg s; - s.origin = float2(asfloat(raw0), asfloat(raw1)); - s._vector = float2(asfloat(raw2), asfloat(raw3)); - s.y_edge = asfloat(raw4); - TileSegRef _826 = { raw5 }; - s.next = _826; - return s; -} - -uint2 chunk_offset(uint i) -{ - return uint2((i % 2u) * 8u, (i / 2u) * 4u); -} - -CmdFill CmdFill_read(Alloc a, CmdFillRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdFill s; - s.tile_ref = raw0; - s.backdrop = int(raw1); - return s; -} - -CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) -{ - CmdFillRef _675 = { ref.offset + 4u }; - Alloc param = a; - CmdFillRef param_1 = _675; - return CmdFill_read(param, param_1); -} - -CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdAlpha s; - s.alpha = asfloat(raw0); - return s; -} - -CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) -{ - CmdAlphaRef _695 = { ref.offset + 4u }; - Alloc param = a; - CmdAlphaRef param_1 = _695; - return CmdAlpha_read(param, param_1); -} - -CmdColor CmdColor_read(Alloc a, CmdColorRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdColor s; - s.rgba_color = raw0; - return s; -} - -CmdColor Cmd_Color_read(Alloc a, CmdRef ref) -{ - CmdColorRef _705 = { ref.offset + 4u }; - Alloc param = a; - CmdColorRef param_1 = _705; - return CmdColor_read(param, param_1); -} - -float3 fromsRGB(float3 srgb) -{ - return srgb; -} - -float4 unpacksRGB(uint srgba) -{ - float4 color = spvUnpackUnorm4x8(srgba).wzyx; - float3 param = color.xyz; - return float4(fromsRGB(param), color.w); -} - -CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - CmdLinGrad s; - s.index = raw0; - s.line_x = asfloat(raw1); - s.line_y = asfloat(raw2); - s.line_c = asfloat(raw3); - return s; -} - -CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) -{ - CmdLinGradRef _715 = { ref.offset + 4u }; - Alloc param = a; - CmdLinGradRef param_1 = _715; - return CmdLinGrad_read(param, param_1); -} - -CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21); - CmdRadGrad s; - s.index = raw0; - s.mat = float4(asfloat(raw1), asfloat(raw2), asfloat(raw3), asfloat(raw4)); - s.xlat = float2(asfloat(raw5), asfloat(raw6)); - s.c1 = float2(asfloat(raw7), asfloat(raw8)); - s.ra = asfloat(raw9); - s.roff = asfloat(raw10); - return s; -} - -CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) -{ - CmdRadGradRef _725 = { ref.offset + 4u }; - Alloc param = a; - CmdRadGradRef param_1 = _725; - return CmdRadGrad_read(param, param_1); -} - -CmdImage CmdImage_read(Alloc a, CmdImageRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdImage s; - s.index = raw0; - s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - return s; -} - -CmdImage Cmd_Image_read(Alloc a, CmdRef ref) -{ - CmdImageRef _735 = { ref.offset + 4u }; - Alloc param = a; - CmdImageRef param_1 = _735; - return CmdImage_read(param, param_1); -} - -void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) -{ - float4 rgba[8]; - for (uint i = 0u; i < 8u; i++) - { - uint param = i; - int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; - float4 fg_rgba = image_atlas[uv]; - float3 param_1 = fg_rgba.xyz; - float3 _1653 = fromsRGB(param_1); - fg_rgba.x = _1653.x; - fg_rgba.y = _1653.y; - fg_rgba.z = _1653.z; - rgba[i] = fg_rgba; - } - spvReturnValue = rgba; -} - -float3 tosRGB(float3 rgb) -{ - return rgb; -} - -uint packsRGB(inout float4 rgba) -{ - float3 param = rgba.xyz; - rgba = float4(tosRGB(param), rgba.w); - return spvPackUnorm4x8(rgba.wzyx); -} - -CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdEndClip s; - s.blend = raw0; - return s; -} - -CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) -{ - CmdEndClipRef _745 = { ref.offset + 4u }; - Alloc param = a; - CmdEndClipRef param_1 = _745; - return CmdEndClip_read(param, param_1); -} - -float3 screen(float3 cb, float3 cs) -{ - return (cb + cs) - (cb * cs); -} - -float3 hard_light(float3 cb, float3 cs) -{ - float3 param = cb; - float3 param_1 = (cs * 2.0f) - 1.0f.xxx; - float3 _889 = screen(param, param_1); - float3 _893 = (cb * 2.0f) * cs; - bool3 _898 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); - return float3(_898.x ? _893.x : _889.x, _898.y ? _893.y : _889.y, _898.z ? _893.z : _889.z); -} - -float color_dodge(float cb, float cs) -{ - if (cb == 0.0f) - { - return 0.0f; - } - else - { - if (cs == 1.0f) - { - return 1.0f; - } - else - { - return min(1.0f, cb / (1.0f - cs)); - } - } -} - -float color_burn(float cb, float cs) -{ - if (cb == 1.0f) - { - return 1.0f; - } - else - { - if (cs == 0.0f) - { - return 0.0f; - } - else - { - return 1.0f - min(1.0f, (1.0f - cb) / cs); - } - } -} - -float3 soft_light(float3 cb, float3 cs) -{ - float3 _904 = sqrt(cb); - float3 _917 = ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb; - bool3 _921 = bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z); - float3 d = float3(_921.x ? _917.x : _904.x, _921.y ? _917.y : _904.y, _921.z ? _917.z : _904.z); - float3 _932 = cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)); - float3 _942 = cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)); - bool3 _944 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); - return float3(_944.x ? _942.x : _932.x, _944.y ? _942.y : _932.y, _944.z ? _942.z : _932.z); -} - -float sat(float3 c) -{ - return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z)); -} - -void set_sat_inner(inout float cmin, inout float cmid, inout float cmax, float s) -{ - if (cmax > cmin) - { - cmid = ((cmid - cmin) * s) / (cmax - cmin); - cmax = s; - } - else - { - cmid = 0.0f; - cmax = 0.0f; - } - cmin = 0.0f; -} - -float3 set_sat(inout float3 c, float s) -{ - if (c.x <= c.y) - { - if (c.y <= c.z) - { - float param = c.x; - float param_1 = c.y; - float param_2 = c.z; - float param_3 = s; - set_sat_inner(param, param_1, param_2, param_3); - c.x = param; - c.y = param_1; - c.z = param_2; - } - else - { - if (c.x <= c.z) - { - float param_4 = c.x; - float param_5 = c.z; - float param_6 = c.y; - float param_7 = s; - set_sat_inner(param_4, param_5, param_6, param_7); - c.x = param_4; - c.z = param_5; - c.y = param_6; - } - else - { - float param_8 = c.z; - float param_9 = c.x; - float param_10 = c.y; - float param_11 = s; - set_sat_inner(param_8, param_9, param_10, param_11); - c.z = param_8; - c.x = param_9; - c.y = param_10; - } - } - } - else - { - if (c.x <= c.z) - { - float param_12 = c.y; - float param_13 = c.x; - float param_14 = c.z; - float param_15 = s; - set_sat_inner(param_12, param_13, param_14, param_15); - c.y = param_12; - c.x = param_13; - c.z = param_14; - } - else - { - if (c.y <= c.z) - { - float param_16 = c.y; - float param_17 = c.z; - float param_18 = c.x; - float param_19 = s; - set_sat_inner(param_16, param_17, param_18, param_19); - c.y = param_16; - c.z = param_17; - c.x = param_18; - } - else - { - float param_20 = c.z; - float param_21 = c.y; - float param_22 = c.x; - float param_23 = s; - set_sat_inner(param_20, param_21, param_22, param_23); - c.z = param_20; - c.y = param_21; - c.x = param_22; - } - } - } - return c; -} - -float lum(float3 c) -{ - float3 f = float3(0.300000011920928955078125f, 0.589999973773956298828125f, 0.10999999940395355224609375f); - return dot(c, f); -} - -float3 clip_color(inout float3 c) -{ - float3 param = c; - float L = lum(param); - float n = min(c.x, min(c.y, c.z)); - float x = max(c.x, max(c.y, c.z)); - if (n < 0.0f) - { - c = L.xxx + (((c - L.xxx) * L) / (L - n).xxx); - } - if (x > 1.0f) - { - c = L.xxx + (((c - L.xxx) * (1.0f - L)) / (x - L).xxx); - } - return c; -} - -float3 set_lum(float3 c, float l) -{ - float3 param = c; - float3 param_1 = c + (l - lum(param)).xxx; - float3 _1048 = clip_color(param_1); - return _1048; -} - -float3 mix_blend(float3 cb, float3 cs, uint mode) -{ - float3 b = 0.0f.xxx; - switch (mode) - { - case 1u: - { - b = cb * cs; - break; - } - case 2u: - { - float3 param = cb; - float3 param_1 = cs; - b = screen(param, param_1); - break; - } - case 3u: - { - float3 param_2 = cs; - float3 param_3 = cb; - b = hard_light(param_2, param_3); - break; - } - case 4u: - { - b = min(cb, cs); - break; - } - case 5u: - { - b = max(cb, cs); - break; - } - case 6u: - { - float param_4 = cb.x; - float param_5 = cs.x; - float param_6 = cb.y; - float param_7 = cs.y; - float param_8 = cb.z; - float param_9 = cs.z; - b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); - break; - } - case 7u: - { - float param_10 = cb.x; - float param_11 = cs.x; - float param_12 = cb.y; - float param_13 = cs.y; - float param_14 = cb.z; - float param_15 = cs.z; - b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); - break; - } - case 8u: - { - float3 param_16 = cb; - float3 param_17 = cs; - b = hard_light(param_16, param_17); - break; - } - case 9u: - { - float3 param_18 = cb; - float3 param_19 = cs; - b = soft_light(param_18, param_19); - break; - } - case 10u: - { - b = abs(cb - cs); - break; - } - case 11u: - { - b = (cb + cs) - ((cb * 2.0f) * cs); - break; - } - case 12u: - { - float3 param_20 = cb; - float3 param_21 = cs; - float param_22 = sat(param_20); - float3 _1340 = set_sat(param_21, param_22); - float3 param_23 = cb; - float3 param_24 = _1340; - float param_25 = lum(param_23); - b = set_lum(param_24, param_25); - break; - } - case 13u: - { - float3 param_26 = cs; - float3 param_27 = cb; - float param_28 = sat(param_26); - float3 _1354 = set_sat(param_27, param_28); - float3 param_29 = cb; - float3 param_30 = _1354; - float param_31 = lum(param_29); - b = set_lum(param_30, param_31); - break; - } - case 14u: - { - float3 param_32 = cb; - float3 param_33 = cs; - float param_34 = lum(param_32); - b = set_lum(param_33, param_34); - break; - } - case 15u: - { - float3 param_35 = cs; - float3 param_36 = cb; - float param_37 = lum(param_35); - b = set_lum(param_36, param_37); - break; - } - default: - { - b = cs; - break; - } - } - return b; -} - -float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) -{ - float fa = 0.0f; - float fb = 0.0f; - switch (mode) - { - case 1u: - { - fa = 1.0f; - fb = 0.0f; - break; - } - case 2u: - { - fa = 0.0f; - fb = 1.0f; - break; - } - case 3u: - { - fa = 1.0f; - fb = 1.0f - as; - break; - } - case 4u: - { - fa = 1.0f - ab; - fb = 1.0f; - break; - } - case 5u: - { - fa = ab; - fb = 0.0f; - break; - } - case 6u: - { - fa = 0.0f; - fb = as; - break; - } - case 7u: - { - fa = 1.0f - ab; - fb = 0.0f; - break; - } - case 8u: - { - fa = 0.0f; - fb = 1.0f - as; - break; - } - case 9u: - { - fa = ab; - fb = 1.0f - as; - break; - } - case 10u: - { - fa = 1.0f - ab; - fb = as; - break; - } - case 11u: - { - fa = 1.0f - ab; - fb = 1.0f - as; - break; - } - case 12u: - { - fa = 1.0f; - fb = 1.0f; - break; - } - case 13u: - { - return min(1.0f.xxxx, float4((cs * as) + (cb * ab), as + ab)); - } - default: - { - break; - } - } - float as_fa = as * fa; - float ab_fb = ab * fb; - float3 co = (cs * as_fa) + (cb * ab_fb); - return float4(co, as_fa + ab_fb); -} - -float4 mix_blend_compose(float4 backdrop, float4 src, uint mode) -{ - if ((mode & 32767u) == 3u) - { - return (backdrop * (1.0f - src.w)) + src; - } - float inv_src_a = 1.0f / (src.w + 1.0000000036274937255387218471014e-15f); - float3 cs = src.xyz * inv_src_a; - float inv_backdrop_a = 1.0f / (backdrop.w + 1.0000000036274937255387218471014e-15f); - float3 cb = backdrop.xyz * inv_backdrop_a; - uint blend_mode = mode >> uint(8); - float3 param = cb; - float3 param_1 = cs; - uint param_2 = blend_mode; - float3 blended = mix_blend(param, param_1, param_2); - cs = lerp(cs, blended, backdrop.w.xxx); - uint comp_mode = mode & 255u; - if (comp_mode == 3u) - { - float3 co = lerp(backdrop.xyz, cs, src.w.xxx); - return float4(co, src.w + (backdrop.w * (1.0f - src.w))); - } - else - { - float3 param_3 = cb; - float3 param_4 = cs; - float param_5 = backdrop.w; - float param_6 = src.w; - uint param_7 = comp_mode; - return mix_compose(param_3, param_4, param_5, param_6, param_7); - } -} - -CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdJump s; - s.new_ref = raw0; - return s; -} - -CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) -{ - CmdJumpRef _755 = { ref.offset + 4u }; - Alloc param = a; - CmdJumpRef param_1 = _755; - return CmdJump_read(param, param_1); -} - -void comp_main() -{ - uint tile_ix = (gl_WorkGroupID.y * _1681.Load(12)) + gl_WorkGroupID.x; - Alloc _1696; - _1696.offset = _1681.Load(28); - Alloc param; - param.offset = _1696.offset; - uint param_1 = tile_ix * 1024u; - uint param_2 = 1024u; - Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1705 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1705; - uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 12); - cmd_ref.offset += 4u; - uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); - float2 xy = float2(xy_uint); - float4 rgba[8]; - for (uint i = 0u; i < 8u; i++) - { - rgba[i] = 0.0f.xxxx; - } - uint clip_depth = 0u; - float df[8]; - TileSegRef tile_seg_ref; - float area[8]; - uint blend_stack[4][8]; - uint base_ix_1; - uint bg_rgba; - while (true) - { - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4).tag; - if (tag == 0u) - { - break; - } - switch (tag) - { - case 2u: - { - Alloc param_5 = cmd_alloc; - CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6); - for (uint k = 0u; k < 8u; k++) - { - df[k] = 1000000000.0f; - } - TileSegRef _1805 = { stroke.tile_ref }; - tile_seg_ref = _1805; - do - { - uint param_7 = tile_seg_ref.offset; - uint param_8 = 24u; - bool param_9 = true; - Alloc param_10 = new_alloc(param_7, param_8, param_9); - TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11); - float2 line_vec = seg._vector; - for (uint k_1 = 0u; k_1 < 8u; k_1++) - { - float2 dpos = (xy + 0.5f.xx) - seg.origin; - uint param_12 = k_1; - dpos += float2(chunk_offset(param_12)); - float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0f, 1.0f); - df[k_1] = min(df[k_1], length((line_vec * t) - dpos)); - } - tile_seg_ref = seg.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_2 = 0u; k_2 < 8u; k_2++) - { - area[k_2] = clamp((stroke.half_width + 0.5f) - df[k_2], 0.0f, 1.0f); - } - cmd_ref.offset += 12u; - break; - } - case 1u: - { - Alloc param_13 = cmd_alloc; - CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14); - for (uint k_3 = 0u; k_3 < 8u; k_3++) - { - area[k_3] = float(fill.backdrop); - } - TileSegRef _1924 = { fill.tile_ref }; - tile_seg_ref = _1924; - do - { - uint param_15 = tile_seg_ref.offset; - uint param_16 = 24u; - bool param_17 = true; - Alloc param_18 = new_alloc(param_15, param_16, param_17); - TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19); - for (uint k_4 = 0u; k_4 < 8u; k_4++) - { - uint param_20 = k_4; - float2 my_xy = xy + float2(chunk_offset(param_20)); - float2 start = seg_1.origin - my_xy; - float2 end = start + seg_1._vector; - float2 window = clamp(float2(start.y, end.y), 0.0f.xx, 1.0f.xx); - if (window.x != window.y) - { - float2 t_1 = (window - start.y.xx) / seg_1._vector.y.xx; - float2 xs = float2(lerp(start.x, end.x, t_1.x), lerp(start.x, end.x, t_1.y)); - float xmin = min(min(xs.x, xs.y), 1.0f) - 9.9999999747524270787835121154785e-07f; - float xmax = max(xs.x, xs.y); - float b = min(xmax, 1.0f); - float c = max(b, 0.0f); - float d = max(xmin, 0.0f); - float a = ((b + (0.5f * ((d * d) - (c * c)))) - xmin) / (xmax - xmin); - area[k_4] += (a * (window.x - window.y)); - } - area[k_4] += (sign(seg_1._vector.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0f, 0.0f, 1.0f)); - } - tile_seg_ref = seg_1.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_5 = 0u; k_5 < 8u; k_5++) - { - area[k_5] = min(abs(area[k_5]), 1.0f); - } - cmd_ref.offset += 12u; - break; - } - case 3u: - { - for (uint k_6 = 0u; k_6 < 8u; k_6++) - { - area[k_6] = 1.0f; - } - cmd_ref.offset += 4u; - break; - } - case 4u: - { - Alloc param_21 = cmd_alloc; - CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22); - for (uint k_7 = 0u; k_7 < 8u; k_7++) - { - area[k_7] = alpha.alpha; - } - cmd_ref.offset += 8u; - break; - } - case 5u: - { - Alloc param_23 = cmd_alloc; - CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24); - uint param_25 = color.rgba_color; - float4 fg = unpacksRGB(param_25); - for (uint k_8 = 0u; k_8 < 8u; k_8++) - { - float4 fg_k = fg * area[k_8]; - rgba[k_8] = (rgba[k_8] * (1.0f - fg_k.w)) + fg_k; - } - cmd_ref.offset += 8u; - break; - } - case 6u: - { - Alloc param_26 = cmd_alloc; - CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27); - float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; - for (uint k_9 = 0u; k_9 < 8u; k_9++) - { - uint param_28 = k_9; - float2 chunk_xy = float2(chunk_offset(param_28)); - float my_d = (d_1 + (lin.line_x * chunk_xy.x)) + (lin.line_y * chunk_xy.y); - int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); - float4 fg_rgba = gradients[int2(x, int(lin.index))]; - float3 param_29 = fg_rgba.xyz; - float3 _2257 = fromsRGB(param_29); - fg_rgba.x = _2257.x; - fg_rgba.y = _2257.y; - fg_rgba.z = _2257.z; - float4 fg_k_1 = fg_rgba * area[k_9]; - rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; - } - cmd_ref.offset += 20u; - break; - } - case 7u: - { - Alloc param_30 = cmd_alloc; - CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31); - for (uint k_10 = 0u; k_10 < 8u; k_10++) - { - uint param_32 = k_10; - float2 my_xy_1 = xy + float2(chunk_offset(param_32)); - my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; - float ba = dot(my_xy_1, rad.c1); - float ca = rad.ra * dot(my_xy_1, my_xy_1); - float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; - int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); - float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; - float3 param_33 = fg_rgba_1.xyz; - float3 _2367 = fromsRGB(param_33); - fg_rgba_1.x = _2367.x; - fg_rgba_1.y = _2367.y; - fg_rgba_1.z = _2367.z; - float4 fg_k_2 = fg_rgba_1 * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; - } - cmd_ref.offset += 48u; - break; - } - case 8u: - { - Alloc param_34 = cmd_alloc; - CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35); - uint2 param_36 = xy_uint; - CmdImage param_37 = fill_img; - float4 _2410[8]; - fillImage(_2410, param_36, param_37); - float4 img[8] = _2410; - for (uint k_11 = 0u; k_11 < 8u; k_11++) - { - float4 fg_k_3 = img[k_11] * area[k_11]; - rgba[k_11] = (rgba[k_11] * (1.0f - fg_k_3.w)) + fg_k_3; - } - cmd_ref.offset += 12u; - break; - } - case 9u: - { - if (clip_depth < 4u) - { - for (uint k_12 = 0u; k_12 < 8u; k_12++) - { - float4 param_38 = float4(rgba[k_12]); - uint _2472 = packsRGB(param_38); - blend_stack[clip_depth][k_12] = _2472; - rgba[k_12] = 0.0f.xxxx; - } - } - else - { - uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - for (uint k_13 = 0u; k_13 < 8u; k_13++) - { - float4 param_39 = float4(rgba[k_13]); - uint _2519 = packsRGB(param_39); - _2506.Store((base_ix + k_13) * 4 + 0, _2519); - rgba[k_13] = 0.0f.xxxx; - } - } - clip_depth++; - cmd_ref.offset += 4u; - break; - } - case 10u: - { - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41); - clip_depth--; - if (clip_depth >= 4u) - { - base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - } - for (uint k_14 = 0u; k_14 < 8u; k_14++) - { - if (clip_depth < 4u) - { - bg_rgba = blend_stack[clip_depth][k_14]; - } - else - { - bg_rgba = _2506.Load((base_ix_1 + k_14) * 4 + 0); - } - uint param_42 = bg_rgba; - float4 bg = unpacksRGB(param_42); - float4 fg_1 = rgba[k_14] * area[k_14]; - float4 param_43 = bg; - float4 param_44 = fg_1; - uint param_45 = end_clip.blend; - rgba[k_14] = mix_blend_compose(param_43, param_44, param_45); - } - cmd_ref.offset += 8u; - break; - } - case 11u: - { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - CmdRef _2618 = { Cmd_Jump_read(param_46, param_47).new_ref }; - cmd_ref = _2618; - cmd_alloc.offset = cmd_ref.offset; - break; - } - } - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint param_48 = i_1; - float3 param_49 = rgba[i_1].xyz; - image[int2(xy_uint + chunk_offset(param_48))] = float4(tosRGB(param_49), rgba[i_1].w); - } -} - -[numthreads(8, 4, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl deleted file mode 100644 index 1cf8cb3..0000000 --- a/piet-gpu/shader/gen/kernel4.msl +++ /dev/null @@ -1,1355 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Alloc -{ - uint offset; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdAlphaRef -{ - uint offset; -}; - -struct CmdAlpha -{ - float alpha; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct CmdTag -{ - uint tag; - uint flags; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 vector; - float y_edge; - TileSegRef next; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct BlendBuf -{ - uint blend_mem[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u); - -static inline __attribute__((always_inline)) -Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size) -{ - return Alloc{ a.offset + offset }; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_297) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_297.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_297); - return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdStroke s; - s.tile_ref = raw0; - s.half_width = as_type(raw1); - return s; -} - -static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_297); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_297); - TileSeg s; - s.origin = float2(as_type(raw0), as_type(raw1)); - s.vector = float2(as_type(raw2), as_type(raw3)); - s.y_edge = as_type(raw4); - s.next = TileSegRef{ raw5 }; - return s; -} - -static inline __attribute__((always_inline)) -uint2 chunk_offset(thread const uint& i) -{ - return uint2((i % 2u) * 8u, (i / 2u) * 4u); -} - -static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdFill s; - s.tile_ref = raw0; - s.backdrop = int(raw1); - return s; -} - -static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdAlpha s; - s.alpha = as_type(raw0); - return s; -} - -static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdColor s; - s.rgba_color = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -float3 fromsRGB(thread const float3& srgb) -{ - return srgb; -} - -static inline __attribute__((always_inline)) -float4 unpacksRGB(thread const uint& srgba) -{ - float4 color = unpack_unorm4x8_to_float(srgba).wzyx; - float3 param = color.xyz; - return float4(fromsRGB(param), color.w); -} - -static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - CmdLinGrad s; - s.index = raw0; - s.line_x = as_type(raw1); - s.line_y = as_type(raw2); - s.line_c = as_type(raw3); - return s; -} - -static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_297); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_297); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_297); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_297); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_297); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19, v_297); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21, v_297); - CmdRadGrad s; - s.index = raw0; - s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); - s.xlat = float2(as_type(raw5), as_type(raw6)); - s.c1 = float2(as_type(raw7), as_type(raw8)); - s.ra = as_type(raw9); - s.roff = as_type(raw10); - return s; -} - -static inline __attribute__((always_inline)) -CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; - return CmdRadGrad_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdImage s; - s.index = raw0; - s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - return s; -} - -static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImage& cmd_img, texture2d image_atlas) -{ - spvUnsafeArray rgba; - for (uint i = 0u; i < 8u; i++) - { - uint param = i; - int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; - float4 fg_rgba = image_atlas.read(uint2(uv)); - float3 param_1 = fg_rgba.xyz; - float3 _1653 = fromsRGB(param_1); - fg_rgba.x = _1653.x; - fg_rgba.y = _1653.y; - fg_rgba.z = _1653.z; - rgba[i] = fg_rgba; - } - return rgba; -} - -static inline __attribute__((always_inline)) -float3 tosRGB(thread const float3& rgb) -{ - return rgb; -} - -static inline __attribute__((always_inline)) -uint packsRGB(thread float4& rgba) -{ - float3 param = rgba.xyz; - rgba = float4(tosRGB(param), rgba.w); - return pack_float_to_unorm4x8(rgba.wzyx); -} - -static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdEndClip s; - s.blend = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -float3 screen(thread const float3& cb, thread const float3& cs) -{ - return (cb + cs) - (cb * cs); -} - -static inline __attribute__((always_inline)) -float3 hard_light(thread const float3& cb, thread const float3& cs) -{ - float3 param = cb; - float3 param_1 = (cs * 2.0) - float3(1.0); - return select(screen(param, param_1), (cb * 2.0) * cs, cs <= float3(0.5)); -} - -static inline __attribute__((always_inline)) -float color_dodge(thread const float& cb, thread const float& cs) -{ - if (cb == 0.0) - { - return 0.0; - } - else - { - if (cs == 1.0) - { - return 1.0; - } - else - { - return fast::min(1.0, cb / (1.0 - cs)); - } - } -} - -static inline __attribute__((always_inline)) -float color_burn(thread const float& cb, thread const float& cs) -{ - if (cb == 1.0) - { - return 1.0; - } - else - { - if (cs == 0.0) - { - return 0.0; - } - else - { - return 1.0 - fast::min(1.0, (1.0 - cb) / cs); - } - } -} - -static inline __attribute__((always_inline)) -float3 soft_light(thread const float3& cb, thread const float3& cs) -{ - float3 d = select(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, cb <= float3(0.25)); - return select(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), cs <= float3(0.5)); -} - -static inline __attribute__((always_inline)) -float sat(thread const float3& c) -{ - return fast::max(c.x, fast::max(c.y, c.z)) - fast::min(c.x, fast::min(c.y, c.z)); -} - -static inline __attribute__((always_inline)) -void set_sat_inner(thread float& cmin, thread float& cmid, thread float& cmax, thread const float& s) -{ - if (cmax > cmin) - { - cmid = ((cmid - cmin) * s) / (cmax - cmin); - cmax = s; - } - else - { - cmid = 0.0; - cmax = 0.0; - } - cmin = 0.0; -} - -static inline __attribute__((always_inline)) -float3 set_sat(thread float3& c, thread const float& s) -{ - if (c.x <= c.y) - { - if (c.y <= c.z) - { - float param = c.x; - float param_1 = c.y; - float param_2 = c.z; - float param_3 = s; - set_sat_inner(param, param_1, param_2, param_3); - c.x = param; - c.y = param_1; - c.z = param_2; - } - else - { - if (c.x <= c.z) - { - float param_4 = c.x; - float param_5 = c.z; - float param_6 = c.y; - float param_7 = s; - set_sat_inner(param_4, param_5, param_6, param_7); - c.x = param_4; - c.z = param_5; - c.y = param_6; - } - else - { - float param_8 = c.z; - float param_9 = c.x; - float param_10 = c.y; - float param_11 = s; - set_sat_inner(param_8, param_9, param_10, param_11); - c.z = param_8; - c.x = param_9; - c.y = param_10; - } - } - } - else - { - if (c.x <= c.z) - { - float param_12 = c.y; - float param_13 = c.x; - float param_14 = c.z; - float param_15 = s; - set_sat_inner(param_12, param_13, param_14, param_15); - c.y = param_12; - c.x = param_13; - c.z = param_14; - } - else - { - if (c.y <= c.z) - { - float param_16 = c.y; - float param_17 = c.z; - float param_18 = c.x; - float param_19 = s; - set_sat_inner(param_16, param_17, param_18, param_19); - c.y = param_16; - c.z = param_17; - c.x = param_18; - } - else - { - float param_20 = c.z; - float param_21 = c.y; - float param_22 = c.x; - float param_23 = s; - set_sat_inner(param_20, param_21, param_22, param_23); - c.z = param_20; - c.y = param_21; - c.x = param_22; - } - } - } - return c; -} - -static inline __attribute__((always_inline)) -float lum(thread const float3& c) -{ - float3 f = float3(0.300000011920928955078125, 0.589999973773956298828125, 0.10999999940395355224609375); - return dot(c, f); -} - -static inline __attribute__((always_inline)) -float3 clip_color(thread float3& c) -{ - float3 param = c; - float L = lum(param); - float n = fast::min(c.x, fast::min(c.y, c.z)); - float x = fast::max(c.x, fast::max(c.y, c.z)); - if (n < 0.0) - { - c = float3(L) + (((c - float3(L)) * L) / float3(L - n)); - } - if (x > 1.0) - { - c = float3(L) + (((c - float3(L)) * (1.0 - L)) / float3(x - L)); - } - return c; -} - -static inline __attribute__((always_inline)) -float3 set_lum(thread const float3& c, thread const float& l) -{ - float3 param = c; - float3 param_1 = c + float3(l - lum(param)); - float3 _1048 = clip_color(param_1); - return _1048; -} - -static inline __attribute__((always_inline)) -float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const uint& mode) -{ - float3 b = float3(0.0); - switch (mode) - { - case 1u: - { - b = cb * cs; - break; - } - case 2u: - { - float3 param = cb; - float3 param_1 = cs; - b = screen(param, param_1); - break; - } - case 3u: - { - float3 param_2 = cs; - float3 param_3 = cb; - b = hard_light(param_2, param_3); - break; - } - case 4u: - { - b = fast::min(cb, cs); - break; - } - case 5u: - { - b = fast::max(cb, cs); - break; - } - case 6u: - { - float param_4 = cb.x; - float param_5 = cs.x; - float param_6 = cb.y; - float param_7 = cs.y; - float param_8 = cb.z; - float param_9 = cs.z; - b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); - break; - } - case 7u: - { - float param_10 = cb.x; - float param_11 = cs.x; - float param_12 = cb.y; - float param_13 = cs.y; - float param_14 = cb.z; - float param_15 = cs.z; - b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); - break; - } - case 8u: - { - float3 param_16 = cb; - float3 param_17 = cs; - b = hard_light(param_16, param_17); - break; - } - case 9u: - { - float3 param_18 = cb; - float3 param_19 = cs; - b = soft_light(param_18, param_19); - break; - } - case 10u: - { - b = abs(cb - cs); - break; - } - case 11u: - { - b = (cb + cs) - ((cb * 2.0) * cs); - break; - } - case 12u: - { - float3 param_20 = cb; - float3 param_21 = cs; - float param_22 = sat(param_20); - float3 _1340 = set_sat(param_21, param_22); - float3 param_23 = cb; - float3 param_24 = _1340; - float param_25 = lum(param_23); - b = set_lum(param_24, param_25); - break; - } - case 13u: - { - float3 param_26 = cs; - float3 param_27 = cb; - float param_28 = sat(param_26); - float3 _1354 = set_sat(param_27, param_28); - float3 param_29 = cb; - float3 param_30 = _1354; - float param_31 = lum(param_29); - b = set_lum(param_30, param_31); - break; - } - case 14u: - { - float3 param_32 = cb; - float3 param_33 = cs; - float param_34 = lum(param_32); - b = set_lum(param_33, param_34); - break; - } - case 15u: - { - float3 param_35 = cs; - float3 param_36 = cb; - float param_37 = lum(param_35); - b = set_lum(param_36, param_37); - break; - } - default: - { - b = cs; - break; - } - } - return b; -} - -static inline __attribute__((always_inline)) -float4 mix_compose(thread const float3& cb, thread const float3& cs, thread const float& ab, thread const float& as, thread const uint& mode) -{ - float fa = 0.0; - float fb = 0.0; - switch (mode) - { - case 1u: - { - fa = 1.0; - fb = 0.0; - break; - } - case 2u: - { - fa = 0.0; - fb = 1.0; - break; - } - case 3u: - { - fa = 1.0; - fb = 1.0 - as; - break; - } - case 4u: - { - fa = 1.0 - ab; - fb = 1.0; - break; - } - case 5u: - { - fa = ab; - fb = 0.0; - break; - } - case 6u: - { - fa = 0.0; - fb = as; - break; - } - case 7u: - { - fa = 1.0 - ab; - fb = 0.0; - break; - } - case 8u: - { - fa = 0.0; - fb = 1.0 - as; - break; - } - case 9u: - { - fa = ab; - fb = 1.0 - as; - break; - } - case 10u: - { - fa = 1.0 - ab; - fb = as; - break; - } - case 11u: - { - fa = 1.0 - ab; - fb = 1.0 - as; - break; - } - case 12u: - { - fa = 1.0; - fb = 1.0; - break; - } - case 13u: - { - return fast::min(float4(1.0), float4((cs * as) + (cb * ab), as + ab)); - } - default: - { - break; - } - } - float as_fa = as * fa; - float ab_fb = ab * fb; - float3 co = (cs * as_fa) + (cb * ab_fb); - return float4(co, as_fa + ab_fb); -} - -static inline __attribute__((always_inline)) -float4 mix_blend_compose(thread const float4& backdrop, thread const float4& src, thread const uint& mode) -{ - if ((mode & 32767u) == 3u) - { - return (backdrop * (1.0 - src.w)) + src; - } - float inv_src_a = 1.0 / (src.w + 1.0000000036274937255387218471014e-15); - float3 cs = src.xyz * inv_src_a; - float inv_backdrop_a = 1.0 / (backdrop.w + 1.0000000036274937255387218471014e-15); - float3 cb = backdrop.xyz * inv_backdrop_a; - uint blend_mode = mode >> uint(8); - float3 param = cb; - float3 param_1 = cs; - uint param_2 = blend_mode; - float3 blended = mix_blend(param, param_1, param_2); - cs = mix(cs, blended, float3(backdrop.w)); - uint comp_mode = mode & 255u; - if (comp_mode == 3u) - { - float3 co = mix(backdrop.xyz, cs, float3(src.w)); - return float4(co, src.w + (backdrop.w * (1.0 - src.w))); - } - else - { - float3 param_3 = cb; - float3 param_4 = cs; - float param_5 = backdrop.w; - float param_6 = src.w; - uint param_7 = comp_mode; - return mix_compose(param_3, param_4, param_5, param_6, param_7); - } -} - -static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdJump s; - s.new_ref = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_297); -} - -kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], device BlendBuf& _2506 [[buffer(2)]], texture2d image [[texture(3)]], texture2d image_atlas [[texture(4)]], texture2d gradients [[texture(5)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x; - Alloc param; - param.offset = _1681.conf.ptcl_alloc.offset; - uint param_1 = tile_ix * 1024u; - uint param_2 = 1024u; - Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; - uint blend_offset = v_297.memory[cmd_ref.offset >> uint(2)]; - cmd_ref.offset += 4u; - uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); - float2 xy = float2(xy_uint); - spvUnsafeArray rgba; - for (uint i = 0u; i < 8u; i++) - { - rgba[i] = float4(0.0); - } - uint clip_depth = 0u; - spvUnsafeArray df; - TileSegRef tile_seg_ref; - spvUnsafeArray area; - spvUnsafeArray, 4> blend_stack; - uint base_ix_1; - uint bg_rgba; - while (true) - { - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_297).tag; - if (tag == 0u) - { - break; - } - switch (tag) - { - case 2u: - { - Alloc param_5 = cmd_alloc; - CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_297); - for (uint k = 0u; k < 8u; k++) - { - df[k] = 1000000000.0; - } - tile_seg_ref = TileSegRef{ stroke.tile_ref }; - do - { - uint param_7 = tile_seg_ref.offset; - uint param_8 = 24u; - bool param_9 = true; - Alloc param_10 = new_alloc(param_7, param_8, param_9); - TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_297); - float2 line_vec = seg.vector; - for (uint k_1 = 0u; k_1 < 8u; k_1++) - { - float2 dpos = (xy + float2(0.5)) - seg.origin; - uint param_12 = k_1; - dpos += float2(chunk_offset(param_12)); - float t = fast::clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); - df[k_1] = fast::min(df[k_1], length((line_vec * t) - dpos)); - } - tile_seg_ref = seg.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_2 = 0u; k_2 < 8u; k_2++) - { - area[k_2] = fast::clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0); - } - cmd_ref.offset += 12u; - break; - } - case 1u: - { - Alloc param_13 = cmd_alloc; - CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_297); - for (uint k_3 = 0u; k_3 < 8u; k_3++) - { - area[k_3] = float(fill.backdrop); - } - tile_seg_ref = TileSegRef{ fill.tile_ref }; - do - { - uint param_15 = tile_seg_ref.offset; - uint param_16 = 24u; - bool param_17 = true; - Alloc param_18 = new_alloc(param_15, param_16, param_17); - TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297); - for (uint k_4 = 0u; k_4 < 8u; k_4++) - { - uint param_20 = k_4; - float2 my_xy = xy + float2(chunk_offset(param_20)); - float2 start = seg_1.origin - my_xy; - float2 end = start + seg_1.vector; - float2 window = fast::clamp(float2(start.y, end.y), float2(0.0), float2(1.0)); - if ((isunordered(window.x, window.y) || window.x != window.y)) - { - float2 t_1 = (window - float2(start.y)) / float2(seg_1.vector.y); - float2 xs = float2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y)); - float xmin = fast::min(fast::min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07; - float xmax = fast::max(xs.x, xs.y); - float b = fast::min(xmax, 1.0); - float c = fast::max(b, 0.0); - float d = fast::max(xmin, 0.0); - float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin); - area[k_4] += (a * (window.x - window.y)); - } - area[k_4] += (sign(seg_1.vector.x) * fast::clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0)); - } - tile_seg_ref = seg_1.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_5 = 0u; k_5 < 8u; k_5++) - { - area[k_5] = fast::min(abs(area[k_5]), 1.0); - } - cmd_ref.offset += 12u; - break; - } - case 3u: - { - for (uint k_6 = 0u; k_6 < 8u; k_6++) - { - area[k_6] = 1.0; - } - cmd_ref.offset += 4u; - break; - } - case 4u: - { - Alloc param_21 = cmd_alloc; - CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_297); - for (uint k_7 = 0u; k_7 < 8u; k_7++) - { - area[k_7] = alpha.alpha; - } - cmd_ref.offset += 8u; - break; - } - case 5u: - { - Alloc param_23 = cmd_alloc; - CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_297); - uint param_25 = color.rgba_color; - float4 fg = unpacksRGB(param_25); - for (uint k_8 = 0u; k_8 < 8u; k_8++) - { - float4 fg_k = fg * area[k_8]; - rgba[k_8] = (rgba[k_8] * (1.0 - fg_k.w)) + fg_k; - } - cmd_ref.offset += 8u; - break; - } - case 6u: - { - Alloc param_26 = cmd_alloc; - CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_297); - float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; - for (uint k_9 = 0u; k_9 < 8u; k_9++) - { - uint param_28 = k_9; - float2 chunk_xy = float2(chunk_offset(param_28)); - float my_d = (d_1 + (lin.line_x * chunk_xy.x)) + (lin.line_y * chunk_xy.y); - int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); - float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); - float3 param_29 = fg_rgba.xyz; - float3 _2257 = fromsRGB(param_29); - fg_rgba.x = _2257.x; - fg_rgba.y = _2257.y; - fg_rgba.z = _2257.z; - float4 fg_k_1 = fg_rgba * area[k_9]; - rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; - } - cmd_ref.offset += 20u; - break; - } - case 7u: - { - Alloc param_30 = cmd_alloc; - CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_297); - for (uint k_10 = 0u; k_10 < 8u; k_10++) - { - uint param_32 = k_10; - float2 my_xy_1 = xy + float2(chunk_offset(param_32)); - my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; - float ba = dot(my_xy_1, rad.c1); - float ca = rad.ra * dot(my_xy_1, my_xy_1); - float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; - int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); - float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); - float3 param_33 = fg_rgba_1.xyz; - float3 _2367 = fromsRGB(param_33); - fg_rgba_1.x = _2367.x; - fg_rgba_1.y = _2367.y; - fg_rgba_1.z = _2367.z; - float4 fg_k_2 = fg_rgba_1 * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; - } - cmd_ref.offset += 48u; - break; - } - case 8u: - { - Alloc param_34 = cmd_alloc; - CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_297); - uint2 param_36 = xy_uint; - CmdImage param_37 = fill_img; - spvUnsafeArray img; - img = fillImage(param_36, param_37, image_atlas); - for (uint k_11 = 0u; k_11 < 8u; k_11++) - { - float4 fg_k_3 = img[k_11] * area[k_11]; - rgba[k_11] = (rgba[k_11] * (1.0 - fg_k_3.w)) + fg_k_3; - } - cmd_ref.offset += 12u; - break; - } - case 9u: - { - if (clip_depth < 4u) - { - for (uint k_12 = 0u; k_12 < 8u; k_12++) - { - float4 param_38 = float4(rgba[k_12]); - uint _2472 = packsRGB(param_38); - blend_stack[clip_depth][k_12] = _2472; - rgba[k_12] = float4(0.0); - } - } - else - { - uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - for (uint k_13 = 0u; k_13 < 8u; k_13++) - { - float4 param_39 = float4(rgba[k_13]); - uint _2519 = packsRGB(param_39); - _2506.blend_mem[base_ix + k_13] = _2519; - rgba[k_13] = float4(0.0); - } - } - clip_depth++; - cmd_ref.offset += 4u; - break; - } - case 10u: - { - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41, v_297); - clip_depth--; - if (clip_depth >= 4u) - { - base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - } - for (uint k_14 = 0u; k_14 < 8u; k_14++) - { - if (clip_depth < 4u) - { - bg_rgba = blend_stack[clip_depth][k_14]; - } - else - { - bg_rgba = _2506.blend_mem[base_ix_1 + k_14]; - } - uint param_42 = bg_rgba; - float4 bg = unpacksRGB(param_42); - float4 fg_1 = rgba[k_14] * area[k_14]; - float4 param_43 = bg; - float4 param_44 = fg_1; - uint param_45 = end_clip.blend; - rgba[k_14] = mix_blend_compose(param_43, param_44, param_45); - } - cmd_ref.offset += 8u; - break; - } - case 11u: - { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_297).new_ref }; - cmd_alloc.offset = cmd_ref.offset; - break; - } - } - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint param_48 = i_1; - float3 param_49 = rgba[i_1].xyz; - image.write(float4(tosRGB(param_49), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48)))); - } -} - diff --git a/piet-gpu/shader/gen/kernel4.spv b/piet-gpu/shader/gen/kernel4.spv deleted file mode 100644 index f9198c3..0000000 Binary files a/piet-gpu/shader/gen/kernel4.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil deleted file mode 100644 index bacd925..0000000 Binary files a/piet-gpu/shader/gen/kernel4_gray.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl deleted file mode 100644 index 392d1f3..0000000 --- a/piet-gpu/shader/gen/kernel4_gray.hlsl +++ /dev/null @@ -1,1303 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdAlphaRef -{ - uint offset; -}; - -struct CmdAlpha -{ - float alpha; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct CmdTag -{ - uint tag; - uint flags; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 _vector; - float y_edge; - TileSegRef next; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); - -RWByteAddressBuffer _297 : register(u0, space0); -ByteAddressBuffer _1681 : register(t1, space0); -RWByteAddressBuffer _2506 : register(u2, space0); -RWTexture2D image_atlas : register(u4, space0); -RWTexture2D gradients : register(u5, space0); -RWTexture2D image : register(u3, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -uint spvPackUnorm4x8(float4 value) -{ - uint4 Packed = uint4(round(saturate(value) * 255.0)); - return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); -} - -float4 spvUnpackUnorm4x8(uint value) -{ - uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); - return float4(Packed) / 255.0; -} - -Alloc slice_mem(Alloc a, uint offset, uint size) -{ - Alloc _310 = { a.offset + offset }; - return _310; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _297.Load(offset * 4 + 12); - return v; -} - -CmdTag Cmd_tag(Alloc a, CmdRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - CmdTag _669 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _669; -} - -CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdStroke s; - s.tile_ref = raw0; - s.half_width = asfloat(raw1); - return s; -} - -CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) -{ - CmdStrokeRef _685 = { ref.offset + 4u }; - Alloc param = a; - CmdStrokeRef param_1 = _685; - return CmdStroke_read(param, param_1); -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -TileSeg TileSeg_read(Alloc a, TileSegRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - TileSeg s; - s.origin = float2(asfloat(raw0), asfloat(raw1)); - s._vector = float2(asfloat(raw2), asfloat(raw3)); - s.y_edge = asfloat(raw4); - TileSegRef _826 = { raw5 }; - s.next = _826; - return s; -} - -uint2 chunk_offset(uint i) -{ - return uint2((i % 2u) * 8u, (i / 2u) * 4u); -} - -CmdFill CmdFill_read(Alloc a, CmdFillRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdFill s; - s.tile_ref = raw0; - s.backdrop = int(raw1); - return s; -} - -CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) -{ - CmdFillRef _675 = { ref.offset + 4u }; - Alloc param = a; - CmdFillRef param_1 = _675; - return CmdFill_read(param, param_1); -} - -CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdAlpha s; - s.alpha = asfloat(raw0); - return s; -} - -CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) -{ - CmdAlphaRef _695 = { ref.offset + 4u }; - Alloc param = a; - CmdAlphaRef param_1 = _695; - return CmdAlpha_read(param, param_1); -} - -CmdColor CmdColor_read(Alloc a, CmdColorRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdColor s; - s.rgba_color = raw0; - return s; -} - -CmdColor Cmd_Color_read(Alloc a, CmdRef ref) -{ - CmdColorRef _705 = { ref.offset + 4u }; - Alloc param = a; - CmdColorRef param_1 = _705; - return CmdColor_read(param, param_1); -} - -float3 fromsRGB(float3 srgb) -{ - return srgb; -} - -float4 unpacksRGB(uint srgba) -{ - float4 color = spvUnpackUnorm4x8(srgba).wzyx; - float3 param = color.xyz; - return float4(fromsRGB(param), color.w); -} - -CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - CmdLinGrad s; - s.index = raw0; - s.line_x = asfloat(raw1); - s.line_y = asfloat(raw2); - s.line_c = asfloat(raw3); - return s; -} - -CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) -{ - CmdLinGradRef _715 = { ref.offset + 4u }; - Alloc param = a; - CmdLinGradRef param_1 = _715; - return CmdLinGrad_read(param, param_1); -} - -CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21); - CmdRadGrad s; - s.index = raw0; - s.mat = float4(asfloat(raw1), asfloat(raw2), asfloat(raw3), asfloat(raw4)); - s.xlat = float2(asfloat(raw5), asfloat(raw6)); - s.c1 = float2(asfloat(raw7), asfloat(raw8)); - s.ra = asfloat(raw9); - s.roff = asfloat(raw10); - return s; -} - -CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) -{ - CmdRadGradRef _725 = { ref.offset + 4u }; - Alloc param = a; - CmdRadGradRef param_1 = _725; - return CmdRadGrad_read(param, param_1); -} - -CmdImage CmdImage_read(Alloc a, CmdImageRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdImage s; - s.index = raw0; - s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - return s; -} - -CmdImage Cmd_Image_read(Alloc a, CmdRef ref) -{ - CmdImageRef _735 = { ref.offset + 4u }; - Alloc param = a; - CmdImageRef param_1 = _735; - return CmdImage_read(param, param_1); -} - -void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) -{ - float4 rgba[8]; - for (uint i = 0u; i < 8u; i++) - { - uint param = i; - int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; - float4 fg_rgba = image_atlas[uv]; - float3 param_1 = fg_rgba.xyz; - float3 _1653 = fromsRGB(param_1); - fg_rgba.x = _1653.x; - fg_rgba.y = _1653.y; - fg_rgba.z = _1653.z; - rgba[i] = fg_rgba; - } - spvReturnValue = rgba; -} - -float3 tosRGB(float3 rgb) -{ - return rgb; -} - -uint packsRGB(inout float4 rgba) -{ - float3 param = rgba.xyz; - rgba = float4(tosRGB(param), rgba.w); - return spvPackUnorm4x8(rgba.wzyx); -} - -CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdEndClip s; - s.blend = raw0; - return s; -} - -CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) -{ - CmdEndClipRef _745 = { ref.offset + 4u }; - Alloc param = a; - CmdEndClipRef param_1 = _745; - return CmdEndClip_read(param, param_1); -} - -float3 screen(float3 cb, float3 cs) -{ - return (cb + cs) - (cb * cs); -} - -float3 hard_light(float3 cb, float3 cs) -{ - float3 param = cb; - float3 param_1 = (cs * 2.0f) - 1.0f.xxx; - float3 _889 = screen(param, param_1); - float3 _893 = (cb * 2.0f) * cs; - bool3 _898 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); - return float3(_898.x ? _893.x : _889.x, _898.y ? _893.y : _889.y, _898.z ? _893.z : _889.z); -} - -float color_dodge(float cb, float cs) -{ - if (cb == 0.0f) - { - return 0.0f; - } - else - { - if (cs == 1.0f) - { - return 1.0f; - } - else - { - return min(1.0f, cb / (1.0f - cs)); - } - } -} - -float color_burn(float cb, float cs) -{ - if (cb == 1.0f) - { - return 1.0f; - } - else - { - if (cs == 0.0f) - { - return 0.0f; - } - else - { - return 1.0f - min(1.0f, (1.0f - cb) / cs); - } - } -} - -float3 soft_light(float3 cb, float3 cs) -{ - float3 _904 = sqrt(cb); - float3 _917 = ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb; - bool3 _921 = bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z); - float3 d = float3(_921.x ? _917.x : _904.x, _921.y ? _917.y : _904.y, _921.z ? _917.z : _904.z); - float3 _932 = cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)); - float3 _942 = cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)); - bool3 _944 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); - return float3(_944.x ? _942.x : _932.x, _944.y ? _942.y : _932.y, _944.z ? _942.z : _932.z); -} - -float sat(float3 c) -{ - return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z)); -} - -void set_sat_inner(inout float cmin, inout float cmid, inout float cmax, float s) -{ - if (cmax > cmin) - { - cmid = ((cmid - cmin) * s) / (cmax - cmin); - cmax = s; - } - else - { - cmid = 0.0f; - cmax = 0.0f; - } - cmin = 0.0f; -} - -float3 set_sat(inout float3 c, float s) -{ - if (c.x <= c.y) - { - if (c.y <= c.z) - { - float param = c.x; - float param_1 = c.y; - float param_2 = c.z; - float param_3 = s; - set_sat_inner(param, param_1, param_2, param_3); - c.x = param; - c.y = param_1; - c.z = param_2; - } - else - { - if (c.x <= c.z) - { - float param_4 = c.x; - float param_5 = c.z; - float param_6 = c.y; - float param_7 = s; - set_sat_inner(param_4, param_5, param_6, param_7); - c.x = param_4; - c.z = param_5; - c.y = param_6; - } - else - { - float param_8 = c.z; - float param_9 = c.x; - float param_10 = c.y; - float param_11 = s; - set_sat_inner(param_8, param_9, param_10, param_11); - c.z = param_8; - c.x = param_9; - c.y = param_10; - } - } - } - else - { - if (c.x <= c.z) - { - float param_12 = c.y; - float param_13 = c.x; - float param_14 = c.z; - float param_15 = s; - set_sat_inner(param_12, param_13, param_14, param_15); - c.y = param_12; - c.x = param_13; - c.z = param_14; - } - else - { - if (c.y <= c.z) - { - float param_16 = c.y; - float param_17 = c.z; - float param_18 = c.x; - float param_19 = s; - set_sat_inner(param_16, param_17, param_18, param_19); - c.y = param_16; - c.z = param_17; - c.x = param_18; - } - else - { - float param_20 = c.z; - float param_21 = c.y; - float param_22 = c.x; - float param_23 = s; - set_sat_inner(param_20, param_21, param_22, param_23); - c.z = param_20; - c.y = param_21; - c.x = param_22; - } - } - } - return c; -} - -float lum(float3 c) -{ - float3 f = float3(0.300000011920928955078125f, 0.589999973773956298828125f, 0.10999999940395355224609375f); - return dot(c, f); -} - -float3 clip_color(inout float3 c) -{ - float3 param = c; - float L = lum(param); - float n = min(c.x, min(c.y, c.z)); - float x = max(c.x, max(c.y, c.z)); - if (n < 0.0f) - { - c = L.xxx + (((c - L.xxx) * L) / (L - n).xxx); - } - if (x > 1.0f) - { - c = L.xxx + (((c - L.xxx) * (1.0f - L)) / (x - L).xxx); - } - return c; -} - -float3 set_lum(float3 c, float l) -{ - float3 param = c; - float3 param_1 = c + (l - lum(param)).xxx; - float3 _1048 = clip_color(param_1); - return _1048; -} - -float3 mix_blend(float3 cb, float3 cs, uint mode) -{ - float3 b = 0.0f.xxx; - switch (mode) - { - case 1u: - { - b = cb * cs; - break; - } - case 2u: - { - float3 param = cb; - float3 param_1 = cs; - b = screen(param, param_1); - break; - } - case 3u: - { - float3 param_2 = cs; - float3 param_3 = cb; - b = hard_light(param_2, param_3); - break; - } - case 4u: - { - b = min(cb, cs); - break; - } - case 5u: - { - b = max(cb, cs); - break; - } - case 6u: - { - float param_4 = cb.x; - float param_5 = cs.x; - float param_6 = cb.y; - float param_7 = cs.y; - float param_8 = cb.z; - float param_9 = cs.z; - b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); - break; - } - case 7u: - { - float param_10 = cb.x; - float param_11 = cs.x; - float param_12 = cb.y; - float param_13 = cs.y; - float param_14 = cb.z; - float param_15 = cs.z; - b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); - break; - } - case 8u: - { - float3 param_16 = cb; - float3 param_17 = cs; - b = hard_light(param_16, param_17); - break; - } - case 9u: - { - float3 param_18 = cb; - float3 param_19 = cs; - b = soft_light(param_18, param_19); - break; - } - case 10u: - { - b = abs(cb - cs); - break; - } - case 11u: - { - b = (cb + cs) - ((cb * 2.0f) * cs); - break; - } - case 12u: - { - float3 param_20 = cb; - float3 param_21 = cs; - float param_22 = sat(param_20); - float3 _1340 = set_sat(param_21, param_22); - float3 param_23 = cb; - float3 param_24 = _1340; - float param_25 = lum(param_23); - b = set_lum(param_24, param_25); - break; - } - case 13u: - { - float3 param_26 = cs; - float3 param_27 = cb; - float param_28 = sat(param_26); - float3 _1354 = set_sat(param_27, param_28); - float3 param_29 = cb; - float3 param_30 = _1354; - float param_31 = lum(param_29); - b = set_lum(param_30, param_31); - break; - } - case 14u: - { - float3 param_32 = cb; - float3 param_33 = cs; - float param_34 = lum(param_32); - b = set_lum(param_33, param_34); - break; - } - case 15u: - { - float3 param_35 = cs; - float3 param_36 = cb; - float param_37 = lum(param_35); - b = set_lum(param_36, param_37); - break; - } - default: - { - b = cs; - break; - } - } - return b; -} - -float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) -{ - float fa = 0.0f; - float fb = 0.0f; - switch (mode) - { - case 1u: - { - fa = 1.0f; - fb = 0.0f; - break; - } - case 2u: - { - fa = 0.0f; - fb = 1.0f; - break; - } - case 3u: - { - fa = 1.0f; - fb = 1.0f - as; - break; - } - case 4u: - { - fa = 1.0f - ab; - fb = 1.0f; - break; - } - case 5u: - { - fa = ab; - fb = 0.0f; - break; - } - case 6u: - { - fa = 0.0f; - fb = as; - break; - } - case 7u: - { - fa = 1.0f - ab; - fb = 0.0f; - break; - } - case 8u: - { - fa = 0.0f; - fb = 1.0f - as; - break; - } - case 9u: - { - fa = ab; - fb = 1.0f - as; - break; - } - case 10u: - { - fa = 1.0f - ab; - fb = as; - break; - } - case 11u: - { - fa = 1.0f - ab; - fb = 1.0f - as; - break; - } - case 12u: - { - fa = 1.0f; - fb = 1.0f; - break; - } - case 13u: - { - return min(1.0f.xxxx, float4((cs * as) + (cb * ab), as + ab)); - } - default: - { - break; - } - } - float as_fa = as * fa; - float ab_fb = ab * fb; - float3 co = (cs * as_fa) + (cb * ab_fb); - return float4(co, as_fa + ab_fb); -} - -float4 mix_blend_compose(float4 backdrop, float4 src, uint mode) -{ - if ((mode & 32767u) == 3u) - { - return (backdrop * (1.0f - src.w)) + src; - } - float inv_src_a = 1.0f / (src.w + 1.0000000036274937255387218471014e-15f); - float3 cs = src.xyz * inv_src_a; - float inv_backdrop_a = 1.0f / (backdrop.w + 1.0000000036274937255387218471014e-15f); - float3 cb = backdrop.xyz * inv_backdrop_a; - uint blend_mode = mode >> uint(8); - float3 param = cb; - float3 param_1 = cs; - uint param_2 = blend_mode; - float3 blended = mix_blend(param, param_1, param_2); - cs = lerp(cs, blended, backdrop.w.xxx); - uint comp_mode = mode & 255u; - if (comp_mode == 3u) - { - float3 co = lerp(backdrop.xyz, cs, src.w.xxx); - return float4(co, src.w + (backdrop.w * (1.0f - src.w))); - } - else - { - float3 param_3 = cb; - float3 param_4 = cs; - float param_5 = backdrop.w; - float param_6 = src.w; - uint param_7 = comp_mode; - return mix_compose(param_3, param_4, param_5, param_6, param_7); - } -} - -CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdJump s; - s.new_ref = raw0; - return s; -} - -CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) -{ - CmdJumpRef _755 = { ref.offset + 4u }; - Alloc param = a; - CmdJumpRef param_1 = _755; - return CmdJump_read(param, param_1); -} - -void comp_main() -{ - uint tile_ix = (gl_WorkGroupID.y * _1681.Load(12)) + gl_WorkGroupID.x; - Alloc _1696; - _1696.offset = _1681.Load(28); - Alloc param; - param.offset = _1696.offset; - uint param_1 = tile_ix * 1024u; - uint param_2 = 1024u; - Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1705 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1705; - uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 12); - cmd_ref.offset += 4u; - uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); - float2 xy = float2(xy_uint); - float4 rgba[8]; - for (uint i = 0u; i < 8u; i++) - { - rgba[i] = 0.0f.xxxx; - } - uint clip_depth = 0u; - float df[8]; - TileSegRef tile_seg_ref; - float area[8]; - uint blend_stack[4][8]; - uint base_ix_1; - uint bg_rgba; - while (true) - { - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4).tag; - if (tag == 0u) - { - break; - } - switch (tag) - { - case 2u: - { - Alloc param_5 = cmd_alloc; - CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6); - for (uint k = 0u; k < 8u; k++) - { - df[k] = 1000000000.0f; - } - TileSegRef _1805 = { stroke.tile_ref }; - tile_seg_ref = _1805; - do - { - uint param_7 = tile_seg_ref.offset; - uint param_8 = 24u; - bool param_9 = true; - Alloc param_10 = new_alloc(param_7, param_8, param_9); - TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11); - float2 line_vec = seg._vector; - for (uint k_1 = 0u; k_1 < 8u; k_1++) - { - float2 dpos = (xy + 0.5f.xx) - seg.origin; - uint param_12 = k_1; - dpos += float2(chunk_offset(param_12)); - float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0f, 1.0f); - df[k_1] = min(df[k_1], length((line_vec * t) - dpos)); - } - tile_seg_ref = seg.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_2 = 0u; k_2 < 8u; k_2++) - { - area[k_2] = clamp((stroke.half_width + 0.5f) - df[k_2], 0.0f, 1.0f); - } - cmd_ref.offset += 12u; - break; - } - case 1u: - { - Alloc param_13 = cmd_alloc; - CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14); - for (uint k_3 = 0u; k_3 < 8u; k_3++) - { - area[k_3] = float(fill.backdrop); - } - TileSegRef _1924 = { fill.tile_ref }; - tile_seg_ref = _1924; - do - { - uint param_15 = tile_seg_ref.offset; - uint param_16 = 24u; - bool param_17 = true; - Alloc param_18 = new_alloc(param_15, param_16, param_17); - TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19); - for (uint k_4 = 0u; k_4 < 8u; k_4++) - { - uint param_20 = k_4; - float2 my_xy = xy + float2(chunk_offset(param_20)); - float2 start = seg_1.origin - my_xy; - float2 end = start + seg_1._vector; - float2 window = clamp(float2(start.y, end.y), 0.0f.xx, 1.0f.xx); - if (window.x != window.y) - { - float2 t_1 = (window - start.y.xx) / seg_1._vector.y.xx; - float2 xs = float2(lerp(start.x, end.x, t_1.x), lerp(start.x, end.x, t_1.y)); - float xmin = min(min(xs.x, xs.y), 1.0f) - 9.9999999747524270787835121154785e-07f; - float xmax = max(xs.x, xs.y); - float b = min(xmax, 1.0f); - float c = max(b, 0.0f); - float d = max(xmin, 0.0f); - float a = ((b + (0.5f * ((d * d) - (c * c)))) - xmin) / (xmax - xmin); - area[k_4] += (a * (window.x - window.y)); - } - area[k_4] += (sign(seg_1._vector.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0f, 0.0f, 1.0f)); - } - tile_seg_ref = seg_1.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_5 = 0u; k_5 < 8u; k_5++) - { - area[k_5] = min(abs(area[k_5]), 1.0f); - } - cmd_ref.offset += 12u; - break; - } - case 3u: - { - for (uint k_6 = 0u; k_6 < 8u; k_6++) - { - area[k_6] = 1.0f; - } - cmd_ref.offset += 4u; - break; - } - case 4u: - { - Alloc param_21 = cmd_alloc; - CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22); - for (uint k_7 = 0u; k_7 < 8u; k_7++) - { - area[k_7] = alpha.alpha; - } - cmd_ref.offset += 8u; - break; - } - case 5u: - { - Alloc param_23 = cmd_alloc; - CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24); - uint param_25 = color.rgba_color; - float4 fg = unpacksRGB(param_25); - for (uint k_8 = 0u; k_8 < 8u; k_8++) - { - float4 fg_k = fg * area[k_8]; - rgba[k_8] = (rgba[k_8] * (1.0f - fg_k.w)) + fg_k; - } - cmd_ref.offset += 8u; - break; - } - case 6u: - { - Alloc param_26 = cmd_alloc; - CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27); - float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; - for (uint k_9 = 0u; k_9 < 8u; k_9++) - { - uint param_28 = k_9; - float2 chunk_xy = float2(chunk_offset(param_28)); - float my_d = (d_1 + (lin.line_x * chunk_xy.x)) + (lin.line_y * chunk_xy.y); - int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); - float4 fg_rgba = gradients[int2(x, int(lin.index))]; - float3 param_29 = fg_rgba.xyz; - float3 _2257 = fromsRGB(param_29); - fg_rgba.x = _2257.x; - fg_rgba.y = _2257.y; - fg_rgba.z = _2257.z; - float4 fg_k_1 = fg_rgba * area[k_9]; - rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; - } - cmd_ref.offset += 20u; - break; - } - case 7u: - { - Alloc param_30 = cmd_alloc; - CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31); - for (uint k_10 = 0u; k_10 < 8u; k_10++) - { - uint param_32 = k_10; - float2 my_xy_1 = xy + float2(chunk_offset(param_32)); - my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; - float ba = dot(my_xy_1, rad.c1); - float ca = rad.ra * dot(my_xy_1, my_xy_1); - float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; - int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); - float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; - float3 param_33 = fg_rgba_1.xyz; - float3 _2367 = fromsRGB(param_33); - fg_rgba_1.x = _2367.x; - fg_rgba_1.y = _2367.y; - fg_rgba_1.z = _2367.z; - float4 fg_k_2 = fg_rgba_1 * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; - } - cmd_ref.offset += 48u; - break; - } - case 8u: - { - Alloc param_34 = cmd_alloc; - CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35); - uint2 param_36 = xy_uint; - CmdImage param_37 = fill_img; - float4 _2410[8]; - fillImage(_2410, param_36, param_37); - float4 img[8] = _2410; - for (uint k_11 = 0u; k_11 < 8u; k_11++) - { - float4 fg_k_3 = img[k_11] * area[k_11]; - rgba[k_11] = (rgba[k_11] * (1.0f - fg_k_3.w)) + fg_k_3; - } - cmd_ref.offset += 12u; - break; - } - case 9u: - { - if (clip_depth < 4u) - { - for (uint k_12 = 0u; k_12 < 8u; k_12++) - { - float4 param_38 = float4(rgba[k_12]); - uint _2472 = packsRGB(param_38); - blend_stack[clip_depth][k_12] = _2472; - rgba[k_12] = 0.0f.xxxx; - } - } - else - { - uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - for (uint k_13 = 0u; k_13 < 8u; k_13++) - { - float4 param_39 = float4(rgba[k_13]); - uint _2519 = packsRGB(param_39); - _2506.Store((base_ix + k_13) * 4 + 0, _2519); - rgba[k_13] = 0.0f.xxxx; - } - } - clip_depth++; - cmd_ref.offset += 4u; - break; - } - case 10u: - { - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41); - clip_depth--; - if (clip_depth >= 4u) - { - base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - } - for (uint k_14 = 0u; k_14 < 8u; k_14++) - { - if (clip_depth < 4u) - { - bg_rgba = blend_stack[clip_depth][k_14]; - } - else - { - bg_rgba = _2506.Load((base_ix_1 + k_14) * 4 + 0); - } - uint param_42 = bg_rgba; - float4 bg = unpacksRGB(param_42); - float4 fg_1 = rgba[k_14] * area[k_14]; - float4 param_43 = bg; - float4 param_44 = fg_1; - uint param_45 = end_clip.blend; - rgba[k_14] = mix_blend_compose(param_43, param_44, param_45); - } - cmd_ref.offset += 8u; - break; - } - case 11u: - { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - CmdRef _2618 = { Cmd_Jump_read(param_46, param_47).new_ref }; - cmd_ref = _2618; - cmd_alloc.offset = cmd_ref.offset; - break; - } - } - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint param_48 = i_1; - image[int2(xy_uint + chunk_offset(param_48))] = rgba[i_1].w.x; - } -} - -[numthreads(8, 4, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl deleted file mode 100644 index 45e7a0e..0000000 --- a/piet-gpu/shader/gen/kernel4_gray.msl +++ /dev/null @@ -1,1354 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Alloc -{ - uint offset; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdAlphaRef -{ - uint offset; -}; - -struct CmdAlpha -{ - float alpha; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct CmdTag -{ - uint tag; - uint flags; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 vector; - float y_edge; - TileSegRef next; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct BlendBuf -{ - uint blend_mem[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u); - -static inline __attribute__((always_inline)) -Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size) -{ - return Alloc{ a.offset + offset }; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_297) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_297.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_297); - return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdStroke s; - s.tile_ref = raw0; - s.half_width = as_type(raw1); - return s; -} - -static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_297); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_297); - TileSeg s; - s.origin = float2(as_type(raw0), as_type(raw1)); - s.vector = float2(as_type(raw2), as_type(raw3)); - s.y_edge = as_type(raw4); - s.next = TileSegRef{ raw5 }; - return s; -} - -static inline __attribute__((always_inline)) -uint2 chunk_offset(thread const uint& i) -{ - return uint2((i % 2u) * 8u, (i / 2u) * 4u); -} - -static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdFill s; - s.tile_ref = raw0; - s.backdrop = int(raw1); - return s; -} - -static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdAlpha s; - s.alpha = as_type(raw0); - return s; -} - -static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdColor s; - s.rgba_color = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -float3 fromsRGB(thread const float3& srgb) -{ - return srgb; -} - -static inline __attribute__((always_inline)) -float4 unpacksRGB(thread const uint& srgba) -{ - float4 color = unpack_unorm4x8_to_float(srgba).wzyx; - float3 param = color.xyz; - return float4(fromsRGB(param), color.w); -} - -static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - CmdLinGrad s; - s.index = raw0; - s.line_x = as_type(raw1); - s.line_y = as_type(raw2); - s.line_c = as_type(raw3); - return s; -} - -static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_297); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_297); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_297); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_297); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_297); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19, v_297); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21, v_297); - CmdRadGrad s; - s.index = raw0; - s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); - s.xlat = float2(as_type(raw5), as_type(raw6)); - s.c1 = float2(as_type(raw7), as_type(raw8)); - s.ra = as_type(raw9); - s.roff = as_type(raw10); - return s; -} - -static inline __attribute__((always_inline)) -CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; - return CmdRadGrad_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdImage s; - s.index = raw0; - s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - return s; -} - -static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImage& cmd_img, texture2d image_atlas) -{ - spvUnsafeArray rgba; - for (uint i = 0u; i < 8u; i++) - { - uint param = i; - int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; - float4 fg_rgba = image_atlas.read(uint2(uv)); - float3 param_1 = fg_rgba.xyz; - float3 _1653 = fromsRGB(param_1); - fg_rgba.x = _1653.x; - fg_rgba.y = _1653.y; - fg_rgba.z = _1653.z; - rgba[i] = fg_rgba; - } - return rgba; -} - -static inline __attribute__((always_inline)) -float3 tosRGB(thread const float3& rgb) -{ - return rgb; -} - -static inline __attribute__((always_inline)) -uint packsRGB(thread float4& rgba) -{ - float3 param = rgba.xyz; - rgba = float4(tosRGB(param), rgba.w); - return pack_float_to_unorm4x8(rgba.wzyx); -} - -static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdEndClip s; - s.blend = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -float3 screen(thread const float3& cb, thread const float3& cs) -{ - return (cb + cs) - (cb * cs); -} - -static inline __attribute__((always_inline)) -float3 hard_light(thread const float3& cb, thread const float3& cs) -{ - float3 param = cb; - float3 param_1 = (cs * 2.0) - float3(1.0); - return select(screen(param, param_1), (cb * 2.0) * cs, cs <= float3(0.5)); -} - -static inline __attribute__((always_inline)) -float color_dodge(thread const float& cb, thread const float& cs) -{ - if (cb == 0.0) - { - return 0.0; - } - else - { - if (cs == 1.0) - { - return 1.0; - } - else - { - return fast::min(1.0, cb / (1.0 - cs)); - } - } -} - -static inline __attribute__((always_inline)) -float color_burn(thread const float& cb, thread const float& cs) -{ - if (cb == 1.0) - { - return 1.0; - } - else - { - if (cs == 0.0) - { - return 0.0; - } - else - { - return 1.0 - fast::min(1.0, (1.0 - cb) / cs); - } - } -} - -static inline __attribute__((always_inline)) -float3 soft_light(thread const float3& cb, thread const float3& cs) -{ - float3 d = select(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, cb <= float3(0.25)); - return select(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), cs <= float3(0.5)); -} - -static inline __attribute__((always_inline)) -float sat(thread const float3& c) -{ - return fast::max(c.x, fast::max(c.y, c.z)) - fast::min(c.x, fast::min(c.y, c.z)); -} - -static inline __attribute__((always_inline)) -void set_sat_inner(thread float& cmin, thread float& cmid, thread float& cmax, thread const float& s) -{ - if (cmax > cmin) - { - cmid = ((cmid - cmin) * s) / (cmax - cmin); - cmax = s; - } - else - { - cmid = 0.0; - cmax = 0.0; - } - cmin = 0.0; -} - -static inline __attribute__((always_inline)) -float3 set_sat(thread float3& c, thread const float& s) -{ - if (c.x <= c.y) - { - if (c.y <= c.z) - { - float param = c.x; - float param_1 = c.y; - float param_2 = c.z; - float param_3 = s; - set_sat_inner(param, param_1, param_2, param_3); - c.x = param; - c.y = param_1; - c.z = param_2; - } - else - { - if (c.x <= c.z) - { - float param_4 = c.x; - float param_5 = c.z; - float param_6 = c.y; - float param_7 = s; - set_sat_inner(param_4, param_5, param_6, param_7); - c.x = param_4; - c.z = param_5; - c.y = param_6; - } - else - { - float param_8 = c.z; - float param_9 = c.x; - float param_10 = c.y; - float param_11 = s; - set_sat_inner(param_8, param_9, param_10, param_11); - c.z = param_8; - c.x = param_9; - c.y = param_10; - } - } - } - else - { - if (c.x <= c.z) - { - float param_12 = c.y; - float param_13 = c.x; - float param_14 = c.z; - float param_15 = s; - set_sat_inner(param_12, param_13, param_14, param_15); - c.y = param_12; - c.x = param_13; - c.z = param_14; - } - else - { - if (c.y <= c.z) - { - float param_16 = c.y; - float param_17 = c.z; - float param_18 = c.x; - float param_19 = s; - set_sat_inner(param_16, param_17, param_18, param_19); - c.y = param_16; - c.z = param_17; - c.x = param_18; - } - else - { - float param_20 = c.z; - float param_21 = c.y; - float param_22 = c.x; - float param_23 = s; - set_sat_inner(param_20, param_21, param_22, param_23); - c.z = param_20; - c.y = param_21; - c.x = param_22; - } - } - } - return c; -} - -static inline __attribute__((always_inline)) -float lum(thread const float3& c) -{ - float3 f = float3(0.300000011920928955078125, 0.589999973773956298828125, 0.10999999940395355224609375); - return dot(c, f); -} - -static inline __attribute__((always_inline)) -float3 clip_color(thread float3& c) -{ - float3 param = c; - float L = lum(param); - float n = fast::min(c.x, fast::min(c.y, c.z)); - float x = fast::max(c.x, fast::max(c.y, c.z)); - if (n < 0.0) - { - c = float3(L) + (((c - float3(L)) * L) / float3(L - n)); - } - if (x > 1.0) - { - c = float3(L) + (((c - float3(L)) * (1.0 - L)) / float3(x - L)); - } - return c; -} - -static inline __attribute__((always_inline)) -float3 set_lum(thread const float3& c, thread const float& l) -{ - float3 param = c; - float3 param_1 = c + float3(l - lum(param)); - float3 _1048 = clip_color(param_1); - return _1048; -} - -static inline __attribute__((always_inline)) -float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const uint& mode) -{ - float3 b = float3(0.0); - switch (mode) - { - case 1u: - { - b = cb * cs; - break; - } - case 2u: - { - float3 param = cb; - float3 param_1 = cs; - b = screen(param, param_1); - break; - } - case 3u: - { - float3 param_2 = cs; - float3 param_3 = cb; - b = hard_light(param_2, param_3); - break; - } - case 4u: - { - b = fast::min(cb, cs); - break; - } - case 5u: - { - b = fast::max(cb, cs); - break; - } - case 6u: - { - float param_4 = cb.x; - float param_5 = cs.x; - float param_6 = cb.y; - float param_7 = cs.y; - float param_8 = cb.z; - float param_9 = cs.z; - b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); - break; - } - case 7u: - { - float param_10 = cb.x; - float param_11 = cs.x; - float param_12 = cb.y; - float param_13 = cs.y; - float param_14 = cb.z; - float param_15 = cs.z; - b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); - break; - } - case 8u: - { - float3 param_16 = cb; - float3 param_17 = cs; - b = hard_light(param_16, param_17); - break; - } - case 9u: - { - float3 param_18 = cb; - float3 param_19 = cs; - b = soft_light(param_18, param_19); - break; - } - case 10u: - { - b = abs(cb - cs); - break; - } - case 11u: - { - b = (cb + cs) - ((cb * 2.0) * cs); - break; - } - case 12u: - { - float3 param_20 = cb; - float3 param_21 = cs; - float param_22 = sat(param_20); - float3 _1340 = set_sat(param_21, param_22); - float3 param_23 = cb; - float3 param_24 = _1340; - float param_25 = lum(param_23); - b = set_lum(param_24, param_25); - break; - } - case 13u: - { - float3 param_26 = cs; - float3 param_27 = cb; - float param_28 = sat(param_26); - float3 _1354 = set_sat(param_27, param_28); - float3 param_29 = cb; - float3 param_30 = _1354; - float param_31 = lum(param_29); - b = set_lum(param_30, param_31); - break; - } - case 14u: - { - float3 param_32 = cb; - float3 param_33 = cs; - float param_34 = lum(param_32); - b = set_lum(param_33, param_34); - break; - } - case 15u: - { - float3 param_35 = cs; - float3 param_36 = cb; - float param_37 = lum(param_35); - b = set_lum(param_36, param_37); - break; - } - default: - { - b = cs; - break; - } - } - return b; -} - -static inline __attribute__((always_inline)) -float4 mix_compose(thread const float3& cb, thread const float3& cs, thread const float& ab, thread const float& as, thread const uint& mode) -{ - float fa = 0.0; - float fb = 0.0; - switch (mode) - { - case 1u: - { - fa = 1.0; - fb = 0.0; - break; - } - case 2u: - { - fa = 0.0; - fb = 1.0; - break; - } - case 3u: - { - fa = 1.0; - fb = 1.0 - as; - break; - } - case 4u: - { - fa = 1.0 - ab; - fb = 1.0; - break; - } - case 5u: - { - fa = ab; - fb = 0.0; - break; - } - case 6u: - { - fa = 0.0; - fb = as; - break; - } - case 7u: - { - fa = 1.0 - ab; - fb = 0.0; - break; - } - case 8u: - { - fa = 0.0; - fb = 1.0 - as; - break; - } - case 9u: - { - fa = ab; - fb = 1.0 - as; - break; - } - case 10u: - { - fa = 1.0 - ab; - fb = as; - break; - } - case 11u: - { - fa = 1.0 - ab; - fb = 1.0 - as; - break; - } - case 12u: - { - fa = 1.0; - fb = 1.0; - break; - } - case 13u: - { - return fast::min(float4(1.0), float4((cs * as) + (cb * ab), as + ab)); - } - default: - { - break; - } - } - float as_fa = as * fa; - float ab_fb = ab * fb; - float3 co = (cs * as_fa) + (cb * ab_fb); - return float4(co, as_fa + ab_fb); -} - -static inline __attribute__((always_inline)) -float4 mix_blend_compose(thread const float4& backdrop, thread const float4& src, thread const uint& mode) -{ - if ((mode & 32767u) == 3u) - { - return (backdrop * (1.0 - src.w)) + src; - } - float inv_src_a = 1.0 / (src.w + 1.0000000036274937255387218471014e-15); - float3 cs = src.xyz * inv_src_a; - float inv_backdrop_a = 1.0 / (backdrop.w + 1.0000000036274937255387218471014e-15); - float3 cb = backdrop.xyz * inv_backdrop_a; - uint blend_mode = mode >> uint(8); - float3 param = cb; - float3 param_1 = cs; - uint param_2 = blend_mode; - float3 blended = mix_blend(param, param_1, param_2); - cs = mix(cs, blended, float3(backdrop.w)); - uint comp_mode = mode & 255u; - if (comp_mode == 3u) - { - float3 co = mix(backdrop.xyz, cs, float3(src.w)); - return float4(co, src.w + (backdrop.w * (1.0 - src.w))); - } - else - { - float3 param_3 = cb; - float3 param_4 = cs; - float param_5 = backdrop.w; - float param_6 = src.w; - uint param_7 = comp_mode; - return mix_compose(param_3, param_4, param_5, param_6, param_7); - } -} - -static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdJump s; - s.new_ref = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_297); -} - -kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], device BlendBuf& _2506 [[buffer(2)]], texture2d image [[texture(3)]], texture2d image_atlas [[texture(4)]], texture2d gradients [[texture(5)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x; - Alloc param; - param.offset = _1681.conf.ptcl_alloc.offset; - uint param_1 = tile_ix * 1024u; - uint param_2 = 1024u; - Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; - uint blend_offset = v_297.memory[cmd_ref.offset >> uint(2)]; - cmd_ref.offset += 4u; - uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); - float2 xy = float2(xy_uint); - spvUnsafeArray rgba; - for (uint i = 0u; i < 8u; i++) - { - rgba[i] = float4(0.0); - } - uint clip_depth = 0u; - spvUnsafeArray df; - TileSegRef tile_seg_ref; - spvUnsafeArray area; - spvUnsafeArray, 4> blend_stack; - uint base_ix_1; - uint bg_rgba; - while (true) - { - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_297).tag; - if (tag == 0u) - { - break; - } - switch (tag) - { - case 2u: - { - Alloc param_5 = cmd_alloc; - CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_297); - for (uint k = 0u; k < 8u; k++) - { - df[k] = 1000000000.0; - } - tile_seg_ref = TileSegRef{ stroke.tile_ref }; - do - { - uint param_7 = tile_seg_ref.offset; - uint param_8 = 24u; - bool param_9 = true; - Alloc param_10 = new_alloc(param_7, param_8, param_9); - TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_297); - float2 line_vec = seg.vector; - for (uint k_1 = 0u; k_1 < 8u; k_1++) - { - float2 dpos = (xy + float2(0.5)) - seg.origin; - uint param_12 = k_1; - dpos += float2(chunk_offset(param_12)); - float t = fast::clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); - df[k_1] = fast::min(df[k_1], length((line_vec * t) - dpos)); - } - tile_seg_ref = seg.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_2 = 0u; k_2 < 8u; k_2++) - { - area[k_2] = fast::clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0); - } - cmd_ref.offset += 12u; - break; - } - case 1u: - { - Alloc param_13 = cmd_alloc; - CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_297); - for (uint k_3 = 0u; k_3 < 8u; k_3++) - { - area[k_3] = float(fill.backdrop); - } - tile_seg_ref = TileSegRef{ fill.tile_ref }; - do - { - uint param_15 = tile_seg_ref.offset; - uint param_16 = 24u; - bool param_17 = true; - Alloc param_18 = new_alloc(param_15, param_16, param_17); - TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297); - for (uint k_4 = 0u; k_4 < 8u; k_4++) - { - uint param_20 = k_4; - float2 my_xy = xy + float2(chunk_offset(param_20)); - float2 start = seg_1.origin - my_xy; - float2 end = start + seg_1.vector; - float2 window = fast::clamp(float2(start.y, end.y), float2(0.0), float2(1.0)); - if ((isunordered(window.x, window.y) || window.x != window.y)) - { - float2 t_1 = (window - float2(start.y)) / float2(seg_1.vector.y); - float2 xs = float2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y)); - float xmin = fast::min(fast::min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07; - float xmax = fast::max(xs.x, xs.y); - float b = fast::min(xmax, 1.0); - float c = fast::max(b, 0.0); - float d = fast::max(xmin, 0.0); - float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin); - area[k_4] += (a * (window.x - window.y)); - } - area[k_4] += (sign(seg_1.vector.x) * fast::clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0)); - } - tile_seg_ref = seg_1.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_5 = 0u; k_5 < 8u; k_5++) - { - area[k_5] = fast::min(abs(area[k_5]), 1.0); - } - cmd_ref.offset += 12u; - break; - } - case 3u: - { - for (uint k_6 = 0u; k_6 < 8u; k_6++) - { - area[k_6] = 1.0; - } - cmd_ref.offset += 4u; - break; - } - case 4u: - { - Alloc param_21 = cmd_alloc; - CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_297); - for (uint k_7 = 0u; k_7 < 8u; k_7++) - { - area[k_7] = alpha.alpha; - } - cmd_ref.offset += 8u; - break; - } - case 5u: - { - Alloc param_23 = cmd_alloc; - CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_297); - uint param_25 = color.rgba_color; - float4 fg = unpacksRGB(param_25); - for (uint k_8 = 0u; k_8 < 8u; k_8++) - { - float4 fg_k = fg * area[k_8]; - rgba[k_8] = (rgba[k_8] * (1.0 - fg_k.w)) + fg_k; - } - cmd_ref.offset += 8u; - break; - } - case 6u: - { - Alloc param_26 = cmd_alloc; - CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_297); - float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; - for (uint k_9 = 0u; k_9 < 8u; k_9++) - { - uint param_28 = k_9; - float2 chunk_xy = float2(chunk_offset(param_28)); - float my_d = (d_1 + (lin.line_x * chunk_xy.x)) + (lin.line_y * chunk_xy.y); - int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); - float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); - float3 param_29 = fg_rgba.xyz; - float3 _2257 = fromsRGB(param_29); - fg_rgba.x = _2257.x; - fg_rgba.y = _2257.y; - fg_rgba.z = _2257.z; - float4 fg_k_1 = fg_rgba * area[k_9]; - rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; - } - cmd_ref.offset += 20u; - break; - } - case 7u: - { - Alloc param_30 = cmd_alloc; - CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_297); - for (uint k_10 = 0u; k_10 < 8u; k_10++) - { - uint param_32 = k_10; - float2 my_xy_1 = xy + float2(chunk_offset(param_32)); - my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; - float ba = dot(my_xy_1, rad.c1); - float ca = rad.ra * dot(my_xy_1, my_xy_1); - float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; - int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); - float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); - float3 param_33 = fg_rgba_1.xyz; - float3 _2367 = fromsRGB(param_33); - fg_rgba_1.x = _2367.x; - fg_rgba_1.y = _2367.y; - fg_rgba_1.z = _2367.z; - float4 fg_k_2 = fg_rgba_1 * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; - } - cmd_ref.offset += 48u; - break; - } - case 8u: - { - Alloc param_34 = cmd_alloc; - CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_297); - uint2 param_36 = xy_uint; - CmdImage param_37 = fill_img; - spvUnsafeArray img; - img = fillImage(param_36, param_37, image_atlas); - for (uint k_11 = 0u; k_11 < 8u; k_11++) - { - float4 fg_k_3 = img[k_11] * area[k_11]; - rgba[k_11] = (rgba[k_11] * (1.0 - fg_k_3.w)) + fg_k_3; - } - cmd_ref.offset += 12u; - break; - } - case 9u: - { - if (clip_depth < 4u) - { - for (uint k_12 = 0u; k_12 < 8u; k_12++) - { - float4 param_38 = float4(rgba[k_12]); - uint _2472 = packsRGB(param_38); - blend_stack[clip_depth][k_12] = _2472; - rgba[k_12] = float4(0.0); - } - } - else - { - uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - for (uint k_13 = 0u; k_13 < 8u; k_13++) - { - float4 param_39 = float4(rgba[k_13]); - uint _2519 = packsRGB(param_39); - _2506.blend_mem[base_ix + k_13] = _2519; - rgba[k_13] = float4(0.0); - } - } - clip_depth++; - cmd_ref.offset += 4u; - break; - } - case 10u: - { - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41, v_297); - clip_depth--; - if (clip_depth >= 4u) - { - base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - } - for (uint k_14 = 0u; k_14 < 8u; k_14++) - { - if (clip_depth < 4u) - { - bg_rgba = blend_stack[clip_depth][k_14]; - } - else - { - bg_rgba = _2506.blend_mem[base_ix_1 + k_14]; - } - uint param_42 = bg_rgba; - float4 bg = unpacksRGB(param_42); - float4 fg_1 = rgba[k_14] * area[k_14]; - float4 param_43 = bg; - float4 param_44 = fg_1; - uint param_45 = end_clip.blend; - rgba[k_14] = mix_blend_compose(param_43, param_44, param_45); - } - cmd_ref.offset += 8u; - break; - } - case 11u: - { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_297).new_ref }; - cmd_alloc.offset = cmd_ref.offset; - break; - } - } - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint param_48 = i_1; - image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48)))); - } -} - diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv deleted file mode 100644 index df86875..0000000 Binary files a/piet-gpu/shader/gen/kernel4_gray.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/path_coarse.dxil b/piet-gpu/shader/gen/path_coarse.dxil deleted file mode 100644 index 2842f0d..0000000 Binary files a/piet-gpu/shader/gen/path_coarse.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/path_coarse.hlsl b/piet-gpu/shader/gen/path_coarse.hlsl deleted file mode 100644 index 106fdfc..0000000 --- a/piet-gpu/shader/gen/path_coarse.hlsl +++ /dev/null @@ -1,673 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct PathCubicRef -{ - uint offset; -}; - -struct PathCubic -{ - float2 p0; - float2 p1; - float2 p2; - float2 p3; - uint path_ix; - uint trans_ix; - float2 stroke; -}; - -struct PathSegRef -{ - uint offset; -}; - -struct PathSegTag -{ - uint tag; - uint flags; -}; - -struct TileRef -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 _vector; - float y_edge; - TileSegRef next; -}; - -struct SubdivResult -{ - float val; - float a0; - float a2; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u); - -static const PathSegTag _722 = { 0u, 0u }; - -RWByteAddressBuffer _143 : register(u0, space0); -ByteAddressBuffer _711 : register(t1, space0); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -static bool mem_ok; - -bool check_deps(uint dep_stage) -{ - uint _149; - _143.InterlockedOr(4, 0u, _149); - return (_149 & dep_stage) == 0u; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _143.Load(offset * 4 + 12); - return v; -} - -PathSegTag PathSeg_tag(Alloc a, PathSegRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - PathSegTag _362 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _362; -} - -PathCubic PathCubic_read(Alloc a, PathCubicRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21); - Alloc param_22 = a; - uint param_23 = ix + 11u; - uint raw11 = read_mem(param_22, param_23); - PathCubic s; - s.p0 = float2(asfloat(raw0), asfloat(raw1)); - s.p1 = float2(asfloat(raw2), asfloat(raw3)); - s.p2 = float2(asfloat(raw4), asfloat(raw5)); - s.p3 = float2(asfloat(raw6), asfloat(raw7)); - s.path_ix = raw8; - s.trans_ix = raw9; - s.stroke = float2(asfloat(raw10), asfloat(raw11)); - return s; -} - -PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref) -{ - PathCubicRef _368 = { ref.offset + 4u }; - Alloc param = a; - PathCubicRef param_1 = _368; - return PathCubic_read(param, param_1); -} - -float2 eval_cubic(float2 p0, float2 p1, float2 p2, float2 p3, float t) -{ - float mt = 1.0f - t; - return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0f)) + (((p2 * (mt * 3.0f)) + (p3 * t)) * t)) * t); -} - -float approx_parabola_integral(float x) -{ - return x * rsqrt(sqrt(0.3300000131130218505859375f + (0.201511204242706298828125f + ((0.25f * x) * x)))); -} - -SubdivResult estimate_subdiv(float2 p0, float2 p1, float2 p2, float sqrt_tol) -{ - float2 d01 = p1 - p0; - float2 d12 = p2 - p1; - float2 dd = d01 - d12; - float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x); - float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross; - float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross; - float scale = abs(_cross / (length(dd) * (x2 - x0))); - float param = x0; - float a0 = approx_parabola_integral(param); - float param_1 = x2; - float a2 = approx_parabola_integral(param_1); - float val = 0.0f; - if (scale < 1000000000.0f) - { - float da = abs(a2 - a0); - float sqrt_scale = sqrt(scale); - if (sign(x0) == sign(x2)) - { - val = da * sqrt_scale; - } - else - { - float xmin = sqrt_tol / sqrt_scale; - float param_2 = xmin; - val = (sqrt_tol * da) / approx_parabola_integral(param_2); - } - } - SubdivResult _690 = { val, a0, a2 }; - return _690; -} - -uint fill_mode_from_flags(uint flags) -{ - return flags & 1u; -} - -Path Path_read(Alloc a, PathRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _422 = { raw2 }; - s.tiles = _422; - return s; -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok_1) -{ - Alloc a; - a.offset = offset; - return a; -} - -float approx_parabola_inv_integral(float x) -{ - return x * sqrt(0.61000001430511474609375f + (0.1520999968051910400390625f + ((0.25f * x) * x))); -} - -float2 eval_quad(float2 p0, float2 p1, float2 p2, float t) -{ - float mt = 1.0f - t; - return (p0 * (mt * mt)) + (((p1 * (mt * 2.0f)) + (p2 * t)) * t); -} - -uint malloc_stage(uint size, uint mem_size, uint stage) -{ - uint _158; - _143.InterlockedAdd(0, size, _158); - uint offset = _158; - if ((offset + size) > mem_size) - { - uint _168; - _143.InterlockedOr(4, stage, _168); - offset = 0u; - } - return offset; -} - -TileRef Tile_index(TileRef ref, uint index) -{ - TileRef _380 = { ref.offset + (index * 8u) }; - return _380; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _143.Store(offset * 4 + 12, val); -} - -void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.origin.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.origin.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s._vector.x); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s._vector.y); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.y_edge); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.next.offset; - write_mem(param_15, param_16, param_17); -} - -void comp_main() -{ - mem_ok = true; - uint param = 7u; - bool _694 = check_deps(param); - if (!_694) - { - return; - } - uint element_ix = gl_GlobalInvocationID.x; - PathSegRef _719 = { _711.Load(32) + (element_ix * 52u) }; - PathSegRef ref = _719; - PathSegTag tag = _722; - if (element_ix < _711.Load(8)) - { - Alloc _732; - _732.offset = _711.Load(32); - Alloc param_1; - param_1.offset = _732.offset; - PathSegRef param_2 = ref; - tag = PathSeg_tag(param_1, param_2); - } - switch (tag.tag) - { - case 1u: - { - Alloc _745; - _745.offset = _711.Load(32); - Alloc param_3; - param_3.offset = _745.offset; - PathSegRef param_4 = ref; - PathCubic cubic = PathSeg_Cubic_read(param_3, param_4); - float2 err_v = (((cubic.p2 - cubic.p1) * 3.0f) + cubic.p0) - cubic.p3; - float err = (err_v.x * err_v.x) + (err_v.y * err_v.y); - uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875f, 0.16666667163372039794921875f))), 1u); - n_quads = min(n_quads, 16u); - float val = 0.0f; - float2 qp0 = cubic.p0; - float _step = 1.0f / float(n_quads); - SubdivResult keep_params[16]; - for (uint i = 0u; i < n_quads; i++) - { - float t = float(i + 1u) * _step; - float2 param_5 = cubic.p0; - float2 param_6 = cubic.p1; - float2 param_7 = cubic.p2; - float2 param_8 = cubic.p3; - float param_9 = t; - float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9); - float2 param_10 = cubic.p0; - float2 param_11 = cubic.p1; - float2 param_12 = cubic.p2; - float2 param_13 = cubic.p3; - float param_14 = t - (0.5f * _step); - float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14); - qp1 = (qp1 * 2.0f) - ((qp0 + qp2) * 0.5f); - float2 param_15 = qp0; - float2 param_16 = qp1; - float2 param_17 = qp2; - float param_18 = 0.4743416607379913330078125f; - SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18); - keep_params[i] = params; - val += params.val; - qp0 = qp2; - } - uint n = max(uint(ceil((val * 0.5f) / 0.4743416607379913330078125f)), 1u); - uint param_19 = tag.flags; - bool is_stroke = fill_mode_from_flags(param_19) == 1u; - uint path_ix = cubic.path_ix; - PathRef _901 = { _711.Load(20) + (path_ix * 12u) }; - Alloc _904; - _904.offset = _711.Load(20); - Alloc param_20; - param_20.offset = _904.offset; - PathRef param_21 = _901; - Path path = Path_read(param_20, param_21); - uint param_22 = path.tiles.offset; - uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_24 = true; - Alloc path_alloc = new_alloc(param_22, param_23, param_24); - int4 bbox = int4(path.bbox); - float2 p0 = cubic.p0; - qp0 = cubic.p0; - float v_step = val / float(n); - int n_out = 1; - float val_sum = 0.0f; - float2 p1; - float _1143; - TileSeg tile_seg; - for (uint i_1 = 0u; i_1 < n_quads; i_1++) - { - float t_1 = float(i_1 + 1u) * _step; - float2 param_25 = cubic.p0; - float2 param_26 = cubic.p1; - float2 param_27 = cubic.p2; - float2 param_28 = cubic.p3; - float param_29 = t_1; - float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29); - float2 param_30 = cubic.p0; - float2 param_31 = cubic.p1; - float2 param_32 = cubic.p2; - float2 param_33 = cubic.p3; - float param_34 = t_1 - (0.5f * _step); - float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34); - qp1_1 = (qp1_1 * 2.0f) - ((qp0 + qp2_1) * 0.5f); - SubdivResult params_1 = keep_params[i_1]; - float param_35 = params_1.a0; - float u0 = approx_parabola_inv_integral(param_35); - float param_36 = params_1.a2; - float u2 = approx_parabola_inv_integral(param_36); - float uscale = 1.0f / (u2 - u0); - float target = float(n_out) * v_step; - for (;;) - { - bool _1036 = uint(n_out) == n; - bool _1046; - if (!_1036) - { - _1046 = target < (val_sum + params_1.val); - } - else - { - _1046 = _1036; - } - if (_1046) - { - if (uint(n_out) == n) - { - p1 = cubic.p3; - } - else - { - float u = (target - val_sum) / params_1.val; - float a = lerp(params_1.a0, params_1.a2, u); - float param_37 = a; - float au = approx_parabola_inv_integral(param_37); - float t_2 = (au - u0) * uscale; - float2 param_38 = qp0; - float2 param_39 = qp1_1; - float2 param_40 = qp2_1; - float param_41 = t_2; - p1 = eval_quad(param_38, param_39, param_40, param_41); - } - float xmin = min(p0.x, p1.x) - cubic.stroke.x; - float xmax = max(p0.x, p1.x) + cubic.stroke.x; - float ymin = min(p0.y, p1.y) - cubic.stroke.y; - float ymax = max(p0.y, p1.y) + cubic.stroke.y; - float dx = p1.x - p0.x; - float dy = p1.y - p0.y; - if (abs(dy) < 9.999999717180685365747194737196e-10f) - { - _1143 = 1000000000.0f; - } - else - { - _1143 = dx / dy; - } - float invslope = _1143; - float c = (cubic.stroke.x + (abs(invslope) * (8.0f + cubic.stroke.y))) * 0.0625f; - float b = invslope; - float a_1 = (p0.x - ((p0.y - 8.0f) * b)) * 0.0625f; - int x0 = int(floor(xmin * 0.0625f)); - int x1 = int(floor(xmax * 0.0625f) + 1.0f); - int y0 = int(floor(ymin * 0.0625f)); - int y1 = int(floor(ymax * 0.0625f) + 1.0f); - x0 = clamp(x0, bbox.x, bbox.z); - y0 = clamp(y0, bbox.y, bbox.w); - x1 = clamp(x1, bbox.x, bbox.z); - y1 = clamp(y1, bbox.y, bbox.w); - float xc = a_1 + (b * float(y0)); - int stride = bbox.z - bbox.x; - int base = ((y0 - bbox.y) * stride) - bbox.x; - uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); - uint malloc_size = n_tile_alloc * 24u; - uint param_42 = malloc_size; - uint param_43 = _711.Load(0); - uint param_44 = 4u; - uint _1265 = malloc_stage(param_42, param_43, param_44); - uint tile_offset = _1265; - if (tile_offset == 0u) - { - mem_ok = false; - } - uint param_45 = tile_offset; - uint param_46 = malloc_size; - bool param_47 = true; - Alloc tile_alloc = new_alloc(param_45, param_46, param_47); - int xray = int(floor(p0.x * 0.0625f)); - int last_xray = int(floor(p1.x * 0.0625f)); - if (p0.y > p1.y) - { - int tmp = xray; - xray = last_xray; - last_xray = tmp; - } - for (int y = y0; y < y1; y++) - { - float tile_y0 = float(y * 16); - int xbackdrop = max((xray + 1), bbox.x); - bool _1322 = !is_stroke; - bool _1332; - if (_1322) - { - _1332 = min(p0.y, p1.y) < tile_y0; - } - else - { - _1332 = _1322; - } - bool _1339; - if (_1332) - { - _1339 = xbackdrop < bbox.z; - } - else - { - _1339 = _1332; - } - if (_1339) - { - int backdrop = (p1.y < p0.y) ? 1 : (-1); - TileRef param_48 = path.tiles; - uint param_49 = uint(base + xbackdrop); - TileRef tile_ref = Tile_index(param_48, param_49); - uint tile_el = tile_ref.offset >> uint(2); - uint _1369; - _143.InterlockedAdd((tile_el + 1u) * 4 + 12, uint(backdrop), _1369); - } - int next_xray = last_xray; - if (y < (y1 - 1)) - { - float tile_y1 = float((y + 1) * 16); - float x_edge = lerp(p0.x, p1.x, (tile_y1 - p0.y) / dy); - next_xray = int(floor(x_edge * 0.0625f)); - } - int min_xray = min(xray, next_xray); - int max_xray = max(xray, next_xray); - int xx0 = min(int(floor(xc - c)), min_xray); - int xx1 = max(int(ceil(xc + c)), (max_xray + 1)); - xx0 = clamp(xx0, x0, x1); - xx1 = clamp(xx1, x0, x1); - for (int x = xx0; x < xx1; x++) - { - float tile_x0 = float(x * 16); - TileRef _1449 = { path.tiles.offset }; - TileRef param_50 = _1449; - uint param_51 = uint(base + x); - TileRef tile_ref_1 = Tile_index(param_50, param_51); - uint tile_el_1 = tile_ref_1.offset >> uint(2); - uint old = 0u; - uint _1465; - _143.InterlockedExchange(tile_el_1 * 4 + 12, tile_offset, _1465); - old = _1465; - tile_seg.origin = p0; - tile_seg._vector = p1 - p0; - float y_edge = 0.0f; - if (!is_stroke) - { - y_edge = lerp(p0.y, p1.y, (tile_x0 - p0.x) / dx); - if (min(p0.x, p1.x) < tile_x0) - { - float2 p = float2(tile_x0, y_edge); - if (p0.x > p1.x) - { - tile_seg._vector = p - p0; - } - else - { - tile_seg.origin = p; - tile_seg._vector = p1 - p; - } - if (tile_seg._vector.x == 0.0f) - { - tile_seg._vector.x = sign(p1.x - p0.x) * 9.999999717180685365747194737196e-10f; - } - } - if ((x <= min_xray) || (max_xray < x)) - { - y_edge = 1000000000.0f; - } - } - tile_seg.y_edge = y_edge; - tile_seg.next.offset = old; - if (mem_ok) - { - TileSegRef _1550 = { tile_offset }; - Alloc param_52 = tile_alloc; - TileSegRef param_53 = _1550; - TileSeg param_54 = tile_seg; - TileSeg_write(param_52, param_53, param_54); - } - tile_offset += 24u; - } - xc += b; - base += stride; - xray = next_xray; - } - n_out++; - target += v_step; - p0 = p1; - continue; - } - else - { - break; - } - } - val_sum += params_1.val; - qp0 = qp2_1; - } - break; - } - } -} - -[numthreads(32, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/path_coarse.msl b/piet-gpu/shader/gen/path_coarse.msl deleted file mode 100644 index 4f59b3f..0000000 --- a/piet-gpu/shader/gen/path_coarse.msl +++ /dev/null @@ -1,718 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Alloc -{ - uint offset; -}; - -struct PathCubicRef -{ - uint offset; -}; - -struct PathCubic -{ - float2 p0; - float2 p1; - float2 p2; - float2 p3; - uint path_ix; - uint trans_ix; - float2 stroke; -}; - -struct PathSegRef -{ - uint offset; -}; - -struct PathSegTag -{ - uint tag; - uint flags; -}; - -struct TileRef -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 vector; - float y_edge; - TileSegRef next; -}; - -struct SubdivResult -{ - float val; - float a0; - float a2; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(32u, 1u, 1u); - -static inline __attribute__((always_inline)) -bool check_deps(thread const uint& dep_stage, device Memory& v_143) -{ - uint _149 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, 0u, memory_order_relaxed); - return (_149 & dep_stage) == 0u; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_143) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_143.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -PathSegTag PathSeg_tag(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_143); - return PathSegTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref, device Memory& v_143) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_143); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_143); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_143); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_143); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_143); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_143); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_143); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_143); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_143); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19, v_143); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21, v_143); - Alloc param_22 = a; - uint param_23 = ix + 11u; - uint raw11 = read_mem(param_22, param_23, v_143); - PathCubic s; - s.p0 = float2(as_type(raw0), as_type(raw1)); - s.p1 = float2(as_type(raw2), as_type(raw3)); - s.p2 = float2(as_type(raw4), as_type(raw5)); - s.p3 = float2(as_type(raw6), as_type(raw7)); - s.path_ix = raw8; - s.trans_ix = raw9; - s.stroke = float2(as_type(raw10), as_type(raw11)); - return s; -} - -static inline __attribute__((always_inline)) -PathCubic PathSeg_Cubic_read(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143) -{ - Alloc param = a; - PathCubicRef param_1 = PathCubicRef{ ref.offset + 4u }; - return PathCubic_read(param, param_1, v_143); -} - -static inline __attribute__((always_inline)) -float2 eval_cubic(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float2& p3, thread const float& t) -{ - float mt = 1.0 - t; - return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0)) + (((p2 * (mt * 3.0)) + (p3 * t)) * t)) * t); -} - -static inline __attribute__((always_inline)) -float approx_parabola_integral(thread const float& x) -{ - return x * rsqrt(sqrt(0.3300000131130218505859375 + (0.201511204242706298828125 + ((0.25 * x) * x)))); -} - -static inline __attribute__((always_inline)) -SubdivResult estimate_subdiv(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float& sqrt_tol) -{ - float2 d01 = p1 - p0; - float2 d12 = p2 - p1; - float2 dd = d01 - d12; - float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x); - float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross; - float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross; - float scale = abs(_cross / (length(dd) * (x2 - x0))); - float param = x0; - float a0 = approx_parabola_integral(param); - float param_1 = x2; - float a2 = approx_parabola_integral(param_1); - float val = 0.0; - if (scale < 1000000000.0) - { - float da = abs(a2 - a0); - float sqrt_scale = sqrt(scale); - if (sign(x0) == sign(x2)) - { - val = da * sqrt_scale; - } - else - { - float xmin = sqrt_tol / sqrt_scale; - float param_2 = xmin; - val = (sqrt_tol * da) / approx_parabola_integral(param_2); - } - } - return SubdivResult{ val, a0, a2 }; -} - -static inline __attribute__((always_inline)) -uint fill_mode_from_flags(thread const uint& flags) -{ - return flags & 1u; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_143) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_143); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_143); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_143); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - s.tiles = TileRef{ raw2 }; - return s; -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -float approx_parabola_inv_integral(thread const float& x) -{ - return x * sqrt(0.61000001430511474609375 + (0.1520999968051910400390625 + ((0.25 * x) * x))); -} - -static inline __attribute__((always_inline)) -float2 eval_quad(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float& t) -{ - float mt = 1.0 - t; - return (p0 * (mt * mt)) + (((p1 * (mt * 2.0)) + (p2 * t)) * t); -} - -static inline __attribute__((always_inline)) -uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_143) -{ - uint _158 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.mem_offset, size, memory_order_relaxed); - uint offset = _158; - if ((offset + size) > mem_size) - { - uint _168 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, stage, memory_order_relaxed); - offset = 0u; - } - return offset; -} - -static inline __attribute__((always_inline)) -TileRef Tile_index(thread const TileRef& ref, thread const uint& index) -{ - return TileRef{ ref.offset + (index * 8u) }; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_143) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_143.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void TileSeg_write(thread const Alloc& a, thread const TileSegRef& ref, thread const TileSeg& s, device Memory& v_143) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.origin.x); - write_mem(param, param_1, param_2, v_143); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.origin.y); - write_mem(param_3, param_4, param_5, v_143); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.vector.x); - write_mem(param_6, param_7, param_8, v_143); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.vector.y); - write_mem(param_9, param_10, param_11, v_143); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.y_edge); - write_mem(param_12, param_13, param_14, v_143); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.next.offset; - write_mem(param_15, param_16, param_17, v_143); -} - -kernel void main0(device Memory& v_143 [[buffer(0)]], const device ConfigBuf& _711 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - bool mem_ok = true; - uint param = 7u; - bool _694 = check_deps(param, v_143); - if (!_694) - { - return; - } - uint element_ix = gl_GlobalInvocationID.x; - PathSegRef ref = PathSegRef{ _711.conf.pathseg_alloc.offset + (element_ix * 52u) }; - PathSegTag tag = PathSegTag{ 0u, 0u }; - if (element_ix < _711.conf.n_pathseg) - { - Alloc param_1; - param_1.offset = _711.conf.pathseg_alloc.offset; - PathSegRef param_2 = ref; - tag = PathSeg_tag(param_1, param_2, v_143); - } - switch (tag.tag) - { - case 1u: - { - Alloc param_3; - param_3.offset = _711.conf.pathseg_alloc.offset; - PathSegRef param_4 = ref; - PathCubic cubic = PathSeg_Cubic_read(param_3, param_4, v_143); - float2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3; - float err = (err_v.x * err_v.x) + (err_v.y * err_v.y); - uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u); - n_quads = min(n_quads, 16u); - float val = 0.0; - float2 qp0 = cubic.p0; - float _step = 1.0 / float(n_quads); - spvUnsafeArray keep_params; - for (uint i = 0u; i < n_quads; i++) - { - float t = float(i + 1u) * _step; - float2 param_5 = cubic.p0; - float2 param_6 = cubic.p1; - float2 param_7 = cubic.p2; - float2 param_8 = cubic.p3; - float param_9 = t; - float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9); - float2 param_10 = cubic.p0; - float2 param_11 = cubic.p1; - float2 param_12 = cubic.p2; - float2 param_13 = cubic.p3; - float param_14 = t - (0.5 * _step); - float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14); - qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5); - float2 param_15 = qp0; - float2 param_16 = qp1; - float2 param_17 = qp2; - float param_18 = 0.4743416607379913330078125; - SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18); - keep_params[i] = params; - val += params.val; - qp0 = qp2; - } - uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u); - uint param_19 = tag.flags; - bool is_stroke = fill_mode_from_flags(param_19) == 1u; - uint path_ix = cubic.path_ix; - Alloc param_20; - param_20.offset = _711.conf.tile_alloc.offset; - PathRef param_21 = PathRef{ _711.conf.tile_alloc.offset + (path_ix * 12u) }; - Path path = Path_read(param_20, param_21, v_143); - uint param_22 = path.tiles.offset; - uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_24 = true; - Alloc path_alloc = new_alloc(param_22, param_23, param_24); - int4 bbox = int4(path.bbox); - float2 p0 = cubic.p0; - qp0 = cubic.p0; - float v_step = val / float(n); - int n_out = 1; - float val_sum = 0.0; - float2 p1; - float _1143; - TileSeg tile_seg; - for (uint i_1 = 0u; i_1 < n_quads; i_1++) - { - float t_1 = float(i_1 + 1u) * _step; - float2 param_25 = cubic.p0; - float2 param_26 = cubic.p1; - float2 param_27 = cubic.p2; - float2 param_28 = cubic.p3; - float param_29 = t_1; - float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29); - float2 param_30 = cubic.p0; - float2 param_31 = cubic.p1; - float2 param_32 = cubic.p2; - float2 param_33 = cubic.p3; - float param_34 = t_1 - (0.5 * _step); - float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34); - qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5); - SubdivResult params_1 = keep_params[i_1]; - float param_35 = params_1.a0; - float u0 = approx_parabola_inv_integral(param_35); - float param_36 = params_1.a2; - float u2 = approx_parabola_inv_integral(param_36); - float uscale = 1.0 / (u2 - u0); - float target = float(n_out) * v_step; - for (;;) - { - bool _1036 = uint(n_out) == n; - bool _1046; - if (!_1036) - { - _1046 = target < (val_sum + params_1.val); - } - else - { - _1046 = _1036; - } - if (_1046) - { - if (uint(n_out) == n) - { - p1 = cubic.p3; - } - else - { - float u = (target - val_sum) / params_1.val; - float a = mix(params_1.a0, params_1.a2, u); - float param_37 = a; - float au = approx_parabola_inv_integral(param_37); - float t_2 = (au - u0) * uscale; - float2 param_38 = qp0; - float2 param_39 = qp1_1; - float2 param_40 = qp2_1; - float param_41 = t_2; - p1 = eval_quad(param_38, param_39, param_40, param_41); - } - float xmin = fast::min(p0.x, p1.x) - cubic.stroke.x; - float xmax = fast::max(p0.x, p1.x) + cubic.stroke.x; - float ymin = fast::min(p0.y, p1.y) - cubic.stroke.y; - float ymax = fast::max(p0.y, p1.y) + cubic.stroke.y; - float dx = p1.x - p0.x; - float dy = p1.y - p0.y; - if (abs(dy) < 9.999999717180685365747194737196e-10) - { - _1143 = 1000000000.0; - } - else - { - _1143 = dx / dy; - } - float invslope = _1143; - float c = (cubic.stroke.x + (abs(invslope) * (8.0 + cubic.stroke.y))) * 0.0625; - float b = invslope; - float a_1 = (p0.x - ((p0.y - 8.0) * b)) * 0.0625; - int x0 = int(floor(xmin * 0.0625)); - int x1 = int(floor(xmax * 0.0625) + 1.0); - int y0 = int(floor(ymin * 0.0625)); - int y1 = int(floor(ymax * 0.0625) + 1.0); - x0 = clamp(x0, bbox.x, bbox.z); - y0 = clamp(y0, bbox.y, bbox.w); - x1 = clamp(x1, bbox.x, bbox.z); - y1 = clamp(y1, bbox.y, bbox.w); - float xc = a_1 + (b * float(y0)); - int stride = bbox.z - bbox.x; - int base = ((y0 - bbox.y) * stride) - bbox.x; - uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); - uint malloc_size = n_tile_alloc * 24u; - uint param_42 = malloc_size; - uint param_43 = _711.conf.mem_size; - uint param_44 = 4u; - uint _1265 = malloc_stage(param_42, param_43, param_44, v_143); - uint tile_offset = _1265; - if (tile_offset == 0u) - { - mem_ok = false; - } - uint param_45 = tile_offset; - uint param_46 = malloc_size; - bool param_47 = true; - Alloc tile_alloc = new_alloc(param_45, param_46, param_47); - int xray = int(floor(p0.x * 0.0625)); - int last_xray = int(floor(p1.x * 0.0625)); - if (p0.y > p1.y) - { - int tmp = xray; - xray = last_xray; - last_xray = tmp; - } - for (int y = y0; y < y1; y++) - { - float tile_y0 = float(y * 16); - int xbackdrop = max((xray + 1), bbox.x); - bool _1322 = !is_stroke; - bool _1332; - if (_1322) - { - _1332 = fast::min(p0.y, p1.y) < tile_y0; - } - else - { - _1332 = _1322; - } - bool _1339; - if (_1332) - { - _1339 = xbackdrop < bbox.z; - } - else - { - _1339 = _1332; - } - if (_1339) - { - int backdrop = (p1.y < p0.y) ? 1 : (-1); - TileRef param_48 = path.tiles; - uint param_49 = uint(base + xbackdrop); - TileRef tile_ref = Tile_index(param_48, param_49); - uint tile_el = tile_ref.offset >> uint(2); - uint _1369 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.memory[tile_el + 1u], uint(backdrop), memory_order_relaxed); - } - int next_xray = last_xray; - if (y < (y1 - 1)) - { - float tile_y1 = float((y + 1) * 16); - float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy); - next_xray = int(floor(x_edge * 0.0625)); - } - int min_xray = min(xray, next_xray); - int max_xray = max(xray, next_xray); - int xx0 = min(int(floor(xc - c)), min_xray); - int xx1 = max(int(ceil(xc + c)), (max_xray + 1)); - xx0 = clamp(xx0, x0, x1); - xx1 = clamp(xx1, x0, x1); - for (int x = xx0; x < xx1; x++) - { - float tile_x0 = float(x * 16); - TileRef param_50 = TileRef{ path.tiles.offset }; - uint param_51 = uint(base + x); - TileRef tile_ref_1 = Tile_index(param_50, param_51); - uint tile_el_1 = tile_ref_1.offset >> uint(2); - uint old = 0u; - uint _1465 = atomic_exchange_explicit((device atomic_uint*)&v_143.memory[tile_el_1], tile_offset, memory_order_relaxed); - old = _1465; - tile_seg.origin = p0; - tile_seg.vector = p1 - p0; - float y_edge = 0.0; - if (!is_stroke) - { - y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx); - if (fast::min(p0.x, p1.x) < tile_x0) - { - float2 p = float2(tile_x0, y_edge); - if (p0.x > p1.x) - { - tile_seg.vector = p - p0; - } - else - { - tile_seg.origin = p; - tile_seg.vector = p1 - p; - } - if (tile_seg.vector.x == 0.0) - { - tile_seg.vector.x = sign(p1.x - p0.x) * 9.999999717180685365747194737196e-10; - } - } - if ((x <= min_xray) || (max_xray < x)) - { - y_edge = 1000000000.0; - } - } - tile_seg.y_edge = y_edge; - tile_seg.next.offset = old; - if (mem_ok) - { - Alloc param_52 = tile_alloc; - TileSegRef param_53 = TileSegRef{ tile_offset }; - TileSeg param_54 = tile_seg; - TileSeg_write(param_52, param_53, param_54, v_143); - } - tile_offset += 24u; - } - xc += b; - base += stride; - xray = next_xray; - } - n_out++; - target += v_step; - p0 = p1; - continue; - } - else - { - break; - } - } - val_sum += params_1.val; - qp0 = qp2_1; - } - break; - } - } -} - diff --git a/piet-gpu/shader/gen/path_coarse.spv b/piet-gpu/shader/gen/path_coarse.spv deleted file mode 100644 index bd32fc2..0000000 Binary files a/piet-gpu/shader/gen/path_coarse.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/pathseg.dxil b/piet-gpu/shader/gen/pathseg.dxil deleted file mode 100644 index c498755..0000000 Binary files a/piet-gpu/shader/gen/pathseg.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/pathseg.hlsl b/piet-gpu/shader/gen/pathseg.hlsl deleted file mode 100644 index 4e9a0ae..0000000 --- a/piet-gpu/shader/gen/pathseg.hlsl +++ /dev/null @@ -1,662 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct TransformSegRef -{ - uint offset; -}; - -struct TransformSeg -{ - float4 mat; - float2 translate; -}; - -struct PathCubicRef -{ - uint offset; -}; - -struct PathCubic -{ - float2 p0; - float2 p1; - float2 p2; - float2 p3; - uint path_ix; - uint trans_ix; - float2 stroke; -}; - -struct PathSegRef -{ - uint offset; -}; - -struct Monoid -{ - float4 bbox; - uint flags; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const TagMonoid _135 = { 0u, 0u, 0u, 0u, 0u }; -static const Monoid _567 = { 0.0f.xxxx, 0u }; - -RWByteAddressBuffer _111 : register(u0, space0); -ByteAddressBuffer _574 : register(t2, space0); -ByteAddressBuffer _639 : register(t1, space0); -ByteAddressBuffer _710 : register(t3, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared TagMonoid sh_tag[256]; -groupshared Monoid sh_scratch[256]; - -TagMonoid reduce_tag(uint tag_word) -{ - uint point_count = tag_word & 50529027u; - TagMonoid c; - c.pathseg_ix = uint(int(countbits((point_count * 7u) & 67372036u))); - c.linewidth_ix = uint(int(countbits(tag_word & 1077952576u))); - c.path_ix = uint(int(countbits(tag_word & 269488144u))); - c.trans_ix = uint(int(countbits(tag_word & 538976288u))); - uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u); - uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u)); - a += (a >> uint(8)); - a += (a >> uint(16)); - c.pathseg_offset = a & 255u; - return c; -} - -TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -TagMonoid tag_monoid_identity() -{ - return _135; -} - -float2 read_f32_point(uint ix) -{ - float x = asfloat(_574.Load(ix * 4 + 0)); - float y = asfloat(_574.Load((ix + 1u) * 4 + 0)); - return float2(x, y); -} - -float2 read_i16_point(uint ix) -{ - uint raw = _574.Load(ix * 4 + 0); - float x = float(int(raw << uint(16)) >> 16); - float y = float(int(raw) >> 16); - return float2(x, y); -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _111.Load(offset * 4 + 12); - return v; -} - -TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - TransformSeg s; - s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.translate = float2(asfloat(raw4), asfloat(raw5)); - return s; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _111.Store(offset * 4 + 12, val); -} - -void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.p0.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.p0.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.p1.x); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.p1.y); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.p2.x); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = asuint(s.p2.y); - write_mem(param_15, param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = asuint(s.p3.x); - write_mem(param_18, param_19, param_20); - Alloc param_21 = a; - uint param_22 = ix + 7u; - uint param_23 = asuint(s.p3.y); - write_mem(param_21, param_22, param_23); - Alloc param_24 = a; - uint param_25 = ix + 8u; - uint param_26 = s.path_ix; - write_mem(param_24, param_25, param_26); - Alloc param_27 = a; - uint param_28 = ix + 9u; - uint param_29 = s.trans_ix; - write_mem(param_27, param_28, param_29); - Alloc param_30 = a; - uint param_31 = ix + 10u; - uint param_32 = asuint(s.stroke.x); - write_mem(param_30, param_31, param_32); - Alloc param_33 = a; - uint param_34 = ix + 11u; - uint param_35 = asuint(s.stroke.y); - write_mem(param_33, param_34, param_35); -} - -void PathSeg_Cubic_write(Alloc a, PathSegRef ref, uint flags, PathCubic s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 1u; - write_mem(param, param_1, param_2); - PathCubicRef _458 = { ref.offset + 4u }; - Alloc param_3 = a; - PathCubicRef param_4 = _458; - PathCubic param_5 = s; - PathCubic_write(param_3, param_4, param_5); -} - -Monoid combine_monoid(Monoid a, Monoid b) -{ - Monoid c; - c.bbox = b.bbox; - bool _472 = (a.flags & 1u) == 0u; - bool _480; - if (_472) - { - _480 = b.bbox.z <= b.bbox.x; - } - else - { - _480 = _472; - } - bool _488; - if (_480) - { - _488 = b.bbox.w <= b.bbox.y; - } - else - { - _488 = _480; - } - if (_488) - { - c.bbox = a.bbox; - } - else - { - bool _498 = (a.flags & 1u) == 0u; - bool _505; - if (_498) - { - _505 = (b.flags & 2u) == 0u; - } - else - { - _505 = _498; - } - bool _522; - if (_505) - { - bool _512 = a.bbox.z > a.bbox.x; - bool _521; - if (!_512) - { - _521 = a.bbox.w > a.bbox.y; - } - else - { - _521 = _512; - } - _522 = _521; - } - else - { - _522 = _505; - } - if (_522) - { - float4 _529 = c.bbox; - float2 _531 = min(a.bbox.xy, _529.xy); - c.bbox.x = _531.x; - c.bbox.y = _531.y; - float4 _540 = c.bbox; - float2 _542 = max(a.bbox.zw, _540.zw); - c.bbox.z = _542.x; - c.bbox.w = _542.y; - } - } - c.flags = (a.flags & 2u) | b.flags; - c.flags |= ((a.flags & 1u) << uint(1)); - return c; -} - -Monoid monoid_identity() -{ - return _567; -} - -uint round_down(float x) -{ - return uint(max(0.0f, floor(x) + 32768.0f)); -} - -uint round_up(float x) -{ - return uint(min(65535.0f, ceil(x) + 32768.0f)); -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 4u; - uint tag_word = _574.Load(((_639.Load(96) >> uint(2)) + (ix >> uint(2))) * 4 + 0); - uint param = tag_word; - TagMonoid local_tm = reduce_tag(param); - sh_tag[gl_LocalInvocationID.x] = local_tm; - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i)) - { - TagMonoid other = sh_tag[gl_LocalInvocationID.x - (1u << i)]; - TagMonoid param_1 = other; - TagMonoid param_2 = local_tm; - local_tm = combine_tag_monoid(param_1, param_2); - } - GroupMemoryBarrierWithGroupSync(); - sh_tag[gl_LocalInvocationID.x] = local_tm; - } - GroupMemoryBarrierWithGroupSync(); - TagMonoid tm = tag_monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - TagMonoid _716; - _716.trans_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 0); - _716.linewidth_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 4); - _716.pathseg_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 8); - _716.path_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 12); - _716.pathseg_offset = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 16); - tm.trans_ix = _716.trans_ix; - tm.linewidth_ix = _716.linewidth_ix; - tm.pathseg_ix = _716.pathseg_ix; - tm.path_ix = _716.path_ix; - tm.pathseg_offset = _716.pathseg_offset; - } - if (gl_LocalInvocationID.x > 0u) - { - TagMonoid param_3 = tm; - TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; - tm = combine_tag_monoid(param_3, param_4); - } - uint ps_ix = (_639.Load(100) >> uint(2)) + tm.pathseg_offset; - uint lw_ix = (_639.Load(92) >> uint(2)) + tm.linewidth_ix; - uint save_path_ix = tm.path_ix; - uint trans_ix = tm.trans_ix; - TransformSegRef _771 = { _639.Load(40) + (trans_ix * 24u) }; - TransformSegRef trans_ref = _771; - PathSegRef _780 = { _639.Load(32) + (tm.pathseg_ix * 52u) }; - PathSegRef ps_ref = _780; - float linewidth[4]; - uint save_trans_ix[4]; - float2 p0; - float2 p1; - float2 p2; - float2 p3; - Alloc param_13; - Monoid local[4]; - PathCubic cubic; - Alloc param_15; - for (uint i_1 = 0u; i_1 < 4u; i_1++) - { - linewidth[i_1] = asfloat(_574.Load(lw_ix * 4 + 0)); - save_trans_ix[i_1] = trans_ix; - uint tag_byte = tag_word >> (i_1 * 8u); - uint seg_type = tag_byte & 3u; - if (seg_type != 0u) - { - if ((tag_byte & 8u) != 0u) - { - uint param_5 = ps_ix; - p0 = read_f32_point(param_5); - uint param_6 = ps_ix + 2u; - p1 = read_f32_point(param_6); - if (seg_type >= 2u) - { - uint param_7 = ps_ix + 4u; - p2 = read_f32_point(param_7); - if (seg_type == 3u) - { - uint param_8 = ps_ix + 6u; - p3 = read_f32_point(param_8); - } - } - } - else - { - uint param_9 = ps_ix; - p0 = read_i16_point(param_9); - uint param_10 = ps_ix + 1u; - p1 = read_i16_point(param_10); - if (seg_type >= 2u) - { - uint param_11 = ps_ix + 2u; - p2 = read_i16_point(param_11); - if (seg_type == 3u) - { - uint param_12 = ps_ix + 3u; - p3 = read_i16_point(param_12); - } - } - } - Alloc _876; - _876.offset = _639.Load(40); - param_13.offset = _876.offset; - TransformSegRef param_14 = trans_ref; - TransformSeg transform = TransformSeg_read(param_13, param_14); - p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; - p1 = ((transform.mat.xy * p1.x) + (transform.mat.zw * p1.y)) + transform.translate; - float4 bbox = float4(min(p0, p1), max(p0, p1)); - if (seg_type >= 2u) - { - p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _946 = bbox; - float2 _949 = min(_946.xy, p2); - bbox.x = _949.x; - bbox.y = _949.y; - float4 _954 = bbox; - float2 _957 = max(_954.zw, p2); - bbox.z = _957.x; - bbox.w = _957.y; - if (seg_type == 3u) - { - p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _982 = bbox; - float2 _985 = min(_982.xy, p3); - bbox.x = _985.x; - bbox.y = _985.y; - float4 _990 = bbox; - float2 _993 = max(_990.zw, p3); - bbox.z = _993.x; - bbox.w = _993.y; - } - else - { - p3 = p2; - p2 = lerp(p1, p2, 0.3333333432674407958984375f.xx); - p1 = lerp(p1, p0, 0.3333333432674407958984375f.xx); - } - } - else - { - p3 = p1; - p2 = lerp(p3, p0, 0.3333333432674407958984375f.xx); - p1 = lerp(p0, p3, 0.3333333432674407958984375f.xx); - } - float2 stroke = 0.0f.xx; - if (linewidth[i_1] >= 0.0f) - { - stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5f * linewidth[i_1]); - bbox += float4(-stroke, stroke); - } - local[i_1].bbox = bbox; - local[i_1].flags = 0u; - cubic.p0 = p0; - cubic.p1 = p1; - cubic.p2 = p2; - cubic.p3 = p3; - cubic.path_ix = tm.path_ix; - cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; - cubic.stroke = stroke; - uint fill_mode = uint(linewidth[i_1] >= 0.0f); - Alloc _1088; - _1088.offset = _639.Load(32); - param_15.offset = _1088.offset; - PathSegRef param_16 = ps_ref; - uint param_17 = fill_mode; - PathCubic param_18 = cubic; - PathSeg_Cubic_write(param_15, param_16, param_17, param_18); - ps_ref.offset += 52u; - uint n_points = (tag_byte & 3u) + ((tag_byte >> uint(2)) & 1u); - uint n_words = n_points + (n_points & (((tag_byte >> uint(3)) & 1u) * 15u)); - ps_ix += n_words; - } - else - { - local[i_1].bbox = 0.0f.xxxx; - uint is_path = (tag_byte >> uint(4)) & 1u; - local[i_1].flags = is_path; - tm.path_ix += is_path; - trans_ix += ((tag_byte >> uint(5)) & 1u); - trans_ref.offset += (((tag_byte >> uint(5)) & 1u) * 24u); - lw_ix += ((tag_byte >> uint(6)) & 1u); - } - } - Monoid agg = local[0]; - for (uint i_2 = 1u; i_2 < 4u; i_2++) - { - Monoid param_19 = agg; - Monoid param_20 = local[i_2]; - agg = combine_monoid(param_19, param_20); - local[i_2] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_3 = 0u; i_3 < 8u; i_3++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_3)) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - (1u << i_3)]; - Monoid param_21 = other_1; - Monoid param_22 = agg; - agg = combine_monoid(param_21, param_22); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - uint path_ix = save_path_ix; - uint bbox_out_ix = (_639.Load(44) >> uint(2)) + (path_ix * 6u); - Monoid row = monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_4 = 0u; i_4 < 4u; i_4++) - { - Monoid param_23 = row; - Monoid param_24 = local[i_4]; - Monoid m = combine_monoid(param_23, param_24); - bool do_atomic = false; - bool _1263 = i_4 == 3u; - bool _1269; - if (_1263) - { - _1269 = gl_LocalInvocationID.x == 255u; - } - else - { - _1269 = _1263; - } - if (_1269) - { - do_atomic = true; - } - if ((m.flags & 1u) != 0u) - { - _111.Store((bbox_out_ix + 4u) * 4 + 12, asuint(linewidth[i_4])); - _111.Store((bbox_out_ix + 5u) * 4 + 12, save_trans_ix[i_4]); - if ((m.flags & 2u) == 0u) - { - do_atomic = true; - } - else - { - float param_25 = m.bbox.x; - _111.Store(bbox_out_ix * 4 + 12, round_down(param_25)); - float param_26 = m.bbox.y; - _111.Store((bbox_out_ix + 1u) * 4 + 12, round_down(param_26)); - float param_27 = m.bbox.z; - _111.Store((bbox_out_ix + 2u) * 4 + 12, round_up(param_27)); - float param_28 = m.bbox.w; - _111.Store((bbox_out_ix + 3u) * 4 + 12, round_up(param_28)); - bbox_out_ix += 6u; - do_atomic = false; - } - } - if (do_atomic) - { - bool _1334 = m.bbox.z > m.bbox.x; - bool _1343; - if (!_1334) - { - _1343 = m.bbox.w > m.bbox.y; - } - else - { - _1343 = _1334; - } - if (_1343) - { - float param_29 = m.bbox.x; - uint _1352; - _111.InterlockedMin(bbox_out_ix * 4 + 12, round_down(param_29), _1352); - float param_30 = m.bbox.y; - uint _1360; - _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 12, round_down(param_30), _1360); - float param_31 = m.bbox.z; - uint _1368; - _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 12, round_up(param_31), _1368); - float param_32 = m.bbox.w; - uint _1376; - _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 12, round_up(param_32), _1376); - } - bbox_out_ix += 6u; - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/pathseg.msl b/piet-gpu/shader/gen/pathseg.msl deleted file mode 100644 index 5aea66d..0000000 --- a/piet-gpu/shader/gen/pathseg.msl +++ /dev/null @@ -1,719 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Alloc -{ - uint offset; -}; - -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct TransformSegRef -{ - uint offset; -}; - -struct TransformSeg -{ - float4 mat; - float2 translate; -}; - -struct PathCubicRef -{ - uint offset; -}; - -struct PathCubic -{ - float2 p0; - float2 p1; - float2 p2; - float2 p3; - uint path_ix; - uint trans_ix; - float2 stroke; -}; - -struct PathSegRef -{ - uint offset; -}; - -struct Monoid -{ - float4 bbox; - uint flags; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct TagMonoid_1 -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct ParentBuf -{ - TagMonoid_1 parent[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -TagMonoid reduce_tag(thread const uint& tag_word) -{ - uint point_count = tag_word & 50529027u; - TagMonoid c; - c.pathseg_ix = uint(int(popcount((point_count * 7u) & 67372036u))); - c.linewidth_ix = uint(int(popcount(tag_word & 1077952576u))); - c.path_ix = uint(int(popcount(tag_word & 269488144u))); - c.trans_ix = uint(int(popcount(tag_word & 538976288u))); - uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u); - uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u)); - a += (a >> uint(8)); - a += (a >> uint(16)); - c.pathseg_offset = a & 255u; - return c; -} - -static inline __attribute__((always_inline)) -TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -static inline __attribute__((always_inline)) -TagMonoid tag_monoid_identity() -{ - return TagMonoid{ 0u, 0u, 0u, 0u, 0u }; -} - -static inline __attribute__((always_inline)) -float2 read_f32_point(thread const uint& ix, const device SceneBuf& v_574) -{ - float x = as_type(v_574.scene[ix]); - float y = as_type(v_574.scene[ix + 1u]); - return float2(x, y); -} - -static inline __attribute__((always_inline)) -float2 read_i16_point(thread const uint& ix, const device SceneBuf& v_574) -{ - uint raw = v_574.scene[ix]; - float x = float(int(raw << uint(16)) >> 16); - float y = float(int(raw) >> 16); - return float2(x, y); -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_111) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_111.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -TransformSeg TransformSeg_read(thread const Alloc& a, thread const TransformSegRef& ref, device Memory& v_111) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_111); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_111); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_111); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_111); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_111); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_111); - TransformSeg s; - s.mat = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.translate = float2(as_type(raw4), as_type(raw5)); - return s; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_111) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_111.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void PathCubic_write(thread const Alloc& a, thread const PathCubicRef& ref, thread const PathCubic& s, device Memory& v_111) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.p0.x); - write_mem(param, param_1, param_2, v_111); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.p0.y); - write_mem(param_3, param_4, param_5, v_111); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.p1.x); - write_mem(param_6, param_7, param_8, v_111); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.p1.y); - write_mem(param_9, param_10, param_11, v_111); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.p2.x); - write_mem(param_12, param_13, param_14, v_111); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = as_type(s.p2.y); - write_mem(param_15, param_16, param_17, v_111); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = as_type(s.p3.x); - write_mem(param_18, param_19, param_20, v_111); - Alloc param_21 = a; - uint param_22 = ix + 7u; - uint param_23 = as_type(s.p3.y); - write_mem(param_21, param_22, param_23, v_111); - Alloc param_24 = a; - uint param_25 = ix + 8u; - uint param_26 = s.path_ix; - write_mem(param_24, param_25, param_26, v_111); - Alloc param_27 = a; - uint param_28 = ix + 9u; - uint param_29 = s.trans_ix; - write_mem(param_27, param_28, param_29, v_111); - Alloc param_30 = a; - uint param_31 = ix + 10u; - uint param_32 = as_type(s.stroke.x); - write_mem(param_30, param_31, param_32, v_111); - Alloc param_33 = a; - uint param_34 = ix + 11u; - uint param_35 = as_type(s.stroke.y); - write_mem(param_33, param_34, param_35, v_111); -} - -static inline __attribute__((always_inline)) -void PathSeg_Cubic_write(thread const Alloc& a, thread const PathSegRef& ref, thread const uint& flags, thread const PathCubic& s, device Memory& v_111) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 1u; - write_mem(param, param_1, param_2, v_111); - Alloc param_3 = a; - PathCubicRef param_4 = PathCubicRef{ ref.offset + 4u }; - PathCubic param_5 = s; - PathCubic_write(param_3, param_4, param_5, v_111); -} - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - Monoid c; - c.bbox = b.bbox; - bool _472 = (a.flags & 1u) == 0u; - bool _480; - if (_472) - { - _480 = b.bbox.z <= b.bbox.x; - } - else - { - _480 = _472; - } - bool _488; - if (_480) - { - _488 = b.bbox.w <= b.bbox.y; - } - else - { - _488 = _480; - } - if (_488) - { - c.bbox = a.bbox; - } - else - { - bool _498 = (a.flags & 1u) == 0u; - bool _505; - if (_498) - { - _505 = (b.flags & 2u) == 0u; - } - else - { - _505 = _498; - } - bool _522; - if (_505) - { - bool _512 = a.bbox.z > a.bbox.x; - bool _521; - if (!_512) - { - _521 = a.bbox.w > a.bbox.y; - } - else - { - _521 = _512; - } - _522 = _521; - } - else - { - _522 = _505; - } - if (_522) - { - float4 _529 = c.bbox; - float2 _531 = fast::min(a.bbox.xy, _529.xy); - c.bbox.x = _531.x; - c.bbox.y = _531.y; - float4 _540 = c.bbox; - float2 _542 = fast::max(a.bbox.zw, _540.zw); - c.bbox.z = _542.x; - c.bbox.w = _542.y; - } - } - c.flags = (a.flags & 2u) | b.flags; - c.flags |= ((a.flags & 1u) << uint(1)); - return c; -} - -static inline __attribute__((always_inline)) -Monoid monoid_identity() -{ - return Monoid{ float4(0.0), 0u }; -} - -static inline __attribute__((always_inline)) -uint round_down(thread const float& x) -{ - return uint(fast::max(0.0, floor(x) + 32768.0)); -} - -static inline __attribute__((always_inline)) -uint round_up(thread const float& x) -{ - return uint(fast::min(65535.0, ceil(x) + 32768.0)); -} - -kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _710 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup TagMonoid sh_tag[256]; - threadgroup Monoid sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 4u; - uint tag_word = v_574.scene[(_639.conf.pathtag_offset >> uint(2)) + (ix >> uint(2))]; - uint param = tag_word; - TagMonoid local_tm = reduce_tag(param); - sh_tag[gl_LocalInvocationID.x] = local_tm; - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i)) - { - TagMonoid other = sh_tag[gl_LocalInvocationID.x - (1u << i)]; - TagMonoid param_1 = other; - TagMonoid param_2 = local_tm; - local_tm = combine_tag_monoid(param_1, param_2); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_tag[gl_LocalInvocationID.x] = local_tm; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - TagMonoid tm = tag_monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - uint _713 = gl_WorkGroupID.x - 1u; - tm.trans_ix = _710.parent[_713].trans_ix; - tm.linewidth_ix = _710.parent[_713].linewidth_ix; - tm.pathseg_ix = _710.parent[_713].pathseg_ix; - tm.path_ix = _710.parent[_713].path_ix; - tm.pathseg_offset = _710.parent[_713].pathseg_offset; - } - if (gl_LocalInvocationID.x > 0u) - { - TagMonoid param_3 = tm; - TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; - tm = combine_tag_monoid(param_3, param_4); - } - uint ps_ix = (_639.conf.pathseg_offset >> uint(2)) + tm.pathseg_offset; - uint lw_ix = (_639.conf.linewidth_offset >> uint(2)) + tm.linewidth_ix; - uint save_path_ix = tm.path_ix; - uint trans_ix = tm.trans_ix; - TransformSegRef trans_ref = TransformSegRef{ _639.conf.trans_alloc.offset + (trans_ix * 24u) }; - PathSegRef ps_ref = PathSegRef{ _639.conf.pathseg_alloc.offset + (tm.pathseg_ix * 52u) }; - spvUnsafeArray linewidth; - spvUnsafeArray save_trans_ix; - float2 p0; - float2 p1; - float2 p2; - float2 p3; - Alloc param_13; - spvUnsafeArray local; - PathCubic cubic; - Alloc param_15; - for (uint i_1 = 0u; i_1 < 4u; i_1++) - { - linewidth[i_1] = as_type(v_574.scene[lw_ix]); - save_trans_ix[i_1] = trans_ix; - uint tag_byte = tag_word >> (i_1 * 8u); - uint seg_type = tag_byte & 3u; - if (seg_type != 0u) - { - if ((tag_byte & 8u) != 0u) - { - uint param_5 = ps_ix; - p0 = read_f32_point(param_5, v_574); - uint param_6 = ps_ix + 2u; - p1 = read_f32_point(param_6, v_574); - if (seg_type >= 2u) - { - uint param_7 = ps_ix + 4u; - p2 = read_f32_point(param_7, v_574); - if (seg_type == 3u) - { - uint param_8 = ps_ix + 6u; - p3 = read_f32_point(param_8, v_574); - } - } - } - else - { - uint param_9 = ps_ix; - p0 = read_i16_point(param_9, v_574); - uint param_10 = ps_ix + 1u; - p1 = read_i16_point(param_10, v_574); - if (seg_type >= 2u) - { - uint param_11 = ps_ix + 2u; - p2 = read_i16_point(param_11, v_574); - if (seg_type == 3u) - { - uint param_12 = ps_ix + 3u; - p3 = read_i16_point(param_12, v_574); - } - } - } - param_13.offset = _639.conf.trans_alloc.offset; - TransformSegRef param_14 = trans_ref; - TransformSeg transform = TransformSeg_read(param_13, param_14, v_111); - p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; - p1 = ((transform.mat.xy * p1.x) + (transform.mat.zw * p1.y)) + transform.translate; - float4 bbox = float4(fast::min(p0, p1), fast::max(p0, p1)); - if (seg_type >= 2u) - { - p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _946 = bbox; - float2 _949 = fast::min(_946.xy, p2); - bbox.x = _949.x; - bbox.y = _949.y; - float4 _954 = bbox; - float2 _957 = fast::max(_954.zw, p2); - bbox.z = _957.x; - bbox.w = _957.y; - if (seg_type == 3u) - { - p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _982 = bbox; - float2 _985 = fast::min(_982.xy, p3); - bbox.x = _985.x; - bbox.y = _985.y; - float4 _990 = bbox; - float2 _993 = fast::max(_990.zw, p3); - bbox.z = _993.x; - bbox.w = _993.y; - } - else - { - p3 = p2; - p2 = mix(p1, p2, float2(0.3333333432674407958984375)); - p1 = mix(p1, p0, float2(0.3333333432674407958984375)); - } - } - else - { - p3 = p1; - p2 = mix(p3, p0, float2(0.3333333432674407958984375)); - p1 = mix(p0, p3, float2(0.3333333432674407958984375)); - } - float2 stroke = float2(0.0); - if (linewidth[i_1] >= 0.0) - { - stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5 * linewidth[i_1]); - bbox += float4(-stroke, stroke); - } - local[i_1].bbox = bbox; - local[i_1].flags = 0u; - cubic.p0 = p0; - cubic.p1 = p1; - cubic.p2 = p2; - cubic.p3 = p3; - cubic.path_ix = tm.path_ix; - cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; - cubic.stroke = stroke; - uint fill_mode = uint(linewidth[i_1] >= 0.0); - param_15.offset = _639.conf.pathseg_alloc.offset; - PathSegRef param_16 = ps_ref; - uint param_17 = fill_mode; - PathCubic param_18 = cubic; - PathSeg_Cubic_write(param_15, param_16, param_17, param_18, v_111); - ps_ref.offset += 52u; - uint n_points = (tag_byte & 3u) + ((tag_byte >> uint(2)) & 1u); - uint n_words = n_points + (n_points & (((tag_byte >> uint(3)) & 1u) * 15u)); - ps_ix += n_words; - } - else - { - local[i_1].bbox = float4(0.0); - uint is_path = (tag_byte >> uint(4)) & 1u; - local[i_1].flags = is_path; - tm.path_ix += is_path; - trans_ix += ((tag_byte >> uint(5)) & 1u); - trans_ref.offset += (((tag_byte >> uint(5)) & 1u) * 24u); - lw_ix += ((tag_byte >> uint(6)) & 1u); - } - } - Monoid agg = local[0]; - for (uint i_2 = 1u; i_2 < 4u; i_2++) - { - Monoid param_19 = agg; - Monoid param_20 = local[i_2]; - agg = combine_monoid(param_19, param_20); - local[i_2] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_3 = 0u; i_3 < 8u; i_3++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_3)) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - (1u << i_3)]; - Monoid param_21 = other_1; - Monoid param_22 = agg; - agg = combine_monoid(param_21, param_22); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint path_ix = save_path_ix; - uint bbox_out_ix = (_639.conf.path_bbox_alloc.offset >> uint(2)) + (path_ix * 6u); - Monoid row = monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_4 = 0u; i_4 < 4u; i_4++) - { - Monoid param_23 = row; - Monoid param_24 = local[i_4]; - Monoid m = combine_monoid(param_23, param_24); - bool do_atomic = false; - bool _1263 = i_4 == 3u; - bool _1269; - if (_1263) - { - _1269 = gl_LocalInvocationID.x == 255u; - } - else - { - _1269 = _1263; - } - if (_1269) - { - do_atomic = true; - } - if ((m.flags & 1u) != 0u) - { - v_111.memory[bbox_out_ix + 4u] = as_type(linewidth[i_4]); - v_111.memory[bbox_out_ix + 5u] = save_trans_ix[i_4]; - if ((m.flags & 2u) == 0u) - { - do_atomic = true; - } - else - { - float param_25 = m.bbox.x; - v_111.memory[bbox_out_ix] = round_down(param_25); - float param_26 = m.bbox.y; - v_111.memory[bbox_out_ix + 1u] = round_down(param_26); - float param_27 = m.bbox.z; - v_111.memory[bbox_out_ix + 2u] = round_up(param_27); - float param_28 = m.bbox.w; - v_111.memory[bbox_out_ix + 3u] = round_up(param_28); - bbox_out_ix += 6u; - do_atomic = false; - } - } - if (do_atomic) - { - bool _1334 = m.bbox.z > m.bbox.x; - bool _1343; - if (!_1334) - { - _1343 = m.bbox.w > m.bbox.y; - } - else - { - _1343 = _1334; - } - if (_1343) - { - float param_29 = m.bbox.x; - uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); - float param_30 = m.bbox.y; - uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); - float param_31 = m.bbox.z; - uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); - float param_32 = m.bbox.w; - uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); - } - bbox_out_ix += 6u; - } - } -} - diff --git a/piet-gpu/shader/gen/pathseg.spv b/piet-gpu/shader/gen/pathseg.spv deleted file mode 100644 index 2fb04e5..0000000 Binary files a/piet-gpu/shader/gen/pathseg.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/pathtag_reduce.dxil b/piet-gpu/shader/gen/pathtag_reduce.dxil deleted file mode 100644 index 692ac5f..0000000 Binary files a/piet-gpu/shader/gen/pathtag_reduce.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/pathtag_reduce.hlsl b/piet-gpu/shader/gen/pathtag_reduce.hlsl deleted file mode 100644 index 6e9dee1..0000000 --- a/piet-gpu/shader/gen/pathtag_reduce.hlsl +++ /dev/null @@ -1,139 +0,0 @@ -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u); - -ByteAddressBuffer _139 : register(t1, space0); -ByteAddressBuffer _151 : register(t2, space0); -RWByteAddressBuffer _238 : register(u3, space0); -RWByteAddressBuffer _258 : register(u0, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared TagMonoid sh_scratch[128]; - -TagMonoid reduce_tag(uint tag_word) -{ - uint point_count = tag_word & 50529027u; - TagMonoid c; - c.pathseg_ix = uint(int(countbits((point_count * 7u) & 67372036u))); - c.linewidth_ix = uint(int(countbits(tag_word & 1077952576u))); - c.path_ix = uint(int(countbits(tag_word & 269488144u))); - c.trans_ix = uint(int(countbits(tag_word & 538976288u))); - uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u); - uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u)); - a += (a >> uint(8)); - a += (a >> uint(16)); - c.pathseg_offset = a & 255u; - return c; -} - -TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 2u; - uint scene_ix = (_139.Load(96) >> uint(2)) + ix; - uint tag_word = _151.Load(scene_ix * 4 + 0); - uint param = tag_word; - TagMonoid agg = reduce_tag(param); - for (uint i = 1u; i < 2u; i++) - { - tag_word = _151.Load((scene_ix + i) * 4 + 0); - uint param_1 = tag_word; - TagMonoid param_2 = agg; - TagMonoid param_3 = reduce_tag(param_1); - agg = combine_tag_monoid(param_2, param_3); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 7u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 128u) - { - TagMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - TagMonoid param_4 = agg; - TagMonoid param_5 = other; - agg = combine_tag_monoid(param_4, param_5); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix); - _238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix); - _238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix); - _238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix); - _238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset); - } -} - -[numthreads(128, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/pathtag_reduce.msl b/piet-gpu/shader/gen/pathtag_reduce.msl deleted file mode 100644 index c6266ad..0000000 --- a/piet-gpu/shader/gen/pathtag_reduce.msl +++ /dev/null @@ -1,156 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct TagMonoid_1 -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct OutBuf -{ - TagMonoid_1 outbuf[1]; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(128u, 1u, 1u); - -static inline __attribute__((always_inline)) -TagMonoid reduce_tag(thread const uint& tag_word) -{ - uint point_count = tag_word & 50529027u; - TagMonoid c; - c.pathseg_ix = uint(int(popcount((point_count * 7u) & 67372036u))); - c.linewidth_ix = uint(int(popcount(tag_word & 1077952576u))); - c.path_ix = uint(int(popcount(tag_word & 269488144u))); - c.trans_ix = uint(int(popcount(tag_word & 538976288u))); - uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u); - uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u)); - a += (a >> uint(8)); - a += (a >> uint(16)); - c.pathseg_offset = a & 255u; - return c; -} - -static inline __attribute__((always_inline)) -TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _151 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup TagMonoid sh_scratch[128]; - uint ix = gl_GlobalInvocationID.x * 2u; - uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix; - uint tag_word = _151.scene[scene_ix]; - uint param = tag_word; - TagMonoid agg = reduce_tag(param); - for (uint i = 1u; i < 2u; i++) - { - tag_word = _151.scene[scene_ix + i]; - uint param_1 = tag_word; - TagMonoid param_2 = agg; - TagMonoid param_3 = reduce_tag(param_1); - agg = combine_tag_monoid(param_2, param_3); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 7u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 128u) - { - TagMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - TagMonoid param_4 = agg; - TagMonoid param_5 = other; - agg = combine_tag_monoid(param_4, param_5); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix; - _238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix; - _238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix; - _238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; - _238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset; - } -} - diff --git a/piet-gpu/shader/gen/pathtag_reduce.spv b/piet-gpu/shader/gen/pathtag_reduce.spv deleted file mode 100644 index 829addc..0000000 Binary files a/piet-gpu/shader/gen/pathtag_reduce.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/pathtag_root.dxil b/piet-gpu/shader/gen/pathtag_root.dxil deleted file mode 100644 index 77f12e6..0000000 Binary files a/piet-gpu/shader/gen/pathtag_root.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/pathtag_root.hlsl b/piet-gpu/shader/gen/pathtag_root.hlsl deleted file mode 100644 index 7ad806c..0000000 --- a/piet-gpu/shader/gen/pathtag_root.hlsl +++ /dev/null @@ -1,115 +0,0 @@ -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const TagMonoid _18 = { 0u, 0u, 0u, 0u, 0u }; - -RWByteAddressBuffer _78 : register(u0, space0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared TagMonoid sh_scratch[256]; - -TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -TagMonoid tag_monoid_identity() -{ - return _18; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - TagMonoid _82; - _82.trans_ix = _78.Load(ix * 20 + 0); - _82.linewidth_ix = _78.Load(ix * 20 + 4); - _82.pathseg_ix = _78.Load(ix * 20 + 8); - _82.path_ix = _78.Load(ix * 20 + 12); - _82.pathseg_offset = _78.Load(ix * 20 + 16); - TagMonoid local[8]; - local[0].trans_ix = _82.trans_ix; - local[0].linewidth_ix = _82.linewidth_ix; - local[0].pathseg_ix = _82.pathseg_ix; - local[0].path_ix = _82.path_ix; - local[0].pathseg_offset = _82.pathseg_offset; - TagMonoid param_1; - for (uint i = 1u; i < 8u; i++) - { - TagMonoid param = local[i - 1u]; - TagMonoid _115; - _115.trans_ix = _78.Load((ix + i) * 20 + 0); - _115.linewidth_ix = _78.Load((ix + i) * 20 + 4); - _115.pathseg_ix = _78.Load((ix + i) * 20 + 8); - _115.path_ix = _78.Load((ix + i) * 20 + 12); - _115.pathseg_offset = _78.Load((ix + i) * 20 + 16); - param_1.trans_ix = _115.trans_ix; - param_1.linewidth_ix = _115.linewidth_ix; - param_1.pathseg_ix = _115.pathseg_ix; - param_1.path_ix = _115.path_ix; - param_1.pathseg_offset = _115.pathseg_offset; - local[i] = combine_tag_monoid(param, param_1); - } - TagMonoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - TagMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - TagMonoid param_2 = other; - TagMonoid param_3 = agg; - agg = combine_tag_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - TagMonoid row = tag_monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - TagMonoid param_4 = row; - TagMonoid param_5 = local[i_2]; - TagMonoid m = combine_tag_monoid(param_4, param_5); - uint _210 = ix + i_2; - _78.Store(_210 * 20 + 0, m.trans_ix); - _78.Store(_210 * 20 + 4, m.linewidth_ix); - _78.Store(_210 * 20 + 8, m.pathseg_ix); - _78.Store(_210 * 20 + 12, m.path_ix); - _78.Store(_210 * 20 + 16, m.pathseg_offset); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/pathtag_root.msl b/piet-gpu/shader/gen/pathtag_root.msl deleted file mode 100644 index 65e3741..0000000 --- a/piet-gpu/shader/gen/pathtag_root.msl +++ /dev/null @@ -1,146 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct TagMonoid_1 -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct DataBuf -{ - TagMonoid_1 data[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -static inline __attribute__((always_inline)) -TagMonoid tag_monoid_identity() -{ - return TagMonoid{ 0u, 0u, 0u, 0u, 0u }; -} - -kernel void main0(device DataBuf& _78 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup TagMonoid sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - spvUnsafeArray local; - local[0].trans_ix = _78.data[ix].trans_ix; - local[0].linewidth_ix = _78.data[ix].linewidth_ix; - local[0].pathseg_ix = _78.data[ix].pathseg_ix; - local[0].path_ix = _78.data[ix].path_ix; - local[0].pathseg_offset = _78.data[ix].pathseg_offset; - TagMonoid param_1; - for (uint i = 1u; i < 8u; i++) - { - uint _109 = ix + i; - TagMonoid param = local[i - 1u]; - param_1.trans_ix = _78.data[_109].trans_ix; - param_1.linewidth_ix = _78.data[_109].linewidth_ix; - param_1.pathseg_ix = _78.data[_109].pathseg_ix; - param_1.path_ix = _78.data[_109].path_ix; - param_1.pathseg_offset = _78.data[_109].pathseg_offset; - local[i] = combine_tag_monoid(param, param_1); - } - TagMonoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - TagMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - TagMonoid param_2 = other; - TagMonoid param_3 = agg; - agg = combine_tag_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - TagMonoid row = tag_monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - TagMonoid param_4 = row; - TagMonoid param_5 = local[i_2]; - TagMonoid m = combine_tag_monoid(param_4, param_5); - uint _210 = ix + i_2; - _78.data[_210].trans_ix = m.trans_ix; - _78.data[_210].linewidth_ix = m.linewidth_ix; - _78.data[_210].pathseg_ix = m.pathseg_ix; - _78.data[_210].path_ix = m.path_ix; - _78.data[_210].pathseg_offset = m.pathseg_offset; - } -} - diff --git a/piet-gpu/shader/gen/pathtag_root.spv b/piet-gpu/shader/gen/pathtag_root.spv deleted file mode 100644 index 3783b49..0000000 Binary files a/piet-gpu/shader/gen/pathtag_root.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/tile_alloc.dxil b/piet-gpu/shader/gen/tile_alloc.dxil deleted file mode 100644 index 35a1c2b..0000000 Binary files a/piet-gpu/shader/gen/tile_alloc.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/tile_alloc.hlsl b/piet-gpu/shader/gen/tile_alloc.hlsl deleted file mode 100644 index aed9001..0000000 --- a/piet-gpu/shader/gen/tile_alloc.hlsl +++ /dev/null @@ -1,236 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -RWByteAddressBuffer _53 : register(u0, space0); -ByteAddressBuffer _148 : register(t1, space0); -ByteAddressBuffer _232 : register(t2, space0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared uint sh_tile_count[256]; -groupshared uint sh_tile_offset; - -bool check_deps(uint dep_stage) -{ - uint _60; - _53.InterlockedOr(4, 0u, _60); - return (_60 & dep_stage) == 0u; -} - -float4 load_draw_bbox(uint draw_ix) -{ - uint base = (_148.Load(68) >> uint(2)) + (4u * draw_ix); - float x0 = asfloat(_53.Load(base * 4 + 12)); - float y0 = asfloat(_53.Load((base + 1u) * 4 + 12)); - float x1 = asfloat(_53.Load((base + 2u) * 4 + 12)); - float y1 = asfloat(_53.Load((base + 3u) * 4 + 12)); - float4 bbox = float4(x0, y0, x1, y1); - return bbox; -} - -uint malloc_stage(uint size, uint mem_size, uint stage) -{ - uint _70; - _53.InterlockedAdd(0, size, _70); - uint offset = _70; - if ((offset + size) > mem_size) - { - uint _80; - _53.InterlockedOr(4, stage, _80); - offset = 0u; - } - return offset; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _53.Store(offset * 4 + 12, val); -} - -void Path_write(Alloc a, PathRef ref, Path s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.bbox.x | (s.bbox.y << uint(16)); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = s.bbox.z | (s.bbox.w << uint(16)); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = s.tiles.offset; - write_mem(param_6, param_7, param_8); -} - -void comp_main() -{ - uint param = 1u; - bool _192 = check_deps(param); - if (!_192) - { - return; - } - uint th_ix = gl_LocalInvocationID.x; - uint element_ix = gl_GlobalInvocationID.x; - PathRef _216 = { _148.Load(20) + (element_ix * 12u) }; - PathRef path_ref = _216; - uint drawtag_base = _148.Load(104) >> uint(2); - uint drawtag = 0u; - if (element_ix < _148.Load(4)) - { - drawtag = _232.Load((drawtag_base + element_ix) * 4 + 0); - } - int x0 = 0; - int y0 = 0; - int x1 = 0; - int y1 = 0; - if ((drawtag != 0u) && (drawtag != 37u)) - { - uint param_1 = element_ix; - float4 bbox = load_draw_bbox(param_1); - x0 = int(floor(bbox.x * 0.0625f)); - y0 = int(floor(bbox.y * 0.0625f)); - x1 = int(ceil(bbox.z * 0.0625f)); - y1 = int(ceil(bbox.w * 0.0625f)); - } - x0 = clamp(x0, 0, int(_148.Load(12))); - y0 = clamp(y0, 0, int(_148.Load(16))); - x1 = clamp(x1, 0, int(_148.Load(12))); - y1 = clamp(y1, 0, int(_148.Load(16))); - Path path; - path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); - uint tile_count = uint((x1 - x0) * (y1 - y0)); - sh_tile_count[th_ix] = tile_count; - uint total_tile_count = tile_count; - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - if (th_ix >= (1u << i)) - { - total_tile_count += sh_tile_count[th_ix - (1u << i)]; - } - GroupMemoryBarrierWithGroupSync(); - sh_tile_count[th_ix] = total_tile_count; - } - if (th_ix == 255u) - { - uint param_2 = total_tile_count * 8u; - uint param_3 = _148.Load(0); - uint param_4 = 2u; - uint _370 = malloc_stage(param_2, param_3, param_4); - sh_tile_offset = _370; - } - GroupMemoryBarrierWithGroupSync(); - uint offset_start = sh_tile_offset; - if (offset_start == 0u) - { - return; - } - if (element_ix < _148.Load(4)) - { - uint _387; - if (th_ix > 0u) - { - _387 = sh_tile_count[th_ix - 1u]; - } - else - { - _387 = 0u; - } - uint tile_subix = _387; - TileRef _400 = { offset_start + (8u * tile_subix) }; - path.tiles = _400; - Alloc _406; - _406.offset = _148.Load(20); - Alloc param_5; - param_5.offset = _406.offset; - PathRef param_6 = path_ref; - Path param_7 = path; - Path_write(param_5, param_6, param_7); - } - uint total_count = sh_tile_count[255] * 2u; - uint start_ix = offset_start >> uint(2); - for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u) - { - _53.Store((start_ix + i_1) * 4 + 12, 0u); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/tile_alloc.msl b/piet-gpu/shader/gen/tile_alloc.msl deleted file mode 100644 index e02138a..0000000 --- a/piet-gpu/shader/gen/tile_alloc.msl +++ /dev/null @@ -1,247 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct Alloc -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -bool check_deps(thread const uint& dep_stage, device Memory& v_53) -{ - uint _60 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, 0u, memory_order_relaxed); - return (_60 & dep_stage) == 0u; -} - -static inline __attribute__((always_inline)) -float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_53, const device ConfigBuf& v_148) -{ - uint base = (v_148.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix); - float x0 = as_type(v_53.memory[base]); - float y0 = as_type(v_53.memory[base + 1u]); - float x1 = as_type(v_53.memory[base + 2u]); - float y1 = as_type(v_53.memory[base + 3u]); - float4 bbox = float4(x0, y0, x1, y1); - return bbox; -} - -static inline __attribute__((always_inline)) -uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_53) -{ - uint _70 = atomic_fetch_add_explicit((device atomic_uint*)&v_53.mem_offset, size, memory_order_relaxed); - uint offset = _70; - if ((offset + size) > mem_size) - { - uint _80 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, stage, memory_order_relaxed); - offset = 0u; - } - return offset; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_53) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_53.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_53) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.bbox.x | (s.bbox.y << uint(16)); - write_mem(param, param_1, param_2, v_53); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = s.bbox.z | (s.bbox.w << uint(16)); - write_mem(param_3, param_4, param_5, v_53); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = s.tiles.offset; - write_mem(param_6, param_7, param_8, v_53); -} - -kernel void main0(device Memory& v_53 [[buffer(0)]], const device ConfigBuf& v_148 [[buffer(1)]], const device SceneBuf& _232 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - threadgroup uint sh_tile_count[256]; - threadgroup uint sh_tile_offset; - uint param = 1u; - bool _192 = check_deps(param, v_53); - if (!_192) - { - return; - } - uint th_ix = gl_LocalInvocationID.x; - uint element_ix = gl_GlobalInvocationID.x; - PathRef path_ref = PathRef{ v_148.conf.tile_alloc.offset + (element_ix * 12u) }; - uint drawtag_base = v_148.conf.drawtag_offset >> uint(2); - uint drawtag = 0u; - if (element_ix < v_148.conf.n_elements) - { - drawtag = _232.scene[drawtag_base + element_ix]; - } - int x0 = 0; - int y0 = 0; - int x1 = 0; - int y1 = 0; - if ((drawtag != 0u) && (drawtag != 37u)) - { - uint param_1 = element_ix; - float4 bbox = load_draw_bbox(param_1, v_53, v_148); - x0 = int(floor(bbox.x * 0.0625)); - y0 = int(floor(bbox.y * 0.0625)); - x1 = int(ceil(bbox.z * 0.0625)); - y1 = int(ceil(bbox.w * 0.0625)); - } - x0 = clamp(x0, 0, int(v_148.conf.width_in_tiles)); - y0 = clamp(y0, 0, int(v_148.conf.height_in_tiles)); - x1 = clamp(x1, 0, int(v_148.conf.width_in_tiles)); - y1 = clamp(y1, 0, int(v_148.conf.height_in_tiles)); - Path path; - path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); - uint tile_count = uint((x1 - x0) * (y1 - y0)); - sh_tile_count[th_ix] = tile_count; - uint total_tile_count = tile_count; - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (th_ix >= (1u << i)) - { - total_tile_count += sh_tile_count[th_ix - (1u << i)]; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_tile_count[th_ix] = total_tile_count; - } - if (th_ix == 255u) - { - uint param_2 = total_tile_count * 8u; - uint param_3 = v_148.conf.mem_size; - uint param_4 = 2u; - uint _370 = malloc_stage(param_2, param_3, param_4, v_53); - sh_tile_offset = _370; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint offset_start = sh_tile_offset; - if (offset_start == 0u) - { - return; - } - if (element_ix < v_148.conf.n_elements) - { - uint _387; - if (th_ix > 0u) - { - _387 = sh_tile_count[th_ix - 1u]; - } - else - { - _387 = 0u; - } - uint tile_subix = _387; - path.tiles = TileRef{ offset_start + (8u * tile_subix) }; - Alloc param_5; - param_5.offset = v_148.conf.tile_alloc.offset; - PathRef param_6 = path_ref; - Path param_7 = path; - Path_write(param_5, param_6, param_7, v_53); - } - uint total_count = sh_tile_count[255] * 2u; - uint start_ix = offset_start >> uint(2); - for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u) - { - v_53.memory[start_ix + i_1] = 0u; - } -} - diff --git a/piet-gpu/shader/gen/tile_alloc.spv b/piet-gpu/shader/gen/tile_alloc.spv deleted file mode 100644 index 25a362c..0000000 Binary files a/piet-gpu/shader/gen/tile_alloc.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/transform_leaf.dxil b/piet-gpu/shader/gen/transform_leaf.dxil deleted file mode 100644 index 9427186..0000000 Binary files a/piet-gpu/shader/gen/transform_leaf.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/transform_leaf.hlsl b/piet-gpu/shader/gen/transform_leaf.hlsl deleted file mode 100644 index d3347a6..0000000 --- a/piet-gpu/shader/gen/transform_leaf.hlsl +++ /dev/null @@ -1,235 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct TransformRef -{ - uint offset; -}; - -struct Transform -{ - float4 mat; - float2 translate; -}; - -struct TransformSegRef -{ - uint offset; -}; - -struct TransformSeg -{ - float4 mat; - float2 translate; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const Transform _225 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx }; - -RWByteAddressBuffer _71 : register(u0, space0); -ByteAddressBuffer _97 : register(t2, space0); -ByteAddressBuffer _279 : register(t1, space0); -ByteAddressBuffer _377 : register(t3, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Transform sh_scratch[256]; - -Transform Transform_read(TransformRef ref) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = _97.Load((ix + 0u) * 4 + 0); - uint raw1 = _97.Load((ix + 1u) * 4 + 0); - uint raw2 = _97.Load((ix + 2u) * 4 + 0); - uint raw3 = _97.Load((ix + 3u) * 4 + 0); - uint raw4 = _97.Load((ix + 4u) * 4 + 0); - uint raw5 = _97.Load((ix + 5u) * 4 + 0); - Transform s; - s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.translate = float2(asfloat(raw4), asfloat(raw5)); - return s; -} - -TransformRef Transform_index(TransformRef ref, uint index) -{ - TransformRef _85 = { ref.offset + (index * 24u) }; - return _85; -} - -Transform combine_monoid(Transform a, Transform b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -Transform monoid_identity() -{ - return _225; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _71.Store(offset * 4 + 12, val); -} - -void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.mat.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.mat.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.mat.z); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.mat.w); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.translate.x); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = asuint(s.translate.y); - write_mem(param_15, param_16, param_17); -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _286 = { _279.Load(88) + (ix * 24u) }; - TransformRef ref = _286; - TransformRef param = ref; - Transform agg = Transform_read(param); - Transform local[8]; - local[0] = agg; - for (uint i = 1u; i < 8u; i++) - { - TransformRef param_1 = ref; - uint param_2 = i; - TransformRef param_3 = Transform_index(param_1, param_2); - Transform param_4 = agg; - Transform param_5 = Transform_read(param_3); - agg = combine_monoid(param_4, param_5); - local[i] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Transform param_6 = other; - Transform param_7 = agg; - agg = combine_monoid(param_6, param_7); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - Transform row = monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - Transform _383; - _383.mat = asfloat(_377.Load4((gl_WorkGroupID.x - 1u) * 32 + 0)); - _383.translate = asfloat(_377.Load2((gl_WorkGroupID.x - 1u) * 32 + 16)); - row.mat = _383.mat; - row.translate = _383.translate; - } - if (gl_LocalInvocationID.x > 0u) - { - Transform param_8 = row; - Transform param_9 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_monoid(param_8, param_9); - } - Alloc param_12; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Transform param_10 = row; - Transform param_11 = local[i_2]; - Transform m = combine_monoid(param_10, param_11); - TransformSeg _423 = { m.mat, m.translate }; - TransformSeg transform = _423; - TransformSegRef _433 = { _279.Load(40) + ((ix + i_2) * 24u) }; - TransformSegRef trans_ref = _433; - Alloc _437; - _437.offset = _279.Load(40); - param_12.offset = _437.offset; - TransformSegRef param_13 = trans_ref; - TransformSeg param_14 = transform; - TransformSeg_write(param_12, param_13, param_14); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/transform_leaf.msl b/piet-gpu/shader/gen/transform_leaf.msl deleted file mode 100644 index 01fefd1..0000000 --- a/piet-gpu/shader/gen/transform_leaf.msl +++ /dev/null @@ -1,289 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Alloc -{ - uint offset; -}; - -struct TransformRef -{ - uint offset; -}; - -struct Transform -{ - float4 mat; - float2 translate; -}; - -struct TransformSegRef -{ - uint offset; -}; - -struct TransformSeg -{ - float4 mat; - float2 translate; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct Transform_1 -{ - float4 mat; - float2 translate; - char _m0_final_padding[8]; -}; - -struct ParentBuf -{ - Transform_1 parent[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_97) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = v_97.scene[ix + 0u]; - uint raw1 = v_97.scene[ix + 1u]; - uint raw2 = v_97.scene[ix + 2u]; - uint raw3 = v_97.scene[ix + 3u]; - uint raw4 = v_97.scene[ix + 4u]; - uint raw5 = v_97.scene[ix + 5u]; - Transform s; - s.mat = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.translate = float2(as_type(raw4), as_type(raw5)); - return s; -} - -static inline __attribute__((always_inline)) -TransformRef Transform_index(thread const TransformRef& ref, thread const uint& index) -{ - return TransformRef{ ref.offset + (index * 24u) }; -} - -static inline __attribute__((always_inline)) -Transform combine_monoid(thread const Transform& a, thread const Transform& b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -static inline __attribute__((always_inline)) -Transform monoid_identity() -{ - return Transform{ float4(1.0, 0.0, 0.0, 1.0), float2(0.0) }; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_71) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_71.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void TransformSeg_write(thread const Alloc& a, thread const TransformSegRef& ref, thread const TransformSeg& s, device Memory& v_71) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.mat.x); - write_mem(param, param_1, param_2, v_71); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.mat.y); - write_mem(param_3, param_4, param_5, v_71); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.mat.z); - write_mem(param_6, param_7, param_8, v_71); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.mat.w); - write_mem(param_9, param_10, param_11, v_71); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.translate.x); - write_mem(param_12, param_13, param_14, v_71); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = as_type(s.translate.y); - write_mem(param_15, param_16, param_17, v_71); -} - -kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _279 [[buffer(1)]], const device SceneBuf& v_97 [[buffer(2)]], const device ParentBuf& _377 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup Transform sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef ref = TransformRef{ _279.conf.trans_offset + (ix * 24u) }; - TransformRef param = ref; - Transform agg = Transform_read(param, v_97); - spvUnsafeArray local; - local[0] = agg; - for (uint i = 1u; i < 8u; i++) - { - TransformRef param_1 = ref; - uint param_2 = i; - TransformRef param_3 = Transform_index(param_1, param_2); - Transform param_4 = agg; - Transform param_5 = Transform_read(param_3, v_97); - agg = combine_monoid(param_4, param_5); - local[i] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Transform param_6 = other; - Transform param_7 = agg; - agg = combine_monoid(param_6, param_7); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - Transform row = monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - uint _380 = gl_WorkGroupID.x - 1u; - row.mat = _377.parent[_380].mat; - row.translate = _377.parent[_380].translate; - } - if (gl_LocalInvocationID.x > 0u) - { - Transform param_8 = row; - Transform param_9 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_monoid(param_8, param_9); - } - Alloc param_12; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Transform param_10 = row; - Transform param_11 = local[i_2]; - Transform m = combine_monoid(param_10, param_11); - TransformSeg transform = TransformSeg{ m.mat, m.translate }; - TransformSegRef trans_ref = TransformSegRef{ _279.conf.trans_alloc.offset + ((ix + i_2) * 24u) }; - param_12.offset = _279.conf.trans_alloc.offset; - TransformSegRef param_13 = trans_ref; - TransformSeg param_14 = transform; - TransformSeg_write(param_12, param_13, param_14, v_71); - } -} - diff --git a/piet-gpu/shader/gen/transform_leaf.spv b/piet-gpu/shader/gen/transform_leaf.spv deleted file mode 100644 index a0081bf..0000000 Binary files a/piet-gpu/shader/gen/transform_leaf.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/transform_reduce.dxil b/piet-gpu/shader/gen/transform_reduce.dxil deleted file mode 100644 index 6986f8f..0000000 Binary files a/piet-gpu/shader/gen/transform_reduce.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/transform_reduce.hlsl b/piet-gpu/shader/gen/transform_reduce.hlsl deleted file mode 100644 index 90ea55f..0000000 --- a/piet-gpu/shader/gen/transform_reduce.hlsl +++ /dev/null @@ -1,141 +0,0 @@ -struct TransformRef -{ - uint offset; -}; - -struct Transform -{ - float4 mat; - float2 translate; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -ByteAddressBuffer _49 : register(t2, space0); -ByteAddressBuffer _161 : register(t1, space0); -RWByteAddressBuffer _250 : register(u3, space0); -RWByteAddressBuffer _266 : register(u0, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Transform sh_scratch[256]; - -Transform Transform_read(TransformRef ref) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = _49.Load((ix + 0u) * 4 + 0); - uint raw1 = _49.Load((ix + 1u) * 4 + 0); - uint raw2 = _49.Load((ix + 2u) * 4 + 0); - uint raw3 = _49.Load((ix + 3u) * 4 + 0); - uint raw4 = _49.Load((ix + 4u) * 4 + 0); - uint raw5 = _49.Load((ix + 5u) * 4 + 0); - Transform s; - s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.translate = float2(asfloat(raw4), asfloat(raw5)); - return s; -} - -TransformRef Transform_index(TransformRef ref, uint index) -{ - TransformRef _37 = { ref.offset + (index * 24u) }; - return _37; -} - -Transform combine_monoid(Transform a, Transform b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _168 = { _161.Load(88) + (ix * 24u) }; - TransformRef ref = _168; - TransformRef param = ref; - Transform agg = Transform_read(param); - for (uint i = 1u; i < 8u; i++) - { - TransformRef param_1 = ref; - uint param_2 = i; - TransformRef param_3 = Transform_index(param_1, param_2); - Transform param_4 = agg; - Transform param_5 = Transform_read(param_3); - agg = combine_monoid(param_4, param_5); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) - { - Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - Transform param_6 = agg; - Transform param_7 = other; - agg = combine_monoid(param_6, param_7); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _250.Store4(gl_WorkGroupID.x * 32 + 0, asuint(agg.mat)); - _250.Store2(gl_WorkGroupID.x * 32 + 16, asuint(agg.translate)); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/transform_reduce.msl b/piet-gpu/shader/gen/transform_reduce.msl deleted file mode 100644 index 6ae57e7..0000000 --- a/piet-gpu/shader/gen/transform_reduce.msl +++ /dev/null @@ -1,155 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct TransformRef -{ - uint offset; -}; - -struct Transform -{ - float4 mat; - float2 translate; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct Transform_1 -{ - float4 mat; - float2 translate; - char _m0_final_padding[8]; -}; - -struct OutBuf -{ - Transform_1 outbuf[1]; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_49) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = v_49.scene[ix + 0u]; - uint raw1 = v_49.scene[ix + 1u]; - uint raw2 = v_49.scene[ix + 2u]; - uint raw3 = v_49.scene[ix + 3u]; - uint raw4 = v_49.scene[ix + 4u]; - uint raw5 = v_49.scene[ix + 5u]; - Transform s; - s.mat = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.translate = float2(as_type(raw4), as_type(raw5)); - return s; -} - -static inline __attribute__((always_inline)) -TransformRef Transform_index(thread const TransformRef& ref, thread const uint& index) -{ - return TransformRef{ ref.offset + (index * 24u) }; -} - -static inline __attribute__((always_inline)) -Transform combine_monoid(thread const Transform& a, thread const Transform& b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -kernel void main0(const device ConfigBuf& _161 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], device OutBuf& _250 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup Transform sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef ref = TransformRef{ _161.conf.trans_offset + (ix * 24u) }; - TransformRef param = ref; - Transform agg = Transform_read(param, v_49); - for (uint i = 1u; i < 8u; i++) - { - TransformRef param_1 = ref; - uint param_2 = i; - TransformRef param_3 = Transform_index(param_1, param_2); - Transform param_4 = agg; - Transform param_5 = Transform_read(param_3, v_49); - agg = combine_monoid(param_4, param_5); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) - { - Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - Transform param_6 = agg; - Transform param_7 = other; - agg = combine_monoid(param_6, param_7); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _250.outbuf[gl_WorkGroupID.x].mat = agg.mat; - _250.outbuf[gl_WorkGroupID.x].translate = agg.translate; - } -} - diff --git a/piet-gpu/shader/gen/transform_reduce.spv b/piet-gpu/shader/gen/transform_reduce.spv deleted file mode 100644 index fc8e58a..0000000 Binary files a/piet-gpu/shader/gen/transform_reduce.spv and /dev/null differ diff --git a/piet-gpu/shader/gen/transform_root.dxil b/piet-gpu/shader/gen/transform_root.dxil deleted file mode 100644 index 5b4f059..0000000 Binary files a/piet-gpu/shader/gen/transform_root.dxil and /dev/null differ diff --git a/piet-gpu/shader/gen/transform_root.hlsl b/piet-gpu/shader/gen/transform_root.hlsl deleted file mode 100644 index d447db6..0000000 --- a/piet-gpu/shader/gen/transform_root.hlsl +++ /dev/null @@ -1,94 +0,0 @@ -struct Transform -{ - float4 mat; - float2 translate; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const Transform _23 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx }; - -RWByteAddressBuffer _89 : register(u0, space0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Transform sh_scratch[256]; - -Transform combine_monoid(Transform a, Transform b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -Transform monoid_identity() -{ - return _23; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - Transform _93; - _93.mat = asfloat(_89.Load4(ix * 32 + 0)); - _93.translate = asfloat(_89.Load2(ix * 32 + 16)); - Transform local[8]; - local[0].mat = _93.mat; - local[0].translate = _93.translate; - Transform param_1; - for (uint i = 1u; i < 8u; i++) - { - Transform param = local[i - 1u]; - Transform _119; - _119.mat = asfloat(_89.Load4((ix + i) * 32 + 0)); - _119.translate = asfloat(_89.Load2((ix + i) * 32 + 16)); - param_1.mat = _119.mat; - param_1.translate = _119.translate; - local[i] = combine_monoid(param, param_1); - } - Transform agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Transform param_2 = other; - Transform param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - Transform row = monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Transform param_4 = row; - Transform param_5 = local[i_2]; - Transform m = combine_monoid(param_4, param_5); - uint _208 = ix + i_2; - _89.Store4(_208 * 32 + 0, asuint(m.mat)); - _89.Store2(_208 * 32 + 16, asuint(m.translate)); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/transform_root.msl b/piet-gpu/shader/gen/transform_root.msl deleted file mode 100644 index 8b4b2a1..0000000 --- a/piet-gpu/shader/gen/transform_root.msl +++ /dev/null @@ -1,129 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Transform -{ - float4 mat; - float2 translate; -}; - -struct Transform_1 -{ - float4 mat; - float2 translate; - char _m0_final_padding[8]; -}; - -struct DataBuf -{ - Transform_1 data[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -Transform combine_monoid(thread const Transform& a, thread const Transform& b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -static inline __attribute__((always_inline)) -Transform monoid_identity() -{ - return Transform{ float4(1.0, 0.0, 0.0, 1.0), float2(0.0) }; -} - -kernel void main0(device DataBuf& _89 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup Transform sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - spvUnsafeArray local; - local[0].mat = _89.data[ix].mat; - local[0].translate = _89.data[ix].translate; - Transform param_1; - for (uint i = 1u; i < 8u; i++) - { - uint _113 = ix + i; - Transform param = local[i - 1u]; - param_1.mat = _89.data[_113].mat; - param_1.translate = _89.data[_113].translate; - local[i] = combine_monoid(param, param_1); - } - Transform agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Transform param_2 = other; - Transform param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - Transform row = monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Transform param_4 = row; - Transform param_5 = local[i_2]; - Transform m = combine_monoid(param_4, param_5); - uint _208 = ix + i_2; - _89.data[_208].mat = m.mat; - _89.data[_208].translate = m.translate; - } -} - diff --git a/piet-gpu/shader/gen/transform_root.spv b/piet-gpu/shader/gen/transform_root.spv deleted file mode 100644 index 1578842..0000000 Binary files a/piet-gpu/shader/gen/transform_root.spv and /dev/null differ diff --git a/tests/shader/gen/clear.dxil b/tests/shader/gen/clear.dxil deleted file mode 100644 index a79182a..0000000 Binary files a/tests/shader/gen/clear.dxil and /dev/null differ diff --git a/tests/shader/gen/clear.hlsl b/tests/shader/gen/clear.hlsl deleted file mode 100644 index f6a576c..0000000 --- a/tests/shader/gen/clear.hlsl +++ /dev/null @@ -1,26 +0,0 @@ -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -ByteAddressBuffer _19 : register(t0); -RWByteAddressBuffer _32 : register(u1); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x; - if (ix < _19.Load(0)) - { - _32.Store(ix * 4 + 0, _19.Load(4)); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/clear.msl b/tests/shader/gen/clear.msl deleted file mode 100644 index d89853b..0000000 --- a/tests/shader/gen/clear.msl +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include - -using namespace metal; - -struct ConfigBuf -{ - uint size; - uint value; -}; - -struct TargetBuf -{ - uint data[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -kernel void main0(const device ConfigBuf& _19 [[buffer(0)]], device TargetBuf& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - uint ix = gl_GlobalInvocationID.x; - if (ix < _19.size) - { - _32.data[ix] = _19.value; - } -} - diff --git a/tests/shader/gen/clear.spv b/tests/shader/gen/clear.spv deleted file mode 100644 index 0e8d1d7..0000000 Binary files a/tests/shader/gen/clear.spv and /dev/null differ diff --git a/tests/shader/gen/linkedlist.dxil b/tests/shader/gen/linkedlist.dxil deleted file mode 100644 index 231f0f6..0000000 Binary files a/tests/shader/gen/linkedlist.dxil and /dev/null differ diff --git a/tests/shader/gen/linkedlist.hlsl b/tests/shader/gen/linkedlist.hlsl deleted file mode 100644 index 614791a..0000000 --- a/tests/shader/gen/linkedlist.hlsl +++ /dev/null @@ -1,39 +0,0 @@ -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -RWByteAddressBuffer _56 : register(u0); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -void comp_main() -{ - uint rng = gl_GlobalInvocationID.x + 1u; - for (uint i = 0u; i < 100u; i++) - { - rng ^= (rng << uint(13)); - rng ^= (rng >> uint(17)); - rng ^= (rng << uint(5)); - uint bucket = rng % 65536u; - if (bucket != 0u) - { - uint _61; - _56.InterlockedAdd(0, 2u, _61); - uint alloc = _61 + 65536u; - uint _67; - _56.InterlockedExchange(bucket * 4 + 0, alloc, _67); - uint old = _67; - _56.Store(alloc * 4 + 0, old); - _56.Store((alloc + 1u) * 4 + 0, gl_GlobalInvocationID.x); - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/linkedlist.msl b/tests/shader/gen/linkedlist.msl deleted file mode 100644 index 0461d79..0000000 --- a/tests/shader/gen/linkedlist.msl +++ /dev/null @@ -1,36 +0,0 @@ -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct MemBuf -{ - uint mem[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -kernel void main0(device MemBuf& _56 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - uint rng = gl_GlobalInvocationID.x + 1u; - for (uint i = 0u; i < 100u; i++) - { - rng ^= (rng << uint(13)); - rng ^= (rng >> uint(17)); - rng ^= (rng << uint(5)); - uint bucket = rng % 65536u; - if (bucket != 0u) - { - uint _61 = atomic_fetch_add_explicit((device atomic_uint*)&_56.mem[0], 2u, memory_order_relaxed); - uint alloc = _61 + 65536u; - uint _67 = atomic_exchange_explicit((device atomic_uint*)&_56.mem[bucket], alloc, memory_order_relaxed); - uint old = _67; - _56.mem[alloc] = old; - _56.mem[alloc + 1u] = gl_GlobalInvocationID.x; - } - } -} - diff --git a/tests/shader/gen/linkedlist.spv b/tests/shader/gen/linkedlist.spv deleted file mode 100644 index a723283..0000000 Binary files a/tests/shader/gen/linkedlist.spv and /dev/null differ diff --git a/tests/shader/gen/message_passing.dxil b/tests/shader/gen/message_passing.dxil deleted file mode 100644 index 2be73da..0000000 Binary files a/tests/shader/gen/message_passing.dxil and /dev/null differ diff --git a/tests/shader/gen/message_passing.hlsl b/tests/shader/gen/message_passing.hlsl deleted file mode 100644 index ba8ce5f..0000000 --- a/tests/shader/gen/message_passing.hlsl +++ /dev/null @@ -1,54 +0,0 @@ -struct Element -{ - uint data; - uint flag; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -RWByteAddressBuffer data_buf : register(u0); -RWByteAddressBuffer control_buf : register(u1); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -uint permute_flag_ix(uint data_ix) -{ - return (data_ix * 419u) & 65535u; -} - -void comp_main() -{ - uint _76; - data_buf.InterlockedExchange(gl_GlobalInvocationID.x * 8 + 0, 1u, _76); - DeviceMemoryBarrier(); - uint param = gl_GlobalInvocationID.x; - uint write_flag_ix = permute_flag_ix(param); - uint _77; - data_buf.InterlockedExchange(write_flag_ix * 8 + 4, 1u, _77); - uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u; - uint param_1 = read_ix; - uint read_flag_ix = permute_flag_ix(param_1); - uint _58; - data_buf.InterlockedAdd(read_flag_ix * 8 + 4, 0, _58); - uint flag = _58; - DeviceMemoryBarrier(); - uint _62; - data_buf.InterlockedAdd(read_ix * 8 + 0, 0, _62); - uint data = _62; - if (flag > data) - { - uint _73; - control_buf.InterlockedAdd(0, 1u, _73); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/message_passing.msl b/tests/shader/gen/message_passing.msl deleted file mode 100644 index e48f48a..0000000 --- a/tests/shader/gen/message_passing.msl +++ /dev/null @@ -1,54 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct Element -{ - uint data; - uint flag; -}; - -struct DataBuf -{ - Element data[1]; -}; - -struct ControlBuf -{ - uint failures; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -uint permute_flag_ix(thread const uint& data_ix) -{ - return (data_ix * 419u) & 65535u; -} - -kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed); - threadgroup_barrier(mem_flags::mem_device); - uint param = gl_GlobalInvocationID.x; - uint write_flag_ix = permute_flag_ix(param); - atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed); - uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u; - uint param_1 = read_ix; - uint read_flag_ix = permute_flag_ix(param_1); - uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed); - uint flag = _58; - threadgroup_barrier(mem_flags::mem_device); - uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed); - uint data = _62; - if (flag > data) - { - uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed); - } -} - diff --git a/tests/shader/gen/message_passing.spv b/tests/shader/gen/message_passing.spv deleted file mode 100644 index e5f56d6..0000000 Binary files a/tests/shader/gen/message_passing.spv and /dev/null differ diff --git a/tests/shader/gen/message_passing_vkmm.spv b/tests/shader/gen/message_passing_vkmm.spv deleted file mode 100644 index 8527c2b..0000000 Binary files a/tests/shader/gen/message_passing_vkmm.spv and /dev/null differ diff --git a/tests/shader/gen/prefix.dxil b/tests/shader/gen/prefix.dxil deleted file mode 100644 index 73f1ba1..0000000 Binary files a/tests/shader/gen/prefix.dxil and /dev/null differ diff --git a/tests/shader/gen/prefix.hlsl b/tests/shader/gen/prefix.hlsl deleted file mode 100644 index 72cfa90..0000000 --- a/tests/shader/gen/prefix.hlsl +++ /dev/null @@ -1,225 +0,0 @@ -struct Monoid -{ - uint element; -}; - -struct State -{ - uint flag; - Monoid aggregate; - Monoid prefix; -}; - -static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); - -static const Monoid _185 = { 0u }; - -globallycoherent RWByteAddressBuffer _43 : register(u2); -ByteAddressBuffer _67 : register(t0); -RWByteAddressBuffer _372 : register(u1); - -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -groupshared uint sh_part_ix; -groupshared Monoid sh_scratch[512]; -groupshared uint sh_flag; -groupshared Monoid sh_prefix; - -Monoid combine_monoid(Monoid a, Monoid b) -{ - Monoid _22 = { a.element + b.element }; - return _22; -} - -void comp_main() -{ - if (gl_LocalInvocationID.x == 0u) - { - uint _47; - _43.InterlockedAdd(0, 1u, _47); - sh_part_ix = _47; - } - GroupMemoryBarrierWithGroupSync(); - uint part_ix = sh_part_ix; - uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u); - Monoid _71; - _71.element = _67.Load(ix * 4 + 0); - Monoid local[16]; - local[0].element = _71.element; - Monoid param_1; - for (uint i = 1u; i < 16u; i++) - { - Monoid param = local[i - 1u]; - Monoid _94; - _94.element = _67.Load((ix + i) * 4 + 0); - param_1.element = _94.element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[15]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 511u) - { - _43.Store(part_ix * 12 + 8, agg.element); - if (part_ix == 0u) - { - _43.Store(12, agg.element); - } - } - DeviceMemoryBarrier(); - if (gl_LocalInvocationID.x == 511u) - { - uint flag = 1u; - if (part_ix == 0u) - { - flag = 2u; - } - _43.Store(part_ix * 12 + 4, flag); - } - Monoid exclusive = _185; - if (part_ix != 0u) - { - uint look_back_ix = part_ix - 1u; - uint their_ix = 0u; - Monoid their_prefix; - Monoid their_agg; - Monoid m; - while (true) - { - if (gl_LocalInvocationID.x == 511u) - { - sh_flag = _43.Load(look_back_ix * 12 + 4); - } - GroupMemoryBarrierWithGroupSync(); - DeviceMemoryBarrier(); - uint flag_1 = sh_flag; - GroupMemoryBarrierWithGroupSync(); - if (flag_1 == 2u) - { - if (gl_LocalInvocationID.x == 511u) - { - Monoid _223; - _223.element = _43.Load(look_back_ix * 12 + 12); - their_prefix.element = _223.element; - Monoid param_4 = their_prefix; - Monoid param_5 = exclusive; - exclusive = combine_monoid(param_4, param_5); - } - break; - } - else - { - if (flag_1 == 1u) - { - if (gl_LocalInvocationID.x == 511u) - { - Monoid _245; - _245.element = _43.Load(look_back_ix * 12 + 8); - their_agg.element = _245.element; - Monoid param_6 = their_agg; - Monoid param_7 = exclusive; - exclusive = combine_monoid(param_6, param_7); - } - look_back_ix--; - their_ix = 0u; - continue; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid _267; - _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); - m.element = _267.element; - if (their_ix == 0u) - { - their_agg = m; - } - else - { - Monoid param_8 = their_agg; - Monoid param_9 = m; - their_agg = combine_monoid(param_8, param_9); - } - their_ix++; - if (their_ix == 8192u) - { - Monoid param_10 = their_agg; - Monoid param_11 = exclusive; - exclusive = combine_monoid(param_10, param_11); - if (look_back_ix == 0u) - { - sh_flag = 2u; - } - else - { - look_back_ix--; - their_ix = 0u; - } - } - } - GroupMemoryBarrierWithGroupSync(); - flag_1 = sh_flag; - GroupMemoryBarrierWithGroupSync(); - if (flag_1 == 2u) - { - break; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid param_12 = exclusive; - Monoid param_13 = agg; - Monoid inclusive_prefix = combine_monoid(param_12, param_13); - sh_prefix = exclusive; - _43.Store(part_ix * 12 + 12, inclusive_prefix.element); - } - DeviceMemoryBarrier(); - if (gl_LocalInvocationID.x == 511u) - { - _43.Store(part_ix * 12 + 4, 2u); - } - } - GroupMemoryBarrierWithGroupSync(); - if (part_ix != 0u) - { - exclusive = sh_prefix; - } - Monoid row = exclusive; - if (gl_LocalInvocationID.x > 0u) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u]; - Monoid param_14 = row; - Monoid param_15 = other_1; - row = combine_monoid(param_14, param_15); - } - for (uint i_2 = 0u; i_2 < 16u; i_2++) - { - Monoid param_16 = row; - Monoid param_17 = local[i_2]; - Monoid m_1 = combine_monoid(param_16, param_17); - _372.Store((ix + i_2) * 4 + 0, m_1.element); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/prefix.msl b/tests/shader/gen/prefix.msl deleted file mode 100644 index 24bee60..0000000 --- a/tests/shader/gen/prefix.msl +++ /dev/null @@ -1,264 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Monoid -{ - uint element; -}; - -struct Monoid_1 -{ - uint element; -}; - -struct State -{ - uint flag; - Monoid_1 aggregate; - Monoid_1 prefix; -}; - -struct StateBuf -{ - uint part_counter; - State state[1]; -}; - -struct InBuf -{ - Monoid_1 inbuf[1]; -}; - -struct OutBuf -{ - Monoid_1 outbuf[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - return Monoid{ a.element + b.element }; -} - -kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint sh_part_ix; - threadgroup Monoid sh_scratch[512]; - threadgroup uint sh_flag; - threadgroup Monoid sh_prefix; - if (gl_LocalInvocationID.x == 0u) - { - uint _47 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_43.part_counter, 1u, memory_order_relaxed); - sh_part_ix = _47; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint part_ix = sh_part_ix; - uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u); - spvUnsafeArray local; - local[0].element = _67.inbuf[ix].element; - Monoid param_1; - for (uint i = 1u; i < 16u; i++) - { - Monoid param = local[i - 1u]; - param_1.element = _67.inbuf[ix + i].element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[15]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 511u) - { - _43.state[part_ix].aggregate.element = agg.element; - if (part_ix == 0u) - { - _43.state[0].prefix.element = agg.element; - } - } - threadgroup_barrier(mem_flags::mem_device); - if (gl_LocalInvocationID.x == 511u) - { - uint flag = 1u; - if (part_ix == 0u) - { - flag = 2u; - } - _43.state[part_ix].flag = flag; - } - Monoid exclusive = Monoid{ 0u }; - if (part_ix != 0u) - { - uint look_back_ix = part_ix - 1u; - uint their_ix = 0u; - Monoid their_prefix; - Monoid their_agg; - Monoid m; - while (true) - { - if (gl_LocalInvocationID.x == 511u) - { - sh_flag = _43.state[look_back_ix].flag; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - threadgroup_barrier(mem_flags::mem_device); - uint flag_1 = sh_flag; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (flag_1 == 2u) - { - if (gl_LocalInvocationID.x == 511u) - { - their_prefix.element = _43.state[look_back_ix].prefix.element; - Monoid param_4 = their_prefix; - Monoid param_5 = exclusive; - exclusive = combine_monoid(param_4, param_5); - } - break; - } - else - { - if (flag_1 == 1u) - { - if (gl_LocalInvocationID.x == 511u) - { - their_agg.element = _43.state[look_back_ix].aggregate.element; - Monoid param_6 = their_agg; - Monoid param_7 = exclusive; - exclusive = combine_monoid(param_6, param_7); - } - look_back_ix--; - their_ix = 0u; - continue; - } - } - if (gl_LocalInvocationID.x == 511u) - { - m.element = _67.inbuf[(look_back_ix * 8192u) + their_ix].element; - if (their_ix == 0u) - { - their_agg = m; - } - else - { - Monoid param_8 = their_agg; - Monoid param_9 = m; - their_agg = combine_monoid(param_8, param_9); - } - their_ix++; - if (their_ix == 8192u) - { - Monoid param_10 = their_agg; - Monoid param_11 = exclusive; - exclusive = combine_monoid(param_10, param_11); - if (look_back_ix == 0u) - { - sh_flag = 2u; - } - else - { - look_back_ix--; - their_ix = 0u; - } - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - flag_1 = sh_flag; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (flag_1 == 2u) - { - break; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid param_12 = exclusive; - Monoid param_13 = agg; - Monoid inclusive_prefix = combine_monoid(param_12, param_13); - sh_prefix = exclusive; - _43.state[part_ix].prefix.element = inclusive_prefix.element; - } - threadgroup_barrier(mem_flags::mem_device); - if (gl_LocalInvocationID.x == 511u) - { - _43.state[part_ix].flag = 2u; - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - if (part_ix != 0u) - { - exclusive = sh_prefix; - } - Monoid row = exclusive; - if (gl_LocalInvocationID.x > 0u) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u]; - Monoid param_14 = row; - Monoid param_15 = other_1; - row = combine_monoid(param_14, param_15); - } - for (uint i_2 = 0u; i_2 < 16u; i_2++) - { - Monoid param_16 = row; - Monoid param_17 = local[i_2]; - Monoid m_1 = combine_monoid(param_16, param_17); - _372.outbuf[ix + i_2].element = m_1.element; - } -} - diff --git a/tests/shader/gen/prefix.spv b/tests/shader/gen/prefix.spv deleted file mode 100644 index 8e7db4a..0000000 Binary files a/tests/shader/gen/prefix.spv and /dev/null differ diff --git a/tests/shader/gen/prefix_atomic.dxil b/tests/shader/gen/prefix_atomic.dxil deleted file mode 100644 index 45a7dd8..0000000 Binary files a/tests/shader/gen/prefix_atomic.dxil and /dev/null differ diff --git a/tests/shader/gen/prefix_atomic.hlsl b/tests/shader/gen/prefix_atomic.hlsl deleted file mode 100644 index a75448f..0000000 --- a/tests/shader/gen/prefix_atomic.hlsl +++ /dev/null @@ -1,229 +0,0 @@ -struct Monoid -{ - uint element; -}; - -struct State -{ - uint flag; - Monoid aggregate; - Monoid prefix; -}; - -static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); - -static const Monoid _185 = { 0u }; - -globallycoherent RWByteAddressBuffer _43 : register(u2); -ByteAddressBuffer _67 : register(t0); -RWByteAddressBuffer _372 : register(u1); - -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -groupshared uint sh_part_ix; -groupshared Monoid sh_scratch[512]; -groupshared uint sh_flag; -groupshared Monoid sh_prefix; - -Monoid combine_monoid(Monoid a, Monoid b) -{ - Monoid _22 = { a.element + b.element }; - return _22; -} - -void comp_main() -{ - if (gl_LocalInvocationID.x == 0u) - { - uint _47; - _43.InterlockedAdd(0, 1u, _47); - sh_part_ix = _47; - } - GroupMemoryBarrierWithGroupSync(); - uint part_ix = sh_part_ix; - uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u); - Monoid _71; - _71.element = _67.Load(ix * 4 + 0); - Monoid local[16]; - local[0].element = _71.element; - Monoid param_1; - for (uint i = 1u; i < 16u; i++) - { - Monoid param = local[i - 1u]; - Monoid _94; - _94.element = _67.Load((ix + i) * 4 + 0); - param_1.element = _94.element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[15]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 511u) - { - _43.Store(part_ix * 12 + 8, agg.element); - if (part_ix == 0u) - { - _43.Store(12, agg.element); - } - } - DeviceMemoryBarrier(); - if (gl_LocalInvocationID.x == 511u) - { - uint flag = 1u; - if (part_ix == 0u) - { - flag = 2u; - } - uint _383; - _43.InterlockedExchange(part_ix * 12 + 4, flag, _383); - } - Monoid exclusive = _185; - if (part_ix != 0u) - { - uint look_back_ix = part_ix - 1u; - uint their_ix = 0u; - Monoid their_prefix; - Monoid their_agg; - Monoid m; - while (true) - { - if (gl_LocalInvocationID.x == 511u) - { - uint _208; - _43.InterlockedAdd(look_back_ix * 12 + 4, 0, _208); - sh_flag = _208; - } - GroupMemoryBarrierWithGroupSync(); - DeviceMemoryBarrier(); - uint flag_1 = sh_flag; - GroupMemoryBarrierWithGroupSync(); - if (flag_1 == 2u) - { - if (gl_LocalInvocationID.x == 511u) - { - Monoid _223; - _223.element = _43.Load(look_back_ix * 12 + 12); - their_prefix.element = _223.element; - Monoid param_4 = their_prefix; - Monoid param_5 = exclusive; - exclusive = combine_monoid(param_4, param_5); - } - break; - } - else - { - if (flag_1 == 1u) - { - if (gl_LocalInvocationID.x == 511u) - { - Monoid _245; - _245.element = _43.Load(look_back_ix * 12 + 8); - their_agg.element = _245.element; - Monoid param_6 = their_agg; - Monoid param_7 = exclusive; - exclusive = combine_monoid(param_6, param_7); - } - look_back_ix--; - their_ix = 0u; - continue; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid _267; - _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); - m.element = _267.element; - if (their_ix == 0u) - { - their_agg = m; - } - else - { - Monoid param_8 = their_agg; - Monoid param_9 = m; - their_agg = combine_monoid(param_8, param_9); - } - their_ix++; - if (their_ix == 8192u) - { - Monoid param_10 = their_agg; - Monoid param_11 = exclusive; - exclusive = combine_monoid(param_10, param_11); - if (look_back_ix == 0u) - { - sh_flag = 2u; - } - else - { - look_back_ix--; - their_ix = 0u; - } - } - } - GroupMemoryBarrierWithGroupSync(); - flag_1 = sh_flag; - GroupMemoryBarrierWithGroupSync(); - if (flag_1 == 2u) - { - break; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid param_12 = exclusive; - Monoid param_13 = agg; - Monoid inclusive_prefix = combine_monoid(param_12, param_13); - sh_prefix = exclusive; - _43.Store(part_ix * 12 + 12, inclusive_prefix.element); - } - DeviceMemoryBarrier(); - if (gl_LocalInvocationID.x == 511u) - { - uint _384; - _43.InterlockedExchange(part_ix * 12 + 4, 2u, _384); - } - } - GroupMemoryBarrierWithGroupSync(); - if (part_ix != 0u) - { - exclusive = sh_prefix; - } - Monoid row = exclusive; - if (gl_LocalInvocationID.x > 0u) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u]; - Monoid param_14 = row; - Monoid param_15 = other_1; - row = combine_monoid(param_14, param_15); - } - for (uint i_2 = 0u; i_2 < 16u; i_2++) - { - Monoid param_16 = row; - Monoid param_17 = local[i_2]; - Monoid m_1 = combine_monoid(param_16, param_17); - _372.Store((ix + i_2) * 4 + 0, m_1.element); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/prefix_atomic.msl b/tests/shader/gen/prefix_atomic.msl deleted file mode 100644 index 910e842..0000000 --- a/tests/shader/gen/prefix_atomic.msl +++ /dev/null @@ -1,265 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Monoid -{ - uint element; -}; - -struct Monoid_1 -{ - uint element; -}; - -struct State -{ - uint flag; - Monoid_1 aggregate; - Monoid_1 prefix; -}; - -struct StateBuf -{ - uint part_counter; - State state[1]; -}; - -struct InBuf -{ - Monoid_1 inbuf[1]; -}; - -struct OutBuf -{ - Monoid_1 outbuf[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - return Monoid{ a.element + b.element }; -} - -kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint sh_part_ix; - threadgroup Monoid sh_scratch[512]; - threadgroup uint sh_flag; - threadgroup Monoid sh_prefix; - if (gl_LocalInvocationID.x == 0u) - { - uint _47 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_43.part_counter, 1u, memory_order_relaxed); - sh_part_ix = _47; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint part_ix = sh_part_ix; - uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u); - spvUnsafeArray local; - local[0].element = _67.inbuf[ix].element; - Monoid param_1; - for (uint i = 1u; i < 16u; i++) - { - Monoid param = local[i - 1u]; - param_1.element = _67.inbuf[ix + i].element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[15]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 511u) - { - _43.state[part_ix].aggregate.element = agg.element; - if (part_ix == 0u) - { - _43.state[0].prefix.element = agg.element; - } - } - threadgroup_barrier(mem_flags::mem_device); - if (gl_LocalInvocationID.x == 511u) - { - uint flag = 1u; - if (part_ix == 0u) - { - flag = 2u; - } - atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].flag, flag, memory_order_relaxed); - } - Monoid exclusive = Monoid{ 0u }; - if (part_ix != 0u) - { - uint look_back_ix = part_ix - 1u; - uint their_ix = 0u; - Monoid their_prefix; - Monoid their_agg; - Monoid m; - while (true) - { - if (gl_LocalInvocationID.x == 511u) - { - uint _208 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].flag, memory_order_relaxed); - sh_flag = _208; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - threadgroup_barrier(mem_flags::mem_device); - uint flag_1 = sh_flag; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (flag_1 == 2u) - { - if (gl_LocalInvocationID.x == 511u) - { - their_prefix.element = _43.state[look_back_ix].prefix.element; - Monoid param_4 = their_prefix; - Monoid param_5 = exclusive; - exclusive = combine_monoid(param_4, param_5); - } - break; - } - else - { - if (flag_1 == 1u) - { - if (gl_LocalInvocationID.x == 511u) - { - their_agg.element = _43.state[look_back_ix].aggregate.element; - Monoid param_6 = their_agg; - Monoid param_7 = exclusive; - exclusive = combine_monoid(param_6, param_7); - } - look_back_ix--; - their_ix = 0u; - continue; - } - } - if (gl_LocalInvocationID.x == 511u) - { - m.element = _67.inbuf[(look_back_ix * 8192u) + their_ix].element; - if (their_ix == 0u) - { - their_agg = m; - } - else - { - Monoid param_8 = their_agg; - Monoid param_9 = m; - their_agg = combine_monoid(param_8, param_9); - } - their_ix++; - if (their_ix == 8192u) - { - Monoid param_10 = their_agg; - Monoid param_11 = exclusive; - exclusive = combine_monoid(param_10, param_11); - if (look_back_ix == 0u) - { - sh_flag = 2u; - } - else - { - look_back_ix--; - their_ix = 0u; - } - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - flag_1 = sh_flag; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (flag_1 == 2u) - { - break; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid param_12 = exclusive; - Monoid param_13 = agg; - Monoid inclusive_prefix = combine_monoid(param_12, param_13); - sh_prefix = exclusive; - _43.state[part_ix].prefix.element = inclusive_prefix.element; - } - threadgroup_barrier(mem_flags::mem_device); - if (gl_LocalInvocationID.x == 511u) - { - atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].flag, 2u, memory_order_relaxed); - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - if (part_ix != 0u) - { - exclusive = sh_prefix; - } - Monoid row = exclusive; - if (gl_LocalInvocationID.x > 0u) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u]; - Monoid param_14 = row; - Monoid param_15 = other_1; - row = combine_monoid(param_14, param_15); - } - for (uint i_2 = 0u; i_2 < 16u; i_2++) - { - Monoid param_16 = row; - Monoid param_17 = local[i_2]; - Monoid m_1 = combine_monoid(param_16, param_17); - _372.outbuf[ix + i_2].element = m_1.element; - } -} - diff --git a/tests/shader/gen/prefix_atomic.spv b/tests/shader/gen/prefix_atomic.spv deleted file mode 100644 index d7dac5b..0000000 Binary files a/tests/shader/gen/prefix_atomic.spv and /dev/null differ diff --git a/tests/shader/gen/prefix_reduce.dxil b/tests/shader/gen/prefix_reduce.dxil deleted file mode 100644 index 0ee28e8..0000000 Binary files a/tests/shader/gen/prefix_reduce.dxil and /dev/null differ diff --git a/tests/shader/gen/prefix_reduce.hlsl b/tests/shader/gen/prefix_reduce.hlsl deleted file mode 100644 index f2de539..0000000 --- a/tests/shader/gen/prefix_reduce.hlsl +++ /dev/null @@ -1,72 +0,0 @@ -struct Monoid -{ - uint element; -}; - -static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); - -ByteAddressBuffer _40 : register(t0); -RWByteAddressBuffer _127 : register(u1); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Monoid sh_scratch[512]; - -Monoid combine_monoid(Monoid a, Monoid b) -{ - Monoid _22 = { a.element + b.element }; - return _22; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - Monoid _44; - _44.element = _40.Load(ix * 4 + 0); - Monoid agg; - agg.element = _44.element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = agg; - Monoid _64; - _64.element = _40.Load((ix + i) * 4 + 0); - param_1.element = _64.element; - agg = combine_monoid(param, param_1); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - Monoid param_2 = agg; - Monoid param_3 = other; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _127.Store(gl_WorkGroupID.x * 4 + 0, agg.element); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/prefix_reduce.msl b/tests/shader/gen/prefix_reduce.msl deleted file mode 100644 index 3a3125d..0000000 --- a/tests/shader/gen/prefix_reduce.msl +++ /dev/null @@ -1,68 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct Monoid -{ - uint element; -}; - -struct Monoid_1 -{ - uint element; -}; - -struct InBuf -{ - Monoid_1 inbuf[1]; -}; - -struct OutBuf -{ - Monoid_1 outbuf[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - return Monoid{ a.element + b.element }; -} - -kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _127 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup Monoid sh_scratch[512]; - uint ix = gl_GlobalInvocationID.x * 8u; - Monoid agg; - agg.element = _40.inbuf[ix].element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = agg; - param_1.element = _40.inbuf[ix + i].element; - agg = combine_monoid(param, param_1); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - Monoid param_2 = agg; - Monoid param_3 = other; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _127.outbuf[gl_WorkGroupID.x].element = agg.element; - } -} - diff --git a/tests/shader/gen/prefix_reduce.spv b/tests/shader/gen/prefix_reduce.spv deleted file mode 100644 index b2e35fc..0000000 Binary files a/tests/shader/gen/prefix_reduce.spv and /dev/null differ diff --git a/tests/shader/gen/prefix_root.dxil b/tests/shader/gen/prefix_root.dxil deleted file mode 100644 index 03fe2d1..0000000 Binary files a/tests/shader/gen/prefix_root.dxil and /dev/null differ diff --git a/tests/shader/gen/prefix_root.hlsl b/tests/shader/gen/prefix_root.hlsl deleted file mode 100644 index adf6bf8..0000000 --- a/tests/shader/gen/prefix_root.hlsl +++ /dev/null @@ -1,80 +0,0 @@ -struct Monoid -{ - uint element; -}; - -static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); - -static const Monoid _131 = { 0u }; - -RWByteAddressBuffer _42 : register(u0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Monoid sh_scratch[512]; - -Monoid combine_monoid(Monoid a, Monoid b) -{ - Monoid _22 = { a.element + b.element }; - return _22; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - Monoid _46; - _46.element = _42.Load(ix * 4 + 0); - Monoid local[8]; - local[0].element = _46.element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = local[i - 1u]; - Monoid _71; - _71.element = _42.Load((ix + i) * 4 + 0); - param_1.element = _71.element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - Monoid row = _131; - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Monoid param_4 = row; - Monoid param_5 = local[i_2]; - Monoid m = combine_monoid(param_4, param_5); - _42.Store((ix + i_2) * 4 + 0, m.element); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/prefix_root.msl b/tests/shader/gen/prefix_root.msl deleted file mode 100644 index 897a6a4..0000000 --- a/tests/shader/gen/prefix_root.msl +++ /dev/null @@ -1,112 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Monoid -{ - uint element; -}; - -struct Monoid_1 -{ - uint element; -}; - -struct DataBuf -{ - Monoid_1 data[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - return Monoid{ a.element + b.element }; -} - -kernel void main0(device DataBuf& _42 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup Monoid sh_scratch[512]; - uint ix = gl_GlobalInvocationID.x * 8u; - spvUnsafeArray local; - local[0].element = _42.data[ix].element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = local[i - 1u]; - param_1.element = _42.data[ix + i].element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - Monoid row = Monoid{ 0u }; - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Monoid param_4 = row; - Monoid param_5 = local[i_2]; - Monoid m = combine_monoid(param_4, param_5); - _42.data[ix + i_2].element = m.element; - } -} - diff --git a/tests/shader/gen/prefix_root.spv b/tests/shader/gen/prefix_root.spv deleted file mode 100644 index 3e04224..0000000 Binary files a/tests/shader/gen/prefix_root.spv and /dev/null differ diff --git a/tests/shader/gen/prefix_scan.dxil b/tests/shader/gen/prefix_scan.dxil deleted file mode 100644 index 427f14d..0000000 Binary files a/tests/shader/gen/prefix_scan.dxil and /dev/null differ diff --git a/tests/shader/gen/prefix_scan.hlsl b/tests/shader/gen/prefix_scan.hlsl deleted file mode 100644 index d9e74ea..0000000 --- a/tests/shader/gen/prefix_scan.hlsl +++ /dev/null @@ -1,92 +0,0 @@ -struct Monoid -{ - uint element; -}; - -static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); - -static const Monoid _131 = { 0u }; - -RWByteAddressBuffer _42 : register(u0); -ByteAddressBuffer _141 : register(t1); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Monoid sh_scratch[512]; - -Monoid combine_monoid(Monoid a, Monoid b) -{ - Monoid _22 = { a.element + b.element }; - return _22; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - Monoid _46; - _46.element = _42.Load(ix * 4 + 0); - Monoid local[8]; - local[0].element = _46.element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = local[i - 1u]; - Monoid _71; - _71.element = _42.Load((ix + i) * 4 + 0); - param_1.element = _71.element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - Monoid row = _131; - if (gl_WorkGroupID.x > 0u) - { - Monoid _146; - _146.element = _141.Load((gl_WorkGroupID.x - 1u) * 4 + 0); - row.element = _146.element; - } - if (gl_LocalInvocationID.x > 0u) - { - Monoid param_4 = row; - Monoid param_5 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_monoid(param_4, param_5); - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Monoid param_6 = row; - Monoid param_7 = local[i_2]; - Monoid m = combine_monoid(param_6, param_7); - _42.Store((ix + i_2) * 4 + 0, m.element); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/prefix_scan.msl b/tests/shader/gen/prefix_scan.msl deleted file mode 100644 index 5be4e65..0000000 --- a/tests/shader/gen/prefix_scan.msl +++ /dev/null @@ -1,123 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Monoid -{ - uint element; -}; - -struct Monoid_1 -{ - uint element; -}; - -struct DataBuf -{ - Monoid_1 data[1]; -}; - -struct ParentBuf -{ - Monoid_1 parent[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - return Monoid{ a.element + b.element }; -} - -kernel void main0(device DataBuf& _42 [[buffer(0)]], const device ParentBuf& _141 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup Monoid sh_scratch[512]; - uint ix = gl_GlobalInvocationID.x * 8u; - spvUnsafeArray local; - local[0].element = _42.data[ix].element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = local[i - 1u]; - param_1.element = _42.data[ix + i].element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - Monoid row = Monoid{ 0u }; - if (gl_WorkGroupID.x > 0u) - { - row.element = _141.parent[gl_WorkGroupID.x - 1u].element; - } - if (gl_LocalInvocationID.x > 0u) - { - Monoid param_4 = row; - Monoid param_5 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_monoid(param_4, param_5); - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Monoid param_6 = row; - Monoid param_7 = local[i_2]; - Monoid m = combine_monoid(param_6, param_7); - _42.data[ix + i_2].element = m.element; - } -} - diff --git a/tests/shader/gen/prefix_scan.spv b/tests/shader/gen/prefix_scan.spv deleted file mode 100644 index 6d8fe0a..0000000 Binary files a/tests/shader/gen/prefix_scan.spv and /dev/null differ diff --git a/tests/shader/gen/prefix_vkmm.spv b/tests/shader/gen/prefix_vkmm.spv deleted file mode 100644 index cef3965..0000000 Binary files a/tests/shader/gen/prefix_vkmm.spv and /dev/null differ