From 8de34f8728b2dc2854f6d6cca90a100f630d00f0 Mon Sep 17 00:00:00 2001 From: Chad Brokaw Date: Thu, 14 Jul 2022 14:57:17 -0400 Subject: [PATCH] remove shader gen directories --- piet-gpu-hal/examples/shader/gen/collatz.dxil | Bin 3136 -> 0 bytes piet-gpu-hal/examples/shader/gen/collatz.hlsl | 62 - piet-gpu-hal/examples/shader/gen/collatz.msl | 48 - piet-gpu-hal/examples/shader/gen/collatz.spv | Bin 1616 -> 0 bytes piet-gpu/shader/gen/backdrop.dxil | Bin 4692 -> 0 bytes piet-gpu/shader/gen/backdrop.hlsl | 257 ---- piet-gpu/shader/gen/backdrop.msl | 263 ---- piet-gpu/shader/gen/backdrop.spv | Bin 11832 -> 0 bytes piet-gpu/shader/gen/backdrop_lg.dxil | Bin 4696 -> 0 bytes piet-gpu/shader/gen/backdrop_lg.hlsl | 257 ---- piet-gpu/shader/gen/backdrop_lg.msl | 263 ---- piet-gpu/shader/gen/backdrop_lg.spv | Bin 11864 -> 0 bytes piet-gpu/shader/gen/bbox_clear.dxil | Bin 3160 -> 0 bytes piet-gpu/shader/gen/bbox_clear.hlsl | 67 - piet-gpu/shader/gen/bbox_clear.msl | 70 - piet-gpu/shader/gen/bbox_clear.spv | Bin 3328 -> 0 bytes piet-gpu/shader/gen/binning.dxil | Bin 5980 -> 0 bytes piet-gpu/shader/gen/binning.hlsl | 274 ---- piet-gpu/shader/gen/binning.msl | 282 ---- piet-gpu/shader/gen/binning.spv | Bin 16368 -> 0 bytes piet-gpu/shader/gen/clip_leaf.dxil | Bin 7228 -> 0 bytes piet-gpu/shader/gen/clip_leaf.hlsl | 372 ----- piet-gpu/shader/gen/clip_leaf.msl | 372 ----- piet-gpu/shader/gen/clip_leaf.spv | Bin 19356 -> 0 bytes piet-gpu/shader/gen/clip_reduce.dxil | Bin 4628 -> 0 bytes piet-gpu/shader/gen/clip_reduce.hlsl | 182 --- piet-gpu/shader/gen/clip_reduce.msl | 179 --- piet-gpu/shader/gen/clip_reduce.spv | Bin 9812 -> 0 bytes piet-gpu/shader/gen/coarse.dxil | Bin 11724 -> 0 bytes piet-gpu/shader/gen/coarse.hlsl | 1246 --------------- piet-gpu/shader/gen/coarse.msl | 1261 --------------- piet-gpu/shader/gen/coarse.spv | Bin 60088 -> 0 bytes piet-gpu/shader/gen/draw_leaf.dxil | Bin 6768 -> 0 bytes piet-gpu/shader/gen/draw_leaf.hlsl | 269 ---- piet-gpu/shader/gen/draw_leaf.msl | 318 ---- piet-gpu/shader/gen/draw_leaf.spv | Bin 20204 -> 0 bytes piet-gpu/shader/gen/draw_reduce.dxil | Bin 4260 -> 0 bytes piet-gpu/shader/gen/draw_reduce.hlsl | 127 -- piet-gpu/shader/gen/draw_reduce.msl | 142 -- piet-gpu/shader/gen/draw_reduce.spv | Bin 7240 -> 0 bytes piet-gpu/shader/gen/draw_root.dxil | Bin 4468 -> 0 bytes piet-gpu/shader/gen/draw_root.hlsl | 108 -- piet-gpu/shader/gen/draw_root.msl | 140 -- piet-gpu/shader/gen/draw_root.spv | Bin 5440 -> 0 bytes piet-gpu/shader/gen/kernel4.dxil | Bin 14524 -> 0 bytes piet-gpu/shader/gen/kernel4.hlsl | 1304 ---------------- piet-gpu/shader/gen/kernel4.msl | 1355 ----------------- piet-gpu/shader/gen/kernel4.spv | Bin 66368 -> 0 bytes piet-gpu/shader/gen/kernel4_gray.dxil | Bin 14600 -> 0 bytes piet-gpu/shader/gen/kernel4_gray.hlsl | 1303 ---------------- piet-gpu/shader/gen/kernel4_gray.msl | 1354 ---------------- piet-gpu/shader/gen/kernel4_gray.spv | Bin 66124 -> 0 bytes piet-gpu/shader/gen/path_coarse.dxil | Bin 7012 -> 0 bytes piet-gpu/shader/gen/path_coarse.hlsl | 673 -------- piet-gpu/shader/gen/path_coarse.msl | 718 --------- piet-gpu/shader/gen/path_coarse.spv | Bin 39708 -> 0 bytes piet-gpu/shader/gen/pathseg.dxil | Bin 9596 -> 0 bytes piet-gpu/shader/gen/pathseg.hlsl | 662 -------- piet-gpu/shader/gen/pathseg.msl | 719 --------- piet-gpu/shader/gen/pathseg.spv | Bin 35296 -> 0 bytes piet-gpu/shader/gen/pathtag_reduce.dxil | Bin 4644 -> 0 bytes piet-gpu/shader/gen/pathtag_reduce.hlsl | 139 -- piet-gpu/shader/gen/pathtag_reduce.msl | 156 -- piet-gpu/shader/gen/pathtag_reduce.spv | Bin 8400 -> 0 bytes piet-gpu/shader/gen/pathtag_root.dxil | Bin 4716 -> 0 bytes piet-gpu/shader/gen/pathtag_root.hlsl | 115 -- piet-gpu/shader/gen/pathtag_root.msl | 146 -- piet-gpu/shader/gen/pathtag_root.spv | Bin 5836 -> 0 bytes piet-gpu/shader/gen/tile_alloc.dxil | Bin 4904 -> 0 bytes piet-gpu/shader/gen/tile_alloc.hlsl | 236 --- piet-gpu/shader/gen/tile_alloc.msl | 247 --- piet-gpu/shader/gen/tile_alloc.spv | Bin 12352 -> 0 bytes piet-gpu/shader/gen/transform_leaf.dxil | Bin 5668 -> 0 bytes piet-gpu/shader/gen/transform_leaf.hlsl | 235 --- piet-gpu/shader/gen/transform_leaf.msl | 289 ---- piet-gpu/shader/gen/transform_leaf.spv | Bin 13088 -> 0 bytes piet-gpu/shader/gen/transform_reduce.dxil | Bin 4700 -> 0 bytes piet-gpu/shader/gen/transform_reduce.hlsl | 141 -- piet-gpu/shader/gen/transform_reduce.msl | 155 -- piet-gpu/shader/gen/transform_reduce.spv | Bin 8424 -> 0 bytes piet-gpu/shader/gen/transform_root.dxil | Bin 4824 -> 0 bytes piet-gpu/shader/gen/transform_root.hlsl | 94 -- piet-gpu/shader/gen/transform_root.msl | 129 -- piet-gpu/shader/gen/transform_root.spv | Bin 5336 -> 0 bytes tests/shader/gen/clear.dxil | Bin 3076 -> 0 bytes tests/shader/gen/clear.hlsl | 26 - tests/shader/gen/clear.msl | 27 - tests/shader/gen/clear.spv | Bin 1212 -> 0 bytes tests/shader/gen/linkedlist.dxil | Bin 3024 -> 0 bytes tests/shader/gen/linkedlist.hlsl | 39 - tests/shader/gen/linkedlist.msl | 36 - tests/shader/gen/linkedlist.spv | Bin 1936 -> 0 bytes tests/shader/gen/message_passing.dxil | Bin 3116 -> 0 bytes tests/shader/gen/message_passing.hlsl | 54 - tests/shader/gen/message_passing.msl | 54 - tests/shader/gen/message_passing.spv | Bin 2196 -> 0 bytes tests/shader/gen/message_passing_vkmm.spv | Bin 2300 -> 0 bytes tests/shader/gen/prefix.dxil | Bin 4876 -> 0 bytes tests/shader/gen/prefix.hlsl | 225 --- tests/shader/gen/prefix.msl | 264 ---- tests/shader/gen/prefix.spv | Bin 9828 -> 0 bytes tests/shader/gen/prefix_atomic.dxil | Bin 4884 -> 0 bytes tests/shader/gen/prefix_atomic.hlsl | 229 --- tests/shader/gen/prefix_atomic.msl | 265 ---- tests/shader/gen/prefix_atomic.spv | Bin 9852 -> 0 bytes tests/shader/gen/prefix_reduce.dxil | Bin 3764 -> 0 bytes tests/shader/gen/prefix_reduce.hlsl | 72 - tests/shader/gen/prefix_reduce.msl | 68 - tests/shader/gen/prefix_reduce.spv | Bin 3472 -> 0 bytes tests/shader/gen/prefix_root.dxil | Bin 3888 -> 0 bytes tests/shader/gen/prefix_root.hlsl | 80 - tests/shader/gen/prefix_root.msl | 112 -- tests/shader/gen/prefix_root.spv | Bin 4072 -> 0 bytes tests/shader/gen/prefix_scan.dxil | Bin 4168 -> 0 bytes tests/shader/gen/prefix_scan.hlsl | 92 -- tests/shader/gen/prefix_scan.msl | 123 -- tests/shader/gen/prefix_scan.spv | Bin 4720 -> 0 bytes tests/shader/gen/prefix_vkmm.spv | Bin 10016 -> 0 bytes 118 files changed, 18495 deletions(-) delete mode 100644 piet-gpu-hal/examples/shader/gen/collatz.dxil delete mode 100644 piet-gpu-hal/examples/shader/gen/collatz.hlsl delete mode 100644 piet-gpu-hal/examples/shader/gen/collatz.msl delete mode 100644 piet-gpu-hal/examples/shader/gen/collatz.spv delete mode 100644 piet-gpu/shader/gen/backdrop.dxil delete mode 100644 piet-gpu/shader/gen/backdrop.hlsl delete mode 100644 piet-gpu/shader/gen/backdrop.msl delete mode 100644 piet-gpu/shader/gen/backdrop.spv delete mode 100644 piet-gpu/shader/gen/backdrop_lg.dxil delete mode 100644 piet-gpu/shader/gen/backdrop_lg.hlsl delete mode 100644 piet-gpu/shader/gen/backdrop_lg.msl delete mode 100644 piet-gpu/shader/gen/backdrop_lg.spv delete mode 100644 piet-gpu/shader/gen/bbox_clear.dxil delete mode 100644 piet-gpu/shader/gen/bbox_clear.hlsl delete mode 100644 piet-gpu/shader/gen/bbox_clear.msl delete mode 100644 piet-gpu/shader/gen/bbox_clear.spv delete mode 100644 piet-gpu/shader/gen/binning.dxil delete mode 100644 piet-gpu/shader/gen/binning.hlsl delete mode 100644 piet-gpu/shader/gen/binning.msl delete mode 100644 piet-gpu/shader/gen/binning.spv delete mode 100644 piet-gpu/shader/gen/clip_leaf.dxil delete mode 100644 piet-gpu/shader/gen/clip_leaf.hlsl delete mode 100644 piet-gpu/shader/gen/clip_leaf.msl delete mode 100644 piet-gpu/shader/gen/clip_leaf.spv delete mode 100644 piet-gpu/shader/gen/clip_reduce.dxil delete mode 100644 piet-gpu/shader/gen/clip_reduce.hlsl delete mode 100644 piet-gpu/shader/gen/clip_reduce.msl delete mode 100644 piet-gpu/shader/gen/clip_reduce.spv delete mode 100644 piet-gpu/shader/gen/coarse.dxil delete mode 100644 piet-gpu/shader/gen/coarse.hlsl delete mode 100644 piet-gpu/shader/gen/coarse.msl delete mode 100644 piet-gpu/shader/gen/coarse.spv delete mode 100644 piet-gpu/shader/gen/draw_leaf.dxil delete mode 100644 piet-gpu/shader/gen/draw_leaf.hlsl delete mode 100644 piet-gpu/shader/gen/draw_leaf.msl delete mode 100644 piet-gpu/shader/gen/draw_leaf.spv delete mode 100644 piet-gpu/shader/gen/draw_reduce.dxil delete mode 100644 piet-gpu/shader/gen/draw_reduce.hlsl delete mode 100644 piet-gpu/shader/gen/draw_reduce.msl delete mode 100644 piet-gpu/shader/gen/draw_reduce.spv delete mode 100644 piet-gpu/shader/gen/draw_root.dxil delete mode 100644 piet-gpu/shader/gen/draw_root.hlsl delete mode 100644 piet-gpu/shader/gen/draw_root.msl delete mode 100644 piet-gpu/shader/gen/draw_root.spv delete mode 100644 piet-gpu/shader/gen/kernel4.dxil delete mode 100644 piet-gpu/shader/gen/kernel4.hlsl delete mode 100644 piet-gpu/shader/gen/kernel4.msl delete mode 100644 piet-gpu/shader/gen/kernel4.spv delete mode 100644 piet-gpu/shader/gen/kernel4_gray.dxil delete mode 100644 piet-gpu/shader/gen/kernel4_gray.hlsl delete mode 100644 piet-gpu/shader/gen/kernel4_gray.msl delete mode 100644 piet-gpu/shader/gen/kernel4_gray.spv delete mode 100644 piet-gpu/shader/gen/path_coarse.dxil delete mode 100644 piet-gpu/shader/gen/path_coarse.hlsl delete mode 100644 piet-gpu/shader/gen/path_coarse.msl delete mode 100644 piet-gpu/shader/gen/path_coarse.spv delete mode 100644 piet-gpu/shader/gen/pathseg.dxil delete mode 100644 piet-gpu/shader/gen/pathseg.hlsl delete mode 100644 piet-gpu/shader/gen/pathseg.msl delete mode 100644 piet-gpu/shader/gen/pathseg.spv delete mode 100644 piet-gpu/shader/gen/pathtag_reduce.dxil delete mode 100644 piet-gpu/shader/gen/pathtag_reduce.hlsl delete mode 100644 piet-gpu/shader/gen/pathtag_reduce.msl delete mode 100644 piet-gpu/shader/gen/pathtag_reduce.spv delete mode 100644 piet-gpu/shader/gen/pathtag_root.dxil delete mode 100644 piet-gpu/shader/gen/pathtag_root.hlsl delete mode 100644 piet-gpu/shader/gen/pathtag_root.msl delete mode 100644 piet-gpu/shader/gen/pathtag_root.spv delete mode 100644 piet-gpu/shader/gen/tile_alloc.dxil delete mode 100644 piet-gpu/shader/gen/tile_alloc.hlsl delete mode 100644 piet-gpu/shader/gen/tile_alloc.msl delete mode 100644 piet-gpu/shader/gen/tile_alloc.spv delete mode 100644 piet-gpu/shader/gen/transform_leaf.dxil delete mode 100644 piet-gpu/shader/gen/transform_leaf.hlsl delete mode 100644 piet-gpu/shader/gen/transform_leaf.msl delete mode 100644 piet-gpu/shader/gen/transform_leaf.spv delete mode 100644 piet-gpu/shader/gen/transform_reduce.dxil delete mode 100644 piet-gpu/shader/gen/transform_reduce.hlsl delete mode 100644 piet-gpu/shader/gen/transform_reduce.msl delete mode 100644 piet-gpu/shader/gen/transform_reduce.spv delete mode 100644 piet-gpu/shader/gen/transform_root.dxil delete mode 100644 piet-gpu/shader/gen/transform_root.hlsl delete mode 100644 piet-gpu/shader/gen/transform_root.msl delete mode 100644 piet-gpu/shader/gen/transform_root.spv delete mode 100644 tests/shader/gen/clear.dxil delete mode 100644 tests/shader/gen/clear.hlsl delete mode 100644 tests/shader/gen/clear.msl delete mode 100644 tests/shader/gen/clear.spv delete mode 100644 tests/shader/gen/linkedlist.dxil delete mode 100644 tests/shader/gen/linkedlist.hlsl delete mode 100644 tests/shader/gen/linkedlist.msl delete mode 100644 tests/shader/gen/linkedlist.spv delete mode 100644 tests/shader/gen/message_passing.dxil delete mode 100644 tests/shader/gen/message_passing.hlsl delete mode 100644 tests/shader/gen/message_passing.msl delete mode 100644 tests/shader/gen/message_passing.spv delete mode 100644 tests/shader/gen/message_passing_vkmm.spv delete mode 100644 tests/shader/gen/prefix.dxil delete mode 100644 tests/shader/gen/prefix.hlsl delete mode 100644 tests/shader/gen/prefix.msl delete mode 100644 tests/shader/gen/prefix.spv delete mode 100644 tests/shader/gen/prefix_atomic.dxil delete mode 100644 tests/shader/gen/prefix_atomic.hlsl delete mode 100644 tests/shader/gen/prefix_atomic.msl delete mode 100644 tests/shader/gen/prefix_atomic.spv delete mode 100644 tests/shader/gen/prefix_reduce.dxil delete mode 100644 tests/shader/gen/prefix_reduce.hlsl delete mode 100644 tests/shader/gen/prefix_reduce.msl delete mode 100644 tests/shader/gen/prefix_reduce.spv delete mode 100644 tests/shader/gen/prefix_root.dxil delete mode 100644 tests/shader/gen/prefix_root.hlsl delete mode 100644 tests/shader/gen/prefix_root.msl delete mode 100644 tests/shader/gen/prefix_root.spv delete mode 100644 tests/shader/gen/prefix_scan.dxil delete mode 100644 tests/shader/gen/prefix_scan.hlsl delete mode 100644 tests/shader/gen/prefix_scan.msl delete mode 100644 tests/shader/gen/prefix_scan.spv delete mode 100644 tests/shader/gen/prefix_vkmm.spv diff --git a/piet-gpu-hal/examples/shader/gen/collatz.dxil b/piet-gpu-hal/examples/shader/gen/collatz.dxil deleted file mode 100644 index a03f96a31f63a6f2055144854e947d6299f09e55..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3136 zcmeHJdrVu`89(;D_}ch#xrtqRT?4(w0YQSYi+P0tnjb*SD=~p2s9}oDYfLrFV8VD* zn{ABkq((IjSxitP$b+?A#vqEiP^A-Kmw6Nx(xnq5ZUif#X_cic>w~OSwsQ$gsOo>4 zrd8UJzVkiKcfa$U?|k~*lT)tNa7RCWf&1t0o5Mcm{`Kom26{LE03{9p26+~wB1jdG zjF4_ZD(3;v45>6PpW#D&bL5xqP1@91zW*D#uJjP2hjwQ3gKzMEz!9>=#^$k=D5$eJ zZ5_f51>IsSEmM`f2m|Rs2sjD@yS!rAmu9m#cn7sJ=id#C!NuVW{GOOY0HSnmpG@d|%@0I$Mhl0gmQ zo9mM+<3C8Qcx=zMFFTdViD>A}NR`f=`X1%JRqVUFe_o?~T%g^R^e9k&zS5nPmvE=N zicAcpUT`N(X?I}UeJ6)p+v9%Lm2Ql=VsP>EZ@qSojuGQ;n9%{44iJf(#+}W2XIh=J ztqy^5*spa&{T$H_XAs`bU`+#r064Nxa=lMj1#cssTqvth4^x!?a%7gR zmh~23SM&inhqKv-$_PFPg+@iRS1j39vHXnaJp_ll_GqW~8rijH4P9X7+dV?5x}wdS>U}&x~2y zk_yejs=@^l71<}eH2yoDfllM2Duc1X#?Vp+B8%fk_?LdUQfp&&h|sBp5pr5hB7aN? z8fSHo4o{ui zJ=@*T(CIrC{m`nSToV*t&Gf1ccXiJW&&GCNn(1yRAj@Pv^Ttv8x0A(z)r$L*^Y^Eh zhtS+8Mb_zzE6ioZ*iiaq#hVG70}lLp?y&K}ul6-uGBy;fmc{tYgYS0k5;mDT&I`}O zYW4ST*Byf~ri1sE9}e7kFL-Zd2z~x(7-;l2&!xYm7#mAaNj7R;0(mmu66s%xtQB*z zU79_1pIdFg+FCKQw-H|f;eR2C&n!eTu8y9Zi2B1|WGxf2?$#-tL9zpyY&ViU)9P+E zVI6#|P3tt7uyzyD(&{wPPRYzR;=YGiV!|7k@DEpsRUMH``lIg1BIjjMlZNP8=6L3{ z&dfPY#uX&%T4$CW$(&ggx9G8sIxM};*#u{c)8uiQ^jJ$g*0L4?!~VqLJj!=AQqCr? zv(0Nwe6UJ9SR_6lC+;&yWneIJc`$NjeGVETr)80TYt*!X_a~1gBcRFfuFqF}dwMl{ zTu5fG?|X35uWgw(k<1Yv?w4UNnvnK*XN$!txuGG}7J17w@zEmh3zB#^PAqxCKh=eQ z#&lW(oM(edcB4r{6f3a(+eiZt0ePM>?DVHuKUF>zvh5VXtOe?is8TC_`?odDL2-_M z-@Y7Zxot^dE$SQ~+Qyw7AtPw^lgG_DDC6pUIryd^%Xc^P<^8Ej`=fX2rCD*ym-B-?QvZLiJ9{V@&xe+F`6`wC{!RI=-_MOG&>i;f2Gq7&| zQ~11b_mKTZ@L2!@L3}2^4WIpa;9dlOfrW{6Y<>>%e;1qM_{e1yg@;y|r93|!$3LXe zuLE=Kw*oVLBjV>PX{xJU!Q_CEav0{7r08IMe2l6r@q~DYP4A3QB=3?ZmKc=_*hd_ zjk(nJ3qo?XuuuAGMfA-hXQUqld&k*#3;QqnKQ4YZaM3`+9)@=&LUR+@wZiLXYa*09BEXp&O9%1i+@Bs}Lp?Uw2tK7Mdb&sSl0> zT^ufyl?w{=dYu~J>8OO7QIIO8kjBgKJ{p@)zM~W5nXCyy@gp}y0lsZQ#w*cV<IZ3*tt6U{6=Y_%D>DR4?iVdV) MMcGvjHYqs%2}h{p0ssI2 diff --git a/piet-gpu-hal/examples/shader/gen/collatz.hlsl b/piet-gpu-hal/examples/shader/gen/collatz.hlsl deleted file mode 100644 index 762f06d..0000000 --- a/piet-gpu-hal/examples/shader/gen/collatz.hlsl +++ /dev/null @@ -1,62 +0,0 @@ -static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u); - -RWByteAddressBuffer _57 : register(u0); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -float mod(float x, float y) -{ - return x - y * floor(x / y); -} - -float2 mod(float2 x, float2 y) -{ - return x - y * floor(x / y); -} - -float3 mod(float3 x, float3 y) -{ - return x - y * floor(x / y); -} - -float4 mod(float4 x, float4 y) -{ - return x - y * floor(x / y); -} - -uint collatz_iterations(inout uint n) -{ - uint i = 0u; - while (n != 1u) - { - if (mod(float(n), 2.0f) == 0.0f) - { - n /= 2u; - } - else - { - n = (3u * n) + 1u; - } - i++; - } - return i; -} - -void comp_main() -{ - uint index = gl_GlobalInvocationID.x; - uint param = _57.Load(index * 4 + 0); - uint _65 = collatz_iterations(param); - _57.Store(index * 4 + 0, _65); -} - -[numthreads(1, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu-hal/examples/shader/gen/collatz.msl b/piet-gpu-hal/examples/shader/gen/collatz.msl deleted file mode 100644 index 1b75efe..0000000 --- a/piet-gpu-hal/examples/shader/gen/collatz.msl +++ /dev/null @@ -1,48 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() -template -inline Tx mod(Tx x, Ty y) -{ - return x - y * floor(x / y); -} - -struct PrimeIndices -{ - uint indices[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u); - -static inline __attribute__((always_inline)) -uint collatz_iterations(thread uint& n) -{ - uint i = 0u; - while (n != 1u) - { - if (mod(float(n), 2.0) == 0.0) - { - n /= 2u; - } - else - { - n = (3u * n) + 1u; - } - i++; - } - return i; -} - -kernel void main0(device PrimeIndices& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - uint index = gl_GlobalInvocationID.x; - uint param = _57.indices[index]; - uint _65 = collatz_iterations(param); - _57.indices[index] = _65; -} - diff --git a/piet-gpu-hal/examples/shader/gen/collatz.spv b/piet-gpu-hal/examples/shader/gen/collatz.spv deleted file mode 100644 index 886797e6937b1918712237eaba6a9ff12daa1c67..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1616 zcmYk6*-leY6o$8yB7+LZ`=hyTvq#o=R5ubzCGu)Z(iJSYi_nF+C_3);#TWPdjR~y^ScdflF)3!4f z>1`SQ@~D1L+ivdH_iD$8*(B;cN`*`C5~Pp#3dGqA*_l0&qYSpEkZ*wP(;n}@OOQ3RE8orK zF=QW>J?ifa^SMp^S}wb5+yXpz|YkZ}L_B-{v zXy45ra*TreA#)ikj@U7{0`%8)EVzR*ppm2?#jJJzFDyO{$UsUHvR)Zt7irP diff --git a/piet-gpu/shader/gen/backdrop.dxil b/piet-gpu/shader/gen/backdrop.dxil deleted file mode 100644 index 50f5badcde2fb7b3b663425d9cbb33292ba1fd60..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4692 zcmeHKe^3)w9)G*pY&OXfHW2Lc(+$5wL|woF3idVx5fy3>M7`SSCa4D%C<1c$QN3XQfsN@9I({ub?BAr*;ZSpy|)26o%Wx*xtp8& zqi^QB-|zc;f4uj0-|pva#^!W=>l^E@6`1bsGdnhS*YsUphyVcc{Q!_bodzWv%2p^P zP^O>^LdVBY@-o-31nkeXHF>M!=lY^v|D(PkuYj$AI6If{BmEt0(jQ`QBH~;`L7~u4 zFeqZ1!<92#?_6h1ubRW>;zckoUzb0M1Hc0*I1dYCY+jQs7NVff4e@ln`-YYT;CYBC zEsS?ef9R8$BN}x5*czNlSpGC(C8MN7U>Np5gb3 z&Lr9+8omOS55O9jBY`>o7CxQHCTwy90S6fX7DFr{JWI}OtHX|jw_D-mLu;E;rcS)ZSpaUY0kZ))B@c|I537neqp0y^ex_ug=}TY}ivK5@`Fp zXi$|7tMw>&n+lpkebXkj0=%m;jqfi6h`2e#=63!O zpp4f9K+yW47&qGi`Y^HEROZ+0J@-^y*8P9*tsA1GznykY*VKHfjT2mph2o6{sea)|&+QG!T4}2sArj z-X6F=DxeOMWIHoa|BUOYGMzUFJRU z>nfFn>)xj@e@r;WfFaz-BoUa-SW;#IDZw_0Hk$Y#^X4La!43;Z3-(Lj;xO_AuOH5) zRy^6aDd#e#9Z8DwNYoy8MlM!DwEI{S5jWOGCxKwR_oXE4J)5zE^(!?Nm-%B$TNyBn zA&w;S&uj_fIQ)+!O%raI-^MQQ>b$tL`)t?wrDxK*U+R*h%-`iljwt{1&56|3N53&e zv_14IDF&i=+yPCBmDzoAD_Z>0SRgMy_(R~buRgpg~$~v`SzPj>W5y)E?aB3s_m;h zrdLKRI`Zh=h((31e$4j%>8+Ckw{Cngd1>I*DJ(Av=60uDc!^1VCApjJh9$ZdA_aEx z?URFtA6(-5$zpLbYo~X^LV^39t9%wdtyVt|e8s%aGZ*SaLJ09pgzV5DD;@e2YhQi3 zAlp+Tt8Csy-U7a#QmWg6Dv?YN>+K1>+!nHfm3qT!g9jxm`}Hg9B`XJO>Z{6S`v}>d zEPHvmY;U=wy2f74*aHXEswt;xg7qz9eLtL5O&e5+lqd9tHe^&A+G`Bk!5&P#SerVm zUw%%KcCj|iDoGuj)>QMdeMPe5B6|hA3ifiRy_}a-$H=PhNMYGu3-(6{`!3pE(QMz- zY>xZewCZo;sxRECDOO_YZwr}h3mNoY$BZEZ+7OR9bijyTbLyA(>z6lsuNV4K8ucj- zNor%?QaI>6OX^igYG)tm(aQFhOZLXts|9=D6}{@tI6ld!{xXh#NvS?}t0tVjpBa2_ zv$f`aWd0Z@4%BN56%FxT?qUE)fjVkj`grB-P2awLps(c1eI$0-)w9z%^!QU|w@X=o zY#dKsec+?dzWMdN7he4Qlbi zs#+p=#P!ruW9LJciO77AYo!_%8oah)l`i9?gnXdlgYyQT-+q?%I0wIpXO8%*A##)v zk=H4;JHyH)##sX$J>_>Ve|Vpg&B@S{pQ_bvMrD#@s=m0>U;2=F+1A(papxT`2y-)? zV(zz985rxR9Tgh%D(AZkqxC zk^7E8GW{XO6#N@J_$_G zVgVxkf#Jl}l%@}%fg`KC_`*H;KH{xMfP46Us=RsG zsz&y1aFY5un_-PC&CjLO3NZ5Eky8%8=mT}hCE@3c4r0yqjuT9{26%?lLa)Znpx$H9 z|1TGkkPF52&b0p}7s4ahPq=U`kGbf96O_U!TvUTYnV^3W^~nxU)QY%xcy#lAGjpK+ zPdPA}5YFmoQc+kLjC+`9qIDJjWB%*jX`yn0=iS^wn0SUjldvmzZv)^h1~T$`@5op* z5q7^0xa25RN_QGZIqF(dX-tQrd6}xXtZM&_>tiG1H$H~+!d)8fouqS4zT9)B$xOaR zK7U#;$feR!;-cKLpp@?RC6malw9sSIksd()q4Ui9FL$*!G<99cky~Q7)hwyrUQt>6 zSXmx=LLT_c6A#m^4<8$Aba&`&e@U><%Pfd~?{;RvrKBisz&X*?-!$~vyKfzC8p@Hk zuYkGR%c?5&7#@QYs?0-u#Ph??eDgEk{5yPOWZ=ITBv^lJQ=E827XOp5L7*{lYE7p3 zDrd$UGu~>_w>5tEole(S#vwAzueiH-e)?VC#seIp(C{46q+5tnd1wT{RJ*ZsuuKRd zz1kfFKm;xa7M?kyTjfH696b&lDll|bDX%Ve;f1V;2MmGRLb7(UCWB}?Lb4)oCCr%i zm^1blQ@GKM2W(q0MqNPY!33;eHUkecl(S=$TLg(!tVs}a140{-oA?g4g#vv(YdHKY zDN)zc@Z4_E?UogF?k^TSZ@SEl6em4i(238cZJd9>thv}zlnyjk=CFVmk1n8J#ICl@ zVeiHaBlE6$u^#-FL?|Cv=Gr}qv~X5lDp`CSO)lZEUNCB~@)xuQDNC27Y_VfuoyQIQpT5uYuYLn_J`Nj~e4Gv(f!+0h`U!fbR3DqMSW(l0xZO zY>1O~`Zum%+huOLjIzWxmoas_h*+lXW!F5e)1nI-)VjosEEa#qr%s*kR6N?VM1YHZ zpi|EwtS$IBPo4@^M07AA*DU3rM}Z%3+69y+lMOv|cCseZ(6f}2e+U-QS?GLJA;gcl zq=oFC4YW>r7eq~=r+I_?01*Q&l}l)B*6{_1a$=`>LH@MYQ5!hpfVEq=gWlR5Iu^eV z6D%TgCHxKj)dc3O-Jc#C(=wrl~e@;0`+9##X&7O`3!vdn-GzO5;{WxA_p z{EEyEx?Rn`yPzO`uN5Y2e_Y_)zToQKAbCQPO?_(Qfw#9)I;48Ep?!TK~YxWg_6f9(y9M8)IS G_wApV> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _146 = { raw2 }; - s.tiles = _146; - return s; -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _59.Store(offset * 4 + 12, val); -} - -void comp_main() -{ - uint param = 7u; - bool _154 = check_deps(param); - if (!_154) - { - return; - } - uint th_ix = gl_LocalInvocationIndex; - uint element_ix = gl_GlobalInvocationID.x; - uint row_count = 0u; - if (gl_LocalInvocationID.y == 0u) - { - if (element_ix < _181.Load(4)) - { - PathRef _195 = { _181.Load(20) + (element_ix * 12u) }; - PathRef path_ref = _195; - Alloc _200; - _200.offset = _181.Load(20); - Alloc param_1; - param_1.offset = _200.offset; - PathRef param_2 = path_ref; - Path path = Path_read(param_1, param_2); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _225 = row_count == 1u; - bool _231; - if (_225) - { - _231 = path.bbox.y > 0u; - } - else - { - _231 = _225; - } - if (_231) - { - row_count = 0u; - } - uint param_3 = path.tiles.offset; - uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_5 = true; - Alloc path_alloc = new_alloc(param_3, param_4, param_5); - sh_row_alloc[th_ix] = path_alloc; - } - sh_row_count[th_ix] = row_count; - } - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - bool _276 = gl_LocalInvocationID.y == 0u; - bool _283; - if (_276) - { - _283 = th_ix >= (1u << i); - } - else - { - _283 = _276; - } - if (_283) - { - row_count += sh_row_count[th_ix - (1u << i)]; - } - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.y == 0u) - { - sh_row_count[th_ix] = row_count; - } - } - GroupMemoryBarrierWithGroupSync(); - uint total_rows = sh_row_count[255]; - uint _360; - for (uint row = th_ix; row < total_rows; row += 256u) - { - uint el_ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = el_ix + (128u >> i_1); - if (row >= sh_row_count[probe - 1u]) - { - el_ix = probe; - } - } - uint width = sh_row_width[el_ix]; - if (width > 0u) - { - Alloc tiles_alloc = sh_row_alloc[el_ix]; - if (el_ix > 0u) - { - _360 = sh_row_count[el_ix - 1u]; - } - else - { - _360 = 0u; - } - uint seq_ix = row - _360; - uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_6 = tiles_alloc; - uint param_7 = tile_el_ix; - uint sum = read_mem(param_6, param_7); - for (uint x = 1u; x < width; x++) - { - tile_el_ix += 2u; - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - sum += read_mem(param_8, param_9); - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - uint param_12 = sum; - write_mem(param_10, param_11, param_12); - } - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; - comp_main(); -} diff --git a/piet-gpu/shader/gen/backdrop.msl b/piet-gpu/shader/gen/backdrop.msl deleted file mode 100644 index 3726dff..0000000 --- a/piet-gpu/shader/gen/backdrop.msl +++ /dev/null @@ -1,263 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct Alloc -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -bool check_deps(thread const uint& dep_stage, device Memory& v_59) -{ - uint _65 = atomic_fetch_or_explicit((device atomic_uint*)&v_59.mem_error, 0u, memory_order_relaxed); - return (_65 & dep_stage) == 0u; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_59.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_59); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_59); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_59); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - s.tiles = TileRef{ raw2 }; - return s; -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_59.memory[offset] = val; -} - -kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint sh_row_width[256]; - threadgroup Alloc sh_row_alloc[256]; - threadgroup uint sh_row_count[256]; - uint param = 7u; - bool _154 = check_deps(param, v_59); - if (!_154) - { - return; - } - uint th_ix = gl_LocalInvocationIndex; - uint element_ix = gl_GlobalInvocationID.x; - uint row_count = 0u; - if (gl_LocalInvocationID.y == 0u) - { - if (element_ix < _181.conf.n_elements) - { - PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) }; - Alloc param_1; - param_1.offset = _181.conf.tile_alloc.offset; - PathRef param_2 = path_ref; - Path path = Path_read(param_1, param_2, v_59); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _225 = row_count == 1u; - bool _231; - if (_225) - { - _231 = path.bbox.y > 0u; - } - else - { - _231 = _225; - } - if (_231) - { - row_count = 0u; - } - uint param_3 = path.tiles.offset; - uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_5 = true; - Alloc path_alloc = new_alloc(param_3, param_4, param_5); - sh_row_alloc[th_ix] = path_alloc; - } - sh_row_count[th_ix] = row_count; - } - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - bool _276 = gl_LocalInvocationID.y == 0u; - bool _283; - if (_276) - { - _283 = th_ix >= (1u << i); - } - else - { - _283 = _276; - } - if (_283) - { - row_count += sh_row_count[th_ix - (1u << i)]; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.y == 0u) - { - sh_row_count[th_ix] = row_count; - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint total_rows = sh_row_count[255]; - uint _360; - for (uint row = th_ix; row < total_rows; row += 256u) - { - uint el_ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = el_ix + (128u >> i_1); - if (row >= sh_row_count[probe - 1u]) - { - el_ix = probe; - } - } - uint width = sh_row_width[el_ix]; - if (width > 0u) - { - Alloc tiles_alloc = sh_row_alloc[el_ix]; - if (el_ix > 0u) - { - _360 = sh_row_count[el_ix - 1u]; - } - else - { - _360 = 0u; - } - uint seq_ix = row - _360; - uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_6 = tiles_alloc; - uint param_7 = tile_el_ix; - uint sum = read_mem(param_6, param_7, v_59); - for (uint x = 1u; x < width; x++) - { - tile_el_ix += 2u; - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - sum += read_mem(param_8, param_9, v_59); - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - uint param_12 = sum; - write_mem(param_10, param_11, param_12, v_59); - } - } - } -} - diff --git a/piet-gpu/shader/gen/backdrop.spv b/piet-gpu/shader/gen/backdrop.spv deleted file mode 100644 index b8a74ea44c7209d7c79e5e3b2e3f39466abb90a3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11832 zcmbW5d4OD1mBwGv)lEp)LfDs(kO)agBEf*f1gHSofQ?QkT*{p1CHYdAxR+c~Wv)M3NHZL2<_2AGYL#rn{qpR1Rda?lrWPN#@ zF$XfPA6-JmtM#Ui`;hyQJ;*`i&^I43nPZeb{x=)_5JrjF2Zt(y7hNR>B565VM{jf_>uuxzr^o*L<_%8Ok!wI;8kJICR~ z<+bJGO4$+U>H6no$6;@8*E_Xm%#)YFoTcoTo?OSy$kmrE#NJkI$FCEo*WpEPDU_NDbkEn>8N(wG(4=8<f`+fmv zqS~&GqqT>6?8|!WD`wbz*=Fo*hvYjP4RE`C547)G88N0ierR<&=zEZXuo1L|Ea?Q!_o~@0_P-~>xIKR0K z)v33d=Ql@d*W&Nb-cMYuQ5&x{JNY2TcvmrIu+bWxG3eYpXMgqpy4~7d8EH*5xia#3 z_H@Ve&f^gLrR)#T`KyffTx$Eo4jc1_*ypsGTk165GotO|8Hqb4c+XC=QdCYeg7@q+ zD-)dV$(noUnKF;(W_x|q)-)@{%Md>A$gx^|>liHvXXNuPH@tfpPR}y{KTLGI=Mm43 zM>f$JX@n!@m?PhCGYL+LE)cKPq**$@mbHz zTD!i33tzptC0ToazK#X>*b7(BJYDXPoU@Crr^_9hb5=LK*N4G%`xd85Uw1A3cKAEm zJqxM7ThkHvoq?aFy|+*F=*Ysy$@&f7QTSZv(~Ud2@QqeG)fniz_eb#E^M3Z!7O-oQ z^BLhiP{y(+H)H3oeX%(6jlxaVt|{)l z{;XZZaIF+vU$(=TsquXN8^GzkebWrPFWZIPy?0C5?K6C(?2aD$t{(gCJ@(y+JxHym z(<8bI%ChsQ#@|ApC0(_bV_43mao&l>c?ZQ>cE{lK%S(+ixjte+%?`?wv30dgt?zi@Uv->#g6o-umM{kNP8@YnJmq3eNjm&buQx z`zYtT9-Q~KoNEZqe#tFLIPYq?r3vRfEw?P;T)*7PgtOOjen`X|-p6ul6VAI>&OMJf z?_jwN3FkWH$_eLvD|c?fdC$t7mvG*%azhDckK{HbocF8Tr3vT#DtCFpd9TV{nQ-=3 zZgawUhsup6oOh?(Si*UB{LW0zuy?3u*z@HX-3@S$ ztY-ly%rmx_(T*1%N{r`Z5#qB5+cTobA3~JpL*6qXuf32Ao(E$V^IzxCj^jXvM(5^-Q`oi|!k&B$3 zb>}etjKXIg{pzz2pRmhd|LRxM z<8QA`AkpFpCihISF-$r`#Je_R~zKhOn{eO(M*K*dY{b0exetZJEABlc_2JC(5 z_|KyqZ@uCF3fQI`ga2=U)*)y7-=aO+^3JFI)q>OitpYnH@_ic|=R^K6#4}|6U!vzC zk)LCb=a)CXwzW8?`TGj&n3&)1AN?`keC#=h_8Ry#VDp6keC(arD_PkG(B40|9rz}Q3Q^gP~!sKegfW52)0eju?=1b;lScYz;HZ1025CARTjOlx;O*2S=RyA93FR>)TbV>6>83%6qoHh5iS`-vZ~h zCT-&)_dkLo_qV~3`=7x2BJQ7)xH356{spWrYWP>MW96OCvo06$kAUU&P@nJQzaftx zWyEv#9kh?EV||f3eCnw4f5A~_y2syx*B5c$PvXoM>-j&hzUc4&gB>d$ zV}AgSInq7+A-ukb`%w~C2FDyf2J4GCegbx^e7u`K1v|IC;2sCdKZe8{KLcCK2E=n6 zbNn2v&z?Pj))qgBHjnrz^dPd1eVL2?1#%!_ym9)CdlaoNa?hI8eXq0~Hyhg+-+}zw z1l#Ww;~ZZ?8{>KS7%uly9r*_E?N01{uphvVcgpV=zi*5)pSCgPi#u4Y&)@VoA9KOZ zrGAWB_e1ZG_&D}}!mo~64g{Mc?&X8Pa`xPFsXY%dm$}7{zUcMg20O3oGTu3@>sYjpbs2LEB4=IVsB0lOt?M}Kw5~v?Sw$IUvXM^Q7P+Png za*m&icD!f!>3ZSs*eUp|$A34(9M1vEo6m1$?eLunc76KRg0-FNG_-T6eFslRuS0zF zKdf&e+M^d?8x!x;MPRuyYl?Sj6SjM*FY;UrmXAB)#b9&G``%xQ_OZ|UE{xY-eJR*}#M&eU9;0fbDOr^JQSa!Q><6d21a* zYr6-|>m1G%zIeC7r;a=mV8_S&*MMECyyHjFa@MjBHBEx;P5M34!IqDAW(q8K74dOy zwqeV=u9$l}w)x{5>RPazzts_Q9k%-)G1r5QlaJi50-Il-HMh}n=5Swc1Y5Jd8_?R3 ze<#>@eK(=C{dbJN)2{|EMLR~HF+0%m;$7%6e*1ni*uJa1w{Ath2JzAV+QP4nxo-oT zBkb3KU2EK9uLm0=?>W-G1u?gCiDT>=z^)^D@m<}H-V!kw}Z7K*PFr7 zlRLoL(UZ4;?TKUb#hAB(?M=*oCs@w-=$l;3|2A;Ue-~Iga=il_^S>Re9lm?O&STEI z(b|3^&!(n#f{#O+&vE+gz58Il)RE_1V0$0+z8fqT_nJPrbZ_mucHA#&?R9ALt0Vt= zz-y7n|6Z`%SE$e0^~pK@G_>PSNBg&y(-Hs1=RWz(?=ug1F5`R$_e=2p1>P5Z0Aikl z(Ei=*Q1r=&W1_C-fE^$2^IEVmzR!7GXk(0b{!vjJJV!o9BQgG%0z3ZL1TO@eYY{s7egW9H`1|EhVMkwI zh;2T5Zmf2!|3a|&^*K)4_s^cc2ytzW(HDQuy9liBamHMX_VLb;--O6nqd5MaCl_;G z3UqIbs^*q$swtmRqg=+|Z7v|m^B z*qaMG`gb|DIjzI7+EM$h9%`G*{uv+hT?ckPWAqv09>~XhuL7s@-GD8(qlj_7n~<2# zJbpSXUtB-823;<=DP*#n&Uox4Os40Bw}6*HYWDybzr&MkcfFb*tJBS zH-P2RJZ~&wBG2vEa^~5@NnVD26Jia{VGd(FKX;(tjCiIzKdwdF_^9D6VAtp0&zGa$ fipV+sPV`-f<2N9V*EYuSE6{I4^!t8_eLsH4F+n>TM}9y4!Xpa%^4X80i@=>rusuzmAG1jQNz(aN?Neyt=R0!yHk zZ-!xJFsvb=MXAm}=(1&nWhP|p49vP{Xh?3E%X*-Qt&VN1+Yc>I?R}$kJncWbXZP&c zKlXdhz4!b3e(#U>KG(F_U|gF&@k-%upV|Ap_4+CQ-S2#h001_J0w957CTzK|ZGo)< zw#%@MLc=GpWc&e=_*NY(1cS0r4+F7#xGarp-CILJ|tvd{A#N`Yvfn0A7ST zxt$5l83t_=px)OE{$n0$LaVZil*5UAB}dX~y>HdL6L)$kFXlhvx2S zl{?e7Ax2a7 zcviSr)eSs%u0Nn#+j%|wQh&>sQ>%+Be{PT6Z3galR6Gj8)HOa=oyC>5)z!FF1o@Dc z&5Ft~#ZK4*pnXnv<9jjb~9aS(-g@F zYURc2KA^BLjNi|IGq{yWBQU+SqRI|dL_0;=1mcfbA1=Zdl-WUMbg1DTx79*ejAOaf z9gh!hT7MSPj;AFDM5=(#l!sLiy&;aJh!5+g(?B#n^hz4`zSG*rhE`h3tHQ7)T?`n* z5O*3GlU~8N_x>$S-GQ6uce5)8`cEtwe0AX1k|UXeuM9|0=C9Iy`{Zwb*OSq;?YpAb zuIFH6*j{$<^%k=4)OP@`jJUw#=%a4b#gPF%)VbWhuA7lq%ih+elp^JRy z;OO4F7ArryT_|Sl>@Q)W$XzegJ_k>$D>MKK_-I}6DMXN@Pt6D{?MjKk|swKM!Nn^IF zx>~ZcT2$ZQs%Bh~qbkK^k79~dRv8rbu$YLBbhBB@W5)L#|Dv=8yF2ME^=+Evr(YV5Qn z|7BM3mr2D}KE-8LR5a49neNt%2Kx!C=CoE5u*ICV;_rEkD@TkgJA?i4;T7%16>d>R z`|uJt>2-U?c~M6HFd5KF9;ep38s?U$Rre{1(}#n-z??5x-Spy$k^PuP53c_FfK zGJVbNkH7ftq3bWb@hLY+M&;siuk8BFCof-r;34Frv$9qDg!{k zz5@_UZ^b?{)X!mG8?^mX*hk;MzQB*zSM(3qr;5D=`%?b1*p~v=nW}RKu`etZVqaM7 zP3&uh*cZs99DWT#$9VzkrpQvpm$Wo1SXbl`QEY0G3|mUfyoZo zfdGGCZ0Qs0k%b}9H0&Hc)CMeeGZ}YrXnZ1p zn73RHyi$~^r2DsB;HdXHDiix;oy!#ERdo+vx;QaDdFc}fFWkwop=o;k!PoppI&9>d zliqc=La}`B(79$^amS02_vFN*12T0`6d=KwZSC)LFLk zoDFX`o$4@lxBu{iUf*8DArkd3xNCTU;hM62H;2g777ppqFT~XhmIB|o+@yI8gK?rh z|BdmmfZegU6xc1yYqr(B`lwdFTaWS#on2L}daxfaVT&w)A@Gx$+-+=;Szt#rxv{t$ zM$C@*O}s72ZKIFs#RnAW*mi)wO4`C(X&MlATbpE~hD^tH2ug#hm4+{LyMCjjj6dOz zY)!y?)y7bdKY5tLVsQdyGf(NGk}Y2RIVob!jmnlH1!Lvn3Rqmwc+DT3jc8yPr>dIM zHQO}>{-7>q+nlb=5M!~$2!1h@-sbbjQ-8(p{e6sAY9b2R^Jr4_Fm7TJU*z`pBdTR0b{$6+fWTzfkPQDhFHWV?7}N^Nq;#+sIy3aY5gB)a)+t;|H+|~dz_`5j4V&c^2)2lxnbM##7{O!3?4QiE zUi^H2#4IW-)|i2*LGf|F^rU65o3+aV*4rrwb=Y#%)kq|QlhvZ|R)3w*iiPu{c23c6 z?~c2o^#Xc06or;8mK@B|e$6gxZMp+pP<>=z?jHT;887`WV*Spi3Zxnk}OkJ(|khZI%ex`ul`zqoJHA zoS!}3lK&88ib=LbT*k;fQ!Ov>o+y;AYIxKDH%yn5s84ta-OsB;7|^qQBRvVT+WwYv z4$v9zfd^+S>aoct+uU2dXhU3K%Y!4*K5oHF=reQv7kU>=$Z<1Fv;V{SX30-`a zop_8%9?~V>HT3u~9$w%{8@#}A;#aYP*X(Pd4CE#3+hYa(WLq> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _146 = { raw2 }; - s.tiles = _146; - return s; -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _59.Store(offset * 4 + 12, val); -} - -void comp_main() -{ - uint param = 7u; - bool _154 = check_deps(param); - if (!_154) - { - return; - } - uint th_ix = gl_LocalInvocationIndex; - uint element_ix = gl_GlobalInvocationID.x; - uint row_count = 0u; - if (gl_LocalInvocationID.y == 0u) - { - if (element_ix < _181.Load(4)) - { - PathRef _195 = { _181.Load(20) + (element_ix * 12u) }; - PathRef path_ref = _195; - Alloc _200; - _200.offset = _181.Load(20); - Alloc param_1; - param_1.offset = _200.offset; - PathRef param_2 = path_ref; - Path path = Path_read(param_1, param_2); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _225 = row_count == 1u; - bool _231; - if (_225) - { - _231 = path.bbox.y > 0u; - } - else - { - _231 = _225; - } - if (_231) - { - row_count = 0u; - } - uint param_3 = path.tiles.offset; - uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_5 = true; - Alloc path_alloc = new_alloc(param_3, param_4, param_5); - sh_row_alloc[th_ix] = path_alloc; - } - sh_row_count[th_ix] = row_count; - } - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - bool _276 = gl_LocalInvocationID.y == 0u; - bool _283; - if (_276) - { - _283 = th_ix >= (1u << i); - } - else - { - _283 = _276; - } - if (_283) - { - row_count += sh_row_count[th_ix - (1u << i)]; - } - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.y == 0u) - { - sh_row_count[th_ix] = row_count; - } - } - GroupMemoryBarrierWithGroupSync(); - uint total_rows = sh_row_count[255]; - uint _360; - for (uint row = th_ix; row < total_rows; row += 1024u) - { - uint el_ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = el_ix + (128u >> i_1); - if (row >= sh_row_count[probe - 1u]) - { - el_ix = probe; - } - } - uint width = sh_row_width[el_ix]; - if (width > 0u) - { - Alloc tiles_alloc = sh_row_alloc[el_ix]; - if (el_ix > 0u) - { - _360 = sh_row_count[el_ix - 1u]; - } - else - { - _360 = 0u; - } - uint seq_ix = row - _360; - uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_6 = tiles_alloc; - uint param_7 = tile_el_ix; - uint sum = read_mem(param_6, param_7); - for (uint x = 1u; x < width; x++) - { - tile_el_ix += 2u; - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - sum += read_mem(param_8, param_9); - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - uint param_12 = sum; - write_mem(param_10, param_11, param_12); - } - } - } -} - -[numthreads(256, 4, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; - comp_main(); -} diff --git a/piet-gpu/shader/gen/backdrop_lg.msl b/piet-gpu/shader/gen/backdrop_lg.msl deleted file mode 100644 index 68f0905..0000000 --- a/piet-gpu/shader/gen/backdrop_lg.msl +++ /dev/null @@ -1,263 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct Alloc -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 4u, 1u); - -static inline __attribute__((always_inline)) -bool check_deps(thread const uint& dep_stage, device Memory& v_59) -{ - uint _65 = atomic_fetch_or_explicit((device atomic_uint*)&v_59.mem_error, 0u, memory_order_relaxed); - return (_65 & dep_stage) == 0u; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_59.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_59); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_59); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_59); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - s.tiles = TileRef{ raw2 }; - return s; -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_59.memory[offset] = val; -} - -kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint sh_row_width[256]; - threadgroup Alloc sh_row_alloc[256]; - threadgroup uint sh_row_count[256]; - uint param = 7u; - bool _154 = check_deps(param, v_59); - if (!_154) - { - return; - } - uint th_ix = gl_LocalInvocationIndex; - uint element_ix = gl_GlobalInvocationID.x; - uint row_count = 0u; - if (gl_LocalInvocationID.y == 0u) - { - if (element_ix < _181.conf.n_elements) - { - PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) }; - Alloc param_1; - param_1.offset = _181.conf.tile_alloc.offset; - PathRef param_2 = path_ref; - Path path = Path_read(param_1, param_2, v_59); - sh_row_width[th_ix] = path.bbox.z - path.bbox.x; - row_count = path.bbox.w - path.bbox.y; - bool _225 = row_count == 1u; - bool _231; - if (_225) - { - _231 = path.bbox.y > 0u; - } - else - { - _231 = _225; - } - if (_231) - { - row_count = 0u; - } - uint param_3 = path.tiles.offset; - uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_5 = true; - Alloc path_alloc = new_alloc(param_3, param_4, param_5); - sh_row_alloc[th_ix] = path_alloc; - } - sh_row_count[th_ix] = row_count; - } - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - bool _276 = gl_LocalInvocationID.y == 0u; - bool _283; - if (_276) - { - _283 = th_ix >= (1u << i); - } - else - { - _283 = _276; - } - if (_283) - { - row_count += sh_row_count[th_ix - (1u << i)]; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.y == 0u) - { - sh_row_count[th_ix] = row_count; - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint total_rows = sh_row_count[255]; - uint _360; - for (uint row = th_ix; row < total_rows; row += 1024u) - { - uint el_ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = el_ix + (128u >> i_1); - if (row >= sh_row_count[probe - 1u]) - { - el_ix = probe; - } - } - uint width = sh_row_width[el_ix]; - if (width > 0u) - { - Alloc tiles_alloc = sh_row_alloc[el_ix]; - if (el_ix > 0u) - { - _360 = sh_row_count[el_ix - 1u]; - } - else - { - _360 = 0u; - } - uint seq_ix = row - _360; - uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width); - Alloc param_6 = tiles_alloc; - uint param_7 = tile_el_ix; - uint sum = read_mem(param_6, param_7, v_59); - for (uint x = 1u; x < width; x++) - { - tile_el_ix += 2u; - Alloc param_8 = tiles_alloc; - uint param_9 = tile_el_ix; - sum += read_mem(param_8, param_9, v_59); - Alloc param_10 = tiles_alloc; - uint param_11 = tile_el_ix; - uint param_12 = sum; - write_mem(param_10, param_11, param_12, v_59); - } - } - } -} - diff --git a/piet-gpu/shader/gen/backdrop_lg.spv b/piet-gpu/shader/gen/backdrop_lg.spv deleted file mode 100644 index 2819ec57ec091a6da676686f566120489b033a4e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11864 zcmbW5dw|?kb;p0n&MslYD}?tWBqTx-l1MNhF##rkUBE?xpn#&2+1*KYV0LDiolO7( z!~hB?qJj!4hNz{IT5BKlp|-UYwXJ>Gr?oHouGngAE1=-x^ZCv1WbcmfPk(wi-}61^ zo_p@O_jl))?VG#wfGnGn4QBJQkIl*QXF)a>Cd=k$1Gyd=zG8UIRA+3>x-(BV;E=2@ zk2B^_#`U91$V9c?)Ug+N6xoLyMh<-o5R-`=*T?_nqMyjQ!E9)_GIZHxL&LAGj80Bg zraCt@YL!O4S*wiI+qKb7eS6KZ^OCrFbF?u%mIm|_k=CI-$Q+v*jn*i7fJ(J}T3feH z)jAo>0jwd*nzbF3s&UJw*KXkdBWpL{D`f}GiY;XaV^7s@rW(hV=mqPpO;lRfYY$`# zu}8;iqt{o)YLinYlkd+ChlgQh)ol*8l4H@G*7WFjg$&E5I_>Gv&g#6_)zfS9D!OwV zOes&V}j&{9Md&xX`8O&MAPUy*X;;dYK*&^)i)ka?5GH|=f+H)z0|qYFqO?(Dq4VR$yOQZ`2}2+b4}#iESQv_fp$uWTbVY zIylc)hg?(Wes*XL+Hn;Z^OJop8edeHx88Am*(%0W^BjHIYIK|W<$Vddd?{NGzO**c zYTxAi1Gx>=Jrl(l3*NdsE46mJ)dml6_8jkfFw&?s$11Vf?ppNurZFbZ@7x@x{ITzs zfF`T$>I7PQxW~S#$G&Ek-Ir~_-j0}e8@f&x&C>w4t2^>{UAu*y&T~VLZJ&u>i^|8F zGmUq1Qu}6X;?F?k@wfK)cg?c7J^&hn^|(cy4yo$81frQoIb|^Nt*^)whk)f^bGY?{dStm*LDj1Mnk6$9o>} z{CH%OozX@(Vvc$8-KxU1zvct+Rh!LLI3m{|e5YM)P8E*GHNVJJ@kQV7o0qTC`+1DR zG|_6d>SK{{rf%=+(MEkTJdx9TJLgC0qcgs4PJa(_AD1;3c`SKRNpI$vzu@r7@!+Ns7s-@RXg@1FPbr?-M# zlbp{e?}6I8=y3s$V?KwrKJMm-0I^J;UCio?*|IXLK*X zJ+hvKoG{PW5=J{-d?YcRlf{V7VrkDA- zOUHi+?Re`A{~v*E$_e=Y7-$`G#{UW0vn}s@+Fva={a-7vVIO6-TRO%VP^u+6uM zm45=g7u)*e%jmP{rad#p$F)5NV~?%L^ZF2?4tr0J{qY|AvBW+V{JF&54Sq7Qy)V9) z*v5Y)vGqTl*w*n}kNtdN9|(RSv5oiJQ62UB{g~Q*JEpeZkE!kVV`}>inc99shHbrD z5?kKy$MCDidu+caQ@`JmVVlqI$*|RaPo}otlVKb0H)YsrzbV7k?>A-GYQHH{+iyzk zb2$y}y?4@jPK4+DL&!oz{*#;!xep`q&l3MI+UNWDla6!&w!nK6D#mH=U0%&5r5w{A`$niV11t{`uA%&&N6xD`z+eW`Skrd zBIkVKP3(mEe*=m9p9ecn%<-FrKkVPawzfN%)BgT8BKHMkSHV32cAUP=%zX;_cMy4d z={u|K`o4tzGLp{kT;8+4Tlg+7^8Q}Iq4It39Q{7xJ8sS9);7)@&ie<5^Xc>5*Iv)O zE75<5tU(;F&lv9y`B?9tg7+ZS>U@8O7~_09k%;?qu)c`<3vk4_{}K0>V12ubHT@OX zvGSg+uc7}M@wdXctx4Os$o)6q$o+M2{xl{^Q_B7 z{8M1LebnbW`H#p`NEz{*{WIFfbEfZ~5IN78IO_QqaMWD}NB(~W>-$uZ|KGrJ_U(J* zh+OLLvJ2Mm?}&4Gj*WL7YxxG+$6Ac}4@Ayd#Ie4}9X@r``Au-tneOp_!t0B;|4QP_ z7wh>JSYPz_+hE7a$JqY{#~kS%eg|G(#QjeaR|dx%{|nX^bNnCJvGVb5ei!WA`ht5F zEdLA=b9@hMEgKQfb|E++sP!Q9!HADz4=McWsO3nDdm-oeLA2vN!!Ongf5*O(Q((Ec7qmAa=CTHHjJ*hKEn!~*HrEryyZRdN z#fW_D-!Ry5`mBElEf+a11*biFEw(Z7PF)6;E3>9}r#54|r}`q#|+n~y#5_>Dyd{Ngnuw2x29oWaZ^wp6TVqN0Mdp+1ZaW)!YxiVv; zXA{_-4Sg}L36{6kakRF3;JnV^T;Yp%D}3t6GYNKl%zp#ewaPnw3@v9Z2T;=#*xsby zGaYRCcxR@;a@P?b=Vm*$yz7d&cVL@8zM*ae%lTU!F*jkm{}FRD*f{yf{Z_E~^;vTp zEoTn*^;WPo>$?T59r<^Gjn{V@THAle_&fbJ@G`Vx^ck}gEic}UF5|cFw}b7w+I#Cx z^xF|1{qHFJ>X`d3usOoM8|+%+9=iu@jJ)Sa`wqn1&Lxhq?*zM!=*hdla`w_%^~o7; zPu>lVp4yvZ*S!lvNp8@1H$?E#lf7qc8rRcNtjUvy8bM?c<#x zzZsFUMsfT-PcG)X66~B|zaH$m{I{%o_y)v1yaF-S+T@H&&#Zg94DnflMDI>6usvCd zSj)@M(XXq(X}_-Nv9}a<^zUkHb6SUEwWIbof}KO3d9+#vc3T#5Vrc#5R6cV($ju zUf6LTjbpox^!uWYEg#<(*MsG*L+s%;v|RLX0xTDIZxbvRcg!Go60vuVw+Gr{*Kd6M z-nGHY5M%TiV-MwHz79B@ZyH;!j2Pp5JCK;q9%`G*{uv+h-2`?%WAqv09>~XhZw06G z-GVK*vxsrN+mM*gJXUs0d823;<=DP#zn&UoxJ6P^cBx2qHHYWDy zZm`^4NW|O&b}fI0_>G9;wT*H7O7!~?{l1@K-_IWdQZ^LL diff --git a/piet-gpu/shader/gen/bbox_clear.dxil b/piet-gpu/shader/gen/bbox_clear.dxil deleted file mode 100644 index 82cfb036bbf224f135b70444348c78c5fd18307a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3160 zcmeHJeNY?66<_HN-2vhdNXC){_5neIF?A0C!W5CwmoXSQ5y7sVmK2GvidK>ZqL`0P zBY}_p`V%UB$7;R%MFoe_l#wxiG2Sb2MyiW9MlQI43m2j`c^h@Wzm zdU&L=#Ptgx5Gz(n36D1|ppjSD+{@h`A4hkVuqxmt=(7>JZ_(SuWX!QR$2x5yB;$Qn z%?52UJfIBX5QyV;X%(azho=p|b0h$nP>a(_qsh7+v@dPc4!e)+BOYF&`nWsRKC0$a zrGpH>Tg9jR>{@o31;vLt9q;^n-08W;(K~u%(?jX_ahpSs?!DM};a+qneyKXTF+;Rt z(sBsyRZ;yWm;*^gOB z%;+JSB1T)pXwqUdV3GC7L$FpE)}h5(7Fnwttn~p>OTY{dkgIUYk3o#_76J2YW(a$f zr^1I#FA4yM6y=2Lh90ondE{5c#J`&-`wB>{v|GcaM#IQhSU_|wiMJ2(1PN-7J6g1F z4pnZ;pqLtVeFYxn(qw-1*TWW@|;{bP92)}B*tiLEXF69?tmM+Qmu97{(^a8#}{n7XKB zp#w=3;gx~qa#2GUMG6xYPrGzlT&rAEbFy5sTMt}DQ=25&0VE+XrK&&;_-LFxmobIP zfdmLyGjB-H4;;END$%HGG;z_t43X?hs7@nc-=qwrtM>~DZ-AxQk&rV9be|}`%-jx?)2`XoI?8C1m;OSis7N!}7q?vz(J*?*`f zzU6*R{rV@puepu;t^KL97Vj=!=c|3fT|Qr;dH}}WxqEBAD->F~Gjj)~SUrOtNfAq5 z6c>DFQaW}%ui)b3&gl<&V4}5OzT5k{+GN`Ap+6S-9UOo<7q=z{mXjNJjAEB^kA2py zuww0Pm}Rhuy$a$#;}Czd61i-}_L=FFcSn*NsHiZkTeyG}2bINcqFLo8GACMy1;bz)sZr(;{#l84f>Ca?aq7nuOY9nYA%JZv!_r(U|5Kk3&;t3V>_^f%fN1+1T1m@*&P7+Cn z{If-cQm*b7Lcd{f*w#FlD%?utm8HB*`|^kF8)tIWkALdcpB3Ny>{*(NgPYvk{0KLX zKg7+A|I6I0{d&2Xg6H^+;^xg-fa*Wy<`@_Zxta6txmnhTS`9YMNrt}={!D33cpT#Y zTV597v*+Y|c1*2B$O@1myicipl$6glba}FcNq>GCuhEh?Q-c1HHRA|zF z2v5qD^G`(2_znDV-E|q~+JG^0EM{;b%V20V?ryBBH?>rD`C|m9o_}U2RCUAmVZT+c zaNMrhI@fSmRFFY^>RFy#80qm2T+08gci`NCXpgwAZFh6M<>*hEYwInQ%vT7(sq!J= z$*S$|eScVZGt@6b{?CcuFizBw0MO*fMU~m~9W)wBUIW;l<6nu%@oVSr-}kp5F?s&+ ztPv_^R1@rq_d(ym)39Ywd$_A2&)SZ3p5O diff --git a/piet-gpu/shader/gen/bbox_clear.hlsl b/piet-gpu/shader/gen/bbox_clear.hlsl deleted file mode 100644 index 5d29894..0000000 --- a/piet-gpu/shader/gen/bbox_clear.hlsl +++ /dev/null @@ -1,67 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); - -ByteAddressBuffer _21 : register(t1, space0); -RWByteAddressBuffer _45 : register(u0, space0); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x; - if (ix < _21.Load(80)) - { - uint out_ix = (_21.Load(44) >> uint(2)) + (6u * ix); - _45.Store(out_ix * 4 + 12, 65535u); - _45.Store((out_ix + 1u) * 4 + 12, 65535u); - _45.Store((out_ix + 2u) * 4 + 12, 0u); - _45.Store((out_ix + 3u) * 4 + 12, 0u); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/bbox_clear.msl b/piet-gpu/shader/gen/bbox_clear.msl deleted file mode 100644 index 289fc9a..0000000 --- a/piet-gpu/shader/gen/bbox_clear.msl +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include - -using namespace metal; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -kernel void main0(device Memory& _45 [[buffer(0)]], const device ConfigBuf& _21 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - uint ix = gl_GlobalInvocationID.x; - if (ix < _21.conf.n_path) - { - uint out_ix = (_21.conf.path_bbox_alloc.offset >> uint(2)) + (6u * ix); - _45.memory[out_ix] = 65535u; - _45.memory[out_ix + 1u] = 65535u; - _45.memory[out_ix + 2u] = 0u; - _45.memory[out_ix + 3u] = 0u; - } -} - diff --git a/piet-gpu/shader/gen/bbox_clear.spv b/piet-gpu/shader/gen/bbox_clear.spv deleted file mode 100644 index f1ef3719d108fff7d35416261738ff4ef3f9c63f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3328 zcmbW2+m0JW5Qb;h_Qqr(2a*s@S!S~k0$G;RCImu477{s;90dt3xIh}ll^Q$|XlvJA zKkD7T-|OFwlIb*>SC8{F%CjPkhS@Als_a3EU)S@?iX=Z6`VJj=`1^OXM^}4fj{@b^ zb@}Y)Q7Y7Qx>CaAD^`V zoiRx#(LDP_SuQwdEuu6}Cuvd57o2k!(KN2c^HgCJ<%o;JZ9dF~)i}zEsLJxxt*LFC zYn*2L<7#2F{keDCqqt7q_?q}Yp&ERcuO2#>R!MFK+n>8?vNF@*niph?i=s4x?QvAR zn#IMuF<30#o>Y<0}z zW}8=W^0QeNcYDI@&*W3zE$>V|&*oa&cwcN;G4jsdapyQCn;weO>z-a(1cjoPnk!ME2ry(&99RPlafR=cCu8k;KXKepFp<1Eix_eNtI z##L+!JIbFk*7g4N!ANt?1Y;!X1HSYbXJpVvp}ysT3f1*pJGdnMUOFjfk6FLwbRpL> zi~MU&&(KThY*x-R=Ul9(X9js%3@tfl>o^;Iqctu*xqKj<(NiweA@Zi=jiZJqdf; zWAr2JqQ^KtY};ex3gd^|;?Z}oU60Xou&W-U*I-|IjB~oIB!_PxjGBiIifqkmvOdW`#p{p2xv1vc~;JpvnhjNX9djpn0Ut(B zH3)0DdE52Ic?=rVBq{L_GrR@$&aimKmg^C}<$8p%FKc;YaxunU^oE}c zI=L{gf2Hrbgu6MV=)~Vt3}P}rBOh`NC5%uy^@_mHN~bQFvj#ggxgvpIYjpO2U6PJ3 zxiavl4za;E6oopf=Nqn9nJ)`I|3{YNJWnSF-jmuTKWdY^fN>sl-qh}gnhYhJf%BnL zlMK_T9o7+#yJjtWpVo}NH@{8IxhvD@b$rqBzaZgzPB~f~ZHVyZa1(6owXJWxzRn$; z8nBq;x+CH3Y!7}To<3dZpGvp05&tuZtw+Bm@z>vwj>ERHp$4ci7;%s*s(IE^{|wgt zDZ=k1_e*W@`^VSxu+jJAMZNGl#XV6+*PFZHcM5((5m}FJHh!;Qd&-IXr|)?~-ify{ zKYAbA@7jlVIVWK){13%_OJBy{+7sJ`1V;YM_mnm9&r2*mZ^}Nl$5vy1qCL33cO)?0 z+I-&CoP2+4d@gE!K?47^Vee_qT5Oy-)E8zkKGYm$XZ~1o-k$fy#u^y!ZEHT!Je1fT zyPCszH;Z#wbBlwWI50oX73rbG;#}1nM(tRf&o#F=*og!4-}DRVp~T|w9|WW3EY5Y! ZEe>|#z=-pg-o@UB??1tc{@Yf9e*usUD7^px diff --git a/piet-gpu/shader/gen/binning.dxil b/piet-gpu/shader/gen/binning.dxil deleted file mode 100644 index 5c89a15f37e2f6add9cdd72a00bec9f8e8e4fc8b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5980 zcmeHKdsGxxn!nXuU0n@DQ_ZuHht&`nMT|v@h=Qj20ir_EDp8E&SdEZi0?5M{&`eVO zplK5bjiPDAaT>wMWQ{>X5-=v)4IR@WX3#i>OpMbc#ALFrA;ct@ILY2>8rI#jXaCu= z=ggcl_ncexec%1Q-+g`e{;ISEYK^0-Fvaj{d__<3sU4nQe|H=K02D?7AcpTuSaW$D z)-qUc@-hxUEv)OaSJCh_Q&z2CnKG^O?}z@Iu3Nu>u7!4bCgQ&ezrrHA*8~T`kA<%S zSmm&yu<~Wv9yq1?#_AV+uW9?VKW|*0ugWillVX7g{22mh3s&XwKqPF_LAzSxysjVs zxCCv1M#|S`C~OlO5?WLqbQMM_m+nQ(#2gVG6^Fi1#L2S+>b-$#H>nIu$K`7RlttDY zTNtIuwjKjfO7&Qd)Fw-C>r`tEjiaV7o<~!%v9)j#{Me!!z5H~=jG!yXGR})aC%B9i zP4XgQOOv~KA(%t+0ayiNgfJ%5$f~Ja+#*2`u!jO5721^XRtZ&HkA^06n&IPfbGt1p zB)73K(Cp47?XDO^exC!-R+5n52oX;V;EW_dIS2z4i$J{&WVIh70vx*a{C4dc)B%uH zgW`EV5-&ZV4_4e!3=@F{!7CKd!4(0;7iH*sU7Q5agB7#xf381HJ+V>!$e!~nE26_; zGczyEApoB}zYg#k$Q=tpV-oXZb8>&NPm%B4th-|EgzQdf=>{ z7HX%3Dpt%p3}b)2$89Gg-o=V{3}ZJ)@y;nvRtgcOJa51zhZy>ld!-P^kKnT`3)b2c z30Ycj1F*Mjh&`hi#7)Q3I{u51{($iiZYaQM}hdq zL8+bJ=sK8hweUSlm}|=xglZSdw+7Z^2v5!5*S2PNjqqSqAEIgx%xK%)l2nuwa46}g zLw$z2lr=ShTi1L{qM@kwAO(goJ(Y%|Dt%dn5on_=LPa88L}K^7Tf`P6;#`kH(ONIZ zEFcZNh#CktH?Z;IMHCU5f&#x~&=h)Y7!obmCLGNLv?|>a)dwq1lvS*zAC4cW zX}@5aA6Q+pyCbjzZc$rX2>TSo7`*n?*o5Qy6CwQtkB;lZx7tN+g<2%9nn6zv2Gz=zsmUie+TF345K^)A)TY7FY3bc*5~dM4af^ zn%hq|o_zbGzr6Kv=E4ts@x-6iiwocVZqhkMTe}rOKTUedH>(%%7KF^gWe1T|pL`j} zZbZISjt^Jf`QUSLj!KV{9gGd}uLazVlvr zTfc7r`9kf8C3i1u4I&AS=Ow=6sMhC%O^IN6{cm2~H8gR2O~p6r zL2|p7i`~#Z&BZ5S+dl#qr@?vtuUzc-hvQ<}3W)Xm_Whl5av6K8%&TbTPoKYN;?R59 z6(@b4iF_kb7jZAUrZO`hlJr>Jy)+(*AQT?}i_<2)m&Th#sMBYM56uw*pJ8rGW(+w4 zN7Q9f0Q38K1PD*!kdMRuXFlczWuQMBbS0CJd~*&X4m}^r^Y4T3{eR`(M4ahS$%%li zHPM)xg8XZf*(q?@V%9|z$>o9(GBp%cIumjG8+yf&dJ4mg6ssZ2sTc`J#NTI*E=ayu zZq#8Pp@=;Z2XE+|7QIkau<^}yM-qEEy|eGqw{=g}tTFBGm|xQ&jM4<|+nT!nD)#;c zOnF<`b6TLD6BgE^7*E7Ybmj6h!9*HSDCb-j72yWNE8V9)ext9mWq;qLJc%)>xc2^? zCDk?CwpXkd9F;^JdErrc+oK1swK}^rmM@hJ5!o9OKf0a0;au7rX3#d;=h=Vpoj;v# z+J7-m(zy)AmQ?JhuG4Mjxp%!F$hUs~K@1kGSLrZlta^$v6t{{~UG&5-*yC5Nwbz*{r6^81yI&_8&%0=xz0DaZ8Yu3&R`{ zsoY|sM;GAgMr8JWkiQS>wzwi5RLIjPLB5>Zbp>3dz3L=%0Mj}Ggt~R|LQzPgoC3HJ z`4RqV9#hY4OhlcP1EkHBtYOdu42KhPp6{Q_mM{Sw5=3X2Ul{vZb~s=w+1p>L2E-EdZGgQ_n8@kA0=sos6&c7Qy6-O()qrq9 zr%Emr^-Lok99Yn!;&_Kj!|8{w2>gM^rEimBWFbFD(clJkAYBCizHsbt#$9zR>ZK2n zAad^JpoP;zn4qO~q-_8Wq0n(N*+uKxO9eToFg#$6RST7Lqfd#qY5h2bE4l_|clIxX z_)NWFX^sTRcSs*mWlZD!3W8rI-BWn2qF2E0mDiDM0uqdiOW3okpxzmm9V+k&jwRQM zCa+9Lr%_jbB}_(to*G>VvcP$qdWMq;QJ|u|g`drSLIL(%GJ{@bUSKt{A^{V_CGQ?X zA7DPUx)8XK$;&PU=PoI#RN!$hL(>!^=DZO!(|$!d5tTv% z5;Z-xKpWo+s1C0RP7i}U5BFxRLL))(l}yj$ZYN+!;!tn;9g0Z(X7W^FzNXn7`L6GJz@or*w|*DAwU`cnp1{Xv_@zUqFo^k8=7KoD+#$u9~S z3Xu!LnUd1?7*SnDm})Fv(8ny-io~b3`LG42T9&V4+C@)5fJw614{&M*;OnYr2#~+I8v=AS zf9D5C3Y-R*h&#&zoEMcrfWNWR06jARw`Fns2p%1V0F%v=et<`20QSzl!2^6Ns(=6? zr=|hkngRIb6c3PJIt~GHTK+Z#z>QiA4>;KahE>M+q0jv%F;I3VJ?K|^GdxrBlmeK4 z!hg~3)GTID?bjVEECIN@0>@J9CAtHwn?+#iC5L_6bd?PzUBP#$mQBVIhk#LtR&AiK zvz*UrA10`HqIBF|LKL3OTYOGUp-ACY&g&t1!= z;#;k-jAu*kPTO6c=e*f;yvN$;vEKIYoObc)xUW+#4Ci;zYrL?R-_q?_i*(3c*L@Lr zqO%lCk=MNO>(r11&Y80X0o+i9b$^@3aKrdHaG(N+~3@=XYq9AcW2(iT5{~`x3 zZ&Y+3@CU>gP_2DM!Uk#D?&dHEos`ZVpk{Re6yLRh9y)J_e}G$i6>~#rdx^c%VU6qy zs78}0hcV`o%!I+~X?&!3TZadRjMuk&vJ_F>Qjh5iQUU1H;? z#SzQrXS;EeF+f-Z5+N6E#u7-=0!@h3b-*BTvlqtEqV5u#AZ0|EZ;+Qobu+}9#AXWH zjGe(@D;LQdC*om<{9 diff --git a/piet-gpu/shader/gen/binning.hlsl b/piet-gpu/shader/gen/binning.hlsl deleted file mode 100644 index 7096371..0000000 --- a/piet-gpu/shader/gen/binning.hlsl +++ /dev/null @@ -1,274 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -RWByteAddressBuffer _57 : register(u0, space0); -ByteAddressBuffer _101 : register(t1, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -groupshared uint bitmaps[8][256]; -groupshared uint count[8][256]; -groupshared uint sh_chunk_offset[256]; - -DrawMonoid load_draw_monoid(uint element_ix) -{ - uint base = (_101.Load(48) >> uint(2)) + (4u * element_ix); - uint path_ix = _57.Load(base * 4 + 12); - uint clip_ix = _57.Load((base + 1u) * 4 + 12); - uint scene_offset = _57.Load((base + 2u) * 4 + 12); - uint info_offset = _57.Load((base + 3u) * 4 + 12); - DrawMonoid _136 = { path_ix, clip_ix, scene_offset, info_offset }; - return _136; -} - -float4 load_clip_bbox(uint clip_ix) -{ - uint base = (_101.Load(64) >> uint(2)) + (4u * clip_ix); - float x0 = asfloat(_57.Load(base * 4 + 12)); - float y0 = asfloat(_57.Load((base + 1u) * 4 + 12)); - float x1 = asfloat(_57.Load((base + 2u) * 4 + 12)); - float y1 = asfloat(_57.Load((base + 3u) * 4 + 12)); - float4 bbox = float4(x0, y0, x1, y1); - return bbox; -} - -float4 load_path_bbox(uint path_ix) -{ - uint base = (_101.Load(44) >> uint(2)) + (6u * path_ix); - float bbox_l = float(_57.Load(base * 4 + 12)) - 32768.0f; - float bbox_t = float(_57.Load((base + 1u) * 4 + 12)) - 32768.0f; - float bbox_r = float(_57.Load((base + 2u) * 4 + 12)) - 32768.0f; - float bbox_b = float(_57.Load((base + 3u) * 4 + 12)) - 32768.0f; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -float4 bbox_intersect(float4 a, float4 b) -{ - return float4(max(a.xy, b.xy), min(a.zw, b.zw)); -} - -void store_draw_bbox(uint draw_ix, float4 bbox) -{ - uint base = (_101.Load(68) >> uint(2)) + (4u * draw_ix); - _57.Store(base * 4 + 12, asuint(bbox.x)); - _57.Store((base + 1u) * 4 + 12, asuint(bbox.y)); - _57.Store((base + 2u) * 4 + 12, asuint(bbox.z)); - _57.Store((base + 3u) * 4 + 12, asuint(bbox.w)); -} - -uint malloc_stage(uint size, uint mem_size, uint stage) -{ - uint _65; - _57.InterlockedAdd(0, size, _65); - uint offset = _65; - if ((offset + size) > mem_size) - { - uint _76; - _57.InterlockedOr(4, stage, _76); - offset = 0u; - } - return offset; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _57.Store(offset * 4 + 12, val); -} - -void comp_main() -{ - uint my_partition = gl_WorkGroupID.x; - for (uint i = 0u; i < 8u; i++) - { - bitmaps[i][gl_LocalInvocationID.x] = 0u; - } - uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; - int x0 = 0; - int y0 = 0; - int x1 = 0; - int y1 = 0; - if (element_ix < _101.Load(4)) - { - uint param = element_ix; - DrawMonoid draw_monoid = load_draw_monoid(param); - uint path_ix = draw_monoid.path_ix; - float4 clip_bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - uint clip_ix = draw_monoid.clip_ix; - if (clip_ix > 0u) - { - uint param_1 = clip_ix - 1u; - clip_bbox = load_clip_bbox(param_1); - } - uint param_2 = path_ix; - float4 path_bbox = load_path_bbox(param_2); - float4 param_3 = path_bbox; - float4 param_4 = clip_bbox; - float4 bbox = bbox_intersect(param_3, param_4); - float4 _354 = bbox; - float4 _356 = bbox; - float2 _358 = max(_354.xy, _356.zw); - bbox.z = _358.x; - bbox.w = _358.y; - uint param_5 = element_ix; - float4 param_6 = bbox; - store_draw_bbox(param_5, param_6); - x0 = int(floor(bbox.x * 0.00390625f)); - y0 = int(floor(bbox.y * 0.00390625f)); - x1 = int(ceil(bbox.z * 0.00390625f)); - y1 = int(ceil(bbox.w * 0.00390625f)); - } - uint width_in_bins = ((_101.Load(12) + 16u) - 1u) / 16u; - uint height_in_bins = ((_101.Load(16) + 16u) - 1u) / 16u; - x0 = clamp(x0, 0, int(width_in_bins)); - x1 = clamp(x1, x0, int(width_in_bins)); - y0 = clamp(y0, 0, int(height_in_bins)); - y1 = clamp(y1, y0, int(height_in_bins)); - if (x0 == x1) - { - y1 = y0; - } - int x = x0; - int y = y0; - uint my_slice = gl_LocalInvocationID.x / 32u; - uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); - while (y < y1) - { - uint _460; - InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _460); - x++; - if (x == x1) - { - x = x0; - y++; - } - } - GroupMemoryBarrierWithGroupSync(); - uint element_count = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x]))); - count[i_1][gl_LocalInvocationID.x] = element_count; - } - uint chunk_offset = 0u; - if (element_count != 0u) - { - uint param_7 = element_count * 4u; - uint param_8 = _101.Load(0); - uint param_9 = 1u; - uint _510 = malloc_stage(param_7, param_8, param_9); - chunk_offset = _510; - sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset; - } - uint out_ix = (_101.Load(24) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); - Alloc _532; - _532.offset = _101.Load(24); - Alloc param_10; - param_10.offset = _532.offset; - uint param_11 = out_ix; - uint param_12 = element_count; - write_mem(param_10, param_11, param_12); - Alloc _544; - _544.offset = _101.Load(24); - Alloc param_13; - param_13.offset = _544.offset; - uint param_14 = out_ix + 1u; - uint param_15 = chunk_offset; - write_mem(param_13, param_14, param_15); - GroupMemoryBarrierWithGroupSync(); - x = x0; - y = y0; - while (y < y1) - { - uint bin_ix = (uint(y) * width_in_bins) + uint(x); - uint out_mask = bitmaps[my_slice][bin_ix]; - if ((out_mask & my_mask) != 0u) - { - uint idx = uint(int(countbits(out_mask & (my_mask - 1u)))); - if (my_slice > 0u) - { - idx += count[my_slice - 1u][bin_ix]; - } - uint chunk_offset_1 = sh_chunk_offset[bin_ix]; - if (chunk_offset_1 != 0u) - { - _57.Store(((chunk_offset_1 >> uint(2)) + idx) * 4 + 12, element_ix); - } - } - x++; - if (x == x1) - { - x = x0; - y++; - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/binning.msl b/piet-gpu/shader/gen/binning.msl deleted file mode 100644 index d3ef95c..0000000 --- a/piet-gpu/shader/gen/binning.msl +++ /dev/null @@ -1,282 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct Alloc -{ - uint offset; -}; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_57, const device ConfigBuf& v_101) -{ - uint base = (v_101.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix); - uint path_ix = v_57.memory[base]; - uint clip_ix = v_57.memory[base + 1u]; - uint scene_offset = v_57.memory[base + 2u]; - uint info_offset = v_57.memory[base + 3u]; - return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset }; -} - -static inline __attribute__((always_inline)) -float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_57, const device ConfigBuf& v_101) -{ - uint base = (v_101.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix); - float x0 = as_type(v_57.memory[base]); - float y0 = as_type(v_57.memory[base + 1u]); - float x1 = as_type(v_57.memory[base + 2u]); - float y1 = as_type(v_57.memory[base + 3u]); - float4 bbox = float4(x0, y0, x1, y1); - return bbox; -} - -static inline __attribute__((always_inline)) -float4 load_path_bbox(thread const uint& path_ix, device Memory& v_57, const device ConfigBuf& v_101) -{ - uint base = (v_101.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix); - float bbox_l = float(v_57.memory[base]) - 32768.0; - float bbox_t = float(v_57.memory[base + 1u]) - 32768.0; - float bbox_r = float(v_57.memory[base + 2u]) - 32768.0; - float bbox_b = float(v_57.memory[base + 3u]) - 32768.0; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -static inline __attribute__((always_inline)) -float4 bbox_intersect(thread const float4& a, thread const float4& b) -{ - return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw)); -} - -static inline __attribute__((always_inline)) -void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_57, const device ConfigBuf& v_101) -{ - uint base = (v_101.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix); - v_57.memory[base] = as_type(bbox.x); - v_57.memory[base + 1u] = as_type(bbox.y); - v_57.memory[base + 2u] = as_type(bbox.z); - v_57.memory[base + 3u] = as_type(bbox.w); -} - -static inline __attribute__((always_inline)) -uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_57) -{ - uint _65 = atomic_fetch_add_explicit((device atomic_uint*)&v_57.mem_offset, size, memory_order_relaxed); - uint offset = _65; - if ((offset + size) > mem_size) - { - uint _76 = atomic_fetch_or_explicit((device atomic_uint*)&v_57.mem_error, stage, memory_order_relaxed); - offset = 0u; - } - return offset; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_57) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_57.memory[offset] = val; -} - -kernel void main0(device Memory& v_57 [[buffer(0)]], const device ConfigBuf& v_101 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint bitmaps[8][256]; - threadgroup uint count[8][256]; - threadgroup uint sh_chunk_offset[256]; - uint my_partition = gl_WorkGroupID.x; - for (uint i = 0u; i < 8u; i++) - { - bitmaps[i][gl_LocalInvocationID.x] = 0u; - } - uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x; - int x0 = 0; - int y0 = 0; - int x1 = 0; - int y1 = 0; - if (element_ix < v_101.conf.n_elements) - { - uint param = element_ix; - DrawMonoid draw_monoid = load_draw_monoid(param, v_57, v_101); - uint path_ix = draw_monoid.path_ix; - float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - uint clip_ix = draw_monoid.clip_ix; - if (clip_ix > 0u) - { - uint param_1 = clip_ix - 1u; - clip_bbox = load_clip_bbox(param_1, v_57, v_101); - } - uint param_2 = path_ix; - float4 path_bbox = load_path_bbox(param_2, v_57, v_101); - float4 param_3 = path_bbox; - float4 param_4 = clip_bbox; - float4 bbox = bbox_intersect(param_3, param_4); - float4 _354 = bbox; - float4 _356 = bbox; - float2 _358 = fast::max(_354.xy, _356.zw); - bbox.z = _358.x; - bbox.w = _358.y; - uint param_5 = element_ix; - float4 param_6 = bbox; - store_draw_bbox(param_5, param_6, v_57, v_101); - x0 = int(floor(bbox.x * 0.00390625)); - y0 = int(floor(bbox.y * 0.00390625)); - x1 = int(ceil(bbox.z * 0.00390625)); - y1 = int(ceil(bbox.w * 0.00390625)); - } - uint width_in_bins = ((v_101.conf.width_in_tiles + 16u) - 1u) / 16u; - uint height_in_bins = ((v_101.conf.height_in_tiles + 16u) - 1u) / 16u; - x0 = clamp(x0, 0, int(width_in_bins)); - x1 = clamp(x1, x0, int(width_in_bins)); - y0 = clamp(y0, 0, int(height_in_bins)); - y1 = clamp(y1, y0, int(height_in_bins)); - if (x0 == x1) - { - y1 = y0; - } - int x = x0; - int y = y0; - uint my_slice = gl_LocalInvocationID.x / 32u; - uint my_mask = 1u << (gl_LocalInvocationID.x & 31u); - while (y < y1) - { - uint _460 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed); - x++; - if (x == x1) - { - x = x0; - y++; - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint element_count = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x]))); - count[i_1][gl_LocalInvocationID.x] = element_count; - } - uint chunk_offset = 0u; - if (element_count != 0u) - { - uint param_7 = element_count * 4u; - uint param_8 = v_101.conf.mem_size; - uint param_9 = 1u; - uint _510 = malloc_stage(param_7, param_8, param_9, v_57); - chunk_offset = _510; - sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset; - } - uint out_ix = (v_101.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u); - Alloc param_10; - param_10.offset = v_101.conf.bin_alloc.offset; - uint param_11 = out_ix; - uint param_12 = element_count; - write_mem(param_10, param_11, param_12, v_57); - Alloc param_13; - param_13.offset = v_101.conf.bin_alloc.offset; - uint param_14 = out_ix + 1u; - uint param_15 = chunk_offset; - write_mem(param_13, param_14, param_15, v_57); - threadgroup_barrier(mem_flags::mem_threadgroup); - x = x0; - y = y0; - while (y < y1) - { - uint bin_ix = (uint(y) * width_in_bins) + uint(x); - uint out_mask = bitmaps[my_slice][bin_ix]; - if ((out_mask & my_mask) != 0u) - { - uint idx = uint(int(popcount(out_mask & (my_mask - 1u)))); - if (my_slice > 0u) - { - idx += count[my_slice - 1u][bin_ix]; - } - uint chunk_offset_1 = sh_chunk_offset[bin_ix]; - if (chunk_offset_1 != 0u) - { - v_57.memory[(chunk_offset_1 >> uint(2)) + idx] = element_ix; - } - } - x++; - if (x == x1) - { - x = x0; - y++; - } - } -} - diff --git a/piet-gpu/shader/gen/binning.spv b/piet-gpu/shader/gen/binning.spv deleted file mode 100644 index 1a5c2e1edd0acd497212d2f89dc169c0ee7039b1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16368 zcmbW7cYt11wZ(5TC7~!FB_a?q37CY0NQV&01cCt#g3=Y7OlBs@XQsrN2_?3OqP{2g z-g}LT`mlHOiM{vU3wEE1py>Pk?swPZ-2I-9_tGOid53IN`)&bhvxfRr~3)Pc^6Nq3))P4YtNL9E4#h zWnbE>@23dY-_phZOmz#So6Kw14XnN3g0<_;85r8Sbzo}xy3vt=(bo9Lz;J7FWN5l| z^@x6R3%}O*(CD_|(xC^BvJXA@?@f-e!O_u)A%YHW8dt+t{v2pFw&Dg7IwIQi{X(RWU*tl_OWZF1;RtLbRC$jHYQjkGhO?&)s5mRR_bb z9vmf4Pjx7G&E()U=TD4Jw1(T`Q1jV3IK6qGb!|O1HP>rsw6(RZ&8geEc2h$mEKo*m zjNaPj&S;HqoQTPFI2>(sVsLn1n4|+^byn8`x$e(=>XFfrv61m<)1b|%j!c_6`GyS> z*S1?YPe))&pNzi@uE#)Ye0pSZiY>F`>WwF?vJk7QT8e#;B0fsr8qcX#P}lvc2fcfa zqp@W!%yB$?YITSE7=V2|4z3eb%}=DxY-v9ke*VbV#N>7Dxs&_2 z|KjJIs=Eg2Ba@R8li*(FN8NSUFgh|mJkz4RmfBng+SKtK_e!{rxi6yLIygBv*6>R@ z_To&AdBqZfj$MQ(N%pt-b-zS`1BW8)rMAcU9j+&-&g5-ev2p{wUwP zZG1}(VBMy>x&ytO|2sSQUAu8^{n?za%>%oIa(;WzC${C=qr2J^E>#W z4t_)jKNjx$r~R$dQ>}!Xzn-LKKGvYMhW(c4tu6)c)@gj!((jX~85O9i&c)OBzR#j| zHKQ>NwO0BOvetaJrLFH-+xi|_%bMgn!d!koETs5^_qA!=ez3u1?hXG&>+Fwyo&8ZO z`(q!~9D_N0Z$XpUcPL819hS39E6e4nc=DKy{ZYRe1F zxl`l%oyp;QTkV8G^PR1BN}>6#Ry(cG9H&};q3zdbYYNTxv3}Q=-pBvDf5g$P=M%y8ZMW^Nzrobi4j_wb+Q)u&9mTG@cH=qk_9I$5Ui)_| zt6_Ky}LdG`Wjoi^_i6T%lj!l zX@4GA`%?Poyk1z?+_SE$Ycoh~`}zEDLxbgI(dYkD${7C++lL|Io7=haoH$lB=SuEg zR?FBng1uY9eOJbAo-GvDIdiqW!@dLV_|yJIuybVoXHmPpeH8c2%?(a`*XmY^XD9JL z4K}abc3+|JtWEqsL)_oSJ78AZ*Zk%?5YA7nJ6^eGRZaW61}DCCADU+^`4{O-ITZgR zsP$2kA4#od8{^%Z+PP6TpWJUqwdD71;HT1md1EuC{3N*Z)84b))#`?O?lS)Sp)J`m z{<-k`xA_B>({dqx>)`rWv{~Xi39hOgt1`AH!Cf10EmrEhlkL=wl2Y*ZHp~ zzL(A6-1)t49q#vh$^AwTcfS2TFS+07CHFhML;_uISVetVbPZ|rd2ZGK~iTl|_4gY)+}dyK zlKYKaa=)=l?l*SH{k|@_-`Mii>{{=idr}Uf^rM~59`-yO+~{Yp!`;tvv3ps+JJ|Dw zfZhAnf1%xB)blAm+7EB+)``0SY>c;(+jGAVtmgf7Co$xQQjBG6aoR2c+cy0^3hZ6h zPn`HHgR8G*6y9w|Q@sCdtIaW}*~UKFU-MWe#tN``cF;%X;yz$C-?^CswQ>&ZZ`vOV zcFrE(v_B54<~@?W9}n*A`w3|3$$KK$w(92f-cS>JXZQ^v_Pz*SS>RI}oPMo>8^gId zjkc$OefQedvBmFnbZuFqGr+c0zny&j)IP44wlgVe&Z9VS)_~1tAH9?0E147DCFg*> z$JMo;MXl!ixHs2=)$Cj1oeNjj-#bk%_CE8SleP~4+qN9j1#or!&!?6r?uB5rH<8;t z`9QE*KiKhJ1om;f+8#tvbG+ijxfpDm>zj4I1gz$J!@SmlG3#8TJpovmhDb3GKSrtKBsw+O)v7*?~Q69tKYy+vuZae?9x7;Ot@FTk?!$9Be$>jZw>; zH^(snKAze(+R}%uVB7R#Gfp4%!{O>TG~<{8t4&h&CSD&t(_nt;??Ub4wDtRC8(5ot z@J^5?*VSNix&M;u8n}9LT?w2(Snd@lk4$dZRR?a zTJ9O_r}m6n=gxcrICrMMXE|2)$rI7Fp1i=wsEv4_RGQUlkiu7%UrL7m$_aAS4*x} zgZZg*8O!$in9I1@6ZJdl zCTg|Bcn7#M#yio}Z*9)dyTNMjqI|5;-UD{uYcuZ6)M}3Lz0}T!wcoApr~UxNNBajG zyLHC-A+X~M|1j9KdRwy&9|7y5?%d1YM=`d!#F>Lzz>YJqJ_tL1+9B-nQ9`8NI(crnHPXivURgPpHG@E*25p8@OZeD+hc zC+}y$<~5J|^K)Red@E~Hi~r}rYT;i1m;3pPaDCL>&tInYaZa^;iK6D5iWBE6;Bt@3 zWBV%D`Ar|c23AWSwW+!GiES-+Uzp3f+&ix859rf&`eyk$+9>VCNRn+>+zx4_1+ zcF%o>`nwb#?cZzc*7n`YnxZ?F+Xb+8m#M_fd0g{9EGwlyW@wbCC82G&X++ zaV!VI)qI>gfB!uQ?%$1^17pke%UJITHlH^Co+OX$USRi-HvhIHcfF~qL*&%Pww?C0 zI}~ipjPEe8n)@YVIvoD5#maP&wlzhF7u@?M}pl;+KvFr zGlqME9fPrKr@iwoHja1IVr(ZjaeVK{V_O0~rLp-Akh?dRQhP3~y{nf|FQ@ouKf1A7 zC)NtEF~aWyKNX(ugJamJmy>l*C+SwX<*+K`e{qM)4|$|>E9vcv7G^~ zwPqY=!fz%^aBs5x82u zE3~P_e`Dhxz6rh(pZt!u8Lp4IbM!E3ALmG0i=yTni4$iFxV-!1v5kVgW75YlxLW$C zP0cYTwzYgYwYjY2OB=q7`mSmg{~kW8{&zC(4ZkgX{P)b0>AUZ?Jqql9YZBdmb2YC2 z4N3m(>_o6_e24gra5}~HDZdTQfE)8RJk7b9qMzrbpQ7D$G*;WzzDLi3t2rmOJDXyh zoQ*YLebnP~4%j~DjNccmpSo*vF13$qqirok&2fn{j{AWfhx47-_Xj%%`Mql$Sig)z zd&bcZcJ33~I&sbiJFb6V7iT^Ed%*z|AOF4DdH3I&{O@V92L5+6_Vqw&+h0hX_AGDx z-leMl+l{pMzm-XQ|9h6;gTcl+gxd8q*F_Z9GwXXv!|l8O&B(4E zo!q~-I?UQbERZD2LeN8(-$PF(G2dkt8dah*STY}bL;Qxf+PV72X(#C;?pI9|dklCzC2=1MR@*^I-0Q)Kt37QW2i9g>*Ha$byyEA8vZQmXLs-ybnusT z@YfW4F?eUgvwxogAECH!Z*1y}?Wu6>*}qQ%tCjor>2Uj<{re2C`&T_{^GvX9wK?aG zNiBJv16IqrJr|sH(-yzyfwlR)*hl?*ikdN<7kPYN2sUQ=@FK8Uxu0JQPaf@Q`x3CW z)>iTD4Dm{ zgOf*l+P(p-EqUGuRx^+LL$0rL`X-8V8UALlV-9}{*uI6|1U82I-+ld7iZN`buXgk7 zq}FCT{oY1V^K6LAv#|{BvxMT=aNjO1@KIpT$#Uw%dq?AweT+QM$ixS^=fsGO0 zcZ1a|KKkb0N#E0O`(jLe6X(5P`;|EF1FKnPQ@SYeeLuL&`2o0^CBDh|K{WetB*mEe z`n$jD^C60T@`?6gaQYeTBWT9)iFOM(v9n$uMKk7yDYntaHBm3uWCh%35heH4(O}o% zKGeCdJ`SGKaPO$?bkqvwa`+uPLrW_;0|@P55uY_BZ@@U}NMw{T^%#+v%&_wRInB zv)w}KKTy;>YvP=>+2G?TKF3gU){brPV(Q~4p1TvMbMF2Go<;$7r6vQ`G#u%)8(&ihb{=Wd8mE zR`=axn}1T&vd&(FS!d&!M@_%vnFTjb`ZXJ_c4i%?ox2NeZnfm@MpIAj96hGh1DCn)4p-Yj ip3M0^aC56A_r7TA$-N&~ExGRjPHtnGTTQ?G-u1s?qL}dj diff --git a/piet-gpu/shader/gen/clip_leaf.dxil b/piet-gpu/shader/gen/clip_leaf.dxil deleted file mode 100644 index d5123cb5369e9dbd3ec9f753e940b3ea71e934be..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7228 zcmeHLeOMD$x<8W%lS!CBfS?IPIsr)$C>_GLXqgbeDCmZQN-eG7Q&fZ~$l}M=d|-$e z+)&YC?Y05brCZyZTD9Q1-2@OLf}*u8TDFDOTDI3(w6v=&ZTCz7x4X~X`_KJj?>^7Y z^PHLUd%w;(?|IMro+K?VRoPYbc3RteudxRn92!_Bi(?@GfJG7jILsHeEZFj4D~Ihi zY-KnAb+F}RET!P*;j=VnN%SmczQ6bftj<|Q)j~S;P~-1AKf%Z2=Oj20W-d%)!6t%@ z4IA^!(mki4d#*EG_h*(qE6)&fauvDNa8VlI0saIHq~$HmVhjl&FCWrVm5z_31OV3{ z&C@`-$Ml0djy|MG;bJev1oEWkkvbyO0~ZIe_ZI3z>8#Y}JyUxHa$zDa%H+ul%_^&q zRA!iu0xrpn9e6Vj_zWa}+s#?Knq(HU4aTm&<>K+7nzTn^3Z;F~6xF6sbRv7-A- z7qRL#po%G`3se9sg*qtI@iS;r$t>K&MG&xu1Rw^|HLu-GN_N%UvHTd!%bqE>r_YT{<36 zMetZ|MEN8Lv@Nme&D4Xyc%K_oTHX9jx!D3A9{6s>{$ple*{YFSXaEA+PQ54ZOEgoGms^70KCT_1Zk!sh#a? z)^HesA?(jT6RV*)+cli(A?%ibQ!_28@q!knC2v8NjRR#ije0>lGZx&Gh2F(hX-Ik+ z=tCS9RG0Ky!tqiLrn%vRQeT>pTGxJe{4r|>4(mFEMY_8H=%klfY0e{ZU)I`D7AZK90bDHzsl{}? zVh~X3Hnt7M*H+jIK(nkFEWflkQITg=GWur3{==C&s?bB5yAVaIXY%14P2q*%ycfg2 zd$CKuEjqKxvncbrfbGWyFOuL2RzSw%Y(+tNg#oAnO{i3cYe-D;j#eYX9VeyI=G`L9 z1mf9)Yz5J$>sh^TknLm{`JQ)ab~KeOL;_js(XlK*DbyCe!=$$#=Q~doh{7#tPv^b( zA-jNhhs&au3v?TLhWI_0VK}i;nY|u-e(!s+J>u|T63lspO6szo zef-Q@U8f(9Kaq5X%hKKzc<$ey8L>YkST^@oylGW@f&x$hrR9|c)m46MvZ(1+{E`Gn zi}hV!!B8|IsjE$YIrr_HjCch|lvS71uC1!B+*0DlwiRW+a4Un6J0XpaaIlw-@cK5)zv>TevGZ~moJ+8dwEya$%MBTAXn;$%jcpj zgKuP2EK^^T_Ea5GdWJ;o-?%*_qUf+MS>l?=zuAB5qc3ju_1`+i&IyIOXBKsyAQRqB zI76L*Cb||NtLll%M+X{LM0tPl*kEHt#j=U5(2#h?YgO%V;|}`{z$^v;v$xss8-wd4 zyq|PgbNDbIlFu5Ghm=Vt(M4yCi|WwifeB%ahO?uLlTcQ_1-^ClT5G*l!>I}5)ZF)g zW`C)xUyIjo7S(TQso&P3kGwPCb!XJ;8;93z3N3WCN^Z7F2HfwF0!hDA(yO1>Ux2-D zRVKNVNiFVoXHR0YGSQ4CH}^aa2c0q`Uqh4aJw&gRvr~(1538@y)r+qvz3z`HU-SVb!H0^;O?*2;a~6l@U%x+ z=g!aXqDPS6fh83mN;<~w{pP$k%)f0{5ez2s9UZ2NzYI#TH5kmp7xv(gveC*=)|zom zaH>r_r`xNIQAr4{-`?wl7JQE|@RfaLfNh>=`Y^t(oU#AgkYJrc(u-LnU)+U6)PPtq z=s5sXiX4^D-2-4~$$BS#5CTor{b=VFIEmN0o!-D|7%@->EEX=cMBaPiz0%B@B;GxxIUv*D>%pId1WO_nBx5K`PUU^J`)2$EyDk~ zHtYJ}jYqnXt#gQh?>eR;+X|-736rl^K;8dqEtcWBlL`^RE2;{_dP!J|tv)Rzc-K^? zo>M3iv4#b)e(dNW89wK9f%N6wB!(GCjZ(0QlyHFzZ&0*77CE@dKw%%S5t|IBPZc;! z1*jr#_32hyxaMl&k*+Iuw{5J-H124RsA@;WO3(d8u?ODA&acAclk$@%SgD~x;YsO) z3@_4b5}olT;)#5b&S_E*y@2?g{mtvAx{fp*=(@6;YX~o{jjAcxQnkLaB8Sz+6~Daq z8PVZq4vjQBI+Ui*<@IwiR>?k?%vjYIA4>OI$GThx2LJHhg~kJe%ehAuLfw*z>Mh&U zl}y>rVe$V{@BUNo{^j+qfCM0c$(QUJfhVk4kHGWH9st5Zdg(Pt9bB;u#$d&HCNEi13%G-|FcJhD+>I{RDIJ* zG`0~m1OFm6i5n&oEk3rcONZA{8M<2wD_=$rDCeQkz(>H}3Qt=aEde1I7Z^0!ACPso zok2rRvx21~MftWN>CPd}R;o|~NF2MkAn#Xy>pu6l?k;MCB#Nnv0^Dgv7{#(9H0Rt( zPCFM-dr0!RNUnghMj>&`Fn?#`WZB)!w1>3Uhd93&WoT3;N#p{R&d))6(CmfC@q%%W z)w=ho$?k}>9;}Wg9CpNqK&ea$ikT<;QGXD;^Bk0&)4kdv5&_(xX!mSa^U^(?D>Y87 zQs4h35jS*=QOcphDgt?;VV@G)5NwJ zqD`XV8<&nw6OYfTKceCHESsDro}M9sAPpbQGN-u*a*(9)&sZBCp2*GXQTsjWqXw2S zlbo5AKFknDB)#d2csN8A_*I%H6?@GOGd%K!DbCDei=t^DV55yPRofg_>--EqmJQo! zYpANiT?vY zdbw;&z6hCE+Ag=@s<#;J4|4eU!`Cf=$7w9>i$0fYPmOB#*6MeOwzF2%EvGF4Ls z9HzKEmc?tx_E@hv%E0oFI0kYzsJ>(-QQhu_{Eez_7`QVe9&)1#waj_*shCVxVj&m@ zo=m}KrnO)(NyX@&&7b|t`sokfge zCOXccIjCO>*Z&D&K}7MS%O=u6XAouvoGkz|JB)e{e)0<`j=%Y^)Mh%ZU7{z`oe}PoTt5rsf@@GbU{fYihqgY>0 z2h54l$47Ham9vTEyVjtcKF80OrGk8Qv&RwSl{F4@W+C7I`Y(7Rc)}fiZILSeePLP< zRj6a!9I$7oem(n}=obZGuCPSE(Xs9z2++*vE%?fF z6?HT73?T$X2jLxd)W0PZz|GQ4)Kwmo61}#t#Ontl=Ifqf$vf)OHJws?ZcIyw2$ka- z*3ZpzQu2F@uOU}74q7NdAAPD=$jyoYg9&k18b4XHs`9w)&{)g9lK*iFiMTY z1}PtUJZ4&ir+ddO(kUIDOSP0FqA@r;VH=qEhzU!%F!$BxxEiss|9&Qu!TLdN$Hd-T z7u#-WElHKZaAK8$hTI?1mp#kM>jN?K{g|8^1Ga_-hr~UOL>9Fi+RugNKmbhkOhlAD zi>;k3=^VSjEQB`3;yk%{!2viz&*1SaS`ppwr1;er7gx%#`s_K4vq*bI*H5jPDS9&zL^`>(VPTXPJB%aOmRK@P(Q0PQXfd<=A{Ob<*JFH*Cz*Ie!ceC_~1%y zg4zlE#~b%eE6bAGlrhe6i;V4vR7l`8iY1x25jbOcRr1lI7){Ya=1O?mCwaAETePueHcitkjXegUPvJ^w4F9H)P;y1>96mpsS7rBkXV24=Y2aQP3aLkwJ@ z+BIVm4fP0f=`%P#j$&a695i%ykPAs?P&k7;=DGAr21U7ffi5JKK~w@~#*>XhQ`Spg z4P9nDIX-pyPe?hRl`BYDqnL7-6)3!NY8Tvm;<(>!?>AGve=Frg70({$@V1Cz;T|Dx zr%rsjQ1KJ~!@DUzD3jj?PoMjKR_U?%DR#f9Ur(g`^mWSVD#E_?q0(PcCqt4II9Do6 zmDVMT1n6A&d*f--LcCXVQOnVb%Ek9df$=eA7`xD3LhYuB)5L26v-2P_hZQ|6zm`g3 qPh-z8!lRyy@RSq^8#@`{#1|OhZ+xM!w3rb-zMP!?n*xpiz+VBINqNu! diff --git a/piet-gpu/shader/gen/clip_leaf.hlsl b/piet-gpu/shader/gen/clip_leaf.hlsl deleted file mode 100644 index 4eb9994..0000000 --- a/piet-gpu/shader/gen/clip_leaf.hlsl +++ /dev/null @@ -1,372 +0,0 @@ -struct Bic -{ - uint a; - uint b; -}; - -struct ClipEl -{ - uint parent_ix; - float4 bbox; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const Bic _394 = { 0u, 0u }; - -ByteAddressBuffer _80 : register(t1, space0); -RWByteAddressBuffer _96 : register(u0, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Bic sh_bic[510]; -groupshared uint sh_stack[256]; -groupshared float4 sh_stack_bbox[256]; -groupshared uint sh_link[256]; -groupshared float4 sh_bbox[256]; - -Bic load_bic(uint ix) -{ - uint base = (_80.Load(56) >> uint(2)) + (2u * ix); - Bic _287 = { _96.Load(base * 4 + 12), _96.Load((base + 1u) * 4 + 12) }; - return _287; -} - -Bic bic_combine(Bic x, Bic y) -{ - uint m = min(x.b, y.a); - Bic _72 = { (x.a + y.a) - m, (x.b + y.b) - m }; - return _72; -} - -ClipEl load_clip_el(uint ix) -{ - uint base = (_80.Load(60) >> uint(2)) + (5u * ix); - uint parent_ix = _96.Load(base * 4 + 12); - float x0 = asfloat(_96.Load((base + 1u) * 4 + 12)); - float y0 = asfloat(_96.Load((base + 2u) * 4 + 12)); - float x1 = asfloat(_96.Load((base + 3u) * 4 + 12)); - float y1 = asfloat(_96.Load((base + 4u) * 4 + 12)); - float4 bbox = float4(x0, y0, x1, y1); - ClipEl _336 = { parent_ix, bbox }; - return _336; -} - -float4 bbox_intersect(float4 a, float4 b) -{ - return float4(max(a.xy, b.xy), min(a.zw, b.zw)); -} - -uint load_path_ix(uint ix) -{ - if (ix < _80.Load(84)) - { - return _96.Load(((_80.Load(52) >> uint(2)) + ix) * 4 + 12); - } - else - { - return 2147483648u; - } -} - -float4 load_path_bbox(uint path_ix) -{ - uint base = (_80.Load(44) >> uint(2)) + (6u * path_ix); - float bbox_l = float(_96.Load(base * 4 + 12)) - 32768.0f; - float bbox_t = float(_96.Load((base + 1u) * 4 + 12)) - 32768.0f; - float bbox_r = float(_96.Load((base + 2u) * 4 + 12)) - 32768.0f; - float bbox_b = float(_96.Load((base + 3u) * 4 + 12)) - 32768.0f; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -uint search_link(inout Bic bic) -{ - uint ix = gl_LocalInvocationID.x; - uint j = 0u; - while (j < 8u) - { - uint base = 512u - (2u << (8u - j)); - if (((ix >> j) & 1u) != 0u) - { - Bic param = sh_bic[(base + (ix >> j)) - 1u]; - Bic param_1 = bic; - Bic test = bic_combine(param, param_1); - if (test.b > 0u) - { - break; - } - bic = test; - ix -= (1u << j); - } - j++; - } - if (ix > 0u) - { - while (j > 0u) - { - j--; - uint base_1 = 512u - (2u << (8u - j)); - Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u]; - Bic param_3 = bic; - Bic test_1 = bic_combine(param_2, param_3); - if (test_1.b == 0u) - { - bic = test_1; - ix -= (1u << j); - } - } - } - if (ix > 0u) - { - return ix - 1u; - } - else - { - return 4294967295u - bic.a; - } -} - -void store_clip_bbox(uint ix, float4 bbox) -{ - uint base = (_80.Load(64) >> uint(2)) + (4u * ix); - _96.Store(base * 4 + 12, asuint(bbox.x)); - _96.Store((base + 1u) * 4 + 12, asuint(bbox.y)); - _96.Store((base + 2u) * 4 + 12, asuint(bbox.z)); - _96.Store((base + 3u) * 4 + 12, asuint(bbox.w)); -} - -void comp_main() -{ - uint th = gl_LocalInvocationID.x; - Bic bic = _394; - if (th < gl_WorkGroupID.x) - { - uint param = th; - bic = load_bic(param); - } - sh_bic[th] = bic; - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - if ((th + (1u << i)) < 256u) - { - Bic other = sh_bic[th + (1u << i)]; - Bic param_1 = bic; - Bic param_2 = other; - bic = bic_combine(param_1, param_2); - } - GroupMemoryBarrierWithGroupSync(); - sh_bic[th] = bic; - } - GroupMemoryBarrierWithGroupSync(); - uint stack_size = sh_bic[0].b; - uint sp = 255u - th; - uint ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = ix + (128u >> i_1); - if (sp < sh_bic[probe].b) - { - ix = probe; - } - } - uint b = sh_bic[ix].b; - float4 bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - if (sp < b) - { - uint param_3 = (((ix * 256u) + b) - sp) - 1u; - ClipEl el = load_clip_el(param_3); - sh_stack[th] = el.parent_ix; - bbox = el.bbox; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - sh_stack_bbox[th] = bbox; - GroupMemoryBarrierWithGroupSync(); - if (th >= (1u << i_2)) - { - float4 param_4 = sh_stack_bbox[th - (1u << i_2)]; - float4 param_5 = bbox; - bbox = bbox_intersect(param_4, param_5); - } - GroupMemoryBarrierWithGroupSync(); - } - sh_stack_bbox[th] = bbox; - uint param_6 = gl_GlobalInvocationID.x; - uint inp = load_path_ix(param_6); - bool is_push = int(inp) >= 0; - Bic _560 = { 1u - uint(is_push), uint(is_push) }; - bic = _560; - sh_bic[th] = bic; - if (is_push) - { - uint param_7 = inp; - bbox = load_path_bbox(param_7); - } - else - { - bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - } - uint inbase = 0u; - for (uint i_3 = 0u; i_3 < 7u; i_3++) - { - uint outbase = 512u - (1u << (8u - i_3)); - GroupMemoryBarrierWithGroupSync(); - if (th < (1u << (7u - i_3))) - { - Bic param_8 = sh_bic[inbase + (th * 2u)]; - Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u]; - sh_bic[outbase + th] = bic_combine(param_8, param_9); - } - inbase = outbase; - } - GroupMemoryBarrierWithGroupSync(); - bic = _394; - Bic param_10 = bic; - uint _619 = search_link(param_10); - bic = param_10; - uint link = _619; - sh_link[th] = link; - GroupMemoryBarrierWithGroupSync(); - uint grandparent; - if (int(link) >= 0) - { - grandparent = sh_link[link]; - } - else - { - grandparent = link - 1u; - } - uint parent; - if (int(link) >= 0) - { - parent = (gl_WorkGroupID.x * 256u) + link; - } - else - { - if (int(link + stack_size) >= 0) - { - parent = sh_stack[256u + link]; - } - else - { - parent = 4294967295u; - } - } - for (uint i_4 = 0u; i_4 < 8u; i_4++) - { - if (i_4 != 0u) - { - sh_link[th] = link; - } - sh_bbox[th] = bbox; - GroupMemoryBarrierWithGroupSync(); - if (int(link) >= 0) - { - float4 param_11 = sh_bbox[link]; - float4 param_12 = bbox; - bbox = bbox_intersect(param_11, param_12); - link = sh_link[link]; - } - GroupMemoryBarrierWithGroupSync(); - } - if (int(link + stack_size) >= 0) - { - float4 param_13 = sh_stack_bbox[256u + link]; - float4 param_14 = bbox; - bbox = bbox_intersect(param_13, param_14); - } - sh_bbox[th] = bbox; - GroupMemoryBarrierWithGroupSync(); - uint path_ix = inp; - bool _718 = !is_push; - bool _726; - if (_718) - { - _726 = gl_GlobalInvocationID.x < _80.Load(84); - } - else - { - _726 = _718; - } - if (_726) - { - uint param_15 = parent; - path_ix = load_path_ix(param_15); - uint drawmonoid_out_base = (_80.Load(48) >> uint(2)) + (4u * (~inp)); - _96.Store(drawmonoid_out_base * 4 + 12, path_ix); - if (int(grandparent) >= 0) - { - bbox = sh_bbox[grandparent]; - } - else - { - if (int(grandparent + stack_size) >= 0) - { - bbox = sh_stack_bbox[256u + grandparent]; - } - else - { - bbox = float4(-1000000000.0f, -1000000000.0f, 1000000000.0f, 1000000000.0f); - } - } - } - uint param_16 = gl_GlobalInvocationID.x; - float4 param_17 = bbox; - store_clip_bbox(param_16, param_17); -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/clip_leaf.msl b/piet-gpu/shader/gen/clip_leaf.msl deleted file mode 100644 index c9456e8..0000000 --- a/piet-gpu/shader/gen/clip_leaf.msl +++ /dev/null @@ -1,372 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct Bic -{ - uint a; - uint b; -}; - -struct ClipEl -{ - uint parent_ix; - float4 bbox; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -Bic load_bic(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96) -{ - uint base = (v_80.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix); - return Bic{ v_96.memory[base], v_96.memory[base + 1u] }; -} - -static inline __attribute__((always_inline)) -Bic bic_combine(thread const Bic& x, thread const Bic& y) -{ - uint m = min(x.b, y.a); - return Bic{ (x.a + y.a) - m, (x.b + y.b) - m }; -} - -static inline __attribute__((always_inline)) -ClipEl load_clip_el(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96) -{ - uint base = (v_80.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix); - uint parent_ix = v_96.memory[base]; - float x0 = as_type(v_96.memory[base + 1u]); - float y0 = as_type(v_96.memory[base + 2u]); - float x1 = as_type(v_96.memory[base + 3u]); - float y1 = as_type(v_96.memory[base + 4u]); - float4 bbox = float4(x0, y0, x1, y1); - return ClipEl{ parent_ix, bbox }; -} - -static inline __attribute__((always_inline)) -float4 bbox_intersect(thread const float4& a, thread const float4& b) -{ - return float4(fast::max(a.xy, b.xy), fast::min(a.zw, b.zw)); -} - -static inline __attribute__((always_inline)) -uint load_path_ix(thread const uint& ix, const device ConfigBuf& v_80, device Memory& v_96) -{ - if (ix < v_80.conf.n_clip) - { - return v_96.memory[(v_80.conf.clip_alloc.offset >> uint(2)) + ix]; - } - else - { - return 2147483648u; - } -} - -static inline __attribute__((always_inline)) -float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_80, device Memory& v_96) -{ - uint base = (v_80.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix); - float bbox_l = float(v_96.memory[base]) - 32768.0; - float bbox_t = float(v_96.memory[base + 1u]) - 32768.0; - float bbox_r = float(v_96.memory[base + 2u]) - 32768.0; - float bbox_b = float(v_96.memory[base + 3u]) - 32768.0; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -static inline __attribute__((always_inline)) -uint search_link(thread Bic& bic, thread uint3& gl_LocalInvocationID, threadgroup Bic (&sh_bic)[510]) -{ - uint ix = gl_LocalInvocationID.x; - uint j = 0u; - while (j < 8u) - { - uint base = 512u - (2u << (8u - j)); - if (((ix >> j) & 1u) != 0u) - { - Bic param = sh_bic[(base + (ix >> j)) - 1u]; - Bic param_1 = bic; - Bic test = bic_combine(param, param_1); - if (test.b > 0u) - { - break; - } - bic = test; - ix -= (1u << j); - } - j++; - } - if (ix > 0u) - { - while (j > 0u) - { - j--; - uint base_1 = 512u - (2u << (8u - j)); - Bic param_2 = sh_bic[(base_1 + (ix >> j)) - 1u]; - Bic param_3 = bic; - Bic test_1 = bic_combine(param_2, param_3); - if (test_1.b == 0u) - { - bic = test_1; - ix -= (1u << j); - } - } - } - if (ix > 0u) - { - return ix - 1u; - } - else - { - return 4294967295u - bic.a; - } -} - -static inline __attribute__((always_inline)) -void store_clip_bbox(thread const uint& ix, thread const float4& bbox, const device ConfigBuf& v_80, device Memory& v_96) -{ - uint base = (v_80.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * ix); - v_96.memory[base] = as_type(bbox.x); - v_96.memory[base + 1u] = as_type(bbox.y); - v_96.memory[base + 2u] = as_type(bbox.z); - v_96.memory[base + 3u] = as_type(bbox.w); -} - -kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_80 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - threadgroup Bic sh_bic[510]; - threadgroup uint sh_stack[256]; - threadgroup float4 sh_stack_bbox[256]; - threadgroup uint sh_link[256]; - threadgroup float4 sh_bbox[256]; - uint th = gl_LocalInvocationID.x; - Bic bic = Bic{ 0u, 0u }; - if (th < gl_WorkGroupID.x) - { - uint param = th; - bic = load_bic(param, v_80, v_96); - } - sh_bic[th] = bic; - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((th + (1u << i)) < 256u) - { - Bic other = sh_bic[th + (1u << i)]; - Bic param_1 = bic; - Bic param_2 = other; - bic = bic_combine(param_1, param_2); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_bic[th] = bic; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint stack_size = sh_bic[0].b; - uint sp = 255u - th; - uint ix = 0u; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint probe = ix + (128u >> i_1); - if (sp < sh_bic[probe].b) - { - ix = probe; - } - } - uint b = sh_bic[ix].b; - float4 bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - if (sp < b) - { - uint param_3 = (((ix * 256u) + b) - sp) - 1u; - ClipEl el = load_clip_el(param_3, v_80, v_96); - sh_stack[th] = el.parent_ix; - bbox = el.bbox; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - sh_stack_bbox[th] = bbox; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (th >= (1u << i_2)) - { - float4 param_4 = sh_stack_bbox[th - (1u << i_2)]; - float4 param_5 = bbox; - bbox = bbox_intersect(param_4, param_5); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - } - sh_stack_bbox[th] = bbox; - uint param_6 = gl_GlobalInvocationID.x; - uint inp = load_path_ix(param_6, v_80, v_96); - bool is_push = int(inp) >= 0; - bic = Bic{ 1u - uint(is_push), uint(is_push) }; - sh_bic[th] = bic; - if (is_push) - { - uint param_7 = inp; - bbox = load_path_bbox(param_7, v_80, v_96); - } - else - { - bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - } - uint inbase = 0u; - for (uint i_3 = 0u; i_3 < 7u; i_3++) - { - uint outbase = 512u - (1u << (8u - i_3)); - threadgroup_barrier(mem_flags::mem_threadgroup); - if (th < (1u << (7u - i_3))) - { - Bic param_8 = sh_bic[inbase + (th * 2u)]; - Bic param_9 = sh_bic[(inbase + (th * 2u)) + 1u]; - sh_bic[outbase + th] = bic_combine(param_8, param_9); - } - inbase = outbase; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - bic = Bic{ 0u, 0u }; - Bic param_10 = bic; - uint _619 = search_link(param_10, gl_LocalInvocationID, sh_bic); - bic = param_10; - uint link = _619; - sh_link[th] = link; - threadgroup_barrier(mem_flags::mem_threadgroup); - uint grandparent; - if (int(link) >= 0) - { - grandparent = sh_link[link]; - } - else - { - grandparent = link - 1u; - } - uint parent; - if (int(link) >= 0) - { - parent = (gl_WorkGroupID.x * 256u) + link; - } - else - { - if (int(link + stack_size) >= 0) - { - parent = sh_stack[256u + link]; - } - else - { - parent = 4294967295u; - } - } - for (uint i_4 = 0u; i_4 < 8u; i_4++) - { - if (i_4 != 0u) - { - sh_link[th] = link; - } - sh_bbox[th] = bbox; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (int(link) >= 0) - { - float4 param_11 = sh_bbox[link]; - float4 param_12 = bbox; - bbox = bbox_intersect(param_11, param_12); - link = sh_link[link]; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - } - if (int(link + stack_size) >= 0) - { - float4 param_13 = sh_stack_bbox[256u + link]; - float4 param_14 = bbox; - bbox = bbox_intersect(param_13, param_14); - } - sh_bbox[th] = bbox; - threadgroup_barrier(mem_flags::mem_threadgroup); - uint path_ix = inp; - bool _718 = !is_push; - bool _726; - if (_718) - { - _726 = gl_GlobalInvocationID.x < v_80.conf.n_clip; - } - else - { - _726 = _718; - } - if (_726) - { - uint param_15 = parent; - path_ix = load_path_ix(param_15, v_80, v_96); - uint drawmonoid_out_base = (v_80.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * (~inp)); - v_96.memory[drawmonoid_out_base] = path_ix; - if (int(grandparent) >= 0) - { - bbox = sh_bbox[grandparent]; - } - else - { - if (int(grandparent + stack_size) >= 0) - { - bbox = sh_stack_bbox[256u + grandparent]; - } - else - { - bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0); - } - } - } - uint param_16 = gl_GlobalInvocationID.x; - float4 param_17 = bbox; - store_clip_bbox(param_16, param_17, v_80, v_96); -} - diff --git a/piet-gpu/shader/gen/clip_leaf.spv b/piet-gpu/shader/gen/clip_leaf.spv deleted file mode 100644 index fe626323c1913a1cdaec2a0ca3a4a58477f67e93..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19356 zcmbW7cVM1X*~Z^CNn3_c_J)=+$_80t0VxFwQlJP3GECAmZ3Agile7#`D9XEP^OIU_j}&wN)NC4&-cLLT-SBq=f2N;#!K49P8r{5jA^Xe zn9yi%YgE@-jj0dmzbm+jN>_-PV7+mAPzMiTVVy|wBy^`CPIg}1Yvbwmh zr?Y2p;T%#=9Ud84+%vKh&39V78~^S$?LQTo(6)^cG#aPZcs1%XDm=ci4t4)v_uS4! z-6IP+=gb*Aed^-frrD;wu{O3!?mHbmp|KHMkIueVN5Xv(2WqVJ}FN9`~nm&`z=?*^ajVa(6{e6oL z?svY%RUC;v5A_a=bn1rMQR&q@(s%70% z2YaX;i`;8)-n`-7k*3e#@EL;x^ZMpD=RxkZuy{@$uh)8<}Y(AzhE0dpN=@^oO+y|cU7`RL=& z*}Rd-)9jafebGoye++Bf1B3)Ygo-W^BVF-94wqx^iwO#$Fw& zX5;kL`;q;rj^t=xn>js~+WH3O6=UzH#^HHpEgU`b)YR6kG)HdkrnX+CIii*QSs$%A zZ`obinq%>7d%tV$h0@;a(}vj1Fw#B0GuDj7`$pEV)wVG<$MWCWZBpCjc8_!?Xb1g2 z0=>E44_rJC?3mQNdN>Exo~zjcF7Fj5QCHKZ0epF z_1>YO!6C5Y(%&;Pr@wb#?r4iqu(sdT!XJT`@g9Sh@t$b$f3k&7WMa$qTfU5UGjTVoQuZ(tGeCRFcO%;U`d z!8!l2L7k&5U)Hw0v8`=~I~Oe;UO@Zy#@;RVX;tjz3Oe?E<#oRf1`jSCsf@NgxutDa z3ts^Dg`_$5_C{ZeeIcCjS1#UnmcZw8XPw*JlvV%EZnoiJ+MK_O;OWoh&GyxD*q`f~ zzN6>q#umP!h2Pe~@2$D|BrTJ`)W*g&8EHvL0)O=3G&-Vm1pDWRPM^N)U zF`D_*CKcK$wYF8E`A(qU)I#%pKyAlDv#)C0Rz`F9>{r{P(0uNzy{gcB-mC3fXpU3u zfI{=xu69tNtzBz}6q?U+{al-j*Ev#~U1&bb)s87NpXF-D7n;v-HJ^ov=lrU56`Id$ zwYi1nb6Radq4|7P>#wx-(PwbhEYDZCUX5W(4O}zX&yHy_#r-8enc98e zp0U4qMkijh*8P}gvwrT6_?cJDHMTE#PMfO>t?Y~3zIbWBgxdY$nB_hnXRA2`&z7SnM51+VcM-v zbobyS$_CindvfP*GXHhosoBQ1?i<^x8Q(o4R=3#3@!B8r$+w`kjWO)A^QpZVqrI^e zSWUbBQ>nE#c}I2DoC|p~PkZB);7OD*lpSaz-@S$J1^3#VF?t@1xjFxMj}o^Qef~eC zHO_zI_TdQPo7;IjlG-t;O{@9rTFZEjg1aB|e|>E;cI?O2nq2)jinabHQak6aJ_i))qZMNy8wolsale~+fmYm&S_oLj{^C(<1(+A%h$56tDz-e#JGby%ruboeA z4DIqyQR}1K{(PF+SnBpi?wKm%Ukdlqe;KtlVFJUOYYuW3-106p8|L6@!uBiGc9}u+??@0 z2JYSuKLPGDCjN8bTuWnL0QY?DNL2fE0o=K=WG=lQ*(@=ZK_*bbuh2+wj5kx;pK@-$ zh4xa>{vPxYu~{-!}OXJJKWj_Ap!@i#Ch7{zPq`r%+svf5Odcf3*LL z+P;FIJcHt%Fs6IQ?+*JRcTfB65w6|u zjFS7E5w5@A8YTBzqvU>Tgd5LqjgtGFQF6aEO78bY$^GUix!)Y&?q$C_!d-K}JHpNH zw@1nS&Iqrb158-S{iX=F{iGJ|cSG#1soxFZ`up8ba=#l&?sr4U{dNdrF&yL1`#yI3FZ&)#G9V~=`f-;JieKhEx7x!AqyJt=O`iMQBj?dVi0NYRNr-*ei^`|Ia+CN>}t&{r_urV?p zmx9%N<~axQPg0C!Z1F?rK9eoW;EDAaaK_=feHL!)TN#J^GO+h4^`*6TIoNjE(udE1 z)xEd5CRb8>*&l76r>NNg=DktA53}Jqe+lgU+&0?uF^;hbv-*#2eD{~fHKdbEFl)xC!~#>c^GK6iZ2eFD4De!p|?+re? zp9bsW*^JLKaCLn=zt2*hq}bM&a(%S>{C*B>jI4?8zyF4-XC36ObLP)l+w;^puP=a| zTWinjOVrTnvh5hKcI)(YEZ7)X!#22D&a3=I?8dUKIBmycb1vlec>>%$hI&gETXYZ^Fww<<&aec63)RuYJ0Icqq{k>ft+eTpLNSp7;a-U_+-^Oxk z+i25AUv<|nL+$?SIQ=){+yv~na&9(-tK}Y|P0jY%KdyD!Z-y=NWFI$&mutKQ z-0|yYf93i(Kg;l+0`?ouJ(`$X!PWKOl3MQhbIrE~Z%?hReG;`=+D-+loyGe4J-;1T z?fQB@Y!6oRJlfuG3pLwU-;H3ktH@@)9l<+LTx*n%%@t$up2mYs{bqD`Z#B|!*_>!pVrUza(&W|J;3(E{k|8qm-}6PPs*z) z?gMdhz6$KT8PD%4xpVHC?gL(jT3x&AtLC0CzOjrM+r!vPn?Bn7PIC^$exv2Sv>(_Q z*4~$1Lwx|n%Q(|(yLI||AlMkWFC7F{n?Z4`^8G2sGPXEvUkkQv)^a9zIl0txFF6G4 zx@n8wq2TM#v^fU9E7e-hTJpROTh`INbQrwMI}2`q)sy#duz9s*Y)62NrOh>+P3`44 zwH--ObDZMjKMHIg!(R`!KRN41!}U>*&oN-vDg0QlZSw4W16VqOaQDA?&F2~E1gpC)CsWJg-woE^Sn;0&R@c9a zTApu)9Jm!PPR( zw}aI(&Ub*l9H+LWly_6?m)QAUhW#Ayh0J^TEIk)ZJ?HnGVoLe$d>-8XWbeHTte?8$ zIh$I|@z@vhnm^+?A8h;Z3&1zgkHmWqTp#tU?S)|VjQPD_wT$`wU@yn4?R}JwP|Ph( zt`C6AzJCzz*wnKRKLqyuT3gzE7_6T0$sM10&0((CR;6ue)2EyZ=S1I&sPjzwDA+jG zKGQx<{RxVfZ9iGtt<$HA!N$ma|5ISK<+O1g>$CP;1iFXA!@qCx~9K3u#{5+`?v5m#cfKiNlDfz`5)mV?!@kFEiGIc{xNQ`8){IQ_g9oN=xKzMf*g)b+9N zH`KcQybdgn?F(SXrR_$rJn!va1pD4@8*TdNtDgK{0w=%s&M(8AXZ84e1*~p+edW%3 z#$(+&Z`$2=j@LS|z6y4~y+6I|~5yWr)%{~p}^rJwQS`n0ZJ;(Z_6q}umx zYPmMg`aR%nDOc9~Ua*?y{)g0F&Z+tjC~D5BIM0qBfhSYSXUC7>>i$0TdSd(pJe%6} zI)?d9-+qd&E%&XTfo-eq{3M_Cla29JpMMUvKhBx{_Qn4GlG@Au>hlYVn*9|g=dZwL zQ=Gf|sO5R5xgWeeG1Rp?Uux6I?Kpl7&Nv$fD9*JwasLW7ZqC-osay}%_s!xzD==kO&ob>qK4 zElbF(f zYj?cn)0SAT2Ai*pH4UyFpMAlt`SwFo&${mqww-$V{Ti_O@?L)cSU>f|o(^su?}3$l zrOynw`O7&u2(F&|2ZL>=p8T%`m-%PH^;4f+uh}8s11Q>FWNqYGQ_tZ#6tA@?uBr3C zHpO-H?+dezhk;#3`H9pgweSTkd{GNOz2N>vy|m`J#~uyOJ@zPYxnBM)qxRfm-vCxC z@3F_j?RV~}Zv^{yQ|g(+6Tr6B=3a43YMIYDV71&sd%#_kZi=?}%>`>)#9a9IFTE7C zbC`4a{92nwZ64=JE$#cj=1IR!1*@GxN$!4d7scG#({>?PTXGM8)slMQbhN}%xl6wT)MKQPbv|S9=mfTCgYRP>XSS`8T18Ql12Dr@q7P#7(l;l1O+(j|B z_OyK~SX*+x4Xl>jZwIR-xBE@a_O8i0D6T{J*mPo*}dzKQdGu>DG$4}jGyV<~Nv_4XW4x3Zw{&z7r_rEp3m$mR~3qApSQwzVXh2K?h<2_Pv+dtOApJ?Gv*4*dJ zGVo_8mryRHxHgXMvlMmrsr{FW?Yn-yQ(R8%Sgrlt;d9iVr+8_{l1an{YYERtx_t*trb<8`!)n>ht_}us-Tpzkh&@qb+$K2dlex za;E+X_DpFr{&?ztQPl0*6E%3Y2Ug2oc^>TL`f2+&Ma}gSr*AKSjcs20C{KO7;;fFQp7Fl|>~H$&zC*~3W4~TvV)8!Wo|#PX+Jchr zsYzh{x1`Sd#6;TapYNZw;j`iPV=ZvmpLO8cSJeA_U9j4*_@>Y6!6(7bt$o&qd*0M@ z7B&FeR-5Z+AJh_KL$F%j#Wwdfz!v~zyLcPRLY=)1J=JzMy+7Cyb; z>(Ty@nrHpCf@l4vfXnsU8m>L-w+&dWT)(Mm#LfC`2j7;Wo_XCKY+G%vpJPnM-!zTh&?esHxji#+?olSg~nz6Pu< zc@6+q8hP9w^7tMIF7q4&SDQh}cn$_9kM^{EEm&Lf%mk}B9`~JGU+45tuyYyyIe%5BYapXAy?B0-1gdYjk$8$ObJ{zo$y604$b87qT zDPB`4Ij7r!J*PWRC(qHeQ_mb73(q+^1{}XPfVJhD<2bNw)qPKYBej>$2yMqxdMM@- z=bWAhPW%(VxzVtX7V@ zA8yXvpBBR1+v=H%0kCbgxu@)dTE;&JR`c2GJ$Mn=zWbcl$Nl|gGa-W8#p4_K{)sp)Ra2NjF6l0oOO~2$m z6I|wg3tX*vUu)(*3vO<;H2KCHFaK>dAdBSS`8V3GTwbn_^6JtLc~jNAQ1{0BXVj diff --git a/piet-gpu/shader/gen/clip_reduce.dxil b/piet-gpu/shader/gen/clip_reduce.dxil deleted file mode 100644 index 13ffb01b3ccdcaa458b0da68838a635ac3122689..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4628 zcmeHKeN+@zmVeb%U0vN2)kV{-g?yyC5e89XfuvD^Np&}3BV=rZL7kYS8pVYPU<)h( z^-QWi=rM_|g{;s?22mnB< z0YCxUVn~&cc0+1{bPv)b9Jm9ix~!ZdV1KEUS8vH%>PzjmKk3`5Yq?G+=avlqr2PXd z^2cQ`5GfXkWRMt07$m7I$rBd4@mL?R@n%WBq%R4pYfLrQaR3A$2k%1%r8~Gl^Gc02)iW(0Fb^d5HMn1!ZGa_7kJcHBdjN8>7I9%_(7N>wEr%58JP5 ztHx&cUoUMKr<0DJt=eoV%>kZmeW(4J?3cR>n+#{|exm%w@JZI71y|d!AhZL?+SahI zUGNzjd|eGFWdpp}6#S#s%9%$|g{B6MVR-(ba|HuU&`QN%QqP&9a`H@7ZV zbt3maPmDUd@+#UA_EdaCW64N86MfKUlu3yhw_U>~5K2>Ah z<+lrjV413z=9*alk$*R8kKwi_hPaKRp$qHIzdQQ=x{>1ZKOa@fxG$7_eF>L; zf3j%s#oyOu4!#8~wdxV5cs75dp{~{5vZtZG4Mu#@e(}p?Wrc+%ux43fOItgX#SFc- z;u6l#Uf=mb(|-HjWQ^To-r@b~sV!e+@3?sK$q$dTz|}RR4s*0Sk9FtACS&9jn|Mt~ zydF%PjwQxow`}l?_n1XRSEr(F-~f3Gs6VHvha$C}G-r%O(tk6QwvUs?)!eC*Xvu`7qz5gT z?CfcGC=L;dF00SsP#koi9i2W0>r0tTqV5H#8BX28sXv{g=4{k@I+}h{pLSiJK4#C@ z$GuW?!Cf?E*?1N$zThtQphc5&+73Z+s6kQK;A@4o=yL>o4nfh8qv)8IL$@!9z847J z0mj!l;OiQ2uDL%)-H%cK5vJ~OXx+q6+U!u;WW1iXr(M#gMV;xF?D%g3mW>mZjRW!e z-1vrm%LYGM)IYusPP*VKx`q~o#>uE&aoB+#%<*-IzLYB#YCeX~vefMu{v}O42vaiw z^}pHF4>`AU0(oMZmnIssr%R0VU>#rqFv2!VkpH0V;f~icQ0MxFP;TXG&TufH7OjmH zZh84vck}2CZv5}Y9x?=ZzD~|cs>?He^fw>3vkp8l+pXgkC8yu|Wn;0V+GnyfcrU+{ z@iaI|=nWNOoaa}tV~1zoZ+5PD@}IjF#9WmjH(i+P{U@XM`{xed{sR4Jj^F>r>wZZh zZ=-Ecm?WT=Fe%33>As^n&)&^*RabR;m`U?-dB9k7FMR#FYo^r`_{pA<{SKR>`Tss*q5C72>W^<_C+gcF_MNbD}#XZ z(_|*dV*EMqPVMct!=yn@(JdBU)DU?FjkXl4xNqeghLgdEs^TwYzO z?EB?dsxj#XSL#{WTvJ6Ssle4+1aGGi<}}#%{}>0~ASb_rXFQO;i5?YhHt%sn+XIlL z!EoT$K*l~&6ZL8tCO+x_ONRV$=%i^7f70ME*_F+LMbMI= zkK^E00ShvhPSUD?crFPXECc@4^o@NWXAlVrk|PQGo`LQE7W=Y^@Vh34RPAX?#iMLC z@zG}C&w%em*PJbN*(bY*I)No*M(hec)(Zrejb3?U?CNwjvHK|#IH{De%}mICou_|$ ztT|^`HBdk`wzMC9brCV{q&UxjPSva}6+xz-xdZv8-4n%KXezl&?Xm(;(_q}$I&j+yMb z=V$0@StfXR4g9qw-N^weHdR;#TrBb0xOD3Q$i$VvC4f;={jhUIAmTU4M3&+A3n?## z@p`UK04#yy>9(dnvdvtbC`qf+{1N_0q`?fX*`AZ{0Xou#q}=f%l7q^ZM0{%ie@ltj zoL*fFc}HO7#X>C?FlQ8u;~t(2hma%^UW@jE3Qb|eJJ9O^D?=$`T3 zqb7xOcJg_I+?S%tWb)v9iMbn>S%=6^g4(iVSx~j{oJKYr)Sh)!i%D)An7CnwUU!|{ zDc`57wd-bs*!CrPt4Vi*734nMPN(jc@S`x3$3buCspa2^XbDQJY6>K9)ATi2%4B|; zHROt*$09{Kir0knr<;rT_P>d)s?&Z3@dEG&Ov7oRY<%9<`j0m9Sd=|Dj7J>r(V zKo6drzvS92{=*t%z3mHU#IXDl|100x_93$7@KjyAv!-)sGQWUg&Imze-;`V>Rvlv1 zJIpX|H?UboJEvrhC6`^i}XUCro z`%NfJhnj*a%^mu6k3;jV1Dg8|b?CbbF!H!jee$=4DAQJARj+0hqE;Z!`m9Sv8+tYO z=rf*>X7z~Xfg>q&Y--8q?c~)nl&OMPdquO>q<$Bp@M!#x^jZAdL{zvurE)S&DbMR^ V_gc$ZtS(d^^6`roFn$XG_;1I}ye> uint(2)) + (2u * ix); - _80.Store(base * 4 + 12, bic.a); - _80.Store((base + 1u) * 4 + 12, bic.b); -} - -float4 load_path_bbox(uint path_ix) -{ - uint base = (_64.Load(44) >> uint(2)) + (6u * path_ix); - float bbox_l = float(_80.Load(base * 4 + 12)) - 32768.0f; - float bbox_t = float(_80.Load((base + 1u) * 4 + 12)) - 32768.0f; - float bbox_r = float(_80.Load((base + 2u) * 4 + 12)) - 32768.0f; - float bbox_b = float(_80.Load((base + 3u) * 4 + 12)) - 32768.0f; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -void store_clip_el(uint ix, ClipEl el) -{ - uint base = (_64.Load(60) >> uint(2)) + (5u * ix); - _80.Store(base * 4 + 12, el.parent_ix); - _80.Store((base + 1u) * 4 + 12, asuint(el.bbox.x)); - _80.Store((base + 2u) * 4 + 12, asuint(el.bbox.y)); - _80.Store((base + 3u) * 4 + 12, asuint(el.bbox.z)); - _80.Store((base + 4u) * 4 + 12, asuint(el.bbox.w)); -} - -void comp_main() -{ - uint th = gl_LocalInvocationID.x; - uint inp = _80.Load(((_64.Load(52) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 12); - bool is_push = int(inp) >= 0; - Bic _208 = { 1u - uint(is_push), uint(is_push) }; - Bic bic = _208; - sh_bic[gl_LocalInvocationID.x] = bic; - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - if ((th + (1u << i)) < 256u) - { - Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)]; - Bic param = bic; - Bic param_1 = other; - bic = bic_combine(param, param_1); - } - GroupMemoryBarrierWithGroupSync(); - sh_bic[th] = bic; - } - if (th == 0u) - { - uint param_2 = gl_WorkGroupID.x; - Bic param_3 = bic; - store_bic(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - uint size = sh_bic[0].b; - bic = _268; - if ((th + 1u) < 256u) - { - bic = sh_bic[th + 1u]; - } - bool _284; - if (is_push) - { - _284 = bic.a == 0u; - } - else - { - _284 = is_push; - } - if (_284) - { - uint local_ix = (size - bic.b) - 1u; - sh_parent[local_ix] = th; - sh_path_ix[local_ix] = inp; - } - GroupMemoryBarrierWithGroupSync(); - float4 bbox; - if (th < size) - { - uint path_ix = sh_path_ix[th]; - uint param_4 = path_ix; - bbox = load_path_bbox(param_4); - } - if (th < size) - { - uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u); - ClipEl _332 = { parent_ix, bbox }; - ClipEl el = _332; - uint param_5 = gl_GlobalInvocationID.x; - ClipEl param_6 = el; - store_clip_el(param_5, param_6); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/clip_reduce.msl b/piet-gpu/shader/gen/clip_reduce.msl deleted file mode 100644 index dd34e64..0000000 --- a/piet-gpu/shader/gen/clip_reduce.msl +++ /dev/null @@ -1,179 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct Bic -{ - uint a; - uint b; -}; - -struct ClipEl -{ - uint parent_ix; - float4 bbox; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -Bic bic_combine(thread const Bic& x, thread const Bic& y) -{ - uint m = min(x.b, y.a); - return Bic{ (x.a + y.a) - m, (x.b + y.b) - m }; -} - -static inline __attribute__((always_inline)) -void store_bic(thread const uint& ix, thread const Bic& bic, const device ConfigBuf& v_64, device Memory& v_80) -{ - uint base = (v_64.conf.clip_bic_alloc.offset >> uint(2)) + (2u * ix); - v_80.memory[base] = bic.a; - v_80.memory[base + 1u] = bic.b; -} - -static inline __attribute__((always_inline)) -float4 load_path_bbox(thread const uint& path_ix, const device ConfigBuf& v_64, device Memory& v_80) -{ - uint base = (v_64.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix); - float bbox_l = float(v_80.memory[base]) - 32768.0; - float bbox_t = float(v_80.memory[base + 1u]) - 32768.0; - float bbox_r = float(v_80.memory[base + 2u]) - 32768.0; - float bbox_b = float(v_80.memory[base + 3u]) - 32768.0; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - return bbox; -} - -static inline __attribute__((always_inline)) -void store_clip_el(thread const uint& ix, thread const ClipEl& el, const device ConfigBuf& v_64, device Memory& v_80) -{ - uint base = (v_64.conf.clip_stack_alloc.offset >> uint(2)) + (5u * ix); - v_80.memory[base] = el.parent_ix; - v_80.memory[base + 1u] = as_type(el.bbox.x); - v_80.memory[base + 2u] = as_type(el.bbox.y); - v_80.memory[base + 3u] = as_type(el.bbox.z); - v_80.memory[base + 4u] = as_type(el.bbox.w); -} - -kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_64 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup Bic sh_bic[256]; - threadgroup uint sh_parent[256]; - threadgroup uint sh_path_ix[256]; - threadgroup float4 sh_bbox[256]; - uint th = gl_LocalInvocationID.x; - uint inp = v_80.memory[(v_64.conf.clip_alloc.offset >> uint(2)) + gl_GlobalInvocationID.x]; - bool is_push = int(inp) >= 0; - Bic bic = Bic{ 1u - uint(is_push), uint(is_push) }; - sh_bic[gl_LocalInvocationID.x] = bic; - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((th + (1u << i)) < 256u) - { - Bic other = sh_bic[gl_LocalInvocationID.x + (1u << i)]; - Bic param = bic; - Bic param_1 = other; - bic = bic_combine(param, param_1); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_bic[th] = bic; - } - if (th == 0u) - { - uint param_2 = gl_WorkGroupID.x; - Bic param_3 = bic; - store_bic(param_2, param_3, v_64, v_80); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint size = sh_bic[0].b; - bic = Bic{ 0u, 0u }; - if ((th + 1u) < 256u) - { - bic = sh_bic[th + 1u]; - } - bool _284; - if (is_push) - { - _284 = bic.a == 0u; - } - else - { - _284 = is_push; - } - if (_284) - { - uint local_ix = (size - bic.b) - 1u; - sh_parent[local_ix] = th; - sh_path_ix[local_ix] = inp; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - float4 bbox; - if (th < size) - { - uint path_ix = sh_path_ix[th]; - uint param_4 = path_ix; - bbox = load_path_bbox(param_4, v_64, v_80); - } - if (th < size) - { - uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u); - ClipEl el = ClipEl{ parent_ix, bbox }; - uint param_5 = gl_GlobalInvocationID.x; - ClipEl param_6 = el; - store_clip_el(param_5, param_6, v_64, v_80); - } -} - diff --git a/piet-gpu/shader/gen/clip_reduce.spv b/piet-gpu/shader/gen/clip_reduce.spv deleted file mode 100644 index 40121e75edcd0425ae3c04046dc041f913c379f8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9812 zcmbW5cYs_~6~-sq5=aP1C?VXwU?HJnL?^SmlVo6aX4##ks|Z*@ z#je<}3!)$@3MwKZ_Kpo3*cCfc74-Le^Umfn;6J`_xZn9sz2)6`NqgJ;iG{+1!nTD; zg)=4;vS)gs4W>|-T?U|oG+Y{Hx*ntd{Dm4wCKyF7S z;4qCG+NL8WGe&9WPaET%C>>Pq?JM@KTi4sSx;QXATpVp&SS=T;m0GztSQ#k~G%Dwn z^_!IVRcZs(vBA`#6OXiyPHHf@!ph1()`i%ogxHn3z4RlU%w&qVGEf|-5A{=e;b>!I zY@l%f;RlQ@<-esXp3V?LY&$b36fVf|WX20KJh89?W3^ryEDo0%TZ;Yt^$Qk`EnQ(u zM_~rGjILaOzHMO^`e>s*QZCYkZqHE8T07%MNLF_y81F3KuHaSG%JAx{^Xn?8`wW*x z%C$z(5ZKO)&*n`x+B=!~9L8qt0WuWJ)uetir)zNDre!p54}0JrJfxPm<>t&{demW46Ios@3XYh+Lkl#z?6)nj0dQ zXKegPjoT(=^Yk1Kj+D+Hs@Lk3!N|CEeYV4%?eh41nrkpMcwd5XTQleOVH<6f2F?lV z*0r4$c8~aYKh3H#C4YZ6hE8(rY*W+D zC98gJt}|r%Me4~NaphC=_QK8R?sX@7=kLnqYO$|AP^zx2oyX9q)N5;xfyUlmxRtR| z8_s4P=Ad(Pma5Hb9Hz6d zk3L(t=)B&uemV>L2j?xv?QX4QaM@h}p}rj5JtHL356|o|ht5JzQy>4$aBbJZHShS2 z!U<3D?g;<*FIiF@7h;p2Z!hk54aWRIWY49od@fsez}L z)6~s45Al4dpTOuj@XR>ZxHBVPaINPt?n?bUAK_zVeVZZAICz1KW1 z&RN~NOWm`m?j54;I;o$M>#pSv+}v%9^QdVL;+^nuZgUZz+322cxhC(IbRpy9=P`fJ zwOC%6ON{3*;&vvxXK*eu6PssG-SwNt|2=nd#+c7@V=wCFvj_XPzJBI+zUG%V#`#8! zHs`S?<6L9{vVX3-&Z)it-DfsE`hHkrHh;Wh#I2&w|0At=o=LX{&!M%Qcuy=Rp z*MNOOzX9yo^DKUh(V9{3<6wJ}cYf-=V{-O$OOB0+`nQ3-i^Jb{Q2!m7)m@CneUZ_5 zs7D{)0qeLs_U{6<%jy3;MthVuulhYXr~OAcHYW1j5B7Zt{a5PbkG>{=`LAI7HURTG zk43DOd8WX*R*F6CjjlaOHKQ z=l7F{XWD%}k~MUXKZVF2#R|Kg>ODlQWc)PJjVwTJ%g2cO47&C^$!(s`B66#dsP{Rr z??hVf4s`iT^SIB0jlBcuA(M6QMAjoGA=;zn7r@%=)xA~^+n2y{moTqA8NZCk-AJ9g z7{8Kp&c*ni3fEp)kvT~Dn0@NL9+ z?e^e#l#BKHKDc#%egG$LEpz-3G0qt8f*&K^5Bix?T_5qijQ0K*@6AsbeSaJuCfCmx ze~$QQ{{^FV$5_+*z~;CqU&CL5-t;p21Gv2#_zz!X^S3y50>{1d4xIskv^z=8daoS=J{tk8zw3%xn<3A92_rZHbJ#7C3yNBAmW7Na;Fj#+W4>77A&WSYV zBghoQ7;XBPN8UY*c{~2EeRvdG?1MQUL(CC#c^vFq^fSM@KIZqlxUP<|X8!`aX0bm1 z2FuMyjMpZYo88_9R&D8pQq0ez?HyueSdna`qvP{cVTych0!T-GMG| zA8m~4vCds!=dI0qTitbTgPRDR#b}H+dykrqt!wMOqiq@@=lX~v_Y82Fdk1tmbI0EAh;IMd zVx4vZ%SXTJu1C~x)b}ppb_RP7x^J_|Z7uyyQ5e{0ysY(^jZ&}R=s&OXFZb1vAuiN5B6?JM2KJ>lfzw_rZl zIQiiA0?WIuu9?62a#3d?ST6K^!S0K7Jwy8;ep~5hyt+Q&zX+VhAAo*q62BPTZ!!Ih zSJ%gNi+MY?p5e%UAU696eF?hj66bFzSRZ-U_h3dJ*H_y?h@9&yjy#8e(|I0>ZlChu za~N3OzWp{*58JcAa@zbZQa_wEn?~&w;2jv{wJ&FsbI#F=W8^vloX+t`^mL9#q3a{> z9J?8PoTIj*5jp25j=U?u=^R&~yO;9ea|~GC{l0~qtHG`J*0FH%SK!lyPcK-`cRuch zHR$Hj=A3&N<)Y4UU^#Wy=yunP}8SL6` z#~9!K^T5W&`@0v|nDqVK8(ltp_5qtKy?Yj-%TFPjYrii-iiP5i^qG#hr{4WD5chFM#@NRdVDqUT z&)C8*H8E>p5UKf19HAS>ULxJz}2=))uv&2bPQ4MX+4dc3l9Rh2M+BL9T)UJc& zqPFKv&Up7^1aTii9|gNMp*O%A6MYP9j`f`SX^iI~<}glQ?e4ATSetqDJ0FqrT@%N< z=J)F?#AhcY-nE@`Jc)58;(NCXW4w14fqn1dJ-GzjjVwSe&PQu^fuE0PkN4ySIhVdC zFNCvZyeF4}-Fx{sk1qlndnw`^?LjW;Tn3hl`|EPBz1vgxTmhDkeSI-F_SJav%IOy| zF9Dl3`gkc=ZUYjvUIw<-d?eyt4wjEvuK-6ab687Gzw|e)4Sf#cvpW)dvPX`0W1Nk+ zCvzENPp$-~d-6*3Zp8Utm5=lKh%hipb3XPU z7j<3_mWy?I1K8fvJ$WO#e5})(z_Cuon^#W1$opond83cFfaRKd(v|%dycOMA?#VTX zvA%O}L$vvCKV6J(&$+1o4)myRUhB*07xmu>PU~NbF4vEk=Xl0R*R07xk|PM}6~JUrxWM s{~>T%|HJ5VHy|(kZ;VV4iMzPn+*BlPA<@K}H9mGHo)Z+(1L{LV6J09Gr;LZg%9&NON zAQx~aMJ?iyz@znAloTCceMZZr|Mo0NTF%n~InUbR-^xG0Mfg`47z-T^y>Z}Xfg2BQ z^s>q+laGJOvEUiKt#Wh@|9V;FRy}B2QnE048~_oB0R0XWN2V-_M+cyRwgDg)i48xv zQ6Xp?$S6vVS-&G_BPu-h2|MwNU>ZMcKSoXUB9PeA@VljQ)b7Skb{kA~+rcknznw?S*Aojk#0^@zP1Z`G zwXz8zq5u>8)4#yB3W(bg;${;(LL+XSL?U)T!z6M9H0g-|vtiH<=&@)pOXk`~YTY~* zMnWSH^8fzVh|z_wg{Hk?3BdFxFW#_F4q5#H=~O`&^gyAPK^u+13eXz37d|G)3MkN@!B$* z8PV7AVr(pe*R#5hlE~GN4Vjjx?~ASREamGtWad#{O6(a1u2dp7V5`^S<;861Sr#;U z2%|?}2-*hDm>lSvii_7qVI3obSm~64aNIec1Bc>1DZm}vbO|G@ri35*WFJ?`C0B9Z zS6xzU3y3SAq{sEs@Q!$S6$iQpr*VQwcwt&@o)SuNF2=ddAg!LNecRTx>mUbuP^^|X zNm(o`@{$>N02e5P8g0Aup&fxde4{W&Z_{=X_WU{{H*W^%E;~1cwANjtsP-oJ(UP4t zomja|TXilJOpUDEKx(#amv8WV*mh@N1_?o6M?B{c2YOhHp%$m%goSHZx!#R-tsQcS z?#tAaAErF&PyPOjlxeH`2k@A_x9YC0qNh7CEDw4j_%ymqT4rtn1}2e362g45khV9$SUVYpp}Mm-<(;v zCU;|6h9e%K>P!_wJ8s^-+EoGId?ewEkth`2ji?sFnFyRMhu7-i^#*vuAiNPGZsHIN z1jH=}@h@`XHc8Q*n`dr%?WynClNU=%c55%Nto|W7ZE@#V>Wj9%7u~}r@C#k}5nqHg z^Um`xod`M4zc34vSWNwH;hEfD8kXeM=jO#C%iP-ws&A-#DES4S)KF@`1|B*@&)oua zbPtRUztaD3XJB;X1YT+?qJEe8qT$w_Gx2$Ov05~SqURt1lz}9I^rFqLx;ZeFK&UKws)Phr_+8SxVm zaa&A@Oh();!)+}rk#S1cJq~s+w01*0`#hfglQFw7iCrMA!}SL@mp(VwwzO$kyj|f} zRN*FZSTio-iYh{l3-1|YY(%g==>jd|xXL>lBCS%)hPe zq2=OG4KBRA-5(4(p$zZEg*UcSJKTt$%5dAgOSZ~O*tf-YZwKLFj@^%g@Ee-lD}&vT z*8cAj`*EI1(TSNpAV3FdOLIjlXms4fLAeHkDmbv>Z!W(xPyX~X?^8ScExpkmZ^NHAO6T(k{mJfzR?;J`C`#t=#2ah&$WU%cFRnerT zqLQcM8xO9|`{L1f7%fxo?Ey=Au0g80oFpZ^GWkEp58-x3JY;&GYmEwM2*^Mm7q~iLsE;|p$gt#dAe!yZC-VX>$anrV zFVF(HH7{@jZT|^*K_Hmpf14L*|0D8(BuGUv@WAf`GNVQnScJr@&jA+ z9Q-%=0m%j9O{`2NbAcHod5e%mC8rlS!yTL%q{GpO5=wQZ zi&kjT5E7^(&ijQcNZFvSGDSG5|9*%Q|ERb~2z0kSX7O%+szPRnOy!457BO*c&4 zWKxAhVk(BHAYo|S!1|g;Q`xFQU}<9-2gj$WxVgDoH_Tp}k(swSNvS8ZzuLX};6Uob zL*JLnb40~IN~c`U`e;Uo7w?&N=u%I0(Y}hVpsV{T&Mmgl@-qr&=Vumd+>)Q3S&)Q1 zWyk(1?x6GG)M>q|(CnaGj^BNZ?sb{4ZxY%CXHD#QwAQdh+vf+5C2A? z49wsPhh~@m@rSUQx!kQ{qd-VxY!zVK!Mijjk->?=IF|o1{Q{-%mcQ8l!)M*i0xS`C+m4qEyN+g$v!&aQSu2I z+0uNejqQD^k$jXVMbPFLKaG)F#$rJvB}Pg~Hvt$7w8;BJ_d%5cs+}Mjr zmXT9TeyPG%8R4d4`7uh05qgSGRG(v#tne_CXoPqHg$GR-mvIDy(~y@ISH=SGib~@( zyUSzMSpTvacmZKl?g5j4d9tfwe~r;~qAf=RB{$lIPTKTEgj~!XR)m_NzgVI7+C%k5 zZh1QjrP+tZaj41A%CV%`0TN7Ww6!$R(Pxd&WR`bEPnIT2*%z&KN81N2fQXk#uF*{)khu&B`-gsm@z8#L_IO2G5JTTddNWX$7!MrGb3bzRe z5=x~r3C9%prQ~s_A$S~%8p7BifSIJZHv|Hs4CiCQvl(~DyaAdT%kHI(`ij%=mPJ<- z#r0rg3?1l78tEUMq6rAxO$+f7MSKon4#oniE?PV;k~`MeU?h=?4H^tq&S9ml-{&|; zdx3VWM_MT@qeWmsG#IHguxg>)B1!2AWx5_)%QAvTZq!F*D`3yM<-D@uRqA%IR8%kz zEj2klSKitkFnNH<9}nP{^zgmoC7S{UWVkSV2JeQ!S6HE<`LHDEv$14(f)Lw;?-39= zzU3mX$>Xh`C|Ww9o>hoZ4HfHs4M=5mm~sXDBStl~2U^a%Qv7i!8@pxqVtl&TxjTAQJNmxSXAOvtRO?%a?)xObwpMkX#k-XJF6AcSkmT7x1(6H z4FZ|&Xrf^b>gU)ph5~ysb*o&q5skn{AmF;ff_zN19iinRr^bo7v zHh~A>bAy}hWCKFLY3ifRx*u{_DvG?; zT|QI{%?Im}wqHpv1YN$=e2R9dM5fk4_+73?PpLRyS2R0Z7*I{|`&RlL9G9lYzBW1F z@Q{$#`k`jtsC(h(x7ARJk>l~fVG4Q(sRmz6(B!6d@6vs{jj?EySjKWeBTs>QtAkoZ znBmZFE8M9y`ie6N?Y6jCTBEFeRxN>@%z5Ff2$BTYF#nJ=QQMi+kT2VWa1%AfsU8`0 zVDwYKCS#Tg2;TYDm=LqGENW~zIC;{ z>XPHt+_?;Kk{EZV5Qg;H39%a<}R^*rUnA2k}9C+hQtl;(MeN@5pftQqG{c zr5Ft}*S0yES4z%4>wpdX*aNs55yUt{&!RF?E?+7`!!7l2eN3Z=rJ=X0jGAX@C_CI7 zWAZrB8%2tu+Cg}u-00|R^Ejhp5E9U+x(LNSD8B5{9-3vzn`Va5O#F<+~Tx{78>|oQ0RAQ)iI65zqNkbT}sP9}kG_`4g-?trnmj$E$hDamg#{CG@SJtCR%v5hx$U8Zs@Gj0a4kJ;M2 z(!d4I3;92$qs|L+EzYZBdJiQ77rm;FGrLi<#YLgjdi*K{lREKuw%J7?m8D@2G;9YA z?qYv)1JBZsVnq0FCv^vrt`A~4r$}H=dd5+;T#ZAmeobBngaP|Gd;h*E5jLZlbKIa} z=e_$dsujfnG?iZa7oapSi4TLmr+(KSszRCG{v8)3AO98VLldK3y6an&F z{^Lev0M3I)HjuJxZs{K@HX^NVdLmz!Ny5zmqB}^30riyfdkKeH+$5OmzKW@UGUr#o z=_VM*lrKGQWHkdD3`uKw@CLx5W64v?gw6mGVSzx4Ef9ef(wAz1B$~?*d;<=It@VSj z(Dp2@-4^pVxzSe@*C>>VU`?$e= zZm@4c^lM443D#hoHAZKRB6c_~tmj+bj!18fF;*jt(_rET66Ou;o7b@{ zMuVA8ZN^9quyhRGd7Lhl*hzU5DUB>`r2l%4{_`~;v6mwDJDBuYMfQ_s4qR0X)Tsw5 zR0B7a14Y2hj!#;End*Uis)2shKvBn{p+C)(eav-(pK{JVFe>YPj z4AU5WHNtrqWJ{;LO>|Ful1J*-wjx@ninYCTX1Tp3wXIn2U%gD6LHS zkV*)^F-N~!CYW!{l}X0$9^qf1Ub0YY)w4|WYfXsoce5ThwUdcTUB_~3MzRoI53X>7 z0G9E{M+M|&195~${0BsIPY~LyCcLu_*kbyiSz=lM29z0F+imM)WROiE#7%%S%n3MC z{D*k%Y=+28jEyK8CD^{Se`cUj_n1gOEhYR04z&?%DK(!*G-`}!!WS7jzTh-Y?4Kc; zffqI53&1+-(>TN!7m#rV;Xy18W>A1f125*?kPtW17=C(MvRpJ*YC_es?aygn;QiEM zzh04Fb9g4j8_%rO?q$sZS%YvyJUcSnNOAKY7y0*wH{rKPNF<}L!WFor3`1OkmYXre zWVFo35TjhF(UQyJPup!HSqiW}uHy>cKqD;019P{_i9%)|zKOb1OB53Vk9okhfOfTq zZ`;0JA{S?=!Jh`iBT3OLAtC4(a{#P{dHfSmbMf3PCwRo;2z`4@G<7PVKhEwN-rOUCvc8zIj+nB+lShM;(zY8JH=l0b#z@eUDx_=Z=F42I*br``SGtGi5c? z@;yTGJwDWVfVLoO+lSFT&t}SNuzLqddnd9hISg)sht4ZUH3aW4putv9lmPM*8ko=^ z-7ECekp=rv{j7i#bPk>6550GaW|fa>_TI0>1}le(5gOo-MDHS`13_nUG+F8K!NyB+ zxhA}{W`!~lyd@;_IfM*Gu`@Oa3o_{>`YsUG1ztQ|W+)+vbjtG;iQDPnI=Mvk74LJ@ z*afj(3to9FkXFy#6#>ZJy1btpU%;(~%4rg;mn6S>g)+(8b1zZ}F+t+Ob|C!70Hw17 z;Tc*^bs#)p#4=NV;?legFP| z0SlZWWGWLAZL-+XG-fe5agk+fpC4adRc)v!?5arZswk}0tx}SgFY?I&%FSC#{SxUa zR;J7?Nhv*^^1hm$ViPraBq%3SE>{aGyM>h*iz*E{l^mxmc4{?KtNhr8Tj*t(b?%p0 zvr4cl6TMa*^;$`4hbW5lS zS~{eTGKTH}0kH@ZYP7`JH<3%$Gjw-BoMl9G-rX}9l`5A0*c&CBi$cRj@l5>zOMJM# z^o3`%F#Ks!cuP|F&k4deAa)#BZdk&bj4k~Ot|qw>#xe|!5yk`4D;lkGc8u|WE8*p_ zj--fdcc%_Q%e^d8ZQ5quMV z18=}I$?8?Hv@jOD$jkWk+lFwo6N#tbr zx|3VioxD@$dtOPF#rl|LS^TfQVf(r-u}6+lj=Y+6M0yg0R^drYXgz=NeO*4;5DWP< zVtxKn<;5xip%qIE7$gLY61-hFo4|Yd02z3Gx57pFZ~{24IrLSXZWNtF>Z2U7`YSr6 zCEUI!t-n8ucNz`1JQf(+oG1I7z14kK*^CAjcif-Qx3LY6XCY>2puh@!e)eJMSIUbh zG*NZLsx^{w=9gJoGc8HFlUO;)Jn#+moC3}$Fpj*m4_Z+S&Nro4(m z&*QFGp{cviMsdebsCTcV!P#Q#g%78WDNQJJD$b-N3*s#6TjO^pu}>V^XoV*<-M^Dy_f5zz9n1mHyV1k z*9lr&?j`!&YpZh~x61GO-BZ>fEywPyY`E85#~rcCzi+r#S|@Muy)X5Tiq+fUJ3 zlom$(?0L&NuG}wB?$@XEJHjnGYh~UsrQad4G|pD)b3yw2@h!QC&uo9)afB)sWv7-(DBudiNkJq2PzWz$?jSDMJ zV;@+Fa&t_Hl_+2D_d7Dx|7CZ>KvlyE>-46hwl=uUpOd)O| zJ&WH;;r97iDC5P|&ra84$5#4}Vg1MQPsbj3q$F>*ZszW@V{=anuFzi6{a-49$qV(y5 zHH7yIrSFfgd0(_La}O(7wq~X9Gi#{wy}srsbNoTs_(wZmKUjwnZ~54o zww=E}*x7R5S2zzP$_R}jTw@Fu{0iM}mx&)p5s7jq_!j2er;>2nq#t2+TUlhz3~4P> z+XqUqKv1cz9qmzHa#9+lErmC}pLO3Q?V87G)}mL+?=knZEO z3bUK(dtK^#VwKmIR{8aG-x;ffyWRYNvs_pOcua!uXM_*XUa-I$>7aN%ewoia*i3I) zpNk?uI)CWxt%jw&kv2a~4+#Dzy;ZPx4>{=+{be_GrOO(n7J6&6pRm%KI?MmUD&$Ri za8^1mIL=@X=#5MKO+E@%Kmy)c9wAvnbU<&}Fc|5B$zHgRvR6n5wa)Pz`}18NnbLFa zr_Z~e{(RYpGF_tBYo(aHk@R8a1nX7$f;Z_u2gMEA7a3SR^oe!>0b2EL@GU3>tKJFT zfs(W89rCta)(&L@_M*`_4!yK2iBM-%v}JkLbEd9;??G^}cig0_BbgiFmf2fi%U8Iu z{v8c*lE3F?^Wsb}!Cwlu+g2-qssVxjHq>sf0J^Auca_l>7pqlE6jfHepN(XVGq0>T za<%`^)fGFLUk|dA4OuG{hpY#v+9~@V%ue>H&Vyfeu{9N@EKBBcu(pf+8KEGHTM(RG z;3RFrdk5T*(PwO8d}ClIL)q4Mr@Gq*KO=0pkhNub_Lg9Pnwjw7F)&e1$GOU7^gA&z zo&f`6{>M-Yx%=osMQQCLOPW=;HT!qwa6dR;$%SaCb7mKC!^M4(lHi)`5UKgS zci*L!m3xa0oxVM_fpBfjd6?O0Rgsl<;d;8U!S>?T^%s|CHQ|F5m;fji{OxcRynxh+ z+@O&$>|Az?8%O2WJ`11Gm?#tF;s*x)W2MYeo z69peMgAbtKEpz}DE=9o;i%>HXQLvtif)OtORyCmD!3q@o7zJZin>ELwnhVWff3q1V pSlo$%y$_<`S&jglmTA_!7}ea5f+h1&@DDRl@B}^sEYqLYe*p=hKBoWx diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl deleted file mode 100644 index 673e879..0000000 --- a/piet-gpu/shader/gen/coarse.hlsl +++ /dev/null @@ -1,1246 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct BinInstanceRef -{ - uint offset; -}; - -struct BinInstance -{ - uint element_ix; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct Tile -{ - TileSegRef tile; - int backdrop; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -RWByteAddressBuffer _267 : register(u0, space0); -ByteAddressBuffer _891 : register(t1, space0); -ByteAddressBuffer _1390 : register(t2, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -static bool mem_ok; -groupshared uint sh_bitmaps[8][256]; -groupshared Alloc sh_part_elements[256]; -groupshared uint sh_part_count[256]; -groupshared uint sh_elements[256]; -groupshared uint sh_tile_stride[256]; -groupshared uint sh_tile_width[256]; -groupshared uint sh_tile_x0[256]; -groupshared uint sh_tile_y0[256]; -groupshared uint sh_tile_base[256]; -groupshared uint sh_tile_count[256]; - -bool check_deps(uint dep_stage) -{ - uint _273; - _267.InterlockedOr(4, 0u, _273); - return (_273 & dep_stage) == 0u; -} - -Alloc slice_mem(Alloc a, uint offset, uint size) -{ - Alloc _331 = { a.offset + offset }; - return _331; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _267.Load(offset * 4 + 12); - return v; -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok_1) -{ - Alloc a; - a.offset = offset; - return a; -} - -BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) -{ - BinInstanceRef _340 = { ref.offset + (index * 4u) }; - return _340; -} - -BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - BinInstance s; - s.element_ix = raw0; - return s; -} - -Path Path_read(Alloc a, PathRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _404 = { raw2 }; - s.tiles = _404; - return s; -} - -void write_tile_alloc(uint el_ix, Alloc a) -{ -} - -Alloc read_tile_alloc(uint el_ix, bool mem_ok_1) -{ - uint param = 0u; - uint param_1 = _891.Load(0); - bool param_2 = mem_ok_1; - return new_alloc(param, param_1, param_2); -} - -Tile Tile_read(Alloc a, TileRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - TileSegRef _429 = { raw0 }; - Tile s; - s.tile = _429; - s.backdrop = int(raw1); - return s; -} - -uint malloc_stage(uint size, uint mem_size, uint stage) -{ - uint _282; - _267.InterlockedAdd(0, size, _282); - uint offset = _282; - if ((offset + size) > mem_size) - { - uint _292; - _267.InterlockedOr(4, stage, _292); - offset = 0u; - } - return offset; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _267.Store(offset * 4 + 12, val); -} - -void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.new_ref; - write_mem(param, param_1, param_2); -} - -void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 11u; - write_mem(param, param_1, param_2); - CmdJumpRef _880 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdJumpRef param_4 = _880; - CmdJump param_5 = s; - CmdJump_write(param_3, param_4, param_5); -} - -void alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) -{ - if (cmd_ref.offset < cmd_limit) - { - return; - } - uint param = 1024u; - uint param_1 = _891.Load(0); - uint param_2 = 8u; - uint _915 = malloc_stage(param, param_1, param_2); - uint new_cmd = _915; - if (new_cmd == 0u) - { - mem_ok = false; - } - if (mem_ok) - { - CmdJump _926 = { new_cmd }; - CmdJump jump = _926; - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - CmdJump param_5 = jump; - Cmd_Jump_write(param_3, param_4, param_5); - } - uint param_6 = new_cmd; - uint param_7 = 1024u; - bool param_8 = true; - cmd_alloc = new_alloc(param_6, param_7, param_8); - CmdRef _940 = { new_cmd }; - cmd_ref = _940; - cmd_limit = (new_cmd + 1024u) - 144u; -} - -void CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = uint(s.backdrop); - write_mem(param_3, param_4, param_5); -} - -void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 1u; - write_mem(param, param_1, param_2); - CmdFillRef _737 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdFillRef param_4 = _737; - CmdFill param_5 = s; - CmdFill_write(param_3, param_4, param_5); -} - -void Cmd_Solid_write(Alloc a, CmdRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 3u; - write_mem(param, param_1, param_2); -} - -void CmdStroke_write(Alloc a, CmdStrokeRef ref, CmdStroke s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.half_width); - write_mem(param_3, param_4, param_5); -} - -void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 2u; - write_mem(param, param_1, param_2); - CmdStrokeRef _755 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdStrokeRef param_4 = _755; - CmdStroke param_5 = s; - CmdStroke_write(param_3, param_4, param_5); -} - -void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) -{ - if (linewidth < 0.0f) - { - if (tile.tile.offset != 0u) - { - CmdFill _960 = { tile.tile.offset, tile.backdrop }; - CmdFill cmd_fill = _960; - if (mem_ok) - { - Alloc param = alloc; - CmdRef param_1 = cmd_ref; - CmdFill param_2 = cmd_fill; - Cmd_Fill_write(param, param_1, param_2); - } - cmd_ref.offset += 12u; - } - else - { - if (mem_ok) - { - Alloc param_3 = alloc; - CmdRef param_4 = cmd_ref; - Cmd_Solid_write(param_3, param_4); - } - cmd_ref.offset += 4u; - } - } - else - { - CmdStroke _996 = { tile.tile.offset, 0.5f * linewidth }; - CmdStroke cmd_stroke = _996; - if (mem_ok) - { - Alloc param_5 = alloc; - CmdRef param_6 = cmd_ref; - CmdStroke param_7 = cmd_stroke; - Cmd_Stroke_write(param_5, param_6, param_7); - } - cmd_ref.offset += 12u; - } -} - -void CmdColor_write(Alloc a, CmdColorRef ref, CmdColor s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.rgba_color; - write_mem(param, param_1, param_2); -} - -void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 5u; - write_mem(param, param_1, param_2); - CmdColorRef _781 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdColorRef param_4 = _781; - CmdColor param_5 = s; - CmdColor_write(param_3, param_4, param_5); -} - -void CmdLinGrad_write(Alloc a, CmdLinGradRef ref, CmdLinGrad s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.line_x); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.line_y); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.line_c); - write_mem(param_9, param_10, param_11); -} - -void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 6u; - write_mem(param, param_1, param_2); - CmdLinGradRef _799 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdLinGradRef param_4 = _799; - CmdLinGrad param_5 = s; - CmdLinGrad_write(param_3, param_4, param_5); -} - -void CmdRadGrad_write(Alloc a, CmdRadGradRef ref, CmdRadGrad s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.mat.x); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.mat.y); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.mat.z); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.mat.w); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = asuint(s.xlat.x); - write_mem(param_15, param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = asuint(s.xlat.y); - write_mem(param_18, param_19, param_20); - Alloc param_21 = a; - uint param_22 = ix + 7u; - uint param_23 = asuint(s.c1.x); - write_mem(param_21, param_22, param_23); - Alloc param_24 = a; - uint param_25 = ix + 8u; - uint param_26 = asuint(s.c1.y); - write_mem(param_24, param_25, param_26); - Alloc param_27 = a; - uint param_28 = ix + 9u; - uint param_29 = asuint(s.ra); - write_mem(param_27, param_28, param_29); - Alloc param_30 = a; - uint param_31 = ix + 10u; - uint param_32 = asuint(s.roff); - write_mem(param_30, param_31, param_32); -} - -void Cmd_RadGrad_write(Alloc a, CmdRef ref, CmdRadGrad s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 7u; - write_mem(param, param_1, param_2); - CmdRadGradRef _817 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdRadGradRef param_4 = _817; - CmdRadGrad param_5 = s; - CmdRadGrad_write(param_3, param_4, param_5); -} - -void CmdImage_write(Alloc a, CmdImageRef ref, CmdImage s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_3, param_4, param_5); -} - -void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 8u; - write_mem(param, param_1, param_2); - CmdImageRef _835 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdImageRef param_4 = _835; - CmdImage param_5 = s; - CmdImage_write(param_3, param_4, param_5); -} - -void Cmd_BeginClip_write(Alloc a, CmdRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 9u; - write_mem(param, param_1, param_2); -} - -void CmdEndClip_write(Alloc a, CmdEndClipRef ref, CmdEndClip s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.blend; - write_mem(param, param_1, param_2); -} - -void Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 10u; - write_mem(param, param_1, param_2); - CmdEndClipRef _861 = { ref.offset + 4u }; - Alloc param_3 = a; - CmdEndClipRef param_4 = _861; - CmdEndClip param_5 = s; - CmdEndClip_write(param_3, param_4, param_5); -} - -void Cmd_End_write(Alloc a, CmdRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 0u; - write_mem(param, param_1, param_2); -} - -void comp_main() -{ - mem_ok = true; - uint param = 7u; - bool _1012 = check_deps(param); - if (!_1012) - { - return; - } - uint width_in_bins = ((_891.Load(12) + 16u) - 1u) / 16u; - uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; - uint partition_ix = 0u; - uint n_partitions = ((_891.Load(4) + 256u) - 1u) / 256u; - uint th_ix = gl_LocalInvocationID.x; - uint bin_tile_x = 16u * gl_WorkGroupID.x; - uint bin_tile_y = 16u * gl_WorkGroupID.y; - uint tile_x = gl_LocalInvocationID.x % 16u; - uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _891.Load(12)) + bin_tile_x) + tile_x; - Alloc _1082; - _1082.offset = _891.Load(28); - Alloc param_1; - param_1.offset = _1082.offset; - uint param_2 = this_tile_ix * 1024u; - uint param_3 = 1024u; - Alloc cmd_alloc = slice_mem(param_1, param_2, param_3); - CmdRef _1091 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1091; - uint cmd_limit = (cmd_ref.offset + 1024u) - 144u; - uint clip_depth = 0u; - uint clip_zero_depth = 0u; - uint rd_ix = 0u; - uint wr_ix = 0u; - uint part_start_ix = 0u; - uint ready_ix = 0u; - Alloc param_4 = cmd_alloc; - uint param_5 = 0u; - uint param_6 = 8u; - Alloc scratch_alloc = slice_mem(param_4, param_5, param_6); - cmd_ref.offset += 4u; - uint render_blend_depth = 0u; - uint max_blend_depth = 0u; - uint drawmonoid_start = _891.Load(48) >> uint(2); - uint drawtag_start = _891.Load(104) >> uint(2); - uint drawdata_start = _891.Load(108) >> uint(2); - uint drawinfo_start = _891.Load(72) >> uint(2); - Alloc param_7; - Alloc param_9; - uint _1322; - uint element_ix; - Alloc param_18; - uint tile_count; - uint _1622; - float linewidth; - CmdLinGrad cmd_lin; - CmdRadGrad cmd_rad; - while (true) - { - for (uint i = 0u; i < 8u; i++) - { - sh_bitmaps[i][th_ix] = 0u; - } - bool _1374; - for (;;) - { - if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) - { - part_start_ix = ready_ix; - uint count = 0u; - bool _1174 = th_ix < 256u; - bool _1182; - if (_1174) - { - _1182 = (partition_ix + th_ix) < n_partitions; - } - else - { - _1182 = _1174; - } - if (_1182) - { - uint in_ix = (_891.Load(24) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - Alloc _1200; - _1200.offset = _891.Load(24); - param_7.offset = _1200.offset; - uint param_8 = in_ix; - count = read_mem(param_7, param_8); - Alloc _1211; - _1211.offset = _891.Load(24); - param_9.offset = _1211.offset; - uint param_10 = in_ix + 1u; - uint offset = read_mem(param_9, param_10); - uint param_11 = offset; - uint param_12 = count * 4u; - bool param_13 = true; - sh_part_elements[th_ix] = new_alloc(param_11, param_12, param_13); - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - if (th_ix < 256u) - { - sh_part_count[th_ix] = count; - } - GroupMemoryBarrierWithGroupSync(); - if (th_ix < 256u) - { - if (th_ix >= (1u << i_1)) - { - count += sh_part_count[th_ix - (1u << i_1)]; - } - } - GroupMemoryBarrierWithGroupSync(); - } - if (th_ix < 256u) - { - sh_part_count[th_ix] = part_start_ix + count; - } - GroupMemoryBarrierWithGroupSync(); - ready_ix = sh_part_count[255]; - partition_ix += 256u; - } - uint ix = rd_ix + th_ix; - if ((ix >= wr_ix) && (ix < ready_ix)) - { - uint part_ix = 0u; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - uint probe = part_ix + (128u >> i_2); - if (ix >= sh_part_count[probe - 1u]) - { - part_ix = probe; - } - } - if (part_ix > 0u) - { - _1322 = sh_part_count[part_ix - 1u]; - } - else - { - _1322 = part_start_ix; - } - ix -= _1322; - Alloc bin_alloc = sh_part_elements[part_ix]; - BinInstanceRef _1341 = { bin_alloc.offset }; - BinInstanceRef inst_ref = _1341; - BinInstanceRef param_14 = inst_ref; - uint param_15 = ix; - Alloc param_16 = bin_alloc; - BinInstanceRef param_17 = BinInstance_index(param_14, param_15); - BinInstance inst = BinInstance_read(param_16, param_17); - sh_elements[th_ix] = inst.element_ix; - } - GroupMemoryBarrierWithGroupSync(); - wr_ix = min((rd_ix + 256u), ready_ix); - bool _1364 = (wr_ix - rd_ix) < 256u; - if (_1364) - { - _1374 = (wr_ix < ready_ix) || (partition_ix < n_partitions); - } - else - { - _1374 = _1364; - } - if (_1374) - { - continue; - } - else - { - break; - } - } - uint tag = 0u; - if ((th_ix + rd_ix) < wr_ix) - { - element_ix = sh_elements[th_ix]; - tag = _1390.Load((drawtag_start + element_ix) * 4 + 0); - } - switch (tag) - { - case 68u: - case 72u: - case 276u: - case 732u: - case 5u: - case 37u: - { - uint drawmonoid_base = drawmonoid_start + (4u * element_ix); - uint path_ix = _267.Load(drawmonoid_base * 4 + 12); - PathRef _1415 = { _891.Load(20) + (path_ix * 12u) }; - Alloc _1418; - _1418.offset = _891.Load(20); - param_18.offset = _1418.offset; - PathRef param_19 = _1415; - Path path = Path_read(param_18, param_19); - uint stride = path.bbox.z - path.bbox.x; - sh_tile_stride[th_ix] = stride; - int dx = int(path.bbox.x) - int(bin_tile_x); - int dy = int(path.bbox.y) - int(bin_tile_y); - int x0 = clamp(dx, 0, 16); - int y0 = clamp(dy, 0, 16); - int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16); - int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 16); - sh_tile_width[th_ix] = uint(x1 - x0); - sh_tile_x0[th_ix] = uint(x0); - sh_tile_y0[th_ix] = uint(y0); - tile_count = uint(x1 - x0) * uint(y1 - y0); - uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u); - sh_tile_base[th_ix] = base; - uint param_20 = path.tiles.offset; - uint param_21 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_22 = true; - Alloc path_alloc = new_alloc(param_20, param_21, param_22); - uint param_23 = th_ix; - Alloc param_24 = path_alloc; - write_tile_alloc(param_23, param_24); - break; - } - default: - { - tile_count = 0u; - break; - } - } - sh_tile_count[th_ix] = tile_count; - for (uint i_3 = 0u; i_3 < 8u; i_3++) - { - GroupMemoryBarrierWithGroupSync(); - if (th_ix >= (1u << i_3)) - { - tile_count += sh_tile_count[th_ix - (1u << i_3)]; - } - GroupMemoryBarrierWithGroupSync(); - sh_tile_count[th_ix] = tile_count; - } - GroupMemoryBarrierWithGroupSync(); - uint total_tile_count = sh_tile_count[255]; - for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 256u) - { - uint el_ix = 0u; - for (uint i_4 = 0u; i_4 < 8u; i_4++) - { - uint probe_1 = el_ix + (128u >> i_4); - if (ix_1 >= sh_tile_count[probe_1 - 1u]) - { - el_ix = probe_1; - } - } - uint element_ix_1 = sh_elements[el_ix]; - uint tag_1 = _1390.Load((drawtag_start + element_ix_1) * 4 + 0); - if (el_ix > 0u) - { - _1622 = sh_tile_count[el_ix - 1u]; - } - else - { - _1622 = 0u; - } - uint seq_ix = ix_1 - _1622; - uint width = sh_tile_width[el_ix]; - uint x = sh_tile_x0[el_ix] + (seq_ix % width); - uint y = sh_tile_y0[el_ix] + (seq_ix / width); - bool include_tile = false; - uint param_25 = el_ix; - bool param_26 = true; - TileRef _1670 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Alloc param_27 = read_tile_alloc(param_25, param_26); - TileRef param_28 = _1670; - Tile tile = Tile_read(param_27, param_28); - bool is_clip = (tag_1 & 1u) != 0u; - bool is_blend = false; - if (is_clip) - { - uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); - uint scene_offset = _267.Load((drawmonoid_base_1 + 2u) * 4 + 12); - uint dd = drawdata_start + (scene_offset >> uint(2)); - uint blend = _1390.Load(dd * 4 + 0); - is_blend = blend != 32771u; - } - bool _1706 = tile.tile.offset != 0u; - bool _1715; - if (!_1706) - { - _1715 = (tile.backdrop == 0) == is_clip; - } - else - { - _1715 = _1706; - } - include_tile = _1715 || is_blend; - if (include_tile) - { - uint el_slice = el_ix / 32u; - uint el_mask = 1u << (el_ix & 31u); - uint _1737; - InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1737); - } - } - GroupMemoryBarrierWithGroupSync(); - uint slice_ix = 0u; - uint bitmap = sh_bitmaps[0][th_ix]; - while (true) - { - if (bitmap == 0u) - { - slice_ix++; - if (slice_ix == 8u) - { - break; - } - bitmap = sh_bitmaps[slice_ix][th_ix]; - if (bitmap == 0u) - { - continue; - } - } - uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap))); - uint element_ix_2 = sh_elements[element_ref_ix]; - bitmap &= (bitmap - 1u); - uint drawtag = _1390.Load((drawtag_start + element_ix_2) * 4 + 0); - if (clip_zero_depth == 0u) - { - uint param_29 = element_ref_ix; - bool param_30 = true; - TileRef _1812 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Alloc param_31 = read_tile_alloc(param_29, param_30); - TileRef param_32 = _1812; - Tile tile_1 = Tile_read(param_31, param_32); - uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); - uint scene_offset_1 = _267.Load((drawmonoid_base_2 + 2u) * 4 + 12); - uint info_offset = _267.Load((drawmonoid_base_2 + 3u) * 4 + 12); - uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2)); - uint di = drawinfo_start + (info_offset >> uint(2)); - switch (drawtag) - { - case 68u: - { - linewidth = asfloat(_267.Load(di * 4 + 12)); - Alloc param_33 = cmd_alloc; - CmdRef param_34 = cmd_ref; - uint param_35 = cmd_limit; - alloc_cmd(param_33, param_34, param_35); - cmd_alloc = param_33; - cmd_ref = param_34; - cmd_limit = param_35; - Alloc param_36 = cmd_alloc; - CmdRef param_37 = cmd_ref; - Tile param_38 = tile_1; - float param_39 = linewidth; - write_fill(param_36, param_37, param_38, param_39); - cmd_ref = param_37; - uint rgba = _1390.Load(dd_1 * 4 + 0); - if (mem_ok) - { - CmdColor _1882 = { rgba }; - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdColor param_42 = _1882; - Cmd_Color_write(param_40, param_41, param_42); - } - cmd_ref.offset += 8u; - break; - } - case 276u: - { - Alloc param_43 = cmd_alloc; - CmdRef param_44 = cmd_ref; - uint param_45 = cmd_limit; - alloc_cmd(param_43, param_44, param_45); - cmd_alloc = param_43; - cmd_ref = param_44; - cmd_limit = param_45; - linewidth = asfloat(_267.Load(di * 4 + 12)); - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - Tile param_48 = tile_1; - float param_49 = linewidth; - write_fill(param_46, param_47, param_48, param_49); - cmd_ref = param_47; - cmd_lin.index = _1390.Load(dd_1 * 4 + 0); - cmd_lin.line_x = asfloat(_267.Load((di + 1u) * 4 + 12)); - cmd_lin.line_y = asfloat(_267.Load((di + 2u) * 4 + 12)); - cmd_lin.line_c = asfloat(_267.Load((di + 3u) * 4 + 12)); - if (mem_ok) - { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - CmdLinGrad param_52 = cmd_lin; - Cmd_LinGrad_write(param_50, param_51, param_52); - } - cmd_ref.offset += 20u; - break; - } - case 732u: - { - Alloc param_53 = cmd_alloc; - CmdRef param_54 = cmd_ref; - uint param_55 = cmd_limit; - alloc_cmd(param_53, param_54, param_55); - cmd_alloc = param_53; - cmd_ref = param_54; - cmd_limit = param_55; - linewidth = asfloat(_267.Load(di * 4 + 12)); - Alloc param_56 = cmd_alloc; - CmdRef param_57 = cmd_ref; - Tile param_58 = tile_1; - float param_59 = linewidth; - write_fill(param_56, param_57, param_58, param_59); - cmd_ref = param_57; - cmd_rad.index = _1390.Load(dd_1 * 4 + 0); - cmd_rad.mat = asfloat(uint4(_267.Load((di + 1u) * 4 + 12), _267.Load((di + 2u) * 4 + 12), _267.Load((di + 3u) * 4 + 12), _267.Load((di + 4u) * 4 + 12))); - cmd_rad.xlat = asfloat(uint2(_267.Load((di + 5u) * 4 + 12), _267.Load((di + 6u) * 4 + 12))); - cmd_rad.c1 = asfloat(uint2(_267.Load((di + 7u) * 4 + 12), _267.Load((di + 8u) * 4 + 12))); - cmd_rad.ra = asfloat(_267.Load((di + 9u) * 4 + 12)); - cmd_rad.roff = asfloat(_267.Load((di + 10u) * 4 + 12)); - if (mem_ok) - { - Alloc param_60 = cmd_alloc; - CmdRef param_61 = cmd_ref; - CmdRadGrad param_62 = cmd_rad; - Cmd_RadGrad_write(param_60, param_61, param_62); - } - cmd_ref.offset += 48u; - break; - } - case 72u: - { - Alloc param_63 = cmd_alloc; - CmdRef param_64 = cmd_ref; - uint param_65 = cmd_limit; - alloc_cmd(param_63, param_64, param_65); - cmd_alloc = param_63; - cmd_ref = param_64; - cmd_limit = param_65; - linewidth = asfloat(_267.Load(di * 4 + 12)); - Alloc param_66 = cmd_alloc; - CmdRef param_67 = cmd_ref; - Tile param_68 = tile_1; - float param_69 = linewidth; - write_fill(param_66, param_67, param_68, param_69); - cmd_ref = param_67; - uint index = _1390.Load(dd_1 * 4 + 0); - uint raw1 = _1390.Load((dd_1 + 1u) * 4 + 0); - int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - if (mem_ok) - { - CmdImage _2106 = { index, offset_1 }; - Alloc param_70 = cmd_alloc; - CmdRef param_71 = cmd_ref; - CmdImage param_72 = _2106; - Cmd_Image_write(param_70, param_71, param_72); - } - cmd_ref.offset += 12u; - break; - } - case 5u: - { - bool _2120 = tile_1.tile.offset == 0u; - bool _2126; - if (_2120) - { - _2126 = tile_1.backdrop == 0; - } - else - { - _2126 = _2120; - } - if (_2126) - { - clip_zero_depth = clip_depth + 1u; - } - else - { - Alloc param_73 = cmd_alloc; - CmdRef param_74 = cmd_ref; - uint param_75 = cmd_limit; - alloc_cmd(param_73, param_74, param_75); - cmd_alloc = param_73; - cmd_ref = param_74; - cmd_limit = param_75; - if (mem_ok) - { - Alloc param_76 = cmd_alloc; - CmdRef param_77 = cmd_ref; - Cmd_BeginClip_write(param_76, param_77); - } - cmd_ref.offset += 4u; - render_blend_depth++; - max_blend_depth = max(max_blend_depth, render_blend_depth); - } - clip_depth++; - break; - } - case 37u: - { - clip_depth--; - Alloc param_78 = cmd_alloc; - CmdRef param_79 = cmd_ref; - Tile param_80 = tile_1; - float param_81 = -1.0f; - write_fill(param_78, param_79, param_80, param_81); - cmd_ref = param_79; - uint blend_1 = _1390.Load(dd_1 * 4 + 0); - if (mem_ok) - { - CmdEndClip _2182 = { blend_1 }; - Alloc param_82 = cmd_alloc; - CmdRef param_83 = cmd_ref; - CmdEndClip param_84 = _2182; - Cmd_EndClip_write(param_82, param_83, param_84); - } - cmd_ref.offset += 8u; - render_blend_depth--; - break; - } - } - } - else - { - switch (drawtag) - { - case 5u: - { - clip_depth++; - break; - } - case 37u: - { - if (clip_depth == clip_zero_depth) - { - clip_zero_depth = 0u; - } - clip_depth--; - break; - } - } - } - } - GroupMemoryBarrierWithGroupSync(); - rd_ix += 256u; - if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions)) - { - break; - } - } - bool _2231 = (bin_tile_x + tile_x) < _891.Load(12); - bool _2240; - if (_2231) - { - _2240 = (bin_tile_y + tile_y) < _891.Load(16); - } - else - { - _2240 = _2231; - } - if (_2240) - { - if (mem_ok) - { - Alloc param_85 = cmd_alloc; - CmdRef param_86 = cmd_ref; - Cmd_End_write(param_85, param_86); - } - if (max_blend_depth > 4u) - { - uint scratch_size = (((max_blend_depth * 16u) * 16u) * 1u) * 4u; - uint _2264; - _267.InterlockedAdd(8, scratch_size, _2264); - uint scratch = _2264; - Alloc param_87 = scratch_alloc; - uint param_88 = scratch_alloc.offset >> uint(2); - uint param_89 = scratch; - write_mem(param_87, param_88, param_89); - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl deleted file mode 100644 index 5df99b9..0000000 --- a/piet-gpu/shader/gen/coarse.msl +++ /dev/null @@ -1,1261 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -// Implementation of the GLSL findLSB() function -template -inline T spvFindLSB(T x) -{ - return select(ctz(x), T(-1), x == T(0)); -} - -struct Alloc -{ - uint offset; -}; - -struct BinInstanceRef -{ - uint offset; -}; - -struct BinInstance -{ - uint element_ix; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct Tile -{ - TileSegRef tile; - int backdrop; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -bool check_deps(thread const uint& dep_stage, device Memory& v_267) -{ - uint _273 = atomic_fetch_or_explicit((device atomic_uint*)&v_267.mem_error, 0u, memory_order_relaxed); - return (_273 & dep_stage) == 0u; -} - -static inline __attribute__((always_inline)) -Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size) -{ - return Alloc{ a.offset + offset }; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_267) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_267.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -BinInstanceRef BinInstance_index(thread const BinInstanceRef& ref, thread const uint& index) -{ - return BinInstanceRef{ ref.offset + (index * 4u) }; -} - -static inline __attribute__((always_inline)) -BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_267); - BinInstance s; - s.element_ix = raw0; - return s; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_267); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_267); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_267); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - s.tiles = TileRef{ raw2 }; - return s; -} - -static inline __attribute__((always_inline)) -void write_tile_alloc(thread const uint& el_ix, thread const Alloc& a) -{ -} - -static inline __attribute__((always_inline)) -Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, const device ConfigBuf& v_891) -{ - uint param = 0u; - uint param_1 = v_891.conf.mem_size; - bool param_2 = mem_ok; - return new_alloc(param, param_1, param_2); -} - -static inline __attribute__((always_inline)) -Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_267); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_267); - Tile s; - s.tile = TileSegRef{ raw0 }; - s.backdrop = int(raw1); - return s; -} - -static inline __attribute__((always_inline)) -uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_267) -{ - uint _282 = atomic_fetch_add_explicit((device atomic_uint*)&v_267.mem_offset, size, memory_order_relaxed); - uint offset = _282; - if ((offset + size) > mem_size) - { - uint _292 = atomic_fetch_or_explicit((device atomic_uint*)&v_267.mem_error, stage, memory_order_relaxed); - offset = 0u; - } - return offset; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_267) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_267.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.new_ref; - write_mem(param, param_1, param_2, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 11u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdJumpRef param_4 = CmdJumpRef{ ref.offset + 4u }; - CmdJump param_5 = s; - CmdJump_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, thread bool& mem_ok, device Memory& v_267, const device ConfigBuf& v_891) -{ - if (cmd_ref.offset < cmd_limit) - { - return; - } - uint param = 1024u; - uint param_1 = v_891.conf.mem_size; - uint param_2 = 8u; - uint _915 = malloc_stage(param, param_1, param_2, v_267); - uint new_cmd = _915; - if (new_cmd == 0u) - { - mem_ok = false; - } - if (mem_ok) - { - CmdJump jump = CmdJump{ new_cmd }; - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - CmdJump param_5 = jump; - Cmd_Jump_write(param_3, param_4, param_5, v_267); - } - uint param_6 = new_cmd; - uint param_7 = 1024u; - bool param_8 = true; - cmd_alloc = new_alloc(param_6, param_7, param_8); - cmd_ref = CmdRef{ new_cmd }; - cmd_limit = (new_cmd + 1024u) - 144u; -} - -static inline __attribute__((always_inline)) -void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = uint(s.backdrop); - write_mem(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 1u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdFillRef param_4 = CmdFillRef{ ref.offset + 4u }; - CmdFill param_5 = s; - CmdFill_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 3u; - write_mem(param, param_1, param_2, v_267); -} - -static inline __attribute__((always_inline)) -void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.half_width); - write_mem(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 2u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdStrokeRef param_4 = CmdStrokeRef{ ref.offset + 4u }; - CmdStroke param_5 = s; - CmdStroke_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Tile& tile, thread const float& linewidth, thread bool& mem_ok, device Memory& v_267) -{ - if (linewidth < 0.0) - { - if (tile.tile.offset != 0u) - { - CmdFill cmd_fill = CmdFill{ tile.tile.offset, tile.backdrop }; - if (mem_ok) - { - Alloc param = alloc; - CmdRef param_1 = cmd_ref; - CmdFill param_2 = cmd_fill; - Cmd_Fill_write(param, param_1, param_2, v_267); - } - cmd_ref.offset += 12u; - } - else - { - if (mem_ok) - { - Alloc param_3 = alloc; - CmdRef param_4 = cmd_ref; - Cmd_Solid_write(param_3, param_4, v_267); - } - cmd_ref.offset += 4u; - } - } - else - { - CmdStroke cmd_stroke = CmdStroke{ tile.tile.offset, 0.5 * linewidth }; - if (mem_ok) - { - Alloc param_5 = alloc; - CmdRef param_6 = cmd_ref; - CmdStroke param_7 = cmd_stroke; - Cmd_Stroke_write(param_5, param_6, param_7, v_267); - } - cmd_ref.offset += 12u; - } -} - -static inline __attribute__((always_inline)) -void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.rgba_color; - write_mem(param, param_1, param_2, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 5u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdColorRef param_4 = CmdColorRef{ ref.offset + 4u }; - CmdColor param_5 = s; - CmdColor_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.line_x); - write_mem(param_3, param_4, param_5, v_267); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.line_y); - write_mem(param_6, param_7, param_8, v_267); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.line_c); - write_mem(param_9, param_10, param_11, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 6u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdLinGradRef param_4 = CmdLinGradRef{ ref.offset + 4u }; - CmdLinGrad param_5 = s; - CmdLinGrad_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void CmdRadGrad_write(thread const Alloc& a, thread const CmdRadGradRef& ref, thread const CmdRadGrad& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.mat.x); - write_mem(param_3, param_4, param_5, v_267); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.mat.y); - write_mem(param_6, param_7, param_8, v_267); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.mat.z); - write_mem(param_9, param_10, param_11, v_267); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.mat.w); - write_mem(param_12, param_13, param_14, v_267); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = as_type(s.xlat.x); - write_mem(param_15, param_16, param_17, v_267); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = as_type(s.xlat.y); - write_mem(param_18, param_19, param_20, v_267); - Alloc param_21 = a; - uint param_22 = ix + 7u; - uint param_23 = as_type(s.c1.x); - write_mem(param_21, param_22, param_23, v_267); - Alloc param_24 = a; - uint param_25 = ix + 8u; - uint param_26 = as_type(s.c1.y); - write_mem(param_24, param_25, param_26, v_267); - Alloc param_27 = a; - uint param_28 = ix + 9u; - uint param_29 = as_type(s.ra); - write_mem(param_27, param_28, param_29, v_267); - Alloc param_30 = a; - uint param_31 = ix + 10u; - uint param_32 = as_type(s.roff); - write_mem(param_30, param_31, param_32, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_RadGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdRadGrad& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 7u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdRadGradRef param_4 = CmdRadGradRef{ ref.offset + 4u }; - CmdRadGrad param_5 = s; - CmdRadGrad_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.index; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 8u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u }; - CmdImage param_5 = s; - CmdImage_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 9u; - write_mem(param, param_1, param_2, v_267); -} - -static inline __attribute__((always_inline)) -void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_267) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.blend; - write_mem(param, param_1, param_2, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 10u; - write_mem(param, param_1, param_2, v_267); - Alloc param_3 = a; - CmdEndClipRef param_4 = CmdEndClipRef{ ref.offset + 4u }; - CmdEndClip param_5 = s; - CmdEndClip_write(param_3, param_4, param_5, v_267); -} - -static inline __attribute__((always_inline)) -void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_267) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = 0u; - write_mem(param, param_1, param_2, v_267); -} - -kernel void main0(device Memory& v_267 [[buffer(0)]], const device ConfigBuf& v_891 [[buffer(1)]], const device SceneBuf& _1390 [[buffer(2)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint sh_bitmaps[8][256]; - threadgroup Alloc sh_part_elements[256]; - threadgroup uint sh_part_count[256]; - threadgroup uint sh_elements[256]; - threadgroup uint sh_tile_stride[256]; - threadgroup uint sh_tile_width[256]; - threadgroup uint sh_tile_x0[256]; - threadgroup uint sh_tile_y0[256]; - threadgroup uint sh_tile_base[256]; - threadgroup uint sh_tile_count[256]; - bool mem_ok = true; - uint param = 7u; - bool _1012 = check_deps(param, v_267); - if (!_1012) - { - return; - } - uint width_in_bins = ((v_891.conf.width_in_tiles + 16u) - 1u) / 16u; - uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; - uint partition_ix = 0u; - uint n_partitions = ((v_891.conf.n_elements + 256u) - 1u) / 256u; - uint th_ix = gl_LocalInvocationID.x; - uint bin_tile_x = 16u * gl_WorkGroupID.x; - uint bin_tile_y = 16u * gl_WorkGroupID.y; - uint tile_x = gl_LocalInvocationID.x % 16u; - uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * v_891.conf.width_in_tiles) + bin_tile_x) + tile_x; - Alloc param_1; - param_1.offset = v_891.conf.ptcl_alloc.offset; - uint param_2 = this_tile_ix * 1024u; - uint param_3 = 1024u; - Alloc cmd_alloc = slice_mem(param_1, param_2, param_3); - CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; - uint cmd_limit = (cmd_ref.offset + 1024u) - 144u; - uint clip_depth = 0u; - uint clip_zero_depth = 0u; - uint rd_ix = 0u; - uint wr_ix = 0u; - uint part_start_ix = 0u; - uint ready_ix = 0u; - Alloc param_4 = cmd_alloc; - uint param_5 = 0u; - uint param_6 = 8u; - Alloc scratch_alloc = slice_mem(param_4, param_5, param_6); - cmd_ref.offset += 4u; - uint render_blend_depth = 0u; - uint max_blend_depth = 0u; - uint drawmonoid_start = v_891.conf.drawmonoid_alloc.offset >> uint(2); - uint drawtag_start = v_891.conf.drawtag_offset >> uint(2); - uint drawdata_start = v_891.conf.drawdata_offset >> uint(2); - uint drawinfo_start = v_891.conf.drawinfo_alloc.offset >> uint(2); - Alloc param_7; - Alloc param_9; - uint _1322; - uint element_ix; - Alloc param_18; - uint tile_count; - uint _1622; - float linewidth; - CmdLinGrad cmd_lin; - CmdRadGrad cmd_rad; - while (true) - { - for (uint i = 0u; i < 8u; i++) - { - sh_bitmaps[i][th_ix] = 0u; - } - bool _1374; - for (;;) - { - if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) - { - part_start_ix = ready_ix; - uint count = 0u; - bool _1174 = th_ix < 256u; - bool _1182; - if (_1174) - { - _1182 = (partition_ix + th_ix) < n_partitions; - } - else - { - _1182 = _1174; - } - if (_1182) - { - uint in_ix = (v_891.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - param_7.offset = v_891.conf.bin_alloc.offset; - uint param_8 = in_ix; - count = read_mem(param_7, param_8, v_267); - param_9.offset = v_891.conf.bin_alloc.offset; - uint param_10 = in_ix + 1u; - uint offset = read_mem(param_9, param_10, v_267); - uint param_11 = offset; - uint param_12 = count * 4u; - bool param_13 = true; - sh_part_elements[th_ix] = new_alloc(param_11, param_12, param_13); - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - if (th_ix < 256u) - { - sh_part_count[th_ix] = count; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - if (th_ix < 256u) - { - if (th_ix >= (1u << i_1)) - { - count += sh_part_count[th_ix - (1u << i_1)]; - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - } - if (th_ix < 256u) - { - sh_part_count[th_ix] = part_start_ix + count; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - ready_ix = sh_part_count[255]; - partition_ix += 256u; - } - uint ix = rd_ix + th_ix; - if ((ix >= wr_ix) && (ix < ready_ix)) - { - uint part_ix = 0u; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - uint probe = part_ix + (128u >> i_2); - if (ix >= sh_part_count[probe - 1u]) - { - part_ix = probe; - } - } - if (part_ix > 0u) - { - _1322 = sh_part_count[part_ix - 1u]; - } - else - { - _1322 = part_start_ix; - } - ix -= _1322; - Alloc bin_alloc = sh_part_elements[part_ix]; - BinInstanceRef inst_ref = BinInstanceRef{ bin_alloc.offset }; - BinInstanceRef param_14 = inst_ref; - uint param_15 = ix; - Alloc param_16 = bin_alloc; - BinInstanceRef param_17 = BinInstance_index(param_14, param_15); - BinInstance inst = BinInstance_read(param_16, param_17, v_267); - sh_elements[th_ix] = inst.element_ix; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - wr_ix = min((rd_ix + 256u), ready_ix); - bool _1364 = (wr_ix - rd_ix) < 256u; - if (_1364) - { - _1374 = (wr_ix < ready_ix) || (partition_ix < n_partitions); - } - else - { - _1374 = _1364; - } - if (_1374) - { - continue; - } - else - { - break; - } - } - uint tag = 0u; - if ((th_ix + rd_ix) < wr_ix) - { - element_ix = sh_elements[th_ix]; - tag = _1390.scene[drawtag_start + element_ix]; - } - switch (tag) - { - case 68u: - case 72u: - case 276u: - case 732u: - case 5u: - case 37u: - { - uint drawmonoid_base = drawmonoid_start + (4u * element_ix); - uint path_ix = v_267.memory[drawmonoid_base]; - param_18.offset = v_891.conf.tile_alloc.offset; - PathRef param_19 = PathRef{ v_891.conf.tile_alloc.offset + (path_ix * 12u) }; - Path path = Path_read(param_18, param_19, v_267); - uint stride = path.bbox.z - path.bbox.x; - sh_tile_stride[th_ix] = stride; - int dx = int(path.bbox.x) - int(bin_tile_x); - int dy = int(path.bbox.y) - int(bin_tile_y); - int x0 = clamp(dx, 0, 16); - int y0 = clamp(dy, 0, 16); - int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16); - int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 16); - sh_tile_width[th_ix] = uint(x1 - x0); - sh_tile_x0[th_ix] = uint(x0); - sh_tile_y0[th_ix] = uint(y0); - tile_count = uint(x1 - x0) * uint(y1 - y0); - uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u); - sh_tile_base[th_ix] = base; - uint param_20 = path.tiles.offset; - uint param_21 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_22 = true; - Alloc path_alloc = new_alloc(param_20, param_21, param_22); - uint param_23 = th_ix; - Alloc param_24 = path_alloc; - write_tile_alloc(param_23, param_24); - break; - } - default: - { - tile_count = 0u; - break; - } - } - sh_tile_count[th_ix] = tile_count; - for (uint i_3 = 0u; i_3 < 8u; i_3++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (th_ix >= (1u << i_3)) - { - tile_count += sh_tile_count[th_ix - (1u << i_3)]; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_tile_count[th_ix] = tile_count; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint total_tile_count = sh_tile_count[255]; - for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 256u) - { - uint el_ix = 0u; - for (uint i_4 = 0u; i_4 < 8u; i_4++) - { - uint probe_1 = el_ix + (128u >> i_4); - if (ix_1 >= sh_tile_count[probe_1 - 1u]) - { - el_ix = probe_1; - } - } - uint element_ix_1 = sh_elements[el_ix]; - uint tag_1 = _1390.scene[drawtag_start + element_ix_1]; - if (el_ix > 0u) - { - _1622 = sh_tile_count[el_ix - 1u]; - } - else - { - _1622 = 0u; - } - uint seq_ix = ix_1 - _1622; - uint width = sh_tile_width[el_ix]; - uint x = sh_tile_x0[el_ix] + (seq_ix % width); - uint y = sh_tile_y0[el_ix] + (seq_ix / width); - bool include_tile = false; - uint param_25 = el_ix; - bool param_26 = true; - Alloc param_27 = read_tile_alloc(param_25, param_26, v_891); - TileRef param_28 = TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Tile tile = Tile_read(param_27, param_28, v_267); - bool is_clip = (tag_1 & 1u) != 0u; - bool is_blend = false; - if (is_clip) - { - uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); - uint scene_offset = v_267.memory[drawmonoid_base_1 + 2u]; - uint dd = drawdata_start + (scene_offset >> uint(2)); - uint blend = _1390.scene[dd]; - is_blend = blend != 32771u; - } - bool _1706 = tile.tile.offset != 0u; - bool _1715; - if (!_1706) - { - _1715 = (tile.backdrop == 0) == is_clip; - } - else - { - _1715 = _1706; - } - include_tile = _1715 || is_blend; - if (include_tile) - { - uint el_slice = el_ix / 32u; - uint el_mask = 1u << (el_ix & 31u); - uint _1737 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint slice_ix = 0u; - uint bitmap = sh_bitmaps[0][th_ix]; - while (true) - { - if (bitmap == 0u) - { - slice_ix++; - if (slice_ix == 8u) - { - break; - } - bitmap = sh_bitmaps[slice_ix][th_ix]; - if (bitmap == 0u) - { - continue; - } - } - uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap))); - uint element_ix_2 = sh_elements[element_ref_ix]; - bitmap &= (bitmap - 1u); - uint drawtag = _1390.scene[drawtag_start + element_ix_2]; - if (clip_zero_depth == 0u) - { - uint param_29 = element_ref_ix; - bool param_30 = true; - Alloc param_31 = read_tile_alloc(param_29, param_30, v_891); - TileRef param_32 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Tile tile_1 = Tile_read(param_31, param_32, v_267); - uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); - uint scene_offset_1 = v_267.memory[drawmonoid_base_2 + 2u]; - uint info_offset = v_267.memory[drawmonoid_base_2 + 3u]; - uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2)); - uint di = drawinfo_start + (info_offset >> uint(2)); - switch (drawtag) - { - case 68u: - { - linewidth = as_type(v_267.memory[di]); - Alloc param_33 = cmd_alloc; - CmdRef param_34 = cmd_ref; - uint param_35 = cmd_limit; - alloc_cmd(param_33, param_34, param_35, mem_ok, v_267, v_891); - cmd_alloc = param_33; - cmd_ref = param_34; - cmd_limit = param_35; - Alloc param_36 = cmd_alloc; - CmdRef param_37 = cmd_ref; - Tile param_38 = tile_1; - float param_39 = linewidth; - write_fill(param_36, param_37, param_38, param_39, mem_ok, v_267); - cmd_ref = param_37; - uint rgba = _1390.scene[dd_1]; - if (mem_ok) - { - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdColor param_42 = CmdColor{ rgba }; - Cmd_Color_write(param_40, param_41, param_42, v_267); - } - cmd_ref.offset += 8u; - break; - } - case 276u: - { - Alloc param_43 = cmd_alloc; - CmdRef param_44 = cmd_ref; - uint param_45 = cmd_limit; - alloc_cmd(param_43, param_44, param_45, mem_ok, v_267, v_891); - cmd_alloc = param_43; - cmd_ref = param_44; - cmd_limit = param_45; - linewidth = as_type(v_267.memory[di]); - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - Tile param_48 = tile_1; - float param_49 = linewidth; - write_fill(param_46, param_47, param_48, param_49, mem_ok, v_267); - cmd_ref = param_47; - cmd_lin.index = _1390.scene[dd_1]; - cmd_lin.line_x = as_type(v_267.memory[di + 1u]); - cmd_lin.line_y = as_type(v_267.memory[di + 2u]); - cmd_lin.line_c = as_type(v_267.memory[di + 3u]); - if (mem_ok) - { - Alloc param_50 = cmd_alloc; - CmdRef param_51 = cmd_ref; - CmdLinGrad param_52 = cmd_lin; - Cmd_LinGrad_write(param_50, param_51, param_52, v_267); - } - cmd_ref.offset += 20u; - break; - } - case 732u: - { - Alloc param_53 = cmd_alloc; - CmdRef param_54 = cmd_ref; - uint param_55 = cmd_limit; - alloc_cmd(param_53, param_54, param_55, mem_ok, v_267, v_891); - cmd_alloc = param_53; - cmd_ref = param_54; - cmd_limit = param_55; - linewidth = as_type(v_267.memory[di]); - Alloc param_56 = cmd_alloc; - CmdRef param_57 = cmd_ref; - Tile param_58 = tile_1; - float param_59 = linewidth; - write_fill(param_56, param_57, param_58, param_59, mem_ok, v_267); - cmd_ref = param_57; - cmd_rad.index = _1390.scene[dd_1]; - cmd_rad.mat = as_type(uint4(v_267.memory[di + 1u], v_267.memory[di + 2u], v_267.memory[di + 3u], v_267.memory[di + 4u])); - cmd_rad.xlat = as_type(uint2(v_267.memory[di + 5u], v_267.memory[di + 6u])); - cmd_rad.c1 = as_type(uint2(v_267.memory[di + 7u], v_267.memory[di + 8u])); - cmd_rad.ra = as_type(v_267.memory[di + 9u]); - cmd_rad.roff = as_type(v_267.memory[di + 10u]); - if (mem_ok) - { - Alloc param_60 = cmd_alloc; - CmdRef param_61 = cmd_ref; - CmdRadGrad param_62 = cmd_rad; - Cmd_RadGrad_write(param_60, param_61, param_62, v_267); - } - cmd_ref.offset += 48u; - break; - } - case 72u: - { - Alloc param_63 = cmd_alloc; - CmdRef param_64 = cmd_ref; - uint param_65 = cmd_limit; - alloc_cmd(param_63, param_64, param_65, mem_ok, v_267, v_891); - cmd_alloc = param_63; - cmd_ref = param_64; - cmd_limit = param_65; - linewidth = as_type(v_267.memory[di]); - Alloc param_66 = cmd_alloc; - CmdRef param_67 = cmd_ref; - Tile param_68 = tile_1; - float param_69 = linewidth; - write_fill(param_66, param_67, param_68, param_69, mem_ok, v_267); - cmd_ref = param_67; - uint index = _1390.scene[dd_1]; - uint raw1 = _1390.scene[dd_1 + 1u]; - int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - if (mem_ok) - { - Alloc param_70 = cmd_alloc; - CmdRef param_71 = cmd_ref; - CmdImage param_72 = CmdImage{ index, offset_1 }; - Cmd_Image_write(param_70, param_71, param_72, v_267); - } - cmd_ref.offset += 12u; - break; - } - case 5u: - { - bool _2120 = tile_1.tile.offset == 0u; - bool _2126; - if (_2120) - { - _2126 = tile_1.backdrop == 0; - } - else - { - _2126 = _2120; - } - if (_2126) - { - clip_zero_depth = clip_depth + 1u; - } - else - { - Alloc param_73 = cmd_alloc; - CmdRef param_74 = cmd_ref; - uint param_75 = cmd_limit; - alloc_cmd(param_73, param_74, param_75, mem_ok, v_267, v_891); - cmd_alloc = param_73; - cmd_ref = param_74; - cmd_limit = param_75; - if (mem_ok) - { - Alloc param_76 = cmd_alloc; - CmdRef param_77 = cmd_ref; - Cmd_BeginClip_write(param_76, param_77, v_267); - } - cmd_ref.offset += 4u; - render_blend_depth++; - max_blend_depth = max(max_blend_depth, render_blend_depth); - } - clip_depth++; - break; - } - case 37u: - { - clip_depth--; - Alloc param_78 = cmd_alloc; - CmdRef param_79 = cmd_ref; - Tile param_80 = tile_1; - float param_81 = -1.0; - write_fill(param_78, param_79, param_80, param_81, mem_ok, v_267); - cmd_ref = param_79; - uint blend_1 = _1390.scene[dd_1]; - if (mem_ok) - { - Alloc param_82 = cmd_alloc; - CmdRef param_83 = cmd_ref; - CmdEndClip param_84 = CmdEndClip{ blend_1 }; - Cmd_EndClip_write(param_82, param_83, param_84, v_267); - } - cmd_ref.offset += 8u; - render_blend_depth--; - break; - } - } - } - else - { - switch (drawtag) - { - case 5u: - { - clip_depth++; - break; - } - case 37u: - { - if (clip_depth == clip_zero_depth) - { - clip_zero_depth = 0u; - } - clip_depth--; - break; - } - } - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - rd_ix += 256u; - if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions)) - { - break; - } - } - bool _2231 = (bin_tile_x + tile_x) < v_891.conf.width_in_tiles; - bool _2240; - if (_2231) - { - _2240 = (bin_tile_y + tile_y) < v_891.conf.height_in_tiles; - } - else - { - _2240 = _2231; - } - if (_2240) - { - if (mem_ok) - { - Alloc param_85 = cmd_alloc; - CmdRef param_86 = cmd_ref; - Cmd_End_write(param_85, param_86, v_267); - } - if (max_blend_depth > 4u) - { - uint scratch_size = (((max_blend_depth * 16u) * 16u) * 1u) * 4u; - uint _2264 = atomic_fetch_add_explicit((device atomic_uint*)&v_267.blend_offset, scratch_size, memory_order_relaxed); - uint scratch = _2264; - Alloc param_87 = scratch_alloc; - uint param_88 = scratch_alloc.offset >> uint(2); - uint param_89 = scratch; - write_mem(param_87, param_88, param_89, v_267); - } - } -} - diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv deleted file mode 100644 index 2417cf8ebf1f3d27b35aa71d031235e1cd67490f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 60088 zcmbWA2cR8Q)wYM+o6vjjz4s2%0z^XZy`|njAO%wBB}94`K>_I?p-2Z2kls5;lioxT zL`9{E$p1X&%$vLKeERYI9v%1lzH9BZ*Is+ioH=Jst{uG=owBN?sHUl=s@|WfYQ1Kz zdZAR+v{j#0+keX)wp?NCxS=bow)#ps%usc-{PdZznzHIm+sQF<(C|?jIw7-j%tU|n znH6!?-=l;7^rC$L(#gpEw;a%an{E1U`TYSy4me=I*l~xB=o&C$_^7S{Lx+#)8ZvJ9 zL0$SyRrn1bHDtv2p`}A_Jjxv8(=f({BSwrKLfeNd<-SG_8#cCUT!k_v`B&Abu7d{* z(r>BpE3d6q=s#)Pf_U#(7|A3)g z2aGkCdhco$bQESbXdgTGW-fEvw>4;6u7|GsbXIdAOyTQh6tJwJY}p=q7R0<_&VnYLOSe(;##ou*m_Jf>^V(Ek_~yZJ0dTjsYkyv)x{ICjMFA>W-} zN431MIE=eOk2syxipH6|o_(rS;2RAewb>|&I%-JQ_Fco;`$pYYcMdaDYr+fP0mDZP z?K&hiFAM~kT>Reb`_<_;hQQ{t2DsIa)_t;e&zS6}J}nKo>Kf5CvTM}10mBc`cb00s zp1uRDYliP$@_G;an|zFEK5WstJ{{G%;N-Rec-uka_O%F}J91y$HR!B1g6}wdM5|Ex z$bFSQ8^evGu8-W;;K8GrNw?;8VH~l>()Q-e>Q8IG|9xV0hOG*>=ktB~wb#3&+63I5 zPw#4Tu(|HgwU1M3<)Y@R%y&z;e(L5c_hoLju8ErY4jwdQ|Dj_>AE0i|+tS+if6rMD zQYdoXntsW78*n@4KGpW%jYkgMVceL}`?q&vJ12Et-Ff$}c7jj#1)HauFLNg+X5Fou z|GtAp3>$Fp@S(1bSC~D}CdU}ix&r*iEj8IMQ>nMwWS#jfKa8smG2Lgo(q@jkf!lNJ zsCEap*SvSN7nuH=3?DIqsO`C``Rbl)XEgv``?cmN_tl)I`u|*~*((>a?*HAhtg~q? zSN9xMo#Ak2===r~GxPKPCi5E#&iwXikGt{c5u?XALHC!MukQKvu7<1`IKRb)CH$jTYbUo=k>r%%&x`wANfk5HoWb>6ql|9DQ?d&!tawV0+vxhmYz% zX3)^qa<%58?yH=SeaNx)ZLh1EuU!AEg_`Ttdhh4z5Up3VL%X$(XcW6Q@1~fFF}}MJ z-HA79Q5#&`VfBC8@8ccSL5z{Q91KofypW|Xhqv?Ce$ddKby4?K*5zn8b*X*Zb5rxx zU6;;E&AN;nG_F+>xivXt1OhMbvec}}kd?c+_d{by*g9(OEhhmQ@VwP=wB6%>cg?yd z)u>r*ux1AhTa9Ct*3qh8eSS=2tkmuY;MDF|aJzQBs}tJ!Y&MeHBzKf~tNAMHb~2p2 zTYlQ~Q}fkbx3*^8y8F&ioe59h@1Ai|Y0S4Jw`Mzhm3n@s5jXQY9h~`{0nXkyt3CGj zM-AP0#P9>0VLNwqU)_23uFi&&XYJeeQ}Z==MAs;3pHQe^XaTEg4cen^^*JY-n$)l_Ua0F?epCe zsYh(gLN!w$laA_AVrCAPfis88!Rm_!jo|it=csN6JD2v2#eXw$yC-wZ zTYPKmTZ^%8E5`oCbQOOG0hI3Vqf~{fwcJbeg`XAnD+Uxib@v~dWtclf!ph zx!busQRMPuk;_w)bD6z*q0XgsY5Z?8`8U_p=ZbNkFUI}#{K&-<-A=+;MXzmKS=(*1D~KR>trj<#z=>!WC^ zhQEim$C#@61l;0Zf#S9oS@;`r|<;=@9w*6HE zv3pm4p)bAK-{pYK_0w8w&%1Z^*P4$QK61D>PrdmT<)5_e`48ht{J(wV$)I&*cI1QA z@&#^xxb)KB`d~1ub#KsFeM$V*oLb{`R$tMYXFI3P>K|~+Mbnzc*R&(}{;U3I)|$t+ zXx(+{sHT`AHSesZ25;3ha`YH(g8i%@_to;Rzw3&wNxf^#7(YVbJ!Q+^`^naQ#elr) zweLBM<$Wc6TJhae)7J5=@0{TDkS_y2V9=OBer}L2*Whb5_}V>qN7WbZeYWH50&aib z(OK;Y9y93Rl`ys94(QRZvl`TccT_{+?fuh`#Y zH0*~p_z`XYRpLLPVV~H8cT~s0ljnKhk!>_Ztx54jg@W&hc$p(L_2k)q!foH52d)PXwmmB<*27jl)-)r#qd+?6x z19-;zxQDH?`lP}C)Pr|ae=Yic*~7;9+TdR|cqb=Qc@Fex@F^R7h6bOh!Dnvpxf^`m z2A{77@2D1lrw)ttuys~TH29JYzEXp)(%`E$`1cxo-3DK;!TUA%CJo-d!MASkZ5w>M z2H&m0_h|4v8+>Sk4{Pv!8hmtvAK2hy8vL*ZKcc~pZ1Ce7{KN)7slm@`@Ut8IoF1Id zc<}6v3wzi)tBV`_k_Nw_!Eb8tn|tt%>Q;Ehy0eF^v-(wo-`#_ERQDHsAMIi5tR8Rh zCmQ_u27jT!Uu^JK8~pVKf1|ooX!4ZePZ_iOM?8oYmlZ`I)2H2Ag+zEgwm(%`!`_}&dZu)zm4_`VIkUxV-8;0HGN z*ajch;0ME7A1b&>^7|IKukt?f5VVa)j~X_7pY}bi+?V&Rd1mK5tvYYN)u;jW6EbdU zqOo{(wtHE zTF-&ju3L7Ow)R}+#5VbJR*%m;?VRSq?g%_$pKMKy=B{n*tp8z~2b=T!cm3wAZ9@l* z8&3*h_ly+k_~?hhLFOB?+12EVewuWInCd+^@XU2s1q`>8x*KGegfvwEb# zA8YUz8~o)Sym$2$+%fnR&qLPsXS@%4_;gkuHTWMJ{BJ#YpXw|4qPq=h^IF2eMSt}W%QW+`;Qqt{(#N;IewpNI&{``-0*S3N4LG(`t;~t&uWWdTkAL* zI#0?C3!kd>+Z*C;IeN&T5u1%Vh{ixh*UmQ{_?t)TT1F?e`AUuN5!@dO8P{jWEuYS6 zvGA6?PqmbM-{E81fm*}2Vy%j9O>=QD)|zPLUR|%j*KhE?b&Py()34^;-`n)AHdFKZ z#gA*+&#?Pcn@2mWYs~1{;P_jhj~SX8byi!#4;~ZVr`if`W&Je{|6A9xceS|wvmGl z>0$8qN_(Ie*YsAl?RD)_4XkZmcVaN#u3B$hb83V7ezo4Z=1$f-s&VK(Z+2fC4IaDi zfWgDZjU03UJI-33jW%TT_)+ce&pNA1^{+Ro_Uqy0THn&(zkugztp6Pi`#nAQl+^=p za%e42_q%*Mmq&W|c2*WVen zwQ9q6U)siu9^6&eY7KaKA7K5yr$0X^%I`0n@A|dYUboI_19(}l4I8{)gKyG<_pUaD zGmpuoG0y>L>;p33HwNIIzaQ#djX>LBNY|*Y_S+4+p{<$ISoPMNc;=6vFZatK;9~C$ z9yGQKy|X$J-SU<-new%s=_o?m%Cw}*Lti7v;3cVe#clCIo zwd3}#oqgm;_s64m^%T7QTSr%<9p`D|v@Xc)`h3*t)2=)3mvw!6 zSDzMo*7Ga0`rfpaQwJYX${KdU%Nq7+@Yx%Djvl;EH5dHf)n|V6|5=~j)gtJv_n-O= z!E<#sqXjR_5Ec!8dB~O&fgk2Hw{Gz5;r<3IYq3Ye zzE^_}Xz*bTzHfu?*WlwC{GbLuq`?ns@T1_~i&3}MdY{{{pWomYH2BXNd{Tp7-Qc%0 z_-ze-dxPK6;CI8#cg&#nXOwRm_HP?}3O-Di>px9{PuJixH27Q%K2L+s*We2@_#*K9 z=E!q=2{<49CO?N+4qo2ttkdYfUW0GY;5#<>&JDh6gAZ!(Aq_sh!4GcmLmT|?20sek zeGTiZPH5OqYVcDU{E-HKtOxH?Jqyphb-w4Zt}k`#t$qAPkA9uiTMhnp!TZyHD!wG* z(g&)lO^J|aq>FkY^NQwoNou8^-y^9JadkEwD}6n`Rlh( zp-o+DixryRRq3}>q4_$_P0q#dq|{a`G{28hTdUCg9!hQ9LUW#KeGAR+ zoYeXin%_04ZCYr4$E3Dpp;;rfZ41rsnACPGG{0j~+qKaAZb@y=LbJYV0}E~LS{qtu zet)FjzJ=!ZMrtEkTIb~NhO%b)u1FtW?nyQN+uDPEZ&dC{xqH(6TD0BJr=#VQ@%c-oOR?T?EvH$k8$#n;?vDN0l=6Saehp~2|)vv8} zRy%|BRkyag(DvdmwsEu@-+xn(_=7Q21SVFIo3^fQNJXp@6uP2ezx6KIpe ziD3JvIp#^U*1tVwXY04!`l>s|DYW{Z%;6ZP(q;@}X?KjXXfwu-!1htoc6MPqr?#mX z@5i)>XFu)sxqvqDE(9A-P1{9rN6X(!o{Ap+%Tf6b)-Uq5B=5%oWTj@W2Z8MH`+Zi?T z3ZDtCl>VziItu%CaL?1&cZWNk@rQt|mG1+IcT{0Nwzh9v*iS(FmZBuy$#CzfjQ7LZ zf5VzzU-Qjtesj&OUB;V@WXgPZg!8sP***YX&VN6+Ij8@4xa$%=0q!12eiPxDzj025 zyO&}=9bS&N56Mi6-E+@-aQiI4f4o-~HxGUOo1+=$-*I#B{>-th?_sp0+tS=Wa_`a9 zGXK-yPIwvmT9rR3h}V|P@1Jn5S90xhv#}@8roYbs?Codl1Tc0nM+~fer3+#nRtri{Z}S=ZoQfX7IUU zxUJ6>!}a&MVz~A3xnlTcaGxuN>+f^LlKWgS+}3A`CHGlk$$f@ca-SoX+~odb}=j$`WaO>kU!*J{4GsAFOpBa|iXN4tS zq``eYSlWF?81CmipA&{VzRw85C&Bv`Tz#X0>+dr{b>Gh?(fW)q-1+;AFx>HcMi_4E zGs1A=`HV1p65MBm;rqdTMi{QY&j`aO!F@&;ZtF9`aQ%Ho7(NN^Gs1B5^%-Hf`TC47 zd>q_ogyF{X8DaP&xX%c~_4gTJxc)vP44(w|8DY4s&j`cy_ZeaMB)HEA!_C)cgyH7v zGs5r*aGw!|8_#Ei;gjG#BMjHyXN2MU`;0Jr65MBm;kG^_4A!EpQgY%tu`XM-j8*`R!V?&keWRe28r_CDV4AlByY9+&Fx@L6fqui{#u z_8D0H3;e&J_4N;I>YuA~`1wHm4S~gK8}j+W?|l9NZhfu?`+d%O^yTteeT}Yd0%Oa+ z0jv4Bd}6Kr3#{(vlH+UbTe!M@&cV+fYKc1q_jYO<;_v75J{*2_H%5G>to38-m{Y;+ zr=EUOgYD;cROvSjTs=8Y3%0MixiGa#P3&hL`=$;vz*7gm7fZ|;(X}P!%wRP?lcf%` zfYr-7%nDc6&++_BrI!5X0Q((8ng5({b^ZO!C6E8y;O2biK~qni<^@}Kbz}HBO)W9z z2RCcI0GfJYEC@D+x-tB0ry!2CPrkX<4v7>RIFEz**yRuP=|TEj3sHY=8B{UlD9Q6MrSJe(LdA z8SGr+vkF*0b!+M8UbXnI4pz&!Yk<}KUd^2S9%^ll&nfZd1>T~- ze!h>-Rt4U=#?Hk$YzKFK#@d!vF5aFt{dWNSJzi?JBi!2Qub+1NY(uLpbKeQvocqpb z>Y4j4U^Ty2PrqH^>1SNIe&XF|lh^Jw-jFr*I=>e@`)E&aS@XT&+8?O*(g3iU-`^W| z53pL!qru?B8U)VT4*_dSoS|T~#2E-yOPqbci8BnGIQxRNCC+fLTH-(ea=T&EQAnh0qU)smkc3ahmpedf6GgE)-kxZ?Dk0Jd-VG2q!a ztocORA8;Jap`SMMRZG6+W=z|}I2LTo?91c8YQ9fqzG~%s%}stht?_J~|B19Gab*4{ z)OOp9cQV*>G5i#;=j=6gJx&GdqaL5rz|$4{bnx^Xso@!5ebgt_an1xAOPlvkXVLny z#@c?!p=SQ#jP)b1`8oHkXyxLwX{}@Y&VgI|aI)1vHgr|g!}9wuz5IF{msEVFQHAI`drMRW}f1l zhnItM9$p46&%-O=+H)TM46NpPn4GVKm-CziFXwp`Tp#tkXI>3fx2B164VWdYQlD$V z`l+Yi&%x?GgN)yGaP!jUdR|JamiOWtz~%fL&-daR(Y58h_$IKLuUxlphF{NNO^hwq zFFD@|c06sj(8|~5ob}%OHt;gE_ScrZrk?({gYBQW{1U9@J!$g46aMe=zYASk^8XcB z%~$fj8-52z@|WvpF89znUh=&cY`*$y)8`ko>dE&(uxlIs5cq74tk1(>HQ)Q<^C;Xn z#(RWTE`E$wfAQnAUYGNJ`~=+J=IO7WcIWgY?Nb~NaF~mI<@$SmTte&qw$=7DZLX2e z)HYkMY0uF<&*96yzpm}J$^8YeF>)<<5v-PLy8KxVV>zxkeP04wtMq*ttTq#e=jU%| zeRD%{|z?# z|C1wm{0ppZysc^Fv3(1kjw3m-^p%=(*I%2Ob2?|@r_#Os9Xqi#H( zajB($Z?IZ;AMlp+&wig0u8(?rrUI9JriMGOtlub#bQJ&(`bWEVQ$7__9v3)ppzDJUiGJ$#D*_TCR)onK_JQ4&wBk z3v4do^MH+&dyIL({w_g1b)OGxKW*mkvqH7Rm>*o$Z~?eJIrkR?tF_-(+t*EW5Z09%jDeMzud z&VOxc_Rrc`=k#9+Y<+*uM)w-KG+52`b`HydeK`kh%W|yDVIJbdSsrYh)Nlo`T3N#t z;nq-F`mF?3ckVvJm7A;MI)-D#cNMVLl=#X^U;D&&Rj~2GR|C7|$!~SAntsO9rWXG- z!Oq8Pi}w+0fghk%*T?S;)&{HF-{;M8{rz5M9k5z2>X?}8!qxrWWEFhY1FPBJ=hJe1 zwe7X;nzZ)OmR!FNHXr-+qm_#{p|y|w{oP)F@OrfR zXw%1MVP9j)^>O`j?XhiM+cW2_^e0aC z`POhXuhsF{2JW?RI#$4Yh;8ANzgmJrf9!Y6Uw#I-*ak%fbr|(#>w$w;&&6Brne8+*UqjNo&*4Nj3no&Q9 zqaS=c?WT1*8)M1+dtkQwx799Qx7Njej{OX6-nWgd%|03XQ1BkboDYLL=X^dp9PDQ^ z_0-`Au>G_-#v!z7iFp)QEj2nC>~E6&E?!&wCV;gW<49V0zGIjO&SzMEPiuYC?+4hl zhaU^Je{w!f%;9fOlk@R#`)Tvqy8*R20j!?S%_o7&F^%u%{*%$Q<#Y2XU^QR4{+$Xx zk;6Qlk6b_TX|#^z^>t(VoDS|!tG_mVj-gdI7vDS11gja(&tIAA57D&6@2tYFJP&__ zt}WNAv%&UNcf2!b)e`r|;PSlv30$9CyUzum!=a!3<@zL#^TElt%;Tr%+K!->{+{&$ zuv*5w5Ih(AFZPSzYUZu~#bEoWJMMY3Y8m%ZaK`mMBXKW7*Os`KgVkcc0z6+a?$6+A z8TU%CebgQI5?VF;o7YufbD54StNdy(!?*r_f3)?2_&av{PJ(Ms-)q6fOW&V^(^p&B z_ZqnN%=db5=3DNI8_>07zBhu^GT)oPWu0$^t7X2ofbFB6ylw?2FYRSs*TJyt4b2A5+#0(XqWdlamec#nY};;^6b zZSniK@GGy!e?-@od_Mu(S3Q0I z1kM=cx%_8zZSng{;a6UJKSkG;G5!j+uX<|tH?U)Ci}rW0y7kN$pTo_ixd*;L(`Jo6 zqm_%lq&1HCD_SoU`HuJ>aK9tgUq9{oeMqY zdo8ygKm8tvt4^zysO4;akJ?Nj}?v^-=dd$lug#3m(N7y};VjcRR2)$Jvrr9@`G! za?N&xn|Hor-3hGbKDG9{f>V2KyKwBqVg1C3yBpXzS^M3=YUSGR0e9`SrQe=lb#vXB zR&I?Q*D)L`wwyb$*(T23VEbqM0bu8pcA^B792mKYkODuzL8B;IhwA@N?*+9-pJZ>fsZ>&Lj7E$AI-w zkIzJ~HBTLW0M<`E{f-5zdmWD7ad30g=Jz@qG0)?{>ek`}TIVl5kye}cYyMmQlfc{3 z>Z47c@wDp6>lCo(_M`Rn`&6)+=i9Ytey@5OTs`kUCxg|DcRFp>Oy4u$+CArgNbAe% zt@@c9YMzhc1BhCFW&dwd8d< zIM-us@w)=7%@~)`%47Q(SS@Qd39NP{hyJdUn%BgvfvscuecV-G_eL-B_DI~T(X=J* zHDEROLVmk{EnM9-zmC?IYp(ut4mH~+$A%XdB4@qAx&EWfL`0j{3k$K424OFr(e zo8a!RE9$*@Ggv=$=a#W-^PT@KVDodn`kRMi8t+!HwaGJt+rav(TeoqvYVrRCSk1Zq zlGc}VRll7>&AEzG!#lw%a^$-BE3n#K9FB1Zty=u=0qdW0@;O7v zn=$UDRZENqz{U-K5Ilk-=hQ=BHGh{LpNHW?IkImb0qdvkKJl5uqu`@yjjug@9|N1i zr|c%0>T&)@UvqS?Xm@>5A6xk{wARZuv7P`g&XGKx1gqsY3)cUqpMiUCpZPoscRubhZSi{!tj!ot)5;z1d0NM_^?v&W+7~%|X@9A<+qy4&pZYae zKl_O@#>-%1XTQG!Rx@As*Q;P(#?kg04mIP56X!Ls^Gv;72dm|tQ=3}+-vFy++&95$ zzVA5h+u)3=?JW*9#}zxz%tt+O-vt{t{5^14>-XW-T0d>^`z=^oYAuiL18`ZR-@(;V zqYuH!Rom}5)XY_EuC9+9vd?sI?-kYKGd);6dxnE8HBlY(g4N2HE5Yrj&6xgMZMDQ$8SJ^{^?WU`FQ2)q%lpAsq4in&s0Mi_@-2>-naC;(rh9n5pGD;M7uE{MH3)^V;a&g;@`*X3qY5a=E^K z_q+jkRqCM4e_Jlsb|tnA!F_96U$8u5Yz*#K+cpBr{Z5=IRsFz!qcxuWv}e5UgVi0+ zapf6r6R`f;99N$4`h$(5&2i<4xfwWf*c2>J%q_r~!{%VQHb1Lv1@7Q5=9aW_ZPsiX zu(i^*HEoM?_v-w%h5wV*v5aY7eKO{DwVrv%wXrp-9l-9ztl^H}T)(x&Zzr%eV{A_= z*XI56&S3woulsCQurIIG>br2Lxp%~gw;R|vUZaNNzdKk>yZc~IaB8>*Sgy@|us67G zz1Dky<>obj);L-Bf#Cb`x4(YceeX{`w!YueRD<{*WA-ZEF9)Nk=ejfmtmgGL{f5HR z&m83XInG+N&f9Y<^V$cTc?|>0Gq2&`%xhn;T$}5(KREk)Kd?OW907KJ8_#~)Gu}wB zy16;7JmZZ5>#xmmpdQv zx{L$MwYlC0gU!!=2Z80>~|P#i`VDO5pd5L$1;wHSghaejW!; zt+l7`@nCJK^$B1#$M)|CoCtR8d>)YNll~`x%dt;}tCjD;r@%9|_Vhg!tSw`o23E`1 zr-L2a_b%rx*C+kY0GDH*30EuMr+)~~*xJ+gEU>nW{Ufki#y%VD*#0idvE};M-)q-7 z;9R?Y0`}##OZ~?jYM#Sl`?y!n181+E3zlnhul^L=x8AGggXP-BV!IG*F6MCotvu)O zMc_yAF`oUjdkx9>w%JFQfE}Y3b27Gj@=`SQ?48TNY8P{)-{s)+GY7eT<+W-xc;u zaJBN<^B6p_w5RXmU~S<~fXi4{=P3rj^Jh8N= z@AF`7+g4NJ%lAKE=WV`@qussc-&=eEtliqbNc$3pwa>L$u1{jW3@-C}1+KPZ>%OG@ zJnuK~AHb83_VoQ7SX=n-!DXxu;cD&sw|1TW08cFK z>H85_TjusL*!b$1+aJN|ncF8|=a%;xxju>cCvch1pW$i~in;v-o_w^Y@26mGxjy|B ztmfFhzyA&F*#6yKuLE*@(*N(^a_rCGYW`i{)b(?C#@3#`Ux2k`>@UG;8T%`+WBVIs z$Cm43f8S&O0nU5uKf%6yk5&JgL(TVCv3^ES!Hm|V;?OVV1e2XsEb~LaT z*j&uRzfC32drt@WX?%=lKkdHvWqe!bL{s&~<`}(-_t-vY>Uocy60GLmK~2A@!0BfW za{bEp*wq;z^PCo2=IP(gl4qXNgN|buFdtI7i@m^^Y4YpwfP=9Ke&Tf+WgyLa&5lHE(mrUZT=mx7O&r97lQj9 z>sZFLuRa-b;aaz^W6QO<7K?(HEatx$JnymE;*E)$~c6mEp!opH;wW z>9Z6?RnS!->Zaj02qvHAFXb_=la%jdIOqNykLR$w*9H_ybe&9m37!H#3@`a6c- z6>UqKJoMRyV+RiN5NFM|18+t>Q@icqYMvX`$!7;@j&Dr+8z<|(6FBR?BUqmG-vw;V zjk7aYo^f{r=Nh{!Sf2f`JJ@Ti@$9EPTstGv1zH{k1u+Jmc*JHjXyOm20D^ z_U3TbZjut)=u224b zrCORpU+s=-A8qBnasMRFXmGjj4uGpA4{d7pUyHqM{nw;T|1ov{Jd+y>S1bFg z>F;&wAaK@eJXo&H>(n7&Yi8aDgXP+M&pQm9_q;>Fa{Ugc9mlbD)&8FN2(a&Q#x{n& z`eckFYu&z%CD-Qr#?j#NJ#GR#??u|;cMMpYF^-~@YongISUtuOb3wo5qFj4QTg_V+!^F=MmM7?*(^Gc~*%u2$x(R_1K( zWeu&RJiiyPmHS%{bMZG1&EJ@0{40r7{yX4FaJBqqMVnguud4mSuZH_?qVwD3YvB5* z$LCsb`ER0s4)@Tr(e5;N`lhKg6MCAF+MBCO!(zHSrO!T-%k{ z9tZa&7tg21!1BEBKLNfLANy!?PKj@uIy?#Xy)(b_ehRMUd%Sad2JFi`wLQ(DW}f22 zc@Aux{C41ZxSGF5i}q`{da1ntSNHxeelNn+&B3`n3s&>G?eE)P2K$-R@0~LKD`@(C z#C4I=y7~=RKjV6x(Qd4lXtm`%;8pN8KWT z{>keduw!a_n^vCps&~O&v+Zv`?XFYmYb(!Qvh{Bzuf^g2-->zo@56je%dsY7aLMCd zsT)sUsgaDA+gbCBz!zvK4?YyTH<#mS9S zTYnRpruF|BncV#U9x{j7XtSO((9iYs+MeGV&J0%1`niV5aTc&~^4q6b;rgrRvy8uk zP`97C$&;J=V{Q&#vvVZ3ISM>yf&KpuHD~{SmGb{xGbiz^Tjnqq+&NgM1T3?>W+7{wab6(=i zV-axHBC!`mvj$n4#o+p79@;aHyr(C&ZQ?8rc3!TBIP2kFS9&6wFGU>gJp=PU)FcInwz`(d|9~hoTt9> z%xihD@wM4c?seFCtpIjz_R*H#Sg#1y=Gv@G>&vxKUx`D_9L4$j=xP~rHLzobuMT%! zdEZ+D?mGHDtgpGL=~rGuJQtSX@U!TZ)c^;-*W zO!Kg>Jh^`l>=@dNBTxO-0b4)&XiNRp1#7c@>(lzOe(LLSsF}0a{o=lqt7qH|!H%1n zZ3Iuvw8d{@ur~8^Uh>%bfz=#CoIUNH@$W7BT8<-iUB1TFZiND`2sXc!X>;xEPdxp5 z(Pq3&!Rq0gfsK=E!{%^()cu@g9$SFb%~6~j{o70a-6dbEahT((w5!+H9M|A5ueE5C z*Ve?)r_5^`H1*`QE!e(!Pv0J{wjD=u-2rUBmW^D^XGgfYxr&pkf9J`+@#M?@-`>=7 zof@0>x*V-Frp>aaIh%{{7*Yc>$<-0W|TajJA~4#U9C z%Q@&T*GGTryboBrYaq@VY>cfxhp&DdS%dG_xG(J{9InBpv{{4wh*7S=sDir&2Nqnv zu?;@H;QAj@aQzQ!@FNPY|Ad0;Ke6D)f{&|t>a#!Gd03y+a0FOAYdjLHmNj;cYVjWp zR?8Y609Gs4cnsXyxdzriE_VL0=GmGu}bqa=e4#YBSY7?x{n;8P6KZ zjb;7x&v=J}9Zw%^`nU$_8ShANIo{E5wWDeu$2$g`@mvGBv8=!T8Se*R$J0lfKCYp9 z#ycMDoO922B3SJNj`*Ae)+cLp3RvxAj`*Akb}or?8d$B2b9(KQIA_4sjFaC0oC!7u z$1sLI?w=php2gvwa{o9Nx&F!FM_}id-*lf1Rtdk+2>tljfd zob%J~5B<%zudO(8er{c3V{gOZ`MDi!&d>9RQJ$X{7Tojal7j1ZS%Y6uaQ!D0T>q;J z{u%t*n&*5wA8uaOD>eElSUqcS0a$H!4r^l0YVp4atd{fbVz63yzFi8p7WQ{ea(UaJBM$y9%E1tcl!M)GJ4!~WTGa((RYzPh!xyXVB& zbKZOJ!QpEcj_kQzYrF&PZZ+PSc6Sc<-=4JDf437a`_KJzcfr-~ZSeaFuK(i&*Z;`| zf2!u$Z@+|_leJ2H?f|Q2zugH|%YHLQwfNr!R?B|-6 zY+cHH9zavq-}%b(y}^Uv#ql?$v9u?zhrnfC55v{mmyY*2{T=~3o@;GCpQk?xR(F4R zU-1}NP5U(PC%~DD^_H8PwRQ}#b+%vfd>U+?`s=5?{OsZ0>EiG;h$DMvaE>YDdi~p~|YS}w4fYr*q^Ag-TSXbvH7ds#Qlh1F!)}_qnRWx<|ov%D=@*3DRF{ZJ! zC$HDRWnOQ<)v|ZIPQ1zCc-cGt#^9}5&)#_(u9m&?E;w_s-g0xZ){Y^z&h|^5zXhA8 z{`zTe?w!%teeKVYy)&Z5`_hiA@qV|`&tT_S=JOXcb^Xm> zo*I7&w#LRZmiA_i55?|lJV$DLP>sjX9$e#bw1;q52KSo!KW;^{xcL@|Ct(m=7Q@#cfs|ax54MDd20MO@^j8*jsK3Oo;Cjrtd=#m z25MRJ&%tV0^Dn?^<(hv5x6ZDcHIs|2m;TA;pJ3}==JO4jy8hNvo;Cj$*flq%v9u?z zZ^30=EM=vZ^WXQ5DbO4*=fCeAz0lNi{&#@Ya{l)QXD+U%+}vDC#}K=Y_Dh~qfz4C@ z)|%5M&#A#>p3}h9jv`mQ-A%mC(l{IWuCLc)!M&bZs$1%+&ot;@|+V*J$cRrR!g4qfRm^DS8i_ZUB?i+ zZ|#>n=Lego{`zTep6kb9_jL?M&h?2kK7#fKHJ(6wEQja%@w7SD7a(TNRnM)(3a-9H zgD+Wd{Z}fu{;M?jss-2odj;2j-3DK;;QIF~xc-|oc>jXyzjeX&-?qWGE4co<6p!&M`VVXHeG0Ds=z{BiV1th-xc-L~T>m2){K$gqe|*99Ke542D!Be<6zcGY-0{42CSR{h zE1+x3`Mx4pE$92n;EDK|ljn$B?D^q1V$TixCD+x!=BmGb+LP<*;4;@W;A-vbXFJz5 z;mOrG*ws&oQ<>u$P={RD~OZz3)^}*(jvO5*S>JI z_BFJf>xS^;>Ku~mM(Em->&9TU-3qLhT(<#FtaJ6cBsV{=Nsc4-dSt)kx;@xj z_18~(a@_%3=DHJHZAXsex--~ZJr~RUx(m9t=GVHBoMUUJo5d?6uH- z$#pNVx$3W<_T;)ZxXg7RTx|eHavcOtuH}9mjIJ%Y4gsqr*DmnHI@h6Kx%myFbsX_N zwDwD``-06?fBm#4*WutY*ZtvY`*9@K5nyxmJ-@8$NOWzqOI{N|eHJ^Ng^IrXZq^L|%# z9-4aYA!-cBXV1s(>&G0~vp=cvk7&=W@j0~TakyuHN}D}2JGduPGbf7Rf3 z*F0-}8{9FiV0%JT<-#Y>ka+EbYx2UxD4%B^;^or8T~g_OcpZOnW(pHU1fGYJ5L2%Njpg zaP`L<{E3=pP7lB@!>^pvgJ|lh@k3y>)YviAQsalgYN_!fV70QwkHMXnbuve}*!=WQ zK2L(3XPM7aXzKc#zdSX58f=Y?X)Nu{8efau*Hs*;@zph+M0*W~b^SSQuCLD%Cv~-b zp}}9QdGdY+Zk#gjXVKJC$LGLm)-mVqui@t6+#FLbc0B#Xj%6(~=a=BlS%3YsH|Km4 zc3;-8Gkc~}>7l#9(zf3dlFolU*ogj+BD_0yg_-U2uCcpFVU`}rNP znx8LxF1Q!5-i51Y9p48hFKZz;7we#}ego+DJFxZEW=-VD^Y`E`Z1%UG_U5|Yj@{QS z99h>}YwWt-R^y#%f5G9p{*pH9`Y|!fb^WBl|5R|-_3s7u8RO>#cZ@G;o_c-=H-~av z|A3~Rb^QpeR<7$G;m*Uln4?^5e)@~e&Ci{w*Pr38tN!|FPab~(H}m)uO+D-SSFl>v zb$ep{4X&Pb{S2JEtcBcMtb@M#?L@yX!PZ-wHIXOJufTg?v%mecH`nzp?7qrt)E#iw z*mb>=!?pYsZPxM|;*@LoZG+RbWlv51f!#Re+I)?sp0)WWSgl-}f5FYgxjCj>?0EW% z{qJnx%y|m9Yoot@+M9E}AKBO49NEkF)Yv)STjTv`@8fX(571`*y@^@Qf64}*s^ISB z84B)mtC(!bQ zSbOrA2HebJS~T_S<>|m`*~|M7YkIhP_VSG2pCYf%XOW*!RINs>$*_EeO9(e z!5w4Knx~$#!Ofvu*V)n3v#xW1)yj393+_Cui#f{0=BK~d+y)fuIxpOH)n7mD$zwin zGmrVv)U&P&fYq|D2NG*RxO&!gVQ})Y7IJg34*KdhmVS$Yt+zI7BG0-m4n7o{{q3i{ zxvo!R_w_hO*7b=RyRJ{xcmnNH9Iop#v{~1siCM1evJJjm!Clu?3hr}})e1fVzIx45 z&n4jIP_FBeXzE$lrNCUT8#)&N^?ZPrAdbzKvD95(ygPkVD+U%>9` zS&pphb2WBdpRe&Lw7=$XU0hJT^pA&$@1;KS$Q} z1Y&IrSI@eBADq0bh1^`MgTDHmOutRR)?1r3k!M{u1D}b_{`S+}T-R5y`zr4#UV^*E zuItMjuH|oNvzA*Cr(DZz8hqQDrzV@jjZ?187HH~On=Qd=<=SiwHy7vTm~ye>=`VJy z3B}rM2X}4s*H3$M&TnG(^(sg9@@qAA&ac<_9NITHoc~+2ng342Ea$&VgYQ~!_wwEa z{}Fs(!5w2z%~P-K;pR}*YX>y-)N4nuT3N51;m*Uln4?^5e)@~e?bM=PyTPrO{`zT8 z9=n5^dF+9vp1r&$SS@?`EMn~iSI=G^08U=kLT)YtY4z3bZ2AobTW@XFM4r7o1bjX= z``b@@b6wxZ?(1!itm`{9c3t1C@x`?7ak#F(rOmqTOU!az_iOO|3+}oeSa83G8C!72 z7+3Sub12*#%608RQ_s2%1FMzmIvoBE{j7^Q%EjiVzu4UN;YhtkfL&Mp_0yg_MuMAp zj6ze-x{d~`WnC{I)&X$!tm_zX^0F3kbFmKk>URYFU zhuD36z~Q>OM!&1E>+pLH*YXdvW#4JwpK$p4h$DSJuJP2gf8?<5pJ?3&#y)}^Y{QQ% zxP2zn-1+G1_bz^qJ3Z}D@WVM=<8qykM$={=f5F zyTAKYFMs#@H++2knIn7uFEyS-`)Q4x*IzlD^WSMR$5V+{&iV8PKcm6VD!6<7f`Ypy z7Z=>|FDu6g$Gad78U*7$fdb?e#(+X-OjntxY*B3vK!FR1m& zw7&eeYT8cXP_s5-_mWu6wXr6rab&H}1pm9W{vo^T55Bj$h8^s3b3)X zxo70LcKi(dK0d~?pLXY$Ty2xZ+z3{`fx{TBJq#|dZM=oI{^kq~Yu0_v ze_3PCxvy&c8SOu6{5kE{9IoXzw4VP1Y40L_&VSF}hYD`2M+&b0V+Gg$#e(bqa=~|o zzg2MkKP-fverj;hw#X)|Y!$+pQdLayU=1=dAPl z1=w16-6*dex1(#z{}0j?gaZXzP39!p5rjSIPrf4{&(^3M%U)`%jbRf)PDKx z<`%@h7fs#2xi$cPAJ~5C?gQ6I&6-$)`@uP99|Zd{KlKMV)XYz8AJ4;wTUtAwYb@91 zzI_zzo^dSKTdvJq9tS(VHrHOB^Y97qP52nke%d_`Grn!E+fTJ@J!}6ontINOXTWMt za-`q0U~}t5?aV>0pX0bT&eQcsZJr0GHZOpESsV3VbEw%zoZ7t9(%SLe&+^pf6>w_f z-j@4shCB~{1HOyaKHAJF@oh7QS6jB8bAJs@J#&8@tmb`J`n>@!=PuW;oVzuUdmcD1 z_kQ-|+u(nve?e2vx_k;&EBEAI;pMu>^()uG^~iPm{{hD@ithjb diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil deleted file mode 100644 index 97b006a53b013923478e71d0b0a34418735ab44f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6768 zcmeG=3sh6rmM6J4HdfvW3#cz@e$_ShF%J1ed007!K0I=a24`&9P`EZuP=>?f5 zZ~*LZu1r}bh0Bw%Y~|802Trfg{uQoTnImn5bmm2VOH_o(DqI`f#}W-S_)f@Q%?)3JG)N9pviWz1HdagqiR z3XEWd(=^DikS`-oyP&PDIgG)Xo1uHGJHt zZnB76Gt6e^>YfY%+3Cd)2UvmBB;fjkK(Q$pnkj0~z^xX8ekX1o%}n!kK+Z0hMdz5#Cp9@yRZ+W4_7m>tm2%=81q`@x8S z#wev-MrlSwSzY6=*po`S1;YBHS;_LGU{Dg5xz8`H@dx*-ZQE={U%!BLEwy@+3aDb1 z0uIy!ZlaA;O}V-zx}>J61c5N`LlwWw#@`H|z-6Zw|~pdkb)4-zK}=zI&@WG>6@Gj(=P~6 zHnyQ@zC82 z3T+~yfhVTF5sh9kYV7f@Wtw&C-Kt%cy+ zPTH@Y>aw5gnY!IS5rmV;!5PaJhJLgC>#|Vu*V}iLz6nFmuYN3fduZaKLlB&L`mx~M z(hnSRMe#jQaHN0SWID68V@K2Ov%}KDEzT|b_Y+{w_Q;PGAMbEtw(U&aG(+(c9uGgU z=-5Zne|2Ko!kXrHf4{F~%dYuQ)_eQ$$QW97vWyx6yb&4ibCP$F;$4;Lkth?gxDi>> zio~@e@qI{w9Z78MI#v9|M14xmL`qB`e#kP`Vc&h`?7>euGM!98MU@LHG^G`5H?Gt- z^CiC@@LDa-+nfKXXP)Fz1DK~Y-tt=&{awz}F*(sOfqfx6$2x2`J9o1>IzAcRIt)#m z&2&1k1lm};zIXmit%Qvmmhu>BhZ-qa|xs|diC?+Owlz!*X^^yhG2%Pll8S^R@ zF%nG5*y?Vxf@D`!vNttu;7$OSuLb=3BtL?y@b2oIJJjS=EM?7z89WD(gg#}08A%wd zG?#1HTX1$&YK>OQ-mFC`Dr>Yvjbu>BzhmK#NnO@UU2aVBCsq6iLC@T)zFv0Uxm_CX zV(GTH6NWgOa`90l{)8dE8i^a66ji9%TT0k5B{du2ldRENYP4#0MKHT!ngz|iO4huH z*K82iY;3HlYSe|^o#fvg<^P%D-;p8(eNA5DO@z;xHVetf6yrsk2DhYndgL$bV2 z#$3Dm>OL-tJ>SN`4b&@d{xo!YO8UAPW4>fl?+$jkw*Qsy;xaL&;e%YKYWZl)(l>4u zRy8K3Ai9W>wzP(tZ_a-7{X0R3PX5+mtc#f3-}1+AkHlyAwCE^FWNu)8_jePQeq6pw zM_GSUjGxs$9J;}liF}iaxaVi!>?}dnm%vbW`B+qT_szIE9~FFk-x*J;hLi=VG-Xi| zDi!+LxXv2R5Ut`eRP=ps*&2Cd{>g8rQ^r+wvD1BJmvl4er$Qk`@Dn#)Dz@<6`X>IT za;R%E*vXyB1W4R7$%Vwk&wy~^sgKG~6d`ypDR899SEh;{gW!vAIc?r^xz|FCyA{C)TCZ9tGpi70{zKmPt6 zdw8@o&a?3y-rx`h8~AKZ!R7y~fzyp0_HWVO?BC?S->d2O#ecU~!&Z4zF6MFyH+f<` zL=f(kqSk{XPgg~rNdvpE9;ABK%k%J;X5b7@*QI%Q$xL^SA4WR|g7A?)Xnfz>N?^w3 zV6{?JMg%y6Ae<#{UJ!b=Oh1|DhLZaT@R?EPY2=|zWmk;M0dnB||6226w)|(!tL;y) z@ilY|pSdajWm?))F zlsYC#YHG+26qU%9otMe61_i_Y1)clqCRfp2g~mPm9@em4CTBOiBHNmsk}Rr_9dTd5 z8;)SAmmVzcafo#KEh6wbd5=K?i8o|A8GLRD;+SsT%_$BRIc8 zo}294AtnH>*Clhx$eCuBO)6MaR*otq>Oq{5;TgXk}tSyVlYWUoO^=eXaq;QN)&(_gMKW) zo&s5hMJ2++*5__BbEKTrEabUZ6$?#HZKHg7{bPBf7Hk46(54zTnLJ_-FqWitW2lby z9p;6=knmGOrk8+`XX=!Y8bncosqG}*$Y=}{V<8|T)lgt%j#ftoLqsYVv5H$kpKOLG zbMfWX3K0azrLyVHr#3g;@ogV71`>umqaLpn7O=XPjVjypV*yk0+)l0QBk5sv?6%hl z6Tsl76jPi8T1;4NrPYMjHmdi~c2OqJsN+zCO$`yzW5gFCVNxJKk@3Lm3~wV3UnquA z7P!$-+Kdbse0Tvcr#XpX79RlY-?*m)X0aHaJHyrOTf(KA*JgE9O9x0Vd9Qw1K5HD7 z;mp~0qN#R;z_rDRp{ia?&7Mg_m}`f?(gV-rypAs|fWKF9Y)@t*x(Izb4A#%V9}x57 zn%eC)@1C?aHy?J*GnbsLm=~KBsP;RRfj#S)Zts$$I zAEkMNuAzuKXM6CCUA*kzh0W$?qg73mDLnClY^$Gt|8x;#?s&|Mo?$Xf<`vIe&?<>J zFvm8j&go@_p2ZKA&*@~c8-jTn9g@~~s*PdP?g0#$fE)U=)@7L5 z@trsmC+A#Lrr}ooeH-VxL*7sLD0hc7k@kr{bhuhRo|x}X(XLvRi4HsN!*zE2L1CmX zNIT;)8}j?fj||bwpK04G8xoQ(L^LEQE)*b9_B zWn!_{RG6>noJq%kFU`<(enFkH;L8es9cu&eZk~RfRUr)6Xfo!JE81g2c&#z5qttO= zU$Hg3gHw*CaOc1rfLYZsr+Pmr^C$n@ zT`<+bSmtc14=1j36FOZkf~kH}^#EO)IS+YgUx7^>ImjI^%#ie*Mwy90g2}Kk?r33` zggV_2ZK+#OM)<6`C`&wbRQH-$cD=9@_{^`L&XrN8jq=1G943aW`!QAK6UThm=nE~R zCI-o7h&z@E-i;s^#K_!dH)mdzyETKkN|@jKTz+!qU2tr4G+W(crE5w0;s*B!Cv`IE zo~v_@?qJ&M4LQ2myGF5cYylQ`c}`Js&gG&#qai!(yWf|sa%^eCUiV0+lC^M4wUyA* zOu-N2m#)p!uc1@r-;f}`kfvqL=4fa__2%fa%>ZRk4Z-ZVmrZ z^2B(R8vaxm1Lj%I@bT;di5}v~us6Ba z3%w*grw@fD7PIaj`Ktb$Nlv%v$O!+1tSmtgdN0h+TE{ZImLVH2?Dq9hcCiw_<5M0l z3=Td=hD!@81ct=@Kb}l#~rYwkA~|9+RX#41HYwm zHat-}*KY1WrT!Fmkm7y}mF6yRDCu~E)^s95KiFp;Y#*qiaz>sg+561L(MW$A_pFV3 z;%%&SL7+ovC+}=qx;jFC)^0xAH?WS%DO}_T?}FX@F&gVX${iZzesUQtP4IIlHSvb1 zbZdlu=(c&tK9EJ_WIyqA@wWK{8t*^JJvYfcna?WCobOON!aHY6AC1tTn>L@jJ&;G` z^gdC#G;RI_Rr`}XJIVXwpld#|pmuLYalzhYYl9l5BmZYGmS;dqH;*wyc7`H*SkXTB znR-s8@t-^g{nb0dU5> uint(4)) & 60u }; - return _76; -} - -DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -DrawMonoid draw_monoid_identity() -{ - return _23; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _93.Load(104) >> uint(2); - uint tag_word = _103.Load((drawtag_base + ix) * 4 + 0); - uint param = tag_word; - DrawMonoid agg = map_tag(param); - DrawMonoid local[8]; - local[0] = agg; - for (uint i = 1u; i < 8u; i++) - { - tag_word = _103.Load(((drawtag_base + ix) + i) * 4 + 0); - uint param_1 = tag_word; - DrawMonoid param_2 = agg; - DrawMonoid param_3 = map_tag(param_1); - agg = combine_draw_monoid(param_2, param_3); - local[i] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - DrawMonoid param_4 = other; - DrawMonoid param_5 = agg; - agg = combine_draw_monoid(param_4, param_5); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - DrawMonoid row = draw_monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - DrawMonoid _209; - _209.path_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 0); - _209.clip_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 4); - _209.scene_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 8); - _209.info_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 12); - row.path_ix = _209.path_ix; - row.clip_ix = _209.clip_ix; - row.scene_offset = _209.scene_offset; - row.info_offset = _209.info_offset; - } - if (gl_LocalInvocationID.x > 0u) - { - DrawMonoid param_6 = row; - DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_draw_monoid(param_6, param_7); - } - uint drawdata_base = _93.Load(108) >> uint(2); - uint drawinfo_base = _93.Load(72) >> uint(2); - uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_93.Load(48) >> uint(2)) + (out_ix * 4u); - uint clip_out_base = _93.Load(52) >> uint(2); - float4 mat; - float2 translate; - float2 p0; - float2 p1; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - DrawMonoid m = row; - if (i_2 > 0u) - { - DrawMonoid param_8 = m; - DrawMonoid param_9 = local[i_2 - 1u]; - m = combine_draw_monoid(param_8, param_9); - } - _285.Store((out_base + (i_2 * 4u)) * 4 + 12, m.path_ix); - _285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 12, m.clip_ix); - _285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 12, m.scene_offset); - _285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 12, m.info_offset); - uint dd = drawdata_base + (m.scene_offset >> uint(2)); - uint di = drawinfo_base + (m.info_offset >> uint(2)); - tag_word = _103.Load(((drawtag_base + ix) + i_2) * 4 + 0); - if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u)) - { - uint bbox_offset = (_93.Load(44) >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_285.Load(bbox_offset * 4 + 12)) - 32768.0f; - float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 12)) - 32768.0f; - float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 12)) - 32768.0f; - float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 12)) - 32768.0f; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 12)); - uint fill_mode = uint(linewidth >= 0.0f); - if (((linewidth >= 0.0f) || (tag_word == 276u)) || (tag_word == 732u)) - { - uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 12); - uint t = (_93.Load(40) >> uint(2)) + (6u * trans_ix); - mat = asfloat(uint4(_285.Load(t * 4 + 12), _285.Load((t + 1u) * 4 + 12), _285.Load((t + 2u) * 4 + 12), _285.Load((t + 3u) * 4 + 12))); - if ((tag_word == 276u) || (tag_word == 732u)) - { - translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 12), _285.Load((t + 5u) * 4 + 12))); - } - } - if (linewidth >= 0.0f) - { - linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); - } - switch (tag_word) - { - case 68u: - case 72u: - { - _285.Store(di * 4 + 12, asuint(linewidth)); - break; - } - case 276u: - { - _285.Store(di * 4 + 12, asuint(linewidth)); - p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0))); - p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0))); - p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; - p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; - float2 dxy = p1 - p0; - float scale = 1.0f / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); - float line_x = dxy.x * scale; - float line_y = dxy.y * scale; - float line_c = -((p0.x * line_x) + (p0.y * line_y)); - _285.Store((di + 1u) * 4 + 12, asuint(line_x)); - _285.Store((di + 2u) * 4 + 12, asuint(line_y)); - _285.Store((di + 3u) * 4 + 12, asuint(line_c)); - break; - } - case 732u: - { - p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0))); - p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0))); - float r0 = asfloat(_103.Load((dd + 5u) * 4 + 0)); - float r1 = asfloat(_103.Load((dd + 6u) * 4 + 0)); - float inv_det = 1.0f / ((mat.x * mat.w) - (mat.y * mat.z)); - float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det; - float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y); - inv_tr += p0; - float2 center1 = p1 - p0; - float rr = r1 / (r1 - r0); - float rainv = rr / ((r1 * r1) - dot(center1, center1)); - float2 c1 = center1 * rainv; - float ra = rr * rainv; - float roff = rr - 1.0f; - _285.Store(di * 4 + 12, asuint(linewidth)); - _285.Store((di + 1u) * 4 + 12, asuint(inv_mat.x)); - _285.Store((di + 2u) * 4 + 12, asuint(inv_mat.y)); - _285.Store((di + 3u) * 4 + 12, asuint(inv_mat.z)); - _285.Store((di + 4u) * 4 + 12, asuint(inv_mat.w)); - _285.Store((di + 5u) * 4 + 12, asuint(inv_tr.x)); - _285.Store((di + 6u) * 4 + 12, asuint(inv_tr.y)); - _285.Store((di + 7u) * 4 + 12, asuint(c1.x)); - _285.Store((di + 8u) * 4 + 12, asuint(c1.y)); - _285.Store((di + 9u) * 4 + 12, asuint(ra)); - _285.Store((di + 10u) * 4 + 12, asuint(roff)); - break; - } - case 5u: - { - break; - } - } - } - if ((tag_word == 5u) || (tag_word == 37u)) - { - uint path_ix = ~(out_ix + i_2); - if (tag_word == 5u) - { - path_ix = m.path_ix; - } - _285.Store((clip_out_base + m.clip_ix) * 4 + 12, path_ix); - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/draw_leaf.msl b/piet-gpu/shader/gen/draw_leaf.msl deleted file mode 100644 index 2ec1911..0000000 --- a/piet-gpu/shader/gen/draw_leaf.msl +++ /dev/null @@ -1,318 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct DrawMonoid_1 -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct ParentBuf -{ - DrawMonoid_1 parent[1]; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -DrawMonoid map_tag(thread const uint& tag_word) -{ - uint has_path = uint(tag_word != 0u); - return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; -} - -static inline __attribute__((always_inline)) -DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -static inline __attribute__((always_inline)) -DrawMonoid draw_monoid_identity() -{ - return DrawMonoid{ 0u, 0u, 0u, 0u }; -} - -kernel void main0(device Memory& _285 [[buffer(0)]], const device ConfigBuf& _93 [[buffer(1)]], const device SceneBuf& _103 [[buffer(2)]], const device ParentBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup DrawMonoid sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _93.conf.drawtag_offset >> uint(2); - uint tag_word = _103.scene[drawtag_base + ix]; - uint param = tag_word; - DrawMonoid agg = map_tag(param); - spvUnsafeArray local; - local[0] = agg; - for (uint i = 1u; i < 8u; i++) - { - tag_word = _103.scene[(drawtag_base + ix) + i]; - uint param_1 = tag_word; - DrawMonoid param_2 = agg; - DrawMonoid param_3 = map_tag(param_1); - agg = combine_draw_monoid(param_2, param_3); - local[i] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - DrawMonoid param_4 = other; - DrawMonoid param_5 = agg; - agg = combine_draw_monoid(param_4, param_5); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - DrawMonoid row = draw_monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - uint _206 = gl_WorkGroupID.x - 1u; - row.path_ix = _203.parent[_206].path_ix; - row.clip_ix = _203.parent[_206].clip_ix; - row.scene_offset = _203.parent[_206].scene_offset; - row.info_offset = _203.parent[_206].info_offset; - } - if (gl_LocalInvocationID.x > 0u) - { - DrawMonoid param_6 = row; - DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_draw_monoid(param_6, param_7); - } - uint drawdata_base = _93.conf.drawdata_offset >> uint(2); - uint drawinfo_base = _93.conf.drawinfo_alloc.offset >> uint(2); - uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_93.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u); - uint clip_out_base = _93.conf.clip_alloc.offset >> uint(2); - float4 mat; - float2 translate; - float2 p0; - float2 p1; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - DrawMonoid m = row; - if (i_2 > 0u) - { - DrawMonoid param_8 = m; - DrawMonoid param_9 = local[i_2 - 1u]; - m = combine_draw_monoid(param_8, param_9); - } - _285.memory[out_base + (i_2 * 4u)] = m.path_ix; - _285.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix; - _285.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset; - _285.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset; - uint dd = drawdata_base + (m.scene_offset >> uint(2)); - uint di = drawinfo_base + (m.info_offset >> uint(2)); - tag_word = _103.scene[(drawtag_base + ix) + i_2]; - if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u)) - { - uint bbox_offset = (_93.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_285.memory[bbox_offset]) - 32768.0; - float bbox_t = float(_285.memory[bbox_offset + 1u]) - 32768.0; - float bbox_r = float(_285.memory[bbox_offset + 2u]) - 32768.0; - float bbox_b = float(_285.memory[bbox_offset + 3u]) - 32768.0; - float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = as_type(_285.memory[bbox_offset + 4u]); - uint fill_mode = uint(linewidth >= 0.0); - if (((linewidth >= 0.0) || (tag_word == 276u)) || (tag_word == 732u)) - { - uint trans_ix = _285.memory[bbox_offset + 5u]; - uint t = (_93.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); - mat = as_type(uint4(_285.memory[t], _285.memory[t + 1u], _285.memory[t + 2u], _285.memory[t + 3u])); - if ((tag_word == 276u) || (tag_word == 732u)) - { - translate = as_type(uint2(_285.memory[t + 4u], _285.memory[t + 5u])); - } - } - if (linewidth >= 0.0) - { - linewidth *= sqrt(abs((mat.x * mat.w) - (mat.y * mat.z))); - } - switch (tag_word) - { - case 68u: - case 72u: - { - _285.memory[di] = as_type(linewidth); - break; - } - case 276u: - { - _285.memory[di] = as_type(linewidth); - p0 = as_type(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u])); - p1 = as_type(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u])); - p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; - p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; - float2 dxy = p1 - p0; - float scale = 1.0 / ((dxy.x * dxy.x) + (dxy.y * dxy.y)); - float line_x = dxy.x * scale; - float line_y = dxy.y * scale; - float line_c = -((p0.x * line_x) + (p0.y * line_y)); - _285.memory[di + 1u] = as_type(line_x); - _285.memory[di + 2u] = as_type(line_y); - _285.memory[di + 3u] = as_type(line_c); - break; - } - case 732u: - { - p0 = as_type(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u])); - p1 = as_type(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u])); - float r0 = as_type(_103.scene[dd + 5u]); - float r1 = as_type(_103.scene[dd + 6u]); - float inv_det = 1.0 / ((mat.x * mat.w) - (mat.y * mat.z)); - float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det; - float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y); - inv_tr += p0; - float2 center1 = p1 - p0; - float rr = r1 / (r1 - r0); - float rainv = rr / ((r1 * r1) - dot(center1, center1)); - float2 c1 = center1 * rainv; - float ra = rr * rainv; - float roff = rr - 1.0; - _285.memory[di] = as_type(linewidth); - _285.memory[di + 1u] = as_type(inv_mat.x); - _285.memory[di + 2u] = as_type(inv_mat.y); - _285.memory[di + 3u] = as_type(inv_mat.z); - _285.memory[di + 4u] = as_type(inv_mat.w); - _285.memory[di + 5u] = as_type(inv_tr.x); - _285.memory[di + 6u] = as_type(inv_tr.y); - _285.memory[di + 7u] = as_type(c1.x); - _285.memory[di + 8u] = as_type(c1.y); - _285.memory[di + 9u] = as_type(ra); - _285.memory[di + 10u] = as_type(roff); - break; - } - case 5u: - { - break; - } - } - } - if ((tag_word == 5u) || (tag_word == 37u)) - { - uint path_ix = ~(out_ix + i_2); - if (tag_word == 5u) - { - path_ix = m.path_ix; - } - _285.memory[clip_out_base + m.clip_ix] = path_ix; - } - } -} - diff --git a/piet-gpu/shader/gen/draw_leaf.spv b/piet-gpu/shader/gen/draw_leaf.spv deleted file mode 100644 index f9feedc8d52df64553a1f01ed883cbfdb46b9d37..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20204 zcmbW833Odmy~ZyjY0FTCQp!}Ir3_`J$oR@kv4BM;nN$q7N!rGyNl4O`BFHR*il~Sp zhy#c+h#&|!0D>qeDvCIRBBFu{Dk`(^e!qLqmu~LT_11cqZua+m|7q`i_Sxs2G;LdP z+Spoch1$xsakU-WYK_mDwG~lnwN+|k8vX1!N6y)9c%*x~op#U8!N{0gZPrkI=^+FC1HIifM$^=g zYlHQX1)aSojcSgotFL#ksf}&KI>zCy9tJWnZ{Bdvh;d{1&%CZeZ~wf3m^v8WN@(5G z>s-_**xB3N(?8NXvTQnfM{OO}iy8xq<~F9b+;95u$k5`hk!_a`b=$==M*qy1`G1(r z-BueVusl_4(5m&zOmCM{O-Mc<0iAAy%aszdn6e19#MB z(l4kFce+NvM*c(TSuXIn#x=Y^zI}ZI*6~`%{bY6Zc`JPXf&O{D^P6ie_p_*HQRi^)$&FlI19h)ie`il$ z&mz`&c)7pVt-mq);U3a7u9*H_v!%V=Uikjbk>0+ZM$FhoF89uYp5FNjMwS~>$33ol zXT3T2=wr~in31W|9GClJaHOj*26v40$EmZXj4Gzl823)SzkeWxa2>*^{=*JHR|Q-<6dd(_0jn>$LGFj=yQ9!mXD*Ev$1!*KZZx@ zT?=DfzP4+O#+Ku0=Joz>+!wDKYtuHNvNi6d(eZRN=HWi&LOFZZsj0c2o7dLbO>OOl z=E#?iXA)X--Lkv1HRrMpHqV#lUMTI&F|CW;>*W58HS4}!W%C-g+SbRGecx)^pt5z> zN9qamUVIyRbHDGmcplg}srl*RKCt)xK3>4b81J9g-@hF-Z;T^7tD0#Vc^WqhdgBah z#J1JEAN-he5q*9Bd~kcs!+WqkR9BO8Yt*wOL0R$D^fTiMQ`FMZEz;TOD)choM1 z4=?B(?i#9(baA41hWL!&44GqzmLsGww#%@m-u2*tkp(?N^p5Qo`m+A5uj6gCyWm3u zOYt4kcn(qLn1P{%vxf#24|+(shWFt!i+iMeHmLb2pABlQ&jz*s_H5ARIrI8wLr3j9 zXouB@IH;CiXEi^}1>JGA)gEX{;P%?%P21=-dE#|^Ozlay`>nC3s)N=!KV9jKF^uZ% zwP(=>7IVNg&hd`ga}{5HMvtkzhSoR(bR3OstF^NT&Fj%tTNRvrG7(%}i%Bhf1Nb35 ziw1_4xwpsEMMY5=bFP&78Yd|7zoy0q6$4`*Q7&kdj3A8GhL9N)hBNHd=AQ7q0N-^`2~ zFP_oFJA%7UT87?U+XXz#eb$U=ug!utzF#%=L3{0>aITU5Zw}|6G4HAHp{mAN;zo`3 z+WFw#{w1B=d^Ixel8QIS+g`h>;v?0X-3;f58tEZ{d2fRcRkl0Ce24U$(RQD{)m$EE z*qUQ|P|S5}u2p+&BQ`+p*V&BW2n;?OC({ljQ@&$NGk}kp&tgBV8q3?z{4Nkb$~82f z-O(u7(0tBDGsb*A2cvaV+QdThJA-~cW8ycq(lX|JSI}=t;kQ!dw`rmI{XoCzh35R# zwkb5ns>Wp;tvkNbb}BT#1L(I~q514r+oRB&r`kS+=5tSGBeVZf`uRH_^byJ?XQ1D$V^Rx4!$JopvDq_rCO#{NZJOwd9`( z&sO(<^N`am_39XXh9sZk_gT<7{yaPM%lPxmP-~9Atr44AK0nH_ z$fx4#$Fa`9(Y19it;LGFK2sT~@#3}daja8lel~)QrB^GPF^);w6k^ONwoPR|j!mp? z)5rTu%{)`#-dl3@t*g4;AAYvMqRo3lt!$3by)lKh0?k;t=Z~Lo*Qm5_1$U0Ib38ZZ zJrQ-!N_T8~(-J2?fM)K*9SC-h$+aI$^K4lM?)|UkdOGLB>CL4+v*ModYTA#iurbMZ zG}yJ>h%BD}+BV`p&sA}2)93%wTJ!uT?if59tnGL`qg+EZ=Og!gDA)BJaPL+9J1d*H zHzZfi4)b~@$n~37`Kg&#KEKi?!%qaeA6y&#hbpd~b^9Q=wu0BHz4oEXuBQKo>Ahy^ z=9gbwX}K<*JA^mzk~G|>bP=7Id;#Oa@^V+ z!(@8LsNFHjJzvT-*&XiQPt7CXvkHC`-0$469|wMf1mVZSy}!ci@Q#W**ZFYIlh_x+ z9qS~j4uYqE9qWl;ubrB{_DgAeW_zDr2v&1F^#1@@fA#oZ0rn%e_D%4W>5ac1Y+d8! z55djnTA1(0VDqVKf0SPTtd+jIN@}O?ow>XWHzs@>3%OFkSA$zW_VwVK!M7o^=Zj~% z``z=)whJp~3CHLMbL|Bb*VF5_CcQrwvu3#8OG@5VaM#`Urqb?vcewZDaKX*Lq=lbT zaPytl!Y?Se@s|}`|La@$EiL@k7JgR?f4qf1QE=-$*}|W0;m@}4=UVt{E!_9ctdHaO z-LvH03nlmcGu-j`4p?&E1556^V7PnU_rQ|-9$50zTe$Cmv3n0(RB-e89vHjh@g1<_ zzW>geHSdb?}8=wU9jZ750>0_ z!f?m$J7Kuv_nolhz7vK!KHmvT?t5Xl@xB*^>+ic^$$dWzcfEZ-l<&)3>vsj;C61s? zqV35&?3r>DP5o?qWMi6Y`v`anGIa4sb{^< z0%yIoC-!WxHs|fy%42&EIP2|sEzf$N3wFKL^>>ZWr@6-F@;sM|JvtjC>!+Xba(%oeULV)hKKVWXF4yQHxY{(D@!HhNYwq>Q z{dNi1?}d|S$@x*RddB!MaK`BO#*fo3rs-!5xjtf_XXdir<@Ao%-aU2&{U>RDw10|T zyM1bZ8f=d2vCn|jj;6WC-wYE2Y4E-H6*D+_sot2jRK$}{* zFWp0}W8R&7W6{4sTZv}vyDFaRdJp_=n)^KUzX?{qzS=YQg4OhK&wQKSk9$Vjw`gk4 zL!8{-1Gnb>KAL*=!wSQO`~qyu z44N@={k)D@Kl`lTFTt&Q?N?~(xi*i0)pBiq4ff-;(e@}!&1)l0?%#l0b3cZro@?`4 zu-e|V?CsxyorB|loL+9uOX+`4bDtX*|36f^{!f7AxgY-s_I@;vHhs*k?wVxW_IXDB z34A=w>-!Wfd-l&@ZTUX*7qD8c&y!%aeYqn#>HkVgo@c=3ORm3F_T>6ITrIht2CHQ) z{{e3Op7&2Q^?c9!7g+6Rn)7^?UM=H!0i5wX4=%^^B3ygM^AcFi@np;|gUy+G{|2kQ zLd!YwD%eAe=ig4{2LmOBvYw{X-)XeL9l>5SG6oc-zhn>=|}1DiMZ&gx(_ z$LhJZCfJWPw5>r?vxYc%CVRGs ztLyK3r9Ah^dSLGp^Jvq@-0H4j#%P~0ZUpxFhHnga%-J7Pz-s0Gm6bC?3@*pG3%nfTu5h(-jJv^&)0Q#*4_JM7TGnwU*fDw?Q*RHr zdd9dXIAb)2V^q^GF?)kOZ2soM+Jo;~qSu$p@!d#MlZUMk}j!PRqy_JbGF)N`E& zz~6g&d&f@yXPpY(~^wt@ww8Wnbw@$|O zF0fkj+-&}A=@fWsYftQ{U~Q@WZm?Qvp9WS-?PXxK#Ge5!Yo7^MyRxyq&Dv+dQ(Jpt z&jxEt?Q_5lt(y0H;A*LTI#@07=Yh-G=fl;S=UlV)1@P3?p4j(-wWap^z-pQI`@w3d zeJ)tdc(2I^XZ}`Pva~#Gumka4XLNkYP`fB&uUPQ0WxG}8ZN5SPB zKL%Ix%+J2M6rMS1PwdCR+D@s)d>L5HI+xJPGrmuN)$Xj$j8B5quAp7RNF2+j!0PU+ zNi_3Z4p&c}PlL-mpMk5HC*!&jT%HA2!PSnUa?u$4dAS^_QZY>tj+bfj$WR#=}X|8O*etd zGv;Qv_MA<(fYr+H4PS+EmpXTV)yi|_PIz)@PwZV_ZQrGS#qYt@+{5wtev8i! z;A-W5eGqP}Huvj~=(At7{g9^SeibM8Pr&6G{1mQMo(n&NXAQI`_UB-2;lBWvxqb;( zEBF4d;K`*uv5$bY9mf6lDE+T#=2v$eAEN&aP2F{TxZ=LcWM202+?V#|AEbYbb_p%# z(r+v63VPS;cl22=ZI9E`TraWD_!H@$pm_~Fx03t!l^&lzR6g#b`1}#9ehzsY&yzIc z)D!m<*xcS1iTe{=JwAV~eBAHx`3stQaz70=PCd_|XTa9fwkoHvWB4nLKdex#rak)K zDt%*geNyM|VAn8po&~Gv?={h;<~7LL*&FY<{|~Tx-SvI}?B`{ELqT0W3;s{~1FL=_ z_+K=Cf8aU#4)o{gy(ao4?~CAB6?a}Q(Hx8W#=0+8TJGsr;Ms%5I(9Yv9J^z&hP6}U zRd8A3HF#MA_nKO2tN>39W38d4UvjpA_rjKQu^p~fo{JsueBale*fC&j;bXyNu5oa+ z@?2aAo?O}!yE0f??!8sO=2y?Xw<=iOdH8p_^2D#EUd&^4xSIPv>#+trwY4X9O|Z7` z3E(o}t!m;^SzdgieXSlxN}chPd=FJ!Fig1!F^cAFhw)I+W+Y25|KY*{Q})2Ae}YH8up7H8z5)+4#e#me<%=jaHrqQ{d{UF%@hM z^)srvn}DsY&AN_d8rU;4KAXbzQ_nS3%Q!a!m*dhan8Kltop3@y3$1skV+2J`Y9-4AR{ZRb_m{$PKvNt<)^_chc~>i}@;T;GVM z?)sT`HrTk-J`k*rx;bajt0m{b;MO_52~9mY=YY$chrsnwH|Ih0YRP#h*lSo`lf&TZ z$$2=~v1FV_fb~&No+H8LDcAokaP{Ol3T*x4IU1~w`We-_9s@SNw$rM0Iu@+%_}@$~ zH>c0EY3Rf$~@iu*Ec@DlEtd=wRcyOLk##={CztnjLxUAC&S1Z4NoB(%z zsZ$55rOsS%>KJbwHT_bj3tZOehO3p|p?cu1L+Z=}tEJ9-aOxOu9X0*R=epk|Qny#X zVjbTJS92YccOg8_8|{hh18WOk1TJ&+!_~@X@Bloyv?q2DtS!&36T#+J&$DX?tnR$r zbMnLws~7Vafvc6z;KlIN)}GiUU~S<`!DX(K;A-VFco{sov?umtu(r(OU10O8XC9}3 z)t$$9bz0)ztzOLIG`L#%4skj>wY4Yq46ruqc+Q*&RhbwtnHHP^-U{wP>2wLb=S zeC50@MN^N@$19(54ws>+=N!Botd^KhfX!RZ=?XOU#C#I0mY7e0Td%{X(bV&^w9kMY zpY@E_?sp5n8(s<4?i`)_CG={}UHfOjYWePXHCWB&$1Y>}9K`P*<(~LFntHy=Tmv?T zdTLw?E^Ax|SF@3hQ7x}=J(@Mjb^QXGdTQJNHi!Bd%+v2OUj$oQoAbPiUd=IRzY)A* z!EXZVllU)z)$~ieTHi@_+8+#<~J&y`{!=BKI+D}PWONvd-yjiesT4@zZb5L zddB)Ku(`BltltJZ9&L{G4tllZy&s%9uFrSi`s5ma7p#xE@%PcIWp3XCmvj9--1yAx z2Vi~Fjeme%E%84D8=pD+2&}d%P5%e!)%5p!%R^wF^NIg4ynK&-7_OEx_$S~z(~UK! zntsXoQ*im+;b(BQ<}->m&>xV6iB@i%bw)P4-CmfF7sr?xq*t)^dUKMpQy{|>I!eCAg_uYy~ ze}b0Ue*mkc_8-BiZBA>e>6hA1g3H=Z!PT1I5t_CC1h;m1{{I=Sp4xu_tEKkS;M6v! zwbk@X?PtJc?Z3j+n%@zcwf_e9Tq>{k-{I=1{VZ55wf_N5ZF5>%O~2IsC%CNrFSuIs zdqcDKb8u^y*ZX<6dTPG_R!i*{!KrOdYpdy(+Ao32+ONRXUZ&;Q@^7&BcscJ^;p(aV z8dxp0DK%Q#oYwY_eyP0zysW(uW^O~2F`4=(Gh4p;Nv<)qFUV9&JFSre|7 zIupRDW4v|L^ef*%{5fXHU#NHwy+3=j&$uQL+r>KP@9)=vtGVCfvu=ye`fxS>4N?A1 zYXh*c+WZ;phV*{?J1uRKX=>INC-=r+e@`rY3cQ`#dH0+O*GK*S>MY#^tnU2%HxBZ| zPXl+6FZ0xu>J4r>@*}PP~15w*%Sp7 z09TvF*vzL*&G?Qgek}dJ`6jp<&3_v<7GHmd*UvbbHCCp7m3|%i($C)&^Rp^#SMvKi zXX6XJ8rb+X=>4~3+SjL9BkQ*z*qHKqYy?-2&&FVLl|ED8>RIEdVB@CH61NH1dd4|_ zxqh*43bwZElQrB7tRA1u!PYK)wt%ar_H?ju>WSMDZ0)SQT))`223x!Q`_^sX>hakY zZ0*u#JGgpkZx1$3J#jmLt!_Q%~Hk zU~3y^ZMlA~uh)Gyns)b-xZF<@;eIC2+)v)GYth_8Yt!f7BhCbyE7xofuzuIEfA^%{ zi>BuDEWban4_MvjjJjhp&px!|*%z!9z8~1U_mS7{di&GV%n_ei;L_)faJ2&ppV^g9 k{;vHXxSD@wmE3Ovs~=4B-O~BZp*be&npYp!P2F|-UzZ9vt^fc4 diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil deleted file mode 100644 index 5516a9a4c03e932511faf7900f923798085cce6d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4260 zcmeHJ4^R_V8h^=dc9$e<76@%HfEy8tXmLRVlz5YWVo*xM85M8rbOTChi!=yF1-$9z z4p>H*Cp~+1SSkPj9H9WfVXuao3v~_?$PXLbq3_)C{ z_g_>=0eB5!5<5LxKLU>7wzy{P5SoKgsVnvioYHs^5fzK>DdgmHhSc=KHL2wh)UH3jHIV|!GKAdZ)?2l_^<(8l#S)X&GBnfHcs)!fNO-uLhEOE z6Y+xytmqbR3Ow8si1s#b0wVx9Fo)k$gq_vVxkP=4Kmc~r03<;yb%8O2-dcyYEI8tX z%@t>dM;@Nr&?s>p$)!pwP}ai;Le)&*iyImDJa1 zdJo1C4ZWPu+kEozuG=BC#M_UCJRYAgzk9nGPLe1>rJ!MT6vsb}hG~u8G9l0J3p_Px zDCwud_IKv5)*2GQ)`we;y2BHmsmmx`&}nfKIp>bOKrfJkz6#U}BIG;#u1eOGUgD}Q zL7*M_nu**#PS(H{$Z8mThZnD)@$DY6JZS6;8s#j`2aIDM-xF4)!E0E&avYnc@T$2; zmV_1NBB$Yy7l$$1r$|`mPK8-yrEH-`6}Q>|s#Y>tF>Qy)kvu&12Bm>pn-eCG!hzuZ zW`~M^fWIKT_&sj{Aw6pXz^PIC|3xgjAuLK32> zv2F6Guv|}LNWz*yR4d~}%g&|nm^(CGxKwUn0GL8h`EQ6&)14PKUfRBQ)oWw>s$+rkoD|E94~RKTS# zbbLU)f>H(X(0Qs< zS+BZc?5umS`ync=)DDahD0_}^_A;fkTP7V~_082F9dq^t0)015Umcr1cY5H))#2#` z0=)S^ZeH?|&-Pp|ef85lO{II`BF*Xn7&tK;aJ%2G>u&0JcJ#QhqWy4tI}PsJotDwl z9U`=bbu8}xrm$8LSESoie{=bon+cl+Uq~KpDl2oCv0^J$;=%UK<9g@$w92ktrO67g z@+oY?ay3Sz?Qs-C3s`S>yT@orwCBL+wPe zMrPzyEM8lJXOy^hz?O4aJuWMYS0&Q_N=o= zmaktrpu%@qk(xwT73YdNqbF}oVF8-_XbSraMc(j}mp!u2OtLQ-hiynO|AK~}sLv9^ z8~oR)oCe@$uoqF7?Nao&hxUDF`C1W*j(K~N@?%eDT)O`BK`Dwo;u8}N|5JPaHulyH z=2wlF@Ue0B*+ki;_NQ)UJ%kAzuNH(jwNn{up82S#y6tD#h;3PkZ*8mVv(XdZ97;fJ z+UH)u`skVV!@vLHxH@2>S@dm^xURwOpj5y7^upz@+iHKKvcOhbjIiwNB#9<-8&Q5W* zIICiLp$E_emZwnLU)peIz%5AI`TX)imLC|5O1$K@2{>AzDE4w%(Vvue(*(px*i!%?!6Ic=u-Ll)n z8o_DSD>QLS6~_QIC4q1yIPLu{Nb)$8DH=ebjzrlQEji1ksX)C_s3sf|%~>D*3+KF& z$fvo;S8N+U*F?(9pKHQolgq@6qKudZ(&8v5tw>>DHc}Kh$;me=wsy$vk=SNNWU15M zmuD%kctmEpK9neo;w(s^31cTYYQI2{P7Ak!;*ffq*dIROg-cqZ3T^nFqj|uYmY6M- z0<;$#s9QDesHvMuDqy=*CQ)kJHoAeW>%G5FTVRI1I*4YV2Bo4kg zOb`v6mv7i#^K($bP&gq_jkDi%<~A@8y`>`(u5_=0U#XH`>9~_$>FX?L496AH4D@w? zy&)Z>q#3)e@D?6!UUa*!Ns$I0Pj4mJO>>28mt+b53X`uEP^PO{r8wk6X&UP+H% v96#B22X7^NeB$0@`lL^Ca$LN;D~KN> uint(4)) & 60u }; - return _70; -} - -DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _87.Load(104) >> uint(2); - uint tag_word = _97.Load((drawtag_base + ix) * 4 + 0); - uint param = tag_word; - DrawMonoid agg = map_tag(param); - for (uint i = 1u; i < 8u; i++) - { - uint tag_word_1 = _97.Load(((drawtag_base + ix) + i) * 4 + 0); - uint param_1 = tag_word_1; - DrawMonoid param_2 = agg; - DrawMonoid param_3 = map_tag(param_1); - agg = combine_draw_monoid(param_2, param_3); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - DrawMonoid param_4 = agg; - DrawMonoid param_5 = other; - agg = combine_draw_monoid(param_4, param_5); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _188.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix); - _188.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix); - _188.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset); - _188.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/draw_reduce.msl b/piet-gpu/shader/gen/draw_reduce.msl deleted file mode 100644 index b2510e3..0000000 --- a/piet-gpu/shader/gen/draw_reduce.msl +++ /dev/null @@ -1,142 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct DrawMonoid_1 -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct OutBuf -{ - DrawMonoid_1 outbuf[1]; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -DrawMonoid map_tag(thread const uint& tag_word) -{ - uint has_path = uint(tag_word != 0u); - return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; -} - -static inline __attribute__((always_inline)) -DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -kernel void main0(const device ConfigBuf& _87 [[buffer(1)]], const device SceneBuf& _97 [[buffer(2)]], device OutBuf& _188 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup DrawMonoid sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _87.conf.drawtag_offset >> uint(2); - uint tag_word = _97.scene[drawtag_base + ix]; - uint param = tag_word; - DrawMonoid agg = map_tag(param); - for (uint i = 1u; i < 8u; i++) - { - uint tag_word_1 = _97.scene[(drawtag_base + ix) + i]; - uint param_1 = tag_word_1; - DrawMonoid param_2 = agg; - DrawMonoid param_3 = map_tag(param_1); - agg = combine_draw_monoid(param_2, param_3); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - DrawMonoid param_4 = agg; - DrawMonoid param_5 = other; - agg = combine_draw_monoid(param_4, param_5); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _188.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; - _188.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; - _188.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset; - _188.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset; - } -} - diff --git a/piet-gpu/shader/gen/draw_reduce.spv b/piet-gpu/shader/gen/draw_reduce.spv deleted file mode 100644 index 29926778b3a46f9aebfc58c387f095ac8deeec99..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7240 zcmbW5iIZGa6^9?0>B&IC8bShum=M$m1e2;jz_29*C(*$GQ2|Apo}SK3o1X5myN9s2 zL`4%7aR+zf4#5R?)Sw}vxZv_9_*b}9Y59Hcy)$#uLRsYtC-^AEC z=G_hLMiaYcyWX0cObsenq;*uNjLl^`y0!f`b=sZgWJWaw2As{-`qS0sJ@c4#)mzQk z0$ZH>T4S%?ppeeqy}d?XzY+cz*Df^Mdpn`3WGk|zVD-+-L|!_1t224x^}T*~uHN6U zP|Ak6P4oY4+WJ4#8C4I_r7WvK@=I7Ja=w!HcBVF4?boK(&u!X@zCz!#47s|$(`DC% z&ob6}&R4R{tkbn#)gHj+F?X@j4*1dh45wPvu~uiI*4ov+yHl_Ao1ONq9l7mDb}Q>7 z`!mNxt;gx;H=ONZ-PUS#%ww;W{i1u?ycYR}PJ3^2s_3P%-%Mks+H2mI$Fj%bo=Lmf zXf?r!)|y*)z*G`=z9HHeP3u7dh1N( zF;A$ci0l2fp6c`H&MI~JU74?^lIL88j)QXd^r^tk&aq-L16!VBW?!i19I&EqF+yhtd^ldVH`n-wt#QR&xs$hG3i;6IL95KKAjbb&9Z-k$`ccV5n1wWj5(PwMj z+Dxh2$J)&KO2#!A$Tytb54K=GoV{Z}K5&9u$qpj-rmMYrx7M$d%y;Av81Kk9@5kUX zlGpbT`l5#6>@a+%Ki%lETJNK*Y5pT8$d&9#dh`nHSt<@ct1+rrZ&F_WNi7!H?ws{;~JlI^BI^-Ok*skEi>34x5`AGo9|e z&caA8>*o8nFjEC?-QtaIw_{(NQT?rbqSa_m&U+MdrOw*%$>UqoACdjW$UV;LyRr(} zNi};|{niyrj`t1Ya7 zUj-j=w7U*6cEs5Z@9c#A4*08~6`-$T)yF*U>(#8r65m?N<0Yp4rjpku;`tWXm(@h^ z9ap!SKfVX@gQU&>LxXXiPqqf{xVf$O^{n<(%>5{P&(l8Nj_gcpe@Ce^_65X>H^8{w zZDsB5Vl{@Cag}RO#MS=+$Y!oW|6W+u}g_ARl7l-!SRf;#u(-B-4rpRpQC%>2sUf3c{?`ycf@UFwWw+;|Jj?Ojgu zsx!wQN+0#cRQ4X5<5E1FIq$JOa;Bv#xPR*Hm7jC^cM$*0Hiq|Y7i3>TzI8zMH*Dyg z)2T$(e|kXPHz4;0W$u;~z+5<3E(h+8-W}A03d7l=3!q>3w?*U&Eg5 z?QTf?0c@0SDKYQjwRFjzh;2iA2kRRk=iWESd5<`6gjaXGjB_Wv*gD8K_EIeDC*Z|G zuEU$_KYP{Q=-=Dn?Vq^z z_Vk^QebnB0R+c}&s{K{Wk#_HbmO|PbfV9z1+@9KB`{Ek4yc<5<<9m?BRzdo!6HCv` z{>J>jAN~-O&i@CH#iRZY!dr(v53+s;ItXcJ4rOiRA7V9@`992Qy{^v4M_E4x`KkXn zt9sYi&nMuG5i{~hc(Irf<&Qwd(pUa3#D17ndIXBSeF}bXZ=VJek9?njH{Ubm9s4Z2 zGcF!;^Er6^)J0vNhZlbU`WyTHBCDTstnLetm^I2rO<#f^tm(^O;!)FA;M1DEifm2d zQPbDp^-~w;@^yI6MO~cTH{iuRo5QTip?eH|3R3qdt8(Zbhd&IdJHo0QbNWqq=Tskc z+89UNbB%qwzR=lx3*BHJz6~ZGHGBu&xUs({;15FDnNwLCb2?Y{+ExD%*6%|0F?#mB z5{tW`PAr{O=PJ(X`|yJ`{s2t;Nco2S5MIogcMg8c>gOD&`w=8&ZSoQO=kQNK`bE9J zfESPbAA>LUKaxESe;hgrEr+zdrJUJc!XJbDjDH8~Gm!S$+AC%4!sl1;$Dos-@cA{o zKG7d#?ZW3-_+!u_D13ecug^NjJj&XI&u`(6K|@gZ{0?59iy?iKweuXJSFZn}x+@`n zrz}Qx=KM~Ew%|9)dP>4Cfw#}6vif^P{W3^jbDzd~4y2ELiN3EW@vvEmY^>DgTx9W> zrSstR6A!=h;msF4SJp1{7r>j_{zmTA@Zw?fB6xGBHfxZ@BllW({nkL?cOksFBfPowbC#5~Gmq!K2~z)W=0ZN5r!$fLPKTT)=i>~>89IwK ze)DdIH&)bsIlOj{GgDhwuY| diff --git a/piet-gpu/shader/gen/draw_root.dxil b/piet-gpu/shader/gen/draw_root.dxil deleted file mode 100644 index 4ea23f7699a63ef9a4dcac871e1f234a5ea5b0ef..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4468 zcmeHL4^R_V8h@MJY&KyNHV`!k(v1j3;p_qyu)$9L854vyh_$rRYmh&UCujgmLECFW zVnQ)usGtb84aGWi&enK!wDgY`2r5$K(0UzIw4p|7J+xG>)%I+AZ=uXMox7X4xw*N! znZB9td*Ana-~0C4_g?n*CYn5zI_HMw&LPi-iuxgDT+eA9L;wIA#Q^YNp8+KcN?_Fa;Z0CYesq?QI8h=)VrSz{WN zeV87nk{7>>*vVKf5fP1TU1O0f<)~f`QSGIYCFz88xgdE(oz5+x)!Mo@Kt!@Cs1vyx z=k^+uIab%G?Mgj1UyJ9!V%gdvT>$*1{i zW8?1YDq9I363@j*P`@<7!mfDzn8LUAj3lQgcwoYjZBHe|GJWJ@5{;dssA>Qb7!fY1 z@Hsacov9{gjR}Re@PI*7HYC~vH3;3r@G3mKN}5;Z7L`vM)ze0~k;euM;a}VnR;A!= zGV(SK;bRo9YAVtwf*GbF$Ka5M2V=?tBADl=43o%1q4V6bn57y}mBGlPTjqogH1MKt zQVLk)RAYik7BGD_jeUgxVGje_d~P1XQLq+l7TIG zAO(OLxYt#1{%kbMt>uJk9+Vb_RLaqV2|L=BZ>~glmv0uCE1)A|0qKj+k{~^v`i6wIDs*UUI=p^{XiK z<3&QR%$!R3P!Mjf9R{URLVuphigd_c*B#pOMprh)l+}Waa15Mhiu_(Hy)f&7K{FGS zxq5=Zo|{L9FCFh0KH7WpYX4Xqq0q!xgPqGOJKEZ4a1Q_7t%n%`O_HJvZA)fsDGJ*ngY7}1 zE!B*U3j-cOWZP41%V$5aJMv-^-KlbXzUJ|`v|ZnfrN+{&?$NH{EeFpvpUmdhKU7q` zu+mgeR9un0p{Tetm$R3Qcy(+3xslbcjO3jgX$rilaeS`Y#d~~aaO@lHEG?TNE!J}l z=4k3Vee#O(!#CK9`G*(6M1>okUB9uoqB5704)>Ma-)z{Y5SFOk(Wnv$vZPL(Zuh%X z7G6y?ud=0_90Q@(DbZgnq7+gU-P1j1usy1T;ZBCxA_veVed;AHbjd)qYonRBnc&sv zoMtm`lNqh5cA9Bt#DG{d?iP(Qq3fB@%M+prgD8dSopVVRH7uLcQxILkyd?j?CLdBS zK7?j`V9T(h@_`9Sm65mE#7i?dE1X$|JTIci^;GekFi~H1zTPF3p{`4kwdL1fn z@;?9{dbd`77L|Mbu09#>c{5r;IV;U5`z}VOxzG7rCvQ_rcSg_>x}Fx@G>AsMBGv%q zGyrnAc3cX#ej4@tE8DLWd?yuRgP!gMInhVcKE3f=D~aJxc?E>cx1r&O!4o%`7hE{! znIU6yeCVfb8*a<9agOcnJic8SNL%*e=Y=&b4{K3tipiVVyZmErQ&eLX$YrCPb~&mjfLE%?@0}j@|1O_FhIvORB?X0*k$5i+`E03i zlFYQ{{~npqr)TaQOI7;z@i`M&6fpS>LO)*H62DF@)`N}Zy2_5rw>}x$iQm>fKBna* zJ7&h6FfDjX*zguvbhX%2%(Z96$D7pNY|3l$d&)05WVhZE`foq`*zoZ9`TR3|{UcZR zVagfF8MEF`?&>;}c4R(s+D`uQ*h1H-x3kt~8qdo7mAlm;F$p`?ZHh@KYzw1{`zG>( z{bQG|1&{ZS?ZY0Olbo?DIqjjYjLsvA(%$Qu-_71{f8mB-xZz*nh5{OZui&`}$Cf9u zzkBu`i1&Or zG)2B-L5euS>*~qU;E5efcq8&r{L1$Hh%?)yG$3k~i|`p?c^!Pj;6D<-)z>Af@dglCAbO>DUDWctsofC zlJN4~P50X#oZzHNvkA-WYsO_$*k@=OVG9Vg;T(@EZCH$AK*`koK#Tflx9zkYbOwc0 zy#P^ASBxLWtfbG2Bt@ZcjfmrbjHHxF z;2q4b;ChI)2DlOpZv2>nOYQE^R6lL3IKnzbd^-`Su`=%M{`J4xt`z@c{mEuT?StO zfV(d?|G`LzTjZX2sQ>hOA# zhgH7T=YW!%P#%K|;W{zLG`bPBAxx`p(&z(fZsNh8@RN33Ky3_j=v(aipqf8#8b8&m z3#!dw$Mt)9^;gxxxzqS*47!#uJkx_^j@$Gph;7snmXp+jW4)!m|Qhc Q7U2zCNH%rf`@hBC0Mz2sqW}N^ diff --git a/piet-gpu/shader/gen/draw_root.hlsl b/piet-gpu/shader/gen/draw_root.hlsl deleted file mode 100644 index b4cb7e4..0000000 --- a/piet-gpu/shader/gen/draw_root.hlsl +++ /dev/null @@ -1,108 +0,0 @@ -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const DrawMonoid _18 = { 0u, 0u, 0u, 0u }; - -RWByteAddressBuffer _71 : register(u0, space0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared DrawMonoid sh_scratch[256]; - -DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -DrawMonoid draw_monoid_identity() -{ - return _18; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - DrawMonoid _75; - _75.path_ix = _71.Load(ix * 16 + 0); - _75.clip_ix = _71.Load(ix * 16 + 4); - _75.scene_offset = _71.Load(ix * 16 + 8); - _75.info_offset = _71.Load(ix * 16 + 12); - DrawMonoid local[8]; - local[0].path_ix = _75.path_ix; - local[0].clip_ix = _75.clip_ix; - local[0].scene_offset = _75.scene_offset; - local[0].info_offset = _75.info_offset; - DrawMonoid param_1; - for (uint i = 1u; i < 8u; i++) - { - DrawMonoid param = local[i - 1u]; - DrawMonoid _106; - _106.path_ix = _71.Load((ix + i) * 16 + 0); - _106.clip_ix = _71.Load((ix + i) * 16 + 4); - _106.scene_offset = _71.Load((ix + i) * 16 + 8); - _106.info_offset = _71.Load((ix + i) * 16 + 12); - param_1.path_ix = _106.path_ix; - param_1.clip_ix = _106.clip_ix; - param_1.scene_offset = _106.scene_offset; - param_1.info_offset = _106.info_offset; - local[i] = combine_draw_monoid(param, param_1); - } - DrawMonoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - DrawMonoid param_2 = other; - DrawMonoid param_3 = agg; - agg = combine_draw_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - DrawMonoid row = draw_monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - DrawMonoid param_4 = row; - DrawMonoid param_5 = local[i_2]; - DrawMonoid m = combine_draw_monoid(param_4, param_5); - uint _199 = ix + i_2; - _71.Store(_199 * 16 + 0, m.path_ix); - _71.Store(_199 * 16 + 4, m.clip_ix); - _71.Store(_199 * 16 + 8, m.scene_offset); - _71.Store(_199 * 16 + 12, m.info_offset); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/draw_root.msl b/piet-gpu/shader/gen/draw_root.msl deleted file mode 100644 index 9ee8cfe..0000000 --- a/piet-gpu/shader/gen/draw_root.msl +++ /dev/null @@ -1,140 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct DrawMonoid -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct DrawMonoid_1 -{ - uint path_ix; - uint clip_ix; - uint scene_offset; - uint info_offset; -}; - -struct DataBuf -{ - DrawMonoid_1 data[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b) -{ - DrawMonoid c; - c.path_ix = a.path_ix + b.path_ix; - c.clip_ix = a.clip_ix + b.clip_ix; - c.scene_offset = a.scene_offset + b.scene_offset; - c.info_offset = a.info_offset + b.info_offset; - return c; -} - -static inline __attribute__((always_inline)) -DrawMonoid draw_monoid_identity() -{ - return DrawMonoid{ 0u, 0u, 0u, 0u }; -} - -kernel void main0(device DataBuf& _71 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup DrawMonoid sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - spvUnsafeArray local; - local[0].path_ix = _71.data[ix].path_ix; - local[0].clip_ix = _71.data[ix].clip_ix; - local[0].scene_offset = _71.data[ix].scene_offset; - local[0].info_offset = _71.data[ix].info_offset; - DrawMonoid param_1; - for (uint i = 1u; i < 8u; i++) - { - uint _100 = ix + i; - DrawMonoid param = local[i - 1u]; - param_1.path_ix = _71.data[_100].path_ix; - param_1.clip_ix = _71.data[_100].clip_ix; - param_1.scene_offset = _71.data[_100].scene_offset; - param_1.info_offset = _71.data[_100].info_offset; - local[i] = combine_draw_monoid(param, param_1); - } - DrawMonoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - DrawMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - DrawMonoid param_2 = other; - DrawMonoid param_3 = agg; - agg = combine_draw_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - DrawMonoid row = draw_monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - DrawMonoid param_4 = row; - DrawMonoid param_5 = local[i_2]; - DrawMonoid m = combine_draw_monoid(param_4, param_5); - uint _199 = ix + i_2; - _71.data[_199].path_ix = m.path_ix; - _71.data[_199].clip_ix = m.clip_ix; - _71.data[_199].scene_offset = m.scene_offset; - _71.data[_199].info_offset = m.info_offset; - } -} - diff --git a/piet-gpu/shader/gen/draw_root.spv b/piet-gpu/shader/gen/draw_root.spv deleted file mode 100644 index e6a53e55ba4055a4339c14c7a99f86a510b0d3c2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5440 zcmai$`I^6HB!5mjl=9LOwP`1yR#tXg;Xjn zwX~?Lj7lxb%r15}!|ZBTyZqc=q1N-%?ceJNjDL31@tIeIQeeGJc(`dJL?#OfdvTf)l&Sq}+XWP-oHrVZ@KA&PQ z_4W2r-^=yVSP7Zk%f9SVd^@V0>bB{z{EoDJM)9dVb6LP6ejB=xVNa9Q@nr9v zWtX!3*wsCI;Pq!WgQxaZrfQSbPK`u+-$pa^!MWvWGsayK2VgJ?Qhj zh&=B>-$21vO1|UKuI~)YLlz_QqW7_WnSSe&XTJAv2I3R;s;-&a_S5~Img4_N_9^IB zFd6P?6|1?PcqwaIe+cnek={9dMUmq@D4SVOF=V&9%etY0}ht($+3u-6eaeiEXu zDDL5v_=b=pD+w zLX274y^bLE*SsrJtt-Ih`IzS!-;9`Rp7piO3(hXE97bV=^A@n2m53Z|^MZ3FSdQ}$ zb*=)-S%b*YHZM3=gXK6k!FelKj^6+|+UAL!73c2XRCx#5SyMkxKi-9YH{xUbJ!s?V zxQF+GUE^{3a1Hu>h(7P(KdhnsPQ2KEkJ&vl%&J&1mL;QOH+v*F*ym<`vHvmO0@#8~wDy5vL6^tuW5{farz zHZM5iVAm%z?If!D)l#6z5<9TTaY@wt2z%09Z~5i8=7Mf}EHGZSywqPS}$v zWD#ON%!~f{1JwR`U-nDeTxY`h@Ltt%N7LZqo$SNb7w0pkuQ+qgN7Oq2w%+wf+}90Y z{np=));8bo!$IU0WC;>`_z?JDvOU|ah&(yBp>IcSM$B_fZF9ta|6I%ceHiWe)V>oR zMc;w=7{3#3TphJP26l~jCq53=7w?4jM-bPNEB+U2--TB0M&fMufP2sO34Hpa-Y3D< zdnUaD_kw+A^go8r_wqhOp0O7=zaQKB z`r}>xJXoHwxPvc%y#r&efQ6E)XQ%WTY$)Y1g#zS`b}`}c^<{5|Isuv-^y|KU2tZ;jqT&i z7<&xS=Q+f&_T%8*wZDT;f6T*o!TOw&=-2na_RCoG^!s4_aRzPAA9dBnoUWB-gF>{_o>yP@s0>XzXrRXn4{l-&CwqhmNN$l&Y!??Rw8n=%?r+-!E)vz z!TAeV&Kg9Hwt2z%D_Bkm3C?q1Im3t?ZS%Z$XTZBtNALayF81p0*!p6h#`MXLxpD^n z%jqsa{JlOOdjaAzfLP-=^#9N&p^JImrO)xmM)v1SoKV0gg5@tp`}^MbQp6gbaT)p) zM2`KBbDWy|k#ichYZY^rW9yH(I2|lcfACg-trvHzZC=EOz}B`uQF|452#K6Cz}7D2 ztj5+KwO<96r$2aSf~_6B*ETQWuLfJ&d5GF;!TKZTHDGHObI!unAGOa0%hMmcbzp19 ztZADU@%3P94lUheQpc#0Y&0YwB&6F?qc<`B@nIoV(zg1OXn{X*a0rUY5t4rcep5j>c@GJ;e*SA z!UuD=;pXbY`{{TCE?m|>U;o@sgqQfo(~tARFPuOA!QbJRryrLC|NL`53HtDlryr;F zT=VlK`DPGA29*6ETt-1o9xf;g0CmuxmTtKp!9b9Q&_~zuTs-{Y9aR&xN76>lL0JjY z_7W->F%%jnoV=@0jm;#c?WL#LSP22iG%VLUp|C337{E)74Y@2qTU$bmuE20D{Ofzla(g+LcDuv789dcz!H+AG=+eX zvQy5`%dy&8E5=yTOJl8i#+kIeDLid3-NXqZQsyjHcXRYs0pmnO-|6nbvHcOz+hb1d zuY2*}(p)*F7Y57>J_dI=Rx-Xyh~Lo#>&X(~v3ibvm-(>%#=GW9
qg@uEA{+4-7 zz-X3wmu=K*E*(dj6F~}w&zmGdWzPHzuYF(2{p)?wGqUew22$AdGM;nt%AIlY!9^RJ zW79(PRmPLM$9qSnS;Y?oM0tA#`q0Ac7Wn?`K+h}b7p2T$vF|TD-$y*^NelJ4l=)EX z`xDPsC1dtGncZUFQUon`P-PBk9Pj-jQ7-d9`~%*wRF=yslS(Q#m5|`s=OZbztb@53 zz7gNe0_qzk>Utiv%*b5ldD!TASSO?6fDZKWpI{rr)Xg$#c?a6dqHY*P%9y}m6xj=} zOjN*>Iher5&B4`^e$3;Cu!Q>QWR}ZE;2B|TqKz6G-oOS!#V(7-8-ku3^-C&Q+XaMx z+bVupj8q~~j6cDpj<`i8TU?Lum43{druH=S(=nFIknj%1rX=1=55fk^#V>gqDc~?{ zfFy+1VH|dz(cl%E)k01Wo+TrjuuVEdrJBynnyfy@ zMpEbj0(A=qNhn6*ju4PdV7qSvuV7N1F_Y+@F$Gi5*Qb&iC+s_%yJbD;(7G!GNj*LF z@RmK}3&(rEGyd0iu4pz1bJx=qxp!G){|e0^9^)n|=Oxj|5_#zwtspr>Pl|}8$$2RM zi^5W6VJxlrLPSLE4h+>JN#vVkIm4p19nAasCb@esiEksZZI+C=Wb^Re_ce{ct3$tH zW2`o~E^epVOUtPs9K3kReBt~@f|N_>}|b$Akxf?yh-h4jI%gEHo4t?zp1Y`mX_WUd9&Y&zgUq}Lzl0Hj1IQe|S)KAArq&pb5uAR$UGwJEgvRUHE5zXrx>52=m zjD7)cwmFyfb~LqJ?|J8b+jaYCayBPn`j{){uY8hnrSbfgaT+W?vUz>|4c!F#8|$~6 zpr3%1KYZBlh$Jjw`oV;$DOaX9d@_0Jr&q>FLdtkcgnAk3&rPp)V-q^YT!Hd5Eutz+~ezE4@qAvH6Rkrv)$4g0!2v`9d4!E_<*q#14L zGYq5|?Hdi{O6nFGbyIewQc2ycByHGOspM61+S$y0BOY1S2z(zpnNBG)k!1_J9}#Mg z2y2pu7YV9Uuj*1e(x+V{O~0y}UO`H2cLr>bQMZ&(rz1 z!GS(!F$W<&8hw9~`VI+nnl=KrONTO-<;d3$srhqR?9^%OZs_s4gZI$cguD#eOkMXZ~td2BJ%@^1-HcK7k4B886&WFUm1qUoA>^ie=IqY2d>vWRF-3x%_G%KB6(p>RxG2%``tF(YJJXiq zuRgA?iYRA+yoHV5M;)0Wv*7CU1Ejl-c^j z*_#u6p6x!@zMHw6F){D$+xbgELSN&wb=>H7$!%{_8!xckXwcCTI9V zy8KL#^jQMr%DI({TPv6o4QKMsJ>GrBP&P|l3psJmgu`1t+!rzdl43cYZm)Os^$eQs z<5~VRc{!{JUT6Jpp8VfDiRVc<4?!ZmVZTV8!luKfcQV+= zfiUJhB);@GkAA|7Ak~dq>P5)!@RmyWeN_Y^h*$bVMt@E1dO35NcwMjZV~R8^W$8LX zm#ot(b7HyEK?n~@**L)RrH4)5WUSo5yg$9Y*-VgJv?7cUR(c4-VkSh4HYCtUqY6!x zn+y7Q=r1xLv01{O$?J7k7fA%@j8Ggcw7E#rTn#of_~inUssUIR8GWCdJFJYMZWy!9 zDstchb&d`c<{D_UA%R93b&7+!rJ&D*{zC}^by~Pi7%2Efx@(|t*FZg^1GyS(Xz#PPF3{>kvr;l?D zG}@3rBaJ#kR;_{=Q76{JKtq<8=P*$CRq`wNK;vBlog5v=)nG%T-H&#Gd`)}}iM8Z2 z@)^;@JZSLfmi&CPuhjaAXOWX7e($sDY$a((KH{6w#d46b_5?b_R^TvF$>I5vMer zIq#I`)K1RXdotT9zRP3(IHAop>@6HPCJDAJ`{ea|GyY>n#?4#g93zo(ft)rjnGk5I zSSa&zo9y9&%Qik)AOw@0d68_uc;Fh=g+W3&-I`~JC0jD73wZSNSQ3x;CZ^ua{7@QH zBrFw^e-t1gVFgxxOQBs&v&)gBl#5M^E3TOd?2FAvX}OYek+(}uid0q-?0rt}l@USB zXoizXGT$Zybw*Cq`&1#}sO&^K{|sdg=vFeFVc}baQsI$wEho?c;`w{TTRLn@Cd*vn z4ZRd^j9kvN7Jzu-uw;VU$+{_vPo~R)a3`BpU~&7_mUTh>tdrMpweNe$^7V#mspQgi zhQ{}>Eme)hSZb~sR*tv6pHO(g?B{G;Eb|(!Q`%Z9qC^W`TwCB6vxK@zMme=^D)y#! zuAb%;W~gsfNpRndzBCY269sOL2Q*_Bx>W2tRLANlwmA?ni^2TPxr-N*$zUR6tHZv# zgSN1VI7<{O9TY}X*LdZq@>o3zEsyI@93G8GR=@BQ?9y_?>${FPK}(et4_=|Y)?llk zRLrm<1wVucur;(&5~GOY7mdy4kz|)65MM#v3hCR97nTCLohTZjwg@7sZnCc6iS7ODtofll1qezN<8FMZc}HOfbu!RbOX@NjL zQ=)Ru+vz(%yH_}>khg!2t8q+BO6jlIYH0;ip^?NN$ru;3Fn^bf!pI-kZP(nwM;{L- z{0C@b*FP@TfF-d|b&(ajL>b_W%1D}NkGj0eE&RNm8QQD%K4}ndFBc4r$3VW*3K@)- zjBiH{u$>;plhU}F*GtQ#yZj>dj56L}#TKG3B{s7E4=qjMmYDNZjcjsK%GDh^yGA7z zXx@nZJ^3%k|5UtR$RtjnM;W~9#}_nyK-#oq=XQH0rwU5M55bQ_U9Pv znSvr;7!?HIvWiW(LgR_+IJuI)~ z)Kq0TqyCO<+1>aFwxzo9E%2)jkf_hrL&~=vUP@VWe2T+sp>*t9y18Sq6@=B6*5JQZ z)lf22?VJY+3CUxK`PXJwyl@+$V!{24jvDtO7tEYP{>@{d3wks9YPx9=$}e^^pUfS9 zo8SD97Jw-ZAm<~iYXWDf@-Fu&h?!jf$s+|$ODj4@6qMm~TTaxX-4I~EZW8%@SrnU$P%V`i8$)wdw$lSwiaD;|#t zcPJ1Oj>kewpg>Hh8v`-HSf{pFvC(SCJVgH-f|Sd5)rKo%CD@)JU4TS0TQH#j_jtsA9~Ac5KK3_QijmN48qacEBCrOPcW)M>W#Ks%h+brmO>_ppU4bs5*~vJ3W}C% z-7{WxdYSeWF!Fm57*a@8xC7*1Lae_#RzxHY`&C^x69$-8#F&aHY!*D^1|7;dBy+p& zJ9iJ7^1sE8gGOeTd!_mc-K*fk+Lr z?iz1Wo-spM)H%$jKUw5cJ=kkS+v)jLy^*|AGyXc#ks$>RqP#hB6Y zsOrN9mV14?Nn8D;GDZ|}q%NYZ)hH32W(_XxW5u6X&o8eUb4L=CV#|FTO%Ho;tvn#o zAgK^m2~c*6d2bFleFS#`d89zt*6N)m%FJ*IBegYbw#rb2J57cSujo1i2$)F}UV3$d zy%#*SfPt5Ufq`E>W6HkvCai43yhC9Pvhs`C4vl+So5#MnG`8RD~rp$&Az%r7Y z@}qTLON%E*AWyI`Y;;-5RLd%hw03-1<`qyg87$n86S;S<98zz{^d~x)ijM@(QAM%W zdCgA3bMvyc@z}c`id7fU=T`4KKdccvA)nwQoZ$jfTWi%GO$}wYs#NAsXb?{<`>j|- z{KS*6D%!`YL+DR3A&cT$6tlYHC-NA^TuV-%kiWgobT9zS} z(NsbdmX`Al|9&M-pyy~*?NBqc@VJSz5q^2yVZSFqDeb$xHqpa+dn>-!!CbFPlZo9L z<~3m(QgT%MxE{q09(MvdQXl~-X@Al)n12mA^Jt!rgc5Ty(71qhTdIVKvy_bUu)MN|`Hwjk7t8s+ z9+vAm(;xW1aIxI+l4Y1??g+UMXSx4<56cZ%iVBg0gR@*G^ss!qhjaE-btW$5_jC`- zt+koY{LkD5DMO3y38Bpd#Ln$U9&e=4X z95`ik+nba8m>t2=QueD#u^vSln9gV{Z2PK~NQ&Z%fw3EgcOOmf#U*<`6*uGVTZ6lAg+D|6RFiE5&+Qlsp4)fX^q}=1 zcy8Q?WNZU+tbEi%FV%i9c82$}mEfV{P48VW9ZQhvW<26sr%HwP632e)B2CVJP-0F^ zxf-z0GH+29M?xV7%*&J#U<2R-{v>!@JN^B6IX;q{{rNfjJ7FCSu{3K*6VDLKYEG-1 zdwpDSeRJI7&I=v#+6~@I`UJD42l|At$O(9Wfw~G3DuxSAzJ}EG%y(7V-jG!-4tkMS zlgv~|+aYj%gD`?p8vF_<^sxMKXDW6uKZst~oJL&`JV8rSu;dI^e1ol!-;$UBteKcD zip2&St;QO*UKNz$-22)Ldf3LtXH7epSILWRr%{*foKXD-9pl*E;Q}edLWyW0W3W*R zWUg4K$RKm0B8NenY$3=>o{Y;nK*MECwjvm&4r1s-h*yca#d;n?+pSN2G9jUE1*S`& z6?wIR;Bk}$-d!08Zu_PL1bp@~<$-H-KWuLipx-=FyT zc=u!D+0Ro9b>l`b$^1d#@zXtuirHTkkLKS4-`$x|=uXA=?|B{%+<)MC4Ep}bga=jf zx3OOQ>$Lvvr`|g!Mt*#*LiS_+;7Wj4Alcv7t`f zs9t~72TupTbRuCwUC_yLa&20j|_=wKv%FvZ}QFoNA) zG$?HPs7GOY-J^pbzmRyh4i5N5c^-rOCwLwY_$S8yNeAd}xDL|3z;$5!2y{TyeXsSt zj*O2ZHHaA|ZQy5Y9SYgHudwuL_5?&ovWXeC4Xf>HjMNzn;X92XU5!927l_Z|5!`}& zj9S`|7MY-nWy0W;!KC4kFOXLXa*b%eQKes5RZss zPb~w=wmP6ZLkJ-UE=tm$nwPNQ)RHM10?k4BO2s0ANyVD(B9a4Zu@ zg~m55Xd}mBQ<8pGEEn8xplezA&5C*HNN!mEjdr59UYLteSA9WR&2DaB7kQEh3cCTbl*9Ugh9k41k>gO)P-mI9Lg{`PiebrhuvbTmk z6vK^rk}INz-C2F>{0`>hYsL4{DuQ zH8Z|?Q}gy?Wx9BkOmG8!MrRdN`wTRSA!OI8O3ghAax6D0Xc%j>v{q3>nSAG=7`2z8 zkRf_rNZ87+f>0gJ4T&77!|a&K&_evRfQ1eU<%}LhI;ZVDsr6fBod6>mWxdF8-TTbQ zRMRtyl_a3;m=$>tEH8RiZ%$G z!bf{*#?`3uHbKs3aTE2!bwXS50VJAnK5+yqFCG-qZuKZgN3E^;6kUza29z6;G;B9R z(??XaaZuR2uZD9>)s8)IQ)8UkcT@&O-fU4O`d%zI#3mLcRcTbEEYb)gt5)+t;P=W5 z&Y2|xK3TPd4+Fn1*I60g_@Ie-Q?YhIF5>6jKv<4^ewGmGuEN>t|9%Nox~uxi_U^w0 zTdXhz&whUW-+R&Zo|^w=_u-Is&mLmR!L@4y)C**uFqBO_6{Nbr*kYtwmA0@WOPn_o z`ZeKX{i=;D?g^dRbLh|)+V=huL9Urf3e%gYlfxXE+0;#fP&xHOoO+uDsL9-mIQ6iE zOYU8@mBqdB2eta>S5}~Q;M5&dSy;cOfT|LNHc}gLYTf`)XT-JR)M|1ZIdk=K7Wd~r zsO!RR+y!dHubF*Zx!OAQhS>W!i;-n&I!pPUPa0=PlU>R52IC3rOLT7$?_KLUM`>9t zQ5AV*%T3L!Ovy(&spUs;fIdoU{i}BEf$o4=5!l;G%Iowm7VGB=c33R410N^|{FjGE zSDL4-qnCDRhui&*G>I!C>WHhTCuR5GoHFZj+7*18`n!z4#&Cw7(Y(if% zd5VQie_#JNsP0qBz@Zq#>u;28`)Z!1efd@E2J81d3f&iACR@R3UVznXB#4rnL{+JS zGF&%69~3?u0|po0)@q0l6)^^cimI9!k9gcI*CnFowK|nP>R$b$ookbihb=uf42^DR zd}__sKSrZCr$Y?O>tc{dFoJj<=R^P z4ph~|@Y&oD#)uul0-7$`upv*ycv?4Ha8EcO{P@G_Vf($mWT*^JS4E1N*)wjxb!}?s zZU1eHVDCO-;H_)Lse9U5wdtZuZ%I_AL$s}C$5@BrmD|uaQ-p4I_2o3m_zs1F{Z*?r zOSEvb??$78???nwIvuo@&7HsNld78cuf1k-=g%OFF-L&8^K7*hq3As7<1pq4up%#U zljHa#heA1bj8k~Ifj*2a>p7<4o9H72%lhG=fj&~VYzQ9A&@&Gv87Vrzz}LUpy{~Q( z#93wWL+%4PrjU^*=-cJlF$-agKKd8#g8&_a6v+53(hTQ`%8He>`oWtCT}>0n&Fs^2 zgRf7$!K0hO1?tdMwD0-p{1<7mFA3oT!uwEA5>~qu6C2*c=8|f}q9BW2WuWyX<~r%k z?8X6{SW3SD#5R2?5R)bXabGbv8 zu-%!Z;=cmK#_+*LH3aS736TShzrpa5EwD*ai!PAA6olRm@2`j8@VS`|foaR-N(c_l zrg-wg$XGva$Q}@_y;8+jl1A)18?sgX8Ng1-g&=Oa#ka;YvK*CLVRE{7m|XmW$N^!H z7ieZXOfJ<}bGyo5Yn4TVu)Ii*u;N1mJ&~`jXVGPG?L^G`qJ?@1(Wgx`NhYc5)%0Nt z^!F1*!|>{JfwZWq9-;TJ=!J3ZT2Y5qWY=QeAM^BSld1q)80UoMXX0V_#<51gzF^Up z1D4-GXJbD6qQMrAn6+;WL3c0&GFd03G zv2| zp2Mh%3f3-(#GNg=-`4QaEJ3JnSSALwJ6KFjOs0xI?SaAz^zAodhh4UQrl=)u0CZf# zCT=xgeF_p8vhc8-XDvR!3q{8>NTGJen18ASjLdB7S2Rm!n1u8?^14TJj z0<56S)~XD!_j+dxz7$wQM{AV^5+eaNWTauc7=_IL(tVaC)U{e>-&^iJ;ICu!3Dx^N z?o-Ej2Cb>?{eF5ZNyX@icDax3@a1Z-Lo;a_*x}j-vETtp<|FWct2sw-BbnBbF1!53 zgn>q0AZesPI(D$pVXXP(`Z-&xb|F}&ccL?L1L4t;&jH{ncQUCrLBN zNt3$$xbz|+nAG-2E*%P!E48^Q{^e_6riPuP?kDzbW}k>04^Cia4zcP!H3Ptb157(r z{Ei;QPsI3vMkEv@u#6wIW5NtZ*2U`|#C^|iPQ92e2(^ws8wrCy6Z>jz=Y9_BzaFI1 zoM~$%CKziT=Y9b_Z{j^?|BzQc;bUChLBtpG^SQ2_Ag>MNMH7Apc^$aCS3%w`u_!hn z%AZT`!+RWVr|4)_W?X)Olfb_MlJyDW05FIH9s$WSvFK95MCkby?|J-^M108lZrU{G zdot#WaVFAs@5dJEkCu0}DJD&-^Sut{Fw6JFI15SC4zC8mH-_JnGJ9mq-^Q7AqLVt& z8Qpg8CysZusl$N%ZJZ67PH{VY<5+crl=(u&0M%QsJ(my+sn}-TGSC_lH_pe6Z;IMSOeGMwF0eCbEl!-Nu-ADNfqegwY)|Ihr$5)S@zBHXV2 zmpk~;ySN{9hyN!(D&+i6{D=ZunFeQ~<0ewPrln{&!G1#=uVd?6-GhmJH!a1R^~Jv? zIBv+|N&ed50wl>PB$UDtirrFdli^2s<9}z^60r*LWL*bCoao6`8KhqrsN8+_>*w=r zVw~qJId8J`mWk&+n@+who1u%s-o~CK@SyTibyQmXqC^Mal!aVf<5LZFLXV<3lB=j4 z#@HRLG>s^eG1N%V*7&ehrAA2HCSOvZevfad`d{wN?0KtD+_`*tY;4`NRvKTFxqP7U zTgF9T)z(Ur0INaa&0RHY!d3&TPhk^7366-83=nB}Bq;OLHg^pyEQ%*#s^l^?9%Z_RApB@Bo;3P|d5M1s^91Y$J-a#h@GtxSa5w@T| za@?_X;Iw2ns`FIhN6@6;08b^pF1WKq%5oR?Z7XFbtZYUW;{k^SY5kSqbXhf{=ksm7-e;fAz;hKvL#qs4`$ zWI^Zqff+jkRjUH?@&d)rn0a3_3(he!YM3Q!nM*T6?XjUf55i^5;Tz@LAqm&8ok!X( zcx9UUlULMJ;ZG`?k}vyszU#2oSTRx%ZnWd*+{pg8!s89X8*RsNT%&-+M$LBAn8 zD6crDNX2qOV17X|B{i24(mz&|WPcSlpzjXb)#-zc5tUG@GvP$>Hx1P4ByPwsRrE{N zmf${RI`Sa__UIKA+2n;oQq`H^`r!h0=0KxF4+hKtir~F9J_<;W*I{jruvK|kq|lu; zlvUAOdN~aW9D%SsDLLM1AZPfkclgcCk;Zl(A!DYD{eER8zg1>G-`rYBZoA)A8{e8+ zP=l5Ryt`O?ur6j}HQWSTpL=n@BVPVWFpOQ~wXDf9*;x-Ze6?c7N+b&^&W&FF-|*gk zpil%?%*6qP2OKdB|ELixDRWS0F!U%AmS+tLIiE(2)G@mEz%JFHXhGm0a@Za`z<&AT z@n6+TNmG5Q+{No}Q{w*Wm`(B&uSTEtDe7<3?f?+AT({om!DL>Mm#om#&MKBfw=FL5 zs)?}(3%nAfUWwUW>w=`jP~AROu@zp$dA$*oopxl$9xN`gZ86JhebACLMuC>GP#aC! z&nu3I=|p1M7AJe{W#nmnW@(R5_6myS(R^dnh+RBuP$=<%bqNNqo&xp5@@Vdawro+6 z@juPuxCQp=vFI$u2F3$%P#STQSAw!yfTg-L3wGF@;UHvwBl2>H~z zgs)KeZMt(CIub0Vj0AiB@gyN+c(vdI33A@*I%5eS1vXFhFPy}nhl!^c@A3-zkPocw zXj2+h>Qj=0l?YIu7LygDhhEoeNbzonqa!UJ3P|QmBCoU!& zx*|u*oaYGV&@|Qs>+$Hy&(P-;Wr(g@&nqUuL9eO$5KX{C%iw>$UeoWP*=0AReuR)x z&YVb@Zesc@a;bSmLABcF5@RCUY(3B&Lrk*6WGv`w`r9sWFb-ZC1>ojV9BkT+gRkIV z$y6NtA``%<(*+L4!R;>aW*jW5!NCJAum}ftW#Ql#IJkq0gXcx#oOj~jl07&$-VZoO zWa8lVY+Q_BF90i}aPX`c0Fy4`VAeq#Jm>!ohL= zIOi2O*qn!h54gaIIJnRSzKMeyT;OCJEHB2vLoV>UIGB}$gIjQL`du8Hd>H4v5(i7x z;NZ^L_yn)wVDnTQ{K5t9!@<0xIQTsr-2D~~HqFJsN4)_oU510d=5N$Z)O|@=YhKZJ LMUDTTvFG}~f`f)V diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl deleted file mode 100644 index 2e1f937..0000000 --- a/piet-gpu/shader/gen/kernel4.hlsl +++ /dev/null @@ -1,1304 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdAlphaRef -{ - uint offset; -}; - -struct CmdAlpha -{ - float alpha; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct CmdTag -{ - uint tag; - uint flags; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 _vector; - float y_edge; - TileSegRef next; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); - -RWByteAddressBuffer _297 : register(u0, space0); -ByteAddressBuffer _1681 : register(t1, space0); -RWByteAddressBuffer _2506 : register(u2, space0); -RWTexture2D image_atlas : register(u4, space0); -RWTexture2D gradients : register(u5, space0); -RWTexture2D image : register(u3, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -uint spvPackUnorm4x8(float4 value) -{ - uint4 Packed = uint4(round(saturate(value) * 255.0)); - return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); -} - -float4 spvUnpackUnorm4x8(uint value) -{ - uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); - return float4(Packed) / 255.0; -} - -Alloc slice_mem(Alloc a, uint offset, uint size) -{ - Alloc _310 = { a.offset + offset }; - return _310; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _297.Load(offset * 4 + 12); - return v; -} - -CmdTag Cmd_tag(Alloc a, CmdRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - CmdTag _669 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _669; -} - -CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdStroke s; - s.tile_ref = raw0; - s.half_width = asfloat(raw1); - return s; -} - -CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) -{ - CmdStrokeRef _685 = { ref.offset + 4u }; - Alloc param = a; - CmdStrokeRef param_1 = _685; - return CmdStroke_read(param, param_1); -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -TileSeg TileSeg_read(Alloc a, TileSegRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - TileSeg s; - s.origin = float2(asfloat(raw0), asfloat(raw1)); - s._vector = float2(asfloat(raw2), asfloat(raw3)); - s.y_edge = asfloat(raw4); - TileSegRef _826 = { raw5 }; - s.next = _826; - return s; -} - -uint2 chunk_offset(uint i) -{ - return uint2((i % 2u) * 8u, (i / 2u) * 4u); -} - -CmdFill CmdFill_read(Alloc a, CmdFillRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdFill s; - s.tile_ref = raw0; - s.backdrop = int(raw1); - return s; -} - -CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) -{ - CmdFillRef _675 = { ref.offset + 4u }; - Alloc param = a; - CmdFillRef param_1 = _675; - return CmdFill_read(param, param_1); -} - -CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdAlpha s; - s.alpha = asfloat(raw0); - return s; -} - -CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) -{ - CmdAlphaRef _695 = { ref.offset + 4u }; - Alloc param = a; - CmdAlphaRef param_1 = _695; - return CmdAlpha_read(param, param_1); -} - -CmdColor CmdColor_read(Alloc a, CmdColorRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdColor s; - s.rgba_color = raw0; - return s; -} - -CmdColor Cmd_Color_read(Alloc a, CmdRef ref) -{ - CmdColorRef _705 = { ref.offset + 4u }; - Alloc param = a; - CmdColorRef param_1 = _705; - return CmdColor_read(param, param_1); -} - -float3 fromsRGB(float3 srgb) -{ - return srgb; -} - -float4 unpacksRGB(uint srgba) -{ - float4 color = spvUnpackUnorm4x8(srgba).wzyx; - float3 param = color.xyz; - return float4(fromsRGB(param), color.w); -} - -CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - CmdLinGrad s; - s.index = raw0; - s.line_x = asfloat(raw1); - s.line_y = asfloat(raw2); - s.line_c = asfloat(raw3); - return s; -} - -CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) -{ - CmdLinGradRef _715 = { ref.offset + 4u }; - Alloc param = a; - CmdLinGradRef param_1 = _715; - return CmdLinGrad_read(param, param_1); -} - -CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21); - CmdRadGrad s; - s.index = raw0; - s.mat = float4(asfloat(raw1), asfloat(raw2), asfloat(raw3), asfloat(raw4)); - s.xlat = float2(asfloat(raw5), asfloat(raw6)); - s.c1 = float2(asfloat(raw7), asfloat(raw8)); - s.ra = asfloat(raw9); - s.roff = asfloat(raw10); - return s; -} - -CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) -{ - CmdRadGradRef _725 = { ref.offset + 4u }; - Alloc param = a; - CmdRadGradRef param_1 = _725; - return CmdRadGrad_read(param, param_1); -} - -CmdImage CmdImage_read(Alloc a, CmdImageRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdImage s; - s.index = raw0; - s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - return s; -} - -CmdImage Cmd_Image_read(Alloc a, CmdRef ref) -{ - CmdImageRef _735 = { ref.offset + 4u }; - Alloc param = a; - CmdImageRef param_1 = _735; - return CmdImage_read(param, param_1); -} - -void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) -{ - float4 rgba[8]; - for (uint i = 0u; i < 8u; i++) - { - uint param = i; - int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; - float4 fg_rgba = image_atlas[uv]; - float3 param_1 = fg_rgba.xyz; - float3 _1653 = fromsRGB(param_1); - fg_rgba.x = _1653.x; - fg_rgba.y = _1653.y; - fg_rgba.z = _1653.z; - rgba[i] = fg_rgba; - } - spvReturnValue = rgba; -} - -float3 tosRGB(float3 rgb) -{ - return rgb; -} - -uint packsRGB(inout float4 rgba) -{ - float3 param = rgba.xyz; - rgba = float4(tosRGB(param), rgba.w); - return spvPackUnorm4x8(rgba.wzyx); -} - -CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdEndClip s; - s.blend = raw0; - return s; -} - -CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) -{ - CmdEndClipRef _745 = { ref.offset + 4u }; - Alloc param = a; - CmdEndClipRef param_1 = _745; - return CmdEndClip_read(param, param_1); -} - -float3 screen(float3 cb, float3 cs) -{ - return (cb + cs) - (cb * cs); -} - -float3 hard_light(float3 cb, float3 cs) -{ - float3 param = cb; - float3 param_1 = (cs * 2.0f) - 1.0f.xxx; - float3 _889 = screen(param, param_1); - float3 _893 = (cb * 2.0f) * cs; - bool3 _898 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); - return float3(_898.x ? _893.x : _889.x, _898.y ? _893.y : _889.y, _898.z ? _893.z : _889.z); -} - -float color_dodge(float cb, float cs) -{ - if (cb == 0.0f) - { - return 0.0f; - } - else - { - if (cs == 1.0f) - { - return 1.0f; - } - else - { - return min(1.0f, cb / (1.0f - cs)); - } - } -} - -float color_burn(float cb, float cs) -{ - if (cb == 1.0f) - { - return 1.0f; - } - else - { - if (cs == 0.0f) - { - return 0.0f; - } - else - { - return 1.0f - min(1.0f, (1.0f - cb) / cs); - } - } -} - -float3 soft_light(float3 cb, float3 cs) -{ - float3 _904 = sqrt(cb); - float3 _917 = ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb; - bool3 _921 = bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z); - float3 d = float3(_921.x ? _917.x : _904.x, _921.y ? _917.y : _904.y, _921.z ? _917.z : _904.z); - float3 _932 = cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)); - float3 _942 = cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)); - bool3 _944 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); - return float3(_944.x ? _942.x : _932.x, _944.y ? _942.y : _932.y, _944.z ? _942.z : _932.z); -} - -float sat(float3 c) -{ - return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z)); -} - -void set_sat_inner(inout float cmin, inout float cmid, inout float cmax, float s) -{ - if (cmax > cmin) - { - cmid = ((cmid - cmin) * s) / (cmax - cmin); - cmax = s; - } - else - { - cmid = 0.0f; - cmax = 0.0f; - } - cmin = 0.0f; -} - -float3 set_sat(inout float3 c, float s) -{ - if (c.x <= c.y) - { - if (c.y <= c.z) - { - float param = c.x; - float param_1 = c.y; - float param_2 = c.z; - float param_3 = s; - set_sat_inner(param, param_1, param_2, param_3); - c.x = param; - c.y = param_1; - c.z = param_2; - } - else - { - if (c.x <= c.z) - { - float param_4 = c.x; - float param_5 = c.z; - float param_6 = c.y; - float param_7 = s; - set_sat_inner(param_4, param_5, param_6, param_7); - c.x = param_4; - c.z = param_5; - c.y = param_6; - } - else - { - float param_8 = c.z; - float param_9 = c.x; - float param_10 = c.y; - float param_11 = s; - set_sat_inner(param_8, param_9, param_10, param_11); - c.z = param_8; - c.x = param_9; - c.y = param_10; - } - } - } - else - { - if (c.x <= c.z) - { - float param_12 = c.y; - float param_13 = c.x; - float param_14 = c.z; - float param_15 = s; - set_sat_inner(param_12, param_13, param_14, param_15); - c.y = param_12; - c.x = param_13; - c.z = param_14; - } - else - { - if (c.y <= c.z) - { - float param_16 = c.y; - float param_17 = c.z; - float param_18 = c.x; - float param_19 = s; - set_sat_inner(param_16, param_17, param_18, param_19); - c.y = param_16; - c.z = param_17; - c.x = param_18; - } - else - { - float param_20 = c.z; - float param_21 = c.y; - float param_22 = c.x; - float param_23 = s; - set_sat_inner(param_20, param_21, param_22, param_23); - c.z = param_20; - c.y = param_21; - c.x = param_22; - } - } - } - return c; -} - -float lum(float3 c) -{ - float3 f = float3(0.300000011920928955078125f, 0.589999973773956298828125f, 0.10999999940395355224609375f); - return dot(c, f); -} - -float3 clip_color(inout float3 c) -{ - float3 param = c; - float L = lum(param); - float n = min(c.x, min(c.y, c.z)); - float x = max(c.x, max(c.y, c.z)); - if (n < 0.0f) - { - c = L.xxx + (((c - L.xxx) * L) / (L - n).xxx); - } - if (x > 1.0f) - { - c = L.xxx + (((c - L.xxx) * (1.0f - L)) / (x - L).xxx); - } - return c; -} - -float3 set_lum(float3 c, float l) -{ - float3 param = c; - float3 param_1 = c + (l - lum(param)).xxx; - float3 _1048 = clip_color(param_1); - return _1048; -} - -float3 mix_blend(float3 cb, float3 cs, uint mode) -{ - float3 b = 0.0f.xxx; - switch (mode) - { - case 1u: - { - b = cb * cs; - break; - } - case 2u: - { - float3 param = cb; - float3 param_1 = cs; - b = screen(param, param_1); - break; - } - case 3u: - { - float3 param_2 = cs; - float3 param_3 = cb; - b = hard_light(param_2, param_3); - break; - } - case 4u: - { - b = min(cb, cs); - break; - } - case 5u: - { - b = max(cb, cs); - break; - } - case 6u: - { - float param_4 = cb.x; - float param_5 = cs.x; - float param_6 = cb.y; - float param_7 = cs.y; - float param_8 = cb.z; - float param_9 = cs.z; - b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); - break; - } - case 7u: - { - float param_10 = cb.x; - float param_11 = cs.x; - float param_12 = cb.y; - float param_13 = cs.y; - float param_14 = cb.z; - float param_15 = cs.z; - b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); - break; - } - case 8u: - { - float3 param_16 = cb; - float3 param_17 = cs; - b = hard_light(param_16, param_17); - break; - } - case 9u: - { - float3 param_18 = cb; - float3 param_19 = cs; - b = soft_light(param_18, param_19); - break; - } - case 10u: - { - b = abs(cb - cs); - break; - } - case 11u: - { - b = (cb + cs) - ((cb * 2.0f) * cs); - break; - } - case 12u: - { - float3 param_20 = cb; - float3 param_21 = cs; - float param_22 = sat(param_20); - float3 _1340 = set_sat(param_21, param_22); - float3 param_23 = cb; - float3 param_24 = _1340; - float param_25 = lum(param_23); - b = set_lum(param_24, param_25); - break; - } - case 13u: - { - float3 param_26 = cs; - float3 param_27 = cb; - float param_28 = sat(param_26); - float3 _1354 = set_sat(param_27, param_28); - float3 param_29 = cb; - float3 param_30 = _1354; - float param_31 = lum(param_29); - b = set_lum(param_30, param_31); - break; - } - case 14u: - { - float3 param_32 = cb; - float3 param_33 = cs; - float param_34 = lum(param_32); - b = set_lum(param_33, param_34); - break; - } - case 15u: - { - float3 param_35 = cs; - float3 param_36 = cb; - float param_37 = lum(param_35); - b = set_lum(param_36, param_37); - break; - } - default: - { - b = cs; - break; - } - } - return b; -} - -float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) -{ - float fa = 0.0f; - float fb = 0.0f; - switch (mode) - { - case 1u: - { - fa = 1.0f; - fb = 0.0f; - break; - } - case 2u: - { - fa = 0.0f; - fb = 1.0f; - break; - } - case 3u: - { - fa = 1.0f; - fb = 1.0f - as; - break; - } - case 4u: - { - fa = 1.0f - ab; - fb = 1.0f; - break; - } - case 5u: - { - fa = ab; - fb = 0.0f; - break; - } - case 6u: - { - fa = 0.0f; - fb = as; - break; - } - case 7u: - { - fa = 1.0f - ab; - fb = 0.0f; - break; - } - case 8u: - { - fa = 0.0f; - fb = 1.0f - as; - break; - } - case 9u: - { - fa = ab; - fb = 1.0f - as; - break; - } - case 10u: - { - fa = 1.0f - ab; - fb = as; - break; - } - case 11u: - { - fa = 1.0f - ab; - fb = 1.0f - as; - break; - } - case 12u: - { - fa = 1.0f; - fb = 1.0f; - break; - } - case 13u: - { - return min(1.0f.xxxx, float4((cs * as) + (cb * ab), as + ab)); - } - default: - { - break; - } - } - float as_fa = as * fa; - float ab_fb = ab * fb; - float3 co = (cs * as_fa) + (cb * ab_fb); - return float4(co, as_fa + ab_fb); -} - -float4 mix_blend_compose(float4 backdrop, float4 src, uint mode) -{ - if ((mode & 32767u) == 3u) - { - return (backdrop * (1.0f - src.w)) + src; - } - float inv_src_a = 1.0f / (src.w + 1.0000000036274937255387218471014e-15f); - float3 cs = src.xyz * inv_src_a; - float inv_backdrop_a = 1.0f / (backdrop.w + 1.0000000036274937255387218471014e-15f); - float3 cb = backdrop.xyz * inv_backdrop_a; - uint blend_mode = mode >> uint(8); - float3 param = cb; - float3 param_1 = cs; - uint param_2 = blend_mode; - float3 blended = mix_blend(param, param_1, param_2); - cs = lerp(cs, blended, backdrop.w.xxx); - uint comp_mode = mode & 255u; - if (comp_mode == 3u) - { - float3 co = lerp(backdrop.xyz, cs, src.w.xxx); - return float4(co, src.w + (backdrop.w * (1.0f - src.w))); - } - else - { - float3 param_3 = cb; - float3 param_4 = cs; - float param_5 = backdrop.w; - float param_6 = src.w; - uint param_7 = comp_mode; - return mix_compose(param_3, param_4, param_5, param_6, param_7); - } -} - -CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdJump s; - s.new_ref = raw0; - return s; -} - -CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) -{ - CmdJumpRef _755 = { ref.offset + 4u }; - Alloc param = a; - CmdJumpRef param_1 = _755; - return CmdJump_read(param, param_1); -} - -void comp_main() -{ - uint tile_ix = (gl_WorkGroupID.y * _1681.Load(12)) + gl_WorkGroupID.x; - Alloc _1696; - _1696.offset = _1681.Load(28); - Alloc param; - param.offset = _1696.offset; - uint param_1 = tile_ix * 1024u; - uint param_2 = 1024u; - Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1705 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1705; - uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 12); - cmd_ref.offset += 4u; - uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); - float2 xy = float2(xy_uint); - float4 rgba[8]; - for (uint i = 0u; i < 8u; i++) - { - rgba[i] = 0.0f.xxxx; - } - uint clip_depth = 0u; - float df[8]; - TileSegRef tile_seg_ref; - float area[8]; - uint blend_stack[4][8]; - uint base_ix_1; - uint bg_rgba; - while (true) - { - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4).tag; - if (tag == 0u) - { - break; - } - switch (tag) - { - case 2u: - { - Alloc param_5 = cmd_alloc; - CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6); - for (uint k = 0u; k < 8u; k++) - { - df[k] = 1000000000.0f; - } - TileSegRef _1805 = { stroke.tile_ref }; - tile_seg_ref = _1805; - do - { - uint param_7 = tile_seg_ref.offset; - uint param_8 = 24u; - bool param_9 = true; - Alloc param_10 = new_alloc(param_7, param_8, param_9); - TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11); - float2 line_vec = seg._vector; - for (uint k_1 = 0u; k_1 < 8u; k_1++) - { - float2 dpos = (xy + 0.5f.xx) - seg.origin; - uint param_12 = k_1; - dpos += float2(chunk_offset(param_12)); - float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0f, 1.0f); - df[k_1] = min(df[k_1], length((line_vec * t) - dpos)); - } - tile_seg_ref = seg.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_2 = 0u; k_2 < 8u; k_2++) - { - area[k_2] = clamp((stroke.half_width + 0.5f) - df[k_2], 0.0f, 1.0f); - } - cmd_ref.offset += 12u; - break; - } - case 1u: - { - Alloc param_13 = cmd_alloc; - CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14); - for (uint k_3 = 0u; k_3 < 8u; k_3++) - { - area[k_3] = float(fill.backdrop); - } - TileSegRef _1924 = { fill.tile_ref }; - tile_seg_ref = _1924; - do - { - uint param_15 = tile_seg_ref.offset; - uint param_16 = 24u; - bool param_17 = true; - Alloc param_18 = new_alloc(param_15, param_16, param_17); - TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19); - for (uint k_4 = 0u; k_4 < 8u; k_4++) - { - uint param_20 = k_4; - float2 my_xy = xy + float2(chunk_offset(param_20)); - float2 start = seg_1.origin - my_xy; - float2 end = start + seg_1._vector; - float2 window = clamp(float2(start.y, end.y), 0.0f.xx, 1.0f.xx); - if (window.x != window.y) - { - float2 t_1 = (window - start.y.xx) / seg_1._vector.y.xx; - float2 xs = float2(lerp(start.x, end.x, t_1.x), lerp(start.x, end.x, t_1.y)); - float xmin = min(min(xs.x, xs.y), 1.0f) - 9.9999999747524270787835121154785e-07f; - float xmax = max(xs.x, xs.y); - float b = min(xmax, 1.0f); - float c = max(b, 0.0f); - float d = max(xmin, 0.0f); - float a = ((b + (0.5f * ((d * d) - (c * c)))) - xmin) / (xmax - xmin); - area[k_4] += (a * (window.x - window.y)); - } - area[k_4] += (sign(seg_1._vector.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0f, 0.0f, 1.0f)); - } - tile_seg_ref = seg_1.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_5 = 0u; k_5 < 8u; k_5++) - { - area[k_5] = min(abs(area[k_5]), 1.0f); - } - cmd_ref.offset += 12u; - break; - } - case 3u: - { - for (uint k_6 = 0u; k_6 < 8u; k_6++) - { - area[k_6] = 1.0f; - } - cmd_ref.offset += 4u; - break; - } - case 4u: - { - Alloc param_21 = cmd_alloc; - CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22); - for (uint k_7 = 0u; k_7 < 8u; k_7++) - { - area[k_7] = alpha.alpha; - } - cmd_ref.offset += 8u; - break; - } - case 5u: - { - Alloc param_23 = cmd_alloc; - CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24); - uint param_25 = color.rgba_color; - float4 fg = unpacksRGB(param_25); - for (uint k_8 = 0u; k_8 < 8u; k_8++) - { - float4 fg_k = fg * area[k_8]; - rgba[k_8] = (rgba[k_8] * (1.0f - fg_k.w)) + fg_k; - } - cmd_ref.offset += 8u; - break; - } - case 6u: - { - Alloc param_26 = cmd_alloc; - CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27); - float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; - for (uint k_9 = 0u; k_9 < 8u; k_9++) - { - uint param_28 = k_9; - float2 chunk_xy = float2(chunk_offset(param_28)); - float my_d = (d_1 + (lin.line_x * chunk_xy.x)) + (lin.line_y * chunk_xy.y); - int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); - float4 fg_rgba = gradients[int2(x, int(lin.index))]; - float3 param_29 = fg_rgba.xyz; - float3 _2257 = fromsRGB(param_29); - fg_rgba.x = _2257.x; - fg_rgba.y = _2257.y; - fg_rgba.z = _2257.z; - float4 fg_k_1 = fg_rgba * area[k_9]; - rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; - } - cmd_ref.offset += 20u; - break; - } - case 7u: - { - Alloc param_30 = cmd_alloc; - CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31); - for (uint k_10 = 0u; k_10 < 8u; k_10++) - { - uint param_32 = k_10; - float2 my_xy_1 = xy + float2(chunk_offset(param_32)); - my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; - float ba = dot(my_xy_1, rad.c1); - float ca = rad.ra * dot(my_xy_1, my_xy_1); - float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; - int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); - float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; - float3 param_33 = fg_rgba_1.xyz; - float3 _2367 = fromsRGB(param_33); - fg_rgba_1.x = _2367.x; - fg_rgba_1.y = _2367.y; - fg_rgba_1.z = _2367.z; - float4 fg_k_2 = fg_rgba_1 * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; - } - cmd_ref.offset += 48u; - break; - } - case 8u: - { - Alloc param_34 = cmd_alloc; - CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35); - uint2 param_36 = xy_uint; - CmdImage param_37 = fill_img; - float4 _2410[8]; - fillImage(_2410, param_36, param_37); - float4 img[8] = _2410; - for (uint k_11 = 0u; k_11 < 8u; k_11++) - { - float4 fg_k_3 = img[k_11] * area[k_11]; - rgba[k_11] = (rgba[k_11] * (1.0f - fg_k_3.w)) + fg_k_3; - } - cmd_ref.offset += 12u; - break; - } - case 9u: - { - if (clip_depth < 4u) - { - for (uint k_12 = 0u; k_12 < 8u; k_12++) - { - float4 param_38 = float4(rgba[k_12]); - uint _2472 = packsRGB(param_38); - blend_stack[clip_depth][k_12] = _2472; - rgba[k_12] = 0.0f.xxxx; - } - } - else - { - uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - for (uint k_13 = 0u; k_13 < 8u; k_13++) - { - float4 param_39 = float4(rgba[k_13]); - uint _2519 = packsRGB(param_39); - _2506.Store((base_ix + k_13) * 4 + 0, _2519); - rgba[k_13] = 0.0f.xxxx; - } - } - clip_depth++; - cmd_ref.offset += 4u; - break; - } - case 10u: - { - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41); - clip_depth--; - if (clip_depth >= 4u) - { - base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - } - for (uint k_14 = 0u; k_14 < 8u; k_14++) - { - if (clip_depth < 4u) - { - bg_rgba = blend_stack[clip_depth][k_14]; - } - else - { - bg_rgba = _2506.Load((base_ix_1 + k_14) * 4 + 0); - } - uint param_42 = bg_rgba; - float4 bg = unpacksRGB(param_42); - float4 fg_1 = rgba[k_14] * area[k_14]; - float4 param_43 = bg; - float4 param_44 = fg_1; - uint param_45 = end_clip.blend; - rgba[k_14] = mix_blend_compose(param_43, param_44, param_45); - } - cmd_ref.offset += 8u; - break; - } - case 11u: - { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - CmdRef _2618 = { Cmd_Jump_read(param_46, param_47).new_ref }; - cmd_ref = _2618; - cmd_alloc.offset = cmd_ref.offset; - break; - } - } - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint param_48 = i_1; - float3 param_49 = rgba[i_1].xyz; - image[int2(xy_uint + chunk_offset(param_48))] = float4(tosRGB(param_49), rgba[i_1].w); - } -} - -[numthreads(8, 4, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl deleted file mode 100644 index 1cf8cb3..0000000 --- a/piet-gpu/shader/gen/kernel4.msl +++ /dev/null @@ -1,1355 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Alloc -{ - uint offset; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdAlphaRef -{ - uint offset; -}; - -struct CmdAlpha -{ - float alpha; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct CmdTag -{ - uint tag; - uint flags; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 vector; - float y_edge; - TileSegRef next; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct BlendBuf -{ - uint blend_mem[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u); - -static inline __attribute__((always_inline)) -Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size) -{ - return Alloc{ a.offset + offset }; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_297) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_297.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_297); - return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdStroke s; - s.tile_ref = raw0; - s.half_width = as_type(raw1); - return s; -} - -static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_297); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_297); - TileSeg s; - s.origin = float2(as_type(raw0), as_type(raw1)); - s.vector = float2(as_type(raw2), as_type(raw3)); - s.y_edge = as_type(raw4); - s.next = TileSegRef{ raw5 }; - return s; -} - -static inline __attribute__((always_inline)) -uint2 chunk_offset(thread const uint& i) -{ - return uint2((i % 2u) * 8u, (i / 2u) * 4u); -} - -static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdFill s; - s.tile_ref = raw0; - s.backdrop = int(raw1); - return s; -} - -static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdAlpha s; - s.alpha = as_type(raw0); - return s; -} - -static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdColor s; - s.rgba_color = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -float3 fromsRGB(thread const float3& srgb) -{ - return srgb; -} - -static inline __attribute__((always_inline)) -float4 unpacksRGB(thread const uint& srgba) -{ - float4 color = unpack_unorm4x8_to_float(srgba).wzyx; - float3 param = color.xyz; - return float4(fromsRGB(param), color.w); -} - -static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - CmdLinGrad s; - s.index = raw0; - s.line_x = as_type(raw1); - s.line_y = as_type(raw2); - s.line_c = as_type(raw3); - return s; -} - -static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_297); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_297); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_297); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_297); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_297); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19, v_297); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21, v_297); - CmdRadGrad s; - s.index = raw0; - s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); - s.xlat = float2(as_type(raw5), as_type(raw6)); - s.c1 = float2(as_type(raw7), as_type(raw8)); - s.ra = as_type(raw9); - s.roff = as_type(raw10); - return s; -} - -static inline __attribute__((always_inline)) -CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; - return CmdRadGrad_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdImage s; - s.index = raw0; - s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - return s; -} - -static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImage& cmd_img, texture2d image_atlas) -{ - spvUnsafeArray rgba; - for (uint i = 0u; i < 8u; i++) - { - uint param = i; - int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; - float4 fg_rgba = image_atlas.read(uint2(uv)); - float3 param_1 = fg_rgba.xyz; - float3 _1653 = fromsRGB(param_1); - fg_rgba.x = _1653.x; - fg_rgba.y = _1653.y; - fg_rgba.z = _1653.z; - rgba[i] = fg_rgba; - } - return rgba; -} - -static inline __attribute__((always_inline)) -float3 tosRGB(thread const float3& rgb) -{ - return rgb; -} - -static inline __attribute__((always_inline)) -uint packsRGB(thread float4& rgba) -{ - float3 param = rgba.xyz; - rgba = float4(tosRGB(param), rgba.w); - return pack_float_to_unorm4x8(rgba.wzyx); -} - -static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdEndClip s; - s.blend = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -float3 screen(thread const float3& cb, thread const float3& cs) -{ - return (cb + cs) - (cb * cs); -} - -static inline __attribute__((always_inline)) -float3 hard_light(thread const float3& cb, thread const float3& cs) -{ - float3 param = cb; - float3 param_1 = (cs * 2.0) - float3(1.0); - return select(screen(param, param_1), (cb * 2.0) * cs, cs <= float3(0.5)); -} - -static inline __attribute__((always_inline)) -float color_dodge(thread const float& cb, thread const float& cs) -{ - if (cb == 0.0) - { - return 0.0; - } - else - { - if (cs == 1.0) - { - return 1.0; - } - else - { - return fast::min(1.0, cb / (1.0 - cs)); - } - } -} - -static inline __attribute__((always_inline)) -float color_burn(thread const float& cb, thread const float& cs) -{ - if (cb == 1.0) - { - return 1.0; - } - else - { - if (cs == 0.0) - { - return 0.0; - } - else - { - return 1.0 - fast::min(1.0, (1.0 - cb) / cs); - } - } -} - -static inline __attribute__((always_inline)) -float3 soft_light(thread const float3& cb, thread const float3& cs) -{ - float3 d = select(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, cb <= float3(0.25)); - return select(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), cs <= float3(0.5)); -} - -static inline __attribute__((always_inline)) -float sat(thread const float3& c) -{ - return fast::max(c.x, fast::max(c.y, c.z)) - fast::min(c.x, fast::min(c.y, c.z)); -} - -static inline __attribute__((always_inline)) -void set_sat_inner(thread float& cmin, thread float& cmid, thread float& cmax, thread const float& s) -{ - if (cmax > cmin) - { - cmid = ((cmid - cmin) * s) / (cmax - cmin); - cmax = s; - } - else - { - cmid = 0.0; - cmax = 0.0; - } - cmin = 0.0; -} - -static inline __attribute__((always_inline)) -float3 set_sat(thread float3& c, thread const float& s) -{ - if (c.x <= c.y) - { - if (c.y <= c.z) - { - float param = c.x; - float param_1 = c.y; - float param_2 = c.z; - float param_3 = s; - set_sat_inner(param, param_1, param_2, param_3); - c.x = param; - c.y = param_1; - c.z = param_2; - } - else - { - if (c.x <= c.z) - { - float param_4 = c.x; - float param_5 = c.z; - float param_6 = c.y; - float param_7 = s; - set_sat_inner(param_4, param_5, param_6, param_7); - c.x = param_4; - c.z = param_5; - c.y = param_6; - } - else - { - float param_8 = c.z; - float param_9 = c.x; - float param_10 = c.y; - float param_11 = s; - set_sat_inner(param_8, param_9, param_10, param_11); - c.z = param_8; - c.x = param_9; - c.y = param_10; - } - } - } - else - { - if (c.x <= c.z) - { - float param_12 = c.y; - float param_13 = c.x; - float param_14 = c.z; - float param_15 = s; - set_sat_inner(param_12, param_13, param_14, param_15); - c.y = param_12; - c.x = param_13; - c.z = param_14; - } - else - { - if (c.y <= c.z) - { - float param_16 = c.y; - float param_17 = c.z; - float param_18 = c.x; - float param_19 = s; - set_sat_inner(param_16, param_17, param_18, param_19); - c.y = param_16; - c.z = param_17; - c.x = param_18; - } - else - { - float param_20 = c.z; - float param_21 = c.y; - float param_22 = c.x; - float param_23 = s; - set_sat_inner(param_20, param_21, param_22, param_23); - c.z = param_20; - c.y = param_21; - c.x = param_22; - } - } - } - return c; -} - -static inline __attribute__((always_inline)) -float lum(thread const float3& c) -{ - float3 f = float3(0.300000011920928955078125, 0.589999973773956298828125, 0.10999999940395355224609375); - return dot(c, f); -} - -static inline __attribute__((always_inline)) -float3 clip_color(thread float3& c) -{ - float3 param = c; - float L = lum(param); - float n = fast::min(c.x, fast::min(c.y, c.z)); - float x = fast::max(c.x, fast::max(c.y, c.z)); - if (n < 0.0) - { - c = float3(L) + (((c - float3(L)) * L) / float3(L - n)); - } - if (x > 1.0) - { - c = float3(L) + (((c - float3(L)) * (1.0 - L)) / float3(x - L)); - } - return c; -} - -static inline __attribute__((always_inline)) -float3 set_lum(thread const float3& c, thread const float& l) -{ - float3 param = c; - float3 param_1 = c + float3(l - lum(param)); - float3 _1048 = clip_color(param_1); - return _1048; -} - -static inline __attribute__((always_inline)) -float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const uint& mode) -{ - float3 b = float3(0.0); - switch (mode) - { - case 1u: - { - b = cb * cs; - break; - } - case 2u: - { - float3 param = cb; - float3 param_1 = cs; - b = screen(param, param_1); - break; - } - case 3u: - { - float3 param_2 = cs; - float3 param_3 = cb; - b = hard_light(param_2, param_3); - break; - } - case 4u: - { - b = fast::min(cb, cs); - break; - } - case 5u: - { - b = fast::max(cb, cs); - break; - } - case 6u: - { - float param_4 = cb.x; - float param_5 = cs.x; - float param_6 = cb.y; - float param_7 = cs.y; - float param_8 = cb.z; - float param_9 = cs.z; - b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); - break; - } - case 7u: - { - float param_10 = cb.x; - float param_11 = cs.x; - float param_12 = cb.y; - float param_13 = cs.y; - float param_14 = cb.z; - float param_15 = cs.z; - b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); - break; - } - case 8u: - { - float3 param_16 = cb; - float3 param_17 = cs; - b = hard_light(param_16, param_17); - break; - } - case 9u: - { - float3 param_18 = cb; - float3 param_19 = cs; - b = soft_light(param_18, param_19); - break; - } - case 10u: - { - b = abs(cb - cs); - break; - } - case 11u: - { - b = (cb + cs) - ((cb * 2.0) * cs); - break; - } - case 12u: - { - float3 param_20 = cb; - float3 param_21 = cs; - float param_22 = sat(param_20); - float3 _1340 = set_sat(param_21, param_22); - float3 param_23 = cb; - float3 param_24 = _1340; - float param_25 = lum(param_23); - b = set_lum(param_24, param_25); - break; - } - case 13u: - { - float3 param_26 = cs; - float3 param_27 = cb; - float param_28 = sat(param_26); - float3 _1354 = set_sat(param_27, param_28); - float3 param_29 = cb; - float3 param_30 = _1354; - float param_31 = lum(param_29); - b = set_lum(param_30, param_31); - break; - } - case 14u: - { - float3 param_32 = cb; - float3 param_33 = cs; - float param_34 = lum(param_32); - b = set_lum(param_33, param_34); - break; - } - case 15u: - { - float3 param_35 = cs; - float3 param_36 = cb; - float param_37 = lum(param_35); - b = set_lum(param_36, param_37); - break; - } - default: - { - b = cs; - break; - } - } - return b; -} - -static inline __attribute__((always_inline)) -float4 mix_compose(thread const float3& cb, thread const float3& cs, thread const float& ab, thread const float& as, thread const uint& mode) -{ - float fa = 0.0; - float fb = 0.0; - switch (mode) - { - case 1u: - { - fa = 1.0; - fb = 0.0; - break; - } - case 2u: - { - fa = 0.0; - fb = 1.0; - break; - } - case 3u: - { - fa = 1.0; - fb = 1.0 - as; - break; - } - case 4u: - { - fa = 1.0 - ab; - fb = 1.0; - break; - } - case 5u: - { - fa = ab; - fb = 0.0; - break; - } - case 6u: - { - fa = 0.0; - fb = as; - break; - } - case 7u: - { - fa = 1.0 - ab; - fb = 0.0; - break; - } - case 8u: - { - fa = 0.0; - fb = 1.0 - as; - break; - } - case 9u: - { - fa = ab; - fb = 1.0 - as; - break; - } - case 10u: - { - fa = 1.0 - ab; - fb = as; - break; - } - case 11u: - { - fa = 1.0 - ab; - fb = 1.0 - as; - break; - } - case 12u: - { - fa = 1.0; - fb = 1.0; - break; - } - case 13u: - { - return fast::min(float4(1.0), float4((cs * as) + (cb * ab), as + ab)); - } - default: - { - break; - } - } - float as_fa = as * fa; - float ab_fb = ab * fb; - float3 co = (cs * as_fa) + (cb * ab_fb); - return float4(co, as_fa + ab_fb); -} - -static inline __attribute__((always_inline)) -float4 mix_blend_compose(thread const float4& backdrop, thread const float4& src, thread const uint& mode) -{ - if ((mode & 32767u) == 3u) - { - return (backdrop * (1.0 - src.w)) + src; - } - float inv_src_a = 1.0 / (src.w + 1.0000000036274937255387218471014e-15); - float3 cs = src.xyz * inv_src_a; - float inv_backdrop_a = 1.0 / (backdrop.w + 1.0000000036274937255387218471014e-15); - float3 cb = backdrop.xyz * inv_backdrop_a; - uint blend_mode = mode >> uint(8); - float3 param = cb; - float3 param_1 = cs; - uint param_2 = blend_mode; - float3 blended = mix_blend(param, param_1, param_2); - cs = mix(cs, blended, float3(backdrop.w)); - uint comp_mode = mode & 255u; - if (comp_mode == 3u) - { - float3 co = mix(backdrop.xyz, cs, float3(src.w)); - return float4(co, src.w + (backdrop.w * (1.0 - src.w))); - } - else - { - float3 param_3 = cb; - float3 param_4 = cs; - float param_5 = backdrop.w; - float param_6 = src.w; - uint param_7 = comp_mode; - return mix_compose(param_3, param_4, param_5, param_6, param_7); - } -} - -static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdJump s; - s.new_ref = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_297); -} - -kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], device BlendBuf& _2506 [[buffer(2)]], texture2d image [[texture(3)]], texture2d image_atlas [[texture(4)]], texture2d gradients [[texture(5)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x; - Alloc param; - param.offset = _1681.conf.ptcl_alloc.offset; - uint param_1 = tile_ix * 1024u; - uint param_2 = 1024u; - Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; - uint blend_offset = v_297.memory[cmd_ref.offset >> uint(2)]; - cmd_ref.offset += 4u; - uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); - float2 xy = float2(xy_uint); - spvUnsafeArray rgba; - for (uint i = 0u; i < 8u; i++) - { - rgba[i] = float4(0.0); - } - uint clip_depth = 0u; - spvUnsafeArray df; - TileSegRef tile_seg_ref; - spvUnsafeArray area; - spvUnsafeArray, 4> blend_stack; - uint base_ix_1; - uint bg_rgba; - while (true) - { - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_297).tag; - if (tag == 0u) - { - break; - } - switch (tag) - { - case 2u: - { - Alloc param_5 = cmd_alloc; - CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_297); - for (uint k = 0u; k < 8u; k++) - { - df[k] = 1000000000.0; - } - tile_seg_ref = TileSegRef{ stroke.tile_ref }; - do - { - uint param_7 = tile_seg_ref.offset; - uint param_8 = 24u; - bool param_9 = true; - Alloc param_10 = new_alloc(param_7, param_8, param_9); - TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_297); - float2 line_vec = seg.vector; - for (uint k_1 = 0u; k_1 < 8u; k_1++) - { - float2 dpos = (xy + float2(0.5)) - seg.origin; - uint param_12 = k_1; - dpos += float2(chunk_offset(param_12)); - float t = fast::clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); - df[k_1] = fast::min(df[k_1], length((line_vec * t) - dpos)); - } - tile_seg_ref = seg.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_2 = 0u; k_2 < 8u; k_2++) - { - area[k_2] = fast::clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0); - } - cmd_ref.offset += 12u; - break; - } - case 1u: - { - Alloc param_13 = cmd_alloc; - CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_297); - for (uint k_3 = 0u; k_3 < 8u; k_3++) - { - area[k_3] = float(fill.backdrop); - } - tile_seg_ref = TileSegRef{ fill.tile_ref }; - do - { - uint param_15 = tile_seg_ref.offset; - uint param_16 = 24u; - bool param_17 = true; - Alloc param_18 = new_alloc(param_15, param_16, param_17); - TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297); - for (uint k_4 = 0u; k_4 < 8u; k_4++) - { - uint param_20 = k_4; - float2 my_xy = xy + float2(chunk_offset(param_20)); - float2 start = seg_1.origin - my_xy; - float2 end = start + seg_1.vector; - float2 window = fast::clamp(float2(start.y, end.y), float2(0.0), float2(1.0)); - if ((isunordered(window.x, window.y) || window.x != window.y)) - { - float2 t_1 = (window - float2(start.y)) / float2(seg_1.vector.y); - float2 xs = float2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y)); - float xmin = fast::min(fast::min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07; - float xmax = fast::max(xs.x, xs.y); - float b = fast::min(xmax, 1.0); - float c = fast::max(b, 0.0); - float d = fast::max(xmin, 0.0); - float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin); - area[k_4] += (a * (window.x - window.y)); - } - area[k_4] += (sign(seg_1.vector.x) * fast::clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0)); - } - tile_seg_ref = seg_1.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_5 = 0u; k_5 < 8u; k_5++) - { - area[k_5] = fast::min(abs(area[k_5]), 1.0); - } - cmd_ref.offset += 12u; - break; - } - case 3u: - { - for (uint k_6 = 0u; k_6 < 8u; k_6++) - { - area[k_6] = 1.0; - } - cmd_ref.offset += 4u; - break; - } - case 4u: - { - Alloc param_21 = cmd_alloc; - CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_297); - for (uint k_7 = 0u; k_7 < 8u; k_7++) - { - area[k_7] = alpha.alpha; - } - cmd_ref.offset += 8u; - break; - } - case 5u: - { - Alloc param_23 = cmd_alloc; - CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_297); - uint param_25 = color.rgba_color; - float4 fg = unpacksRGB(param_25); - for (uint k_8 = 0u; k_8 < 8u; k_8++) - { - float4 fg_k = fg * area[k_8]; - rgba[k_8] = (rgba[k_8] * (1.0 - fg_k.w)) + fg_k; - } - cmd_ref.offset += 8u; - break; - } - case 6u: - { - Alloc param_26 = cmd_alloc; - CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_297); - float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; - for (uint k_9 = 0u; k_9 < 8u; k_9++) - { - uint param_28 = k_9; - float2 chunk_xy = float2(chunk_offset(param_28)); - float my_d = (d_1 + (lin.line_x * chunk_xy.x)) + (lin.line_y * chunk_xy.y); - int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); - float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); - float3 param_29 = fg_rgba.xyz; - float3 _2257 = fromsRGB(param_29); - fg_rgba.x = _2257.x; - fg_rgba.y = _2257.y; - fg_rgba.z = _2257.z; - float4 fg_k_1 = fg_rgba * area[k_9]; - rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; - } - cmd_ref.offset += 20u; - break; - } - case 7u: - { - Alloc param_30 = cmd_alloc; - CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_297); - for (uint k_10 = 0u; k_10 < 8u; k_10++) - { - uint param_32 = k_10; - float2 my_xy_1 = xy + float2(chunk_offset(param_32)); - my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; - float ba = dot(my_xy_1, rad.c1); - float ca = rad.ra * dot(my_xy_1, my_xy_1); - float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; - int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); - float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); - float3 param_33 = fg_rgba_1.xyz; - float3 _2367 = fromsRGB(param_33); - fg_rgba_1.x = _2367.x; - fg_rgba_1.y = _2367.y; - fg_rgba_1.z = _2367.z; - float4 fg_k_2 = fg_rgba_1 * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; - } - cmd_ref.offset += 48u; - break; - } - case 8u: - { - Alloc param_34 = cmd_alloc; - CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_297); - uint2 param_36 = xy_uint; - CmdImage param_37 = fill_img; - spvUnsafeArray img; - img = fillImage(param_36, param_37, image_atlas); - for (uint k_11 = 0u; k_11 < 8u; k_11++) - { - float4 fg_k_3 = img[k_11] * area[k_11]; - rgba[k_11] = (rgba[k_11] * (1.0 - fg_k_3.w)) + fg_k_3; - } - cmd_ref.offset += 12u; - break; - } - case 9u: - { - if (clip_depth < 4u) - { - for (uint k_12 = 0u; k_12 < 8u; k_12++) - { - float4 param_38 = float4(rgba[k_12]); - uint _2472 = packsRGB(param_38); - blend_stack[clip_depth][k_12] = _2472; - rgba[k_12] = float4(0.0); - } - } - else - { - uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - for (uint k_13 = 0u; k_13 < 8u; k_13++) - { - float4 param_39 = float4(rgba[k_13]); - uint _2519 = packsRGB(param_39); - _2506.blend_mem[base_ix + k_13] = _2519; - rgba[k_13] = float4(0.0); - } - } - clip_depth++; - cmd_ref.offset += 4u; - break; - } - case 10u: - { - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41, v_297); - clip_depth--; - if (clip_depth >= 4u) - { - base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - } - for (uint k_14 = 0u; k_14 < 8u; k_14++) - { - if (clip_depth < 4u) - { - bg_rgba = blend_stack[clip_depth][k_14]; - } - else - { - bg_rgba = _2506.blend_mem[base_ix_1 + k_14]; - } - uint param_42 = bg_rgba; - float4 bg = unpacksRGB(param_42); - float4 fg_1 = rgba[k_14] * area[k_14]; - float4 param_43 = bg; - float4 param_44 = fg_1; - uint param_45 = end_clip.blend; - rgba[k_14] = mix_blend_compose(param_43, param_44, param_45); - } - cmd_ref.offset += 8u; - break; - } - case 11u: - { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_297).new_ref }; - cmd_alloc.offset = cmd_ref.offset; - break; - } - } - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint param_48 = i_1; - float3 param_49 = rgba[i_1].xyz; - image.write(float4(tosRGB(param_49), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48)))); - } -} - diff --git a/piet-gpu/shader/gen/kernel4.spv b/piet-gpu/shader/gen/kernel4.spv deleted file mode 100644 index f9198c3a4fbf266f6ea52255db6b8a9f9d3efa2b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 66368 zcmbWg1)yeC+4jGOb7tsnrMtU(h#8upq&=MJfhi^^sR5)E1f)ahQc3{<1r$&uL_$hJ z8VNzV;rsobXJ2R5+28wr{rqoTtn0e&d)@b{9nZ7RIny!C;?q`DpK69`x@zAUs@5@k zH4REt%~8f&hi!z@)t%TJ$Kt7)sg)SZ+u-J`~8II&+<-AtK{ z_Uf}!rlW{$)4~6yp}rf^N#=pub`9KNhk@H}(KTfJ_^yeQ4jDbHYxJnG!@7o!nlNn0 zq)`VB({H-MZ`9Z!qbCn79s1%?_CfA&`i~wxZU}WhdRY&z8aI6S#9@;vlxgXIRgE2X zP*=Bp%S>K%ef~dq)%CG;Rx@Fn>f2e(44*jaP_PG z&WzO}@QI^G4H@?T8HmB+)0g#Fkh;5tJFA7Lr}n3_S{Occ-2JM>;G2yZy7Qz72Z?jj+qV>d} zLHE%Advfd9?s21Atw}eJ<6D}#J-&`=8L(;l^1tQ5G#W5!^yt<|+$U-tQ^(O+tpuOs7zTF_*?;JSapU#r%bBnmb>03yj-c*Lul{@DrT?pf)BiQvaW@+`dfWsD zVE@%TruM&YwHBQGEkA9pgPO;L5rezCh8UsU*Y(ii`yV@-%yn;_>vd?KzOHNh)?D|i zHfYD)cGTE`6S{}Crn}W&b&s;Y8^P&A?c3g8Y96D;4jpzdwPRItzFXG|#Ase$TG}B~ zHLqGNjSGVNYE$^s9RG2obzkUrG1g6pk+Jp%XRMpIWAD^GwD(xmJ<73e38x>mZ@a&0 z9#hBKS*bbJG2N3|w_|TLrh7!|Qehv|JjyxP5l$ake%ka?^O!mZZOu8D+IF^T zS9sd~$KIzWMc;R(efqu&IDOx(9k>7J@guug^;VzMJf`OFTkQcSf6GsseOL47wlTHW zfPJXr_aFP2tVKWfGF~fIN3}O~_F7jv?iOQ*ZZ>+xdRb$|_&wuP>ov`kqY`%s3D?jA9bx-U-zJ+E&wm;7 z_#);BJuzpiPN`$o7yADqYI{z9T*N)Oi2KtzZhhhWuj8Ip#67(y?yS{0b==k^?Z1fX zTAfwIJi906Y}JKz%=(1>FQT^R;QS))1wC|L;Sa+Sy*$psQ$~bMb0$`~2=#-3XpIWWumvW0yH_`0DFhTIa;| z=tBm#=lcdR7YIMg{6_3!T-VTXbaul_B3hbxvgk( zd(q}D@W}27L-~+4V&tUWeY>M*b7#@!9`M9*!zcYuZT?WSxx1%L-|9Zwbhi@#TYdVI z_7?72JwQEr@|ac!fGzv|)NOme>R~Z=#P+jo+s*y-m%`_9FdqiH62}SC=g}UYj_NV6 z_lCaJ^wr;MfBMxmYV6oy6Y43OI@g`mleQhx{w(b0#HZTaTOctytEXGM+e>gO*E7^! zhC+`f^J96R-88>G9xQWAhBDKdgHQpQeCEh>9t#}>Po8ay`&s*X;&)eWK&xi8# zx5%~z+j|W^d}H@>*}tiKe}?L)-bb4_VF=dF>I38S^uKTQ34GGHi8~G4bm}^PjK*gh z{QFg(gNIKTH)g7bc{;1l&?oA|q2JfklgEzd(_724)gk@9s{PvMDc_6qsj40?yWVQw z*}91Mc?wFrX{&zt4H-Fk?EYQtE0SL^v~3+#Uu>gVelu0Gf`@Y-Z(YlmIdJl7>reee z+tRpVta#w4)p9=1jQ_#hxb534=HQuP)R+i0kJ=xku|ej`};3=$h1rO_<>267OkS{?mY4d$Arb6kFextpiM( zR($)pY#m>`E_f7YgniryJfZs_Kk~MCe|R~D%^G}*w*RW}->PBXrWfz1wuiGliQQ4{ z4o=>^!DZgAUhO)o?gk&y_V39%ykQ^Ni}S1rPu|Jk^??=|@QZU437|8c|qX@h^> z_FpIdUp4ICG`L?%uwLuNzY|`rPrnAAuI<07C%otXObz=iy*Sr5c=pL!;Ovw2z~w&K zuvfdzYU2j)-}dj>C!06y1A6g}YD;+X?g38TeZXbjeS5X*tOobu9n~;+;*16-&UkPc zXJW5*oz>)CyrVi8o;XK<6X$4f8Rv(++I3dP_u?JZN$_@@e$}a9KAU!Rj~&`I^>b@y zbw;l?oz+BEh&JU==bye}O8o_#u9!#+cU&(!wsxt`71u+QG$bGH3g z&-pq}!#-bwf2Zx=b3I$IVPCkx7j66ZT+fzh*q7?Xc`ksr_hV;uU@u!|b#O11XZ2izzu4d}H~6~^{(ggh+~A)z_*V`7ZG-pi+j}of*WfcX_^b^+ zXM@kv;0rYPLJhuHgD=tGYc=?~4ZePZZ`k0QGdHNA;|BE^dC-e6hCYbFNOtcuk*HzP{C4;P&UK_NQI@@NTqWL-Fsd z-h-Fdt@j)J!(O~^^%dOD`Cat~XmfV1e3HuM6FmCertu}AxJ{i6ISpvC7`90$)Xq$~2 zJA72@eL%lT?&0U;yg&Ezvw9!&v0cMP4;wRV>?HO>^l4i9`0hy~Ck`VF`m`;7Zc|n3 zl{nuzb=l!oo3@QhPc?Fw-^EQeCLT-O9$)_dbn5M1 z#njX1>9KW>9Xl?D#F_zp(uD4@6Kg|a%~;3k8a#O1!9BL=TH~Av+fW{M#*7;~Zq(33 zoH{=9Ly4Z@F-5U_78pZ<7Y;k>se!)IEn9dVx2m-v&G)4Kkc})W3xZK z``Wh6QQJn19bSyRZ!71V*jOk>&pfrYxmudDXzsQ)cT00bQ~NUyT6^BIyR@~(GA}mY zVYl}}X>a#wKI~@j89vs``~0Ez+~ zI3_iZA)E)+URzG9HQSs6_PeU=JI~Pz(aP)1r44>rgI`g{$oJ&ez?a6|>o_fvTx(;vc z!J)&(w|_>3>pB{9M;@LCA2;lu_TqeYfcu`Zb)Dt}9Xedx zubKhex^}ssPDbrB2f9}e`!W}}?903jK3{`>r@_D5i}$S-g13H#f)^#pwf1TB` z;Gukj+_rUAtMqE$QLP2{#ff9tsc5$+xSJpCNxLy{zYh%`3opkpz8B{mDLiv@A~@qe zsh3}8b#jBB(%`2y_-Va(M|B1~W4!=8=8!JGhHcID&wBZGR+qxL;u#4i{z5^aw%}{&K}Fjk;PU=Hw3lyZHM|$^s7Aquk7$o?zlLor zJo9^C%dbn?8q+Co_FijVe$uO5XLVXH&N{&}{!78_n0>2XfZN~4x0bCH>zBRSc2+mQ zSy!94^4L!CN19&8onR(;vLne z@XW`z;Bvq9;l?{PPe(Nmd~p2?`Cagk8ZQRU-dYKqv8(}}I+o6A1NhWbZEbyv~;&)`k{S(o;0iz{irO3kBw)*jY=U(!)MQ0tzH zPlBiB=v%!CCrVfQ?%m$cul4fjtlntwf5Xdu`9rB0^Gx8uJUH@es)T(O$)qPfj2MkfC3LJ@RkMMs=!+pc$)%mTj1>q>}Rree>z*A$I7`J zi_N)oeVjkJ&&k0)yNdnH<{_R~;7J9ZTwp)b#sA0xAJc<7Tc7J@#L4Ge_e(y%xi>u2 z{H);7)YuZe_LY9keWh0JtHg7x#`7~qG(VH6%~fcoR&x&G$8Ec%El_CgWBnE>G%hJE zZHYqTv}|e16q@6=?FxnFIiTjdm*n*`mf9MH=I1N5bqdYTRcad)n&VX4q|p2frM7vY z`PoTr%R+N6sBK$l&XL-nLi00|+Af9WXC}4X3(e0;YI_%&^Q*RRq4~K;ZD^snPt-;h znxBKzMz^%ip3ge=drNk8KKo=1!)@c?epc&fVV+sQ<@q7^{BXZ{?YSB5nwi(Oeh%>v zce*)hylH`bj*i}4;OX5s9JZamz$+DalrXT40~y;{Qy6 zpDXb51%9c(uN3&z0>4(^*9-hcfqh0xUZ2l`H!Sc*1>U&8KEK6(V1c(T@HPeBp};#9 zc&7sIT;N>_yla7d=1boF3hXmo^aBfgNP!P6@DT+*s=!kUd~|`2De$oczPP|XC${_0 zS=|Trl=2$wwJ*Up3dlh)^ z9^6@VfxVWQdroG|Ia`#x_E9`HtKeEJLh)Dx?&Zs(J{Mk|7V1u|x_lubm`0yP`L8+T zX{)boya%aiTa>l-{$u;-YThr5Gga$XdGEDNG-H_4`EK{Ovvp5USDOc$_u)k-3z2t8 zYW>vAu@tqM@!V&|TAn(wyf+(5%{f_-dJ#$=it$#W)>qwlD^sf(&$h<%zMXi!bTyuu zwzX=TZPy0NZL6Pl@8xN`A=tKR3!(bEHrgCZig7lEYcq~*jI%j);%osnj@rD8VF0!5 zJ8M3$=I(=qSd}fQZL6QQ*llCJt*G@epRvuiJ$3T!05+eR`FwV8zS{Bm%mUU|-FQ1u zs~OL>#@mfL@qDH*o|?8jsMD7{!E)Q`r`_MXNZWnEwpG*CO|8wmgQ?}V-It<$1a;bu z1lv|k+o;-R+x@_D+v=x%9Cg}`2isQ7eRDurOQp&!Z2hZHZ@8@V4vzlYRj5=e!9PF6Y+^<(q53Gyp zrS3Sdq}FCW+qg%5Nv)69Bd-_NQX5O%HrLfy-J;L0sr9j)J~!1^-Lfb?f2=jz$RDCM zW}Ek|9w~Uc&9v1MXzta-N$vIR#lqhh=3kU^=5^`;6g5lQXt%HLQj7I5&U-ahkInvl zS?le%oz=JSezlMD)X}HS?VtXgaJA%_uEuKSoxbL-@A7bC&44x?^#br2!S+ebIh=`F z%{aET&5GFP0DCOU3fXqP+Gf1PYukd@@?C-J@8^2$DcbDQGSv28`}#HabH7^hEeFqD zSfREVC-w~rfAxWI{$Ck?JAm`yc@zH~3;TYxeWluee9axF?T@Ot=acP?e?D6JC%+V) z{#{-Bd#=U*S+vfAzXI=D@VDU^kM)OjjI^JIWYZV^Gr+wM+P~Srj?eR0zIe?C*8YxT zd9>u0uMAIq`L^)csBOPX-F{Hbht}L{jP`?S-d*z(Ywoom{XG}$TQ;ly%P=wa?~2;) z`MH&o#i74^P>Ub6argmk`zGN>w)w^@FiW1VN5R!nJQt?G-HW=W-HDRwsQ%$UZt+d} zC(g-mwUqds)^4{+{7#3frNr;-w%%-4SZ?C#CZ^I8~wCJzlX(jPE%H3#s@I+^H}8MOl>ar*q^a|d*sx)dK!NCf~PK} z-97OR-1&_EH~PcpXD5y(fa}(WQXVWjt=sE9#v~m2dS1)@j>*G)aTwTr;q}G!J07l? zVYKWgK#Z?$d$~Ukpl17%YE0F-_xK%B8!y!Q?nvA$`uzVX%{c!P_x|Vi34Zrv|4yZL zKGf{L-0u|BGRNn_orBEr`EXoY?SEGL+TV=d?;sq1{BMPO-e$cYg=Y_>_PRa|Nn9_l z>-Mb!ZeDd`Y3m1GX8*C3-CMEK5otMKe&O8 z`gXIdzn}R%(*AAuTm|2Qi`*)3`+P2#;k5cJKM$_vwbU`34<=np_g<6FkFJBC6Xp8d zPwkwm*$=s&71h!&KZkjEKDpogeB@l2%X`UM%z^&yZ4dY8XDFN!tr+&hpIxywrq43I zTQ9lq)x)iQuU>NBtA~%r?tArcYu~Gfdq4Jldbrno-^Kk9HI}f+^-FdjrbG|zdxAy&cxa0G?zi^*1 ze1{%x|9poY?tJ+UJ>1%N=;6+X@6p4peV1Nx-=&8;AHGWuH=pm)!<`S`rH5PlEH|}cj_hgy?VIw>AUrkzu4fuTQBXtUoW}u*h}tv_HgUP zxz?22_wC_6mWL;r7?>dcz%$-}i=V_xs#%KO6ZzKHTx_TX4tYyZG3>9{as)xYu*v$A=ql zWWm*aA0NB<{r)!G+IRBd-Wz={AFjXOwT4^!PQK*6lMmP5?@Yse=J)-2xa0HPdbs0z zqu}bkTaVq^4=g43{d&0h{9ZQP+IQ^X+WmgEhr;d-1C9ek88a@%+IN(!+qxS^YU_q-%ruB9ZM{# z>J0v4TlK_06Ks4xPbdCaaP_0=c4vcatG*FWB|d|w#s568TJl@~R`WU2KKhLLb4vQ? z^Qdio=Deu3?O5l!xYp=ytB=mbC1^g^I6m{s^%MJiV?MFZHs)NH8Dtu(%fX9K+g3mA z`uWVH%{egsm0&gFXG~YY)d$f}zfZdwtY&+kndE8z3$R+*{+Dp|yX$dX3s&jrQ$*NtfES+8G#)qJi@yI;f8&bV^@w7bu4q1;R<_rY&! zJwCU=n|tMUH1(|gZ^3?EP)~n;2R4?r#JU4)zH(pR30IHLU10N-KEH>nC*L2y=2K6; zyTQiNHi+?cFc0^D?T0qU?{lr1bFTf5V72k^fvoqRz-s#X%qtiByz3l&&U5*n!9Jf` ze?q$ls2`+wXn&}-TW4+m0yf4C^ov_d^)Oh?&jDW$Lw-NSSjHBo?IU2@IzL{k9s~Ql zt#04sk5bHSKWuNE_K$;YpLzNlSk3RLGB&kxZ1zLj6Vw^^lVD?8JMO2cpP_iz_V2ab z+A;aP-BV!wY$vu)o}eu=X`$h0%d~BmF*N>OL z+B|IcELbgb@G@BKntJ|U0joK-Pw{^Z>|wvPy-HED-(ts?ai}Nm8(`xa<3nnXFZmWj zUCyno`X@DMsyC_qZ1)y*C-vLZe#U!;n*Ue-;y+%O-lcYryhqJ#uKG8%bM`*9@4P;s zwqM50e0~DXe0~g;XFfjzXFfj#%bm|9iS;>{uW4GJue3RLA5p7|zo5?ge+f3Pwd?;i z^*0m`=jYqnZk_pYK*q?v@8bX|+4u6Vup7(R;B|K^9!N*?|7)2^SN zS+zMo_I&}c`Ly}@RqnpH0^5S%Yp89bO&>oqtJ`0nkrxK5IiJQHi2ovRb^ZMeEEoIv zH~VNYuybSW+$=%8B*nwIS*o^MXDyco8zcK@8L(RRk$iEAv5YNFpOyoAEuTWa2a;n2 zxSHQNWh^U#9ZTkZAbu;s)sLy$tqiuUx?@_FS}lEC6>J~1`FUQxE^RxQpVh$qsMT|? zSskpNey#yFX8O4%SWUm=Q;YxFVCOh|9kA`sAmhW0*T(()i=4rD5Tp#rl z>Np#MjiqfSu1v<>2+VN!u8r7WzYp{~VEgHJ7uwCqv{xI0otN-UYVO>7UiXK4UaRY4 z{7u2?>Gx*f^jrJpaPyo(jDhei!1}1C%>c0Tk@Xu0)=zyVdS;w0!H#27iuReRX^>OkKKRP8Sge=$DixawqUhfhqS5Lelcp>FF|cSzf0K; z?B4Ww(U_N^Z4XyB{|wY}+k1_%y|wMBsvY=`KH1x@$soA;FYxhRvm;o|bJTmFd=|=V z6ysT&YbWZQ$2-?HYtQ3dslP|@urIsScI(W=?qFl&Jl+GWmh)J?3&mLG6{qc9VB2OL z_6B=>&h^Lj+y_m43geQ0AFSqg%ZbwkFXQYBcRkhhv0XP<-Mu3p3^tZ}&VeD|oCDf( z4y1oW(e|X+Z|6>K8|TJ(a(ve5*D$dC%03wmR?9xore^!hz4MfH9trk7U#{~gH1)*X z53DwVl6L!p)6Ra%^)rvpF!s}D7yIdR$T)0y)))(xYxCLd0C1i)#)IYFQ&z$@0emR6 zZM2zd47Iv=B6arGB(Qm{-CGAzA4Ku6UkBHA>&)LFU}I!&9ST;<-jYwI7|Yn=wEY3t zw$6#)*Bl16Z`oVEvpO8EK92slCyoHCWxbCCyWYlfJaT>PuYEVCb#fjBHfPpmikOo1 z)~06r%!hr?x*ZL+kL7wF16R-U$+2KH`{Ol0ZoD5-+aGJ^_ju|PC?496tL@gw_akug zI`U&Qb>p8zEf=3mJ!g$ip)Q|KeggOTM1TFX+vZeiZO+eS*iQ$WPupqK@;smX6zubf zZM5lgBDK2s4C?HqGr^A4+OeKZeGbLLv7TGot+O`gfsK*9bUs)udr5v4#aPA`+jbx^ zF9iF%kyt;68_RzEj9Q-Of{VaD7pUjC;9{`)6!zjk;#~qZrn)gMpjLBTd=9$|?AWsp z2GZtoxO)0@1=x1E7hVb0N8Om0Qme)PYOq>zTmv>o&WC~I_yt@&{rn}^cIw8widxMW z-si6a8#ABlJ%_G`tHiPH;*mmmCehtn$$!Ireub)f2W)CFa&1ku1 z8^?C~+23oawWlw)fYsbb&Wrv14O~4wx7I#oe{Vxm{|){1-g>*3qVDI!-%)#bpVjtT zikkaKoHe);Z2siB3*4N)-=nE#%zpr@S+dr5gN>!l@1R_Zd%#)GvhVkzYfHR8g4HZO z4-Ta5pTNe{=DgiOt(JcN8EikxdAlF39-jxmu5;=0AY45@4}pzc`uqj1o_p}aVAs?8 zi#d#;Ph$KP-0aIEXzKBK6x{5~V`%E}c^qtC%DMU*Ts?hx0-U~>!x;KxE}sP3U;Ui7 z`>55D<7u#3Io@aB>hbw|?Nj#SSv2)LyFLfDA3nR{LKJXvwhml2u?f4 zDc8^EiQf^=Yq@pCJrg#^o$K$+@Dt(QPs_2+g03x}PiFrE+ni|XnWwqHwo}h|=LV~Le&yOW58QsVepVfv&pMggdC}~ve#t){*g10S z$?xwYsVCQWz-qQnyYGV2&bgKA=iHt{Jm=OrbGraG=QhvL3&Pd%9IZ{w_Gi}Z&!#q? zpRX6f<~)_3uNOvBH~$RSg? zb^5g$*nVccR|l(Qy|t;8bLe=pUTc8!c{efFL{m@9wZLj4=vUgU4Np7!Dc8?D`JC)` z``Y~Z57%1TdSJ(3?K*Biy&=Uz`$o0hI{n!gY>ceqCSbL!qkMgev5YNF+x}qNW_~vX zJHP7oP44`g+kV*II_)hakN?E2c){T{@GV^SSQEcU~^Nx2vrdWGg{GdE z`-0URbJ}&o)6Ra%^()uccVWhKeTQMo`VIxlwYk0{z**nnV7cq-&(V(rpGa*RZRQ$6 zt)9N@4_3>(lC~NRSJ!_(YPp}&2Vffm_U{*{Yad0eCLT+j{W1>hd|SI;4xpYu@o=0I zYrA#kZW7oS*)Nm9YS}OH@f2ejTWnkVbr9ITB-X)TV`Xm~0(Nhx>*M}86s+!d=f?N} z*f{Er=Rj(;#5@da%<#i&ZVul^9|8A$w7NdVJQA#)ejEih?xpO9^kWKKJR1Is-p{9Tcsf={8gjW%V!x8;71&pBx7c@{hu zY&-R|I}dE^@>%qJxO#jp02{k}ro9kNJ+XfVww?ME=H5R39BfW)iFFa!e9IO6ycn*Y zc9(!{tL}W9L9Hgflsf11WnlMhE+4OlJbwES|4 zv5YNF+iStLwI9EvmgoDz>%e|rsGjc&uLrB^{|jn)*5*dAd|3o8jtNhg-n5Q_p<<25dfUiFGU3eC4{|23L>I?O^kj>;7A~dh-1aY&-Sjy8~=a zZI1m0YPF30F0fiY!~7o1)U`gtXls6kxf@NNKTz^Kdk@$)7VTqMU*84X3)W`dJE`U7 z9nIq32mVvd_5CwgAIIyp;(oAmQr>$WfU6%?$A1v4Hi$NZX!8)*IO_f@7X#4#0ydX6 z`|(F=HT$9cuVA$us_9z4*YpTj&Enyh{P`5yK8&W_edg~#JqrGuT0QIZ7}$933xDp$ zbMJAuz8=QWM=fps2DXiBn>J6tZDW4h=%bc4Pl9c8e$nPBxNXv3ebh46XTY|Rd;a|$ z>^YQ}&w{-Vs_SFlp98DsUh_QIIO_K6X==5^d;x6C@PE|Y9Nq_CgnQ3X*T*$@39O!e zybLz(rHn28cm=MW`{1i!?}O^;+iPHBX-n+a!N!_W?}s%;0Iv2Ht&NUgL5DJ2rTzLxFxnvz(1w7jW%<=PpvNglseb_ z&%ln)+Ux!o)L&9O?AKSd-8y6a8f=VQ_rC$F<+?BboMJ3vi)}m5p!~2``QB(6xUuY) zKYuRIeV_yEeLy|;fljcx`@wj9!NycK#<#>$%iib*cI>$qxL>D5Q{RErG{$sbHH(Mm zpY45zFg=F=s|C^2lXoGodBYd3d2%cQ*GE0~(M7?= z(PmzMZ$T|N76;px@Fi-V_DjO`QIF43VBwm*KKsjc}euqv89<+H$Q zbsO7h&$GblU~T60_dMk0^;uv|aQQ5-7F-|4HZ5`120OO$SzsMB^*jr#3s%dsz0 z&jS77`g#~gAGNgE6l|OFSzt4`ZOm^QebmxsbFgj7XMru?wn=~WQOj5df^8#b392o@ zUPlviE4a@B>iW26w+5@{e!C6WIO_Im0JU0TZVNVM_;xinhtC4r!@cjS>*E^i09H>w z27!%RJ`3y!SI@rQNzTPjJ$>65Y%Fbwy$je_Q^;?wUBUXPr%&Gl+b8?(nB@9-FY~_U z{m43F+6|jy%zb-zxLTeCw5i!X_e$?;=5ufFi7of^#M}!_Ju&wNtL;HayM4fEXFuio zmCpk2F%Dz;EYOWD&jR~`<=T7}7y{0-z+kZ4XMr8D4F#V=Z5wUo>Y`S+eFvLz1o$R$ zX&VlfYqR|*u>RUcQp=6)@0{-kZv8uN)Y|kJMy)R1pW1WBF^vX0_trjVjHMn&@z6fL zwp*KxcM zIi#+S=g=Ww_3YC_!NyT{%oC~A67vUOV}>7A^YrU*xcgOIANzF#SY03caU|Hd zJ+hxpfvfrbyg##YD%it!SK59;xsKv^#pXDTI(z!`+Gg#ZK7;y9iidqTtF~LG4`+jo zkv^OQR?D82|CC}Z^NQ2!_~J0Qz+?SUsOpF9EB0M7y-s zn3f*xvRdY@?6!b|tko@3(ZRx(eKf1nRyob{|{~zJTI5 z9h>dc^vxKr0Xyc5@fTpnsII>}zbE8cIEU70|4Xp#v#+iNt7WfhQ*-^YzOIGb{b4-! zMB?25zJ~nyUAP#hO4Wkzqi26q0Kos z3EOYv)b7jl>sB=N_}o_e8;x;_Kw>mzEEoC}YF)jXm-R%=XEkM?-2mFLmlYOOqv zoIrcbNM8-wwy;#fjy7Z^ShT%gCC&yyzTgGr>1Yl_zbulJ_lFJd8AFvHO-p4KIM6IDm?L?2YZfWU0#5zC7w1l*TieZKft;6C&r6t z>WT3ZSk3b+?Oukbo#T}2XTNgK@ORLR>2ufX*z(-<8d$E)=dOQ(jb)rSz;d6By0N_p zzL45B+VpvqT0Q-J3tV0o-iE7{V^)j*JGK9H%mI&})xY3sxqrO}_VE6t?OlqRYbQ>e z_rc{g>jSu2dCmF|UiR%HxLSG5`WUWWUb8-dtC!cTPvPd!mg~`H;G3Dd@|yKIntFV` zsC~+7)|Y7NxfXo|_Bx=Rb^jV{EN$`o2CSd9_I;cA-9=7dU+pRO_{j^hZ4VxCMmTQ=N8j7)uEl%6%z^+BwP7hbhy3YvqaNV`d zK$)Fl9C6~z3^q>qEO7gj`|qsa3mC6{+Tu4GSex_U-yD!TPV<_>T=AU)>^z3g2{%XP zd@is)>bYLc4bJsSd)m$e)|R~TGOzjLJ3rXI>FeJ;kb8|BKzjd1t?#wfwflDx)Y5hV zaC;uwpKBL{do5Jg#~A)STlKVA7@Rgfqb~yYvx2%lOQ898Z`JcW?%%#Oj=KHu?=h$) z=Hg&uCg+l1`>L;ht3e){e=pbe+Wfl>@?7JV1$(V2zi(L%O8-aIWkAjSIQg^-|sADsSXoExC2$7e&ZeRLg-A=jrIlVg+n zJL;~5wLHJa{xsaSo6s)Tul`_tt-XG2M!h-3!?s)0cI)(Q0N5D01`PzO<@zPxlwvGn zi*4)ogIj^Ota%4x*&6Kk4T-%i+}QSW8)~`duD;uWeFvcK`m1dZHikC8KlI(<4q)}f z90WF|bLl?b3G6;r*Jl8le?t+ofGY@ zsq5>ySf{`HfbDP2tM7x=a;|DqvwilA>-#k`vkt|-=>~26o)cB;_x9Ij4ShcIZ`uXV zTw^Xt)hrbIF&lOM&fDw^(l+VWoN(LcJeUivPtF6mKKh$~?%MthamDGI&q#AqJpB9E z_Q$_tGiQy>>EF(?Kl4!A-rwO$dzP;C?`u%;_Z6qM_iy@TZ1aO{@4sn~@qCvU#+F+z z*x*anJpEq)ZcO*TZRMGZg}~;}W*oWKBFDBc*zws$+g0RKUj(e}Q`#&>?cuY5`l1vy z`z&_Mi`QH|d6x&9H}~ij;rV+m+TynoSeyNGT=Ljf2CJDvoOyN47NL0f_sFv@3)a{< zTd2UkPigg+I(uR@;^|-B<5ovgPyRK)#>xI(6RwZC*J%5(7FgXriql7bhId(t$5NE^ zap@W_PQ47pel16xwO*GP<=SslaQ9ySg6p?ggKtrA{kJN({@WDXzahAN%`-3S!R=Q$ zFYBYJ=R1lG!D>G5yKeSbE%UZ9SS|Cm30Td2qL1^oDcHHNyv!T#GL z`e@U~IZ;o(0pK#EEEqly9t7T620jp(?eIKk=?y-I0&V_5@nB-!|qkqd1Q9S2sk zy}zq{0LAv%V{(0L?>?GP+udX0>@j~KFS|3J8T$~|)sntJxk!CjW?w3PjL-4qs|%}PmFR6epGPR;FN;vcWQ&5UU2=-EV%w>H~6^)*Z;zT>;Llx zzqsJ~UtVziuPpf0;A?80`8)yMAHQ->oQS5LJ#i9PEqlT_QOg?r7_62(aWYt~+!H^6 zZ$ci|*g2DnoiF{fCr$&qC-l*#k8`e`d_M)3`Obi=4Xk~`_-+ACN-vw~B^J^dT{S2IZu9@6eu9yDFcM;fp`e@U~HCIo*OTcBm%iwC4 z);{LD0-SuVx!hP+QtO|5SAor^k2ZbWGwR9r3$SyXJ@HGh`y}U>T%Y(~3wDljFTM_} zW_$0&*Mn`J>!e(tw7&uDnA847uv*&x3T*rAH@QA(|7);gNc)?>YH5En*!Ig&9Isp- z+j||j1+3j`ggDoTEwOD&@z{!zYsA(y-hz4?ir0wksB?|DjToM5;dd0=bMp5E*YEBI zzqjD}-&b(`?{Dx23$Fje1=s(P27j#J`ae-{{hw;^X9}+W^99%c9}WIe!S#Qw;QGJO z;BOXO|9=%+|MwdF{etWNal!Tfw81|wxc*-iT>ozxoaO9UKm9uku7AGz zXDPUUBY5_j=lr^z{O+SUsI#Yk3s%oH_jh2moCEGLwfNr&R?9W_F0fj8&HV%1z3VyP z9+ZpSfBNSdaSzyQgg)BzaSy8}-ygwczCXd$2G%}all}}&KKHQPSngl_lkWkr`Sj7I zkLQ4T@;wAD^Zf;`R$g=e3Qs=I0lBf<|N1B2qhRytqfH;rA@$^Y99-u68(git<~|8e zKF=Y!u{?kDPrj$Y=F>-;KAwZ>$@h0~neREc+OxHf^Z5ce`8)^Z#`65rKlxq+n@=BY z`aDCeo_sHZ%Y3iG)n2K6%=bDt`8Eks-J^4NWm-#+|t9@AenC}yC@_7xB8_Vm5{>k?l z*nIkE)5mL&dh&e%F7tf}SDT^sG2hqVr;j#$TGv2u@=Xse^UVlX^WW@AzL~+v=QU7n zEU$n1C*Q1K^Xa2aAFrY6$u|f14#t~n;*qHwiA#2-lf#lW^vH|OHiOHjO9x2 zMGWun;p-LL=YS0huHVKD-oN1bZ(eZy2NZmI=5))NJ6`Q;!|j*nMCNE6H1#|;tP57l zKCsVf@n0XTmgj~Iz-o)vF};p#1a~f+Q^zD1J0AUWUT*^Syw*pXKF*1H@@)z(^KAxK z8(8Gq0-k)%iQHJuhyKYo5NtkuwCUrVswdx8VEd3c+ZyaV<~kzRC;r=jjd3q??02Qx zfz`I9xbOVEy6wU0_uwPn0j#D^;tT>CCvA2FtEJ6OU^Ux#P7NZ?&R}C{v%mhEWxG=B zZ>}A3{mkRO{a$T%Pm8ms_rSIf#bZxO_Vivg-ko}HihKI|)Y-#(5To4F`xM+gx^Kbt z8(i?A;9)h-eD4W2Pq|0;LQ~Hk-5acyJ!&7+GMC>6t7VUNfz`@A+6{MX?g9HK7u!Gm zvuB2Y-81@V)5kHYC*N?eTCOuAz-o@k>(ofFtlQ8eN=6CO~hG~ZfwIS9)l@a zlOZ+UmwG70H5pEwH5pBea!tk;+%=h4aQ!A1{2=hbHP4!ift#mXld)*(S(9;KwXBJK zP|F-009MPIOaQBuYcdJ$*qmegC>Ps5{j(+qf?X4RwCUp*)sycKuv*4&DA;jj4dnXh z?>zsYwz~%6tbso-Foxo>A0=zBe~m{{kEXZ=W2v(SM-Zc2gQE-X8vL-}`W;{J6Tv6d zJpDfsZk}=tjzUw<8cYGJWex0uTIS>!uv*sOSg=~T2FJl2n{#R(IdHQ!6+&JYNpN^)UIsPeF%{jK;-tW$Y+ZX4+9CESo_0OE04R+4- z(WZ}mRL`8916I3_Ylruf^T2B7QsQ$ySfBLi0hr*M(r)YI8p2nNQy}97^%< z-`CB29$aJR^AL*j`2%X_)A_rM7@1GsYh6+E%-=d=uYudH9Op04)HBXsg4Ht4#qhrtuAXsT54JCk&75+v`Sf+1j^ReI z@3HmK<~Y?dPVXVdQaq+mGR~uG>^P61IL;qZXPmbZBjfb`etXR`&YR%2E5~^=ntH~0 z3s^1V+ztQVz|}L(+rUSoJ2rF5#pcu3aXN-=S|4qWQ!V54JLXd;9zUXFoIkFy<2;$-IDbN&aXv(hjB`Wuhijg3{uyq& za-8?0sb`!IfYmb24e);uuAXuJ1#Dj&n>poT^Xcn29m8M2n^Nne&2g$_oPNgoDaGSd zO2&CwjUDId6vuf6b;kK5F*43A(4Ve(#`zfBcI7x9M^n!@{{~jeI5)@t3AlR3`4rf` zI5u<2#pcu3aXN-)z}r&mqs?)uWt=-=JBQ+NCMDxMtHzG=Y>MMNmpbEokr)}LpDACi zdB*u1+;-(SpGQ;AI9~v(Wt`jN{|~r&#`zN1zBo2>%Eji>*KsivI^1^UINv~1&p7`HR?9fM@P8Ao zo^ienwl9v&oN}@G^mUw$;a}ik)cR<1oN5`T&nlNtJbq5eI4`QPKW$;V6}{M1pXhw)ichI!55)BHgn3w=F``4I)+cdKHKV} z&2g$_oL6AGhT?H0CF8uR#+OrHO>vyRpmv;&?+apNoYvnIe0F%Bj^6f;2Jc&N{iiFq z@n&f7nF_A|tOeJ9_6DD`;QG%~aQ)|N@b45{{{;)K|H2KvXu3RdD}J(`9R( zeefmo<-C^r;43usJo9}GR?D-#bFStd@VW6@uv+#F%iq$l=Tub+cYa$oB9{Rga zjMWMDxm6!+`naa*$=45D=9?C-R(?-2Jv{kbQ@OEP>j_T28Nue$N1HzG3H9Wg8C>R@ z1+I2}pQ-1;Z1CiBPsokse$YSp<^Y>dA8q=$r__^gE^wJ|Zn)Z|MZS6A$>*Mu8_WHq zfAY-_HlIG)^l?wBC*OC$WxfUAYM%G{__<>tc=EX?<;HS9>YsdzfX%0mHhtXF>dChl zxXiaWTmZx$!k%3#kW&l_{f^-IiEz^-dzt_oI*eKoM_5c}$2wd7m_Y#a6DToY{0 zeBUP5FEQ5wJJ*T1HdxK}emArZ*!DS(<@%)kx?tzd&q|l!zaCi4_FjM22ird1q09BL zz1QyzYP;8VajxyZ#C9Xa<2s7h_G_uHFYpauukF90HlA%ZCWb!Yo51y(LVvXPhwGyr zpH0DKo6X?1@o%-*W^=ec>i#ag+7@7AX|wP0^nD5JzoB^CM6vJA<;?}Y1#I7MrM4fo z*^(Ig+(eG)sJDizZAJN%7~4{N_;c3UwxMiKF^<@N=qp!G+#SH?OWYmdYJ(`o-I?0M zxY~B2>`F0?*tq)2)f4x7VEY@s8{9n3k>lANZaek-UGqJ_w$qlsX}l+xQ=;`*Qh$BS zscugD_bnb`=RjP}^>1nGQO@;k@XY(|VB7wVI(^-TIQnExz7JQ+oa_tsa89&!Q3g|t zBQEDeJ#mMC&6l`C;c8jK;b0HrY8yrwNinXtTtoH59R;?(ncMx~_EkMT`-9D$xgHJI zPd#%z23*dyK8ZONY)pOZhunVI=ghzJQqI5U!d(=P@;to*?)-bs-bpde@2PzTQ#+72 zK5K*@Tyw`_%n5MY`Fm~hiEy<8D1*4`t4$IYCFka3H2t+F-a%md&0W`E*y7}!4Nx^XyMKlS(=QTzOkaZLdqO;J0Fa&_H@V`|;@ z_Q$nQ%bvHN_fR~_IsXGZbAC72Ilq@Wb9@{zob&YMc(`r++fv3n0q)*dn4&FyC)Iu# z`;XviCsH!!r-0Q@rlj3Zz{byfoC?=ZJ=f!(g4Iu_q}>_dvfY_*{nX=gR_&8{JQuEZ z4khz=9$4M>j?MLU4%8Fpe6YE~FMvCD8S8~`HP<;~{W)0uXOy(N2yE=|i^0ZCn@iyO zsAoPd1sh9S=HoK3ebSa#mxIebUI8!rcqLq|?Bi8%`=>2&t_GL$dJSAZ_4xdv_DR2g z2{(><`h6|fw%X!%9oT+qORVd`_A_gA16-}#>o>yHZSUCK>uTBS-be1Ec>Ix){rxAf z`}@z-K5KY?y@__}`3zy(o6*$cb4%@$=YD;DgQgyzTWg;@tLt+cntHCIw}bnjFH^VI z9@}pVoB8!k?B9W#eY^urJwA8VK4l;8LQ{{=?`xm3kAFZ@Pe1Mk+s7p-w$~opJ%!Eu z`nr~m^9;D9o=04haz45#gKS;^mcnEBp@V~(IIiv2w z!(e^X?Snjh@LKgS#p5AL_QYSn_TjJ8_Q5^zDDBGo%wq-D{zQYnQ1kTbakz0EgKg#h zex~#EH?VV|uH9T}S)(Vxwk`MLQ)ue(dAjx~*ZLVW_4xd~_9@r;Sv2*m=X2n!we7XX z_IzP8zrOa@v1?17e}J3)eGyGPJ}=ciWq)5rQ;*LpwNKgKSJBkdr`Nzae{8Qkw$}@r z`SrEGu8lU=#`%8(>=wt<@IsinkV0^aARa%W`i3i_Bm?1 zeK(do*T*@*ULV!79&>?>p)GSWH&{I-b2AT``L!G0zNzK+H zc!J`5I0sJ__^ARvUEpWH&eOBh8TWUItxxy@aAW0MSP-s{y63{e)E=G-+7_ZLM=`EA zaTWpFkMKp|#?5>!2G>VDK8u6xqt9bYP%i=3Pdz?Mg3JCc1ux?+4cA9KpBI+_8%vw> zZNJnKb6K!?wHaIPK6R{)%kib1*WmbxOF#X!*&l7?@A|vPU!ZuD`}jF{*6?|-`Tjwj z`~14Z%szIVH)!yU8+_9S-?_oR*Wi0J_}&dZyun8`_~-^7*WgDr_%RKBT!Ww3;O8~? zg$;gDgJ0U<*EaYK4gTu}zoq86=PwU;jml@870}eSkci6K(Ew z`MS)o*Tv1jUK{O`d2Fjs+HL{XmfQot8JqFs`ow=A*f{au5}dK=FV`pjTY+2sWz9WT zx32vioBncryk~X$!1|Yc*&R(?f3G$2`0okM{AGN5p{eWdwMZV@KH#ji z*Clz@`ukwl+CG`bw)&)P7g$?z?+eb@%q7<+{@q~X#D6e2W7A)*PyB~~%^Clp;EYXw zxjwG7^Y2_c=kXr_PW;3iiKcFQue1f2ZIc_^BCe11^-lzaU!H1(W!hl6dW?s<14 zwTI`Owj(H~Q_L?;>?z=kC;5*?Q+IqvQOo0hEV$XOv$FZ+H1n!5hS zQOlG2Byje^iC}s5!jHi21^Z+k+v=0HKL%?{?vue8o4Mrr#QzkqapM0IaK@&;T%Y)# z3N~l_PXlLc`pflkt(|}8+BuK^Pr>O=VxEDf9-lL7pK`v=LQ~ILpAEL1x@&zdwTEl1 z?HtN4Ddra^_W9su{tM95?f-ey^7#J@objc%fVhnAfAJ+x}W=dHin#r~mQ)6`H#KH&Dyte-k+UiT}-L z>iYkhS{~bPz?th?!1B!XtzhTcKAFe1`lRh`U~S2LJ2+!Amt3Ft{}ybV`2P-^vFR_@ zC;oSU%^CkY!5N$Wa(&9VcFxQ5%=1mYDz$52ZG1ob+yyqCpJ&DSJgerp-r&zQ_=^qxa>0FWd$+;gZ}5*B{IdrCs=>c) z@VlWPpu3vETZP?(O6kPxQ z1=oM`1|L{({kJN({@XP8_665}hl1KzpLh+2ky0dDDLZi z*zN_pC%p$e0rvQ^TC}DA4eocrf28*F$)BkCTA;d*n*UdS=0Ak4?x!}^1Jv&02dVwc z_z?B<)PJG&=Z+qxo{{>m)H6{(LOnC}qtvrdKSn((_2bl@Bj(TfH<(18M;U|MbJafU zYaiqLB-ryW{3*C?^PcQ!xIXHMr&h*utZDx&*m(MSZpveO9-Mx8p33vi`USA>tZkz$ zxzx*C&S&Di1Wr89b9rp9$m@Q1&dZbQRj}{-ZKEx@)XQA1S>nA8F6ZeDxVinl-|N~x z;eDu=r0AL>+f|@?i?)b;mzD^K1p!R9rNw#4}wY~J!Y?i)09<9MBx$KR9Dyrq91Gb7s)Gr;km9^71;8PL@A_nsn8UuOjS z^F+qcmN+wmn`<)*n!0hk7s=y48@RbPv!ki&@4ZT%ymNrfYaDHfGZ(nIHglt?8^?Q` zJpS{7n`<*4n!5hp1Les(KiIs+(U!IO4%oS{U;5bAwNWqE#%piR#|5$3SAD&=%41s? zY`?U5pOxDO&;LchUQ=wNExFXoTwdQ3Z*gqNwHR0)+mhhq@}4bEuBE{DkjpmOl1shJ zO3pM z^-)ioRlv5%yRB8>Y6oxwjn8UuBQUwZXDzL zOr<^XHw8bAuRhu`R`qhMJ}V^m7TD7F&B5~I9sqW%>ha$aoWAc3{V3`{Gzv5Sp(>A4_{f?hD=U@r)ABOIF z}h`_*tJdjqu{nzPd>H8YWByv!Pfy=%h54V5niLI7c%|5us@hkgqB6>NeC&7)Yo>*#SzuYJ3*N?&F zJji1^8C=Fb1#WEh#8xX~yZ6%mRB$=w)8MvO&sfyre>%9FPk9;J{T$yjz-8<+;l@@^ zY_<5G1un;OcI~g8v8a{#JZ}=q*zpsWex6s!b3WKSa-Ul+fO~Dqv%`gOHT}}&XJE&Y zHa~}ZZA+Vr;A;BiIpt!ov9u-5CE(*`)}y!NEu*MZA^Uk~?MnxDP60j!U@ zYj7>KTH5~#ocVK{zlM*Zr2S1`ebjA#Beh!E-vTzK+%@|RxLmVa;cEJ&&23=gq|NQ% za?O4VSJN+R_B*h#wAqiFsnrtW4zOBs+zC#<9M@fNb7nn%57tLL>+}b(aXg~kU2Elf z-cxJkdftm>d;PT=-!)S&@5LXs!&mQ7doO;M`rie9zsBC9KA^bXA5rHX`vmc{s8vOl&+y27_|El1|>*OX|_HTg(U#Q^rcaa8Pyx_)J zs^+dMu)2?Y&P)Ej$Ng}%KU1#ZT=ky*09gG}7R_gdhrk}b)6w=IMa{Vt8+!ouzkpqf z{=~6w55rv#<78j`6-_<&(MP~u)6~7@80S&2v9u-jV_;*s|MYnruAjR5L~eiVSMomz zZsvaqO+EYg8L-;Zl>D63-@*1to9BvqMlF4L4s6`~n=jA9)hr(Fb@z70`~tc$a!vgQ z*f#3McMqzi-!Flk$8z3YMpI9}UjeI?{eBg0zqJ|nMQXM5`*pB!)9*LnY8H=XzyFDD zjP(0Wux-?h@42C7eD~JdVE0yjNBCcGwRb4`d+w;|@3rpVU}I?WyphND0oZxb<~bx! zo{zxB(&qUj_urv1=EvYysf}kl?fQA1YD@f2z{U^%6s%9y;4^S@4L(Ox&vz(afYnm` zH`DFImuSxAN)+=LTVMNOPVE`X*WmJe{1&eE4Ml&i4{GsuqSbP)_Hp7VIafR69G!0?l%S>Rkjqf#1-F0+5Gd~N0a}Ig0 zkjJ(#*fn!ry>G}f{zbvfIa&-&UH?U>SG{|Zz*~Hb1=^PG}L*n>yNYZp843c)p+WBYz9|r>G&De{x~1D)t>p-9PGYR&wOkF zwoy4B1JKkn_JLsAdPLi@*31`et6D4PV{0_q>#yC|_RXByGOlgFYS|~-!qqIzec~R| zeLMbR{}yi=Tf3!G3)YZai)FTb_Q;h<#Ry$4u1rdphcwYwX{o zorU5UW}|kmxJULNM$XY)ScL9^s}Cu-=k)#s_nCb{!4Cr;UUS#W*o%|P&+L1G&28>b~ncpt3F>YY|KAZ0gR&y@B$IJJk7|YmV+d5Z+!Hzw#hQf_yzZ{d? zJ+ALCuxqYw&wbe2kSGK_5&~c+fRwrrp-H(w=jBGT3XK zdd}?w!Nzmla*iGZ*Vn^7=%bc42ZL=huxN7#+&1R7jXr8=b12w0?)l{T0o*p}uRdy7 z!y~}1Va9$W*ciriZyp79|LJE=xjybM_nLdeI`O7}jptfXRY&t5wfsF1ZEEHIb+6gp P^X(Y0`8*fJo{Rqns)KQL diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil deleted file mode 100644 index bacd92543e7ca38ba81394738995e4013604ad6e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14600 zcmeHtdsI``w)f6!CxH+WUhWVe0R&N_B%p{;(*!6F!8R%)K57$!fQlNQ)tYJ^5I|#z ziWuvo@zr9hEvU8V(Kdl7sHkXd8y{_hMN2L91dg?#pVxkKhs5@tbH^RyeBVF!kGlui zS!?azoNKPR=W8vJRh*H@e=NMzf2;K2wgn%yjJ^`Si+~^qZ8CyT;rk`{6u@Ubd@Aua zJWQn{NFjV?X6FfMFot+;2$DB5N91jKhF<%pzHDZZ&;f8EPV-+}f51ig%Q()93?Ez` z6h4@@4_}@!eB2-Jz=g~D*Xy6hiSQ8r_{MR5_=WSwKln5J@{Qwi;GcgUC&3v0@r~oO zzH4Tow6GThkptxr2$xlySAYx30zemxXJlHxi)J8*3 )d=HOcct+L6Zj*MB^H5gO zHVK?O#mf0ey})nubd=!oA#n$^N+7qUbo8BHl< zq-|C(3<|8F!G1fTOckk)R~JOsf_u9B=GP2 zK((CN?_%~y0xJ=8wUa7$QWN+`_tLr4wX~Y`X6htU^*l@UN-OoPChGbG>SR7eETpbo zSEI5}UzTX4KSmG|a6N~twAQSa*Q8X`tgj$JJKz%;b7?p8Ex03rZwaX@E!0(f>QWPP zg|D&B*H|H^;(%`Sr@z3~N~mwisjIuuhb-#aVWgZ19EOn(;gN+3n59l8@bPl+jAZWj z`74NJc!|vN_!6`c#(LVYQQ{qJFhb(-47?-k&C|S~l6Rg&2)Iq-m(?_iNR;3$xHQq% z$z-di3AfDsel(4*V|WQ;y>J@F`V{^P-q_%32?Ac>xl9yrtRQG6MVT=UyTD|m$LCxm zXNFIelP%cggEYSbsh2Gweg~1j&GRhosSO9z$y3uU$EHrue9T6s`iBTL7dc2$IYQe< zu&BVQ*Mps#fL}PNz?4l4&YFlR{a2-vTE_0!lfPjVY4?gV1nB|)^gSE4jVT%9_wJbA z-#w#UFUnu#ugt&AA_rG#ck>xnQ3XGRMwTinm+6G5TmvaOo~96@g3n7TRVDGX_LI@k z4b>QGKvKw8$qI&5W8YP9?JBuvFhyV|vF+BZ>Evej_M_Ss;MHwdxGr87Zcf;&q3bwR zgk7gjH=jK5i7@SS^BH;@YZ?n(q?eR(!!DntRay=v+ywUa-h%N)!SSxG%V2sLH|*LY z+VbH?6?{~1^G@5f%P9g3t0LF}3PWDGW61dM^lOY1)-?>VlBGxr!bRH{UGhv7$~7Ed zbZfHa6~B9z93AjEy-l_hb0FLlmOWg)wD~{+X(G~T4Zx}|U8;WYcFXy#)h%z=J-D&+ zDhRbjX5Kr$_`4Bz?-pZsM{sXPd`nie*4AFCKC|_J2VgXC?#{!$y+>Ab79T!y^#)F~ zHS1k2KH{kD-q`~TtGB`6JM`B%-+dGD?VkCZEoUc8It#1rUTSjknZ0smpM}})WV-l) zPvn7aOW-rj?<84M+r_eP9&XPrdYC;aj`pFc=Zs_LrK?BIpP5FljW1m{eyyT%?ZTBa zbqAT8{Z}I9Vsm!S|B^e0bGHE*ooVvL;VD(YJ*5*{n~HJmOsv)_bAqh-RtA6!hAjG4@`M}8L_;fRiGQ$&>s z>(bBZ)4MY#pCY|jj<9PZFDuZmO_`HBGbfup z_ty5*hRoDjQhGysB24;Bmwt|MP#i z39~}73e&94*w2>wf2A6__5-zWI*Xk?ncV{;^qcsIKbugHh361Vn~am_lW%Waao6Bv z0JrDPg3sQ#cbbyb{vf@6c^326T7CV!)0{%Q`!f5RtXHyrLi7NLEPgRU#+i@!Y$@?J z&iu8UEJUAL8-Pu|^#jaExsDIc3}7i>3REIQZi3)&2mIT0z(}2Rv5sNi zE}}?0wb%con!szz@by2kCScpd>#xhjsgt=UD+nwgzTN#CVvb1}m6nmfECRtjXv^e^dRLAdomv7OY zRvc}u?nLQ2%aTm?G{@IV`4NFgJdG$hI8JbNsm_Et$cm;EhEf*5rDZV2BspXnDTchk zX%;Fs_#gV=A74LgM1RYk{Ve5!I6X@sC zfOg@s)hpM_mM>bideux_(*Nex|IMv`=2itC_O5=gUm{Or`@`mX0{HEOE+k||G}>CY#NG^p=ogOl;ry+|Fs*ntz4a+6ko!LWw@Ind|p>qjsfBeB~D3Jsd z@?RPHBd{TdgPLYW#UgQ%ix`2X!c3@Jr^Jz=p2G0|B zqLFPmb8Sz#FdNZXHr3dM$^1MIQ!yi3!>8SJ;t%+gL6BIKj_Bl?Pcyi(BSJrmu*8IbK<2Tn(pUTrvHZM`&$n@l zn^b-Wy^k@0)%KO&QK9egg^URK@ovAvK59Xy_MzV=GI)#vb4h<&__?-PEkVP_Sm&&0 zi?BqFPeM*?mI{m*hR}3`8~#g${>kjcLg67{@Bx&R?DzlHsl*wSXmIxPQk#i|ejf|p zWg(%G8Cr5IN&{h{*zG5i3i@gZL=dm?XSx48jpt$YWXXyj zRi9F1ucyslLAWQsMb93$#&thBsxfWdAU`la^4xlJ%@JnQl~F`%;|aJ0zoA<>U>*MB$n$&qPt4 ziF$`8@^skH5v)<^O?@!YGJeo=m?$9oK1|d~$kW(iq9?->@jMd^ccjpnL7kjH-vle7 z#t*|0P#^bzf?ymwdUHB89cemK`Axy@()Jhs#B_7e^4mxkLHeD=njDX&k-x_X_Q zXChKgk~2n6CWKn5isV6Flf7HqY8Q}&A~4xkPLYimA6&y$7A8{o+X{^FWNQ(1R>GDO zSPGxG2Gbm8ek2bo7gb8gKMRq0k;OfN&Ju@)=1?G~(@qiR)?R2Puurukm1|VhrxLa( zNHMA!g5#0PZ?Pz>9nEqvNzGpo!oJQJX9%c8qEPuqnSy4@tDsxyOAM>PCX$KvX6iVh zP7p8HC*FLwJ(Vo?i1*}$cq0@FrmYynlSHNxyiWFjGUsrnJPdcTsl`^WZ>d*=^|KCN zz}3Fv1cS-E;%ZCCbuVua)AyJMy!Eu=-eVb3CA#oalT>;1e^eyWH+XBrL`| z-@3N&V4{0CBH3c14zVlOAa&n% zCkQXr+VJ2N@uMx=Cajb&Y)J8MTp_lMwv5Cm;{=VwrtwMg)@WLw(7aIgj`Nwd*xx}E zKcQX}M%P~V(z--k?Q2FX0)M4!-f6bxLJ_qQM8w~w0;vcwDsouwvGpWGDolMunxt#N zM5zditZx^ujZN5coGH2y^%#HIZAkZMtli8P$D&kNsH!h#Ik`eqqQXO7$3{(#C2U?~ zWrp}WCD=k0!7@u|ctxuAuG<|rK)X{itPmz^biH%j>9l_P+x8AH6&gwMiJWm#2kZC9 zD2@4%-Fd+)eB8k(!t0<7|L4|vup}0$DYIdxDFd8g8Dn2@#I|nn3SZbeMR(5eNEQ}- zkOzjwXCU9|L=46Y#yv3uY?qI5Vg^t9M&%lrfAHvS!;IJ3up;z@#Af#Yp=Bt&5{F-^ zmrppIcJ9LFd&3e7^JRr%3$0Man=5_WXJ$z^c?a3gI>5|AY&>5s*s950M2<-U#~Oj1 z{wg|g@!@Q+dkOAEZz*CYC4pjbS8;6N9>C?`gFleoaMl`ee-MFp{T`FExD+&M1Q&9S zNK1-^m8lrkqM9!p>_`s0!cy=$4O(yM1XYeC3G=OB#Y@?X@+k=iL(Md=E2qSdPvR1c zxUaCs5xu^$JJ0CI7RKVfLf!0J#G;JYGi*mEETSPO*1HJqo_n{HhxydSve<6%zK&rz}4>v6o&XAN8hw z`Y3E6VTrXP{F~Z(N|w5db6+VX`3$k>!nCSqUPIK+`Yx-x-usg0zVa&hxz9ppJ z@1aGjK0nTUq!@EU(EbB01XDI5Ct~XAL#L_>TKkm5Y+mq$p<vNg)JXvTHsuPVJ8In*6iPP2_P&!zNE{HTw0$gpmhZ6sCH* zR4u@QhaB7N9YlioeZhbz+ETw_p1Oe6o4H3MU^$a3C97?aCM2X#G%B?%IU<_+QjEHt zy{bFb{$?9%6ho20()lw^idyvUVi)R@vVFnIIQ|ar(jQmPCMLI7OxKHQIO|7DY0lPM zhn!C)$<_QsJSN5rPF~KqdVuH!6vD&cVYN*+re;$IA$9JoYXUHnBJ!7U2 z8OgkeiG;Yv$CbL`d^#_^8E4+1ARUa*5%?=14ta_w!N`+|Hm_%nY9FuN2Uhwci-GLc zi?SIE2ypVroK1AER%N^2qz{7W=GMp&TBK1lTzhS~TvZ?()?m(+&V1{rcGR9mCP^O2 zjcgJggNo-GN_E~9%imw1dl`)UPBey;P?g>QNlYFU?2Q$oWOq>QvpiTf;^iA%rYx9x61Lp{kQ(I`wE+4OX6K35GaX>z%a#^-yJUvsERy-uGs|l#(hilHVTeNd@2UYem3vtg4Hi--h zcR0(+t#N(3Z&V>onzB>mQ#QZK>@9tqV@UZa<9=JR@oZaDtTC-EA@<=b;_U7%YGc!g zAxx1txI3;H9fQ98(S7TkNBr>{gJp6?EOE#j-PvK1ir;4q&UwU2JiJP=zP_d}9!!JXeo#hh6=<3;QwXqg=nk+kB z(RB+EFq2rk^y&e7H~(S<16M_YfwvyBQlOIt#?`KQETSM7w>~* zB@oxh?Fa!ZQjcNHosdxE)zZ?9}*xOcd@0jqjsCNp0ZV4 zDR(Nhh%c5&D%8Z!dD85B$uIm0fc?4rZNjIKGx6yiDy?5+2*YWGW zAG`iLUU*gLSjKopTLn>6xrWbWolOuLI9hcV)C?E-yky#tV2zm@{HQUl>neSHe&j>6 z`tu{qRXZ}|60e5&ZP?nhJhdR9S6R*HjYWrwr648kFM0;^uf;S95-}W=J2dO62v8U7 zl_OmNDjM1zKwxNX0(~Q8VbU|wN%AZEi~&+g+~H6o%bQrx?6Kr@ssC0yv5fKUbhd8u zzxpbYi6@pEy3JdJm4#%U4({s46U+S!Uv>`C@?DHCTY9qnUz zejD>YUbT8y&J6UiTzM<=e(*C7%jy>_Bem0q$R#+-okx8v4LQmxv6O?eTp{wY{AC;G z_-mSMT*~kLeJnRNWIql*b_1l0DDPi%F{-S8&c&#O{dpIoO8X~Yj8gWGzIZK2MGDpD zqj#sMLThi%VojafK1M-lTJ0$eLtIPn?8IFaQFEtQJu>x^e#6XK2>doP5 z@aNvCpr7Gt$@XeXNWY09AL%1;DI}tbU#1ZfXY#K{< zqe==%!f>3Kcs1=SoX{b&w6pxEKJdbhaKRc^^!mB_BTVsx*D7I;R?~Mh zz~i%EQCV1fZ$e!?rCNOq4DkZk6>SJ(+y1mG$V+TC2?|z8JMO;qxcgQGGc>>0a&6$b zj#1#beXW-J9lOAD6NaRtnvngghdnf9=ZB-F_&r_(9y-SI!AZ;hB$%MlmHnvM}#h)e+{06N=hrP-n%A)zOqJ1;Z2H zU@J6ZZd?e~PRta?V}mU=Q$5?DZcKCCoHr#uavl9+%Ms?ekl8n8P#0_;TlXg&WB+z{ zu?%9NR9wUuY|#OkCl)HR$h_DXH)xYB0$Irua9IavxU8u*1pe_E#xQVo=ZN|_20laA zV@Mqwn`B;y>62(>^iB{wfs!P=EeFBv!HYof`$tcK-~**|K`HCf#{ETbZz4Z1?wp^u zdGz?XaBs6^-1#xy=9%N?(~RcPLzr~tpy=THy~?U-m&%9pZ`gM?$Ch|g@ppH8&9Lw9 z`5|+LedUX&M9P4W~29NbM!$Oi1|DprKb6f`*pW`|( zeF8cl>c0ockueFRCJDnr4Eq~fk3zQYE3G`2I~Ea<>=K53?GlFuBi#yz@SVYsEkPib z3nj<#2yR6_MXjCui*n&T)?o}z87x{3`6PLfFyDlR&PYZnGZImV=he41NhyCE@ok>5 zx+6vM5siOehEsEuQ_t4O4>QsEURx|~(`rE*3lvL&+c=Psh3L6nbB7e>fzyTA43 zhGmV)FGBkr;H_OjR-5F4rB&-fL%Yn4s459bTjW*>v!}QtMnyTqJ*upCH zrH zMVu{g?T*vXl_d=E(-Oklf?5dGBYE7IAv5N{)W(a%--}u3ph&^!Rc3NJKall&tuhNS zqDlS`IjBF%j7hgVw%YVUTJ_Y>d-Bk)HKF^9grwOP{R)%I7U;IsW{I;$4vJ#7T^A5? z9X4wR6BVx&x~YO>sM#p&KABTQ?|b-%-!mukOO`d#N!_!@FuDqhOji8~q@j$F)nuDYaU4C5tq~ z$Z61i82Y^`i*s!5KtN6d;iJ&+SLZXoP?&mSj5c&qT}Rll7^ zRo<$;qpRm`xYY(r@a^YU{q_)D<*WIx_Uz$y`SuVgyOu8#Qcsfkq6jwiNSOL0V}prm zQ`sZ;&UIZ)>eoh*4U5;Yc!%^F-=RZ)MCZ}D!u)0`Dbiq}PKb1Br%~4nBNWt+aO#a# zpeFN9;nZ#^kNj})+brIdKdGzsU9thS6Q}N`$|L)=#ZrT%j;lefWYo`Kb@ zBZyO6M0KT;;x-TX4~l*m0S1@Y*fS{`j84n+|RtN`KS4+pK>2=vt&8YCi1B z0~1H_wouJG>H~yS9=C)kPU(zLSGH1z9IC*nLObPq^62>`!K#VJqp>dHPU zn5?Gi8kj6Ow|_pEtcH9YH`(a{Uj}4P1e5(rKGwE&uw`dO`jBI4>ppd*EFFAft*b@* zi&II@w|4|J*4D=f*gP&{$N^yi%@nU)TcBqAVs;nb5eym2<{-KPw^d7(UgTW5zZQ+)bOsrr4cuA|vG%Bg($28_KTM&I&`<+aEK zPNkB4sY90|E*c)Y(&7|2lfjhU4_nUW&D`=?ZT-;;^X%UI*@!V@Ld=_Ix7qj~2TViXAbw)l3;NsJl7KL5|X2LXo|q)^85m}WRnR8=i%FbrNz zy4NQCN)-DC&T=CaYk3l;)ao=Bv&u-(k zO}ZlY|AOUE+jG?YQ;2Z9!yRmq?5Mw;180g4V@>sxAEJ4&#NWGup0%mS>uYO3>^|an z5c>yd6cC$$xOkH*M=f|6h)q$0EgA^ge-L5@TAsu7(haal(uhxzzYs>;i0VH8!Qp(f zKLn-?tu+uFTy2TuqL}y~9(Nmv)>Wexs7OPO%}u%L{w!dp;z1C%To>3989C0Hw_$Pm zL|9zn{g?q!7#%dT2^N=bs=rZdw0Foyg0TD;pRkhM1Ori^Il%IlCv*`pzf)G~X~d8* z&LSPJIiT@(TVcGPD0ai6^Fmo!?E%EUm*roQ(4`Z1>%ARO zhC7Zm0rnZozZ9^7ZX(C;v;_FNGRK<$izS*4G{Yk|jGq^lG~=7m&{Xsg_N->plMW0m z!KxJA;aREZVT`4h7;N~knFYay1Qo8U#fI-s@bmD(huGl~Ax#>NR$zO9%yU9aO-EZW zMw@pB^9$BlBJ&+awJKR#q*8CT79rPVEUwV5Y{+!|o>uY-b;e83Rtx%zVN! zpKS(!69-r}s|DS?%D)p62U?H_kia@-*p3NP7&)gdf0*#SpgsLmrZB=b=6DQD{#X*I zy^((&-v26)PJ67ggP3Hh|0(}-7+Hgl9RE{Z#n?}Ac?Xd|$j=|&+YIvBL0&ZJ50KZ1 z%X<#wy(baJC&dQy{QK|`r`IXE+f`Yu&u|j>`5@VlG#UVdIN$-0JeG)0Cyj%V-|>;3 zUXVz3+uqBV?D{~?d^Xxb+T{0>mHM;wJzbhbo9_Cco9Si+J{xT%iM!y@Aozy+0~!9) z%jcsldhubs_?Ui^-y`RHx^y>SpO3af*CpwKJC4;f$(YaNO#T>)UT~OH>-RHqP?zS? zr)!$JnZ++@nvg*Lm@XI}^g7JWCFlyyDgVD6%P&BMoO7k#;jfmUD>*DZe}@zw{xALL z6zASTAJ`CEqa-;1le^9-+MMKxgnpTkHy}>9w+gk@=|x#Sc784jd03BuB#8I zgF2yC*&f4FHn=f%cLz-?&SpGmA?WG@*y>6XByLL}Db%nnuu}6c-|g%fi&5OU0{Lug z#f1)-phhwVxGoI0R7lbe#isRf_Lj@{Yw>?Q2;smMSpi7P)gFoDY+0RT{T+2_C9Kg z^IyJuXmw&z&z^;Ds)=T7y{{yP@nTQ7v5n|lUWDYDrM3B8Z4z5SX8k0ej}G*2H|7uvN;+oaW!FTRL_r8GEKa4Q=To@-qr@qW*I)zsp4*|x9BhfMfm)pjCyLLt zP^*)8TsKwxYtDx7K2;{`BLeKvtEzIzMNeevW9|d)Vs`dGi_`!H%m9k;?ezglNROA{ z-Rxzn3vx&io9iiyNAmnv)1bf+in>x#6KzIvR?uo^(DXc6e9zu2%#yV;XqhEwNpnzO zzFC^z6?D!nu;mxmqm?1=&C%^L$E~Y_FCj1BP6F^qRJWNT4 zl#b+$?aURInf}u{4q9Qa9*<6CtYzGngk=!d)00$n+8}+TKGjrhk}S|M=9gIP@(w<; zG%bImw3Qe zad49doQi`L2mBrmW~JcZi#Ry*HV#hRgL7VlgQd%G@U3b10?*;#=1DmCnFrj5 vgZcY#@CP`!=S>`JnT~_^`2kqA00&d9VKqK3uE+ diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl deleted file mode 100644 index 392d1f3..0000000 --- a/piet-gpu/shader/gen/kernel4_gray.hlsl +++ /dev/null @@ -1,1303 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdAlphaRef -{ - uint offset; -}; - -struct CmdAlpha -{ - float alpha; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct CmdTag -{ - uint tag; - uint flags; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 _vector; - float y_edge; - TileSegRef next; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); - -RWByteAddressBuffer _297 : register(u0, space0); -ByteAddressBuffer _1681 : register(t1, space0); -RWByteAddressBuffer _2506 : register(u2, space0); -RWTexture2D image_atlas : register(u4, space0); -RWTexture2D gradients : register(u5, space0); -RWTexture2D image : register(u3, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -uint spvPackUnorm4x8(float4 value) -{ - uint4 Packed = uint4(round(saturate(value) * 255.0)); - return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); -} - -float4 spvUnpackUnorm4x8(uint value) -{ - uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); - return float4(Packed) / 255.0; -} - -Alloc slice_mem(Alloc a, uint offset, uint size) -{ - Alloc _310 = { a.offset + offset }; - return _310; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _297.Load(offset * 4 + 12); - return v; -} - -CmdTag Cmd_tag(Alloc a, CmdRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - CmdTag _669 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _669; -} - -CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdStroke s; - s.tile_ref = raw0; - s.half_width = asfloat(raw1); - return s; -} - -CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) -{ - CmdStrokeRef _685 = { ref.offset + 4u }; - Alloc param = a; - CmdStrokeRef param_1 = _685; - return CmdStroke_read(param, param_1); -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -TileSeg TileSeg_read(Alloc a, TileSegRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - TileSeg s; - s.origin = float2(asfloat(raw0), asfloat(raw1)); - s._vector = float2(asfloat(raw2), asfloat(raw3)); - s.y_edge = asfloat(raw4); - TileSegRef _826 = { raw5 }; - s.next = _826; - return s; -} - -uint2 chunk_offset(uint i) -{ - return uint2((i % 2u) * 8u, (i / 2u) * 4u); -} - -CmdFill CmdFill_read(Alloc a, CmdFillRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdFill s; - s.tile_ref = raw0; - s.backdrop = int(raw1); - return s; -} - -CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) -{ - CmdFillRef _675 = { ref.offset + 4u }; - Alloc param = a; - CmdFillRef param_1 = _675; - return CmdFill_read(param, param_1); -} - -CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdAlpha s; - s.alpha = asfloat(raw0); - return s; -} - -CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) -{ - CmdAlphaRef _695 = { ref.offset + 4u }; - Alloc param = a; - CmdAlphaRef param_1 = _695; - return CmdAlpha_read(param, param_1); -} - -CmdColor CmdColor_read(Alloc a, CmdColorRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdColor s; - s.rgba_color = raw0; - return s; -} - -CmdColor Cmd_Color_read(Alloc a, CmdRef ref) -{ - CmdColorRef _705 = { ref.offset + 4u }; - Alloc param = a; - CmdColorRef param_1 = _705; - return CmdColor_read(param, param_1); -} - -float3 fromsRGB(float3 srgb) -{ - return srgb; -} - -float4 unpacksRGB(uint srgba) -{ - float4 color = spvUnpackUnorm4x8(srgba).wzyx; - float3 param = color.xyz; - return float4(fromsRGB(param), color.w); -} - -CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - CmdLinGrad s; - s.index = raw0; - s.line_x = asfloat(raw1); - s.line_y = asfloat(raw2); - s.line_c = asfloat(raw3); - return s; -} - -CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) -{ - CmdLinGradRef _715 = { ref.offset + 4u }; - Alloc param = a; - CmdLinGradRef param_1 = _715; - return CmdLinGrad_read(param, param_1); -} - -CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21); - CmdRadGrad s; - s.index = raw0; - s.mat = float4(asfloat(raw1), asfloat(raw2), asfloat(raw3), asfloat(raw4)); - s.xlat = float2(asfloat(raw5), asfloat(raw6)); - s.c1 = float2(asfloat(raw7), asfloat(raw8)); - s.ra = asfloat(raw9); - s.roff = asfloat(raw10); - return s; -} - -CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) -{ - CmdRadGradRef _725 = { ref.offset + 4u }; - Alloc param = a; - CmdRadGradRef param_1 = _725; - return CmdRadGrad_read(param, param_1); -} - -CmdImage CmdImage_read(Alloc a, CmdImageRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - CmdImage s; - s.index = raw0; - s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - return s; -} - -CmdImage Cmd_Image_read(Alloc a, CmdRef ref) -{ - CmdImageRef _735 = { ref.offset + 4u }; - Alloc param = a; - CmdImageRef param_1 = _735; - return CmdImage_read(param, param_1); -} - -void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) -{ - float4 rgba[8]; - for (uint i = 0u; i < 8u; i++) - { - uint param = i; - int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; - float4 fg_rgba = image_atlas[uv]; - float3 param_1 = fg_rgba.xyz; - float3 _1653 = fromsRGB(param_1); - fg_rgba.x = _1653.x; - fg_rgba.y = _1653.y; - fg_rgba.z = _1653.z; - rgba[i] = fg_rgba; - } - spvReturnValue = rgba; -} - -float3 tosRGB(float3 rgb) -{ - return rgb; -} - -uint packsRGB(inout float4 rgba) -{ - float3 param = rgba.xyz; - rgba = float4(tosRGB(param), rgba.w); - return spvPackUnorm4x8(rgba.wzyx); -} - -CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdEndClip s; - s.blend = raw0; - return s; -} - -CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) -{ - CmdEndClipRef _745 = { ref.offset + 4u }; - Alloc param = a; - CmdEndClipRef param_1 = _745; - return CmdEndClip_read(param, param_1); -} - -float3 screen(float3 cb, float3 cs) -{ - return (cb + cs) - (cb * cs); -} - -float3 hard_light(float3 cb, float3 cs) -{ - float3 param = cb; - float3 param_1 = (cs * 2.0f) - 1.0f.xxx; - float3 _889 = screen(param, param_1); - float3 _893 = (cb * 2.0f) * cs; - bool3 _898 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); - return float3(_898.x ? _893.x : _889.x, _898.y ? _893.y : _889.y, _898.z ? _893.z : _889.z); -} - -float color_dodge(float cb, float cs) -{ - if (cb == 0.0f) - { - return 0.0f; - } - else - { - if (cs == 1.0f) - { - return 1.0f; - } - else - { - return min(1.0f, cb / (1.0f - cs)); - } - } -} - -float color_burn(float cb, float cs) -{ - if (cb == 1.0f) - { - return 1.0f; - } - else - { - if (cs == 0.0f) - { - return 0.0f; - } - else - { - return 1.0f - min(1.0f, (1.0f - cb) / cs); - } - } -} - -float3 soft_light(float3 cb, float3 cs) -{ - float3 _904 = sqrt(cb); - float3 _917 = ((((cb * 16.0f) - 12.0f.xxx) * cb) + 4.0f.xxx) * cb; - bool3 _921 = bool3(cb.x <= 0.25f.xxx.x, cb.y <= 0.25f.xxx.y, cb.z <= 0.25f.xxx.z); - float3 d = float3(_921.x ? _917.x : _904.x, _921.y ? _917.y : _904.y, _921.z ? _917.z : _904.z); - float3 _932 = cb + (((cs * 2.0f) - 1.0f.xxx) * (d - cb)); - float3 _942 = cb - (((1.0f.xxx - (cs * 2.0f)) * cb) * (1.0f.xxx - cb)); - bool3 _944 = bool3(cs.x <= 0.5f.xxx.x, cs.y <= 0.5f.xxx.y, cs.z <= 0.5f.xxx.z); - return float3(_944.x ? _942.x : _932.x, _944.y ? _942.y : _932.y, _944.z ? _942.z : _932.z); -} - -float sat(float3 c) -{ - return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z)); -} - -void set_sat_inner(inout float cmin, inout float cmid, inout float cmax, float s) -{ - if (cmax > cmin) - { - cmid = ((cmid - cmin) * s) / (cmax - cmin); - cmax = s; - } - else - { - cmid = 0.0f; - cmax = 0.0f; - } - cmin = 0.0f; -} - -float3 set_sat(inout float3 c, float s) -{ - if (c.x <= c.y) - { - if (c.y <= c.z) - { - float param = c.x; - float param_1 = c.y; - float param_2 = c.z; - float param_3 = s; - set_sat_inner(param, param_1, param_2, param_3); - c.x = param; - c.y = param_1; - c.z = param_2; - } - else - { - if (c.x <= c.z) - { - float param_4 = c.x; - float param_5 = c.z; - float param_6 = c.y; - float param_7 = s; - set_sat_inner(param_4, param_5, param_6, param_7); - c.x = param_4; - c.z = param_5; - c.y = param_6; - } - else - { - float param_8 = c.z; - float param_9 = c.x; - float param_10 = c.y; - float param_11 = s; - set_sat_inner(param_8, param_9, param_10, param_11); - c.z = param_8; - c.x = param_9; - c.y = param_10; - } - } - } - else - { - if (c.x <= c.z) - { - float param_12 = c.y; - float param_13 = c.x; - float param_14 = c.z; - float param_15 = s; - set_sat_inner(param_12, param_13, param_14, param_15); - c.y = param_12; - c.x = param_13; - c.z = param_14; - } - else - { - if (c.y <= c.z) - { - float param_16 = c.y; - float param_17 = c.z; - float param_18 = c.x; - float param_19 = s; - set_sat_inner(param_16, param_17, param_18, param_19); - c.y = param_16; - c.z = param_17; - c.x = param_18; - } - else - { - float param_20 = c.z; - float param_21 = c.y; - float param_22 = c.x; - float param_23 = s; - set_sat_inner(param_20, param_21, param_22, param_23); - c.z = param_20; - c.y = param_21; - c.x = param_22; - } - } - } - return c; -} - -float lum(float3 c) -{ - float3 f = float3(0.300000011920928955078125f, 0.589999973773956298828125f, 0.10999999940395355224609375f); - return dot(c, f); -} - -float3 clip_color(inout float3 c) -{ - float3 param = c; - float L = lum(param); - float n = min(c.x, min(c.y, c.z)); - float x = max(c.x, max(c.y, c.z)); - if (n < 0.0f) - { - c = L.xxx + (((c - L.xxx) * L) / (L - n).xxx); - } - if (x > 1.0f) - { - c = L.xxx + (((c - L.xxx) * (1.0f - L)) / (x - L).xxx); - } - return c; -} - -float3 set_lum(float3 c, float l) -{ - float3 param = c; - float3 param_1 = c + (l - lum(param)).xxx; - float3 _1048 = clip_color(param_1); - return _1048; -} - -float3 mix_blend(float3 cb, float3 cs, uint mode) -{ - float3 b = 0.0f.xxx; - switch (mode) - { - case 1u: - { - b = cb * cs; - break; - } - case 2u: - { - float3 param = cb; - float3 param_1 = cs; - b = screen(param, param_1); - break; - } - case 3u: - { - float3 param_2 = cs; - float3 param_3 = cb; - b = hard_light(param_2, param_3); - break; - } - case 4u: - { - b = min(cb, cs); - break; - } - case 5u: - { - b = max(cb, cs); - break; - } - case 6u: - { - float param_4 = cb.x; - float param_5 = cs.x; - float param_6 = cb.y; - float param_7 = cs.y; - float param_8 = cb.z; - float param_9 = cs.z; - b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); - break; - } - case 7u: - { - float param_10 = cb.x; - float param_11 = cs.x; - float param_12 = cb.y; - float param_13 = cs.y; - float param_14 = cb.z; - float param_15 = cs.z; - b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); - break; - } - case 8u: - { - float3 param_16 = cb; - float3 param_17 = cs; - b = hard_light(param_16, param_17); - break; - } - case 9u: - { - float3 param_18 = cb; - float3 param_19 = cs; - b = soft_light(param_18, param_19); - break; - } - case 10u: - { - b = abs(cb - cs); - break; - } - case 11u: - { - b = (cb + cs) - ((cb * 2.0f) * cs); - break; - } - case 12u: - { - float3 param_20 = cb; - float3 param_21 = cs; - float param_22 = sat(param_20); - float3 _1340 = set_sat(param_21, param_22); - float3 param_23 = cb; - float3 param_24 = _1340; - float param_25 = lum(param_23); - b = set_lum(param_24, param_25); - break; - } - case 13u: - { - float3 param_26 = cs; - float3 param_27 = cb; - float param_28 = sat(param_26); - float3 _1354 = set_sat(param_27, param_28); - float3 param_29 = cb; - float3 param_30 = _1354; - float param_31 = lum(param_29); - b = set_lum(param_30, param_31); - break; - } - case 14u: - { - float3 param_32 = cb; - float3 param_33 = cs; - float param_34 = lum(param_32); - b = set_lum(param_33, param_34); - break; - } - case 15u: - { - float3 param_35 = cs; - float3 param_36 = cb; - float param_37 = lum(param_35); - b = set_lum(param_36, param_37); - break; - } - default: - { - b = cs; - break; - } - } - return b; -} - -float4 mix_compose(float3 cb, float3 cs, float ab, float as, uint mode) -{ - float fa = 0.0f; - float fb = 0.0f; - switch (mode) - { - case 1u: - { - fa = 1.0f; - fb = 0.0f; - break; - } - case 2u: - { - fa = 0.0f; - fb = 1.0f; - break; - } - case 3u: - { - fa = 1.0f; - fb = 1.0f - as; - break; - } - case 4u: - { - fa = 1.0f - ab; - fb = 1.0f; - break; - } - case 5u: - { - fa = ab; - fb = 0.0f; - break; - } - case 6u: - { - fa = 0.0f; - fb = as; - break; - } - case 7u: - { - fa = 1.0f - ab; - fb = 0.0f; - break; - } - case 8u: - { - fa = 0.0f; - fb = 1.0f - as; - break; - } - case 9u: - { - fa = ab; - fb = 1.0f - as; - break; - } - case 10u: - { - fa = 1.0f - ab; - fb = as; - break; - } - case 11u: - { - fa = 1.0f - ab; - fb = 1.0f - as; - break; - } - case 12u: - { - fa = 1.0f; - fb = 1.0f; - break; - } - case 13u: - { - return min(1.0f.xxxx, float4((cs * as) + (cb * ab), as + ab)); - } - default: - { - break; - } - } - float as_fa = as * fa; - float ab_fb = ab * fb; - float3 co = (cs * as_fa) + (cb * ab_fb); - return float4(co, as_fa + ab_fb); -} - -float4 mix_blend_compose(float4 backdrop, float4 src, uint mode) -{ - if ((mode & 32767u) == 3u) - { - return (backdrop * (1.0f - src.w)) + src; - } - float inv_src_a = 1.0f / (src.w + 1.0000000036274937255387218471014e-15f); - float3 cs = src.xyz * inv_src_a; - float inv_backdrop_a = 1.0f / (backdrop.w + 1.0000000036274937255387218471014e-15f); - float3 cb = backdrop.xyz * inv_backdrop_a; - uint blend_mode = mode >> uint(8); - float3 param = cb; - float3 param_1 = cs; - uint param_2 = blend_mode; - float3 blended = mix_blend(param, param_1, param_2); - cs = lerp(cs, blended, backdrop.w.xxx); - uint comp_mode = mode & 255u; - if (comp_mode == 3u) - { - float3 co = lerp(backdrop.xyz, cs, src.w.xxx); - return float4(co, src.w + (backdrop.w * (1.0f - src.w))); - } - else - { - float3 param_3 = cb; - float3 param_4 = cs; - float param_5 = backdrop.w; - float param_6 = src.w; - uint param_7 = comp_mode; - return mix_compose(param_3, param_4, param_5, param_6, param_7); - } -} - -CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - CmdJump s; - s.new_ref = raw0; - return s; -} - -CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) -{ - CmdJumpRef _755 = { ref.offset + 4u }; - Alloc param = a; - CmdJumpRef param_1 = _755; - return CmdJump_read(param, param_1); -} - -void comp_main() -{ - uint tile_ix = (gl_WorkGroupID.y * _1681.Load(12)) + gl_WorkGroupID.x; - Alloc _1696; - _1696.offset = _1681.Load(28); - Alloc param; - param.offset = _1696.offset; - uint param_1 = tile_ix * 1024u; - uint param_2 = 1024u; - Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1705 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1705; - uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 12); - cmd_ref.offset += 4u; - uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); - float2 xy = float2(xy_uint); - float4 rgba[8]; - for (uint i = 0u; i < 8u; i++) - { - rgba[i] = 0.0f.xxxx; - } - uint clip_depth = 0u; - float df[8]; - TileSegRef tile_seg_ref; - float area[8]; - uint blend_stack[4][8]; - uint base_ix_1; - uint bg_rgba; - while (true) - { - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4).tag; - if (tag == 0u) - { - break; - } - switch (tag) - { - case 2u: - { - Alloc param_5 = cmd_alloc; - CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6); - for (uint k = 0u; k < 8u; k++) - { - df[k] = 1000000000.0f; - } - TileSegRef _1805 = { stroke.tile_ref }; - tile_seg_ref = _1805; - do - { - uint param_7 = tile_seg_ref.offset; - uint param_8 = 24u; - bool param_9 = true; - Alloc param_10 = new_alloc(param_7, param_8, param_9); - TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11); - float2 line_vec = seg._vector; - for (uint k_1 = 0u; k_1 < 8u; k_1++) - { - float2 dpos = (xy + 0.5f.xx) - seg.origin; - uint param_12 = k_1; - dpos += float2(chunk_offset(param_12)); - float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0f, 1.0f); - df[k_1] = min(df[k_1], length((line_vec * t) - dpos)); - } - tile_seg_ref = seg.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_2 = 0u; k_2 < 8u; k_2++) - { - area[k_2] = clamp((stroke.half_width + 0.5f) - df[k_2], 0.0f, 1.0f); - } - cmd_ref.offset += 12u; - break; - } - case 1u: - { - Alloc param_13 = cmd_alloc; - CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14); - for (uint k_3 = 0u; k_3 < 8u; k_3++) - { - area[k_3] = float(fill.backdrop); - } - TileSegRef _1924 = { fill.tile_ref }; - tile_seg_ref = _1924; - do - { - uint param_15 = tile_seg_ref.offset; - uint param_16 = 24u; - bool param_17 = true; - Alloc param_18 = new_alloc(param_15, param_16, param_17); - TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19); - for (uint k_4 = 0u; k_4 < 8u; k_4++) - { - uint param_20 = k_4; - float2 my_xy = xy + float2(chunk_offset(param_20)); - float2 start = seg_1.origin - my_xy; - float2 end = start + seg_1._vector; - float2 window = clamp(float2(start.y, end.y), 0.0f.xx, 1.0f.xx); - if (window.x != window.y) - { - float2 t_1 = (window - start.y.xx) / seg_1._vector.y.xx; - float2 xs = float2(lerp(start.x, end.x, t_1.x), lerp(start.x, end.x, t_1.y)); - float xmin = min(min(xs.x, xs.y), 1.0f) - 9.9999999747524270787835121154785e-07f; - float xmax = max(xs.x, xs.y); - float b = min(xmax, 1.0f); - float c = max(b, 0.0f); - float d = max(xmin, 0.0f); - float a = ((b + (0.5f * ((d * d) - (c * c)))) - xmin) / (xmax - xmin); - area[k_4] += (a * (window.x - window.y)); - } - area[k_4] += (sign(seg_1._vector.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0f, 0.0f, 1.0f)); - } - tile_seg_ref = seg_1.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_5 = 0u; k_5 < 8u; k_5++) - { - area[k_5] = min(abs(area[k_5]), 1.0f); - } - cmd_ref.offset += 12u; - break; - } - case 3u: - { - for (uint k_6 = 0u; k_6 < 8u; k_6++) - { - area[k_6] = 1.0f; - } - cmd_ref.offset += 4u; - break; - } - case 4u: - { - Alloc param_21 = cmd_alloc; - CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22); - for (uint k_7 = 0u; k_7 < 8u; k_7++) - { - area[k_7] = alpha.alpha; - } - cmd_ref.offset += 8u; - break; - } - case 5u: - { - Alloc param_23 = cmd_alloc; - CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24); - uint param_25 = color.rgba_color; - float4 fg = unpacksRGB(param_25); - for (uint k_8 = 0u; k_8 < 8u; k_8++) - { - float4 fg_k = fg * area[k_8]; - rgba[k_8] = (rgba[k_8] * (1.0f - fg_k.w)) + fg_k; - } - cmd_ref.offset += 8u; - break; - } - case 6u: - { - Alloc param_26 = cmd_alloc; - CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27); - float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; - for (uint k_9 = 0u; k_9 < 8u; k_9++) - { - uint param_28 = k_9; - float2 chunk_xy = float2(chunk_offset(param_28)); - float my_d = (d_1 + (lin.line_x * chunk_xy.x)) + (lin.line_y * chunk_xy.y); - int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); - float4 fg_rgba = gradients[int2(x, int(lin.index))]; - float3 param_29 = fg_rgba.xyz; - float3 _2257 = fromsRGB(param_29); - fg_rgba.x = _2257.x; - fg_rgba.y = _2257.y; - fg_rgba.z = _2257.z; - float4 fg_k_1 = fg_rgba * area[k_9]; - rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; - } - cmd_ref.offset += 20u; - break; - } - case 7u: - { - Alloc param_30 = cmd_alloc; - CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31); - for (uint k_10 = 0u; k_10 < 8u; k_10++) - { - uint param_32 = k_10; - float2 my_xy_1 = xy + float2(chunk_offset(param_32)); - my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; - float ba = dot(my_xy_1, rad.c1); - float ca = rad.ra * dot(my_xy_1, my_xy_1); - float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; - int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); - float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; - float3 param_33 = fg_rgba_1.xyz; - float3 _2367 = fromsRGB(param_33); - fg_rgba_1.x = _2367.x; - fg_rgba_1.y = _2367.y; - fg_rgba_1.z = _2367.z; - float4 fg_k_2 = fg_rgba_1 * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; - } - cmd_ref.offset += 48u; - break; - } - case 8u: - { - Alloc param_34 = cmd_alloc; - CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35); - uint2 param_36 = xy_uint; - CmdImage param_37 = fill_img; - float4 _2410[8]; - fillImage(_2410, param_36, param_37); - float4 img[8] = _2410; - for (uint k_11 = 0u; k_11 < 8u; k_11++) - { - float4 fg_k_3 = img[k_11] * area[k_11]; - rgba[k_11] = (rgba[k_11] * (1.0f - fg_k_3.w)) + fg_k_3; - } - cmd_ref.offset += 12u; - break; - } - case 9u: - { - if (clip_depth < 4u) - { - for (uint k_12 = 0u; k_12 < 8u; k_12++) - { - float4 param_38 = float4(rgba[k_12]); - uint _2472 = packsRGB(param_38); - blend_stack[clip_depth][k_12] = _2472; - rgba[k_12] = 0.0f.xxxx; - } - } - else - { - uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - for (uint k_13 = 0u; k_13 < 8u; k_13++) - { - float4 param_39 = float4(rgba[k_13]); - uint _2519 = packsRGB(param_39); - _2506.Store((base_ix + k_13) * 4 + 0, _2519); - rgba[k_13] = 0.0f.xxxx; - } - } - clip_depth++; - cmd_ref.offset += 4u; - break; - } - case 10u: - { - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41); - clip_depth--; - if (clip_depth >= 4u) - { - base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - } - for (uint k_14 = 0u; k_14 < 8u; k_14++) - { - if (clip_depth < 4u) - { - bg_rgba = blend_stack[clip_depth][k_14]; - } - else - { - bg_rgba = _2506.Load((base_ix_1 + k_14) * 4 + 0); - } - uint param_42 = bg_rgba; - float4 bg = unpacksRGB(param_42); - float4 fg_1 = rgba[k_14] * area[k_14]; - float4 param_43 = bg; - float4 param_44 = fg_1; - uint param_45 = end_clip.blend; - rgba[k_14] = mix_blend_compose(param_43, param_44, param_45); - } - cmd_ref.offset += 8u; - break; - } - case 11u: - { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - CmdRef _2618 = { Cmd_Jump_read(param_46, param_47).new_ref }; - cmd_ref = _2618; - cmd_alloc.offset = cmd_ref.offset; - break; - } - } - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint param_48 = i_1; - image[int2(xy_uint + chunk_offset(param_48))] = rgba[i_1].w.x; - } -} - -[numthreads(8, 4, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl deleted file mode 100644 index 45e7a0e..0000000 --- a/piet-gpu/shader/gen/kernel4_gray.msl +++ /dev/null @@ -1,1354 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Alloc -{ - uint offset; -}; - -struct CmdStrokeRef -{ - uint offset; -}; - -struct CmdStroke -{ - uint tile_ref; - float half_width; -}; - -struct CmdFillRef -{ - uint offset; -}; - -struct CmdFill -{ - uint tile_ref; - int backdrop; -}; - -struct CmdColorRef -{ - uint offset; -}; - -struct CmdColor -{ - uint rgba_color; -}; - -struct CmdLinGradRef -{ - uint offset; -}; - -struct CmdLinGrad -{ - uint index; - float line_x; - float line_y; - float line_c; -}; - -struct CmdRadGradRef -{ - uint offset; -}; - -struct CmdRadGrad -{ - uint index; - float4 mat; - float2 xlat; - float2 c1; - float ra; - float roff; -}; - -struct CmdImageRef -{ - uint offset; -}; - -struct CmdImage -{ - uint index; - int2 offset; -}; - -struct CmdAlphaRef -{ - uint offset; -}; - -struct CmdAlpha -{ - float alpha; -}; - -struct CmdEndClipRef -{ - uint offset; -}; - -struct CmdEndClip -{ - uint blend; -}; - -struct CmdJumpRef -{ - uint offset; -}; - -struct CmdJump -{ - uint new_ref; -}; - -struct CmdRef -{ - uint offset; -}; - -struct CmdTag -{ - uint tag; - uint flags; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 vector; - float y_edge; - TileSegRef next; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct BlendBuf -{ - uint blend_mem[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u); - -static inline __attribute__((always_inline)) -Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size) -{ - return Alloc{ a.offset + offset }; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_297) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_297.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_297); - return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdStroke s; - s.tile_ref = raw0; - s.half_width = as_type(raw1); - return s; -} - -static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_297); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_297); - TileSeg s; - s.origin = float2(as_type(raw0), as_type(raw1)); - s.vector = float2(as_type(raw2), as_type(raw3)); - s.y_edge = as_type(raw4); - s.next = TileSegRef{ raw5 }; - return s; -} - -static inline __attribute__((always_inline)) -uint2 chunk_offset(thread const uint& i) -{ - return uint2((i % 2u) * 8u, (i / 2u) * 4u); -} - -static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdFill s; - s.tile_ref = raw0; - s.backdrop = int(raw1); - return s; -} - -static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdAlpha s; - s.alpha = as_type(raw0); - return s; -} - -static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdColor s; - s.rgba_color = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -float3 fromsRGB(thread const float3& srgb) -{ - return srgb; -} - -static inline __attribute__((always_inline)) -float4 unpacksRGB(thread const uint& srgba) -{ - float4 color = unpack_unorm4x8_to_float(srgba).wzyx; - float3 param = color.xyz; - return float4(fromsRGB(param), color.w); -} - -static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - CmdLinGrad s; - s.index = raw0; - s.line_x = as_type(raw1); - s.line_y = as_type(raw2); - s.line_c = as_type(raw3); - return s; -} - -static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_297); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_297); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_297); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_297); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_297); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_297); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_297); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19, v_297); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21, v_297); - CmdRadGrad s; - s.index = raw0; - s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); - s.xlat = float2(as_type(raw5), as_type(raw6)); - s.c1 = float2(as_type(raw7), as_type(raw8)); - s.ra = as_type(raw9); - s.roff = as_type(raw10); - return s; -} - -static inline __attribute__((always_inline)) -CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; - return CmdRadGrad_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_297); - CmdImage s; - s.index = raw0; - s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - return s; -} - -static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImage& cmd_img, texture2d image_atlas) -{ - spvUnsafeArray rgba; - for (uint i = 0u; i < 8u; i++) - { - uint param = i; - int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; - float4 fg_rgba = image_atlas.read(uint2(uv)); - float3 param_1 = fg_rgba.xyz; - float3 _1653 = fromsRGB(param_1); - fg_rgba.x = _1653.x; - fg_rgba.y = _1653.y; - fg_rgba.z = _1653.z; - rgba[i] = fg_rgba; - } - return rgba; -} - -static inline __attribute__((always_inline)) -float3 tosRGB(thread const float3& rgb) -{ - return rgb; -} - -static inline __attribute__((always_inline)) -uint packsRGB(thread float4& rgba) -{ - float3 param = rgba.xyz; - rgba = float4(tosRGB(param), rgba.w); - return pack_float_to_unorm4x8(rgba.wzyx); -} - -static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdEndClip s; - s.blend = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_297); -} - -static inline __attribute__((always_inline)) -float3 screen(thread const float3& cb, thread const float3& cs) -{ - return (cb + cs) - (cb * cs); -} - -static inline __attribute__((always_inline)) -float3 hard_light(thread const float3& cb, thread const float3& cs) -{ - float3 param = cb; - float3 param_1 = (cs * 2.0) - float3(1.0); - return select(screen(param, param_1), (cb * 2.0) * cs, cs <= float3(0.5)); -} - -static inline __attribute__((always_inline)) -float color_dodge(thread const float& cb, thread const float& cs) -{ - if (cb == 0.0) - { - return 0.0; - } - else - { - if (cs == 1.0) - { - return 1.0; - } - else - { - return fast::min(1.0, cb / (1.0 - cs)); - } - } -} - -static inline __attribute__((always_inline)) -float color_burn(thread const float& cb, thread const float& cs) -{ - if (cb == 1.0) - { - return 1.0; - } - else - { - if (cs == 0.0) - { - return 0.0; - } - else - { - return 1.0 - fast::min(1.0, (1.0 - cb) / cs); - } - } -} - -static inline __attribute__((always_inline)) -float3 soft_light(thread const float3& cb, thread const float3& cs) -{ - float3 d = select(sqrt(cb), ((((cb * 16.0) - float3(12.0)) * cb) + float3(4.0)) * cb, cb <= float3(0.25)); - return select(cb + (((cs * 2.0) - float3(1.0)) * (d - cb)), cb - (((float3(1.0) - (cs * 2.0)) * cb) * (float3(1.0) - cb)), cs <= float3(0.5)); -} - -static inline __attribute__((always_inline)) -float sat(thread const float3& c) -{ - return fast::max(c.x, fast::max(c.y, c.z)) - fast::min(c.x, fast::min(c.y, c.z)); -} - -static inline __attribute__((always_inline)) -void set_sat_inner(thread float& cmin, thread float& cmid, thread float& cmax, thread const float& s) -{ - if (cmax > cmin) - { - cmid = ((cmid - cmin) * s) / (cmax - cmin); - cmax = s; - } - else - { - cmid = 0.0; - cmax = 0.0; - } - cmin = 0.0; -} - -static inline __attribute__((always_inline)) -float3 set_sat(thread float3& c, thread const float& s) -{ - if (c.x <= c.y) - { - if (c.y <= c.z) - { - float param = c.x; - float param_1 = c.y; - float param_2 = c.z; - float param_3 = s; - set_sat_inner(param, param_1, param_2, param_3); - c.x = param; - c.y = param_1; - c.z = param_2; - } - else - { - if (c.x <= c.z) - { - float param_4 = c.x; - float param_5 = c.z; - float param_6 = c.y; - float param_7 = s; - set_sat_inner(param_4, param_5, param_6, param_7); - c.x = param_4; - c.z = param_5; - c.y = param_6; - } - else - { - float param_8 = c.z; - float param_9 = c.x; - float param_10 = c.y; - float param_11 = s; - set_sat_inner(param_8, param_9, param_10, param_11); - c.z = param_8; - c.x = param_9; - c.y = param_10; - } - } - } - else - { - if (c.x <= c.z) - { - float param_12 = c.y; - float param_13 = c.x; - float param_14 = c.z; - float param_15 = s; - set_sat_inner(param_12, param_13, param_14, param_15); - c.y = param_12; - c.x = param_13; - c.z = param_14; - } - else - { - if (c.y <= c.z) - { - float param_16 = c.y; - float param_17 = c.z; - float param_18 = c.x; - float param_19 = s; - set_sat_inner(param_16, param_17, param_18, param_19); - c.y = param_16; - c.z = param_17; - c.x = param_18; - } - else - { - float param_20 = c.z; - float param_21 = c.y; - float param_22 = c.x; - float param_23 = s; - set_sat_inner(param_20, param_21, param_22, param_23); - c.z = param_20; - c.y = param_21; - c.x = param_22; - } - } - } - return c; -} - -static inline __attribute__((always_inline)) -float lum(thread const float3& c) -{ - float3 f = float3(0.300000011920928955078125, 0.589999973773956298828125, 0.10999999940395355224609375); - return dot(c, f); -} - -static inline __attribute__((always_inline)) -float3 clip_color(thread float3& c) -{ - float3 param = c; - float L = lum(param); - float n = fast::min(c.x, fast::min(c.y, c.z)); - float x = fast::max(c.x, fast::max(c.y, c.z)); - if (n < 0.0) - { - c = float3(L) + (((c - float3(L)) * L) / float3(L - n)); - } - if (x > 1.0) - { - c = float3(L) + (((c - float3(L)) * (1.0 - L)) / float3(x - L)); - } - return c; -} - -static inline __attribute__((always_inline)) -float3 set_lum(thread const float3& c, thread const float& l) -{ - float3 param = c; - float3 param_1 = c + float3(l - lum(param)); - float3 _1048 = clip_color(param_1); - return _1048; -} - -static inline __attribute__((always_inline)) -float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const uint& mode) -{ - float3 b = float3(0.0); - switch (mode) - { - case 1u: - { - b = cb * cs; - break; - } - case 2u: - { - float3 param = cb; - float3 param_1 = cs; - b = screen(param, param_1); - break; - } - case 3u: - { - float3 param_2 = cs; - float3 param_3 = cb; - b = hard_light(param_2, param_3); - break; - } - case 4u: - { - b = fast::min(cb, cs); - break; - } - case 5u: - { - b = fast::max(cb, cs); - break; - } - case 6u: - { - float param_4 = cb.x; - float param_5 = cs.x; - float param_6 = cb.y; - float param_7 = cs.y; - float param_8 = cb.z; - float param_9 = cs.z; - b = float3(color_dodge(param_4, param_5), color_dodge(param_6, param_7), color_dodge(param_8, param_9)); - break; - } - case 7u: - { - float param_10 = cb.x; - float param_11 = cs.x; - float param_12 = cb.y; - float param_13 = cs.y; - float param_14 = cb.z; - float param_15 = cs.z; - b = float3(color_burn(param_10, param_11), color_burn(param_12, param_13), color_burn(param_14, param_15)); - break; - } - case 8u: - { - float3 param_16 = cb; - float3 param_17 = cs; - b = hard_light(param_16, param_17); - break; - } - case 9u: - { - float3 param_18 = cb; - float3 param_19 = cs; - b = soft_light(param_18, param_19); - break; - } - case 10u: - { - b = abs(cb - cs); - break; - } - case 11u: - { - b = (cb + cs) - ((cb * 2.0) * cs); - break; - } - case 12u: - { - float3 param_20 = cb; - float3 param_21 = cs; - float param_22 = sat(param_20); - float3 _1340 = set_sat(param_21, param_22); - float3 param_23 = cb; - float3 param_24 = _1340; - float param_25 = lum(param_23); - b = set_lum(param_24, param_25); - break; - } - case 13u: - { - float3 param_26 = cs; - float3 param_27 = cb; - float param_28 = sat(param_26); - float3 _1354 = set_sat(param_27, param_28); - float3 param_29 = cb; - float3 param_30 = _1354; - float param_31 = lum(param_29); - b = set_lum(param_30, param_31); - break; - } - case 14u: - { - float3 param_32 = cb; - float3 param_33 = cs; - float param_34 = lum(param_32); - b = set_lum(param_33, param_34); - break; - } - case 15u: - { - float3 param_35 = cs; - float3 param_36 = cb; - float param_37 = lum(param_35); - b = set_lum(param_36, param_37); - break; - } - default: - { - b = cs; - break; - } - } - return b; -} - -static inline __attribute__((always_inline)) -float4 mix_compose(thread const float3& cb, thread const float3& cs, thread const float& ab, thread const float& as, thread const uint& mode) -{ - float fa = 0.0; - float fb = 0.0; - switch (mode) - { - case 1u: - { - fa = 1.0; - fb = 0.0; - break; - } - case 2u: - { - fa = 0.0; - fb = 1.0; - break; - } - case 3u: - { - fa = 1.0; - fb = 1.0 - as; - break; - } - case 4u: - { - fa = 1.0 - ab; - fb = 1.0; - break; - } - case 5u: - { - fa = ab; - fb = 0.0; - break; - } - case 6u: - { - fa = 0.0; - fb = as; - break; - } - case 7u: - { - fa = 1.0 - ab; - fb = 0.0; - break; - } - case 8u: - { - fa = 0.0; - fb = 1.0 - as; - break; - } - case 9u: - { - fa = ab; - fb = 1.0 - as; - break; - } - case 10u: - { - fa = 1.0 - ab; - fb = as; - break; - } - case 11u: - { - fa = 1.0 - ab; - fb = 1.0 - as; - break; - } - case 12u: - { - fa = 1.0; - fb = 1.0; - break; - } - case 13u: - { - return fast::min(float4(1.0), float4((cs * as) + (cb * ab), as + ab)); - } - default: - { - break; - } - } - float as_fa = as * fa; - float ab_fb = ab * fb; - float3 co = (cs * as_fa) + (cb * ab_fb); - return float4(co, as_fa + ab_fb); -} - -static inline __attribute__((always_inline)) -float4 mix_blend_compose(thread const float4& backdrop, thread const float4& src, thread const uint& mode) -{ - if ((mode & 32767u) == 3u) - { - return (backdrop * (1.0 - src.w)) + src; - } - float inv_src_a = 1.0 / (src.w + 1.0000000036274937255387218471014e-15); - float3 cs = src.xyz * inv_src_a; - float inv_backdrop_a = 1.0 / (backdrop.w + 1.0000000036274937255387218471014e-15); - float3 cb = backdrop.xyz * inv_backdrop_a; - uint blend_mode = mode >> uint(8); - float3 param = cb; - float3 param_1 = cs; - uint param_2 = blend_mode; - float3 blended = mix_blend(param, param_1, param_2); - cs = mix(cs, blended, float3(backdrop.w)); - uint comp_mode = mode & 255u; - if (comp_mode == 3u) - { - float3 co = mix(backdrop.xyz, cs, float3(src.w)); - return float4(co, src.w + (backdrop.w * (1.0 - src.w))); - } - else - { - float3 param_3 = cb; - float3 param_4 = cs; - float param_5 = backdrop.w; - float param_6 = src.w; - uint param_7 = comp_mode; - return mix_compose(param_3, param_4, param_5, param_6, param_7); - } -} - -static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_297) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_297); - CmdJump s; - s.new_ref = raw0; - return s; -} - -static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_297) -{ - Alloc param = a; - CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_297); -} - -kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], device BlendBuf& _2506 [[buffer(2)]], texture2d image [[texture(3)]], texture2d image_atlas [[texture(4)]], texture2d gradients [[texture(5)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x; - Alloc param; - param.offset = _1681.conf.ptcl_alloc.offset; - uint param_1 = tile_ix * 1024u; - uint param_2 = 1024u; - Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; - uint blend_offset = v_297.memory[cmd_ref.offset >> uint(2)]; - cmd_ref.offset += 4u; - uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); - float2 xy = float2(xy_uint); - spvUnsafeArray rgba; - for (uint i = 0u; i < 8u; i++) - { - rgba[i] = float4(0.0); - } - uint clip_depth = 0u; - spvUnsafeArray df; - TileSegRef tile_seg_ref; - spvUnsafeArray area; - spvUnsafeArray, 4> blend_stack; - uint base_ix_1; - uint bg_rgba; - while (true) - { - Alloc param_3 = cmd_alloc; - CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_297).tag; - if (tag == 0u) - { - break; - } - switch (tag) - { - case 2u: - { - Alloc param_5 = cmd_alloc; - CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_297); - for (uint k = 0u; k < 8u; k++) - { - df[k] = 1000000000.0; - } - tile_seg_ref = TileSegRef{ stroke.tile_ref }; - do - { - uint param_7 = tile_seg_ref.offset; - uint param_8 = 24u; - bool param_9 = true; - Alloc param_10 = new_alloc(param_7, param_8, param_9); - TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_297); - float2 line_vec = seg.vector; - for (uint k_1 = 0u; k_1 < 8u; k_1++) - { - float2 dpos = (xy + float2(0.5)) - seg.origin; - uint param_12 = k_1; - dpos += float2(chunk_offset(param_12)); - float t = fast::clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); - df[k_1] = fast::min(df[k_1], length((line_vec * t) - dpos)); - } - tile_seg_ref = seg.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_2 = 0u; k_2 < 8u; k_2++) - { - area[k_2] = fast::clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0); - } - cmd_ref.offset += 12u; - break; - } - case 1u: - { - Alloc param_13 = cmd_alloc; - CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_297); - for (uint k_3 = 0u; k_3 < 8u; k_3++) - { - area[k_3] = float(fill.backdrop); - } - tile_seg_ref = TileSegRef{ fill.tile_ref }; - do - { - uint param_15 = tile_seg_ref.offset; - uint param_16 = 24u; - bool param_17 = true; - Alloc param_18 = new_alloc(param_15, param_16, param_17); - TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297); - for (uint k_4 = 0u; k_4 < 8u; k_4++) - { - uint param_20 = k_4; - float2 my_xy = xy + float2(chunk_offset(param_20)); - float2 start = seg_1.origin - my_xy; - float2 end = start + seg_1.vector; - float2 window = fast::clamp(float2(start.y, end.y), float2(0.0), float2(1.0)); - if ((isunordered(window.x, window.y) || window.x != window.y)) - { - float2 t_1 = (window - float2(start.y)) / float2(seg_1.vector.y); - float2 xs = float2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y)); - float xmin = fast::min(fast::min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07; - float xmax = fast::max(xs.x, xs.y); - float b = fast::min(xmax, 1.0); - float c = fast::max(b, 0.0); - float d = fast::max(xmin, 0.0); - float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin); - area[k_4] += (a * (window.x - window.y)); - } - area[k_4] += (sign(seg_1.vector.x) * fast::clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0)); - } - tile_seg_ref = seg_1.next; - } while (tile_seg_ref.offset != 0u); - for (uint k_5 = 0u; k_5 < 8u; k_5++) - { - area[k_5] = fast::min(abs(area[k_5]), 1.0); - } - cmd_ref.offset += 12u; - break; - } - case 3u: - { - for (uint k_6 = 0u; k_6 < 8u; k_6++) - { - area[k_6] = 1.0; - } - cmd_ref.offset += 4u; - break; - } - case 4u: - { - Alloc param_21 = cmd_alloc; - CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_297); - for (uint k_7 = 0u; k_7 < 8u; k_7++) - { - area[k_7] = alpha.alpha; - } - cmd_ref.offset += 8u; - break; - } - case 5u: - { - Alloc param_23 = cmd_alloc; - CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_297); - uint param_25 = color.rgba_color; - float4 fg = unpacksRGB(param_25); - for (uint k_8 = 0u; k_8 < 8u; k_8++) - { - float4 fg_k = fg * area[k_8]; - rgba[k_8] = (rgba[k_8] * (1.0 - fg_k.w)) + fg_k; - } - cmd_ref.offset += 8u; - break; - } - case 6u: - { - Alloc param_26 = cmd_alloc; - CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_297); - float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; - for (uint k_9 = 0u; k_9 < 8u; k_9++) - { - uint param_28 = k_9; - float2 chunk_xy = float2(chunk_offset(param_28)); - float my_d = (d_1 + (lin.line_x * chunk_xy.x)) + (lin.line_y * chunk_xy.y); - int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); - float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); - float3 param_29 = fg_rgba.xyz; - float3 _2257 = fromsRGB(param_29); - fg_rgba.x = _2257.x; - fg_rgba.y = _2257.y; - fg_rgba.z = _2257.z; - float4 fg_k_1 = fg_rgba * area[k_9]; - rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; - } - cmd_ref.offset += 20u; - break; - } - case 7u: - { - Alloc param_30 = cmd_alloc; - CmdRef param_31 = cmd_ref; - CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_297); - for (uint k_10 = 0u; k_10 < 8u; k_10++) - { - uint param_32 = k_10; - float2 my_xy_1 = xy + float2(chunk_offset(param_32)); - my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; - float ba = dot(my_xy_1, rad.c1); - float ca = rad.ra * dot(my_xy_1, my_xy_1); - float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; - int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); - float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); - float3 param_33 = fg_rgba_1.xyz; - float3 _2367 = fromsRGB(param_33); - fg_rgba_1.x = _2367.x; - fg_rgba_1.y = _2367.y; - fg_rgba_1.z = _2367.z; - float4 fg_k_2 = fg_rgba_1 * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; - } - cmd_ref.offset += 48u; - break; - } - case 8u: - { - Alloc param_34 = cmd_alloc; - CmdRef param_35 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_297); - uint2 param_36 = xy_uint; - CmdImage param_37 = fill_img; - spvUnsafeArray img; - img = fillImage(param_36, param_37, image_atlas); - for (uint k_11 = 0u; k_11 < 8u; k_11++) - { - float4 fg_k_3 = img[k_11] * area[k_11]; - rgba[k_11] = (rgba[k_11] * (1.0 - fg_k_3.w)) + fg_k_3; - } - cmd_ref.offset += 12u; - break; - } - case 9u: - { - if (clip_depth < 4u) - { - for (uint k_12 = 0u; k_12 < 8u; k_12++) - { - float4 param_38 = float4(rgba[k_12]); - uint _2472 = packsRGB(param_38); - blend_stack[clip_depth][k_12] = _2472; - rgba[k_12] = float4(0.0); - } - } - else - { - uint base_ix = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - for (uint k_13 = 0u; k_13 < 8u; k_13++) - { - float4 param_39 = float4(rgba[k_13]); - uint _2519 = packsRGB(param_39); - _2506.blend_mem[base_ix + k_13] = _2519; - rgba[k_13] = float4(0.0); - } - } - clip_depth++; - cmd_ref.offset += 4u; - break; - } - case 10u: - { - Alloc param_40 = cmd_alloc; - CmdRef param_41 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_40, param_41, v_297); - clip_depth--; - if (clip_depth >= 4u) - { - base_ix_1 = ((blend_offset >> uint(2)) + (((clip_depth - 4u) * 16u) * 16u)) + (8u * (gl_LocalInvocationID.x + (8u * gl_LocalInvocationID.y))); - } - for (uint k_14 = 0u; k_14 < 8u; k_14++) - { - if (clip_depth < 4u) - { - bg_rgba = blend_stack[clip_depth][k_14]; - } - else - { - bg_rgba = _2506.blend_mem[base_ix_1 + k_14]; - } - uint param_42 = bg_rgba; - float4 bg = unpacksRGB(param_42); - float4 fg_1 = rgba[k_14] * area[k_14]; - float4 param_43 = bg; - float4 param_44 = fg_1; - uint param_45 = end_clip.blend; - rgba[k_14] = mix_blend_compose(param_43, param_44, param_45); - } - cmd_ref.offset += 8u; - break; - } - case 11u: - { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_297).new_ref }; - cmd_alloc.offset = cmd_ref.offset; - break; - } - } - } - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - uint param_48 = i_1; - image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48)))); - } -} - diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv deleted file mode 100644 index df86875a0475861f6bf6130f7e9f230df7e9ea43..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 66124 zcmbWg1)yeC+4jHToEe7hQo6glhnOJ-7)k`e!eMq=eOr9spj`&7GHLR#Rn}U2H63TGI;>i@*{d0lztSk@jLk6^wf7jI_bf{L0toP+;QNbt-6Lzn9wz8^1)+!(I%dkS(xDF?Wgp}YtN)lWq%$l+|02+wJDYua&QkGY|E&JTq0_f=?XYJ?wu8)5^a< zBhG@o;>=tv2A?!$^w8n|pMe-GK7Ck^g{iw+xU*V>dRl)vt3~0{#@)AC0=~uAVY^J8 zIDWt3I}aZLZOyN`$F#P6s-@vQzTm$6Pt9ZUXcl^6+P8K0$EfZxBf1V8J#6wQ`!;8_ zJX%i-8gvi)zbChz?H)I})tYqkIKE}6+vDq~mIIr%5C2;cOrrs#$Bb!>#C@XXF>M^3 z)hh7XuQdj_M{^A7j$ugm(EWx@96v#yKAZ__P}lAM;|S``^y34rNTa_d6aXo6P!M@{Iuz(<}qy!+M07P zt?eAuZt%4IkG)S%ioWkc`}BQRaQeP`J8u6m6GnBj>a9Mhc}&aSr`iKf{+6FM`>y8E zZDVS$0ee%&??3i4S&M#t&v>m^9o1gc*=t?xxLb`Iw#Asy6YOcb59%IeUu{nxYTve> zn#Yha!^aJy_T2C$H_h)q_I6tCqQBmE(_iCeUPrcLZ#!jdyR*)hn#Z)hc2=X|^g+MY zoXS1CU$>{QQLF+<09@!Mckj(aqA1`e;xPKBJOEDac8g2uH&{Y zY5zr3*Xqn7=2<;4=cq2IW7a41e-X7k2j>-W&+myldv$Rg*K_c{32I+2EMi{N6SGfs zDR@_&A$J)*vgbTi^C-{L%i*%5eEjGs7qq@P$kSJgaM58wlb&q2|=zaHE*eAq}X zh|#>ijvIbZ>z?a%{c3m}|9>Caw9fXr23<+}oQqe1+vj)R>IU$np%aG>AGh2ABi7u| z(mE%uLmxV%J>S=hxj^_?<~L#=&g&cY`O5A2IoVYV(Jp&0Rfh`c(JYrn{X0 z*y_`tw6}1d>VE1mQ^vMB0BqUsqi);#Ru74}BetJy+ivcszZ5=?f%!1ll{ikAK9BVH zbX1Rmy*Ko!W~}~R`_r$k(c{JqpIA@Xw7KrAp0MrM_Ge)~CqCKc-U5lySv}R_-ClxQ zxt^x>8sCQ<_Pm(>WZdb`v*_({byUyw`1h+`0goMhP*>~T-Ln+aKG(fyyLzv@ByR1M z&gx~`jUVQ1p1dV#GzO_tM`r5)Birz$MDJHC+$3N^J(k+ z5gMOu@b6oF1|BhS{Mcz8=IN|HMW3V-hkjpCPZ>9XPj4;HR)_Tavi57Ar+hCmT~+mX z+4WZY&elc5&r?w1%~18lZ|JBg6A^`d}N~@|&fa9Xx{jc+Lp!@W90)zuaWb4R{Rg*#%ea5Z>Td9%ZU3IUBO3Nmy*SUB@Z_BWPTqsTW!^)3wd<@7@5MW+ zsqn-(4xBhY0+(@4?$xfdI;Fu+Yy0=~|BQzHtOh@)?Y~Cm`GSW1=M8>Q+kegYU)Hc+ z(TjIfSHm-|TfrIE?cj1;zwgzqv-LeYgMUu^rY9sgGw_SYNyjkffn7XKC1H>&3ag!Lv`+0cW3V z0512*#=Y8gR+~0>|F(b6KH0KiAJB_;R9nN7cMov#?hP*U?$fJXXEmf3@2G~u6K4!K zaVCJvIFov{>#U~q;vLmN@WeSBoH$2;%Q!#m)vmKTt{3m9PK3AP^sP<-^Vzhkd)%8#Fd@Jk!~@&>=Q!LM)dTN?bf2EVJp?`iM{8~mXLf1<&kYVa2t{G|qe zv%%kP@DCgO<6gX@`W&A1`5IiV&ve`v%k!h7!TZ4R@7bsQ8upnQe3rI<&-HBfhJDTk zpS$h9X3p378ukSm{9A4Rp6l7d4f~=EzIfZe=X$nO!@f)}&T|30y&pTP1A5sytAl#+ zj_O#r57yqx?*)&j@gv|t_4VY@Mw`dseC%trc^>S8xYwOId3-C^Vg3eRzriqJG1|Q$xM>hD;4SsBcpU~jvHuwb%eqn=O(%{!L`1K9` z>juBM7w@RhHa5oz=4q{z8Ml)Zp(l_!AEqruMwlgY&@?K*MPexb!lqx8{|W}G{ZZpo8UwF zve?hCkAp{#JD`ittX*yYj_MihT-^Mu`9f{a=UknP@v1(pe0{1n!R^me?N7V*;hkv1 zhvDB@y$dg|Tkkda2fcWo>Pxtv^SkN~(B|yyn|^mz3xUgci!}IR4ZdW9FVoRX2DFA5b0V&|ZF>)qe01BfDB3x1H~Cw(~_(SNG&Gd;wvb ziH$ag!KZyb@2rl6Pxd`DH)q$xpIsO^rfc`{6Zac9ar~4C+icm+eG+>4vjlRF@_WXU z(Y6>rZp7%;`+&Zc+{4ewd4KNbXZ7jO$8`-KGkomuag*5((Wh_e6S^mlnlzj+=rgqZ zxlL8ASK@r@)MbZTZQ3?2J=Lh;eit{*n0PF8dwlf|97MNkLkN|hLx?^D`h>|t$Hb8Q z{nXpNim9j1Gh*u=H*S0ki8T}Y#*QC1 ze)O9LyGd$Y@Ts_78pZ)B(QG@0*pVx2a(bHv`OKkc}4 zVzWQJ``Wh6Roh078&QnCPb=r#*jOk>&pfrYd0LvYXzsQ)Z%cDT)A}a#w0qkb*89vs``+~KNo%Mg&zJ<+k{!i_`UE79rPwq}b&$pj2x9$DD z`IHg(I3_iZp_~WSURzGBHQSsG_PeU=JI~Pz(8}x0B@KRQgI`|9$oJ$|!<*kZ8TUr? zgAVSRGJ4!(GWTo!OcLkzpz%Yy$80n10IJEO$B*+2Z};<_mVX=H3vOS7I;+2er(H9+ zuESeTt!4tZu3avulTrK3h3?hEzRUwI`!aunFVNuMYVdFO;(e+`;H{sb;6+LDEsoai zUuU&Eco^Rxw{4x(YQ5TbRO`TfapG8ZF52x0?&e2((rzr=??c1K!OL+>=*4+Q3eOy! z0M7VN?B&;4oz&nbH~1+Herhk?QJoIYSkDKKJ-ExSVOw+kvtGWP)g^E)ITJJ9E8)Ij zcU`UlAIJ|jj6bkF7q`RXcQ5!LW>CK;#l^dX7g~JUyMx!@dG>e%?4mfgZ-a-`_+4*d>7jp)TYs?qQfBirNK zw_zIx&-@BhVO^H zct`aKJoE7lxZH2kapRqqr=yw=KBRtz{5E)Kjh6stZ><8(Sk?kh8%t-k5qw&_&T2s0 zUiZ6C)g||54Sdninw!pQpGKP@y*S@>!Q1a8*ay9AozwW8n-kQfN(8|7Fr%(5!`nj&7y0d8iXYl6!tV{d0#g(*grRGsTYY%U~FX^c6 zuXWGGC&1Hk^r>Ef6Q!$t_ipd!S9|$%RY8s~_zvVc^Rbi0A@+Tn zhxoHLY~fD}?E5tjeOeE}HTM0N$3Su}1Ll?os`z=e_PZl(OZp#=C|ro3?ycv?Cn0TcyB$rit$7nc#H_yl#QlEAR#d-mt(M z6?o$UZ&F}CD<#+F1>T~-TNZdgfd>|N>jH06;B5=MU4aJ`c>4nT8LZu(&erFzaxTYV zb1q#U=TGi)aj?&-Vn3sKh$j_za)GB5*w1kBKcc`#_u$Uf=eU`1^10UilFw`I4G%Rx z8+bG|wnVRerC)Pjsg?UG@f@r1{7eze&tPiv6q>2koP+prn{H_f6`K24zr_lTOGryw zs?azsTiSAk=J;*9QlWVcsQIoXdHqbKwpO9}c}i`)Li2N!+D3)uIMp^QG(R(`ZCPl3 zR#Mx#(A*1Zg9^<#QX5=oenwK;wb1;Gr1srH^RtoKUWMlTs_j!~ehyL_R%q@MwNZuU z=N`2&Ev>WXvyJ`Unq8gGI$6VT+jzL2)jC?3XB2RGe#kvP+;3icZi2gJ=C!S#J3Pdl zZjKsnUSOY_qjwj0MmG+JZ5J%?Dg|D(z^fJ5=d-jOTVS8Zq90h`Lkj$Z0v}P}sRcf& zz(*JOm;(Q>z&>-e^LAD~ZcMd(r9IHIn7_n=k?lYN(_^$55hpUINf=ds|83%p5zH!ZNwYw;gg;B5=MU4eHj@JJD99z6cRaqtD{}*BtV+)z>!OBh<7l&RTeXuzhqjujj^@ruD77 zH`*qeG0f?FwENrHx@N1Z&4TJB_90oUBZ} z7-c$&@m8VMSKWB4QmYxyw#M_mns~khHJ+Nbb!wY!*9FUMtDko7wQ0LC*tTkmp!&Nn z+8j%YaW;i(GmdSHvn6%nYy~!s+Wd@R0JZHqYd)~%?t?{Gm9435tDm;mZDYP|sP!?Q zvCX#wb@J^9HlLdLytg}F?f9Lo-}BH{-FQ1w??f@4ZH>1(b>ex?H=dfdJ*d-{J;8F@ z>Zje`gGk$bz_wM>)=jO=yhEtvw%v!KeI#|-jsn|OP21?&X4`$ia@*>seLQvAP5|3h z&3&_fZA?wvzMYBikAHvZ7N#E9PL=N?4@WwLd(90b_!b7F4`GrJ5kEM zoD0v~#P8>58MB&WzLYv+z6|V`)!eU_QxB|*>!t2Eub|duKHIoQeo3v5*CVeN*H9Zv z-8R?OSlyz}uc`I1ojy0#SlzNXK7Xt=+sGfJHfEdmsU9wPyUh&M<7n>H#7XV-?S;bM z80KG`bLKVb0TeY$+Gw}0?@){NG0wX+R*%j8eNpS}xSiEE@V>Q=^VBh2o7+GAJK<`{ z)33&A=AE(TuJ4L)W6gxtk9r~a%wYSZ<{ZvKt!5nC+Gb_!bAdgUXN7FLKy5SLlC^DN zZ22tj`un+Cdx|#uv>dhl*S=xR{amh=d@I1S7gnlm#)*BS!e4zLoc~wG-yYz6c;3W+ zr^3E(ZC|DKpHOqhY5OBd+ar+-(~{+?^`e+I3y;4j1b6#OlC#$)|K z9V6|hC)tdJ|4eZ2gZ6I@u;cSQmM>ZJfwjNmSP?Dx<*UMzUp@#v2es{Yt=kW-`LLRM zjnRH!&AV%ULe0Gvq`&8&eZywee>o<`{#{<%JwLZ`vN-gY4{q@zHx1vvZQm^Xh&JDJ zC1%O<^+>o{is!;qxO-98v^zmk9oawp$1T2D|HL^7u9gzNQ`_w}i{EK*wUqds)%M#Y zerLngQlkG1%`v8={h!g!J?ETF!taA`Jk$I=^Kd`>+JP@_oH!4_ZKI#I=y$WY&S}a@ z%=iFCelClgoT<&_9{V%4Z;qHYS5LtYTlkb^w7VzXhC83}|5|_eg6zaG1aRG^qm&2B zPV4r%k2ML0zMj`|zXS4cUmOZ{UwD0S{f>icW*9B|@et#y+g|Sbc{SUgSYxWzy~po( z+IW%HcRS){(dYk9X~y}Vxc5Il@B7`5{X2!)`B1a}azFE{Wsc8*I|rHL^WeC)+W)Nf zwZ9p^-w`Eeyv=$)0?!^u?R9;6lDJ-8*X>&e+`Q_>($*Ke+p`vmbIl%c;4Sx8_3X_zBzV&^|lG z7~6QU<Tj+CT3<>o9M|U^!cd`}$K9PNP-~`|i)5SR2#lAK#Cc z+;`*Q*1j7rx$nlqCt&y8c(}Fi#>2gb`%XOE`-Jbj!>#WvxaX(eL6!E$8{GHavD<&& zeTUm0-+hOBANJjMxc(m%Tz}tv$8PPr?{N35--Cr;hTV7I;ofI__Z@ES`|pzb{yUs4 zQ2G8l-2V9fJKXrb{|kI9eB9o^ZUGTpG$lX z9&Z194<7D(`5rvn+V|k$&WG>9!>xTEUUJ`uhdUp>4-Yq=@595L58sD}Tl+pd-0}NP zJlxuM;^EpaX>i|(mv-Nam)!T_CHLKUxbx}z@shvL;JzO(?Y<)~x$ntK?z{4E>m|89 zmE3pc;XW7n-aOp-^1XSu>*IU#aBJV2habuM4lTIteRm$a?MD>c&jo%r8@sjd&r3e8 z;QITWZR|dmo?LMA`5rxX^Z6b<-1+rAdbsr^1$RAtpI+L1mmY3=-=&B9T35K4!z{QM=!bW z(!(8(@6*HWuixK>J08Eo4cG2>wBdf9@|}9Pq@uv3ouCyVh{8=e|=9H{Pg% ztNTtpcJuq4Y`C@W)x*7?`ffd3f4@HsxAwhy$$hULuD{=dhWk0ecjV!Y&-dfuj_>t? ztNVUDc56T6l-zgZ;pX$Z)^KazlZR{fJJ*u?uDs;FD-XB!U3s|u^<8vQRa zwQZ+5*G08PcUygQE-ps%ImYpsU#_3n=Na>feU>rj`ph8HSX~BQjM}#PY1hwZByG-t z@vi`@89!sX60Sa&e)^r;RbVyS`-~({`(J?7%J#p6tKU_R>l(0{-!o+0u7xiTcg?gX z_jO=x=JNSVo?O?1o4Ia4Q_p(+3asXHWZL~2o_5BS>!;m)b~EKBO1TeyQ|s}$72ez{ zx1p(L?SBjQGlhEk^Ev`xme=uBTtzN~(vzYCiXWP7L{d6k{1%oVE{xZR`AaZF&^!^R>EtlRrW+ zxBalab=p4$wteR5Z(udQ%gWf)%CXrGZI4rD+)sdwZSA<9qJEm461Z(rK-7{dd z%)v`wwX5s-e;KUi*gnDkRj`Ns*7gcT&3=m=U&f)HxUYkaYm5)5J-*=6p}L$~S@ln9 z(o}Cy`&sTy>Q3snsQpa$HZ}jR{>6X1F1!#+!}W{#ZL#b5PGo z@z6edZMROodBMgg^UViWH@=@q9Eks7aCQCt%qtiBc{lrL39xfx z?c6Lyy)?zcxml*RTW2ko1sfy#XgRQ2_K|!^im{9>PM=l)do7}&u;yvA5nLbjza z7OqUj-UQ5W_&$!go3u z;PhMjmT>c&OpJl>t-$)Ir_BJc^O5x%2-Z)17J6o!t-+3CbBgv^su^1PHnr|G(!S`I z{%#AlzmCiC%Jp~ex{uv|)*0`1V8@^9&>*l{u0z_?Y`+Ax?U$l9pWmHq4|Z?*ylBiz z(RP5Vn|~&1x$V8i*xuUqRMn3BN1yC%*JLnU{TKLnuh|K#<~iy;P(B-F4vO)t&9yUi z&f{Hbo3-chZq(nQc-WWSYrA#k;=5pDJ z>$x|Y`c%dx|2|mF@1GN=3tqtnlau)2FkJ_Kwm^_&Ak!8r%C=Nw4?hN10A zvER;}+&0dQ^W^xf)34!R`;~n%0<4yOqD{^AnS19c>pTkVeZE}h(P-+4xi464Bqi#a@A_L&d+ zo^?A4Y#+<@J{qo`=aXZ=YWBx#fZTXLq_#iS&hK&5$5TACA6wh4lkZ31=5^%9XzIp4 zkyc2i~4Mehhsgbwp(Xy&IKDId+9u|TK1CsOp39LEw=4IVqO6Dc_XoY z4mXzl`Wdx6&jlBPeJ)VXbHPPm^{MQ|fyBEQY)o}yoKLOhy7(M+DcG@R9}J|;WpMTM z>2k2`axc6BtdF`eFQHb8|5aeM%H|hF-6_aiNB-v@II^Uw-hz^kvMB` z2iW|{btkwvf4@gl&zS!JR$M~+dqMg zsm*!2omwsZ{4>~omh*NWTs=PbgI(v+=K;8Sd>#ZFyY%@BTs`;Thrq6<_ZM>*L!ZR> zE4bO0htbsI^9Z=vmq*dm zwQ{^q!`0*S_u8lI$1`Z^d3JpkY(IQ=O1wG&r|H*lhkU-{UW&8 zzn9R||mTIc%=Y#+5H{^wxFnpj_e^;3_}mtf~V z{3~$T=4-e<>iMkr4cJ)Poa1+?)r`R{vGv8)={0vQrsD#sPxjCB@N3}ZInaTo9-q$I zr`%(G(9|=YzF^y_XKrQyn@?L}^#kuoJ}=Z6r@sTEp81;ztY-VPn;D#Tj#I9m&lA5R zp4W2gjC&Srjyu=iS>Y$Zy`Pq2oef=EKA+AGb`Lst+LC_`@cFba=U`4W_4v#MHiz#X zGPb$V)H6@>fNiIq@y-iY_x#GWZ9cgDX#G4pIG=SgxAUXfSN)QI0kCuA*puJiDN;|a zZ-LcppLX8{r=4>v*Uz~?;cxYd)wp(W|mIoW-diq9Ht-ycO^4uz4nqn+ti_>;Rux-Ovs(IFB zWw`62u8-&MDq!{O*Hyvplg`?fYw2pW?mbB#_g3Pr4mYm(oddc4j?wYjPwVt+4Y2*p zdansq%X({5E9cPhX1&$|=kso2u8pRinCpPmM$)geTNj>o_EWB(dGa~g@AkF%^B%6X zwhh3J!P<4)h;4e0k(bSVoR`E=2x3qIX3%|dD;q`c}mOyXzGbM5Uf_dtK1r%cJ@=QpLsIB zzB4hV-$M+-mhU091IxAfJ;V-RV;N_Au-wm%{w(s2;A5z5qfMV}snx}Usk3%Fft?>~ z*KQZ;T`3;+Yq#2Now0rgY>cek?qId7oqT7Cv5YNF+wX#Ho3+~mY~ONiS&F=S!qwyR zJ+SL*Tl?SQVEboZ&0(D!dx6c7wb&c1mi5)9X8VlazGi*D56(IzW*3@zV(tT0bIfVi z4Np7!Dc7%DU*Clp)Ab#WE$cfBEZ64xjs$0YM}Xz7uRm))3VZ^!ZM2zdD7AX}vL9G2 z^Ge!k3|w9ReW~SsP9K16EZDz6pssy1wVHSwb@t16u=8#0e%YUTBE`dTPO9zJnY+ng zV`RTf0jp)d$R|*YWo)r+?bm@|`;u4(fsK{DaWL4up{|en=Mb>E-<=!d2VmoTk90m5eq@KPV4K|jx z#6AXWtf_VXe+br3J$*VBY@h7AW0LFV8n_M<&GuPi*TZ~F zZ*>AVdpj{tL{m@9AA!}%bK=MFw6mXb{mOH~_rk{XoHzwr&WWFZ<=Q+aP6OwhI2A1S zobdNaehNOB+BVwEbuzVj*60kdnqxMmd*w{HdVJ0T``woNJw9inspnbn9I)-w)9zfb zvCC)C^Wf_7IUj87@|pGmH1)*(8Q6B}Q<;1F_;av1wI$YtVDqg|^z$OPdfHtKwynDJ zaXPh{_!8=z)0cwXFV>#Zms4Ls@o=54tnJp$2}7u^0vjXe^wnUsoYV5lD8@3jIBl;1 z+tz;kl3Jec3$F$HeW7~3FT4({uKzEnz>X{P@++|OQm*l@;p*|Z5p0}tjc#5Z; z_B+99`3&=WFjLq145O|28RjlDef~hn^X%PV+gP-ZV|{%Wa1U6UdGDZ>n|BP0e=qn? zHP`pgV0|2~*NXeV&PjRixgV~6XdV9nu-ahS45rP4VB@I!FH;Oa`wQ4y+U&<4snzU< z_P>JFcC7lfe)s8Nu$slgG5PZ;wtWaqyZg-FlX?XF8MS)W=~1xp+!y}bjpyEDaD6?D zqmNqJ{0(dy*EVe)hug;dw$Vo|ZJq$z=DecKlW^Olzxt?UtWSe&BlrCKJJ@q5F`of@ zA5_=JzCR0A&%Ne3uyNGw*HhGLiTOO(nBo7ZxjDQKz5w^0qppu@@FG|}{dfs%+)Ef+ z`tdScJ@>&^z}^Sd)3;Z_#?qG9uYrv}&aF&0Eq&(GO> z3v4@eWBU68YQ}JG7Xll@-@i4F*QVD_tC|{#?fY8e@{Uz zIhF+5m++-(p7u+_^-+({GGOC4&-Qa!uzu>fk1hvx9T%S?z!*tY79%ip(9i~p)%wLA-~2DU$bpQ)|+EU-G7KIOB(8g(1nY0tC3 znqY0__4hjD=Ji=%ZE*Q4unt@w$2J3T)&)DZ@>yU#H1#|StPfVpv%m&mT7EU+oqX94xB$!1{VmCpkG;reyUDxNXdD8-3K$W=pVb%4dPC;I>JB^-;@M2ZC)QX9=pU!Cprba~rtN z0_ys>XSW5b=YG2#*f{F;YXG%cVh#cuGkp7+o5N>;9pK(~)%9@=b_A=ZAA`ZhEuRH; zf~#j=?=0uyr=Gs;0ydVm#NHKbtf}NT*KT0_)YGT$fbElgcT95qyq9@j^L}KVG3}1c zG3LJgUAS7F1+=NzKKDxRYvyxr?};t<^u+ugntEdH1y2>0n4@dEHD(DXMrJLxz7SSVH*ZMo7y(o%+*D$Zu<^4LT$G;H{GfB z2OA^L856;3dCriJr5MZD;AJ3tK z!Rpzkhk%Wv?wBW0t0m?Sz{U(ewC3s8VQ}}Wx<2;naIm^Q_Tvb!am#b)NVs~=p{Zcc zA@%g_D6p}#CHB!^V@<8kp<}@Msi#jr1luS3?wI8IxlXQ`b8Ve59Se4h+3&}J)p8DL zQ!Dp@Yi2&rq2s|h7ZURXH1)(h5v*3etN0N-?d+#qKl9|fbbqfPF;Bwg?|NiEoeWp= z`+0w6JTJ$*X$859ruaAs|{P9M$!8zX%<8?2T+ zE&nOSSmqU{?FC@_AO17AvBG~|^Q_~AaMw{?p8@piBCvWsr(O(J^N4mytuZY<+NHJT z{V0By)tb+(gIP)AUyf!>W7tL?=j{q=ZQgI`QgtPGIufY+zSwn8vV; zKF;M6)Y@_$Jqh+aQqS*RJ_Wv?;`6rSvz?m08ROI7a*Th6J4SW=9bL_f2qlU3d$wR*qRM{%_a**D?n@f>!^6tL6UnF4)8Sm$r8(YOb9)aoz)$*R1#9 zYUMTS19;iD58-O%HR~g|dU?(I7_MGkvp#{FLtCy#pMq~b9AItEgMVW{?l{eB4s*qKF0k_$J~!MPne%zT`l#o6 zH7_{VEA44JA6Q%R%FDdwkMDwD`=+md=Roc?ascW5JF~vmR@d&|MNmuIg~083Xn(F< z81A)DT_0okH)++=W>Ik3_>8_7+|LT?`YeU!->y~9^SFP%);Q|+!@s|vmY7R|jhUQF zgYB!n{%r<%Z2rw#+iUahG{|#}TORDSru@ET1vGX2{W}hFf0xp+tqA^r+8oBxZY)3J zuLO23?3-=n`nwM9dDq`MV^|sN7_#20fYowM)23$ooJa2Y^nF!uuFr|N8k%}yt`1i7 z+)2AN;Av++<@%W?*L8oV*qF%G+WaTib^nfqT$|VRb-}r=`!_A*Ue`xsTMv9OwQaO{ zuP1D^K3JQFzn5a14dCkb$9VGecSCUcn{jT0rXHV-!S>O0G=^NCa!ig*?(e9(7S{6o z9{W>p+ipg?T)+B*^|ki;wFUK-6c5{ORoku8w*g>doh?+<-f*qf2v|UL)^~J#2KB3JL)E?ez)fcCz*=Mn1Ub5!u$-5%hyt(JC4A0+3(H6f| zz}oDe@dSwiw03zX6_gS-8f|*&+q@{Y0z3)Y%hj5KsT|p0OsHdh)LY zHcs~U+Hif;z2@1Eb-?QOQJg;dGqKB4JeHxPkIUA0N$TY&_G<;|to8cDDA#_Ig1h(n z7hJzB8hopQ>%UFG_1~`G{tdnzYMy!70B*m^dD#$6J)ikE2CMn}?7G=!wanY5V71KK zW?(hTN^&e4i{YMpiU+{i4&wT9xw_oKR+Y?Pad+d8)wd^tbtd=?58?2T+_I$Xb!9=KkO*QQ>d;vUEjsHlkX_7 zTE=iR*l}eI<@)IFJReirT?28}z@PQpoZ_)5CFl8OHQt!IKgBiJf;ww(95Ko@_))=K zgOdxc-zg1#TEX={qu~0V)!^q8T>lFSuK&*){Gx*Ee_6rxzoOt*fv>K4=JR-XfBecl zaRQop_QZ)`wd@J!L@jIdW3XEG#7SVaa!>pOz8QI3W9LjRcE0q_o;Vfkp3p~|KF+y% z^8FNC<~tp(Hn8^bUUeoo`J8jPv7B%HlkaS>`Sj7Ik87r$eCLA8eCNZ}&Z~XQ_cL(v zxn^=>xnBAw--TfF>7z{_*IYgME(VwRE`_UIQu~KN|eSg6sck!S#Q=!QUvj{{Jet{_i&Udj;43qk`-INrQh@ zaQ(k5xc*-^ILq0ye)@M7T>rie-ml>L&r)#xXDhgWV|UJ)=lr^j{O+T8(git<~{*WKF=Y!u{?kDPrj$X=F>-;KAwZ>$@h0~ zneSP++B3C}^Z7hD`8)^Z#`65rKlxq&n@=BY`aDgoo_sHX%Y3iE)n2ZB%=a2N`8Eks- zJ^9`Tm-#+~t9?-WnD1k7@_7xB8_Vm5{>k?#*nIkE)5mL&dh&e^F7tf>SDUH!G2d6< zjO?cuVHdydHvEq`DOr{ zPakdiw61~R-;K3+rBlW#8Y?Tk0q z$hqNOH@!ycFV`pj^MJi(;*;&8RW#2-lfCBU{( zH|LVnOHs_ZQJqt+Ut%r|b`K}!GH|unmj%1$VqXrfmYmCjZKIx?D}c?pLY-5tUt+EV zc3&mt%3w9y`<%E6*!KP`xAP>|C+$}QyHC=7b+B66uK~8bKmTldxjwe{S#C|RcAxXa zdCuDj+inyO|82ZH=j~GC9jSMv_?-70>O9x2Lk#cl;Tsg(=YWk1uHU8&-oN1bZ&`5t z2NZlp=5*_tJ6`SU!tIymMCNEcH1#|;tPfVpKCsVf@!t@vmgk0zz-mj@F};p#0(UN) zQ^zD1J0AUWUT+5Wyw*pXKF*1H@@)<-^KAiF8(8Gq3Z8tBXhz{6{v`Q8(5o^p?V4^2ILbT6=4 z_NaYO%UpgRtd>361y(EfXgA!kxd-f{Tx|dJ&z>0ycF*XeO&`aoo_r&~YPrsg1gkkF zuT!JIjx&2mu8-}V_tCZ8H4$e`y0HzXcnqOrO@`KZAL?Ng*JK2B)?^GZ$~Bo#aMxr~ z!S$O`@B_gI)jVr57H*z$O~#?AXHCX~)v_k`K`nE%KUghmG7+p+uE}J$V{?w}qg-tN z^v{|c0Cr9E(WZ}MR8PKx!D<=9Az;UqHIVD0zw`Wq+U^>Nvj)C@A4~Drmy$Kuug0UO z$532@anxCZ!--L@!BGWw4SrZ~{f;a63E&fJp8g*JH&3|+N1~}`4W@$CvIh1+Epu`- zSS@RC3|OsPgJa>2%{jG?aKWlJ2*fr2cn?8u1Gzr>JI^Q8 zcGp0hHP|296pDxcE^F3cQjI52Po}sA2T*4XP9a9tz&Ss?;Ob}AJpDTrZk%$CPeW7B z9RC!o<{aB^?{{aw?Td3@4!PL)`e)A00y}5=Xw%0&s%Os52CLo6wZr?#xnQ+(DDgQD ztWWxMK3HuK#e0MEbphD6+MG{$=F@i#hfqBHH*GVY2i4g5JecBq{(#!~bp9?SM&{G^ zT9?;6^LHWKcIEtCgr=T7cQIJa&#At@T@n9F;Od#r%fR-<`7x(lY(9ORPseZt*mrOG zXmgxu8K>{<{2Tlp{yV)H=V3K=oQG2!=aJMI=e5MhIG04fzUCR{)o|ODxF6c_Z9*`OPM~-kOUXEotFhxep5i!9q|P|+B1XpPyRv&~o^jp*w_Q2T zJJHlL&fkO8GS0Q|{{vh-o^_5AHf??>!Zzas%4yh$9yuy<42T? z^T#!IoF`En=TE3J&IgH+ac+$MP|Y*WKf`TTj`Kb=^^Eg=uv*5s5&jRr)ich&fbEN8 zGpAf^K7AdhWB4n0b83CGIZm~V)6ZBxrFfh|$v97~vEw|A;y6#I&N!bSM#i}n`cpN} zI3I=Et{mrMXzCf~-@s}a=a%?C4p+}Op9I?%$7W8s*nIjrPRH;xco4Nd+8n1^#<>%= zvnd{DP%_RlYwS4BqBzcTs58zNh>>ynnewHYXPnQ%ZC8%-IW+Z*^LemZ#<>Ij|A4D! zoG*gyi(@mVTx>pl9j9Y>8SLj?eY81FwT#o}s0%0_=Tb7x^J?rk&!;%fpHXL=ZxSQp z+z0*bnrED^!EINL^K~@!jPsvhwT!b1|2N?38RuJI`{LNlDHoehU&rYf{skURt&cXx zsg`m2ta35M&ccd?C7HGpAf^K7AdhWB3H@v#mbb9H&~wc{#SLDIQl)GR`Y&d>Qps6vz1s zYRBpLJ|{-TY5jG<=Y&t!(c9k9;C%|Nf4_noZ>9#HrQrI{UU2>AZ1A}YuK#=m*METq z|5m~EU%24&TF|3zC=^cGv8NWwLI%P=W6Z& zpBujct7YG?{4K3~)}J2DwRCT|CUUXsp}+gYSe;;>TlLYVk87%)e0{-Xz8T-;KJH2N7z{_&k6P9TLJ7I%6BF!!qsA53GAB3zA{)X-zTpEwvD>y&631g73{g>d1Fqweu=pn z*mX_J)xm19uK{))VqX)imYi#WZKIx?YlF?1@7v`1CFVL{=Q=Uh1*_TK?}pX`+dk*9 zT%WXGAMD)uS?N;zHvp^I-s|s%VB65awf$Gr#vGq`?J>5umQaDCL{vpKkIvjyBX{_PXnYzfy#-QO)&+X`$f zZT4NBzAuISHx!Q>DfZpDys5x9gYEk*)b_(RTN6W{8_Cg+dRw^KHk40@F^Jm3pR?As z9c2fKam4mRU%7hX?g%zt;_d`j8%#0oF4P{z)wVNbH;Qq@#?@D@p19uu+u!ir;pTCU z9M5;*wo}jF3Eu;3J8k(J!Fz%^C0d^)_1DLo>gKe6-{2v34#eeL|CY8MoIWs)HBy(!R1`*lbGYc#?;4t$nBSX&ip$s z<@|dt+)42$&(quC&cEmE9TemIp4w+HwF8LbvqtzqHFqq=oCvp_zb7W21XtUiGMKx* z+GKH2a&AsR(_eey9SF8hUU$tc7n@gq?+wO080>XNA8l!W2-x=N&kx{g_9s4vg6(sz z8;8O5Q;*N#wa@Pu*HrLP6tyEMSJizuy4G!Pe_RW-?0NfnH^rlz^FP2d=XZgf^LwZ> z$Hx-GIZt1XgWJZx)nv@$;qHw^Dca(9V(pi){|K&j0wr^PGFbg2O4|JdZ2Zi}DRBMN zb3OhkSp76g+MNzA+noW|Pdz?o);^iXbKq)cQ!#~&%#-+uzTzyD0_vxfKA8)>JW&k(k~2~9mdH`hLS z?$_rxXzKB~rS{3Qx<0p}spmR+8+bbOBn7Q`?xg4_S$2+yReyGU)R#H-UD_F`nVqQay{@2`{1s} z1Jt?R+)F#JH=cLm2PoMa4}xtI{uj7Dr`LUW2&|8~eUPUQUaKCWcsxkSp7;ycKKzy1 zKDZ|ypi#A8+vIYo2~R1~;x_u&vzR!*rhh26isgwVO*VYxD%zw&i|&5=}im zPt`uhXEG_9^@O3YvQQ^eQ;#kL|U`_F7>xzrOa@wbACL%{?McoVUQPUDo_A}X%+;zb1pL*7y6Kq>;Y1aqr*uwk5%Q!Q@^-)j1`hnF`vIaAv znL~SgX97F!l&rzb=-OWyZj8*!9B|{rK38qG@5Ykn`Zzb(>!W(sV;-H#Ors7Yl&B52!mI^32Ed=zphpJWg>woP#F{{A7WjD)7@_=jj>hjQiWf z)+c-+xUq6BEDYC2-E(14Y7fr^ZHrJ=pcq%2IE#VpNBH7!<7U2=fa{|kpC!Te(dV(H zsF#B4ryifB!DWA!ftPWYh3li9&x^}}jit@`wqI(Axjfjs+KerCpE_2@<@nOhYjFI; zrJw%V?2oqccl}-0=P4fLK7JOSHGB?izJE~XKEFOOvyWZpjT(H@2H(8FcWLnNH25A3 zzE^{fXzXu> z*WdjskN;+1{mZ`hM^o3|{VmVAx&_$tM4NkEzCJVTb#Y6u*GBtf9^2}Zwp)RHsDr&S#!_TZEJtWroUVtuNlt2bM2fb{vdGrlbGA1 zsmEuB+NYea9nsXY)`P*eQ+KU*ruJ~Hwe3V1PBFhYv3CVG^Y4bHZvVZO$m72|SpTvw z-$hf`-)oIL{(FKme;MER(A4$!S|pEcZ*bPy>ykWc{e7@&ZJ*3zTYb{D3#=`<_W@^Y z=923Z|8B5x;y(nOvFR_@C;mgh=8XR^aK@&;Tp!ok`FF0J^Z1VhCw^j%LQ}WB*Ijx1 z_XVf_@!t#l2Y5Ov}m z3{L*!JOoWWK0l~^%DsLlntIN=!@#yv_q;oT+Qais+u@YcDCQR@_Ed1jll(`asXM+S zspauM2HfoL57E^1Kbl$||Kq^=mwi7TO#61OzY(1N#Q!EVb^U)$EsyOt;LP>SV0q^H7O-<|pUh)h zebV+;u(ssB4Vsq*uK%75zIVa(@2a`yfqU(4iu<}RwtK+tN$&xVgFU{e7H{c)gZo|ZAF2I( z@+WG(7O3u}=Ks~7`46G1`>2g|KehY#0ct-pK1e+y^~@ zYuY~pHlDtooATJ61E*h}r}DhBeje;QYuji`F7+~(^O<-rf)mg4TpruY^12_M^YY|+ z1?>BN+h|KJ^)i=hmUyp$%XxYoZf?Kt_qz5^_;l1uQ}orIxZ29N?t#R86I{l93tq;3 z8*W_R(dnx_akZ6k-BY=yzY8{Q`E2lSG>iT=Vl_&2PVDlPBTjG2LHgEYH z_cfZjalB5;b7s)Gr;km5!_sxnb6er_nsn8UuOpU^F+qcmN>J5n`<*0n!0hk7s=y42e`R5 zbE2v1@4ZT%ymNugYaDHfGY`1AHuIvX8^?Q`JpS{8n`^TGn!5hp1LeuPAlSUd(U!IO z7TCG4U;5bAwNWqE#%piR$Az)kSAD&=%41s;Y`?U5pOxDO&;P~1UQ=wNExFXoTwdQ3 zZ%J&)wFFom+tT3V@}4bEu4TY?lgl>Rl1shJ!Y4FtATBkcU!B&)%NEG8lN@b#>stY zO}KvQ8N*s&<7-P_)ylqlA5F}4v6)w2pM&Ii@3S8GUTo^|-vFF7@i|FuTc45i&-!cx z_WOT*w59zf;NUlTsP#`@wgo#@eYB^;Hq^kpxweNm79_rdA=-e9?Tx~TO}U-kjp7k#v)FY0AqeCA8+ z;n=+X`MFG-&t(IM^ESofHA+60y_xmo;@f$Dli&w3C!WV_ z8>rRN{${W-<*wOpz~!3V0$0;7ZEgh{Cv9#6muvQ0xSD=hv)_S@rOkfaM6H$>w}aJ^ z;|_58<+$#In=|YAd$2z0S*JgMjpGsRu39VC^X^(J*Yh4U+v~60_^z3Hc`yE;9lm;( z+I#Uk)c-E zGivF}vtZ-q-&}qUu4eIYue-N1=I7Ckk!$Kdz_w90zI#wD{eBVbJeKqJ5}JDY{W4gs z?Ds2h`>oBmFHozc->-p@_Yz3mNw5Px&N+{F+T#oLTx#vzCd#>SD~24*!tQJb8636z5*i|v`yt&$``&zD`|c5K{#tYF z(H5w+@?N+gn(g)1ZhWtC>aL^fnfX~5oO8%~g*>)J!LFI}>U~3=@h=W;&e0NR>iRE6 zEsy_F;O2ZSji#=@_Z_*psj6l9kNZ@c_aOQDd{P{MZ8`AU)b`hYnalS2IZo}4)4kmM zUBr)R=kXvVpT$1{>;DOL#=atL)r}|4GlYBjGm1wU<5RdXKBrEMRa*UM#}FsR0DQip zczi*zAKq)fq!{CCYM(EBHd&o^<^6o^nrEKYfZNXR6^*+l-2Jf`Mca<`oUH}cW?@T` zrR_)6nq%|mW848~>wwjAtzQ?cX7R}R?fTd!?SA*-{HzCdj?}aM>w}F~-jglY5F#t_HV;=~%tw*%2Yt4MowyCvp zKDI@(z5d#bZQsnPE#uk_td@N;2(D&n?i2Tz?%VSp^RG+M-+5NcJa=I1s{=yQ9AFPkM?VW!$ z+Yi7#39OcD-DI$u#e=RfGwrsnmG+$5Q@~#9)N^hh05+cMmUHw#xV|3tK_9iWIS6c< zfkm5x;kGfqZS+w~n?u01anC2u58$>*fAvwz8XgXI4Kwy5z{W79d-F)J`%gb}%Jp%7 rx!2qy)`>S2Y&_SBsyd4QsO9g8Xj3cquY1k*o^MBk&F8r&_FViwkzrds diff --git a/piet-gpu/shader/gen/path_coarse.dxil b/piet-gpu/shader/gen/path_coarse.dxil deleted file mode 100644 index 2842f0d9e87824de3f74a086d48daa1e2df16112..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7012 zcmeG<3sh5Awl~SmO+s$M4FOCDngkHhqTKLDG|dMh7HUJ`X{b({528+yB2Y#&?Ia;W zRFq)BM*JnpOmXJT8c^Gj4$~$imZ(_cXDl)|tL@fw;Qneb!s+t@YNd zx87On+wJeVC75B0&&TE`T5!fXhHH0(~9m zm7qTYeIp=PK`&9Q#OYujYAZ`tNJ4W0zVHuob;)XcE5PHS1^)nl0FC-1oCu5wU5HKw zod-G$I?+OS=FD_j*V*AT2;oEg1hS-5URsAhkQSmshk<}R&B`J|5C&-L0A8VVeX75kV9AC?-KP3IMcrRr@3 z_DGRZW%~uhPgRT-u-R#gJ4bF#EO;bNcoy!|iPgc#a^4Xg zugUDpu{e2`0%&92)#tspUl>*v{QbFtXCzbp&Byh;CS6i~#xk#Lb+PFtzX`B6^_WfG zu7$^}=RWV*8E6v6C2Tsr%VJaGf%?w${C0fmnSLGD#Zu-KTv0_)IWDcUYA+N?oGG%# z-CU<_&fM7Cu#p1Btlz5Hn*;1^;0uLs!)Y~6TCIq-+0Nb)8f*;>wrFVtLjbw|Ber@O zZJU->7eMY{wE7uNEgMLf;k*M#P8#6Z9ApDIvl6B;$|KX4K^PZ;nwG)Xv|kYuv;m!Q z0Aj+MNRU1CJbbjw^mLWhPa%;+1P^F#`vx*uJ3WLTlem=%yJshmavn;5D*L1ig4lqP zI?oZw=M>pn7>O%Bf|W7PX<;Yt{4p%78=^hu(mvPYe9YaI?JRoE*2RIo3$ym~p<5MT zT?6oqS^#4xMRpaLo0q{eFl%!uz0bULuyA`V<-nFclHA42J-EGXk$zEF=c4aA`^*iJ z!dj-G@J|@bwTcgj(DR5+lu3u>y2>gGo-Amhh!g3DRJR@`CRSK*To9osv+1;Stuj!A z^|$!epcjW=`TLp4J_^U@&MSs1>0K;q8p#dIp-f1C44lk_KWWl+;}M&6o2t0h@Pi`s zBCM;weUEe)7R6V^4{-y@%K`Cm%V?ok1;TYg}wC_^TPM~?EyoBgYRkd{hr;BU5GmS}# zt6RVImCDz22dR=Ax`kE5UF*g6m zh1i#PWe3(>6_oKmZHE>ro9@P~$^3Tpf~?h+HUp*ZhJbYeSLqpyyG$uaq)RZ z&r%>`eN|megZi~iRdux`mftdgpy9QL1wT13GSV&s&OqF0F}Tw-x+&IoGqxgCR)Q*TCHOXdIt*s5@SC$vC~k+yaCVLk6p19ICYw>Q7}YV?pH2vp)4QT+EQ0d z+fJu76f{>?)3#Mp>bEvmi<od=0`sOR zU1ps+yZIoLzV3ssCP`Ny&Yk$*ZKhqTp8EI;T4^y>d>?9T9eGb&>b;%YDpcpo^B#@n zSpkM$KwwhxV4NckpiCX<3bEvc6?sr&_N0IFr|vXDU@0Zf@yf`1Bn&mB12v{wfxM)E z$bCdRGm-}XYS+(yZNL2L;2Q(m`-nO;3&JD;4e+75^d+GEv(}|laIXKd zF4_Od>rw<+Tpea2o&x_|^@&hurv0zhr!cVRe_EgZgL)(d{QuK>lt}lUl=ILqL#+Vu zi9kKd)Bd|!w9z73E{MKWM%QaabaEzq85wAUwBWIp@aKUmBZ+ijx}4X^7zkGKdUV4w z?0U!Mr0y`s=|z>5^_!P$DhFA(#NrI&AK48u@47!9{IuP)QPFf)Kj)*0HHlgC@dqIH z54m=11zFj?qv}Zy2ZmI(Ji7U zdpMIx4DaVWQs=||2E+hA+0^-XhU13;o_yP9x4+-Rc@md$e3<9?VAuN$!13yDuHVX1 zVf(i4{`s^(oY%rJiRc4Ce0rA|WCE`R=pBzRAd42?so)pA8KgDfdOIYdBUh6g@~>%5 zCoLX909n>$p@pS!w0OdCoJ&HKsiBF!hu~H(@|j(TtB1B-78YeQv$Nn|LdYppE`Z|^ zI#{kfp%99W2WiI{B=t@~0fW@S6G3!~`F5Dg-EU56Nrc_iO0M0L+=#>ZlcdrhZK>Ww z2)>@|utXjj*r-Y@6(P3g;}>M7D&!|5A@=Svgm6B4<- zl}a`y2dGmmJRZ0MxrO@>)ATSW_!1n#;KsZK!(!gduzZ0m?u^#KX{11M95<4MmV-P= zHejNBvEZ>-vy*90eT9B6Z3Mpu^M+FKe7sPLmWmc08_Sod%_HWxu(L914Q*FaBqhr0 z99yX)X=FiiSF&u>a2N9pT4d5_`b4jc1(b~O587S_ioGfla<9vzO=lX7$y&;k!;MTx zng9b!;2)%uq$>KDgtV`f(Pk{t1PudZAvbBtp*}YVGo*4xe#r9TCf>_8bkV_No79*O&n9`_$eO!^FhN6Z3!Oj-}1KHSu1r6i`AvbQy z(c>PpQl*J%zyr36Fc#SXsSJoZX>WafDfTSTSL|by5>!R~U=vqlTZV!^V5#?5!-h@sVcKFc>G~k|#B?z~FwB^u6>6 ziP+RirN8m0VbpLbo*>QMPL1$3$Kmn~lAEQ`=!7K6dC6ub0q0D3A{Ux-_8ODbtSOBt zW^DHM;JlIe+1pFV^TqdQ_qEFH#$*#<3O;4><=qQ0jpha7dz||&Rp)U9zN`z?NoGcY zQH7!q>=Biy1ZKtSN`cKS+KIF8L1WD2GD0RDsc&Z>{d84w*L5vzw6B2s!4hQD!?cb$ctEI2nDZ{!Jk1q zdBhmMioUZQYAkCyORr!!%wh2BbbYUE`DmJ6OQeyVo&5#X9y}hQ5y|NExt(xD>Ifv* zKt@9OX=SUkxnK}wZ0j``9CuF{?lqU8!8PPV*lm-$^NJyEZ2DH*Y7&H9e@%00p?m^2Hdu!WDOvofp zX~5N4zOuJ6?uv!+fjnunvl+O4_!RGC;zX}#`KUp$%|GY>cbjn^Fu;>Mv3n3MkS69$ zNOa~_2GdyN1U&PF+=NMo$TJ9+wzV=�eixLelHwBoNF|Ak-I4NR+l#zQP!t{dnhx zV;F(BR7$OdBDNtyU1zvVrg{sCC`NduB7;(K7NTCs`av;=! zd+9GX0a5qTZm84D_-yPvbga^LO{9)V#6a(N$U^3uDyfQ=N)hG)&zK9&qpch@ME}XZ z!RID)iWuP^4qD8QmI^`@=49?Kxf*BMSf5sO;krsX^P;dn+4}&VgO+!G`gGy=ro(Pa zkNFanvS<1TnDsxF| zXIFTnzpFhHB7AAcm9JfAc2ITTx>6?{>jL@oWY=y?BeJu>g8sJ7iwNDn;L@MhY_2 z66+xUjAFed%&eX97L@rBJddC9OZtI)(MEg0CL@v>A^ zPC3=`accQX_zhe+5np!8l7BlWao@y;lK-{tCJPhkoF7CMAvz^> z^Q?|p@4He$uZ%r)bQF$neAyN%dex_uo~Pa+S+OiKa`kBrJ(=Yrh3s~%jZFo1n{jqz z-kxIFtOK;IC@=R9%=*%{bd~3sSwioMiu6E;U@TS1lhG`c6f=}sw*hPKQKc3JfknsF z$gRK(EZSh6dpJn;RrXM@(jMiSg;!K+GZb~EZ7(kwgdnN(xy=cR^2m|PbLXEz4P#&0 z=5FK&nF@@C8y0xAc%{QBXNkAbzihfi(gP9Tv{^`)7Uhb0KVr-Qn zdq+w3-!4nM(d1R3MY;F)38-Hrv|P>JQ<4N0E$j;|@^Di;#p+dxoQ{&5JufbP5W=4= zNjY1rUG<9_Q$NO9 zg;U<5oiVkZYb-RmgW{ty5X}MNP3tsA4MlXB_Y6wS?(^}b!L$+4NEi=YL4P1O0^41H zPRQ}X9$=ovXLEMN0NcggcDE2P@pgcK=#txhTMU|I3i}@Xu}}oro@BWI{5ulu-vDH< BbRPf! diff --git a/piet-gpu/shader/gen/path_coarse.hlsl b/piet-gpu/shader/gen/path_coarse.hlsl deleted file mode 100644 index 106fdfc..0000000 --- a/piet-gpu/shader/gen/path_coarse.hlsl +++ /dev/null @@ -1,673 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct PathCubicRef -{ - uint offset; -}; - -struct PathCubic -{ - float2 p0; - float2 p1; - float2 p2; - float2 p3; - uint path_ix; - uint trans_ix; - float2 stroke; -}; - -struct PathSegRef -{ - uint offset; -}; - -struct PathSegTag -{ - uint tag; - uint flags; -}; - -struct TileRef -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 _vector; - float y_edge; - TileSegRef next; -}; - -struct SubdivResult -{ - float val; - float a0; - float a2; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u); - -static const PathSegTag _722 = { 0u, 0u }; - -RWByteAddressBuffer _143 : register(u0, space0); -ByteAddressBuffer _711 : register(t1, space0); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -static bool mem_ok; - -bool check_deps(uint dep_stage) -{ - uint _149; - _143.InterlockedOr(4, 0u, _149); - return (_149 & dep_stage) == 0u; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _143.Load(offset * 4 + 12); - return v; -} - -PathSegTag PathSeg_tag(Alloc a, PathSegRef ref) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1); - PathSegTag _362 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _362; -} - -PathCubic PathCubic_read(Alloc a, PathCubicRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21); - Alloc param_22 = a; - uint param_23 = ix + 11u; - uint raw11 = read_mem(param_22, param_23); - PathCubic s; - s.p0 = float2(asfloat(raw0), asfloat(raw1)); - s.p1 = float2(asfloat(raw2), asfloat(raw3)); - s.p2 = float2(asfloat(raw4), asfloat(raw5)); - s.p3 = float2(asfloat(raw6), asfloat(raw7)); - s.path_ix = raw8; - s.trans_ix = raw9; - s.stroke = float2(asfloat(raw10), asfloat(raw11)); - return s; -} - -PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref) -{ - PathCubicRef _368 = { ref.offset + 4u }; - Alloc param = a; - PathCubicRef param_1 = _368; - return PathCubic_read(param, param_1); -} - -float2 eval_cubic(float2 p0, float2 p1, float2 p2, float2 p3, float t) -{ - float mt = 1.0f - t; - return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0f)) + (((p2 * (mt * 3.0f)) + (p3 * t)) * t)) * t); -} - -float approx_parabola_integral(float x) -{ - return x * rsqrt(sqrt(0.3300000131130218505859375f + (0.201511204242706298828125f + ((0.25f * x) * x)))); -} - -SubdivResult estimate_subdiv(float2 p0, float2 p1, float2 p2, float sqrt_tol) -{ - float2 d01 = p1 - p0; - float2 d12 = p2 - p1; - float2 dd = d01 - d12; - float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x); - float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross; - float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross; - float scale = abs(_cross / (length(dd) * (x2 - x0))); - float param = x0; - float a0 = approx_parabola_integral(param); - float param_1 = x2; - float a2 = approx_parabola_integral(param_1); - float val = 0.0f; - if (scale < 1000000000.0f) - { - float da = abs(a2 - a0); - float sqrt_scale = sqrt(scale); - if (sign(x0) == sign(x2)) - { - val = da * sqrt_scale; - } - else - { - float xmin = sqrt_tol / sqrt_scale; - float param_2 = xmin; - val = (sqrt_tol * da) / approx_parabola_integral(param_2); - } - } - SubdivResult _690 = { val, a0, a2 }; - return _690; -} - -uint fill_mode_from_flags(uint flags) -{ - return flags & 1u; -} - -Path Path_read(Alloc a, PathRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _422 = { raw2 }; - s.tiles = _422; - return s; -} - -Alloc new_alloc(uint offset, uint size, bool mem_ok_1) -{ - Alloc a; - a.offset = offset; - return a; -} - -float approx_parabola_inv_integral(float x) -{ - return x * sqrt(0.61000001430511474609375f + (0.1520999968051910400390625f + ((0.25f * x) * x))); -} - -float2 eval_quad(float2 p0, float2 p1, float2 p2, float t) -{ - float mt = 1.0f - t; - return (p0 * (mt * mt)) + (((p1 * (mt * 2.0f)) + (p2 * t)) * t); -} - -uint malloc_stage(uint size, uint mem_size, uint stage) -{ - uint _158; - _143.InterlockedAdd(0, size, _158); - uint offset = _158; - if ((offset + size) > mem_size) - { - uint _168; - _143.InterlockedOr(4, stage, _168); - offset = 0u; - } - return offset; -} - -TileRef Tile_index(TileRef ref, uint index) -{ - TileRef _380 = { ref.offset + (index * 8u) }; - return _380; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _143.Store(offset * 4 + 12, val); -} - -void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.origin.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.origin.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s._vector.x); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s._vector.y); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.y_edge); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.next.offset; - write_mem(param_15, param_16, param_17); -} - -void comp_main() -{ - mem_ok = true; - uint param = 7u; - bool _694 = check_deps(param); - if (!_694) - { - return; - } - uint element_ix = gl_GlobalInvocationID.x; - PathSegRef _719 = { _711.Load(32) + (element_ix * 52u) }; - PathSegRef ref = _719; - PathSegTag tag = _722; - if (element_ix < _711.Load(8)) - { - Alloc _732; - _732.offset = _711.Load(32); - Alloc param_1; - param_1.offset = _732.offset; - PathSegRef param_2 = ref; - tag = PathSeg_tag(param_1, param_2); - } - switch (tag.tag) - { - case 1u: - { - Alloc _745; - _745.offset = _711.Load(32); - Alloc param_3; - param_3.offset = _745.offset; - PathSegRef param_4 = ref; - PathCubic cubic = PathSeg_Cubic_read(param_3, param_4); - float2 err_v = (((cubic.p2 - cubic.p1) * 3.0f) + cubic.p0) - cubic.p3; - float err = (err_v.x * err_v.x) + (err_v.y * err_v.y); - uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875f, 0.16666667163372039794921875f))), 1u); - n_quads = min(n_quads, 16u); - float val = 0.0f; - float2 qp0 = cubic.p0; - float _step = 1.0f / float(n_quads); - SubdivResult keep_params[16]; - for (uint i = 0u; i < n_quads; i++) - { - float t = float(i + 1u) * _step; - float2 param_5 = cubic.p0; - float2 param_6 = cubic.p1; - float2 param_7 = cubic.p2; - float2 param_8 = cubic.p3; - float param_9 = t; - float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9); - float2 param_10 = cubic.p0; - float2 param_11 = cubic.p1; - float2 param_12 = cubic.p2; - float2 param_13 = cubic.p3; - float param_14 = t - (0.5f * _step); - float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14); - qp1 = (qp1 * 2.0f) - ((qp0 + qp2) * 0.5f); - float2 param_15 = qp0; - float2 param_16 = qp1; - float2 param_17 = qp2; - float param_18 = 0.4743416607379913330078125f; - SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18); - keep_params[i] = params; - val += params.val; - qp0 = qp2; - } - uint n = max(uint(ceil((val * 0.5f) / 0.4743416607379913330078125f)), 1u); - uint param_19 = tag.flags; - bool is_stroke = fill_mode_from_flags(param_19) == 1u; - uint path_ix = cubic.path_ix; - PathRef _901 = { _711.Load(20) + (path_ix * 12u) }; - Alloc _904; - _904.offset = _711.Load(20); - Alloc param_20; - param_20.offset = _904.offset; - PathRef param_21 = _901; - Path path = Path_read(param_20, param_21); - uint param_22 = path.tiles.offset; - uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_24 = true; - Alloc path_alloc = new_alloc(param_22, param_23, param_24); - int4 bbox = int4(path.bbox); - float2 p0 = cubic.p0; - qp0 = cubic.p0; - float v_step = val / float(n); - int n_out = 1; - float val_sum = 0.0f; - float2 p1; - float _1143; - TileSeg tile_seg; - for (uint i_1 = 0u; i_1 < n_quads; i_1++) - { - float t_1 = float(i_1 + 1u) * _step; - float2 param_25 = cubic.p0; - float2 param_26 = cubic.p1; - float2 param_27 = cubic.p2; - float2 param_28 = cubic.p3; - float param_29 = t_1; - float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29); - float2 param_30 = cubic.p0; - float2 param_31 = cubic.p1; - float2 param_32 = cubic.p2; - float2 param_33 = cubic.p3; - float param_34 = t_1 - (0.5f * _step); - float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34); - qp1_1 = (qp1_1 * 2.0f) - ((qp0 + qp2_1) * 0.5f); - SubdivResult params_1 = keep_params[i_1]; - float param_35 = params_1.a0; - float u0 = approx_parabola_inv_integral(param_35); - float param_36 = params_1.a2; - float u2 = approx_parabola_inv_integral(param_36); - float uscale = 1.0f / (u2 - u0); - float target = float(n_out) * v_step; - for (;;) - { - bool _1036 = uint(n_out) == n; - bool _1046; - if (!_1036) - { - _1046 = target < (val_sum + params_1.val); - } - else - { - _1046 = _1036; - } - if (_1046) - { - if (uint(n_out) == n) - { - p1 = cubic.p3; - } - else - { - float u = (target - val_sum) / params_1.val; - float a = lerp(params_1.a0, params_1.a2, u); - float param_37 = a; - float au = approx_parabola_inv_integral(param_37); - float t_2 = (au - u0) * uscale; - float2 param_38 = qp0; - float2 param_39 = qp1_1; - float2 param_40 = qp2_1; - float param_41 = t_2; - p1 = eval_quad(param_38, param_39, param_40, param_41); - } - float xmin = min(p0.x, p1.x) - cubic.stroke.x; - float xmax = max(p0.x, p1.x) + cubic.stroke.x; - float ymin = min(p0.y, p1.y) - cubic.stroke.y; - float ymax = max(p0.y, p1.y) + cubic.stroke.y; - float dx = p1.x - p0.x; - float dy = p1.y - p0.y; - if (abs(dy) < 9.999999717180685365747194737196e-10f) - { - _1143 = 1000000000.0f; - } - else - { - _1143 = dx / dy; - } - float invslope = _1143; - float c = (cubic.stroke.x + (abs(invslope) * (8.0f + cubic.stroke.y))) * 0.0625f; - float b = invslope; - float a_1 = (p0.x - ((p0.y - 8.0f) * b)) * 0.0625f; - int x0 = int(floor(xmin * 0.0625f)); - int x1 = int(floor(xmax * 0.0625f) + 1.0f); - int y0 = int(floor(ymin * 0.0625f)); - int y1 = int(floor(ymax * 0.0625f) + 1.0f); - x0 = clamp(x0, bbox.x, bbox.z); - y0 = clamp(y0, bbox.y, bbox.w); - x1 = clamp(x1, bbox.x, bbox.z); - y1 = clamp(y1, bbox.y, bbox.w); - float xc = a_1 + (b * float(y0)); - int stride = bbox.z - bbox.x; - int base = ((y0 - bbox.y) * stride) - bbox.x; - uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); - uint malloc_size = n_tile_alloc * 24u; - uint param_42 = malloc_size; - uint param_43 = _711.Load(0); - uint param_44 = 4u; - uint _1265 = malloc_stage(param_42, param_43, param_44); - uint tile_offset = _1265; - if (tile_offset == 0u) - { - mem_ok = false; - } - uint param_45 = tile_offset; - uint param_46 = malloc_size; - bool param_47 = true; - Alloc tile_alloc = new_alloc(param_45, param_46, param_47); - int xray = int(floor(p0.x * 0.0625f)); - int last_xray = int(floor(p1.x * 0.0625f)); - if (p0.y > p1.y) - { - int tmp = xray; - xray = last_xray; - last_xray = tmp; - } - for (int y = y0; y < y1; y++) - { - float tile_y0 = float(y * 16); - int xbackdrop = max((xray + 1), bbox.x); - bool _1322 = !is_stroke; - bool _1332; - if (_1322) - { - _1332 = min(p0.y, p1.y) < tile_y0; - } - else - { - _1332 = _1322; - } - bool _1339; - if (_1332) - { - _1339 = xbackdrop < bbox.z; - } - else - { - _1339 = _1332; - } - if (_1339) - { - int backdrop = (p1.y < p0.y) ? 1 : (-1); - TileRef param_48 = path.tiles; - uint param_49 = uint(base + xbackdrop); - TileRef tile_ref = Tile_index(param_48, param_49); - uint tile_el = tile_ref.offset >> uint(2); - uint _1369; - _143.InterlockedAdd((tile_el + 1u) * 4 + 12, uint(backdrop), _1369); - } - int next_xray = last_xray; - if (y < (y1 - 1)) - { - float tile_y1 = float((y + 1) * 16); - float x_edge = lerp(p0.x, p1.x, (tile_y1 - p0.y) / dy); - next_xray = int(floor(x_edge * 0.0625f)); - } - int min_xray = min(xray, next_xray); - int max_xray = max(xray, next_xray); - int xx0 = min(int(floor(xc - c)), min_xray); - int xx1 = max(int(ceil(xc + c)), (max_xray + 1)); - xx0 = clamp(xx0, x0, x1); - xx1 = clamp(xx1, x0, x1); - for (int x = xx0; x < xx1; x++) - { - float tile_x0 = float(x * 16); - TileRef _1449 = { path.tiles.offset }; - TileRef param_50 = _1449; - uint param_51 = uint(base + x); - TileRef tile_ref_1 = Tile_index(param_50, param_51); - uint tile_el_1 = tile_ref_1.offset >> uint(2); - uint old = 0u; - uint _1465; - _143.InterlockedExchange(tile_el_1 * 4 + 12, tile_offset, _1465); - old = _1465; - tile_seg.origin = p0; - tile_seg._vector = p1 - p0; - float y_edge = 0.0f; - if (!is_stroke) - { - y_edge = lerp(p0.y, p1.y, (tile_x0 - p0.x) / dx); - if (min(p0.x, p1.x) < tile_x0) - { - float2 p = float2(tile_x0, y_edge); - if (p0.x > p1.x) - { - tile_seg._vector = p - p0; - } - else - { - tile_seg.origin = p; - tile_seg._vector = p1 - p; - } - if (tile_seg._vector.x == 0.0f) - { - tile_seg._vector.x = sign(p1.x - p0.x) * 9.999999717180685365747194737196e-10f; - } - } - if ((x <= min_xray) || (max_xray < x)) - { - y_edge = 1000000000.0f; - } - } - tile_seg.y_edge = y_edge; - tile_seg.next.offset = old; - if (mem_ok) - { - TileSegRef _1550 = { tile_offset }; - Alloc param_52 = tile_alloc; - TileSegRef param_53 = _1550; - TileSeg param_54 = tile_seg; - TileSeg_write(param_52, param_53, param_54); - } - tile_offset += 24u; - } - xc += b; - base += stride; - xray = next_xray; - } - n_out++; - target += v_step; - p0 = p1; - continue; - } - else - { - break; - } - } - val_sum += params_1.val; - qp0 = qp2_1; - } - break; - } - } -} - -[numthreads(32, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/path_coarse.msl b/piet-gpu/shader/gen/path_coarse.msl deleted file mode 100644 index 4f59b3f..0000000 --- a/piet-gpu/shader/gen/path_coarse.msl +++ /dev/null @@ -1,718 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Alloc -{ - uint offset; -}; - -struct PathCubicRef -{ - uint offset; -}; - -struct PathCubic -{ - float2 p0; - float2 p1; - float2 p2; - float2 p3; - uint path_ix; - uint trans_ix; - float2 stroke; -}; - -struct PathSegRef -{ - uint offset; -}; - -struct PathSegTag -{ - uint tag; - uint flags; -}; - -struct TileRef -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct TileSegRef -{ - uint offset; -}; - -struct TileSeg -{ - float2 origin; - float2 vector; - float y_edge; - TileSegRef next; -}; - -struct SubdivResult -{ - float val; - float a0; - float a2; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(32u, 1u, 1u); - -static inline __attribute__((always_inline)) -bool check_deps(thread const uint& dep_stage, device Memory& v_143) -{ - uint _149 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, 0u, memory_order_relaxed); - return (_149 & dep_stage) == 0u; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_143) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_143.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -PathSegTag PathSeg_tag(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_143); - return PathSegTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; -} - -static inline __attribute__((always_inline)) -PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref, device Memory& v_143) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_143); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_143); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_143); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_143); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_143); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_143); - Alloc param_12 = a; - uint param_13 = ix + 6u; - uint raw6 = read_mem(param_12, param_13, v_143); - Alloc param_14 = a; - uint param_15 = ix + 7u; - uint raw7 = read_mem(param_14, param_15, v_143); - Alloc param_16 = a; - uint param_17 = ix + 8u; - uint raw8 = read_mem(param_16, param_17, v_143); - Alloc param_18 = a; - uint param_19 = ix + 9u; - uint raw9 = read_mem(param_18, param_19, v_143); - Alloc param_20 = a; - uint param_21 = ix + 10u; - uint raw10 = read_mem(param_20, param_21, v_143); - Alloc param_22 = a; - uint param_23 = ix + 11u; - uint raw11 = read_mem(param_22, param_23, v_143); - PathCubic s; - s.p0 = float2(as_type(raw0), as_type(raw1)); - s.p1 = float2(as_type(raw2), as_type(raw3)); - s.p2 = float2(as_type(raw4), as_type(raw5)); - s.p3 = float2(as_type(raw6), as_type(raw7)); - s.path_ix = raw8; - s.trans_ix = raw9; - s.stroke = float2(as_type(raw10), as_type(raw11)); - return s; -} - -static inline __attribute__((always_inline)) -PathCubic PathSeg_Cubic_read(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143) -{ - Alloc param = a; - PathCubicRef param_1 = PathCubicRef{ ref.offset + 4u }; - return PathCubic_read(param, param_1, v_143); -} - -static inline __attribute__((always_inline)) -float2 eval_cubic(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float2& p3, thread const float& t) -{ - float mt = 1.0 - t; - return (p0 * ((mt * mt) * mt)) + (((p1 * ((mt * mt) * 3.0)) + (((p2 * (mt * 3.0)) + (p3 * t)) * t)) * t); -} - -static inline __attribute__((always_inline)) -float approx_parabola_integral(thread const float& x) -{ - return x * rsqrt(sqrt(0.3300000131130218505859375 + (0.201511204242706298828125 + ((0.25 * x) * x)))); -} - -static inline __attribute__((always_inline)) -SubdivResult estimate_subdiv(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float& sqrt_tol) -{ - float2 d01 = p1 - p0; - float2 d12 = p2 - p1; - float2 dd = d01 - d12; - float _cross = ((p2.x - p0.x) * dd.y) - ((p2.y - p0.y) * dd.x); - float x0 = ((d01.x * dd.x) + (d01.y * dd.y)) / _cross; - float x2 = ((d12.x * dd.x) + (d12.y * dd.y)) / _cross; - float scale = abs(_cross / (length(dd) * (x2 - x0))); - float param = x0; - float a0 = approx_parabola_integral(param); - float param_1 = x2; - float a2 = approx_parabola_integral(param_1); - float val = 0.0; - if (scale < 1000000000.0) - { - float da = abs(a2 - a0); - float sqrt_scale = sqrt(scale); - if (sign(x0) == sign(x2)) - { - val = da * sqrt_scale; - } - else - { - float xmin = sqrt_tol / sqrt_scale; - float param_2 = xmin; - val = (sqrt_tol * da) / approx_parabola_integral(param_2); - } - } - return SubdivResult{ val, a0, a2 }; -} - -static inline __attribute__((always_inline)) -uint fill_mode_from_flags(thread const uint& flags) -{ - return flags & 1u; -} - -static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_143) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_143); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_143); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_143); - Path s; - s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - s.tiles = TileRef{ raw2 }; - return s; -} - -static inline __attribute__((always_inline)) -Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok) -{ - Alloc a; - a.offset = offset; - return a; -} - -static inline __attribute__((always_inline)) -float approx_parabola_inv_integral(thread const float& x) -{ - return x * sqrt(0.61000001430511474609375 + (0.1520999968051910400390625 + ((0.25 * x) * x))); -} - -static inline __attribute__((always_inline)) -float2 eval_quad(thread const float2& p0, thread const float2& p1, thread const float2& p2, thread const float& t) -{ - float mt = 1.0 - t; - return (p0 * (mt * mt)) + (((p1 * (mt * 2.0)) + (p2 * t)) * t); -} - -static inline __attribute__((always_inline)) -uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_143) -{ - uint _158 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.mem_offset, size, memory_order_relaxed); - uint offset = _158; - if ((offset + size) > mem_size) - { - uint _168 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, stage, memory_order_relaxed); - offset = 0u; - } - return offset; -} - -static inline __attribute__((always_inline)) -TileRef Tile_index(thread const TileRef& ref, thread const uint& index) -{ - return TileRef{ ref.offset + (index * 8u) }; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_143) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_143.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void TileSeg_write(thread const Alloc& a, thread const TileSegRef& ref, thread const TileSeg& s, device Memory& v_143) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.origin.x); - write_mem(param, param_1, param_2, v_143); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.origin.y); - write_mem(param_3, param_4, param_5, v_143); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.vector.x); - write_mem(param_6, param_7, param_8, v_143); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.vector.y); - write_mem(param_9, param_10, param_11, v_143); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.y_edge); - write_mem(param_12, param_13, param_14, v_143); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = s.next.offset; - write_mem(param_15, param_16, param_17, v_143); -} - -kernel void main0(device Memory& v_143 [[buffer(0)]], const device ConfigBuf& _711 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - bool mem_ok = true; - uint param = 7u; - bool _694 = check_deps(param, v_143); - if (!_694) - { - return; - } - uint element_ix = gl_GlobalInvocationID.x; - PathSegRef ref = PathSegRef{ _711.conf.pathseg_alloc.offset + (element_ix * 52u) }; - PathSegTag tag = PathSegTag{ 0u, 0u }; - if (element_ix < _711.conf.n_pathseg) - { - Alloc param_1; - param_1.offset = _711.conf.pathseg_alloc.offset; - PathSegRef param_2 = ref; - tag = PathSeg_tag(param_1, param_2, v_143); - } - switch (tag.tag) - { - case 1u: - { - Alloc param_3; - param_3.offset = _711.conf.pathseg_alloc.offset; - PathSegRef param_4 = ref; - PathCubic cubic = PathSeg_Cubic_read(param_3, param_4, v_143); - float2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3; - float err = (err_v.x * err_v.x) + (err_v.y * err_v.y); - uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u); - n_quads = min(n_quads, 16u); - float val = 0.0; - float2 qp0 = cubic.p0; - float _step = 1.0 / float(n_quads); - spvUnsafeArray keep_params; - for (uint i = 0u; i < n_quads; i++) - { - float t = float(i + 1u) * _step; - float2 param_5 = cubic.p0; - float2 param_6 = cubic.p1; - float2 param_7 = cubic.p2; - float2 param_8 = cubic.p3; - float param_9 = t; - float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9); - float2 param_10 = cubic.p0; - float2 param_11 = cubic.p1; - float2 param_12 = cubic.p2; - float2 param_13 = cubic.p3; - float param_14 = t - (0.5 * _step); - float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14); - qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5); - float2 param_15 = qp0; - float2 param_16 = qp1; - float2 param_17 = qp2; - float param_18 = 0.4743416607379913330078125; - SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18); - keep_params[i] = params; - val += params.val; - qp0 = qp2; - } - uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u); - uint param_19 = tag.flags; - bool is_stroke = fill_mode_from_flags(param_19) == 1u; - uint path_ix = cubic.path_ix; - Alloc param_20; - param_20.offset = _711.conf.tile_alloc.offset; - PathRef param_21 = PathRef{ _711.conf.tile_alloc.offset + (path_ix * 12u) }; - Path path = Path_read(param_20, param_21, v_143); - uint param_22 = path.tiles.offset; - uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u; - bool param_24 = true; - Alloc path_alloc = new_alloc(param_22, param_23, param_24); - int4 bbox = int4(path.bbox); - float2 p0 = cubic.p0; - qp0 = cubic.p0; - float v_step = val / float(n); - int n_out = 1; - float val_sum = 0.0; - float2 p1; - float _1143; - TileSeg tile_seg; - for (uint i_1 = 0u; i_1 < n_quads; i_1++) - { - float t_1 = float(i_1 + 1u) * _step; - float2 param_25 = cubic.p0; - float2 param_26 = cubic.p1; - float2 param_27 = cubic.p2; - float2 param_28 = cubic.p3; - float param_29 = t_1; - float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29); - float2 param_30 = cubic.p0; - float2 param_31 = cubic.p1; - float2 param_32 = cubic.p2; - float2 param_33 = cubic.p3; - float param_34 = t_1 - (0.5 * _step); - float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34); - qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5); - SubdivResult params_1 = keep_params[i_1]; - float param_35 = params_1.a0; - float u0 = approx_parabola_inv_integral(param_35); - float param_36 = params_1.a2; - float u2 = approx_parabola_inv_integral(param_36); - float uscale = 1.0 / (u2 - u0); - float target = float(n_out) * v_step; - for (;;) - { - bool _1036 = uint(n_out) == n; - bool _1046; - if (!_1036) - { - _1046 = target < (val_sum + params_1.val); - } - else - { - _1046 = _1036; - } - if (_1046) - { - if (uint(n_out) == n) - { - p1 = cubic.p3; - } - else - { - float u = (target - val_sum) / params_1.val; - float a = mix(params_1.a0, params_1.a2, u); - float param_37 = a; - float au = approx_parabola_inv_integral(param_37); - float t_2 = (au - u0) * uscale; - float2 param_38 = qp0; - float2 param_39 = qp1_1; - float2 param_40 = qp2_1; - float param_41 = t_2; - p1 = eval_quad(param_38, param_39, param_40, param_41); - } - float xmin = fast::min(p0.x, p1.x) - cubic.stroke.x; - float xmax = fast::max(p0.x, p1.x) + cubic.stroke.x; - float ymin = fast::min(p0.y, p1.y) - cubic.stroke.y; - float ymax = fast::max(p0.y, p1.y) + cubic.stroke.y; - float dx = p1.x - p0.x; - float dy = p1.y - p0.y; - if (abs(dy) < 9.999999717180685365747194737196e-10) - { - _1143 = 1000000000.0; - } - else - { - _1143 = dx / dy; - } - float invslope = _1143; - float c = (cubic.stroke.x + (abs(invslope) * (8.0 + cubic.stroke.y))) * 0.0625; - float b = invslope; - float a_1 = (p0.x - ((p0.y - 8.0) * b)) * 0.0625; - int x0 = int(floor(xmin * 0.0625)); - int x1 = int(floor(xmax * 0.0625) + 1.0); - int y0 = int(floor(ymin * 0.0625)); - int y1 = int(floor(ymax * 0.0625) + 1.0); - x0 = clamp(x0, bbox.x, bbox.z); - y0 = clamp(y0, bbox.y, bbox.w); - x1 = clamp(x1, bbox.x, bbox.z); - y1 = clamp(y1, bbox.y, bbox.w); - float xc = a_1 + (b * float(y0)); - int stride = bbox.z - bbox.x; - int base = ((y0 - bbox.y) * stride) - bbox.x; - uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); - uint malloc_size = n_tile_alloc * 24u; - uint param_42 = malloc_size; - uint param_43 = _711.conf.mem_size; - uint param_44 = 4u; - uint _1265 = malloc_stage(param_42, param_43, param_44, v_143); - uint tile_offset = _1265; - if (tile_offset == 0u) - { - mem_ok = false; - } - uint param_45 = tile_offset; - uint param_46 = malloc_size; - bool param_47 = true; - Alloc tile_alloc = new_alloc(param_45, param_46, param_47); - int xray = int(floor(p0.x * 0.0625)); - int last_xray = int(floor(p1.x * 0.0625)); - if (p0.y > p1.y) - { - int tmp = xray; - xray = last_xray; - last_xray = tmp; - } - for (int y = y0; y < y1; y++) - { - float tile_y0 = float(y * 16); - int xbackdrop = max((xray + 1), bbox.x); - bool _1322 = !is_stroke; - bool _1332; - if (_1322) - { - _1332 = fast::min(p0.y, p1.y) < tile_y0; - } - else - { - _1332 = _1322; - } - bool _1339; - if (_1332) - { - _1339 = xbackdrop < bbox.z; - } - else - { - _1339 = _1332; - } - if (_1339) - { - int backdrop = (p1.y < p0.y) ? 1 : (-1); - TileRef param_48 = path.tiles; - uint param_49 = uint(base + xbackdrop); - TileRef tile_ref = Tile_index(param_48, param_49); - uint tile_el = tile_ref.offset >> uint(2); - uint _1369 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.memory[tile_el + 1u], uint(backdrop), memory_order_relaxed); - } - int next_xray = last_xray; - if (y < (y1 - 1)) - { - float tile_y1 = float((y + 1) * 16); - float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy); - next_xray = int(floor(x_edge * 0.0625)); - } - int min_xray = min(xray, next_xray); - int max_xray = max(xray, next_xray); - int xx0 = min(int(floor(xc - c)), min_xray); - int xx1 = max(int(ceil(xc + c)), (max_xray + 1)); - xx0 = clamp(xx0, x0, x1); - xx1 = clamp(xx1, x0, x1); - for (int x = xx0; x < xx1; x++) - { - float tile_x0 = float(x * 16); - TileRef param_50 = TileRef{ path.tiles.offset }; - uint param_51 = uint(base + x); - TileRef tile_ref_1 = Tile_index(param_50, param_51); - uint tile_el_1 = tile_ref_1.offset >> uint(2); - uint old = 0u; - uint _1465 = atomic_exchange_explicit((device atomic_uint*)&v_143.memory[tile_el_1], tile_offset, memory_order_relaxed); - old = _1465; - tile_seg.origin = p0; - tile_seg.vector = p1 - p0; - float y_edge = 0.0; - if (!is_stroke) - { - y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx); - if (fast::min(p0.x, p1.x) < tile_x0) - { - float2 p = float2(tile_x0, y_edge); - if (p0.x > p1.x) - { - tile_seg.vector = p - p0; - } - else - { - tile_seg.origin = p; - tile_seg.vector = p1 - p; - } - if (tile_seg.vector.x == 0.0) - { - tile_seg.vector.x = sign(p1.x - p0.x) * 9.999999717180685365747194737196e-10; - } - } - if ((x <= min_xray) || (max_xray < x)) - { - y_edge = 1000000000.0; - } - } - tile_seg.y_edge = y_edge; - tile_seg.next.offset = old; - if (mem_ok) - { - Alloc param_52 = tile_alloc; - TileSegRef param_53 = TileSegRef{ tile_offset }; - TileSeg param_54 = tile_seg; - TileSeg_write(param_52, param_53, param_54, v_143); - } - tile_offset += 24u; - } - xc += b; - base += stride; - xray = next_xray; - } - n_out++; - target += v_step; - p0 = p1; - continue; - } - else - { - break; - } - } - val_sum += params_1.val; - qp0 = qp2_1; - } - break; - } - } -} - diff --git a/piet-gpu/shader/gen/path_coarse.spv b/piet-gpu/shader/gen/path_coarse.spv deleted file mode 100644 index bd32fc2d2e26c9f046ac6e5b97e8cb7ee3d81463..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 39708 zcmbWA2b^9-`L+-2CiLEW=%GsJi1ZSA?*hvv*(3{_-LTmdia;mkkk=zg0{8=A1`mRoJ6!{Sv(%TJ#r@EJkfNtx2! z+ppoS`KxM4bhV`@qbT}XI`|*v>Rw1Ey^h_lYwUpsj@@sMuJKc+c1;^PrLU)}ueZOa zYeMf}&-kI<89n+fSorn!kMEm4p>!C5N7=u8VfxsmuWw*HwHeC&3{0Fjt!Jo0S%BQD zs=sGuSGRtnr;pj5|BoB9J+{tj(P6%w)nf2zy{D3mZ9AC*#@jQcYv3gL$Z8q*_{lxv zPwJY`Gj*E5)JIfHqoXjg?nyQt!B|$ZZL8C^Tz6f!>8zH=KCiD4)e8F4*F3GWT2Y_5 zF&D1ZfDa8!A3wPZ&(YI{2B(i7+N3oBn@k_m8d*E<)$nU|P3<^Vr=B+_i&X2vXAbrb z_581KT4}c0uGNUO_OMtT)jIGQ-F^5jTx|#*?CGBHzlOzbKBKA2{5F8k%Wt7-Q~1Q* zzP_$01FV6Gg9B5#CiZnts%N|%V-tN8sl7HX+Jm}>ChsgMyfcgWN&+WqM7`kYD*UA?o2F|rk71lrJG zcmFg+KCV-Jy$!j711GUyMsNph3!j^Rm$UdkFK~Uu=ejY>)`h1jF*>TPsk>{u4fPKrxv@K|vEajc`+A%V=R)ph-drqF z?FZMVtG9nb&#VloZZ+?AYxT1azU{m_t9`+(Ce*I=1E`&w^daXC>I2~PVck7->fpevuBqLF-Qx!Ox>?dgJ(C8z`$o4@Z^d$) zF18W85zKtwHZ$J0-Hf^IMpmbQ51l@4Lhp=2dZtb9Yh6$GQguH*C&FDD-Su9p-F4C3 z8q31fnc$vjL%mbFc|=WX1spaxrtLUS!!PrFy11T~kBD2bMpS2jr=2`F)HO8V)X}c9 zI-7dlbG@TF7v8@9oz*4a{d=Yi43d<4xZF?6zkaWX?ikd21_xc@crVcMcP!)jdip1H z<*sS3bz`~KY14}D7(NE~V_u)7P7C?Xa4(wIS$<1{f3LxRFpPIpKZ1LG?B_vhpFqZW z7(CcLli6?cN2tqw{@CD;!SQEy+x|~B>`yiLU*Pz21={}4HSEtf_zQ6Sx1eg}_m77C znd-1grl{);#4 zOE&n@ZU1fKzkI{KVuP>T_TMi4t2gYU8hov`{}?Z@*L%H&eRP9wSo?c{_20B%-)tD~ zsJ4K&_r;Obv0$G0UEQqqdG7?B)d|DebXMaU{Nx6oHjH;vXTq~y&jE8i=C0Q{!+bld z3mW{ww*TDqdP&3nu?D}a?LT+DUeT~$IgEEypN6-`-%;HJPM)_9^Xsg>)!=tF_+1Tt zcZ1*4;6H8fpEvje4gOGrKiuHIYw*V#{K*FYQ-eR<;LkSrbHjK?^#XhfPX+g*`Pd|b zcLYz^Y)nhvRXk?%7T#0LL)A684|x3Gz_iT!vG7@}2xFZn=IZG;3p{Oncc0H+ZD+%M z&S*OaJnug5sOD&^+nwLAT?lV~uj!~R15fBS#)#@laO-(ecVgV@(Pm9)e=nBb1fSPW zXLZLgKBD>_yr-{cN>Be#>sx`}7yUjxsjq8n-@v%;zPK*9cd-r-A_hN6q zmvmP5fm^Ra*nEeQ`zfCtKSJAWpnqa->pgyCCHLd=D&H}Ej;V85s{XDzBPKYyP5OCl z7@9n-$M+VWZ~FTTo7p?TFFO5QL+uyR*gCOI?&+O0nW4p)cq6ds-u^N^uOGh)jl(7tsVSlN?{|V>4 zuq*Fh@3eWVuLZe*`deQlrnPcf7M{IkIq=E6)SA1R%Z}20-*>jA4 zba-oAoz;gM{I~}18pcOdqxs#)H8LICJB{4^;oX)5@AzJ=(8vYjz<0Gm|(!Rd4jq#abKAqLo4gUECzZRbF{;ty-wAatDo4_*| zf~&&Vx5E3o2Bx>yLT7cGcD|-gn?415NA*p3*39?8Su^*7Pv%RKG42DGYv%q2|5=0o zqQM^=#yhHq;8`<|46}7sk2d%d4gOSvKi%Ljg}2sBNA)s%dObI<#MZi29o6gF>gW3# z!)%?^+uDj}_&=e;5#(6bx#}Wszv8!DYT&}Q?Yhb z%Yk(^-!;^ICt~7kyEc5*I9?4V3=VLMx3nG63aO>-nRdPu*0J_QE6097gCCSOJ)YN( zsE&bq?lvz^zNf{9i)Vh5;Ir!UI@`>Iv((b073Vay?pcM#_cz>W1)QOgE#IWBNve-Y3CdjZ9iXW-^V6@HU*7v|Yku~j=9tu$EHtLH zr7c@%&Xaz=6Q-^2_iD-6c+Q8~C`$Z%r&n92(0rd)8(nCQUv1+;GgmdXrMdBZZ&%y0 z(0pfC+qTerS6BN`p*c>qoeRzPa<$zH&G&J&y$a3uaJBsk%{fv#sL*^5S39iGd=FPU zs?dD@Ry(%PoL{vQ3e9(IwF!mhJGR>7Li62Pt*@nZ&V7H*HOu?7K7L%IY8@@y*?PBL zt7U1gQQx&)qpmO4?B*;2uaWsI!wSQU-Jce>*V}UVxK2h<{EULTMl9;f(!%wj?wU}S zuTBKlgFb8WU;B`!t-fp0#_OcNx_-{HZQ^HbZ0A92j5S&3u20)Vx9ys6^N-e!Iigx0 z&6v^c*Y%iw%+<9hw=W)%t-jQj$Hwaldd1I!JF6|I<@U2WBX+&2mBly~HFMpHT0i?Y zw*C3DzVx>}*j&|Crw`Y5V%UBMxSH+tRoj(1G1ZJYmO3%_0vl6J|GjG*UH=2n;(s7m ze>KPH+Sle@VonEBt0mqcU^R2Ht?`bePCV}kKk z#?Y=`H?@AokdLdmvDU;gzP9O?qR*PFk`v#@rcc}6Sq*}XBVV1lI;G}niGM0sJ+af? zTxO#?Zr9`XH9x!7Heg=V&!KoPoku;2G9P85n$H3ISq{t`U?+Fzr%b`t*` zu+O0IPC6^|SrqQ|jeS|T^A^4;+&v?F9k_cz_{MPe7{|Lg*jU~l+MV4CrZ-n{mc;Y;FY{Hwsmcm2z+tGV}w{kdjdM60T^b_##J#jBmemvAzxo#*nk z;rG3M&HRsdo`%_4{o_+y$ec1ljV6Ufp?8dAL>xa<1M=bh03g`c=G36R9%mkjc z(3H8?a8dY8U)`{E4a|FKxN9cH0wUhCV0NbZ~O6*6W{q6BrcU%>FH&~l7^dDFI ztLtz4v%!l}drhwaJ5JZQ{$H&9)#bO;T%Yte9p|p8AKfhcTpM$Y9e!bppS45!dmiq4 z$A-+mbNL2bn=yAE;r;U7d+{AZf}_wkCR5jy?-^?DGnTEHXXmrsWqU2Yg+iFt8gyV=Uim@BtbH#Au zd#+gWT^ii8#nSHCV#z&Q3^%@Ki{Y+!&lbb&&$Gpnd$w3|&lbat=h@<14jx}{b6v1< z^Wm9dxa0L)G2HvtbH#A&w-sF7bH&o`*hEV*ZmC4ZyAJ#Q@So;QX&p4Hf# z!tKv9$C7*QSaQ!EOYZq&$vuAzH@;_(CHD-n+iW_ z$vu-Sx#y83_e`?no=b+?pJ$Wdj>ogfl6x*0Ztb~b$vu}Wx#yDMe$Vu5GTi<=mkjrQ z^;|OC>+jiQxZhPhn+!LeXOr?Bd0#r4`kWg3UgP(j?ReYqJ?dNVWvJC}J{4wO@g4r|ls6{~&%3g4ONccQ?7%_cq_R z6aTkhi;IklC0^3$S?fwn6o#!RV z;cd8ja`+F}w(5@6?-6Ro7)QJRg4HgsV|ae>E?hl6@4b%?p(^$Bh+~`i;I>mwyZOP! z_B)sP>DPg#9-q$l@fm@pp4cP7wo{*7_qPDpKD9Zn*7qTB`dJ99mKck`)%@PPD0h#! zECzOcu1wMHcj!e~C)yUTZ9Y5on?n90!ApR(8AHBg?YBW~bFAj$_c3kequ)|swX|CX zY~SnEHot49oi?%G;Vxnf&XwzOd3bZ3tbnGjzu)oX>2oEpeNL_Cer2%Q1@#=R0`{y( z-S&PbR1^Ds&^6&+uo~F=*ZK`&twBAC;z#?MwcR>>uLU+n_}XA|$$q&GSRZxwNcrj% zW80TFZPx=EEBpKUU^VADV;c=uw~g0t1F+-C_1_SzkGkXY`>k5$b`x-OZZ}0!*Wd5E z^7wBC*1x>}$Dpa}@AqMO{I>w>{}8#EqxblhXzKd=eOVs=t-<=2_x3hu>iYYATAuN4 z2X;)_{BAAZj#Eqf`5<^IwPP?Z`?9@$`fo(7Phx!tY^?Ad!R1`;q>ZBPKI44v47Q!N zjA2)Bb53_dQ@7t;sO9nB1Kga`J<-(l-|GmJ?Io%sgU4PFA<>(A4$! z3_&j5pL%CX=I{Wpb7;=yVQl^N(|>zveR6FN1gmA=KM1VmS!{d`hI`M39|HDT%w|r# zZij;PQBR!1!0OI__V~lW#@Ciy)Drg?u>EK|npz&)v0(Gm_F-yyY{!GmN854Ka&Z^+ zQIzd?TlAdp1h9Wb!`Q~qS0883p z+y{)UO|9HF*MKLt0kHjNUz!S5^Ll1qJ{hd;9Orp72=;m5^GJK1N7KOC@?06J{d^v2 z%k!uotj&4!-Ek(^ce(6eUYl8H>W*UuwcPIs+D-v)O0E7?G`}aD3RX`a)4^)#<8-k1 zn?L`^{CxyX-9ApEmgjrGnc%z=`ZJPTWBWe~oA$(+4Yqy02b=>|JDZYgbFP@8&F=yJ zeDOT6dfr3N2bX;s|2}fQ09{+&Lq7^u^ONyi2%lp?b3AhW#1~Q9m+zhatmb0y;nezT z)8`Cob#w8WTnbh*-ZPYp^i%pkeS8*fF0R4yT7M2rn{)ITYPtAo zYU7AMPkj#Uwqxe}S=<-kOHk{tpLYE&q1NU#^Jit(fir%`bP#Q>M^ir?ehu}PC~CI9 zw&u3)#`nu`*L3oApS}@IUH==XrPcttP&eI{VUXVBaOxzX^8TxJTU%c8|06-R@h|cT)Ul|8{M+&Kmd* z*cjp81usa+yWRJ|`l$PEC%=PYZ2JYI(-{Yp`R|_8_%9?twv6Eq;O3nE5l!8GAEB1V|1ofLP9H~8*Z)y!dHkOQH|O+E zXzKbuK`oE}pTW&J{R^79{!dZM#ZOZ^rp)0pVCT@B&BNIG>!<%i)cWMwJ_}aMJK1w! zwesEWukht4Y4beTYcZRB#P#tvus-UE^8#4i`OmxEi(unxOD<~0b$|Ff*!#zMID+~g zl<^dGeSF9IXRX`*C9quoaoAo3t7Wab2mK4KuKz34a^C~Ay#}61t*-rLYPH0B6KuTP z3$}R+uCD(Z)bhmpH`sXU+Fz$uGk4qm2dozU4%j}kHvbFOM?HJuyI{Z9YBT2B)M~~! zkC;@gy>X!KV?MZA`j{Vl6aLv>I^g=K8}B`AYVMDn)b454?w=#67ohmjzF=*)PX7ym zZzYD$8?WKQa5e4nMZo>o^PaURTp#tE(Jls7_uOp>YCrC^>Wfp<%uAecEeSUF@TK6l z!E^1FhI{Si5Gy{*psCNUum7@O+o^vW&G*3Nz{b|*nA5lQ>pY+JSsrY@&V&BuWxnbo zD9)StitXF$v;w%CrxoF9+MTDBz~wxx4A)29d0Ca(kNs#{g))j_KVrx2GktY%bMIIK zP2KCg8nxW98PoR0$sDZ(F6VV^xbvDdx(-|)_009U;O1Pfho+u&ygt~r>gM6k0M!y> z190=+-4IPZJ{#3O<^8xZntJlz1Z?a@xl-P9n}WH0TJQVD(_bHR7)`A`ZO4GswD~hf zdG77a!QR{I+CM<8mbP1h)xx&|m-qJ8aDCL{vkln$IrrwaaQ)Qn!=H1i>F+-KL9qL< zHh<j=H(;O0AYW_5(L_-5*UoJ_mrCxgLn7o?H(Co9k$b?X@S@gTe2Y>mlgc zX4iYtpgIYFwOaBx9^A~e3r#&fCxDx|cB82$*Ky#?tL?QX*YV)@%XI>}w%PT( z_JEC}Zm!2st0j*~;O6z6jHVu+UT|}roQS5LIXMYzJ9Yb+NUfF_Q^3vf_M@rCX8_!s zx2b6A>E~py?bPk3k6JDF+BC3Q&RmAT&LLH$Ezi3dU~SVWK4(s$_TzI#+bqhd6rU4f z` zz-)AFiE%dAwwaG}z?lziiFY12@yfM72VL9jdOe*FK9|zcYwZHKaka(oqhgA-?DH3a z)i0ztrZcJ4lIta4bJcb+wLEL%W8mf*{5YC=d@ief%KSfprk?e4Iatjxq~9yR_Ln?A z3D!?N?XCpdpSJjY3ap;-Tnbi8+)smdrNsX;U^V@7Jw6NediZ{&y`S~xd(-E@+MJ84 zsO5?KdGNlJ#Qg$Tt&DpOJaM(>x_l9=&3EhcbuC=ozOJU0r?2b5hf&hkm%wUeUpK(h zm-fW{GFY2&ucMYH?pMIaQ4;s7V6`&tP4L9ko-uq4tgRfw*Wv2+btARhzMRXO!Oo%U zo*> zbI4jX-nY=y<8x>2lQTYjzKy1y9KHii4)$RTeKP0Tlf!qxUf;z39=JIV-$zrA&kt&! zXXra|?m|;f4nG7Z2m3IFKIUM5+SB%Ku-7N??*TXG;a)WL_}o|fl=JW-H1*{0V{me? z4`b+)^`PAxd~f&(*n2hnr(o~@_=O^O<5EA4F4+&#!Br?B`zZ-=L{`Rx5u9Y&&)P`6abla(@_HuCw34oAdj7H1))M z1Z+F?#Q6i**q;5`?;pYXsVCn@!Oiva7@B%wKMuC7y7~T=S}pxN33iQy{|S5*CGDRA z>!Ti@KZ9LE;eP=;=CpYltdDx~e+FzUZTZgrELh#Vo}iW+)9i=PCL> zN39nBm%wV;UZj@$Z1wk#{sGR*go`qOTWVQ zA8_X2ZLnOwcc{-HZqE7t3-_Gg_WEhp?@elLt}nmmya#SR^D(vb+>7(U)h@(0&(!(h z?gc(mwdZ-%0oG<;@8T~{Un9V)q2=ClpBRazp1u|UtCf8%2v1+y)7L^^ZT8hkEl*zy zgPZTUi=e5;XHl^Emulwk%lxj7jb~NgvknT^{T`sjt6xD%W=fYV)!7tZgOgl_`F-uTtBs z6KhqlF~U~^d;i~B&%^3)ebgPhd_{_}?Ms}tqrk?>zOW`(%|D-&{eCUDy1Dy1vhuWF z2khMWvySt~d0n`gXRls6-!ImKH}}Q$(bWBJcOJf@!LFMP>loVY&)>n-W`ADe4Zzch znKQ!;;c8y*jj8=OX7!CIYK~c)wX_Mix!yKKQ;*LFz|Hlx8Jc>&kB$MG^STt~?7CSt}m|+fLoQ64yF;Z4b6zbJgEI&1(nh<&Rp&UZqDV-XzKCV1>C$Q zyK1Lo4tE3FPW>x93C^SQJ;3U&x812-Gya~odgf&;*yn8cUU26rYj$t2KI(ZE?gLgg zzrBdPANUN`U4Eu87Hxktb^F|xS}s0-+V=jOT)zXs`%&woO`kof)s64Z$qxprmCwUN z;Oh6{<8$v&u-ZdlV;lrlb6naF1FJbM`QczS_sl$dkAVAUTm3nm_RPtVVCOC4KMJh& zLjBA=8f;r*`ApF6ddc;+POJ}uJ1ORP47EJAW5LT6{5Y_CSKd{QhpRcyS)*NW+i7#% zbyNHCe$aLTMa^|5PG94}_Lb{A0jy^1>~}qI^>V+P2v;}%an$mx)5+lGGq@K`Jw7Lb zo6pXZ(A3S%{h<$R?6jEzZm$1+H1&Mv9st`;J#nV0F@HHzIT@~>y59xtYY^-{SG#d%Qfo`Q)4^q&kHE_~XTXi)b46e6#yO2zTgH7B*zYLev*BmLGxul1 z^-<4z)j8nIgZ9kBxnOO{@jS43XKZu88JoV^&FM^PZOP#RaGBFb;pQ~EYX98wLbyKa z$>}1nvHhJY?aAq4u(srM3D`Nvy>Tg6&3hwxehi+uv%Nm%c|Nta#QHec+`}&e`<*5H z6Y#6y`EGhS+_;_-*j9giG6&kz_6o55g?|!U#=a6>#{LvMv2ClrKFM2qVtpEH-rBCB zmb+d*L+xH-?K|1$sIR8@(f;|`ZkWQ}|c ztdDx06JH0bo5#)6#uR^pT3eohw}5>H>Z47c8>rRY1I_z3uv(e(H{t4e2Hp-<%QNs+ zu$uAipw9OdpMT$iYtOUqPH>)m+8noj-v;{~?%DcT_Z_&tdDdx9d~LSdf_C2nyJwq! zH~jnX?^4WPf4M&K{{h(NLik;9+q=gY?}u=G)D!b=ura+ajVaepd=It1|71M(;(Ouc zyYYQ+HJ_P3ruO67Q~wb~&9x`CKfhz%4{kn(euAbRpPzyqNBLdsXK3o?;xp&xVAo~Z zJODOsu8%&yKvT~i`%AFx)D!1d;ED8==i7sD{nYb(`!(2m-shXSnu~s!BkjrKH{fO- z522~Y=eJWli=o9|AeLRSU0=AvH`9DLQ z{Ixwz`5VRj#pdgJeim#_;m?81C+q#MaDCLx$9w2`aJk;)`nm7guYK58V!i;jkFi(a{32YAN9oiC%DW>u3wpxxw+4H54~LYmuu)1bZxgYR`1tW z!D`;a&g*MnKduXH|DvclW^u0B>)_@!djm~9K5v4X*ZwUu_1tg&2HQ?O?+FeAicWZgpxV6{gJlgyhd{x2U1<#~>v!0{(!1}1C zjme!>@cH1z$g_5Slc%WPP{-*28%vvWJ%ZYgbFHnDqUKzSvv=g@o3U9tmh7F@=H-4i z5}WsDuE7Fu=O@o9xjy<+RSWW;J6itj=k`Ky{rzq>hhh$D`q{=cvoQE%EkWqTYdFA8oy=0=BLd+i!1jG zljU123-;eMvAylIJ5R~SI{7RIHXq+BfYtK+M=fW`+SF`+HT^jrYulfP{|eZ=-gz%s z5w4bJq&79%e+F#(&r#dn>$DQKi|HeLWw`Izv&l#MDsVOZ9IH09^uH?Dp9O@k2KT)w z`K%6C)6e$W)Z)Je_!#oh=64slxvpI2Y8~HE*p`DQ_L^|Dj8&VO?N_MVuT->O3!A@h zmiBAI)zV&@n*BNUb-@0fVEDRle-A8kvmRVcKl9h7roVgs`e64aZSFsEb6Wt->tHSK ztocal+%Kc?ZT|k)259QZXG5^Rhn9Rcf~)CgpW4)t&&J?;(X{zHQ*!&i6Kp=#`XX1G z@E?Eg)VY#x%74@{SK8EUe|z2jThwy@T(I%1{qy77Qv5r4yuUo#_v7EEwGDaruDfV~ z7pt+~*%qf5XGv=Ro*w^P{J9j{B+v7~ws+ptFQDi%hvGQp`si=}AEju2m$>5O=6e@A5S3I+D>Q8H)$+-v@RqtAk!pN!#iV8`G*WiGD<8#Cuz zp9kxgF==;BjioK^z5rGWzXoiater1{^|^);pKHP9{59spxxbF0pZc5h@g-_M?&sRB zr>HqDamH~2IM*Vvzl`P_P>mR-WAN0^5f+l%+~)$KmE17|%y2rjSJ!v*(Vd!*p{{jtFx zE4cno7F_?Q8vHK>*Z;YK>;HU%zff@f|50%LUvBVM3$Fhg1=s(r27kNY`oCLn{rzHM zo%x!-;QEg&xPA*Z_`(I(-+#lZjOV{$Rq~|^uD}0=RcZI%uqwI#hE=%!{u@@|)}tDH zt%B>nUcvPr-QXJ*Tz~%!t1`a-hE=$KAD{n*l|1hWzXoqZnM29C{tZ|?=WD+Ot35>V zUUE&U#s7C;wLIs34_5Qu)yIAK4;0tB?On5SvFlZT_YLp8M=9=W`e@U~HLsrj9tW5G zJpotqzDs|90;fOMyxdq%QR|=n{tUK1eV(G|^BA>y`g7V{y0o$KG+VpvW zT0Q;!3taa1I$Z6w+Q7z{__YC#)*8wj38v$4AtbOcn0dV?j?HO=mxnJm?{uTn; zpFZ03anDgte~W<2{uYC)`S(%jV}DD4)1P~e+*s~6`lr98!1kw)HhtW))YIQG;IhBv z;A;LEawZUb7>%!I6sePQ!^}*@SJyUKh_e=fL-v(g&(?^>=?z!se zZzFKo-zIRijcXtK`v5rox#!A_<$kMw`WpkbKYg_65V&5CAmOl3Z+eSTo?hCfhJL_@E^-IkC!QM0aFM!VhVEz63A-ooH zed2!**fk&jgTeYANYP)ePy7!BuSH4Dhk?~%KOF2Di~R_&TE=uF*f#1J(@|jiJcMGO za{Y|CC1W@mtljroao%e`gl%VvpB*T9uidf6+f(mE@!feB>b$2NOAO~G-%Cy?xca!7 zXHJfT8^`sWaUPGRp7)e4u$pT@AMev{xVbnc`)G51hXbsC?$hyL?^AuW>2Hqe>2Cs9 zE${mi!D>B}_)G%plbm|NYLh9>zw>n>*tXi7PkH9kx!!}~XIDz*bGI5hpSx3>&poL# zewG$f zA@H`;`e<{UZLNI`QT+`%`BeGl`{7IgVLq>bVA|fNkr~ z%12V43Rhc}lCho!ww<>8?E7@Ey19yTO%5i;;S@iIP;yNUt?_}>hf&P+2Ekt2Pk)~QJLc>;p9QOxd(P)-pIoD>;cD(ViSv1|V@aGZfYr)4*VI0V z^F_FtaSmc7A4IO#g3ZA`jG>R~=Q`@^DXuBkk7JSRpB%mfc6_Y zL9UPOy$5fs?cSf_+@JGf>!SGiFeUfru{A!5`nVb&Lw!8Od-VkB+^aVc&wC{M>Fotq z|5k(FS#a;Qy9%!V-3@+E!S(-X!S(-ngFjGk{U0j0{tq|!?+UK};|16M$p-&Z!S#Q- z;QBw?;Lp`Od&Sq_&S|+u_a>*r>$TJ{R(SS{<}8(_8U6}N!Z+$-|)Ya*L%U{s=t2Plk0up zGS?r$)!d_!>yP2)>b+LZ>;353lIu^vYRUCy;NU>ir`(Kkps;5qsa*F1h|5Y_9t2 zr#-no0xom?16=L3R<7;emHH#xTwU|F^ZGxErk*?>1FI#^C&0zs6p#0gBh~Wa?bQ`FMzz*KmHgez}Gn zaIc}d*HC}8%uy%UJTlf1aDBo@);x2+09+sS_$=7)SqQG5dggjzuzhJu-;019cj7Dx zFZ*7s=IMKJxIXI9mISN&_hqNArNGA4mcEt-JFn?$8Mu8V&az;&GR|^v+i6Rj<-uN) z#90CEe5daf;rgg&ZL9<~mbSE88Cvee*z`$0n}W?*yW^IZ_@D>zp(Y6C`PSKX}eGr^+Yfr!1gUwC5Ke54Q zHTc;LeolkWY4GzK{K5vmtii8r@ar4=rkZ;_-LH3oJ73{D!<|d_RqeaL^-*_Ub-vW% zzZ=*&4&NPaytLl~u8+FyonN*1k5w=DUSNIFes8co>bCcKsO>_2D_N=iq&} zADVi6_6Iwr(&qp)^*lcg1lvwM?G6IlU-?dXFkC%r^$@Ua)zj`!usN0O4ntFq&*5Nm zDt(SXQ%_Duf^Dasc1MBDsT|AEaP{PL4A{2niS=QyxoUGBUB_yPdz?C@jC(v>Jw9FF zX5S~Esi*I5u0tZRma)$Omt&s^*H7KCpGd71|5L#F=YBjDY%bZOPXoJ0?@G~@ zcBg}l75)*h^P2T`23Q~UjNweM@wDaIoCP)qZRvA1*giWc+RfX2R6YHj3${OP=TOV@ z_wAhr9-^(f_Oq$g()I$dxobP0TAsEa1>06#`y6UD@rBeEQ4;53uzBUrY+MRfyM*#4 z{vW6I8rfY;z^GdfI#%?7XJUXTY}c3`JY~J_~jZ<7FLw8LW@G@vf&< zi~o(_vi(=!=9hfF3f4zGZN3IJj`41ymS>%P9qc+&PYySOjiW7NQY**g*yLG9*6HsX jVAp8)EnwGC_^n{q*+E=Q-=l5=+eSZazH6!ZuJwNabFgND diff --git a/piet-gpu/shader/gen/pathseg.dxil b/piet-gpu/shader/gen/pathseg.dxil deleted file mode 100644 index c49875551ce37e3ff85e65ab48fd0be5ee02a17b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9596 zcmeHtdsGuwyZ0oMOhTB1gzG>+CLolXIsqhr)+U6Tf;B3lqShgZA|eJ*D@t{eAVdU> z78P$bUJu@CycRvyCV+|xHd@<*mTFM6*w?pj>f83i)~`K#LgL%6>zwm_YkmKmcdavP zO=drr`91sc?7e?`AbnPva(h}|$GjP@xj)5{w>f_ue}(};P)!5`almstxO2fh7u;pw z4hF{M9uQ;%_w>w(1Q+Ps*Tm^rQnyS$r~FA)P0uGGzr zG1n73Ag1f~qy=!%cKUMr|5*B=yZS2=SZD2_=k*T23blt>_cDj2w za%L;P&OsMp3v?Gz579-2F$wFW3tki}4}vCwcj&hI=*ToYms{t-U_cvi2uc9D_%MS9 zzFiA@a_OFEp=w zeim;1%=_-7+zFLm)8h|K+QRUGjQPHB3oYSc<+z<#+z?gxQdSmruF$}p_@MOw9_9=E zxHodLC?fYa-m>2Gk`AG0>$#~Jiu5QbYEL*A63EB5(o(;itWPSbuP%W>nSV+pSZWij z2EUNkYJyW~;#A?BrA9%8yRgb#s31ACgAIB92e*}S&T5jg+=e_8a#jv|k^+D*?D-6s zOdQ~|)Gh!xE(E70)oX!K5}uI`T`Po%CuT-VMK9EyTm~%+PJ$8yVrt@Be2elU803uW zsvGYibml58qt)KS$wLd+%aQUo*-Q+~rkv?UTga|2AO@X#^kp%A%V6TpA_P$qFE-2K z5+LPAEH331>c$5M-@u{Oi8$o01ol!Rhd}l^FC!b|FrB;cBU-RPQLeGkBQon6OUax> zvP@WmDek68$TONOa6>DkfO*HLn=?!-aYMeT=H{UqxFk+^CKEEWFmMe*bp}Gbg-+0F zkcXQ=DG1IrW-@)!$6`gERpa2(u^;ZvSyKh?thmTf?DrhEd(D;@bqsG;%rCnxYOAF= zRh~sT_k}DU7P$+D?jePE5|^bYEGySRdEz=)63Hcv$jB?D@+-Amp$hU^=5yma=W+s}T7dvMK?9Pty0f8<*`@*}g-b4PbCMNiT_P?Ol*`>=iR zStJ+2%-L1D57NO3{|Z1Ca0P2e7AZq2E3x)+-j zc>4z3Wfk-*#iNM?>m@Rimp)3KtYtPF}X4xNJJ}5E}T=#<}r>dm_eFk~`RVet8 z5+tB$BigMYxAzAxB-lbCS&KfSKI=fz@;jGrcD_7NP@cO`R zk>G_<&`)@k6JAdo0*6YFAnXizC<*SBgtQlqSV(LbcUeEqrc5~pkH4%RUki`xcKEI& zIcrKdvXc5r@JrQejP)9lvoeaaa)=FJ->2&5bL%Ux`pTC2>K1L>_YT4LcEL{;!3zRb zckK@z*dN^O{Ov0Yz9|Xr)P~$FM7}aAQ@WHXEzaMzj^t)#vH>2~+%X0OeXSdJ4<2Xk zKszOz8V$TUs(vL^A9zP87_uV+xZqnm^0QFz(jw?Ldi_o1^&_Fzb}>TxeSW6!h8vetcD zRNXQm6V@h_m><;q>!Zc-LTvd}9_9J^+2EMBYY^rl8@VlLg6z@v@9#oc$UHO8&ipLp zmmIHsT1#N!tiyGm_k8l}-bh%hn0bQLW?8oBx4RvSz!JgFKiXgAju>&AG1{^F;AcNN z0=at>`WdA|eeO^_4gYDV#H!Z1TknquZ3#w1Nrx1np%<6VDWNs*KnJ*zup-J?mIvYO zuOD9+r3)JUsQNV(!wZWEAst)3PTEk~R`cx-@UbX^VecLTt>G@FHR}yfWEg8#p#+uC zuhfKBwq^y_3|yG3a3VZ6a7iHt#LDTS@zF|kfXnWbz02%ucDidOC`rMQ3J`1OT) zC*V=UVCL@}39RFA3|b62A6(gr>Dj)+MW}t`m9KB@5g)(%`t$@J#+iJYV+@FgImavH z|KuFqK=0-p4q*Fd;2g=Io&P20IQLJ)Io6NF47p6ITfrh*m?kZys4b+2@xLPv5dj`z z06e4<@DMfNAyOz|69#w)O7jp`?t#aDOCG`p75guF2z5d?4~ zh=7Sq=<)CCHd`?USBJlD5}i0-aQqQCwuK`ekdD;sZ4V4SNbpt@Y9oYmk$@PJB8+D; zb6n=bX+cp(NM4Q0yd$R7s^%TC0W$*LHg>hY&wEp>=Ou_1K)DAA9|dAb{Ijs|j~1t( zrs#+d;%eK~$N^%P5ISsW-zJo^t4-ku7oeN^bpF=LVg|Jpbnx6VO<+S_EOjx29|`^mO?;n!Km4w+DsLU3+l8yNqGprVG;6ZOz+eKul9USXGeT2mNgCukRX_}n zcF>g8^iix>DH!b)d*DV4d_mA@fA$%r_0;Cj10DSwB&KulH?VVGqzQcvdN5Q?6Wx%- zQlBeA8x7g*jlq7D%EXhZ7*Xqw34xGjriGIA$7uocHK(C6gR+3K#=(`Wv4RLjEcTmv zl+F)bXZ3IA)qPg7v5?&{zm0ub*FWar0TUDZN2gBBfM<+XWw2)CsxsJTSzU5A9$ZGE zdC)a0a!t-Q4GXuO0v=LEy4n#Lq}lHkEefFPY@JWCxi0;}pymBuaZ`u&B_!L!r9Tq1 zO5$C(#i`Gkm+irD>7NDF6nZbc4|D|w2{%qkDmnIkV`(GUmtNdt01DS!qv;KkR|uhZ9$;M z<{SMHt2^4rZwlcskPExv+}B8anVfYS%4-+Tj1`gWEBib6)>V{M(s4U}L4&Gx$)H9_ zjv!SttLL(xFgp14s}0sT{aK+4`uz2B<5i0oKx?dKm(E(hY){4yF5R{P#MR);(8s6! zt*bx+SlAu;PsHwIS=(}b4(nSwSpv{;w=C&XG6oqgq#x>6cW0PK45QE3z;PiVVc%B- zSl0|A31q8Y(}_9RxB06PF3f@r-JJQXABQpLjHq#ER&-^kNo2ncGHWjJB2JO$O4p5p z*Gml6wazu@*9w0}7BU(;!Tr6<6A?@0U?GB+W~QZ&waL>neV8VuD8(~#BpiU|r*9Zw z>wDtIZxQgl5FQ;S);2h?Vk-SlWYbs%{~G|Sd{7_4(9cGqcrjjitV|SqWwXk| zeFE<{k(o5IE-=Nj#ED$nL?e5r!(-tIi9;59hniyy8Nbqw-a~)1*LP%=DLpPcyE(sh-I2(j^%{Q%&y&A|*0 z*vP5gJQtYZQHhn-uB%~+${)Jc({O3w0gZFxgd+ohd4=cL1AZ^uEz5ta=;a8;PP%2H z1x(in^>5hOm8FTKCqvd|cQ>!u8u^o`^# zwbW(_vj!E}i*5&YT@Ow)ZcZt|L#yt-u_}Po+n%7-&`a6mwml7DjaV>PkQCvgTMux< z5f0ddY>l4Bh)#o*UilISw#J~+KC{=AFpyQ#{CXofFHbKO3>1AG+;u%E)wtP%Rg;U` zi}pgHe_;)|u%3)Qch`+|Ip<_zy9>)PEtW5Jg}&T7Nr}o#wHLu+)sfDwLq5PmmGC>p z&HJ^XUDs`PuYn>aBh-gG)M7#$QU%{#=bkIIIpMB29b+E9iSy&ewnTTGe~TZ@m*$YO zBak0XPeLCTF+b|MUORrv<}F$%G#qA3KsY7BP(7Sz*zB=3EY#<>^a)20W=$@iHPr1r zYaypx7%IOTRhsm9eZ0T-#FI10 zPc)t{rE|>eSF>=-qG2L1;>^UImgS0ofuf5>&zGm=)aS!{*7%Yy;|M#^5qV(pvFEHkO)qZ7?Y3^I@gFZq*T8vXSdVEL|n)RHawir_6upBN zVRM9u--soGYEWEBbV|4U%;XUGV(jn3h|K}*wTtg7(tp1$tMH}(WW zbC3rX7x3Y58SZnhNhMn`UG`R06;vkCz~KlVq_}0T;2T4(M%MmKU(Yv(y6-T|hdbTD zcBHOU#w#9jt3Q-gpN*f%vG^ZMatHg)-gJYtCj3Ru!xxip?FoVA2p+nFojfRu^7*mJ zCi~zE*_f*Lp)$oncd$dmeS>X1vi`-+go<7fSt|k!pfdz3rd6-zPPpN)44wEq7z}2nO47iPZMN#@mDUx|=mn}0e?rG?J29)xjrJrD7jJs?lG zQWdr_xa8UUss&r%9%@PhDi(Hrj#|tN>FaI?2<=Wm1O#7$Ly%8QxYT1^c&$DIa3?uN zd6%*p(!WAtlS5CMGA*_&u7Z93pkF7qxuMl7-!`FuvDp%*1sn5_D6<$)l|XWBGuW8d z)&|~}v+Y^fHhvy@K>KPc*w+onv#k5_t9VdLDDc#m40|4-egH;>Lv*CrKk^l<@2{iQ zTFC8%Il2r%#ld%eIol;E|9R0m|CCPt?~c#I>f4_0`)&3!q3pLO@8qHShC-DTjj02~ zaiK?>wn1@!ngj*uU7WL?z*@29QGV^DNv*n&$JJ|j^84@6MkolqFq5m#5bQsgW(}*i z&6X5yQ4X4I<}S(d0>i9l!qm}u!=P7*Rt)j8L>;MH-X>|45ZI=uO^;(Gwtg-geZ`x( zx0tIR-3r&A(-qZ=nIo8bPpYNZF`+P-`9x?2V*gZVBbMv2SqE4Zg_s8FHyv_B zQ5opbb6HlfC}hK`pQ^H^Rr^}Pko=biK~!!ly>bMVKnp#tfpO5lQUhl!7?(ad`dGmS z<0KR6SE=;c1}nc4A8`+yHsmfBNEPoix#<1eqfNc7xzTO?b-#Obk=qsgE<8%;t!8(O z+CNRjjGlOO8mL+2;%!V`-BcuI;-|E@`ngl&#ZI)+V*BFlv{eu{@dhp4d`^oB5fRfH z5mL@w>4DaTU-)<^Mz}m#_xFZVOvR$WyCzikUCqS5a96fTgl`dTkYP5GkPL(IZ3p9f z;OLw2rIX5vHpgS-+`As&%ySpw>qL6`^j0$9vE%YsOqxs|^A>(oxpBUlac6)GahUee8PehdGUS96 zV-25>&3l?twte9`8%ch8w|aSwBN1>OzagLP&#C5Sp(Rc!Hfz844SnT(f^3Q2%9{j5 zM}Np8Mas#0^W;|ICLVos8!f(jk`^Z^qNBId;*?LF*23uM9ke*BhZdul*WwgnG!G3H zn~Kc{SUj#GrGY1`@~hBk#vEh`p^`?G*|%GRz^R5OwkELsKDFB)pviP}1DK7x0Uhr3X;k}@iuVz?~ISTAW zBSV0t&DXL(*HrX3r z%|D-q7wAehY%jT!7xD4o4!)vH3l2;n_Y}nNX?jX|&zHs8$<{@#CG8<^)K1=L=!m}! z%nbz04SVISb0sm^qa^32#hzbT>f$_)+K>0>_Ub-&5zGGK0-4`jC|7aZ9s1Q*rBCzh z1-c~{w=dCt>0(4rRut@Glk9!2$<@8_{Mwi{sO4C$XC@j1R#!?qruXGh@$c@yj}esdPk!4{u(2ire$Y10g94zs-9zx?QolWEn~IBf{J97+qZ zcLv*8bkQun$B6CLzCf0Pfj$%f271sM!s-w$bs+7s;5*1Nye5J*?iTo)tgsQo0OzqP z=YJf*@$dAjajpCp$2=@OZfBB{s!tuKJ|R0v2G|=~U+)^#w)K?h^35f#FDr{aEA{k> zv&6pP;`5(m%(SvC_q6X3YBSB4?yY;eeO~`e^Lu}ZaE07TsWp4pxgTshf_)imX67{pj>KqD4MleY8f zrx67MetrM;fyY@^wQBwb&@1#TOB+GyLa=d%Ftkq0^oGG+_qIk)e}#eQe|?#QT;z6> kx8wnept8VKk=goD9e-mjHWwRDS> uint(2)) & 16843009u); - uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u)); - a += (a >> uint(8)); - a += (a >> uint(16)); - c.pathseg_offset = a & 255u; - return c; -} - -TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -TagMonoid tag_monoid_identity() -{ - return _135; -} - -float2 read_f32_point(uint ix) -{ - float x = asfloat(_574.Load(ix * 4 + 0)); - float y = asfloat(_574.Load((ix + 1u) * 4 + 0)); - return float2(x, y); -} - -float2 read_i16_point(uint ix) -{ - uint raw = _574.Load(ix * 4 + 0); - float x = float(int(raw << uint(16)) >> 16); - float y = float(int(raw) >> 16); - return float2(x, y); -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -uint read_mem(Alloc alloc, uint offset) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = _111.Load(offset * 4 + 12); - return v; -} - -TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11); - TransformSeg s; - s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.translate = float2(asfloat(raw4), asfloat(raw5)); - return s; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _111.Store(offset * 4 + 12, val); -} - -void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.p0.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.p0.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.p1.x); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.p1.y); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.p2.x); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = asuint(s.p2.y); - write_mem(param_15, param_16, param_17); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = asuint(s.p3.x); - write_mem(param_18, param_19, param_20); - Alloc param_21 = a; - uint param_22 = ix + 7u; - uint param_23 = asuint(s.p3.y); - write_mem(param_21, param_22, param_23); - Alloc param_24 = a; - uint param_25 = ix + 8u; - uint param_26 = s.path_ix; - write_mem(param_24, param_25, param_26); - Alloc param_27 = a; - uint param_28 = ix + 9u; - uint param_29 = s.trans_ix; - write_mem(param_27, param_28, param_29); - Alloc param_30 = a; - uint param_31 = ix + 10u; - uint param_32 = asuint(s.stroke.x); - write_mem(param_30, param_31, param_32); - Alloc param_33 = a; - uint param_34 = ix + 11u; - uint param_35 = asuint(s.stroke.y); - write_mem(param_33, param_34, param_35); -} - -void PathSeg_Cubic_write(Alloc a, PathSegRef ref, uint flags, PathCubic s) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 1u; - write_mem(param, param_1, param_2); - PathCubicRef _458 = { ref.offset + 4u }; - Alloc param_3 = a; - PathCubicRef param_4 = _458; - PathCubic param_5 = s; - PathCubic_write(param_3, param_4, param_5); -} - -Monoid combine_monoid(Monoid a, Monoid b) -{ - Monoid c; - c.bbox = b.bbox; - bool _472 = (a.flags & 1u) == 0u; - bool _480; - if (_472) - { - _480 = b.bbox.z <= b.bbox.x; - } - else - { - _480 = _472; - } - bool _488; - if (_480) - { - _488 = b.bbox.w <= b.bbox.y; - } - else - { - _488 = _480; - } - if (_488) - { - c.bbox = a.bbox; - } - else - { - bool _498 = (a.flags & 1u) == 0u; - bool _505; - if (_498) - { - _505 = (b.flags & 2u) == 0u; - } - else - { - _505 = _498; - } - bool _522; - if (_505) - { - bool _512 = a.bbox.z > a.bbox.x; - bool _521; - if (!_512) - { - _521 = a.bbox.w > a.bbox.y; - } - else - { - _521 = _512; - } - _522 = _521; - } - else - { - _522 = _505; - } - if (_522) - { - float4 _529 = c.bbox; - float2 _531 = min(a.bbox.xy, _529.xy); - c.bbox.x = _531.x; - c.bbox.y = _531.y; - float4 _540 = c.bbox; - float2 _542 = max(a.bbox.zw, _540.zw); - c.bbox.z = _542.x; - c.bbox.w = _542.y; - } - } - c.flags = (a.flags & 2u) | b.flags; - c.flags |= ((a.flags & 1u) << uint(1)); - return c; -} - -Monoid monoid_identity() -{ - return _567; -} - -uint round_down(float x) -{ - return uint(max(0.0f, floor(x) + 32768.0f)); -} - -uint round_up(float x) -{ - return uint(min(65535.0f, ceil(x) + 32768.0f)); -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 4u; - uint tag_word = _574.Load(((_639.Load(96) >> uint(2)) + (ix >> uint(2))) * 4 + 0); - uint param = tag_word; - TagMonoid local_tm = reduce_tag(param); - sh_tag[gl_LocalInvocationID.x] = local_tm; - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i)) - { - TagMonoid other = sh_tag[gl_LocalInvocationID.x - (1u << i)]; - TagMonoid param_1 = other; - TagMonoid param_2 = local_tm; - local_tm = combine_tag_monoid(param_1, param_2); - } - GroupMemoryBarrierWithGroupSync(); - sh_tag[gl_LocalInvocationID.x] = local_tm; - } - GroupMemoryBarrierWithGroupSync(); - TagMonoid tm = tag_monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - TagMonoid _716; - _716.trans_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 0); - _716.linewidth_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 4); - _716.pathseg_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 8); - _716.path_ix = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 12); - _716.pathseg_offset = _710.Load((gl_WorkGroupID.x - 1u) * 20 + 16); - tm.trans_ix = _716.trans_ix; - tm.linewidth_ix = _716.linewidth_ix; - tm.pathseg_ix = _716.pathseg_ix; - tm.path_ix = _716.path_ix; - tm.pathseg_offset = _716.pathseg_offset; - } - if (gl_LocalInvocationID.x > 0u) - { - TagMonoid param_3 = tm; - TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; - tm = combine_tag_monoid(param_3, param_4); - } - uint ps_ix = (_639.Load(100) >> uint(2)) + tm.pathseg_offset; - uint lw_ix = (_639.Load(92) >> uint(2)) + tm.linewidth_ix; - uint save_path_ix = tm.path_ix; - uint trans_ix = tm.trans_ix; - TransformSegRef _771 = { _639.Load(40) + (trans_ix * 24u) }; - TransformSegRef trans_ref = _771; - PathSegRef _780 = { _639.Load(32) + (tm.pathseg_ix * 52u) }; - PathSegRef ps_ref = _780; - float linewidth[4]; - uint save_trans_ix[4]; - float2 p0; - float2 p1; - float2 p2; - float2 p3; - Alloc param_13; - Monoid local[4]; - PathCubic cubic; - Alloc param_15; - for (uint i_1 = 0u; i_1 < 4u; i_1++) - { - linewidth[i_1] = asfloat(_574.Load(lw_ix * 4 + 0)); - save_trans_ix[i_1] = trans_ix; - uint tag_byte = tag_word >> (i_1 * 8u); - uint seg_type = tag_byte & 3u; - if (seg_type != 0u) - { - if ((tag_byte & 8u) != 0u) - { - uint param_5 = ps_ix; - p0 = read_f32_point(param_5); - uint param_6 = ps_ix + 2u; - p1 = read_f32_point(param_6); - if (seg_type >= 2u) - { - uint param_7 = ps_ix + 4u; - p2 = read_f32_point(param_7); - if (seg_type == 3u) - { - uint param_8 = ps_ix + 6u; - p3 = read_f32_point(param_8); - } - } - } - else - { - uint param_9 = ps_ix; - p0 = read_i16_point(param_9); - uint param_10 = ps_ix + 1u; - p1 = read_i16_point(param_10); - if (seg_type >= 2u) - { - uint param_11 = ps_ix + 2u; - p2 = read_i16_point(param_11); - if (seg_type == 3u) - { - uint param_12 = ps_ix + 3u; - p3 = read_i16_point(param_12); - } - } - } - Alloc _876; - _876.offset = _639.Load(40); - param_13.offset = _876.offset; - TransformSegRef param_14 = trans_ref; - TransformSeg transform = TransformSeg_read(param_13, param_14); - p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; - p1 = ((transform.mat.xy * p1.x) + (transform.mat.zw * p1.y)) + transform.translate; - float4 bbox = float4(min(p0, p1), max(p0, p1)); - if (seg_type >= 2u) - { - p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _946 = bbox; - float2 _949 = min(_946.xy, p2); - bbox.x = _949.x; - bbox.y = _949.y; - float4 _954 = bbox; - float2 _957 = max(_954.zw, p2); - bbox.z = _957.x; - bbox.w = _957.y; - if (seg_type == 3u) - { - p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _982 = bbox; - float2 _985 = min(_982.xy, p3); - bbox.x = _985.x; - bbox.y = _985.y; - float4 _990 = bbox; - float2 _993 = max(_990.zw, p3); - bbox.z = _993.x; - bbox.w = _993.y; - } - else - { - p3 = p2; - p2 = lerp(p1, p2, 0.3333333432674407958984375f.xx); - p1 = lerp(p1, p0, 0.3333333432674407958984375f.xx); - } - } - else - { - p3 = p1; - p2 = lerp(p3, p0, 0.3333333432674407958984375f.xx); - p1 = lerp(p0, p3, 0.3333333432674407958984375f.xx); - } - float2 stroke = 0.0f.xx; - if (linewidth[i_1] >= 0.0f) - { - stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5f * linewidth[i_1]); - bbox += float4(-stroke, stroke); - } - local[i_1].bbox = bbox; - local[i_1].flags = 0u; - cubic.p0 = p0; - cubic.p1 = p1; - cubic.p2 = p2; - cubic.p3 = p3; - cubic.path_ix = tm.path_ix; - cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; - cubic.stroke = stroke; - uint fill_mode = uint(linewidth[i_1] >= 0.0f); - Alloc _1088; - _1088.offset = _639.Load(32); - param_15.offset = _1088.offset; - PathSegRef param_16 = ps_ref; - uint param_17 = fill_mode; - PathCubic param_18 = cubic; - PathSeg_Cubic_write(param_15, param_16, param_17, param_18); - ps_ref.offset += 52u; - uint n_points = (tag_byte & 3u) + ((tag_byte >> uint(2)) & 1u); - uint n_words = n_points + (n_points & (((tag_byte >> uint(3)) & 1u) * 15u)); - ps_ix += n_words; - } - else - { - local[i_1].bbox = 0.0f.xxxx; - uint is_path = (tag_byte >> uint(4)) & 1u; - local[i_1].flags = is_path; - tm.path_ix += is_path; - trans_ix += ((tag_byte >> uint(5)) & 1u); - trans_ref.offset += (((tag_byte >> uint(5)) & 1u) * 24u); - lw_ix += ((tag_byte >> uint(6)) & 1u); - } - } - Monoid agg = local[0]; - for (uint i_2 = 1u; i_2 < 4u; i_2++) - { - Monoid param_19 = agg; - Monoid param_20 = local[i_2]; - agg = combine_monoid(param_19, param_20); - local[i_2] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_3 = 0u; i_3 < 8u; i_3++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_3)) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - (1u << i_3)]; - Monoid param_21 = other_1; - Monoid param_22 = agg; - agg = combine_monoid(param_21, param_22); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - uint path_ix = save_path_ix; - uint bbox_out_ix = (_639.Load(44) >> uint(2)) + (path_ix * 6u); - Monoid row = monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_4 = 0u; i_4 < 4u; i_4++) - { - Monoid param_23 = row; - Monoid param_24 = local[i_4]; - Monoid m = combine_monoid(param_23, param_24); - bool do_atomic = false; - bool _1263 = i_4 == 3u; - bool _1269; - if (_1263) - { - _1269 = gl_LocalInvocationID.x == 255u; - } - else - { - _1269 = _1263; - } - if (_1269) - { - do_atomic = true; - } - if ((m.flags & 1u) != 0u) - { - _111.Store((bbox_out_ix + 4u) * 4 + 12, asuint(linewidth[i_4])); - _111.Store((bbox_out_ix + 5u) * 4 + 12, save_trans_ix[i_4]); - if ((m.flags & 2u) == 0u) - { - do_atomic = true; - } - else - { - float param_25 = m.bbox.x; - _111.Store(bbox_out_ix * 4 + 12, round_down(param_25)); - float param_26 = m.bbox.y; - _111.Store((bbox_out_ix + 1u) * 4 + 12, round_down(param_26)); - float param_27 = m.bbox.z; - _111.Store((bbox_out_ix + 2u) * 4 + 12, round_up(param_27)); - float param_28 = m.bbox.w; - _111.Store((bbox_out_ix + 3u) * 4 + 12, round_up(param_28)); - bbox_out_ix += 6u; - do_atomic = false; - } - } - if (do_atomic) - { - bool _1334 = m.bbox.z > m.bbox.x; - bool _1343; - if (!_1334) - { - _1343 = m.bbox.w > m.bbox.y; - } - else - { - _1343 = _1334; - } - if (_1343) - { - float param_29 = m.bbox.x; - uint _1352; - _111.InterlockedMin(bbox_out_ix * 4 + 12, round_down(param_29), _1352); - float param_30 = m.bbox.y; - uint _1360; - _111.InterlockedMin((bbox_out_ix + 1u) * 4 + 12, round_down(param_30), _1360); - float param_31 = m.bbox.z; - uint _1368; - _111.InterlockedMax((bbox_out_ix + 2u) * 4 + 12, round_up(param_31), _1368); - float param_32 = m.bbox.w; - uint _1376; - _111.InterlockedMax((bbox_out_ix + 3u) * 4 + 12, round_up(param_32), _1376); - } - bbox_out_ix += 6u; - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/pathseg.msl b/piet-gpu/shader/gen/pathseg.msl deleted file mode 100644 index 5aea66d..0000000 --- a/piet-gpu/shader/gen/pathseg.msl +++ /dev/null @@ -1,719 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Alloc -{ - uint offset; -}; - -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct TransformSegRef -{ - uint offset; -}; - -struct TransformSeg -{ - float4 mat; - float2 translate; -}; - -struct PathCubicRef -{ - uint offset; -}; - -struct PathCubic -{ - float2 p0; - float2 p1; - float2 p2; - float2 p3; - uint path_ix; - uint trans_ix; - float2 stroke; -}; - -struct PathSegRef -{ - uint offset; -}; - -struct Monoid -{ - float4 bbox; - uint flags; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct TagMonoid_1 -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct ParentBuf -{ - TagMonoid_1 parent[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -TagMonoid reduce_tag(thread const uint& tag_word) -{ - uint point_count = tag_word & 50529027u; - TagMonoid c; - c.pathseg_ix = uint(int(popcount((point_count * 7u) & 67372036u))); - c.linewidth_ix = uint(int(popcount(tag_word & 1077952576u))); - c.path_ix = uint(int(popcount(tag_word & 269488144u))); - c.trans_ix = uint(int(popcount(tag_word & 538976288u))); - uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u); - uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u)); - a += (a >> uint(8)); - a += (a >> uint(16)); - c.pathseg_offset = a & 255u; - return c; -} - -static inline __attribute__((always_inline)) -TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -static inline __attribute__((always_inline)) -TagMonoid tag_monoid_identity() -{ - return TagMonoid{ 0u, 0u, 0u, 0u, 0u }; -} - -static inline __attribute__((always_inline)) -float2 read_f32_point(thread const uint& ix, const device SceneBuf& v_574) -{ - float x = as_type(v_574.scene[ix]); - float y = as_type(v_574.scene[ix + 1u]); - return float2(x, y); -} - -static inline __attribute__((always_inline)) -float2 read_i16_point(thread const uint& ix, const device SceneBuf& v_574) -{ - uint raw = v_574.scene[ix]; - float x = float(int(raw << uint(16)) >> 16); - float y = float(int(raw) >> 16); - return float2(x, y); -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_111) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return 0u; - } - uint v = v_111.memory[offset]; - return v; -} - -static inline __attribute__((always_inline)) -TransformSeg TransformSeg_read(thread const Alloc& a, thread const TransformSegRef& ref, device Memory& v_111) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_111); - Alloc param_2 = a; - uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_111); - Alloc param_4 = a; - uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_111); - Alloc param_6 = a; - uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_111); - Alloc param_8 = a; - uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_111); - Alloc param_10 = a; - uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_111); - TransformSeg s; - s.mat = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.translate = float2(as_type(raw4), as_type(raw5)); - return s; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_111) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_111.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void PathCubic_write(thread const Alloc& a, thread const PathCubicRef& ref, thread const PathCubic& s, device Memory& v_111) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.p0.x); - write_mem(param, param_1, param_2, v_111); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.p0.y); - write_mem(param_3, param_4, param_5, v_111); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.p1.x); - write_mem(param_6, param_7, param_8, v_111); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.p1.y); - write_mem(param_9, param_10, param_11, v_111); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.p2.x); - write_mem(param_12, param_13, param_14, v_111); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = as_type(s.p2.y); - write_mem(param_15, param_16, param_17, v_111); - Alloc param_18 = a; - uint param_19 = ix + 6u; - uint param_20 = as_type(s.p3.x); - write_mem(param_18, param_19, param_20, v_111); - Alloc param_21 = a; - uint param_22 = ix + 7u; - uint param_23 = as_type(s.p3.y); - write_mem(param_21, param_22, param_23, v_111); - Alloc param_24 = a; - uint param_25 = ix + 8u; - uint param_26 = s.path_ix; - write_mem(param_24, param_25, param_26, v_111); - Alloc param_27 = a; - uint param_28 = ix + 9u; - uint param_29 = s.trans_ix; - write_mem(param_27, param_28, param_29, v_111); - Alloc param_30 = a; - uint param_31 = ix + 10u; - uint param_32 = as_type(s.stroke.x); - write_mem(param_30, param_31, param_32, v_111); - Alloc param_33 = a; - uint param_34 = ix + 11u; - uint param_35 = as_type(s.stroke.y); - write_mem(param_33, param_34, param_35, v_111); -} - -static inline __attribute__((always_inline)) -void PathSeg_Cubic_write(thread const Alloc& a, thread const PathSegRef& ref, thread const uint& flags, thread const PathCubic& s, device Memory& v_111) -{ - Alloc param = a; - uint param_1 = ref.offset >> uint(2); - uint param_2 = (flags << uint(16)) | 1u; - write_mem(param, param_1, param_2, v_111); - Alloc param_3 = a; - PathCubicRef param_4 = PathCubicRef{ ref.offset + 4u }; - PathCubic param_5 = s; - PathCubic_write(param_3, param_4, param_5, v_111); -} - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - Monoid c; - c.bbox = b.bbox; - bool _472 = (a.flags & 1u) == 0u; - bool _480; - if (_472) - { - _480 = b.bbox.z <= b.bbox.x; - } - else - { - _480 = _472; - } - bool _488; - if (_480) - { - _488 = b.bbox.w <= b.bbox.y; - } - else - { - _488 = _480; - } - if (_488) - { - c.bbox = a.bbox; - } - else - { - bool _498 = (a.flags & 1u) == 0u; - bool _505; - if (_498) - { - _505 = (b.flags & 2u) == 0u; - } - else - { - _505 = _498; - } - bool _522; - if (_505) - { - bool _512 = a.bbox.z > a.bbox.x; - bool _521; - if (!_512) - { - _521 = a.bbox.w > a.bbox.y; - } - else - { - _521 = _512; - } - _522 = _521; - } - else - { - _522 = _505; - } - if (_522) - { - float4 _529 = c.bbox; - float2 _531 = fast::min(a.bbox.xy, _529.xy); - c.bbox.x = _531.x; - c.bbox.y = _531.y; - float4 _540 = c.bbox; - float2 _542 = fast::max(a.bbox.zw, _540.zw); - c.bbox.z = _542.x; - c.bbox.w = _542.y; - } - } - c.flags = (a.flags & 2u) | b.flags; - c.flags |= ((a.flags & 1u) << uint(1)); - return c; -} - -static inline __attribute__((always_inline)) -Monoid monoid_identity() -{ - return Monoid{ float4(0.0), 0u }; -} - -static inline __attribute__((always_inline)) -uint round_down(thread const float& x) -{ - return uint(fast::max(0.0, floor(x) + 32768.0)); -} - -static inline __attribute__((always_inline)) -uint round_up(thread const float& x) -{ - return uint(fast::min(65535.0, ceil(x) + 32768.0)); -} - -kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _710 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup TagMonoid sh_tag[256]; - threadgroup Monoid sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 4u; - uint tag_word = v_574.scene[(_639.conf.pathtag_offset >> uint(2)) + (ix >> uint(2))]; - uint param = tag_word; - TagMonoid local_tm = reduce_tag(param); - sh_tag[gl_LocalInvocationID.x] = local_tm; - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i)) - { - TagMonoid other = sh_tag[gl_LocalInvocationID.x - (1u << i)]; - TagMonoid param_1 = other; - TagMonoid param_2 = local_tm; - local_tm = combine_tag_monoid(param_1, param_2); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_tag[gl_LocalInvocationID.x] = local_tm; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - TagMonoid tm = tag_monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - uint _713 = gl_WorkGroupID.x - 1u; - tm.trans_ix = _710.parent[_713].trans_ix; - tm.linewidth_ix = _710.parent[_713].linewidth_ix; - tm.pathseg_ix = _710.parent[_713].pathseg_ix; - tm.path_ix = _710.parent[_713].path_ix; - tm.pathseg_offset = _710.parent[_713].pathseg_offset; - } - if (gl_LocalInvocationID.x > 0u) - { - TagMonoid param_3 = tm; - TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u]; - tm = combine_tag_monoid(param_3, param_4); - } - uint ps_ix = (_639.conf.pathseg_offset >> uint(2)) + tm.pathseg_offset; - uint lw_ix = (_639.conf.linewidth_offset >> uint(2)) + tm.linewidth_ix; - uint save_path_ix = tm.path_ix; - uint trans_ix = tm.trans_ix; - TransformSegRef trans_ref = TransformSegRef{ _639.conf.trans_alloc.offset + (trans_ix * 24u) }; - PathSegRef ps_ref = PathSegRef{ _639.conf.pathseg_alloc.offset + (tm.pathseg_ix * 52u) }; - spvUnsafeArray linewidth; - spvUnsafeArray save_trans_ix; - float2 p0; - float2 p1; - float2 p2; - float2 p3; - Alloc param_13; - spvUnsafeArray local; - PathCubic cubic; - Alloc param_15; - for (uint i_1 = 0u; i_1 < 4u; i_1++) - { - linewidth[i_1] = as_type(v_574.scene[lw_ix]); - save_trans_ix[i_1] = trans_ix; - uint tag_byte = tag_word >> (i_1 * 8u); - uint seg_type = tag_byte & 3u; - if (seg_type != 0u) - { - if ((tag_byte & 8u) != 0u) - { - uint param_5 = ps_ix; - p0 = read_f32_point(param_5, v_574); - uint param_6 = ps_ix + 2u; - p1 = read_f32_point(param_6, v_574); - if (seg_type >= 2u) - { - uint param_7 = ps_ix + 4u; - p2 = read_f32_point(param_7, v_574); - if (seg_type == 3u) - { - uint param_8 = ps_ix + 6u; - p3 = read_f32_point(param_8, v_574); - } - } - } - else - { - uint param_9 = ps_ix; - p0 = read_i16_point(param_9, v_574); - uint param_10 = ps_ix + 1u; - p1 = read_i16_point(param_10, v_574); - if (seg_type >= 2u) - { - uint param_11 = ps_ix + 2u; - p2 = read_i16_point(param_11, v_574); - if (seg_type == 3u) - { - uint param_12 = ps_ix + 3u; - p3 = read_i16_point(param_12, v_574); - } - } - } - param_13.offset = _639.conf.trans_alloc.offset; - TransformSegRef param_14 = trans_ref; - TransformSeg transform = TransformSeg_read(param_13, param_14, v_111); - p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate; - p1 = ((transform.mat.xy * p1.x) + (transform.mat.zw * p1.y)) + transform.translate; - float4 bbox = float4(fast::min(p0, p1), fast::max(p0, p1)); - if (seg_type >= 2u) - { - p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate; - float4 _946 = bbox; - float2 _949 = fast::min(_946.xy, p2); - bbox.x = _949.x; - bbox.y = _949.y; - float4 _954 = bbox; - float2 _957 = fast::max(_954.zw, p2); - bbox.z = _957.x; - bbox.w = _957.y; - if (seg_type == 3u) - { - p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate; - float4 _982 = bbox; - float2 _985 = fast::min(_982.xy, p3); - bbox.x = _985.x; - bbox.y = _985.y; - float4 _990 = bbox; - float2 _993 = fast::max(_990.zw, p3); - bbox.z = _993.x; - bbox.w = _993.y; - } - else - { - p3 = p2; - p2 = mix(p1, p2, float2(0.3333333432674407958984375)); - p1 = mix(p1, p0, float2(0.3333333432674407958984375)); - } - } - else - { - p3 = p1; - p2 = mix(p3, p0, float2(0.3333333432674407958984375)); - p1 = mix(p0, p3, float2(0.3333333432674407958984375)); - } - float2 stroke = float2(0.0); - if (linewidth[i_1] >= 0.0) - { - stroke = float2(length(transform.mat.xz), length(transform.mat.yw)) * (0.5 * linewidth[i_1]); - bbox += float4(-stroke, stroke); - } - local[i_1].bbox = bbox; - local[i_1].flags = 0u; - cubic.p0 = p0; - cubic.p1 = p1; - cubic.p2 = p2; - cubic.p3 = p3; - cubic.path_ix = tm.path_ix; - cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1; - cubic.stroke = stroke; - uint fill_mode = uint(linewidth[i_1] >= 0.0); - param_15.offset = _639.conf.pathseg_alloc.offset; - PathSegRef param_16 = ps_ref; - uint param_17 = fill_mode; - PathCubic param_18 = cubic; - PathSeg_Cubic_write(param_15, param_16, param_17, param_18, v_111); - ps_ref.offset += 52u; - uint n_points = (tag_byte & 3u) + ((tag_byte >> uint(2)) & 1u); - uint n_words = n_points + (n_points & (((tag_byte >> uint(3)) & 1u) * 15u)); - ps_ix += n_words; - } - else - { - local[i_1].bbox = float4(0.0); - uint is_path = (tag_byte >> uint(4)) & 1u; - local[i_1].flags = is_path; - tm.path_ix += is_path; - trans_ix += ((tag_byte >> uint(5)) & 1u); - trans_ref.offset += (((tag_byte >> uint(5)) & 1u) * 24u); - lw_ix += ((tag_byte >> uint(6)) & 1u); - } - } - Monoid agg = local[0]; - for (uint i_2 = 1u; i_2 < 4u; i_2++) - { - Monoid param_19 = agg; - Monoid param_20 = local[i_2]; - agg = combine_monoid(param_19, param_20); - local[i_2] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_3 = 0u; i_3 < 8u; i_3++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_3)) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - (1u << i_3)]; - Monoid param_21 = other_1; - Monoid param_22 = agg; - agg = combine_monoid(param_21, param_22); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint path_ix = save_path_ix; - uint bbox_out_ix = (_639.conf.path_bbox_alloc.offset >> uint(2)) + (path_ix * 6u); - Monoid row = monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_4 = 0u; i_4 < 4u; i_4++) - { - Monoid param_23 = row; - Monoid param_24 = local[i_4]; - Monoid m = combine_monoid(param_23, param_24); - bool do_atomic = false; - bool _1263 = i_4 == 3u; - bool _1269; - if (_1263) - { - _1269 = gl_LocalInvocationID.x == 255u; - } - else - { - _1269 = _1263; - } - if (_1269) - { - do_atomic = true; - } - if ((m.flags & 1u) != 0u) - { - v_111.memory[bbox_out_ix + 4u] = as_type(linewidth[i_4]); - v_111.memory[bbox_out_ix + 5u] = save_trans_ix[i_4]; - if ((m.flags & 2u) == 0u) - { - do_atomic = true; - } - else - { - float param_25 = m.bbox.x; - v_111.memory[bbox_out_ix] = round_down(param_25); - float param_26 = m.bbox.y; - v_111.memory[bbox_out_ix + 1u] = round_down(param_26); - float param_27 = m.bbox.z; - v_111.memory[bbox_out_ix + 2u] = round_up(param_27); - float param_28 = m.bbox.w; - v_111.memory[bbox_out_ix + 3u] = round_up(param_28); - bbox_out_ix += 6u; - do_atomic = false; - } - } - if (do_atomic) - { - bool _1334 = m.bbox.z > m.bbox.x; - bool _1343; - if (!_1334) - { - _1343 = m.bbox.w > m.bbox.y; - } - else - { - _1343 = _1334; - } - if (_1343) - { - float param_29 = m.bbox.x; - uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed); - float param_30 = m.bbox.y; - uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed); - float param_31 = m.bbox.z; - uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed); - float param_32 = m.bbox.w; - uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed); - } - bbox_out_ix += 6u; - } - } -} - diff --git a/piet-gpu/shader/gen/pathseg.spv b/piet-gpu/shader/gen/pathseg.spv deleted file mode 100644 index 2fb04e576995b70e4fc255db0bbaab02fca0dafa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 35296 zcmbWA2bf+}6|R4nnI!by1Y$xBy@y^x3nf74onbPWBm576Nf}p50QA9vQ zR1ic|iYSU$v0woaEQpGVARyfLJ^%S8JLl2ox%VE~?(h57+Iy{C&i>DTW|DL)zV7H& zYq8dntud_`U9IX`p|v!&a`=?Z=M7KaeB14|)M43HN9Cu_a=Iq})$go<6j#pn(Rqv2^f1%&i9@okX8JrF-%rhfJQbPj~N}Ioz>{}x371&e?gyqV+z0i!QO%S(@TdgJjy)e46Ze4U|^`1dK8(;y@qDYnAbPl zLRpgWv|7VM^LuA?&+eN&VcziE`MtxNRgE^AztvVd&);fi{W@Dq`?5FCg>dW~k`&x04zB*bf!58!lRQas|p4-^zHppf ztu^6C^vpbHXmF^1I(QWSQ}Y_0+cP+?yZ?0YC}v6Bb#e_(>z_V6YsA)Wg1{=W^t{3j7J z`R@sC=ik}d3p`_>XJ&g09j(3LZQR+K49)@Kn#sMUO&hAuj<)8yFYIH5))aW|<@Wua zrD^xZRBv0ZLqo0R`vCe#z6TnU;)#9;b-T~z88W(cIJWBSoUzT;-E)Te2ZwV%ceD<} z=80z?N5!vytBFm&BS-v3wT{Jbq#()gWuTTw>S75 z4gR?Xzo)_PZSeaV{J{o)xWON3@b5JE;|>13MR-T+2k`cJ(b3w81FC&ycC~f^AKu$H z*th5Wk>?IIuX*a!o;SPIns_fT$77`*4Ca2W@L}L~za6b(3ZJ>)_I)y@`i?SlpnLMb z(6pX`{RbBe_4W++4-M|`XPsx+nfNaHtk>CkCtCHGg^!&PSVN2AXf?j4B1&lB?+qu$Ocrja&F zVCxwi9Eu?^mPBtqyS0O-uchj~x_xk{8;v4@ar~Y&oyDv_)9b7k&d)N~dk6aGER5C8 z&+l=S&g1JsTf5KYVw*SI(|c;H_StT`eC&(l)3&XE&3qQkwQXCmw)GFrD8}AZjbkNj ztcB0v#A$0QSDLeE?6$T_rJ4D{d{#wk&s%nvw)R;3p11IOS!r+Qv^sVpxPN2KIQ=e} zHEi16hRt#Qw|0KF(>A?lxF?-@pS+9pYVY^aoWA=3=z+^ZIi5yjgzjv;COreFFFE6q_|}*Q);yTG=)JTyG3&u1a~{#>K) z&o9C|TKB=t#Amnr!NfUwXzr<#dCr^Tfnd%Lq94)t?yTmu=y#LRm9O{q!Zq^Las5VB zHk)M)v^Zz><^8Rm@6Kx8*YVAIna{)6ruNL`;8-~CYF^AEx_NiBzFBL|!*9Ul96!_G z&&lhT0Q$cOA2_Z0dQV@YS}()r^(^SCj;6MZ-qm^)y_^HJ*GDwJRkYWklL69yC79bX z@b;Q@wN?X<%va?*h4i#nEx z;*sZVzPrhJTGX~n!?vfKCqr%fG;9aTw`toBUc}bfI<&!$YVea9e8wWYt2GN=4b^uE z^_=xD;?vn0YVa%I`R--DufrF9C+=*GWdl|BTHW7z@Ls>)SG=pW1AIpRzyL2{rnf(1 zJ6pStHd4L*4h-qku7zUXs)XY0sH^UcY;YF?+q`{z|V3Et7V5Z*I$W`!@( zR_A*;xXk5>MR-^1S~%y#yxzGz!@ZoT_H|Ry{w{EN7TmW;`_9(=4gN^Q+xMV*?NNC3 zkuWr$PuA0`{+~dbJJf#GcnaLkwWIYcIP3NtxXk@Ux9=}+w}ow=G?bNu@0s$a&hW-e-uKm9ppYCdm7V;L&VXM<>t zUB8Ui{(LS|OU`P>QyWJ~+a+pky+ZRjOuq?*=J?e%Ei`jgR><)TR`gbEG!4(0qPUJEG8heo{N8(0p!E zJE72=U$v79&1WRF>4oOAk=m?6^O;C(pwc==o_n%pc@EOY%RQ>rQQ^+&nP{EL(%zFk zBe_T2U)~e`48=7vj{N?bn~U?C=a2Lst+_w-)wxjWqNbpk7uroA=Y1?@$}%{G+wl}-qmvN*njk~^ynI>HH*1= zKWaDr+o_E&H}1F^t6Rq7vwp1^&+%+To$+i8w!PY_*fyaaM_G+_n^MbNBl%`EcMN0U z{v1Icbz^Tqt>!rF*I3@eiRH^CW2ud0Rwh#0wk#R1n&aD^+BW(+hu;60L;KTi9N%## z&hB8_sEr5hL7g_*oQHP1_WNbWkhc4QZ9AFbnD(!+y2bfFxYnH8L&4^1pXTfRZ+quW z`?1vat-cX$-4iEM)&>tyYa7e|&#tlDqRsvWsm)#c&NcV5OwGMIhk6`kG0G-2e+St8 zZ2MtqeeF}b_1P4!O<5`T%|#ToW=VgSVDk#U0$j$s25yYlZw9+(67Mc>2Uqyr@b(9u{ zXME1rC`iU9Ul#8Crv0k$a=c^Ve#Scg>wz7s_l10mn!D%Z6Kn1sN`Gge87KUM@Z_)l zF}V9pyW_hSZoK&440oT!eha+p?=JXKwcUNXorB}rluXTQB3!L3n-XC%w(ZfiyI0)9 zd2aF^+y!0kxOS!HzqVF$pNZ7W%ed1e+-l*c$lp3=vS-aausM(HV>oG7y7$2_+-E8^ z<1MJMZOp~zt2Q26Jy(gFMW6ql((LEIadYsq*Jmd4JDZySD$V@me*UUuoz8`pU`f*_14|o|K6i- zz;j>3r^88awRShgT43*!#8?|{4(i%JR@<|VABXc_r8{SGzhkPI(~UK@&&x#D|k`waKpbhytH zzE2C+-*;&x_uW{y{=Oe8x$nnH?)$Nl`+ltCzDFv#?~%fd?|Y<@zu4d}H@NSQ;=eKN zUoW`N2fjCo-Tr-V6z-n#-BGxEXk5Yd_kB_9a^Dq&d*1t=C|tYmio&(~t|(l)?~B5< z`@Se#yYGy`wfoK}T)Xd$O7453lKbAMM&ah~JEL&p`MxOJ_1df8)qE7({(N5)|7t!O+;>H#-FHRd z`_bNaMd7yheNnh}-x-zMcShm*-&An@eMc0#+;>DJzrWzN_Z?B}a^De!+urv>;o5yi z6t4Z*2KQZ2Y4=@GxaW!Qio$(v^*vF^eNR+!-xHPG_e3T4JyFShPgHW>6P4WeMDj_T zN`9`NM)9-WXO3Mtqddb;uk{P@k)HuqTb2E_8|R>BnA#+)J_DZz_Ve}<@VV6K=X|iX z2kU+=0IT^-8=ni|_7|UbgVppow{G_yxNWs1=0#wid5oz&{k<1l*~rtgRUX?VU^U0! zxhmh07=CWQAG{&8x^~ZDwan8MU_bNAdAbs=ZvXc2A+YUi<1@k46rUUPvroA`V$X5= z`Zu2s9|8M0Vf`=q`Y83sC|=q>UfZoRmTSTGaVN3-T)Gad=JV`p^dY~7Vqf+xej459 zZHvz*8QTru=GbmTQ%}54fQ|Ps@!S(Pf&I)=zoX9KlVICvdxbIHOzmYp+CD{5Gaqr{ z-vVyN{|uUX;@=7`ewNB( zy9@04YV&hdz9X~Y+J6DOF12m6>7%c@YnX9c|F3oUBDUr{dft!?kReHrXL=H9v&tmgMl+iO!R_oaKNne&eH=eWOu&ENgB?R{W**7d7k zpL@%D<^j0+?e(7d8dyyq_sloIUhWxfU#F-!4srT^6x{6lTWIRp58no>xlcSd9s_@e z@=c28hIZ%WLF(^Pv>jUSrN?W{T#V`2|2;IvXj{iC*Dr0J0NW3@*GQZ1!)=rKmFt%_ zKLFc?|3=#U5Mmq8K;y{uOPe2oZNo?XNShzSZR7h1+sO4xo1cJf!^h7^n}1wf%yk=6Z_L_bEdk@=>Hht_{-8o6#*7+RzJ@_1o zd*XTO+-H9PYs=@_AHiy_Y4+2f;P&tQr`-#1_1vF-2HRHMe4e9LOZ>lr6aO#Z^z%2c zw#0u4tX9VVJKXs0&9wUmTs`ss3AU}e@n57?%NYIzwh#AR`gjGduK&x_^7#K7tiSUW z|JUH^`oBsopVS&%JzM+-obkN@F5A5c*Pedf0^2^%EsJr3sL}1ncoqlSPTN%Wc)Q^n>_Kmz={71zGeJT*tBPCqrr~NIZjSv(A1ODl3+EzTW5}!0^651bJ~seOM}(T z$?r^ZvEP@xx7?4*g1wKdeQsEudIgG?_7!Wpb@EvWY#(`USQ)J5vz_;-d^w7J*|#`t zR|nfRd=0SsA$##{@ZI3fk+%4)3D$NN{cnP8EwGyN<}>ixV88d-zhji^llJ4mW!!b( zu5;#nUHCYPezuqEllJR_&%{4`1GxQVO*RCp`RtVYWdhuG+U(QsooeZ0W3bwz_2hJebwfJueR=W*B0_^1)Xxp69)7f4Far)U3T*loBu4eA(duzCQ z`ra0tzO`*bnM!ee#p!!{u;+{U`@LE2^Re${b^sqvt*+hg*=lLK6WF%d(>ue}eAd+8 z@7ZeUZ&$GWscZLpxmx<$4eWg7*>ZQVn*I5lFbVACd}-T*qUM;y>1Qu+nb+QMwS1S^ z2dtL7_63`lx_RwMt(N{KgU##KdL8!ztGVvxbpY7QytM64Q8O=b`k4YQ^EwEw=KE>m z9t>7XUWb6qOI`bc)N0=6>EBwOd9aqd{>FFB6aO%9*4lR#^29nEd=#m zGWR3lYH!j;f8TkirN5)W_NT7hwN^`i$AHVVJ{GQ)wLTu~<(z6ej-uw6#Odb*aGBSM zaJB3~-@T|MuWqn;shgK;t(N{y2A6By16Rvh_kz95OWQPxnt6%S&vbB^S07w0XT=P# zTJo9+HZOJaa;?=|%k*z8&pcR{YwenQE}ueuDkYy`eukRYJE;e1Tb>bT!_~ZUwhqGk zDW2)(BG=D8{Op_q_A^u45LoW>ls}Vs2Y730+iTNj7PY$l`~GS!SS_Ec{)}WET>TK9 z0X%z#!D<#SnpN%n?3j<{{`9OfCqG{nz|}9S&#u$J_Mx7*r-Peu&p=a8+%v&y7KTXn z3*){MVqEup;+_RpPuzEb?L+;%I+wG-=A+GAd}ntK*j(})=9tfgtEb(0VB4xYkH%KZ zc+Lkm$8!Oidd71hSk2;9&f~k$cBGW^_#U`=#&Z$aKGYNUz2Iit_o1mL?!{m=OLHDC zK{IYSkMD=8C+?+S`%uq(Tn08DZRX-UegJGPo1>B7L88r38 zy%nrxY3{$<(2QH|zuV#JiThcweW+(2-2pZqZRXtSp) z*TZ$Y3+x&t_b-6eEMCr8_S+ZHY@faPC9rn)rtP$woAY}&*nXYg?3;Vww(&9-ebmzC z%V66$H)(S(JaKHJk6QA-4{V#{{}r&BC3Bzr??)^1|0>-4y+3THJ-IvpwqN&Ua`_tE zHeTkUk6PM%9c&vvXVc~z@Wio=K5FLV{qP{z`ytPF4}sP4ng0mb%lkpw!<6SK-Vb79 z`?>Z_aC2Wiil&~q`4(8s;^o+r`?t{!q?Gr=V{rBC%kO~gLp^c73vR}J98EoOzXw*c zWK9zH2{hxD_rv$$>WTXUuzjeX$9O$Ae+V`oZT98;@FTFfl=s7r;p%Dk6R>U7T@Pcc zWj{R$b`6vJPr+&yFXt@#>1SxR&;9Uouy*eU+iA}@e*w1N@_u*XM_vH`sV+36wWvtRwPe#?W6 zsXgcH3Sf1|mvvEd9=vx}1iMz})O;nlTI?&=_I&oO0@ueY+N!l?p3zoAv#nRO)oV?^ zJY%kbW}o(98-0xJbIjYoo;ByO9`ZHeYPNAK+SJ^C*$39Pch9{Y>^?|;r`8khVAMO5(YBR9! zpv(91o1>}6XA7`>++5FrTp#TVXuBnNK6U2B_1FqcJ?~7n2CJ>g9A&<@fji%M2e>U< zKlQ}k4s2|HuhRLK>zDCP1iPp8cmB7B>+kPb>Mz$PF?R%)=fzHN&kJ>Zyq9+dtLJRm z1+3=T^cnV(HkQ8|qn`IDyMyx{D!ARIeRd7Nn`hT-H1*8yAXv?|*~dfRv@=(^eu*;&T#ol0a5c}MjCU?v z-SMW~Jh(q+O6*~{n!d)-rWXJC;4;nvxLO(KG`M=Xw@!zv)8)vR&Vc(~wY)daL{m>b z?*yyaK67yvIPJ_)u3vI|7r4yvY`B`|Yv$q{xOzDk=feG2Ut*sJSJT%x+SJN@>VB24 zQ14so!wYuoNMRBcs^TP2X6kJ$MtCH8S@R`?X6(7{2rbBQxyBMZ}HRg z?e}iW^^7TFyA4}&Y`3GSC*Ehl#(S9g@LAywuzOSej(UH64s1JZes}sjwU>KM+np3O z^ARWh7r@Q-9@X|r$F;L8-(KwH-7-dcAZ?gGnW`wCc1 z+kMpX*uDyOeYM?BEzh2Q0PLQ&jW&JsRd)?DZtMTG4qwC8oQJQYsV9eTfbBQqdl2mS z^fRVhA7i?QoNH^_yKWDGoyXh<4};ZmA5?P;FZZQ;sF`#6`zAJjSH{?nfaUoely8Ic z-1#kV&OGyY46Z%TGv5KLmA`}XUATSbS>|!Dnm=n!{@;V!R+~9IO0DMa`FYlSAAAV4 zwkN>y?7JU;oAdocH1+&m*pI+!-mC8EpMbsG)7pMaQFETf>HA4=v+tjxsb|0b46Nop z&U*eF?s{s=n*RcpTxD+_7t^wYWpSFoUA=leog%wikJ4MYrA#U@)@vwUl1D4Xoxq%kSE}4p+~<{twv8T(rGGQ8O2D`hFAaJ(m0LEwEboeVWv&`8P*@ zhO-#l@3!jpvk7ood@%GzE_5uo9C@@^mEQvLDy!V{`n2Lzq{f5uL?e% zT0Qfy8aVS{4BM&co7k&^%QaX7uI64#K5N30k9zW13tgN2`sYXF$!Be_`KTwKvEbxm z9NVesn|$65F7p`&S91-L&pPnrqn>=$Mb~D(lho=-TdMADhEOux-_Esq@+%tZrUAQhS+~`VJH|^AacCPT=HKuF1~m+TyoM z;aBeMUD35Auie15Rd@ap(>k9MyMxWs`O)8e%y|-Za@J=Likdl#v$lJJT|aa44BZQ! z{h%#5?*q=*%DLPZU7KsaH?=(bZ8F&VQT;_e*E}=!0~^C}+mAlUWq)wF&klgA>6`ox zgeO1k$?qU=^3y+SeK5K<^P55~Pkx7h%}+h~O$8ex`RSAV4h5I_9R^p+IpO>r4o`mC zli!ixGhUYdwF*fQ^y-^htilg3J7lgRAA-Fu&vB$xnOo zI}x1x%DsCMx;FDWfm)vYy5;n#p8QS*8zcGY zrS4_x^w*2c&+R;COoyxGdx|zS+vi!==hFZ1ldByl{+VXagJmgROHsCG++(PhuCdQg z%TSEH9QB*jCsU_AUu&wrbB3z=yCKqk1-RJuD;9VquzmS=0r}^ojnzvTU32~WYVJ7I zr&H|DHPKg|IhX;qe{Hst`%LV(W>OrRZM5aAn+4Y9xp4}$muH=NKSj+P#m<#jEivbU zjTt@1%Fk`jvC<`LG7XYgJ0-&OgVWxmz7>F_bzHL=TH4Cikdl#-7ja?Ts?8m z0~=Cu~ZT*gxCcM)~zw=Udk9K|{J z&)ttN@H$}IuSf0tYrmM{{O39M{b1YpEScOc1*^yBGO&G>J|BRq=l;DMY&-R|`ykkO zwsQ{T`o(@F*w|N468l47_4r%`Hg@TAHC#QhKMb~=dfI&iY;4;ZTdrU19|aq`d>`>K zxO#j(4mNh_b1hswv9AN$PCf0e2OHaV#+K_B`wd`Yzk`x--w3{e5}!|ija~ZO1XoY& zPl9cyo_3!C8{2lqmg^V$r@_X)nUdJIfYsyk8L+WSpIhPTiG3T`cIs(&JJ{H^Gqzkm zVCMnXOGDB zN!+i3%RIjZS9^ex7+(jgC&o9xYKie6*ciD7ZW8w?u)1-xr+x)i z|0N}UzXsdCw&eF4usLWmKlip;@_8DpmT~_Uto95gKF@;nNuR$1tLd9Q)#Cpgxa{-y zaJAGO|pHGR{kTKfDG*giAXKZDg?pk&Qo1iR)pQ|v>opM9)J{TGUM&k}LY zlFh+eQ@pmIc$SQz-m=CUQ*TA_T-k;?=gLdq(FK3G;I@CY;7fpCYw$M;u0M}ni}bg6 zgLf8OfB#OVGQR&dM9KZPA;R_d--ZY`AOCHLlKXE%gzN9W4H2%t|29O)*DARF;|i|- zIt5=3ynfBIUVq2OdG#L6I{yQ#p8fbwuv*SM=UgrR{{pLJAH4!rJDZ}9`Tm>YTDm7( z6S>&+(BJ33QPi(f)}ZL4O&`}(J@NhnF5|rgS9`PeG2UW~Gx1zgxqZ2w`X^oo+<5wE z)5krbo_Jm0GTvyoTE1f%ZwYwfxhLfIRqY3G;w=R>o<7?2aZjly-ZJ1a-g0oYd}lP? z3h=~pPs#1e{iJ{5tpqlnKHBtgPpT*0D&R8SYH+o@J2u`L@WgXZ%I(YjsDI+EsSSU9 zwCUrXR!_XO!DYO+!`1SB*m&dNiRYe{+n4)U|HNAtY&?Cm>Ek`2o_HI8-9vs(Z9=^v zTrKtqVAnkMjo@mrZwz*gV&4R;7W<}P=QQ@sz-sxu#?8UTv|rn5_x|(uYqq3#Pv$dH zu7CR83hex)@2$aVv2O!*ys>W!R?F{!ZU?rFx-lnGZ%;Ai6^z;WlIv%mp2IuTcF$;W z&gjLl?LzU|fs!+N#~N=(y;F_1r{0<3dA)0e+kao#&h+Q{dVYIe?Okxst9=`Ma>4aK zxZwIvZSX@2uKzIw*Z;T%Kfd7lPb;|o(;K|6;Q9{~T>rraA1b*13kt6P=?(6`NmI_x zc?H-1f(E~^;P!t>!S%ni!7nSg{#O=U|En7O>VoUMN`k`*KT07Yw&Zwy9X>jt>jx$3W<_T+jvxXkqkxY{U6ay?QX+Itq2^LiAzw&Z#=SS`683r?=y<8t%!{x**I zcxu}v*Au|zs=t2Plk17#GS`#fYRgiRYqvgiuI0R*jIJ%Y_JGxrYcDvtP6Nx$&$Goi zV$T)ZCD$2XbJbrz?a6f}xXg7HTy0fKa_!fL_SaC#c|8SPTXH=Wtd?A7gOjUgo80^c zsf{D{OtW2bodY&k{q@tHT;BmMbDaxU8%s&9^Yp26E$4L@U0ZUU4^~UAr-75}0NxY}8i$@Se}bJbrz?aB2$;4;^X;A(f{pIqOoPn~NyukS zgOlqeV7d8SN^KnRWz@Dyt{(uKtN!|FPp+4P%UrL3t9`Kc@tph+I5DpT%Z+mtwf;Hp zKMZzEe!oorA3;;s|7vP^^123WUiN8U+SBf%;4-g|!__`k`b1Z-TNi?XIqqN(To+fTu2u3vtS>1S|t z&xBX#_ZMIhicVKPtd#>=yUUzKI zqiajfzX#h^-JBECIywIVY)+0tfAcZtKT&&`vp#>M{Dor9;;j1%VAu5$iut-X{*0y` zpBKU9-jM5KPR6lM`_5kZE4aB={)VQWz48)Rt=ubrhpT6={1fcu7_|L^qGmjC;=K%Z z-pjrDFEsVUdj+iKUdrBl6|8PN$CAFSGr#`^8@rl2YU4V;uTy)On?A2m)XYtswSEJf z`7QVHf6%qX@6E!`e6n}mLf4kx52E{)ZPoL$=*7^~&8tJqWnSuwQ`F2$oOqq!^BLy0+vs25eh(=Q1&^GnY$%&C~hO-+c7VyjjP0No{b~!gluK_?D*j za(wzMMNxBn;_TUFz!_im^|EN{`E$(6fz>Qtgc@nTJX-dA+OL47p7tw()hu4N&)?0k z5}JH{igEPKoURN`->!B34Du@Iw)64~)=w?%Rt2Y>Yms)Vp(md0^i#`RtPXC@#Tsbp z8Oz(iY8L*Zmxb-uL`dAs#ad|UX}>mD&BA|G`^?2y1m|J`#W?zAOm7FLZ|{$cX&k!k zyd0B$YH2qfoObTtv|9%~@ocA`n)@vK#9E%S%Q|Pty4akn@+?^oO+9DH`d~HlO}h=? z%`<32H1#~+OaR+fJ?%CEI}YQSuUx;_HwHKJ*#u2J`D_YSb5G>?YBRXH{id(Y;fdus z`pmTjntF2F60BzXwA%`tcIGJ8&pv!c*}As-epa0Kv)T@!cpXgf9-T_9-}Z&yW@v{~ zybh&oPYj=R52M)T2x^~gj-lR(c0SvL?^W}RXLq>m+$Y1-d%*K|c4~{?B(OH0_5JMK z6RhU=j3JNj-eCL8^Wi>lHRn5V_Jt>o_OzW0)|NQ?fz^y-?s9$2VSli(!w&%KlXvF_ zg7r~%4&|9^=kO?sS2>4A!kxpTsq>xSVA_>)cvyoUQ}c}B5V-whtW)6`tG4(Z3a)I7 z%^3374hO4ce;xr=^ZDPonL~XfT>T-Q+2luo)$|#v=jdp-y7On=^29h6Y>f1G99S*= z9S>G3`#S-yZhwwjZhx-FiC|-dp9Iz?>(LF?N8R<1XFa^Pj;DAXOL0%Te~+WM9w$(z z-)Xc{&)?0_3+G6vejcIiJm$lE`oP+zQ#@PMW`OneY~2p5&usEH{!FlTa}$@jorK-1 x%hm8a9`j0{?0*#j0K`WCK)`D@v_fd> zpjAS{U@(&epafd+l6($cZnym6+yy}_URT_OEyb%iH{`k7BmU9(8*I`$C;|H7y@-ZF zW1wNs#1`bkX5ejJ0g7{KyzTDNc~;*cds$*4KlGo^wmM`GL=W;{UaRGB2R zG*tJHE6=TBO-oz{L6puoo=3TN#P(YZMYi^_mMdGa`AhI3xH$3NqT5HspMlUzI>Xzh zL>KjeGkDpw=pt_6G1PEVbU_{f`EZQ5CZ(M>vV~+@2!ep8SpbqCr;E)CVb`}}yJ8PG zVE2i`>sCb+Zfy^994MqK>oMLfAUv&MmVg4zwHpEYLoyT?>oM||&21n2qRZ*NNmsPB z>iv7;$gO@s;@R=e#_pRT&E)%!gltUE&h4oxfkhIfm<(*qi4w%8v2{Z4vz}2E^^X7a zoVlWpR_r`dlw&X_g7qtQ9dbozpK8shj6Gs?k@*)o_p`Ao&|8mrfKpZGb2jkK^a^KF z1q%J}Ut6fEVQMq%fqXMZ)Om<{mZ)-5wLxc7(5dAKF<}`0!yRdjdSWwAGz{YtG|@Pd z$y0EKnal|o@(?hmYLtTW+?pZ4ERx5&HE}s+(AW!xmNUZn5R}WZg7SGXpJ|RHxUT_d z%f{)z_JeB%vL@Ngeh>k3gVEpKh}^hA2<#v+!Vi=IMBGkd^SOVENceyYfS_4{VZK`c zn0_8Y2DX<-yrGAk9GY%U2NR_zbhn3SbB|*tNdhOa$Tk#V8!V(W(IO;XDcUN@DG5Dj zKC18B28LfkJlUw;io=lrG{HUc1ia2h3*Ad3N^=@h7FM5yzMi~m-_quK^o80U#NZ9f z+Sj}zsWd5cZ_?L$du&Y$mez-rEj>?TO3e2H3r^uyHl4%_*2-!-&_}nS8ZG&J?&aii z?W1;Jj*c*{by;~*=wDs9YzRZ#>G3U>=~F)1hkI>W@_g>)rR6wGEL1uUUF@RIm@R1W z%E&HFS2}%K3Y3oh=RhSxF1tuqMz?7`$ve_|uxA;a3M*NZnCAlL@N<={OD^l@O*@)E zI_~HlAMd?!Z1Uv9g=76UCI=_9r10Ln!sQR7Tz}?Tb9@aa=+59PB(zmSx;=N zZm3sc{6%(;E$T&N_5uIP_}STAhUjj8w55h7>NsNMxpP-9tUEI>I5K$%GeqjL?|Vnr z({m)_?fJ-QhwRjQ544|nt+2Yla#qt<|AHwjE_v7Eo8ywp_C>H82By}H4^CYAbo}_> z#35{T0vvlZyZa3`6$ z&PfAlEzv{h;sMj*c69O3ruK#!qM0O`@|-m_#O4~bag(!#bw&*-sH<*jjFVS$@=vCy zDGT)=-5+yFqdupJ>9x)&=bp}bw@PQ9kmX;nIYAhS#UvGhCi?&v355Dl$ehP%0!Ry6AFR9CA#t@N69Q z!@7%YtO)nN8=%E!!bEzfdGsr0E@11Mz=`RS-YO z6D0@a8H)2%q66E$k27Yc>-6U)-#?u>^38iOqJ9p9%;;$y6yrPfv3Xn{||ba z0@rv?J#F~T=qcv{vSRU>d>3^!oy&^ucqx47Bwqd~DsTE$QtwvI1p){L?@~?zk?6(c z!6s<7BbYwm!p<~@AOpt`k!Z_S(1DYY2xLuK_V#oG68N&iqnxOzs(-1e`hONuQAdq% z+<&X1S`r*FFtX6H`e?kL)spuu;qTB;=;uZ6G^`~bcV=uyQc?iV7`o(~AA$p$k0f(Ea*B zw?cV{));J|oW)Ih^)>{RhBDCiWDh>&(W3?Xx1a?X9sy1AZbCy3a(R1vflp2x9s=zA z=wW}ti+*OyE!2f*4;Z2Zzsn#ISSGW|9lp_nTe(smuq585<{tajecXb~9&sUJp1H=S zSjMz#F})NmnVP&sHUK=|g1;PsSZuG%a@k}_(+&MTpr>W+i}PAMq1>e89Y-f(V%bsBgW0>82#2TBrQ(D2c#7f@s2nK1L zFpK0md1_dK;E~<7=T?PsrGj{`JCY~;4E7Yamj;oewugO$u9KAq>p#r2dO{Zm>sNbK z_C;R?>kqURlSyO2`epY$r0Hy()nzXU)GrHLgCuuK_usC6I8fgc8L02EKi7g=ilg;w zYe3pt$ePdnd@C~d%$R%?Gi8X;TOs&Jqk_N|V-~>$tEw&BWsDN6s67wo&k2khVbWPg z8Gy8p5!Tvyk7P8Qm$%tMeaI!A=>+kk;UbU(K->ne{ zUB?Exjc@hGUo)(+av@uoRZL;*TvsH%YuH%|=`Z~ri7d%?t7}hEcS`I@@tt;iQc6bx zgC_w)O$)pQ4$B}R$O(X&8ITXb%+qvtY$Be7x82V&a3QfWhqwkGFM`W);;N>$B3;iW z@Z1PJi84bv+Q5x))cpwlrmR)b&M<(q+iI0pMtQO*c}v-&%_%d;$NpC z4`a9ZRAro(GUT%fMG*;pDq$ME#haBd&6c02wMJ-9Q)%5nzC;Pr{Q2Cx+K72$RL;#H Yzg`K`kb=xaTSVgL)SDO%k7t4QcQ6p?JOBUy diff --git a/piet-gpu/shader/gen/pathtag_reduce.hlsl b/piet-gpu/shader/gen/pathtag_reduce.hlsl deleted file mode 100644 index 6e9dee1..0000000 --- a/piet-gpu/shader/gen/pathtag_reduce.hlsl +++ /dev/null @@ -1,139 +0,0 @@ -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u); - -ByteAddressBuffer _139 : register(t1, space0); -ByteAddressBuffer _151 : register(t2, space0); -RWByteAddressBuffer _238 : register(u3, space0); -RWByteAddressBuffer _258 : register(u0, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared TagMonoid sh_scratch[128]; - -TagMonoid reduce_tag(uint tag_word) -{ - uint point_count = tag_word & 50529027u; - TagMonoid c; - c.pathseg_ix = uint(int(countbits((point_count * 7u) & 67372036u))); - c.linewidth_ix = uint(int(countbits(tag_word & 1077952576u))); - c.path_ix = uint(int(countbits(tag_word & 269488144u))); - c.trans_ix = uint(int(countbits(tag_word & 538976288u))); - uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u); - uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u)); - a += (a >> uint(8)); - a += (a >> uint(16)); - c.pathseg_offset = a & 255u; - return c; -} - -TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 2u; - uint scene_ix = (_139.Load(96) >> uint(2)) + ix; - uint tag_word = _151.Load(scene_ix * 4 + 0); - uint param = tag_word; - TagMonoid agg = reduce_tag(param); - for (uint i = 1u; i < 2u; i++) - { - tag_word = _151.Load((scene_ix + i) * 4 + 0); - uint param_1 = tag_word; - TagMonoid param_2 = agg; - TagMonoid param_3 = reduce_tag(param_1); - agg = combine_tag_monoid(param_2, param_3); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 7u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 128u) - { - TagMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - TagMonoid param_4 = agg; - TagMonoid param_5 = other; - agg = combine_tag_monoid(param_4, param_5); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix); - _238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix); - _238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix); - _238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix); - _238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset); - } -} - -[numthreads(128, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/pathtag_reduce.msl b/piet-gpu/shader/gen/pathtag_reduce.msl deleted file mode 100644 index c6266ad..0000000 --- a/piet-gpu/shader/gen/pathtag_reduce.msl +++ /dev/null @@ -1,156 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct TagMonoid_1 -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct OutBuf -{ - TagMonoid_1 outbuf[1]; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(128u, 1u, 1u); - -static inline __attribute__((always_inline)) -TagMonoid reduce_tag(thread const uint& tag_word) -{ - uint point_count = tag_word & 50529027u; - TagMonoid c; - c.pathseg_ix = uint(int(popcount((point_count * 7u) & 67372036u))); - c.linewidth_ix = uint(int(popcount(tag_word & 1077952576u))); - c.path_ix = uint(int(popcount(tag_word & 269488144u))); - c.trans_ix = uint(int(popcount(tag_word & 538976288u))); - uint n_points = point_count + ((tag_word >> uint(2)) & 16843009u); - uint a = n_points + (n_points & (((tag_word >> uint(3)) & 16843009u) * 15u)); - a += (a >> uint(8)); - a += (a >> uint(16)); - c.pathseg_offset = a & 255u; - return c; -} - -static inline __attribute__((always_inline)) -TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _151 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup TagMonoid sh_scratch[128]; - uint ix = gl_GlobalInvocationID.x * 2u; - uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix; - uint tag_word = _151.scene[scene_ix]; - uint param = tag_word; - TagMonoid agg = reduce_tag(param); - for (uint i = 1u; i < 2u; i++) - { - tag_word = _151.scene[scene_ix + i]; - uint param_1 = tag_word; - TagMonoid param_2 = agg; - TagMonoid param_3 = reduce_tag(param_1); - agg = combine_tag_monoid(param_2, param_3); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 7u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 128u) - { - TagMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - TagMonoid param_4 = agg; - TagMonoid param_5 = other; - agg = combine_tag_monoid(param_4, param_5); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix; - _238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix; - _238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix; - _238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; - _238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset; - } -} - diff --git a/piet-gpu/shader/gen/pathtag_reduce.spv b/piet-gpu/shader/gen/pathtag_reduce.spv deleted file mode 100644 index 829addcf999546baaca926f0efa521d5a4559356..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8400 zcmbW42b7#u70171OG2ojmk>7uHPR$XJ)wo-Cb}4qAc)8?J3HAOnVnf?XA|(CEE)kt z>>ZR?QB(xGMnw?ChP@&x_J*LMGzItpUk#>=&ahKrGn z$i2u8ICN5}GBHt_Y+qQfmg==ewKP&|RV(e< z+NyqY6Te!cQlA=09s2M{*U`ty*j%={JbH4o(X5St`}s%C(QcI+lcm}PVp)0DX5DqQ zk@naoTPMc#PL$hYlhx7O(qH8FG~^{CkLw8E=9)E=)pmyW?%6i*mFD=cTWgm`OXK;% zmQ1!=Q?4VaLXo|wMM&CX-+lTx&7G0eipb<$~`85x!wA)hKsW6Z5#f>{8~*N|lr8+==&j8Qk)Ez3F;9Z*|Aanf7Lv zqo2@htf`H5?pNJ0UL7w@)?Ps8%<$fCqg1U|$E%GtE4Cl>_iouo=XP`dIXUm$7QN6Y zwQKci9@DXT$79vn=vcc{3uEN-o6x;f?kqm|et0fF=uP$Qy@__E9tQi!b2N#eoIAI6hubCRtUj}zUEwVQaeEcEk#f5nfqt_ugYWeF@l$KS?n%y3;X82lEx1C!Fy?j4n}6qh*&Eo>FYW;b@}AKgsv z%RYlXIaZpiw94%Y_vCxA5svp_hNB7?adEbqh`;WgF{dt}({arAo`|pC>mj5pFW_M#-HPy~-67{&q zpX-pDn{Yh^=X(`?#^~pJ5}fa^oZo(M>^kTCR)h21)i2iU`m856fP~*x1-Ea)%`3Pi z3FrQeJs{y+tK7i}=X)x5NW%G+${mq#zM*nQC7k<|J1*gTFXc{5INwRR6$$72C^wXF zo{`)s3FrGLw>sf`ALY(WINwFNvlGtql{+`#d;{f163(|zZY<$^^W^F|*SqOmi#v;V zP9Mi0Jt)_cpruo&)Z?Br) zH_IMyANG#lyd&}j=X&Hi>*)BIL(cvSKkLfbL%|s%XCGMe&&=-0d(`J{W`{lw&q&T( z`umQgdr)@|VfS~td+>hr{}1!bnek=0ct+&6rd;=)B)az`XB}hRlWPmkTD~Xg+SC`} z=g{7R?%M39oyczfMdUE2co%$J>j2_d$eZRjF4rvs_#sO$H}!cWe;>b^J8H^y&3Jgc2y zuVs$&`XqQ7SwP&UnCBpJ=KVCYy(Mp6b$@@!#r^n>nOFat3Y$41|5w3taq#={joZ>$ zxi{Y??e5JtNZp)&VRj#K)>QW$l8ZI?4#gVoD{SU6Z@ftht&CGzUz;^%!JALJIdi?> z4j{sQ^gZ!hHWYFgZ-6nLg~J|x5QY~n&*3+J5#m{xQ~lB|-QU___nWyY(e=N&OTVT| zzdq58_qTJJ&)?3WoA2gC*Wcg5VORIJaH`)~=*!vFCEyn$7bDZi(d@~ydlJhKyEo!`}viKJ@30`uScF&fwkRI}%wIr!5B0N#x;|pRwQ0n9w=lb2XYc=3<}V=*?O$fr?i{sm1Dhl6|0`g* z<%svM{zb%G#)|KPU(c-g?#AA}2JYV5*Wu)&-Z#M3yOq85GvAJEK;)xuz6mx?TdeC_ zV0quh2iW&_m>sTB+qV%p*C>uPeGlBdrtiba$C`csPS^B9^bLr7tm#K!RWaWLA$p{W;h^HAb60=8^YaW8cn?bvM7j zwh@^|V&8Xxt5CMi_$@=0UJAaX#w$Ma)BB zV`d`}^Dx+$#fWv(^^2H)fsN@wBIe&VFD2i-n(Y=s<7++5~)3Emp)-E7P3?-lJkA;wyJJLX*xV?4{atKAAde0E1S zSL(9|x_tECo?zqTBW^FS_2Qn?^$YvnU~79PQF|d+K71Yrwsz_>fG!`k7lDl%Kq77* zu(gfzOx5)Z`@Udnd$&>h@nHGzc>>tlsm~M9<)iizuyOJc_av~jqi@vp3;X_HYww3d z?E}E_;d3C^+NsY$=<-qf$zbE;Bko|ZwT*M%>iUKKDPU{cYf<~DVEOPl1Z?fp=TLO{ zsC^jNIQfV>9Bl3AV|D$kW6vFdXn&NR6sNsBAKkG%VlUg9J0SM&j?D3Udl}eV(a*;w zx-n;U>E%L?y&i>b-rI`zJ1x;{~N1vp*j)6wNtB2nWRVEL#q1eS{$CxfjK-+;P4QRA85 zw8kmua;uQ2aVl6oYCH=p7d1`;TO;~SU7x728l2X6HoDyDNYpq3EFU$_1j|K@=YXvd z?})lSQRBH_?Be@}o%j)xhT07VCRH*fnT#efG9otmiziT@!Lj`5ltuOp4TA)EF1+m)$ zB1tuBRIn&@3;r5iwM}bRp|+lcKt+lco$aBZ3|3^cQ%kj5Jw0ySeE~L4xB1U{&bD)U z&$&N7pZmSv_r2eDb3ZSJT%G>UC%?3usruDB<99Jv>50m#2mqiY7yttHDXl4J&%>A@^pIwbw^0GJV~I;6sjM$r#t18KFxj0;;Z z`Q0XMR-I$4;o3HAK^mR~gXLlib&PV`A~d}ApvEa~hP=!&Oc&>%_r72r##rW%@JrlVk8SHFviL9 z7*eH711o5I69P2HL@3ZzV1k!x8{hw>+vdDW7B_Cycn?Gfws~2eYxn659d`q21?Qd& z*bo`@Kxb(-d`L7O6M=21AuN~iZ4(;LhD-9SuJPBV8dq8qMN(BpXpY=q?Zdj2L>Dww!=dE&|$=j*6k584rHu5y^v2RYa-*RHe|#^IF0s{X2=G zv#6v-K*2bti}IFI17?p2Js!t`*|W;@HUNa0wgMjKS%k;T09LgM!#pzp=zkF)@E;bA zctZokuv5=#k%vf)PDeo0^7k-3FOue&M1@%be!9uS+xj?bDl`d?K5R0kYCAOTJHWs} z#6@%OxCXxKXJP*X)aXp($qb8?`GFP5=-xwUq8OkRxeK+M+% zI_qlUGAjb}Ge0IV8OH9XzUDxumQfB!xDjswlzSvtxh6pQ;BbGHcPWo9xq( z=x-B(+$u{V=|O?adT0=oC!?M>VUs}t zN*!I}<6XDT4350lHF&1`)@0vAlz=rv8y{P=@bm3AN*6kAY~N9`6F%E$owl63FflZ1 z2znlxHoRH#s^4y>Ew|;R_3QRu!H{;l{oK~h9c|6$SCU|R>Ve^~tB~zTKhk-TV;q8? zgpAA`enR;%hV-@!G><)h{^GmmnqP0vtT@~{%se|nz>VYi0534%MHal+iEnh{B?EYw z1%HOkHs_ywx*{VY7TN}5IlWWsulJ2lTpGD@3^T~oDRWQLiKo?_3$$4_(FcX%A3Ba3 zU;T8)aYyM2wn^oy*d5zi`^w(X*w)tIlxDbg^44fy{go>tlUYkHI|jFn1c5U_ik6hcN;HY!@9@9isZ;LyFSV4bLd#2 zrkbc|DHlxu$qiEcB`b~>>E?BL!!NXj714aZSwm<)x~x~f%z-ZJuXa>gh*|+rlWwzE zh|LzXs@i6uY$5$p@pY$ojFyzql24|@Qzmgd*&RNj3L8{~cbVrE(Jv&w(~vx%U-~AR z@=imF6;1A+l2lP{ymZl9x9MQpL4n@C%Q1?Bd_*et!a=h}(QE#8!IR#HZZZ<)knZZS6iiNX8u|kgk5lXV-{~IJ9G8(gb;z~ZD!7Wr}HB3J6*8&zSBji_nj_o z91y+udCJd#L8>zx(*$v2%^be8Sxn#QEgjY^UVrBH9RyyK-H$zHfXoy12^^y{0GX6` z-c43f+>*am8Zg@4u#F|?S{IC% zqG&#C2crK;&09n#&uuvv*nfdv^amc)JpV!QH{ZSHao;rmUh_277&{~YW4yLxRAYu$ zq5m$-fc<}}b5MCsYLy~kenlwWO+^XTrZUqaH}tPWj=HGWH<76IvlPJ3HT8a%ur`(JK0&XOZJP6oD1Y zWvC?k^??zwd!ODM5u4u{Ocg>c8Sk4I`E2~XKBywA{z*mnGr9U+H@?@6??N}s6ae$# zXS@u{T*&?J34j$V0>U^K)z>CRg@-l51lNPYqvI=OyQ7pNj%}0qJ(N^f(p6;q9H}|vs|cJ@EMb! z0vo4LWt;|(SVOz~=BS95qFgCp)l(o%L;}o9>G;b`4U8mB#&ZRyuu>EQTH3y*O)^AH zHC(iU=rjp4<8m$FCkb?B=yJ`xQP-S9%|w!gzu_7p{;7Fg4U6WwAWbQLy=PH$bi<~^ z=wl6!|45lgHRXZEAiGt`no(W|&J40iRs_-x29)^tO3@PRIKWSebfiP6gvqW$#BI*N zSjqFQz!UCU1H|c3Aq1Yl*EsMl;ATbxU)S)J*{pV^i4}dJ5x+0gr!=7_yGY5zz;U|U zw=@aTtoQ>rk`;~8B^vS^fQd%cz=vBcA`eWo5~VX~m`Mjo^l11XpxgvOlOM3t;3Yrs zgzCcN5jYOtJ3G+u@RvHT-r@^dtARoDrShn5q3_XNy}N5la+^9%-$j>+VnyGWBpf}c zpAFF;Y)*`;t=}5=rjn1VEXJ*v00KY;Rftl8pdu= z3b@^{UJi6G=Qc7n2W18`JWPjbkf2)%=|-m81KLXQ6cSM9NW-PaINiu%K5o9leUB8nt5N>J=EB2``Khb9;CM%`iD;!&k;kl^Xg|B zdh$Vy$-A=Gu0`R`&RBX;MoXOJUh3@8DhjlUge>dd`EscuLE6-lr6|l&EH*3Fa|xpYVZuy% m0GbD{7Qku!6ZpbAZ;94Tm4EwIBTe diff --git a/piet-gpu/shader/gen/pathtag_root.hlsl b/piet-gpu/shader/gen/pathtag_root.hlsl deleted file mode 100644 index 7ad806c..0000000 --- a/piet-gpu/shader/gen/pathtag_root.hlsl +++ /dev/null @@ -1,115 +0,0 @@ -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const TagMonoid _18 = { 0u, 0u, 0u, 0u, 0u }; - -RWByteAddressBuffer _78 : register(u0, space0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared TagMonoid sh_scratch[256]; - -TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -TagMonoid tag_monoid_identity() -{ - return _18; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - TagMonoid _82; - _82.trans_ix = _78.Load(ix * 20 + 0); - _82.linewidth_ix = _78.Load(ix * 20 + 4); - _82.pathseg_ix = _78.Load(ix * 20 + 8); - _82.path_ix = _78.Load(ix * 20 + 12); - _82.pathseg_offset = _78.Load(ix * 20 + 16); - TagMonoid local[8]; - local[0].trans_ix = _82.trans_ix; - local[0].linewidth_ix = _82.linewidth_ix; - local[0].pathseg_ix = _82.pathseg_ix; - local[0].path_ix = _82.path_ix; - local[0].pathseg_offset = _82.pathseg_offset; - TagMonoid param_1; - for (uint i = 1u; i < 8u; i++) - { - TagMonoid param = local[i - 1u]; - TagMonoid _115; - _115.trans_ix = _78.Load((ix + i) * 20 + 0); - _115.linewidth_ix = _78.Load((ix + i) * 20 + 4); - _115.pathseg_ix = _78.Load((ix + i) * 20 + 8); - _115.path_ix = _78.Load((ix + i) * 20 + 12); - _115.pathseg_offset = _78.Load((ix + i) * 20 + 16); - param_1.trans_ix = _115.trans_ix; - param_1.linewidth_ix = _115.linewidth_ix; - param_1.pathseg_ix = _115.pathseg_ix; - param_1.path_ix = _115.path_ix; - param_1.pathseg_offset = _115.pathseg_offset; - local[i] = combine_tag_monoid(param, param_1); - } - TagMonoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - TagMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - TagMonoid param_2 = other; - TagMonoid param_3 = agg; - agg = combine_tag_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - TagMonoid row = tag_monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - TagMonoid param_4 = row; - TagMonoid param_5 = local[i_2]; - TagMonoid m = combine_tag_monoid(param_4, param_5); - uint _210 = ix + i_2; - _78.Store(_210 * 20 + 0, m.trans_ix); - _78.Store(_210 * 20 + 4, m.linewidth_ix); - _78.Store(_210 * 20 + 8, m.pathseg_ix); - _78.Store(_210 * 20 + 12, m.path_ix); - _78.Store(_210 * 20 + 16, m.pathseg_offset); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/pathtag_root.msl b/piet-gpu/shader/gen/pathtag_root.msl deleted file mode 100644 index 65e3741..0000000 --- a/piet-gpu/shader/gen/pathtag_root.msl +++ /dev/null @@ -1,146 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct TagMonoid -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct TagMonoid_1 -{ - uint trans_ix; - uint linewidth_ix; - uint pathseg_ix; - uint path_ix; - uint pathseg_offset; -}; - -struct DataBuf -{ - TagMonoid_1 data[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b) -{ - TagMonoid c; - c.trans_ix = a.trans_ix + b.trans_ix; - c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; - c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; - c.path_ix = a.path_ix + b.path_ix; - c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; - return c; -} - -static inline __attribute__((always_inline)) -TagMonoid tag_monoid_identity() -{ - return TagMonoid{ 0u, 0u, 0u, 0u, 0u }; -} - -kernel void main0(device DataBuf& _78 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup TagMonoid sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - spvUnsafeArray local; - local[0].trans_ix = _78.data[ix].trans_ix; - local[0].linewidth_ix = _78.data[ix].linewidth_ix; - local[0].pathseg_ix = _78.data[ix].pathseg_ix; - local[0].path_ix = _78.data[ix].path_ix; - local[0].pathseg_offset = _78.data[ix].pathseg_offset; - TagMonoid param_1; - for (uint i = 1u; i < 8u; i++) - { - uint _109 = ix + i; - TagMonoid param = local[i - 1u]; - param_1.trans_ix = _78.data[_109].trans_ix; - param_1.linewidth_ix = _78.data[_109].linewidth_ix; - param_1.pathseg_ix = _78.data[_109].pathseg_ix; - param_1.path_ix = _78.data[_109].path_ix; - param_1.pathseg_offset = _78.data[_109].pathseg_offset; - local[i] = combine_tag_monoid(param, param_1); - } - TagMonoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - TagMonoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - TagMonoid param_2 = other; - TagMonoid param_3 = agg; - agg = combine_tag_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - TagMonoid row = tag_monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - TagMonoid param_4 = row; - TagMonoid param_5 = local[i_2]; - TagMonoid m = combine_tag_monoid(param_4, param_5); - uint _210 = ix + i_2; - _78.data[_210].trans_ix = m.trans_ix; - _78.data[_210].linewidth_ix = m.linewidth_ix; - _78.data[_210].pathseg_ix = m.pathseg_ix; - _78.data[_210].path_ix = m.path_ix; - _78.data[_210].pathseg_offset = m.pathseg_offset; - } -} - diff --git a/piet-gpu/shader/gen/pathtag_root.spv b/piet-gpu/shader/gen/pathtag_root.spv deleted file mode 100644 index 3783b49cb1351c02c3e5aa12cbfd4f26c93e4936..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5836 zcmai%iFaL96^Cz|p(%yZBDF2J#xcTfejaZy@v;vJeFtH$*d;%XC>`t)!ypo@s&9=g~S2g@!mk6zrEXw-(m-TcV; zS$9uuI3Jty&E~k@$#Oo{td6!k-D!P0Auk#0xQ~c!jEpp^c}8?kwh%rqkCw(;8!FX? zs}p%G-@6_f{|b7UWtGPGE|30L-1=rdHC@U3=1$T#y{Y(b)7JkR&|1GEI{{rzcrkic z!pqQ=gwIFY75Cc@9<7%K>Wy9H`ryRwMx~tB8WV%tTd|$lAi54&Z`obhi_vqt1lw;p zpZ0#sKUu%^JyE~q+xso|#QpBfhVX4K=jCnFBkZKBWkY4daOy5ryF|bvejU2j!kw9e zJXxM9k0<+0kFz_nec0vEQS!R78^O)7QnNBu&MRZHz1t5*?}ltoTkBf2-iJSG9RxS> zvFa4sIldiTtbOQlc1LzM_Ecj}Yn>VJ>^dFUVQ}pC2)MY$(Skid4exdlvJ$zFmF%O> z+F3Qnc}H^gIO4n;xt;>&y~rJ3;LMR*f>`q+bT@J`;#%I1k1;+2+<}~H8SmYkj`)PV zX4Y)k_SiF=n*5I@`xNw3m>~DLhSgk8yo@!i-;4OHO7Fc~yO`(s{4Vs8XC1M!hP``E zvAl9dTD$eF!`?u){*w^7c5yFHh3`e?A?9l@M|`Zg0__^^E&Qv&#rSpDo-O=+V9y)& zX0Z3Loc~{db}jv`|3b8D$!|?|f8z9SOE~JQcOdp7dgHuV$M~C)uekoLaPBke-3IO88ipXYOC`?npDzm^#77bVU)JfE)FBe{s* z+2UQNyEQ+TVmnj5Yx<0Ho?eRRv$lJ^46(n)U7BiL4mQrmIM4Vp#8~63uWel9yc}!} zqcE5A3a~k=5p%SSi=0=2&2b*0&a1%YtV7JvHZF2r4K~NQiJaGf&GEZnj<#`;a|PHO zXD)JH3pU4ZhdJ8DiJdQJ^_P}>aJLYK$9P{LQ<~UDHL|^nePaJBl*VAC%$(S8&<05A_*ma7tvj^Lpm>q57 zBIio5ImOx8i)~KKj<#`;^JcI)#o2iawmC67+QvoBTfycOXXkC$=EUr18@Gjb&py5# zIT5jk#zoKH0k-Gfu|3u{);V&foCS5<<5l3|JzkA17d_D@SDaI4D(YPew%#>J-0?fX z^48yn);4|{`d!F=WGNDRcz5C>|2^2|nR65Ry~vG-ajvOtjM(q2Yq`JoqdlM6_vnM@ z4Mj$5{JTv~n8~XZr}a{cInFlaG2I16%Kb z^xk|NTtejUg!5hg1Y(}Pe{jYFXdmBeeV;@QAnr#T^*;q}um5Q{`KbRHaIyYpv8^v3 z@Bin(=IM(&_&nG<&=>dg1+cvLa1gB>zAu91^xclu4&NPM?^oX;w06w(m%z@oIr@w- zR^B^2fcD(#C%cC`@wNBi%W(3shr7V88|V89*z*}@O>JYW=?qy@ZGJcUtB8G!@88!F zw;FA}KDpvtIz#Py-oknd;J=P6M9jTA*>SJmz-~X!H{s;(NHg;-u-sC_nYkD3G z*&m=EKz#K75UpPwz5fw-28nm}$6&b~i09S5A8{>n#gY3H@JzDp`%e-3?Y@pA+cPc# z{|p&K%!xDpJn_aK1#8FbJ_vSpT}PiWhtcv;{}(O(G5eR;Gf2$QLtta%qvpe4*NNYg zzXD%^EJuuw-y^>U8|&j49zp*GG2U4BqitN|{1$A^JS1{{2R0{WS=+eC`90X2`AFpa z0c_4X#5&r>Mb00==5!#D^Cz%5eTX^Q#zoGb!RB-#k@FX@Ihzr4w2gCCoEz_39sNEA zo@~ diff --git a/piet-gpu/shader/gen/tile_alloc.dxil b/piet-gpu/shader/gen/tile_alloc.dxil deleted file mode 100644 index 35a1c2b142bd6e9b1d289381090dba20a5a609af..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4904 zcmeHLe^gV~9ltMmdG7`C;{icmOk^*BDFdnxYC#3tmjqD+&rn#^sq0GwMF$88Qc>H! zB#=a#u2De|Y?r7#qjoz3>l~ibwF${-(a|>TRtE)buoTy=?X0sM+j_R$#}AL)+1Wq) zW9vD+=X~$|{d7leKn~bK|MeU)d1Lv|%T^c_x?T{*pOBguZZ6y_}bLM1W{La6~wDk&2dAffGd7yw37 z@VZV0z%QUK-a${thvfmdJ?)5L5G}#T+~r4+R@ov6p-w@cvhdU@k@0A}F+%35mJw8G zT&~5l#;2l9#h&MZI@dU{M&V0Hi zYrv;UEc1HfTO(zpH>5?VbU$D`q|8YK8Bw{&aTt2Bugh3kd9B) zi=P}wG2S-$)$>G>^#;HS6|U(@BM$2j5>!@QTe|h{tt(q6e%5*+@z^tCd7046Z{Tq=eQ`K7#@sH0MiNAu2QcnAq-`Ef}_k@VQB1Dd>q_n4yA^t#IR zWI`Me-6U89B&n=HcN6E%t8ur~h#@ckS-GMvs@M;Epxn>kRzGf|@j9QPagNzH$24-d zFd>S4bkDSA7T?d|O;K!;#G7Ykatat>X67U``EfX>E~bESW+O~%Rw&bby0leApcydB z!M?$Y={qRo27mJj zi9R0CvjR7G3_;jgA?VvF7ICuzpAC;m0fpR@PL(>L%;%NrSHFguL~A+JPhB}p#07vf zjy$cp;%5$T^7pPiQ`EBnq5Ig3Q;2^Lyw@%8{v3e*3&drsWW` z0h{<_<0Zrph%e|qa3s@`8P}Eh*RD(UmYh;s{Pxlj5=}z+lQeh(Gt+qlYB1N~8akIzF5q#vtkaq}0_`G!I4f3rZ8N>p1JzlCB(e zwfXc8iaLCnmm7X=_hf@kni{~kFk3a^Vn-R-aPjH;v)`z5(AXG?c=QB+!3><{VV1l; zAJfGBM7%hMR_X7XfBes@9?Sdf7mJRs-k6tf0Q!f_R;#(?(WafYx<_lPY)x>_rxS|* zeD!!cvn?*eN%c<&loQby8n$)xO2nYTKT@*6r1{3ek&+>gVFOrEJ~o>mJo?qqe~i zW9{6FCfCgklg0O)z&}4eS)8B0h;%N7e%tr#t#7KfLbn2V*20;uhvNeTuZ@grcc_q+ z9@Dbcu-C}rE&Fg=XQONqC_f<;cX-7EGGod>XyKay?G8pVJ|?z9QNrCw zxvibUwFlgUGo*tK=IXlInm-GqEM zmcRPRk9IZx+2W%SthMV8P<|TMlwos%Y4UJ$`WF;wOGJ)O-m%sjZ$NAXvWy zyLjSHM@I7`yZaB`{7ekLl08j)Xm$PQymRBXpM71~bDBx_s+8RT} z8r2L7c45y2&hJjUwz`5if;(Drq}$OmAa{}Cg7 zUl)ut!|KB~!2dB$&R(hiDNYJ+tN(c zlsc$#jcbW4!i8f`+o@V=ke5aLahmRJ-K7gQpVVAyr+WlAf>?Coi^;r2bQi><;kLe! zj?3$$-W3p1Y&GVpYU{c^Rn@f>qO&pxWLrnZwmdtwX=JROKliUN6cf(>8{GK@cm5sj zm}vm+gJ7D3ZWNxgAOR9O1XLE*+`hqHY0T-cu$Pl}vH?{MThhL6y!{`nx_uA(5vFQl zL#iS?v)G22Twig6T zr3v3?1j%RZKb#456|ZL`dxTIgp-cuBp&{;!6lpM=zZ$V0ESAXt8e%__JWMPGgVYM&TOK19ci4y0FboFspK^X1oe!3lmdh%EO1A*!2NKTa zKtW~}1fJ~4nccigaVd)T`{UudjHN*+nYUoV(Wpqna2P1~a`=99V7AI1N8-d=ku1|0lrJ*1EII&&5-D*qSYI^AN`fh?T4%0_{nyHIplDNuWSQ9n!7@u^7f^_`W4&!oFKXh2{3Od??%b zAv$~9mm&{)!zFxV`p$YyLO#!^WclsRQaH_oN~#r~f-GvkNH2=x*N0Y)WgC{3BZTi{ zVRCl)M0w)(b~hw$3~6fRSx6cthbPthNu*@R6ialJ_P3S}o1V9?-72Z=t?os=)KcMx zHJOxYRvOBPoP?yDBg(y}=Ja1AWi=}8W$C%Pq}+h=B{Xqd%w$d-($x35c~~NSR_7u= zmdUqSpLs#_qy8(qMVX{u)D=x|VlchoH!b?V>!OFwN2Sk3rAK;1*|5s&Aa~q;&aqh( zFSfwX3lJVmTd?mDgWRuT@rb%s{hE$;-XAVRtKjzFA{S-41YhX5K`#>ilQPr(2}URr AQ~&?~ diff --git a/piet-gpu/shader/gen/tile_alloc.hlsl b/piet-gpu/shader/gen/tile_alloc.hlsl deleted file mode 100644 index aed9001..0000000 --- a/piet-gpu/shader/gen/tile_alloc.hlsl +++ /dev/null @@ -1,236 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -RWByteAddressBuffer _53 : register(u0, space0); -ByteAddressBuffer _148 : register(t1, space0); -ByteAddressBuffer _232 : register(t2, space0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared uint sh_tile_count[256]; -groupshared uint sh_tile_offset; - -bool check_deps(uint dep_stage) -{ - uint _60; - _53.InterlockedOr(4, 0u, _60); - return (_60 & dep_stage) == 0u; -} - -float4 load_draw_bbox(uint draw_ix) -{ - uint base = (_148.Load(68) >> uint(2)) + (4u * draw_ix); - float x0 = asfloat(_53.Load(base * 4 + 12)); - float y0 = asfloat(_53.Load((base + 1u) * 4 + 12)); - float x1 = asfloat(_53.Load((base + 2u) * 4 + 12)); - float y1 = asfloat(_53.Load((base + 3u) * 4 + 12)); - float4 bbox = float4(x0, y0, x1, y1); - return bbox; -} - -uint malloc_stage(uint size, uint mem_size, uint stage) -{ - uint _70; - _53.InterlockedAdd(0, size, _70); - uint offset = _70; - if ((offset + size) > mem_size) - { - uint _80; - _53.InterlockedOr(4, stage, _80); - offset = 0u; - } - return offset; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _53.Store(offset * 4 + 12, val); -} - -void Path_write(Alloc a, PathRef ref, Path s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.bbox.x | (s.bbox.y << uint(16)); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = s.bbox.z | (s.bbox.w << uint(16)); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = s.tiles.offset; - write_mem(param_6, param_7, param_8); -} - -void comp_main() -{ - uint param = 1u; - bool _192 = check_deps(param); - if (!_192) - { - return; - } - uint th_ix = gl_LocalInvocationID.x; - uint element_ix = gl_GlobalInvocationID.x; - PathRef _216 = { _148.Load(20) + (element_ix * 12u) }; - PathRef path_ref = _216; - uint drawtag_base = _148.Load(104) >> uint(2); - uint drawtag = 0u; - if (element_ix < _148.Load(4)) - { - drawtag = _232.Load((drawtag_base + element_ix) * 4 + 0); - } - int x0 = 0; - int y0 = 0; - int x1 = 0; - int y1 = 0; - if ((drawtag != 0u) && (drawtag != 37u)) - { - uint param_1 = element_ix; - float4 bbox = load_draw_bbox(param_1); - x0 = int(floor(bbox.x * 0.0625f)); - y0 = int(floor(bbox.y * 0.0625f)); - x1 = int(ceil(bbox.z * 0.0625f)); - y1 = int(ceil(bbox.w * 0.0625f)); - } - x0 = clamp(x0, 0, int(_148.Load(12))); - y0 = clamp(y0, 0, int(_148.Load(16))); - x1 = clamp(x1, 0, int(_148.Load(12))); - y1 = clamp(y1, 0, int(_148.Load(16))); - Path path; - path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); - uint tile_count = uint((x1 - x0) * (y1 - y0)); - sh_tile_count[th_ix] = tile_count; - uint total_tile_count = tile_count; - for (uint i = 0u; i < 8u; i++) - { - GroupMemoryBarrierWithGroupSync(); - if (th_ix >= (1u << i)) - { - total_tile_count += sh_tile_count[th_ix - (1u << i)]; - } - GroupMemoryBarrierWithGroupSync(); - sh_tile_count[th_ix] = total_tile_count; - } - if (th_ix == 255u) - { - uint param_2 = total_tile_count * 8u; - uint param_3 = _148.Load(0); - uint param_4 = 2u; - uint _370 = malloc_stage(param_2, param_3, param_4); - sh_tile_offset = _370; - } - GroupMemoryBarrierWithGroupSync(); - uint offset_start = sh_tile_offset; - if (offset_start == 0u) - { - return; - } - if (element_ix < _148.Load(4)) - { - uint _387; - if (th_ix > 0u) - { - _387 = sh_tile_count[th_ix - 1u]; - } - else - { - _387 = 0u; - } - uint tile_subix = _387; - TileRef _400 = { offset_start + (8u * tile_subix) }; - path.tiles = _400; - Alloc _406; - _406.offset = _148.Load(20); - Alloc param_5; - param_5.offset = _406.offset; - PathRef param_6 = path_ref; - Path param_7 = path; - Path_write(param_5, param_6, param_7); - } - uint total_count = sh_tile_count[255] * 2u; - uint start_ix = offset_start >> uint(2); - for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u) - { - _53.Store((start_ix + i_1) * 4 + 12, 0u); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/tile_alloc.msl b/piet-gpu/shader/gen/tile_alloc.msl deleted file mode 100644 index e02138a..0000000 --- a/piet-gpu/shader/gen/tile_alloc.msl +++ /dev/null @@ -1,247 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct Alloc -{ - uint offset; -}; - -struct PathRef -{ - uint offset; -}; - -struct TileRef -{ - uint offset; -}; - -struct Path -{ - uint4 bbox; - TileRef tiles; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -bool check_deps(thread const uint& dep_stage, device Memory& v_53) -{ - uint _60 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, 0u, memory_order_relaxed); - return (_60 & dep_stage) == 0u; -} - -static inline __attribute__((always_inline)) -float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_53, const device ConfigBuf& v_148) -{ - uint base = (v_148.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix); - float x0 = as_type(v_53.memory[base]); - float y0 = as_type(v_53.memory[base + 1u]); - float x1 = as_type(v_53.memory[base + 2u]); - float y1 = as_type(v_53.memory[base + 3u]); - float4 bbox = float4(x0, y0, x1, y1); - return bbox; -} - -static inline __attribute__((always_inline)) -uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_53) -{ - uint _70 = atomic_fetch_add_explicit((device atomic_uint*)&v_53.mem_offset, size, memory_order_relaxed); - uint offset = _70; - if ((offset + size) > mem_size) - { - uint _80 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, stage, memory_order_relaxed); - offset = 0u; - } - return offset; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_53) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_53.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_53) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = s.bbox.x | (s.bbox.y << uint(16)); - write_mem(param, param_1, param_2, v_53); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = s.bbox.z | (s.bbox.w << uint(16)); - write_mem(param_3, param_4, param_5, v_53); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = s.tiles.offset; - write_mem(param_6, param_7, param_8, v_53); -} - -kernel void main0(device Memory& v_53 [[buffer(0)]], const device ConfigBuf& v_148 [[buffer(1)]], const device SceneBuf& _232 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - threadgroup uint sh_tile_count[256]; - threadgroup uint sh_tile_offset; - uint param = 1u; - bool _192 = check_deps(param, v_53); - if (!_192) - { - return; - } - uint th_ix = gl_LocalInvocationID.x; - uint element_ix = gl_GlobalInvocationID.x; - PathRef path_ref = PathRef{ v_148.conf.tile_alloc.offset + (element_ix * 12u) }; - uint drawtag_base = v_148.conf.drawtag_offset >> uint(2); - uint drawtag = 0u; - if (element_ix < v_148.conf.n_elements) - { - drawtag = _232.scene[drawtag_base + element_ix]; - } - int x0 = 0; - int y0 = 0; - int x1 = 0; - int y1 = 0; - if ((drawtag != 0u) && (drawtag != 37u)) - { - uint param_1 = element_ix; - float4 bbox = load_draw_bbox(param_1, v_53, v_148); - x0 = int(floor(bbox.x * 0.0625)); - y0 = int(floor(bbox.y * 0.0625)); - x1 = int(ceil(bbox.z * 0.0625)); - y1 = int(ceil(bbox.w * 0.0625)); - } - x0 = clamp(x0, 0, int(v_148.conf.width_in_tiles)); - y0 = clamp(y0, 0, int(v_148.conf.height_in_tiles)); - x1 = clamp(x1, 0, int(v_148.conf.width_in_tiles)); - y1 = clamp(y1, 0, int(v_148.conf.height_in_tiles)); - Path path; - path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1)); - uint tile_count = uint((x1 - x0) * (y1 - y0)); - sh_tile_count[th_ix] = tile_count; - uint total_tile_count = tile_count; - for (uint i = 0u; i < 8u; i++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (th_ix >= (1u << i)) - { - total_tile_count += sh_tile_count[th_ix - (1u << i)]; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_tile_count[th_ix] = total_tile_count; - } - if (th_ix == 255u) - { - uint param_2 = total_tile_count * 8u; - uint param_3 = v_148.conf.mem_size; - uint param_4 = 2u; - uint _370 = malloc_stage(param_2, param_3, param_4, v_53); - sh_tile_offset = _370; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint offset_start = sh_tile_offset; - if (offset_start == 0u) - { - return; - } - if (element_ix < v_148.conf.n_elements) - { - uint _387; - if (th_ix > 0u) - { - _387 = sh_tile_count[th_ix - 1u]; - } - else - { - _387 = 0u; - } - uint tile_subix = _387; - path.tiles = TileRef{ offset_start + (8u * tile_subix) }; - Alloc param_5; - param_5.offset = v_148.conf.tile_alloc.offset; - PathRef param_6 = path_ref; - Path param_7 = path; - Path_write(param_5, param_6, param_7, v_53); - } - uint total_count = sh_tile_count[255] * 2u; - uint start_ix = offset_start >> uint(2); - for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u) - { - v_53.memory[start_ix + i_1] = 0u; - } -} - diff --git a/piet-gpu/shader/gen/tile_alloc.spv b/piet-gpu/shader/gen/tile_alloc.spv deleted file mode 100644 index 25a362cff49d23cb4b5a41581f9c372dcda883c5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12352 zcmbW5d7PbPb;sXi?o0v+O9=ZCXA+hGNk}At!~hx+n4rM~Ned`?b7$txipZiUyMPPW?enRnMbj2r>~2@PS!y@2t^3mN_r32)o?PUgzBA`@e&=`2 zbI!BB?@ZU!dDF6NN_KEIJ^Rv>EPrNYQ(>}fM%I(-{&knHTQt!cTC{Z8VgnA%y7D+< z4$G!x-RKfBTB$X3>_xtY98P=rS%`^@SGxF_ihd5GMCJZ<<^J{S``2Am9vmAhPqenz ztL1vFQ7sSE#;b#^+Lo$u)04PbW3WCslm>JYk@lgR+B2vzxVbubZF#6VHZgy4$;ynr zc2hCSV5n3XAvMhOypN-mdc8T=rj2g@>Mt?oESsoZZ+M>LF!X43w7j1yF8D<_t5s| zm;>_6$mU>g9j~>jubL+xe%|(kj$9`mkgF>@342SWp2zlrH&j}i-%=gU$Dr-Ae+;E; z9`?psy&5svK55L!*yfRUF0_3H2AbQ{!MQ(L~0Mxhr{{u53PfoH5Fu4sO?kwC7&i$2pEO>fFma_l9Ru>GJJfg==4*55iYzG@9XvT;6f5@k(Q&a6~TeM04d^ zd7p23K9-sI+|Q%UMzb~)8TZf6A@~RDwXyI-PVdH?AE*uP_qB6+Pv$-@MlRwQ^1iwC z&TO~kXcg|rI}6{wW7pw3qVUxk!^zmY^Ks0^$3i)J>a@8dbIy@Fo;G(>&RKo`T6@p7 z=Ph=ZzV=wWbA2zh_d@D#?*;F2EAR@~cY0$i#}z(y)+>C+<8z$9sGWDUzM)F15)FNa zeHgyI-!GgT20JD>pFz%n+B;&8fJb~EdfVbQeh|F9z>mr=Y4cCX=eU$T3y%BQJLNaA z(|h81Y!8JOxbM)t=#hGPU30KfzofAR)v7fcm#l%t-<`dHE>2zE$Tof_(5AoM9C$?| z+q{TB->+WIJigbzfsXk~?}@+N&*$~Ng?;&8wNY(PQ9egDHU9VC2v@eYg`?>?{o@@fU-QI7YabE-E@6L9@ z=Y5`-9Ef+uod>inWp{Vj_Z(n%XZM*SA4_|L=6w(@FVx;bFkRV0aE_~={=XL8gLucT z#p^rJXI@+F<7kp|X`FYtao(NL)_Yved#;~a@lLgt@6x%5PuP8JGjG4Uz-jF}UWM!I zk8z#-kxTnyALSf_H4aCD^F1Qx9TuGPBIlhGobL}gzvqH;{Bpes=Q~60n2f+?s?tqTtphocFwK*Cm{D zB)1{qyyxXMCY<-Y+?5IE{VsQP!a2Wkn-b2uTy7}gyu;--C!BY;Ts`MX`|jshv$(H~ z@o`Veb>+B}-`(k++yn2Pbbq-=_o7`R>u7tIYPz**jvcMN@*i`M5^w5Ic;JqN}!qhHSP zoPxHky!}2E9rets|7db}hI$e6FGOpb-?1-3%bDjSV&mSMLq6{)YlrBw?ozaM^sg*z z?=U&%{Ty^JG6gxau+J@Q+b;(jYhL~8*C9S(uLP&HpmV+a~;9 zE4b*-*Rg$XM1TGiJhQM}ufIe*8}7HbCmi>;;IystFKEXtXC3XA3Qqrjf~O(oi#p!} zyXUnX?-U1;?L0sFbv$;=%h_Pt&qbpB1;!&M67_ntW0bSr8_>3uw?Epe3Qqr;0^26? zt%vhY2>UW^B()IP$qhe$%G*cRK8sJM8Z! zwtMx*iEW;rB(`hqH*Vxt`;D90e#fS^->#|ccWP?;otoNyr>3^wsj0oQ!@eo8z4!fA zjrcxnzg5Gw|9-26t@c|rwf#;_ZNE{qFJLuYJJ)#yE9yS`GU7W?{t?z!?zfOHBi|?f zx6wX7!YBV6BIlkE|1_JDW4Tr2^sN6LvKR4wKA(2(@83t{?SuP8JL3NU?0t~N{~@-# z@$M(>RgA*-z#k#rXR8tK4RzG{W3c|P{{(FNUG&X<{23zW{j#&*{v2#OeH-X+FZwSK zdF#7>wO5hB`hSH)eeb2H_1A^(xgzi1V9Sjl=5gQ4iQV(*c>fODenyRNf}@7-!Km@~ zV0}+h!?pSc#J2LT*FT|sTrYk9h{(BK;>h!_VEYsHx53trIr%rRG4j!;e+R#WIN$oC z?SFvv#kjP8NWG}5*5|plt=Kai*z+9Nvn-C>{|$Do9xulGKVZ3Z?*6wU=KI)k-t7+; z?S25Ztv=`JM`$1CN#751+@2?K)c+~id9lylBif#wh3KClixGMK-aB&c>wD2ZN93Zf z3&0$goWGeE?Wx#uzISG_zvQ~K(XOxl27UDIE`jyMTxv(HZm_jhBKF-pJ#g|-YZ_QC zt>wFg-@KOosC5unU#yjO)H)b!t?fyz8F2DZYbIDOt#ybtZ4N^8N3BD_`mE(%(2iP% zfvvRziE$kcCm*$Df#uR#M`+W=TKc2bY_L9Sx!1JatL_KSh}t^N_mTMAv$6k=!j3b! zsfhDVIT~Kyj-uT$V7Zyhs(FqD^PBG(bGpBciFMQ;HID;(HlpSU*iqAO5#y|RB3NJ4 zJP9lpHRpi&&1;&|n#RO>>W`YQ0Xy$evllyRZi0`Reen9C=3KB`)SL(AH?L_l zsy}L;40isb<|)`wGacut@cN?WX<)giIUme#Uelb`G$z(pf7Cn!?0TBZIXn|vF8m9? z&VBe7V#|ep5!iVO|5?~_;a?1POyOUGEw_R9)?E14f~{#@+v<0ppN(FM_+4WE?Yp+| zk^3C5wZlFa+p*k8d-vFKY-8jjW(C;(g#9{f#}{o@VjCmx`_Vk-fz72a#&JGa-hTP5 zq-{>;@ItWf^RQQA8{@dvpf3_5#@Sxmn27HO8?WzTw6=IH+I!Qpc?sCFss0{y--KR= z_~^g1@T(*5o5ALY_v8k!+BoA9N5uV9-$`ZOR$j5EKsG3IwYoeOpJ zZxHO*qkltSIlqTukI1Ea#Py8!Rj~betXPu~Y`I~?HL0O}ToZkp5jp!Ijy%_b9Y=ga z*1>Y_CHpaomW#Wr368tW^Bj4`;Pu(2fz}pZhj!1c;GS?j$FXOj&1;-~-!tKDPhXQAge{g44Xu zV9Q;g6mue%&WZi?E?JKFzbD_FGY}uY-Or_+d+K1sdNa|kv44lJKHw}f)7jZ;b8lG1Umj# zb|KjD#2D6q9Ygw_xCq;v-!8^4^s{b0GU*MiLx>wF2=82N~KBiKI2 z9r`A)aq`~9m!f^#WBS%1a*j(J<9IVT)*^DRhjR{MZ8m_7i*e|Wam2og-0H}48Q5{T z9^zOJ_sX$|&ryi$aU}Za0z1#gAole*wCy*dqy1Fy9K`2%B-)=)VB4RV;FG}SdJWq3 zwAK}fak0Kv6}Ek!4ZadFpW`%EJI3`^u=(}bPJ0Er*m1oLacs8H7yp~P8m!N?c?a6Z zwUK{2B4;1P@s5^@nia4$!yW)Tt~mFDVApXIVyu0WGcG-+&fOV^ev-Dw5xL(ebp zDd^J?`*kKd=4uEWbLU)+C$@a6!=6lR<8Mf8<8SJ)cO|y*wMpJM0Sw($<$0`|-pZ=C+x;3z2P~J4btks@^u<_j1jkt2H`+SYO386RW)0N6P>7fJWp z2eI|p#=WT>eftpDzM0p2`qT5d5PLD=vk38gPD7uCcn+7K;~d@&)~9_h`o0eP!4CV; z4*RLZeg^z(VaNaLKZ3mnacp-LI_B}C*!trf3bt=y{~FjF g{vXYK{p*N1Y-g-~>$vaq+0MA%K;(Syi+%6^AGqr~cK`qY diff --git a/piet-gpu/shader/gen/transform_leaf.dxil b/piet-gpu/shader/gen/transform_leaf.dxil deleted file mode 100644 index 942718668d53d2cf46a56480fbbd0f519ae8392a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5668 zcmeHLeN=#3s3x*4sCh7wxb=VGv@~4a-3Of-dZz% z%pdQpb?(`}z4zJo9QHovcZ0rIt800hvTWGLS>EOH%-*(i=LG}+FnPH? z)#!17eUcS1FBm4u?z1A|X0~9OhpY(l0LX`P*u9D^MlD@H+Ia{9UZnwe3hEN0@_6*d z27GVSQ5$^RvbDJ-Aq9=DVB66Gsx}GmoPZEn4EqssyPz9+D^rps&S1fVhgEhOcT8EOvxH3l2stYkep_pw9Tm};+!PMIrY-8WM$9z$w~VPTPj~9&zG&1=xwOJ z{&gUMO9o;|mDjPu=twPh)RkjU7MwH)D+h(Iz$cjXijrI9;a1b!O1E%(K&TA}+l^e- zVUYNz$J}bv+*gd;9fQOjid!@7X%xZ;)1G&r$-{*{m7_u!XC{JQvv`)?EsxI8gF6h; znmMnwzsz57>>`2Jy#PUJzX$*$e-6dfGXUrg<0KOR_N-vaUn;DEUl&CKSW5mToieBa ztS5g{j-O%xJp<@qwk}1mCq}eNLq!E{7cX|{C%En#j@E=T2A&H@bfT7=P>B`^k&LSM zNa$jpq+Gpcu4JRyJlKlR*_e74!Swi}*TMR5z*u2efjbu!>rJL z1KYb0O zJeGX^;+W#@W($)g#_=0&>atd;(}DTtH*T#;oo}6@dIu@+>d6C#4)M^C$~B zzN1)YnGtPDK(B?gP)qYKw=v-p=(;uEyp5~#aRc(sie?9n4{a!`c z$~W%hrkzDHvE)1~r5H;s$I@ysp3t4{wq|&&i`%W4UTc>62k@JoPd5MlPIK-W=pI9! zaF2D3?7MXJ*vDNfc+S|3wTo&>Drz=W6q5V50Sk5ZyHRUX z|7FdBv^A+|G5zs-#<~V?`}T3Vx;`Fi7=j_LuHbbqhA}p6-BDF%*!I%a9o2=F#Cux% z-Nc{P+|6AZo0b+c$X*?Qt&x>*Od^@^zOnGl5+vKHOSkp9v<$bdmRsGroqq?+`j!&@ zzzCE0+K3+CylZXYo0S~Dn^iV|W%ui{U0C)&t!sygTTgQ9@*E};_Z1UXQ|mC%j?e*- zaKbGdQ_k9|oONqbIB5_jQGN4n%EL$G^Lk1mHY;Dv?6zhO>M}a8tZr+T4a*#ul++lx z_2t~Oaz_<>G7gj5VKQ=S;UIWX+67{?2wW4qK*)#|8gH79&GDg16!_@h@ip~Oo2 z+rr1&!Uz1%uS&u%%ftK3^DdVVU$}J{{kn`+|Fg33{NLVcx?S?KRDfUe_~KVb)O>n%Tb+#LpEEA{NRj*OS;Z(H{l|=- zGb69`{-6pY-_%%FpU9dxeCX?`R7$$z!iJi)t~a+H(j;v-|Cjp+l|za`(GL2sxNchf;Ya;`elb6S30dD%pP4fRK}{6U>I%!wmeN?p8sNo^5wD#K^OJw1@jAI zwc9xIOqL@D1k;>Z4J+w!&g_8t0B3H2w!exqrPG|*_lPq~{|je|q95Z-)&DJLvUy94 za{4(_91S^B96iICF36dE1(e&zSEEtDO8VRk9w1G+>~+Au%+O@arZ~#BKLfQLthZlz zyWQb_NR`_g)aIn6l;oBs3^&J}d#Upwub#a00`H}m*vPKRBAT?&$6DQ7jWd3kIST@E z*quU|T9b2Hln-R7q8m!RZGI)2+#u@mvwPolhQ$7TLs~^tM~Rcnzj6Es9VMYVu8J6K z6c`t&`z2l)RsR3vP^0m?qEUGhdqig16GX|Q9?>hmE@b)xi}jRFC8z=Wg~>MM_^XWn z(~t$mejS3q?fRS7eGx^O8Neh^u>4s5lswL#K{k&FXqOprkF?Q~aM?ozxfy#X7~Tw= zhY*j^PtELIm;>kjZ}}2Z))|eIA5>N?Bl>8_mpR5K$Wqp%H5BAk^~`wUXfsyI%e2Z>qPjU1C1+7V8=;XHE8pqaf29w?x&ciRLo(yxu;ack zmvlZLEgGErhkqc-y+kxYz^U%*=8k|kXznADL=io%o&uXg`HoUNCLf3!(ASblL656T z;YT=0NFrLymIJj{61^AZs00soZ~t)l1^6FGDOViuE%+i z*?s;1!vS5(!U5mxK~^XEOF&l?KVqKql|P>NQ9Q-CX6`*!7miP4cq6chh298sBGkKR z8?dgeZMZEkg5dV{ZiHVdpR+%WF(>r*Oc6<~4eN?;8i@{d@Qb$03N`*m@TTayc0^Qx znQ}XO3B6n34ILB8C|?SgcKCe8p#W1Zr}#x6+#vL+IbP&03(7&aR%t*y$Yc;yGK5!w zW3xdRI0=pPQRID{5mEGeEU8S;M$J+_1#lm;%CUaL3g+gDQ4g`55n(vcC_Ohcb05+p z4MR2%r<-f+1Bja9$JO3zFp#@SlI0+Nh$B_00fl2*BD`ja#Ux!pGNrXh?sUl0j_v6U zOgI}d;)OuSDPMSzJlz39rjsMpOg1r6$YkRqNlePEy-oAp-ZemC4@N}9)T#OW@v>#o z{)^2jchkZ$Ix?ENFEx8FlK7HbdQ$bGV7%OmOQ`}9Om!b7fV96Z*14K1|vL zPFlvqo3!67K#P6eInRjrFkz{nVZwI(IlSF6_EcXK8+0q!@@L+{cGAePZfMC-!fbyH zT6A+(y(sdA5$Dve!Pk?W$nkp#N56<-Qux0*HOB`lVFygK66W^Tbcar5Kqk$D!V7AS zI#`=2fw_H({MG=C9n;1^i{GeS1dY=xZokKQZbCnlW9k)bP!nhP98u8%J!iXo=Ulk> zI6*fV2XbfclwV+Bp5QzSW9sCev#?w+$ikGrlmEyiTzy=A zfrVbd7ZBQ48|lcsJ?bSjX7esr5cZ%rnV3>JU`9WK8Ko25O(JraOFdeKqikM_rJd$o z1NAWRKS4LXaJ&pfBE{tWR+H}{9<<~P&Tf>NV0VxEguSrLMSc+eBygG$c*!JuFOs#Z zD%9<$3c#Zi&&S~3xYeSBpf3(*0b}u7A|;ry#vP%QZY8TQ{^n4ZT&hySa<;}S*{n;o z%YM;jdyV^%3=6vZi_W0uv6;GVZu&ZIH^_o@ZA{jXxniOu*(`k>{6rd?*vHGK1T&|i z@tNUUDd>;vsTjiL>Bxwfc`HUu#Di~4vN8|q5uR1CfSCs8Sa8R~0;BLFm0fmSqld58 z_a3RWI2L^E4sC9!TfIleY}!X+O+U1k96W;}a+E zloOu35E-^2iL+>K8F)3hZPTJ{@5kE=)WGaQ@h zu*R?Q;gy&(sV~0CMAaoGY*UM}<@?h*`wvU5$TrxX<0mp2LfH?9P%n*1H-cVQ{*s!4 zDb+j8P4o8cO85=DDqw8Xkof-_*RPilTQ5(|VNGnc9Db7@h zPv^7;xunUEwalv*oj$Sv`7PO@RdlRUES0a%eJ?QSu2!)tWTerk_^?!Qwrck2hR(pG zeTzQ)(*opoWT!>Z*{ql=Uw`>{VA4NZ6rPZKjUL6hBZ`lzM5iyf1Cu^qbS^3$IZ1Z+ aD7uFgsoZZ+>zG}=78Z$l9UAyw=zjp+j!`oJ diff --git a/piet-gpu/shader/gen/transform_leaf.hlsl b/piet-gpu/shader/gen/transform_leaf.hlsl deleted file mode 100644 index d3347a6..0000000 --- a/piet-gpu/shader/gen/transform_leaf.hlsl +++ /dev/null @@ -1,235 +0,0 @@ -struct Alloc -{ - uint offset; -}; - -struct TransformRef -{ - uint offset; -}; - -struct Transform -{ - float4 mat; - float2 translate; -}; - -struct TransformSegRef -{ - uint offset; -}; - -struct TransformSeg -{ - float4 mat; - float2 translate; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const Transform _225 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx }; - -RWByteAddressBuffer _71 : register(u0, space0); -ByteAddressBuffer _97 : register(t2, space0); -ByteAddressBuffer _279 : register(t1, space0); -ByteAddressBuffer _377 : register(t3, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Transform sh_scratch[256]; - -Transform Transform_read(TransformRef ref) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = _97.Load((ix + 0u) * 4 + 0); - uint raw1 = _97.Load((ix + 1u) * 4 + 0); - uint raw2 = _97.Load((ix + 2u) * 4 + 0); - uint raw3 = _97.Load((ix + 3u) * 4 + 0); - uint raw4 = _97.Load((ix + 4u) * 4 + 0); - uint raw5 = _97.Load((ix + 5u) * 4 + 0); - Transform s; - s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.translate = float2(asfloat(raw4), asfloat(raw5)); - return s; -} - -TransformRef Transform_index(TransformRef ref, uint index) -{ - TransformRef _85 = { ref.offset + (index * 24u) }; - return _85; -} - -Transform combine_monoid(Transform a, Transform b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -Transform monoid_identity() -{ - return _225; -} - -bool touch_mem(Alloc alloc, uint offset) -{ - return true; -} - -void write_mem(Alloc alloc, uint offset, uint val) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - _71.Store(offset * 4 + 12, val); -} - -void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = asuint(s.mat.x); - write_mem(param, param_1, param_2); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = asuint(s.mat.y); - write_mem(param_3, param_4, param_5); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = asuint(s.mat.z); - write_mem(param_6, param_7, param_8); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = asuint(s.mat.w); - write_mem(param_9, param_10, param_11); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = asuint(s.translate.x); - write_mem(param_12, param_13, param_14); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = asuint(s.translate.y); - write_mem(param_15, param_16, param_17); -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _286 = { _279.Load(88) + (ix * 24u) }; - TransformRef ref = _286; - TransformRef param = ref; - Transform agg = Transform_read(param); - Transform local[8]; - local[0] = agg; - for (uint i = 1u; i < 8u; i++) - { - TransformRef param_1 = ref; - uint param_2 = i; - TransformRef param_3 = Transform_index(param_1, param_2); - Transform param_4 = agg; - Transform param_5 = Transform_read(param_3); - agg = combine_monoid(param_4, param_5); - local[i] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Transform param_6 = other; - Transform param_7 = agg; - agg = combine_monoid(param_6, param_7); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - Transform row = monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - Transform _383; - _383.mat = asfloat(_377.Load4((gl_WorkGroupID.x - 1u) * 32 + 0)); - _383.translate = asfloat(_377.Load2((gl_WorkGroupID.x - 1u) * 32 + 16)); - row.mat = _383.mat; - row.translate = _383.translate; - } - if (gl_LocalInvocationID.x > 0u) - { - Transform param_8 = row; - Transform param_9 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_monoid(param_8, param_9); - } - Alloc param_12; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Transform param_10 = row; - Transform param_11 = local[i_2]; - Transform m = combine_monoid(param_10, param_11); - TransformSeg _423 = { m.mat, m.translate }; - TransformSeg transform = _423; - TransformSegRef _433 = { _279.Load(40) + ((ix + i_2) * 24u) }; - TransformSegRef trans_ref = _433; - Alloc _437; - _437.offset = _279.Load(40); - param_12.offset = _437.offset; - TransformSegRef param_13 = trans_ref; - TransformSeg param_14 = transform; - TransformSeg_write(param_12, param_13, param_14); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/transform_leaf.msl b/piet-gpu/shader/gen/transform_leaf.msl deleted file mode 100644 index 01fefd1..0000000 --- a/piet-gpu/shader/gen/transform_leaf.msl +++ /dev/null @@ -1,289 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Alloc -{ - uint offset; -}; - -struct TransformRef -{ - uint offset; -}; - -struct Transform -{ - float4 mat; - float2 translate; -}; - -struct TransformSegRef -{ - uint offset; -}; - -struct TransformSeg -{ - float4 mat; - float2 translate; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct Alloc_1 -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc_1 tile_alloc; - Alloc_1 bin_alloc; - Alloc_1 ptcl_alloc; - Alloc_1 pathseg_alloc; - Alloc_1 anno_alloc; - Alloc_1 trans_alloc; - Alloc_1 path_bbox_alloc; - Alloc_1 drawmonoid_alloc; - Alloc_1 clip_alloc; - Alloc_1 clip_bic_alloc; - Alloc_1 clip_stack_alloc; - Alloc_1 clip_bbox_alloc; - Alloc_1 draw_bbox_alloc; - Alloc_1 drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct Transform_1 -{ - float4 mat; - float2 translate; - char _m0_final_padding[8]; -}; - -struct ParentBuf -{ - Transform_1 parent[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_97) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = v_97.scene[ix + 0u]; - uint raw1 = v_97.scene[ix + 1u]; - uint raw2 = v_97.scene[ix + 2u]; - uint raw3 = v_97.scene[ix + 3u]; - uint raw4 = v_97.scene[ix + 4u]; - uint raw5 = v_97.scene[ix + 5u]; - Transform s; - s.mat = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.translate = float2(as_type(raw4), as_type(raw5)); - return s; -} - -static inline __attribute__((always_inline)) -TransformRef Transform_index(thread const TransformRef& ref, thread const uint& index) -{ - return TransformRef{ ref.offset + (index * 24u) }; -} - -static inline __attribute__((always_inline)) -Transform combine_monoid(thread const Transform& a, thread const Transform& b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -static inline __attribute__((always_inline)) -Transform monoid_identity() -{ - return Transform{ float4(1.0, 0.0, 0.0, 1.0), float2(0.0) }; -} - -static inline __attribute__((always_inline)) -bool touch_mem(thread const Alloc& alloc, thread const uint& offset) -{ - return true; -} - -static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_71) -{ - Alloc param = alloc; - uint param_1 = offset; - if (!touch_mem(param, param_1)) - { - return; - } - v_71.memory[offset] = val; -} - -static inline __attribute__((always_inline)) -void TransformSeg_write(thread const Alloc& a, thread const TransformSegRef& ref, thread const TransformSeg& s, device Memory& v_71) -{ - uint ix = ref.offset >> uint(2); - Alloc param = a; - uint param_1 = ix + 0u; - uint param_2 = as_type(s.mat.x); - write_mem(param, param_1, param_2, v_71); - Alloc param_3 = a; - uint param_4 = ix + 1u; - uint param_5 = as_type(s.mat.y); - write_mem(param_3, param_4, param_5, v_71); - Alloc param_6 = a; - uint param_7 = ix + 2u; - uint param_8 = as_type(s.mat.z); - write_mem(param_6, param_7, param_8, v_71); - Alloc param_9 = a; - uint param_10 = ix + 3u; - uint param_11 = as_type(s.mat.w); - write_mem(param_9, param_10, param_11, v_71); - Alloc param_12 = a; - uint param_13 = ix + 4u; - uint param_14 = as_type(s.translate.x); - write_mem(param_12, param_13, param_14, v_71); - Alloc param_15 = a; - uint param_16 = ix + 5u; - uint param_17 = as_type(s.translate.y); - write_mem(param_15, param_16, param_17, v_71); -} - -kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _279 [[buffer(1)]], const device SceneBuf& v_97 [[buffer(2)]], const device ParentBuf& _377 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup Transform sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef ref = TransformRef{ _279.conf.trans_offset + (ix * 24u) }; - TransformRef param = ref; - Transform agg = Transform_read(param, v_97); - spvUnsafeArray local; - local[0] = agg; - for (uint i = 1u; i < 8u; i++) - { - TransformRef param_1 = ref; - uint param_2 = i; - TransformRef param_3 = Transform_index(param_1, param_2); - Transform param_4 = agg; - Transform param_5 = Transform_read(param_3, v_97); - agg = combine_monoid(param_4, param_5); - local[i] = agg; - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Transform param_6 = other; - Transform param_7 = agg; - agg = combine_monoid(param_6, param_7); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - Transform row = monoid_identity(); - if (gl_WorkGroupID.x > 0u) - { - uint _380 = gl_WorkGroupID.x - 1u; - row.mat = _377.parent[_380].mat; - row.translate = _377.parent[_380].translate; - } - if (gl_LocalInvocationID.x > 0u) - { - Transform param_8 = row; - Transform param_9 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_monoid(param_8, param_9); - } - Alloc param_12; - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Transform param_10 = row; - Transform param_11 = local[i_2]; - Transform m = combine_monoid(param_10, param_11); - TransformSeg transform = TransformSeg{ m.mat, m.translate }; - TransformSegRef trans_ref = TransformSegRef{ _279.conf.trans_alloc.offset + ((ix + i_2) * 24u) }; - param_12.offset = _279.conf.trans_alloc.offset; - TransformSegRef param_13 = trans_ref; - TransformSeg param_14 = transform; - TransformSeg_write(param_12, param_13, param_14, v_71); - } -} - diff --git a/piet-gpu/shader/gen/transform_leaf.spv b/piet-gpu/shader/gen/transform_leaf.spv deleted file mode 100644 index a0081bf8bb498be78a7016133a149c7f88edfa5b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 13088 zcmbW5d7NBTnTBuDOA^*VSQH^t69NGeNJ0W+B9I{1i3So9R0Of8?yjUuy1Uxy>MSfm z5L^)j7t}#iR7?~Rg9{=kGK|j3=&X*+g1?`$@0;6<&OFbp`*okb!asA#;k@tredjyh zS?{gxn!RXFmd(lz%I0QI&dT!Vh-@}WmK~h+=5_z3ZJW-VXb+vaYV~p*4$Hc7KYb3T zT@Q6PWwh29({Koeiz!XYd}3%jLIa$CdFdj?Z17VE-SpnSsoKAJbN{AGs)OU>)rt0= zk$QEcF;=e*HCpw-c4KE&fv23BO}d0>RvLH`!u(2pQyJp zl!F*gmbIIcgTvL)`sk90c58C5y)19EY;whlwUaB>>ernef?utOB|mgVoP)EY;JaFl zcKrq8x3b8mJke5xOOwVS89 zPvj~8KmhIfWin7w(<9u{wr&HGodizaTq@dayY<;IZiVM2=G_O?O((XxftJvWi%d z%WCjI-bZ(4vg5T@ZM5L$m-r>{OY5V})*k1$H=^;WCd0{3$I^!MBk zjMT@5rdnw0$!*>&w8`T;XZ>&=@s-q#-Qw=-8gQ$&E5DE2r&aj2!TMPJqRFZAq2@E8 zPHhaw>|+c!3Ncm$cg*mW!Ce#h*})mI+^Ngk-`wF&Ump8*>drcLW%q(3*9XAqobD^} z{U!czi61EOM@sy$5`VnJpD6LCOZ;0U{_PTfro_Kf;@>Ut@0IxXOZ?dxyes=5-22(} zna8H-%-h`jJ+fn@+CS19sEuqK+u0nfwHwW`jTaZbN8;N#L*3Z|aDGy;xtHWVu_s;c zqtIT~9NXSt;oYNhANS})<9fHql&)#4S|6#8atK+}@OSTyRmW@X;fegBaZek^eY>kM z)E?%}t+pE@^}J2T<~wP)-qJ>69-2c zGst`9T@r)`~>hlg#VT^qbEtj>9-?fkIM$fslTJ!(EPbM4rCr)q1AZBNGD zlaFHoHWtd!(@#h9{oPqx`|fB*=bD*M=i_^~GjFlGv~|XEEH=MeI(s3tcXE0WcGsz0 z+c9pT!P2_VGxY-`A``&RZ12f`xXG1f1S2hZ7HFx3Ln}4^`&uf~k z>-t;G$#E|n=W`ODsW&Zg{l-x9iEj+KQocW?zJ+?SU5Hz1EiRMkwNUe6j?p`Fx~I_0 z`3`V8|98&dUD@66&Um}B4};^KeeVpv?(Du9yeGRKp6BQHB>g+KeWCH4=Eku912fun zXOEWnV+rra-+S46%lmEv5iX(j@jK8bG@9jF>gT ze1i9NXwF{`p3bE^|31*{`7?fe8yMfm_-YZq22SJ4jc=az=WiVE$>S-;i}y{$Q;YXY zS)=KU@#|L}f4oy{>-Zy{bEf8<7@F@8HSe#`9J^YK*Lc1=)FNj!=Uc5piMGBk)J{$` z-xX?063y|eElo6YRa>5Dz7y2^aEd;BAE=#|Xub#3UYcl*Q*B+MdFQKLoM_(lY8w*G zJ6>&5qB%!uTN2GXUhRrR^Nv@$GSR%-)n1oq&ac|FiRL}7Hk4@I-)h5&=Dn>pl55>l zcXO=S(X5Y;K0fYIwXPg@=l6EHN4>}Uu-Pm>j{w)me&qfhmz#_05O-I^56$*!&b{q+ zBDkL_lnwB>w<6Er5yw2$B2VwMG*5XSvBLfhxOq8m3pfd$g+8p>#YfX;^bgGzVsS5b zH2*5l+1qLreBHlcKZ*$M-9E-^A7cIW=_7(;w2#o#jqmzLeAnH0YQ}NgVs-tG!T$v6 z3guY(Uq~(YtjbR;xcw}ISMoZ`^*(qXT+Mj)y_h=sJ_T&wYPLN!uRCoW>ypB*?s!h4 zj`7%5yK{LKbtNy|*-C0-${o+y1y;A5hQIT49_0kE=gfQ-@_+AOaTIOl@p5YW)xNgi z{e|ZGZ2(s&vnXd2d?VO3w!LT6_QnnSWndq<`c{f_ANH%kwlT-J@0~yIak+7?rPfEy zyyP{CTEra$`vh-*BTxHnQd0j(xOs>F4Pe(g_+D@i#s1$-?OeMjv^x)XQH-zs9%}9G z5&8WEcmL>b{pXY{yXC^*f01+V-8p&Ozl7_fZV@jf17ki%30$kb>~d(`%X>!p#_e$vYwANm)iOFU8uH@|9r=Y%SE66PbtTFK5h=)llE=C z$56X|YK}+lT^VcYn!E_^nLZx-B5(!lm~4MCwQHmvYrGDeQS@I=t-qT77g4(()a_61 z_pDmX=f!aTW%_R@Y!!IK--$H>=bf0)vucLTIEa$MNjOW;V+(SP>asS)rJZgW_ zJAZQTxFZwpedBMA)Lu!rbMNntup7_c9l?#~Z;#;GS0-Hh>V(_hxh1|P;kNg8M8wnI z-x4YJw?xW^O5EQPson4Rl=~eY-2U!Pxc&JpA9lIl^C|axKKK&se$NL#1@55G4ppcSWTOIsgJUT zSQXlx1YSg~uHC(-W?Sv2fYqD}=l)c%`_FOcFV{!xK5U@>l>L4h*nMyPBVwIFy_Djk zeOY0*j6Ud`9e;CXDUI{i|Yxl*a)R$3wv~Mo#)-j$fU}MC-xE!oDpW?od zZ=x8>*y88V{Z6rLg~!;ofy-mN0!=;odllIJo-WSYtHJJT^@obR`5Lh8wEdVdzLwg@ ze6(FjQ8OQL^nW$D-2dy))T94vz-j-lhugpUtwl~%ug+b*dG*>u?oV z9=1WSnzjLIdD!Y;*H_yRwLJFpcCdTeHrn*jSKT#?aa+I8I_$t!o`+#H^~j+CHg1gX z4PeKopMA>pu}}Aqb8T&V@8s*i&SRW|5wKdE18r*QzH|?jb6!Kd3h~CUEvL5cQLsGL zbsXHJq-W-haPEoVhgMHjH+9oJ!jzb)=uLqZ7-+-nb`{72gn)}3e&Q0Jq zQT9;0tF=2Xlhij;v|UbKemC7xXu!NrztQ)iImWo><@(vi@2p$F#^GPy#_#qwLu?cC zE7#BYjd5AWxNZZN_rUFF>M>_;0jtHFy$$T+oN0S2Ma?-AN9?zQ%dy{qrXF+lPOzGD z7JKepaL3>p-$5-m<_79_Q`~>H3;*{Ny8d^9<#A5#0((x3qb>H0x^ohFTWhH{Y7mo?~BjEwFiF?JTKv& z13O1?pF9NCM?JpRJ`Yw;ztzKDE!09}F9j+Glf;KhV$3AyY{+!$7Jc_@e{Qf(P;^W_`&Lt-1FeLE~Gd%+i2TIJoR-HZN3{WqW1CJuA^R0Q8P!eb0t=b zK3@U0&)}~FJFfT*aw)i%xvA-w&iyfPzlnYP`;v3#??(R~DRwUWo2mUDPwn|s z+f32Ny%#xbDfHl%!)+VqWgA#6Jum*<*>>9E_sy%o>aMdm*0};+MDgjPM6M@+&F^ID z_}zUaI9<Z4gT(jTiSJLi{tqWy{{todNW%4hEaCb;UgA$AT>qyNuK%}6{M!lF|CxmA|D6*5 zZo>8dUc&YNeu+O@@VE~@0(VZ`AF<~5fz{(a{3uv0?nCETE&M+QR*U=a<6yP85AO$m zg5rAFkGgF=E1#rjbAR5yN;ixxc=@V*HSLtM{Pgi{nWOL@1)OwkD}2jOZBig$pyevT5ksz({~N zs=t2PBiC<$(_Ei~t4&g3UcU*BT-77jr@-1G*IxsxMXtXNj$A!Ia`W@-JXv7RjqM`W z-vFDd{`zT;Tz?as=K5Q3wHqjr>u-b2)jKTRufGG=7PJ#ggu3|MZio?H77 zduDAHx&A)bT=myad*u2D;564igsbhPM6Q1Xj$G6I`p4+nBG*3wt3|GV3XWX8H{|B$ zonb%XXQ^!$x&9g0T=myad-~m4fiI`{oJH||UP0|${{!l{>nq@MC_ZOXyz5s|uS)Q0 zuq_c~^Dk)S9RC$fJ>vWu zSk2-%Eyl7$j{gp}KYjNWdH)C4+_go2{|R=!ofG$hT)(jY7q~pGAEBvlW|dB){xL<( z;$uFsZ+`-IJaSI{zwsfVnSjQ%Sl~Posv_q+Dp;DH*)KbL za~L{q!n`Ugo5msMP1&oKCwehkKlT95TeX^YK%Vw?c%c3PCf@xV9E+U`o4GKPFrzTD ziE}UedC*V1x4oBVzky9`e>Qu0ZyeUQN~h5EV*uy@51fPn)P>nvc2ENPIv}r9xvokD z033&$pNaB}9SD7VV|0U}7tO{9S?WH-BACa+gCo#AC5$AKqul4GJVMAMDL9$yD|_6g zaY!gtmhC7AmMOjHyV z>220T9LzIZ#a-3}8UV6kANHF9O?oAz#cO>K1ni;!cm#5?D2)%bb|<<&>WBpv*DS3L ziCDYa?q@loCAKXBJ~n`fOj5+hMRn13;B0XmiUwvGWr;n|N~t&Q1+7(=40i%=5&I%J z7utzfLnW$6jCclD>`wMS_-Ju}^iI6=k3Hkjr|Ntygz{+Pq*UDz%gcavTSGXWqHT)G zX}s&-j8~9R%6&=4V{JIkHnU++lHWD{+H~y-OOik&%?>S)s4X0I&0ZjZQwD*OEiUU; zy)~)KT3yD4w*PB+!ioXmc36Ud?KFQ&3%`=$S2%>5y~Z}Lv02Y&8w_Ay+}Es1&fl)* zZymrU2!7RUOT7?Am~A-$T`hdrreahGI%I1Ed}#3T^x znUh5{N`ggNhutS;`Mapy>HC#NnL3@!pa zPlMQgPnc?WF1A4{gsXLAaRSv#$Wp(`H16y1`hWRD)eVgWg z+tg{SPRyrIS05H0(-ofuT-sa6#PwM*NRh|4?> zXi*J=auQ$h2~i$eE4`-a*m<;b1+jFUiO~e2Ye#5{n=Yqp0fHXAs-YSrVV17(@vfP( zlNTpO&i2eq_D#g#__>F*D;FeuwdY3pt6%P^E8h!esY~yH4F~(iZMOGzcGk5XxNw;+ zt?@hD+)Pn(_AEY?db-nxv!y{(>0wCGY1x^DZ=Rz6IQ_t^q~3uDJB%#>(vIco02Jd1ZfSd~>t7 z=m6|HIWyX4x^m^}WY;7NF>wqn3YVoHlqE0iOm922IQevEoSWPV`vup$Qh69|W3zY& zzD4Xl=fizogfn41V`1whNQO<7V(GRk8GiLPer4lk!2}5Sk`Ufxgo^~qh%R^d#n!O( zG|$t_9NN##=v8Idxf%W2>{|`|8k}FPu^J5g?FMeuHmiZM2KS4EQx4%69k7WGxHc`E z&J!*odcv0N1O%xMFv)DCX?8FRXYo7O)qsnYXn%J|7;)-AAPtOkeG zpyyY`@~dw1VAx+X)~9gmX41N)(OTVTOt>*EyfG^L#wDDhxh1`=VdJe~{hsnlaaf-; ztj8GMSB!n+P^I>&QX4(xTiq#6Rf>(9=ImYw2fb@b8{($5cME!?{2d1F_E>8bV-3En z65bxg#wp=nMzLFjaKq7jTm{8+(z+D{^+AuHtakfJ+HBQvf8wnrFa_pN4(+ zV*TgE-;)05#TNJcf{3b17oM#S#RaGJ3*L^;T6rpdRKR&B_1m<__q%Tuc^~B&Cy&u|Gebf8S>Rlrg z();yJ19`9B)kELErFZ14-u3*bcO^g5J5ls~dYAa$)jRerl||V+dKVZC^)4{_p5ED^ z-t}k+#x0O@qJSB9I~dd}cvR_LAn0Q#eBQcP!t#77$I?_&`~L6St&Y1yS?x}_anW)@ za!nC}8e>myc(_5-vW zZ}UfWhzlJfex_>W-R(w7R`*F>5vhN!V0d_Hq-e0W@5oxIH?)v{_88gK{z6H*xj)_`7{jC<-1F3I+ccUHC;8 z{-rJyQvd|R+Z>2yClK(yz`r!W(hnpha)qGT zIZL9!yu65YusMjve1J($yy#wRQ6yvcP%BbkcasjcO~GL(QoSKBxRk)w(It97;n=Rw zl(qNF?9dd*GAuh3Qf4_>7lX?E>z#z0*R+?U0B%Bf{75n5E>nUb2J^NIv$@4~mUAY0 zIcK=j7}&^ix+pKFi)c|ujQ}gU+yA7&?{eir`Mgxgt1I z!LEy9lp6%N{Yp9ypJ1<*6 zn>0w@0}weOh+RYSl5kD&VkzjOcx0j+5GeQh9POCDX%)EwXL7QTECB&fH#4|X=P=8I zvI03Rq>i~4a=S6;I#uQg$^S8=VH+hJHsMuCb3j7 zUV5JFJ-;u}Q6E=IMdtR&mmPB&ci82lr6?ydnz&6GUFTm&a@1#3P?0qw@@3c?&|Aj6 zT}Zpm`zAB>JaosD|1^Si`@5Gi?gTS|Mq?PKCH{dBgc8US@Z6Mn=ux!YG&WBiMJ7?4jUeZ1ew+i>^UPYOnl6{4!$7AJ zKmM@D6@oR%cj<>2{5TRjIrqrwC}xQu`E{|o9T63B>0DZ%120n@s>D3RW*QKllk+HF zWtxN=#KhuX=0WGSS?93P<5b2#=j?3TCsEB^)n&CIsGPn+oI!*8WkhD4Qmgs18pM-* zoQFBSOn%l1&DSg(GtfeB&z4B$N8;m>pqu{2L!cFOgoAT4*8jG{wrh*3R`F}vnW3|jem;QM-X&S37%=4!k*QGc{#cLNT zZn4FiAC)ZF)YHPA;)IdmJ*V)T!xSc6uu^eLvUVd`7oJ$gp5?zL-Bm8&43X%Nysoox Ko}7JJJn0{!*#GSS diff --git a/piet-gpu/shader/gen/transform_reduce.hlsl b/piet-gpu/shader/gen/transform_reduce.hlsl deleted file mode 100644 index 90ea55f..0000000 --- a/piet-gpu/shader/gen/transform_reduce.hlsl +++ /dev/null @@ -1,141 +0,0 @@ -struct TransformRef -{ - uint offset; -}; - -struct Transform -{ - float4 mat; - float2 translate; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -ByteAddressBuffer _49 : register(t2, space0); -ByteAddressBuffer _161 : register(t1, space0); -RWByteAddressBuffer _250 : register(u3, space0); -RWByteAddressBuffer _266 : register(u0, space0); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Transform sh_scratch[256]; - -Transform Transform_read(TransformRef ref) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = _49.Load((ix + 0u) * 4 + 0); - uint raw1 = _49.Load((ix + 1u) * 4 + 0); - uint raw2 = _49.Load((ix + 2u) * 4 + 0); - uint raw3 = _49.Load((ix + 3u) * 4 + 0); - uint raw4 = _49.Load((ix + 4u) * 4 + 0); - uint raw5 = _49.Load((ix + 5u) * 4 + 0); - Transform s; - s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3)); - s.translate = float2(asfloat(raw4), asfloat(raw5)); - return s; -} - -TransformRef Transform_index(TransformRef ref, uint index) -{ - TransformRef _37 = { ref.offset + (index * 24u) }; - return _37; -} - -Transform combine_monoid(Transform a, Transform b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef _168 = { _161.Load(88) + (ix * 24u) }; - TransformRef ref = _168; - TransformRef param = ref; - Transform agg = Transform_read(param); - for (uint i = 1u; i < 8u; i++) - { - TransformRef param_1 = ref; - uint param_2 = i; - TransformRef param_3 = Transform_index(param_1, param_2); - Transform param_4 = agg; - Transform param_5 = Transform_read(param_3); - agg = combine_monoid(param_4, param_5); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) - { - Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - Transform param_6 = agg; - Transform param_7 = other; - agg = combine_monoid(param_6, param_7); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _250.Store4(gl_WorkGroupID.x * 32 + 0, asuint(agg.mat)); - _250.Store2(gl_WorkGroupID.x * 32 + 16, asuint(agg.translate)); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/transform_reduce.msl b/piet-gpu/shader/gen/transform_reduce.msl deleted file mode 100644 index 6ae57e7..0000000 --- a/piet-gpu/shader/gen/transform_reduce.msl +++ /dev/null @@ -1,155 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct TransformRef -{ - uint offset; -}; - -struct Transform -{ - float4 mat; - float2 translate; -}; - -struct SceneBuf -{ - uint scene[1]; -}; - -struct Alloc -{ - uint offset; -}; - -struct Config -{ - uint mem_size; - uint n_elements; - uint n_pathseg; - uint width_in_tiles; - uint height_in_tiles; - Alloc tile_alloc; - Alloc bin_alloc; - Alloc ptcl_alloc; - Alloc pathseg_alloc; - Alloc anno_alloc; - Alloc trans_alloc; - Alloc path_bbox_alloc; - Alloc drawmonoid_alloc; - Alloc clip_alloc; - Alloc clip_bic_alloc; - Alloc clip_stack_alloc; - Alloc clip_bbox_alloc; - Alloc draw_bbox_alloc; - Alloc drawinfo_alloc; - uint n_trans; - uint n_path; - uint n_clip; - uint trans_offset; - uint linewidth_offset; - uint pathtag_offset; - uint pathseg_offset; - uint drawtag_offset; - uint drawdata_offset; -}; - -struct ConfigBuf -{ - Config conf; -}; - -struct Transform_1 -{ - float4 mat; - float2 translate; - char _m0_final_padding[8]; -}; - -struct OutBuf -{ - Transform_1 outbuf[1]; -}; - -struct Memory -{ - uint mem_offset; - uint mem_error; - uint blend_offset; - uint memory[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_49) -{ - uint ix = ref.offset >> uint(2); - uint raw0 = v_49.scene[ix + 0u]; - uint raw1 = v_49.scene[ix + 1u]; - uint raw2 = v_49.scene[ix + 2u]; - uint raw3 = v_49.scene[ix + 3u]; - uint raw4 = v_49.scene[ix + 4u]; - uint raw5 = v_49.scene[ix + 5u]; - Transform s; - s.mat = float4(as_type(raw0), as_type(raw1), as_type(raw2), as_type(raw3)); - s.translate = float2(as_type(raw4), as_type(raw5)); - return s; -} - -static inline __attribute__((always_inline)) -TransformRef Transform_index(thread const TransformRef& ref, thread const uint& index) -{ - return TransformRef{ ref.offset + (index * 24u) }; -} - -static inline __attribute__((always_inline)) -Transform combine_monoid(thread const Transform& a, thread const Transform& b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -kernel void main0(const device ConfigBuf& _161 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], device OutBuf& _250 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup Transform sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - TransformRef ref = TransformRef{ _161.conf.trans_offset + (ix * 24u) }; - TransformRef param = ref; - Transform agg = Transform_read(param, v_49); - for (uint i = 1u; i < 8u; i++) - { - TransformRef param_1 = ref; - uint param_2 = i; - TransformRef param_3 = Transform_index(param_1, param_2); - Transform param_4 = agg; - Transform param_5 = Transform_read(param_3, v_49); - agg = combine_monoid(param_4, param_5); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u) - { - Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - Transform param_6 = agg; - Transform param_7 = other; - agg = combine_monoid(param_6, param_7); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _250.outbuf[gl_WorkGroupID.x].mat = agg.mat; - _250.outbuf[gl_WorkGroupID.x].translate = agg.translate; - } -} - diff --git a/piet-gpu/shader/gen/transform_reduce.spv b/piet-gpu/shader/gen/transform_reduce.spv deleted file mode 100644 index fc8e58adfe90d8e6e7f2fa348f685e72cc7f5a3d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8424 zcmbW4d6ZmL5rFJqi>FFN3dqO}| zqTnui4hk-~1Xpmw4N*i8k+|WGyNDa&zOxGWeeZS6yfpBSFQ-oZ>R0#Hty}lr>Pgq^ zedi>}tYmJ|m&}`$q~H8xHcXOipY*1BaOm`*1Dmap16Lk;umg5Xy3%ou*@<~Q=x$`9 zSgGl_0@;c@jOwdQEOF>zXX6ce^5<;C^U(Pp`o z!0f=k+}J{;Hd5ZSwApG*m0HX4A(l-oU%q;3`D*5LQKKYj5Z1Nlfzw&&9{%l;j|a9A za()xV7ItrHTaT7GR*NmR$;^4lLiF^ag+{qJ@}#LSzAM?4@mU@Vl0Ctt`oys1o2b|7 zmB-IrwsCYNa>(-4og19-q;YpARuOTHJB2r+34BAB25+saz`` zJ2hQzIlrbn+IY)TCX2BOOpKH=<}sOlh(olG&+HXppPbn%!`_hDhh}zHawedOxXuK+ z)W(;iv%LG#dpcGv3|8yI#p+45jrCHoRjJobaxLdwg^sg*CwOhOS~rgON84}a{&Xkr z!alBE8?B5vrZ=@6Gf|!>G%FXn?P=aKuNBJG@>iynK1Bk(m1}j z^D85*aqeZIRjHQKIhoHpKVGhkjkji;5w8cI!wbbM`QUruxgbGLukU-AY?Z3vi1qvA zv#i23J!d<7#agW%j#y(Ze0s0jj)>)(F;-!CxV~xH*O%_oH#EXU$nI@KoT-m*v{bE3 zh9_d~82oUhG~>%+?i9Xet5`ZOyfd}kIsDt?lhxL~Wj@>Hn)!Uo`YN^2+}?ZAeJsGo zLHVkwQ^xs5v$J*WjPsq>3hrnopG9z4-QsrX%l5JupM5dA3weK*({A{!ZL2s|2yfKL zw;X5K>GN&7mnWKMKhZZ*Y!xG@@BH2H*?rF*x_grIEAbB0zN0MyKJhi^;@B9tJGmA- zS!@)2pZY$EuB5m-xqcg8cXC6AePf4xQ-^)?Hg->PJ9cxt&@45Itc zD=n%u)%L4TwTA7e*6N{*0{5oBv+U!ndgHvoMty4124;?Z1V5!bQEyz}`rIHF$b!fA8ly z=7+s6w%;K0<&5cke+c$L{K8%WHXrRP(DsqN5aUHZj(BqRW4}($H0A7@G0wPh_POA? z+MGQpIOjOdo)MfqR?hbwTyL9;{hEut)p3!toa@OAATif|Dz{&bvzN*(&2jEu?tmO; zu5t(GID4j?KbEnE{ZejKj6arQvDp&Vz8DSo_O<*S-2(3&^?rFrx1v2G*Rk&L{v%#+j+e96&buG&{TV#L;eQxx9@fD4b6C*$-eu!QUx@g@MPB)Fo$n<2fOEZj<`$f5*%R`)X)j@%pZ@K! z&CN4g!0R^t5@MLUcp+n=7MMvt9^X;M`9Ff}UC9kF*0&S>CG5?2qwP3-iM{V!6wrlred#@cMzXaPmEoVH}^G+IH&h@-IVtK{<_eKvOvylDUc0brp+r3yL z=Z1fOusQ5azR^qF`$2Hpu6=NOjNIzBZQpcW#9aaQbG&~X>vO&E9|?}U|Ux5Ifh*2>=mef|8i*NQvEG5?Qr#(5&!9PAgaZNBe8TU$BzqrC-*dRwon z5qsaRu%AXpoxcor<9jjhHlTM*&bVJednWS6)xN#WMcuYyGd&&ujkeDi5&urG@76nS zpNc#0{ktDq&i&YTv|aZ>w7JWKLOaSvdIcgG%Mt-R~XVcb~#*38fQ_5e%|I*oz89PzGdd!Me# z*}m&*a<+c|mW}cD8UL0ITmSVrTmKC?TmOwY+xRzi*f-~F=li#5#CQB%9risP_PuR; zExWuD{8r=&#QtzJd$hK1L*y@EjP^U)oNwwFs$@;%*5b97ycgMm*vqa)BF_83`oexc z*trq+17Nv*kc-=EeGu$CeH+Nfy?qFgcWvvhZA^V1M&$Had+j5L<(>WrvJjEi?^(%- zJs0o5gPixr(4M>cKH^@7{siKu|C4C_>e$Ptz{a?awS1qSLFBB_L&VVjIASd4iX-RG zf+J_|gLcII9N4(#=-tphf;zj`&m%h^&e7)>?~8ow`3qq8EN@Kf@I}PEJKp=GE%q)s z-uRBY1=$mEj6TOW?-s;5SQGc6j(ooaHs3hETfuU}i1YQysQCx-v=8b&h-ai zxj0wtZzINXuJ~?ZuR$w!BC)q0f;;#2BRKh3@5f-*yRCiye*!)dk&l}G6zn{Gk=M__ z^7ga)+4nEde%?KOKS$)uQ5-q_65N^7-Ei`e)33n!oPLdMPV$k{Z@_mW`r=%E3-(;} z#o7H1EbrOao3z9Cd$8xK?;f;v`2GNPyuN$U+Hp_+2=<;jN1tPiBk#G!zSU25Hh;pm z70KVbKV!>B4u1g~H}>~eu={hIYic{jHN98XTJ8K^^xqKc7&ZHQn;S+uU!Pq5u6nQH zto{M+%<-Ra^4GTe&cDEN?!$ZVAGDwMK;OR+Idc<7?EiuvM)LcA1Y17#|0p=ye{b>_ z*dOqRQ2mHwPbG()rQsdxXZ+dNU5Miy>t3}T=bZU)-C*aS(mDR-_i7+fA8p53pV))? z5eBV7{QaJTy#wMm7df1@`q10A@pkBWi2L6WZGLi3K^$X#ab5#p`LLIOog2Sb`-0{6 zLE=pJ0}mkj;`i#QVEIQ`QyjVO0$zytEkGjIUBTwJ2pxZSECsu_u$Oh%t2*psI_%>+ z?2|j}4LN%;_{_E)^?4e$dFAW#bZq%ssiF7#07Pzo#4|Q$xv1YEV7aK(O0b-FF8+2n z47?n19sM!)aIik(x_9mH9RbeAU5zbgkBYcQfg`T|n0qu>pK-0BcKDtF&c|JgEw=`V zxW|GcuKt*N99W-mJqzvdodC|q9mJM95sA3#z!6t}%smOL&$ynccKDtN&c{6kTW$!6 zxX%JdT>UY3Jy@S{y&Kx$I~AOddpfqVYrX9Ysz@Ftr^roHA zXCrdqe-79)3IB7!a^Zg-*xH8w9I#yYpAWW1;eP>GZUZmLK2<e1DtKFGt+ZFtP&i{CrcdK=fTkZpOF-xfqd;wJ!yCuKh|l`B?i^U^yjf z8f(8AF4jI09BaP@tS{DnE!a5n*2;WdhnSN-^YInTgAlyJv1hkhA0S(Z6i9tde6n!}AGy!#J3p9KTPXyP? z2P9FV1cf$&^+Eku=s0hp^}R~#bHaxj5nH6RMN2JM%A-@XJZiht@y*P+QD$77wPwv) z^Zs~iy*X>0bM|lVeNOg0d*}S_RTn5#wF`R!UvB+lcJY_fpI;cxzkmP$MuP!h!#N#x zE$mxiFM)jo_F`zLhkZjQ*aST>7DTy+qs=SDMeNjTP6XcDbqC=UiIj}wWaG+dk$khtEVafwJ(mg5AF zBq}F0d{=X1zfO^7c1~Eo*@rF5V&%bL>DWS@QD~ zFeU+x1F!@ZDZ=rRy_6HQbhr*j0c#>zAJf`Ns^2&OI;%f6J`(;!9E@Px@DMS^N=%Uu z{yMJMm&$wlB}0((QJnN2`zNE`-ycvTlwx~LF$!hgKk4|TL;T%v1VOtgc0~ufl4O^< z_~rA$u6dzc&!!!QSl>S9RxM}m*0ZaISW^VM`l+X$4|jO#IR#A}Z0J)u&WHOv+rh6{ z9i(Xw)Gqyf^>UD1eJ6GDP{NAa9m4q$Qk8#UQ! z0(9BYP46p05V>CjKpuko>nSnZEs_YdE+=Qn+AEmqHdATDOgfGeh*XO0sN4Vx5w;q*dkvfyp;}iKL#SRUE()xap=aX`wdd4UqKC`7 z5k+U9ti855wlFrJE%tF+x49-Er!uf8=N5qpvF(RRa*$;pQ*caSC@Hg;l0$8%bP2xb z>O|nuzq6RqgqY@*hZs;2=qJfMkJOk#cu^p%SB-)aF`j*!(1zNi?`qCBoaoLaez)CX zN*7|-ZEAa;$4su!exXx`fKq4gm=M1);|%pQ-owVRB82nPNj)mvx{BnF6T~xpnC-Wfr+2QRfhNW zEdHo7Y$wI^yIDgA(F~s|!--}L?s8Tc*|j*kMq@V`*}IKs^)9=Sv`YpB{23R2f(k05 zg1(;R&+7O|ME~MD(y&qK;$B1ePHM00qE$AeT6G>xzi3UbM`eSvqG~<6wwRq-Y_EW$ z$!>Jnje2%<47>Uv6K?xUll@iPUM{v*xa~D=bNu~T{{3|*bB_oPpng33t#oQ^-?;nM;rCjnRv&yHzinZFAA(>qPy7#@px*hd{b zu^YpyKfdv1O(@R&K)>R{xU84oj~nMQ{(047S;Y0e2MLStBMR%r)9Euu+rOGiA;eXe zw^skcd8BNMB5CWTA08sa8eDJ-S?QNIiq!e5z2i~um*SrqAAR&SJ3o&Q$W{p^poTR} zYXSv`R*fh3DaGbwddVjOftTcGR(z4-;=TQS`oC4FG0I^bH8ra;pmK?v0>dzB05pmX z8j)WDVCvOPQQw81-*f4^L-8vs%g@pLq=jTb^VKKtK7F3AlA(T{uLhv)f5lhgr+n4_ zpL|vL-}p)p^&DR%{CW8*9zJJcq{Giw!YIgB!l-9_<%E3IuO%EkTsb2WSn(d037aP@ zvgiQd4wy(hdPfXV|JEu-eOs;V`ma59*At@H)*v@0ttCWXD;8nqm=9jN@Pt!~Uwf7F zTJ(~L?$Ue`w~!~?F1ErEyUv^r0Y239>=eMnlniZ&YSS#Z?|_55_k7X1C4^Oim5o89Rxog^B44KcrGVOzd7} z&a-nr-oan)Jfr>1xBJgHOV^oNqGhaF zwb7RtkR%8@=j?q>JyO0+`uTn_32+PYTh`mbCC4}j)kb2+I994n&fQTWD>Tu&<%d=2 z2-Y)WW9ZS;UDnIIpfGRGUE5OrU44096uZLM|9&4!J;e12zT$@xiX^bMPF5nvZh){q z-t!=WWYlv(`k_@;Fy@*-!jH^8)fs!AEz0wTGiUrmw)ZGD6R)6j2<}b8&;=&sff9lk zZyXKKGc?e6m(hUfguv{@T!Cihc3=sZ8PDPp0P8XBZpbiN$Ahwj41KZ!&l0i>$!75Z z@Dr|YqK6zw!{P)vATTs`;W{M9k}uB1O*b)V8Zs(0Z5wm3yUFEcK6INqBz3Z9g|0Dl z&b09KW!|rb0zbQJdug`edWSYMq;xK!_)ycDcj`(WPZs5ieOFIXuBN;qGD3wv6f3<~ z2}Y)km`ym%ohT1Bf?1sTXaA5Lw})VXLzt-)~(pM8i*yG_=qVd2y763z{27w$Ks3IPPED z*&x~m;lzv6G~8r1*J%ULN25&ETtX~N+)bF)utZl=E=;S{C(q0xXA>CaCIMdOl#dq~ z#-ZEyyvd?3LVZ`MG*=S`tJ9L>MZL@jVa&t9VEWk??7a1AuIV2FF#U5SUhtUU5fjSh2*M;&tifF)H*2L428;_m~Jt9n9LpZ4BMnm+837 z>n@MDeAhPS4Vte~1&i_lmc&aH9c)^BsBW-pvS@8|L{97@_xfvB59VZ^+&cGME-*$! zW#*&NAFf?J`bs8i&s@S?YZDW#I|I;%1#<7z-ti2%k$3KMvX&mUtl2x}P@u5F3NqG5 z{z-5_qu?@}TP-tE-)EjkS>X_U=Y_aCa0ix4@V~{lJ0&eI$l;|6 G+kXJd(PT0J diff --git a/piet-gpu/shader/gen/transform_root.hlsl b/piet-gpu/shader/gen/transform_root.hlsl deleted file mode 100644 index d447db6..0000000 --- a/piet-gpu/shader/gen/transform_root.hlsl +++ /dev/null @@ -1,94 +0,0 @@ -struct Transform -{ - float4 mat; - float2 translate; -}; - -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -static const Transform _23 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx }; - -RWByteAddressBuffer _89 : register(u0, space0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Transform sh_scratch[256]; - -Transform combine_monoid(Transform a, Transform b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -Transform monoid_identity() -{ - return _23; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - Transform _93; - _93.mat = asfloat(_89.Load4(ix * 32 + 0)); - _93.translate = asfloat(_89.Load2(ix * 32 + 16)); - Transform local[8]; - local[0].mat = _93.mat; - local[0].translate = _93.translate; - Transform param_1; - for (uint i = 1u; i < 8u; i++) - { - Transform param = local[i - 1u]; - Transform _119; - _119.mat = asfloat(_89.Load4((ix + i) * 32 + 0)); - _119.translate = asfloat(_89.Load2((ix + i) * 32 + 16)); - param_1.mat = _119.mat; - param_1.translate = _119.translate; - local[i] = combine_monoid(param, param_1); - } - Transform agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Transform param_2 = other; - Transform param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - Transform row = monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Transform param_4 = row; - Transform param_5 = local[i_2]; - Transform m = combine_monoid(param_4, param_5); - uint _208 = ix + i_2; - _89.Store4(_208 * 32 + 0, asuint(m.mat)); - _89.Store2(_208 * 32 + 16, asuint(m.translate)); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/piet-gpu/shader/gen/transform_root.msl b/piet-gpu/shader/gen/transform_root.msl deleted file mode 100644 index 8b4b2a1..0000000 --- a/piet-gpu/shader/gen/transform_root.msl +++ /dev/null @@ -1,129 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Transform -{ - float4 mat; - float2 translate; -}; - -struct Transform_1 -{ - float4 mat; - float2 translate; - char _m0_final_padding[8]; -}; - -struct DataBuf -{ - Transform_1 data[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -Transform combine_monoid(thread const Transform& a, thread const Transform& b) -{ - Transform c; - c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww); - c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate; - return c; -} - -static inline __attribute__((always_inline)) -Transform monoid_identity() -{ - return Transform{ float4(1.0, 0.0, 0.0, 1.0), float2(0.0) }; -} - -kernel void main0(device DataBuf& _89 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup Transform sh_scratch[256]; - uint ix = gl_GlobalInvocationID.x * 8u; - spvUnsafeArray local; - local[0].mat = _89.data[ix].mat; - local[0].translate = _89.data[ix].translate; - Transform param_1; - for (uint i = 1u; i < 8u; i++) - { - uint _113 = ix + i; - Transform param = local[i - 1u]; - param_1.mat = _89.data[_113].mat; - param_1.translate = _89.data[_113].translate; - local[i] = combine_monoid(param, param_1); - } - Transform agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 8u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Transform param_2 = other; - Transform param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - Transform row = monoid_identity(); - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Transform param_4 = row; - Transform param_5 = local[i_2]; - Transform m = combine_monoid(param_4, param_5); - uint _208 = ix + i_2; - _89.data[_208].mat = m.mat; - _89.data[_208].translate = m.translate; - } -} - diff --git a/piet-gpu/shader/gen/transform_root.spv b/piet-gpu/shader/gen/transform_root.spv deleted file mode 100644 index 1578842ea8970357fca4c1f6c7d85bd116242059..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5336 zcmZ{m`vop#zCmKNB*M{!)p5li|-H^iX+Vs5~${JTP>2xw>IPdAz-;K3cBV8l&ZrT5Gi0 zu5BFEuP^6UYgFqKBYB5jJn}vC($zF-tZr2r<7=C(F|ggJ^Bb$Qk*B1xJ+<|#SK5>! zaw=!Xr`c%MM#{C3(MG%0-n0-Ewpr+0o!dUH*sKG@jUtXA5!W@B(w>f4j7 zhSyQnQ@J;}0KT^?urpBS7c)?P?+kj9i_ovCv@2&#tR=2Dm5m!gC%6BL1$<)rt);jp zX~8#CS{3K+-AwEwmy!;0W!*Y#y~!5v`11q3=WFlSM=m8hkz38n(mcAdQ=ooGA7|_raRbazT?x|P`5poJg*?A2rm}Ogj}K+`H!}H9_$*$T z?`%HV%qKns6!mA5pU0odz8&ShOxAZE@-(diA9uC5>3{%~!|^Eu0m46B!p!N*#& zpd{&>5%NK)oFt2gHvV8pAN7)W5&5inNTyS-&RVk}W3(Mi4bR>WO@geU?7jHe=UljT zq_8gm>u>y0ct7N)UHSM_>c%ev8|zuZz5;B#`B%cV+pqFDnQR_s`U$w$JJ9|~xUuS2 zWU~9DPWz`aY)sVqG}!!euzv>b9MtK5KHU21y)$<)`k6~rcTU|ru)C#f+-Kq5vpV}w zcHh)#zcj< zpUJ7+xpN`+(A__cGcSb?L+Wq9NBO)==kA@()SQvJ@V^AC?o8(H{(KH{r+kmv^mCs+ z4{5WuXa54^eDymIo#!Y+`uXYSJN_c1uYT57*3Xzecm*tJI; z9?RtG$mOgXkUIMdpC;J(hP(l6j!qWy6|g?)W47a9XKOy=ror2gdS}qeuRv)a*m!uSzqtUdvo6MnAJ7l{H(4;Ru^@(smt$z_ZIc816yx16mz^Dtls(^xU&Af zr>{Xf?>w2R2Sy+`&)4 z-hsBbr~AR`y@$Kt%3*r|tWMi@xN_JY1a}~9JK)N(*AIc+Yh$$O(}An^4&&VN_q&IO zu@&d=2s-uA!=vC16zBUX*z@UUO=W$o=?*z-x$#Tkk3r5ceq$cbbW7mIYg3osOLwT) z^K$a}o}YmBgN)so$#JhwA{Wo|Gj!@7&UWVKV0FF)cjgyxKX*pkQ;<5(A&%TngNwPJ zL8m_U;g?`_?n%t+S77I*EoS;_u=+TIvgePwa&6DT?Ni%t;Pxc<&HfJl9OS3{dAN3Y z%>D(iIpWQ}2v!$wR{6J(xr`M@?C-(GM$dl$J8%2i4OjMz`@{bTt%i&V|377V{r7;C zV|QNyySwJmrq3?8`l$ahcrYiw0=A#nqgTQDsE?X|2Ae1TFZv7kVrUMefBfI`SFpZ* zp5Zn4-yr?O48S^^)ACNJz%gXwBAKr~KmB*R>3C^G4U&!hrUYol7 zeS0@=Q}txXzdL;c?!Vu3$QmDjzXP8Q&-*!JzZuX;oXNX7AcqeG8~;JLf2V3c44Mjg z#<}o$kTK51b7(spQa?;L%KeZ!$nE@+?4BC&f1zAl*nm*7`8i z59u2{egs_X@kn&)Blkza>Ll}(ay;@ZL|5$bD0J#0&(UCYlIN4mB}I?NfUT$R=B)2U zV0+gV^%jGjckG<9eqsL@xOlE((WxJ18h!BNAa#ZA54V0BW=G-{uUE^7OG7`0CWYm3^agUzGfS=rASkUeR$ tpOq=^_TyR1rQcbZKH@%(Y;0HGja!B6jN^?d>t_z%(%F!9-;db$^DYGj3U>ei diff --git a/tests/shader/gen/clear.dxil b/tests/shader/gen/clear.dxil deleted file mode 100644 index a79182a020b90a039563546fd57809dec320a586..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3076 zcmeHJeNY?K7T;u->;|^W(nMZpQd~eXJgjvIG{8Wed_ke4H5lxxBdr@K7@Q;xBt`;` zh7hu8n~q6=5z1IYMF$_G+}L%54Khr$1#fzwR7XXAuiL2!aqCf>2;ff$D+Q0<{3W z3e+rsARka;c_od3`I=N3cjT?jIeYg_bAxdY-3sFL+Jc+x_dpWX$G}Foy>QA0DglZB zT`x(i6K=1Ztl`(<>+!}Kb&V6efPoUAeE_K2TdC(3kYHUch-%n3eT2Xm>=HU@Rh36@FJ^&^rYK!zWY%ivK21sLlB0rIS7h)1+1;>G4R1wsB& zgpBtGH^JRv>ZfLsd{LnK2}m!wJ@HoKU{n%lAg-4b>3!m~6n!Wtl<$~Ew41VNz5#Ej z#u6$Fo61y8h{w`lc#0P(=DwL5e7pU?Zi4lpa=u8nO;VraD1lGi-aSy&?tuH7$9U?& zq>_R5aBf|0;_=*XkB@b@^Qs(4^;PeXh{!kCN8!@~6Q#ruwaH@h(lV(Z&X8lG=;H12 z2fZ{U-Ke<_t79-mJEtdKV5cqyV@O7ma!V9WnHtwsBNl9EgD;;qj;J9eBo$1)tVAyQ zO~dp?tEtH*LeBP6_!-1xP~z{>7AkP`OQpD1V7N6UZ;bw5-ssE4sZBPA(;@PiGQ!K{ zg*$vN;h!J5c1O_SrD^>~vn#JJe{^MOcJGB}$IhR#GLsD1nMZXF;;{u>P#}3>7+yUVH1z(cW{rgwO2+Yn^U;gW6_0=rlP&3Wt&ZB9V5Ys$cq4ZTf}#k4xX3 z?^*a%0}_3+=fm=U$qNg!RcrO%1?r!Vd1TQ=S)-V@J)kY}O$9X`)ZL0Y`kL`&DES&m zeB~kXaZUQ<#O5~!WsP(~9M&m~!P{rF+k^1-SZmN}N82&fUBTMzXqz2&wX$}KO^c-v zS3|@ioou6%-(MkC3`9N|-FziOb}3`?q$$0TK2-8*XUUwlcoZ&uwX@U*m&8`YE(UEk zqlIR+1&kin9%Ahb>dHY~*Aqb6M?CBU7~3piTl!dcU&ofuR*26Qh_A0_!}5>k2z z`udDjs@_)n`j^*v_lkAV-Me)lGTocNfNLkcmBUQ@z2AFCJJ0GKtGBQ?H2C-34Twgpb^^n5()1tj9@$S zJbGae#IgiJ$jd}(Sh6w}Q1d;MBqueq;-36E2BI(l;?idcFaVgVJ*2wh86l6Jr;!8n oSrVHF@HlY6CZpjEvj*y!m=CC75Ga*(BVn@ayrPPtK>E1+05h(=djJ3c diff --git a/tests/shader/gen/clear.hlsl b/tests/shader/gen/clear.hlsl deleted file mode 100644 index f6a576c..0000000 --- a/tests/shader/gen/clear.hlsl +++ /dev/null @@ -1,26 +0,0 @@ -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -ByteAddressBuffer _19 : register(t0); -RWByteAddressBuffer _32 : register(u1); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x; - if (ix < _19.Load(0)) - { - _32.Store(ix * 4 + 0, _19.Load(4)); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/clear.msl b/tests/shader/gen/clear.msl deleted file mode 100644 index d89853b..0000000 --- a/tests/shader/gen/clear.msl +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include - -using namespace metal; - -struct ConfigBuf -{ - uint size; - uint value; -}; - -struct TargetBuf -{ - uint data[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -kernel void main0(const device ConfigBuf& _19 [[buffer(0)]], device TargetBuf& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - uint ix = gl_GlobalInvocationID.x; - if (ix < _19.size) - { - _32.data[ix] = _19.value; - } -} - diff --git a/tests/shader/gen/clear.spv b/tests/shader/gen/clear.spv deleted file mode 100644 index 0e8d1d74b838e81594c723455a21f2ce3624694e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1212 zcmYk5YfBqZ5QdM5v8i|Kt@RQ&rmZcC^h1?GX~7Fb!EgFq)~aD4O`t}R{wjZif2vjx ze4aH&bHdBayz`!!+1X98P~CWgQ`yxfEYLbamEuYkQJD52=RS0Yy?mtC7H=XDy*<9f4wk~a5S zSM6VECu_I%chQviS$n75I?o#0m*@CSlJ~>E$o`&To;iD8rOiu8`%IeTSf_$LO8+#@ zIxltPJf~?V)p{q<;nbPhXA}!yk2v;yo5wG6)QCM%;~c#jV>RxfP7mw2U3)m=GVsK> z`V{8zBK`pI?q2sfOvj$q^EqO@1i!bQvGtt6`k$~q(>pN9Tk<_sx$S-ZX*BjrfF~j5 z@O;)qrj5I-;JY(p^-Eawa`rP$fLyG# zxMwx;Vv~1gJ>R3p-QB_Xzp|%0_J5(>f^|ouz&*OVA87152IPNILA`OzjpNIC-yO^X z_vUV*#_v-8290}}1KyMWM@_$V&prLj%>y}S)sJn)h(>TTJdeA3y?} U$NN5moVx$ikA1WcEcY9D1i^Ydpa1{> diff --git a/tests/shader/gen/linkedlist.dxil b/tests/shader/gen/linkedlist.dxil deleted file mode 100644 index 231f0f6084c0cf640edcc1fc14548f61f856fba3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3024 zcmeHJ4@?_n6#vQ{y@OtlEA6&p6ug2X+d#b*C37Ot#D8&q*NucMCK`>2Ccfmo z?|tvy_rCYN_xekVw&@MojTcYM;2gjKSD(6eMZ%vnJBtnKo?U z%G|!0whMdFbw+U+Ojf8tvTs^=7FHP3FZC`6D#FyT?SgUxi0cQ%&<_#r_gM9e8HZH` z;Bf|kRJdx=OmWQ4J!n^2pBIWRy}f=#qIs`R;_Wk2mUOTNDCY1`kfv^}6+dE$u6JwjI!hFWGi=fS;eZHxCbMKbis?H&HU zee2`X^xa3XHFd@FU%&oByYq4St6XH1vYrN`dC1HfCo^$Y15Q9E7(@Eke{{I27f9f7|H>*pXlFHI^6e{=j1{h*QY-4h9l(SXWdRS$0JEF?K6NhJB z8maQIjNJR~xdCcSPaz-YNC#AoTq=YBIV$=TRMU9zXOua)U3J8Ca?jDhE!43GJnW5f zFL#dJ85wXhH=EB}idKS}oT2%t&(4fw;1PY1`R4H&;+5(rH+7#^em;rXtfs!f(;ujN zGb2BDztGy1Sy^^y&X||S(cMz>SC6LBUN_BDPIq!i6}HOPb~}sHGo3H=B%)hK7q(v- zo1Hr|HF*pzl52FUPO-VCG=uB7GM@No7kQKKW%Kci`(F0dl(-J4BF@gtp4OMTk~4dH z5_JdR+4;rkG0)`W)cnvq3^8{Mtw_=64r%gk8`Sl`o|E_H;JWbHJuuLvS3cZ#++15* zVu8~x)Ori7bvEu&4$mm76rv3shJ0_tr*~n^O_(!KN6dkQuPO377nx1yQ-{JSANDG% z*x0CBTk<%vVbrj}hin*c@-;ZHRvc?K@eT*p;y@akcn8BLjmyYOesYFQsAUtrTp$-L zWHvRNGNn>}s!AEMrB<Ea!J-eJWW z*J6!7#KN!-yZHNYzK-VW1AKG9oppJEygW^QA0jWY$d1uooovH7-@9+0WfjpCdDkt zX-{9!9scpTWC2S9*RZwfI&5_*h zYvFFxRps_SD~kTkwb1wPVk!i@6C6#%S2~l$!%PN#m%$nZ)Lp*|sLbh=PtWCYfk51l z-%N>jMJ&>Y*b`X0+aN0j4O{;l{JH-a{vLwxH#RvxyBM50GdT~TZ5zl zfLz!YIl3uJ_%A5TZcY{08_Y*a)WO-3ykJgApJMFFl^_tw;*U@wF22BJjsxl9IN-5{ z?4^$T&UCm%E{3K)_iPP@vpCkG#zWae1R4vwl&MWAg4WYHY}()!P7tw(XSO))D{Co&7VXWNl)=v5vE$E)Po$F~pGd t#TepvjKoN+2530+Ai@Sc>cEJV7zuW-p@|&`P2{1g=y46jx!_v_`VAHtxDx;X diff --git a/tests/shader/gen/linkedlist.hlsl b/tests/shader/gen/linkedlist.hlsl deleted file mode 100644 index 614791a..0000000 --- a/tests/shader/gen/linkedlist.hlsl +++ /dev/null @@ -1,39 +0,0 @@ -static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); - -RWByteAddressBuffer _56 : register(u0); - -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -void comp_main() -{ - uint rng = gl_GlobalInvocationID.x + 1u; - for (uint i = 0u; i < 100u; i++) - { - rng ^= (rng << uint(13)); - rng ^= (rng >> uint(17)); - rng ^= (rng << uint(5)); - uint bucket = rng % 65536u; - if (bucket != 0u) - { - uint _61; - _56.InterlockedAdd(0, 2u, _61); - uint alloc = _61 + 65536u; - uint _67; - _56.InterlockedExchange(bucket * 4 + 0, alloc, _67); - uint old = _67; - _56.Store(alloc * 4 + 0, old); - _56.Store((alloc + 1u) * 4 + 0, gl_GlobalInvocationID.x); - } - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/linkedlist.msl b/tests/shader/gen/linkedlist.msl deleted file mode 100644 index 0461d79..0000000 --- a/tests/shader/gen/linkedlist.msl +++ /dev/null @@ -1,36 +0,0 @@ -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct MemBuf -{ - uint mem[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -kernel void main0(device MemBuf& _56 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - uint rng = gl_GlobalInvocationID.x + 1u; - for (uint i = 0u; i < 100u; i++) - { - rng ^= (rng << uint(13)); - rng ^= (rng >> uint(17)); - rng ^= (rng << uint(5)); - uint bucket = rng % 65536u; - if (bucket != 0u) - { - uint _61 = atomic_fetch_add_explicit((device atomic_uint*)&_56.mem[0], 2u, memory_order_relaxed); - uint alloc = _61 + 65536u; - uint _67 = atomic_exchange_explicit((device atomic_uint*)&_56.mem[bucket], alloc, memory_order_relaxed); - uint old = _67; - _56.mem[alloc] = old; - _56.mem[alloc + 1u] = gl_GlobalInvocationID.x; - } - } -} - diff --git a/tests/shader/gen/linkedlist.spv b/tests/shader/gen/linkedlist.spv deleted file mode 100644 index a7232834d87bb475ba0928dea6e121c1ec952af4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1936 zcmZ9M*-lhJ5QYz{2r3{TZr}heD2S+lD1w6MfPzbW05dES;w*`R*WUUp-uMi}_$J=? z0B(ti-*@Iza$-*^_1E8jSJ&yTo~Fi;LrKz*v?p!Ji=;lflSY^%=}1~rUtD{+Ht}w6 zYhr48QpK^PDHW>eMAL$A#ww-VsuLYp7rb1z1VXi16Te3M0YM9j{JIwA+L(J=-C?dR z&9hT}RV>#wO6Aq+``Tt{Z?{%mT}Gq!IDR+9&B-`^V}J9__Fg^j6mh9su5D)XGx+uG z%F_PpB(2QYQQ5Afxn}EOitmAI<*lShb$*L>7Bim4w_?`4f;#ey5Ic(0CFcpLH}5d! zh> z_6X)se-!VWe9U)cT*TJ5h4oU^X}om};0svfH6Fyw=Q?B0#U6-w6l}jl-U+Z72B<#H zb`b0t8e88jyk{Zj_ZWMwa*_8o*rEOo-nsnu+|9VquMYiZ||vdqnEVGc#-6R5 zH7-z#lXb4%Z}*9< zE%sOh#~#kbH!Oi&`z_n^0a$J9>Ac@=k2@FZmcg!z_#s$L?Dq&9`>8dDoVr+31e+ti zX$36j-tm@J!T-Od$8hqo*AsB;We)d}Q|B9rGuIpYe!K&H)UpQl%|tCv!E*lJn9n)6 O{G0Us{N>MQ7W)TUwQb1& diff --git a/tests/shader/gen/message_passing.dxil b/tests/shader/gen/message_passing.dxil deleted file mode 100644 index 2be73da19d9a41131935c2c68a6e89477f90dcd0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3116 zcmeH}eN0=|6~M3k9)8dGVIINj>|n?Pic6`LJq+PvhRk0fW|+>zjHMH7HUwrgK5gpp zQEmRhrir6IQb%(Q|Y{l)X-Holbd&*yN0043V01WCPCn5&H3(dj=r(I81McH8;0RGiq}fsYRFCiXZ;ktzFG#DeS2gwgoY)%3=ABhIv#pB>=L(Zd&iCRYJ?u#rZuw>tr~J)i_IhCL5O z%8*EsU|<4#*}+*8!t%M3i$KG>P$}H_6H*pXH){qWA1%IVrQc>1PwY>Kk-ZXxv39 zj;|OEV(R7G>3g?s{$W;zho-f1!;5?0Gr$^X-OT#*>g>||#^JHa#pT6v(&y8iPWq$t z*rnY3OOyQRaFcmIjU(A&kBgWO5)(NpPgYZtqj_)Rk@}VnlY1mqa^_ggsnzP6!z%+$ ztHyV?_M5W}Kak~TWd4O&n_L*}?H|0d?{fd(g>pWlIMTV-R&Q^8*;ZF?H@VO4lAICL zq*TAe46h{Bh-+Yq)q&uU5-`0k8w;_&Be;ehOEF=gw8Q~fo-$KXV-V$yZg*oI*@~u?q z_0;5POG*R%s`5jRa!Fe_ffRk{De@x9g)Nc8jP+Qte5>CEd#AsR@wb^VM<(WY90Sw- z%<2Cz?r#_SZA1R9A(#BYmheGX_<2ycNh7s$qtf+J=|ZGmwMgevr8BPNc?<6oMq4Lav=0Axak*e<;RF}JVeZ-Vz0Cz-I;!e(=S=p3Ll4g>y+?rnD77~#y_Gxt~t)0RTaC@v?ZA}*n8bh0gwQ-%oz8^!=n9kA(7Uk_w3(qTgh7@ zpy%g6m!Q#=Q0(k)$%LTJtICmkWAvdr%GbV=oWLRe6i)w5Ge8k44rT#BuQusL5etAc z4*lrw+|W95>66E^IgT2Zz2~=C{8PC3P8JXBU~w&+`=4NO<$s;UmRR_1zg8^XK5MA{ zZ5H$4V#s3hU$gj23bPC@hOz%CgCPVjsKrEVojr*+LqP^>%@OuCe97K%i)5fz%zLMC zx7iaeO2Prx$#(9Cu)MCAMPf98Gtm}j8?*az=qBC3_M|CH>nNp1r;nlrLKKSIUk zj^ME;-w8kY#dq^Z2-p0RCr$$&r1SW|Z4OtLnO|D!Q*)h^xImnKxGJ~S3@Dt3{-pXF zx%su@tX+!5m6Hc<_PIDPNL~~Acw8TItctncH^r`c)}l(TFn<3>$$`q?xtM+;t8c>H zdXRVqpi@*K@}9(FUYRw|1c8bqPUd_df8k data) - { - uint _73; - control_buf.InterlockedAdd(0, 1u, _73); - } -} - -[numthreads(256, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/message_passing.msl b/tests/shader/gen/message_passing.msl deleted file mode 100644 index e48f48a..0000000 --- a/tests/shader/gen/message_passing.msl +++ /dev/null @@ -1,54 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -struct Element -{ - uint data; - uint flag; -}; - -struct DataBuf -{ - Element data[1]; -}; - -struct ControlBuf -{ - uint failures; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u); - -static inline __attribute__((always_inline)) -uint permute_flag_ix(thread const uint& data_ix) -{ - return (data_ix * 419u) & 65535u; -} - -kernel void main0(device DataBuf& data_buf [[buffer(0)]], device ControlBuf& control_buf [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) -{ - atomic_store_explicit((device atomic_uint*)&data_buf.data[gl_GlobalInvocationID.x].data, 1u, memory_order_relaxed); - threadgroup_barrier(mem_flags::mem_device); - uint param = gl_GlobalInvocationID.x; - uint write_flag_ix = permute_flag_ix(param); - atomic_store_explicit((device atomic_uint*)&data_buf.data[write_flag_ix].flag, 1u, memory_order_relaxed); - uint read_ix = (gl_GlobalInvocationID.x * 4099u) & 65535u; - uint param_1 = read_ix; - uint read_flag_ix = permute_flag_ix(param_1); - uint _58 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_flag_ix].flag, memory_order_relaxed); - uint flag = _58; - threadgroup_barrier(mem_flags::mem_device); - uint _62 = atomic_load_explicit((device atomic_uint*)&data_buf.data[read_ix].data, memory_order_relaxed); - uint data = _62; - if (flag > data) - { - uint _73 = atomic_fetch_add_explicit((device atomic_uint*)&control_buf.failures, 1u, memory_order_relaxed); - } -} - diff --git a/tests/shader/gen/message_passing.spv b/tests/shader/gen/message_passing.spv deleted file mode 100644 index e5f56d6d6a95620755d8f6954abdaf4042835412..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2196 zcmZ9O*-jKu5QdKoGk}Od5OD(;P(VQx*+r1m0hud(fKD?sWRh9PEH3xHiMKw5H$I2a zD__7RQT)H|uGpDl%BufAb!t1OM^kO%9nM{F7hR{zyJ+>hHZjh1yG*Ea`B(YbdgFL@ zVet_jmt87!#HUw2Y3X)J*)LYuxFzWm57!SNM|_c#{f_E|>QX_g{MDpj@rJ z^Xi4_S>V-!vR`Qw3w8OW<&}(`b{(qKxwD{FZZ-n%q~xD^#kW()xC!Yuwc`KEL%!WjN}u^PzZ~NS(zU=pj{3;BdBbtTf?=Ht z(#dnn!pT0?gp)Pz3GY-Zjas!tYw9iI;8-VqvDB;ub!_^}Nf#pDu*H3GHotM4w2WhZ zaNJc+e_xT#NIE6>*!=8VPVWGYmxMpF9FwpNXCoM5=GrYm6VGZ+#)GqX)Siq7CLVPX z|4^EE*Chjbk-W*QPIDf6P(IW@Dq)QZcSs}fSz&s^hMSK4dt`r562ra1=jYCiXrI~P zj|?jlGlCnD%`Cv=xhfsi;M`h_nUB?VE%b|eArF{7SVO`$q{%@q#Jef2l0u$%VCD#C zbq>pB;XfkH9x=dKX>x`R^n!es?AN@jjRzj#M8#wz)0Y^vr!(j_0z;0Y{Bs z?hMXyzK~7Q8J(g>?huYzkR`nf{CR^c`u`~=Elo`BX+nZFJlTTz?vnfSEtu~uxzG2O z#C&hyEoEhnw;b^SC={!7h@yEIt8H2~oYgiX+iGKvdZ-OPS)0`}E55a!hvMMh$2H8! zhT~0qj=9IO;rU*ui+g~xx)x=_vBw>Nt*0f~^aRg7cLrx?S7gIkPph)6C-&&$tER(z zo=DiY-iXH?!zbr&v7d^kE{nY`+hViFom%V-+3Y7{^Ijr8dK=G#$!GC5W#jWvbK<+% zmf*7~`Jvi&rCGoAw%FT|!13;}xfk9T-(pTeJ>T{2==DGX&zrJ0_DkpNJo=A5xD^TC v7H60EDYBpu{+n#f&MQM-T7ag2b1d`H|C)K diff --git a/tests/shader/gen/message_passing_vkmm.spv b/tests/shader/gen/message_passing_vkmm.spv deleted file mode 100644 index 8527c2bae4567003bae7f8e8db8578114729a358..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2300 zcmZ9M*-~6p6o$7zLkJp3B{5Dx$V4;|jb;=R8$!}xP%~6{+jRm>s>>dzraS7rZ{n>_ z;f>GXaODd)RB8OabIz)qVpFTv`v1N5n)W`7kDZxH>7{fy9ZC;Vv5uxOn3UAN953{* zXnh_(_^Nq-<5Bb3sPlE!Yj#`R{@}Z2x4+Zsq?t6CpKEMA-df%t?kr!q`hgLTr13nk zzq9z%&=W{EYxmrE3z>tLJ1RkP^DE>0jiG-6Or+`5*viwFdH1*bd#&bvtDE(P?d^U1 zYIs#+*U}Vqr?l4^bVtKhbGMT{ZMMI?FuJ^w`%bgBlMOSt93KPU?XWr`EID(YK(FJoHrma7#I873!|P;;n@@0J)tINfKkYOdo&J-ov)Oyr-_C~Zes6O< z-=9q9(ccW(FDlRZiL`{?%LZAu#14AU%65uACeyXRWy8R{&gg??BEsq`$K~jkp3Y7BI)obp}zJGit8JlZ$xPUX3RCSh>hAg6&OP{$lC>3ib~q2|Npa{ye35_MM&n$|*8ABe{8OXCXGv ztLUPJlvYaYe4?(`a=)S%^N8)kbrSpr+8p*`yf@L5l=H?DJ4d;w^AxsA|9P}~#t<)} z%`Fap=dMrWKU=~2okRS;5c}_8*DH7#+qV+&-pBSE3H%ZE_}q*0x4_mA`?s;zkmIoT z(9Y2NkDBje>n~@|V&6bD#+P85xWp-E=9`eSH{}`~=x?1a=lK_meHqhxKaZ#bFCD;s z$JKqm<0|(1tzy65Dz2A!m8{;&+lY7PechslqThmF<)n9Y8u7jOCKgKlJLl!SC(qhn z#JqrgFy=dO^0(=F2`#=!4D-E<97g2bcLs7%&qZvxsOJ)P)Z?BtSdYB6cVDgNGWcLU zAHvChU)FF1Th4d#bID!BmY+qe%h}6CT{p1h+;i^Y=;Pca=@abe z$vyk{jp;a_PZ9U!e5c0qUgaa+Dz>#myfy5I=bm>L@$O)|Z#?hLcy+XV#9PNUN8r2I zb>v58a}V7>^pXFQ^P6bbKfEFLHV`@Ax3+J?xAZyUcVj(&@V@Nt3q<}L5^w2m&c%84 yzkFCX5WgYMI9t~tV!Tk!%nta)^Cwus9+imz z5auEf3P}YCgY;;T&NPI%3Iz_CqF>5q?aay_wX=Q$yWu(x00+Xr2T;+Fx6&jyrJ${| zP@b*#-_no(oP;u|ou0A}!vn;&_*dC}Y$Z-qPA` z=cK@*bQ#kiFpmM;o382;*a#lm0KiH(Mual}y z$DOdc<81RPBTaifQs;3KRa=E|j2n>QMu01v7R-k^b7KIgo*<~)e7Qh56t* zl$Cy0HRnp-=BnF^ogrzahX?MG0W7>G8LRyP#DJlg{=R;RyUm0& zYmmQYjs8uP;NeoIN<$V&O=X49BI{yl0JCPQsJ=Dmrt(N=G2?AKpr_<8+4{F4$ZSJ5 zM>sDLkhBY0lrajE*At%b?0HNtN(GA;k(n%Bi`AQXB1~oG>9^PYtEhexbzSP@WFFvs z6QQ3$b%oGl9n1$Cv@s7gd2>WjhNo0prB#{e`%4bCuG(FN9+V;R3X>#9 zl72YSZL3RJRVCfJ>T?Q4B-<9Z#v6=D3ktCXGmkNrXdSIE-5ccPY$kE1pTIKSSgjb105>> z49mD>{s!A{r!4THMWt;rY|cCK1=e;cfzYL~;J8e$B7?a7d%uoMR~{B~Tm6Zq=blLZ z!@h6Il0Dz7O8(6$VNNDz@A!D{w;zw(I^R3;@ksx~J)-{N#rnH@uDmhy!JfvuR}KxW zroi4cEk_ITZq5Dj^MWttsy~mNzn|$j^I>Pof!$333#Eitz~i~X{-AZA}r+R~oxa1meTAMlfx0Xa;|t^@K@ zjziy(jfP|)T0YLnsi$>c`{gG(-v9iA?&Gf>>b|l%+@4fa`$SD~W!3hbM?`9pR^P;n?arYcX41 zzO&j=4NX+77oC#Fyt(SI`i+7G*Eb(ge>vLBkv(vn=c60lR}Ni-!|qS#!fKccsRSR} zHx^r>M3%et8P1?5n-|yBimO^G$O#bf4JCiT%Tvkh1-*g!m)l}Xn6N1|hk6iQ-mhQo zL6;BKdaAAB-GsQ#=(1YHyR2wUt;&SAVnS+zc8cw`>n|SFqS0$`@m(}aCa^jjqan1K((Cmx6 z>v_Uep>kEWxawML$-keJ|2`!D&M&{upoRTyvEyyAgH!dWC3Zj)+h?0UV8K82>X-HF zm$gjQUBQfIeTEy&Yz{sNo&IRgypCpe1U>y4@k>^;l5$m9QDH5d%i8aHv0c2YC6G1F zN0ig@Z_VgS|&)4RGwV|cYzA|Q6_V2CdQQPUF4!qO9 zv*|CrcZ;j$MPQdt44wJ&8wse5oqH1N5jrkk_?Fq@!9_0&b00jc`KN0|G;_~>n0>0? z<3D}xA7|Vh8rcuu_m_RarV0{-T0toCMN_@VMx|lH)BaHWnKH=@>%+@;#2eO9ip*t- zF(@If3KFRTF&T)oV0IpxE)2h>Q21$iA(|uq4%$>B0bmrM0T|i!My0^O@`45}z zdns^@+Bo+V?nT8z+>45z!95Sey*?A=4#XlVG7jiPk-$Op!WXN^zE=x??m{e*eQ|m4 z?wS$3@ns>ZKHXvhBnv@HfzZbvU1s;Gr10)yxUza$>o2*+B`FX4%ia<$91#4Hfo{WE zVha6u=SdqMJihpt^Wr~uG!8dt;udayYp>mH2IiV0X)BtUhfB^{qS|D7gYjxkbeN(I z_{y4qGEzkEl<$8$jY z66$sdFt#GJWd{G&!Lk2U{7WGC)2xb=Y^_q`eKf?s9PV$xZ-*V|qA>RnT6d$NzIKji zUW@SnFS|6E+{Z8n79whA<4s*gG%!)5eFo-E&%#_Vom3NX*Sciq>XA(sG(-EFKH1#5 zdETgc%VG8We$}sAV&8hVGd<%&t+(y+A9J0~7CH8|Q1YUe_*tx;v}=jDU2DS+Q`R_iSs*oW^?HHUxHM?BMekiv8`n9_{&1F5wfUb}3K{u#9h1 zw_cR4T21iZ$Rz5HKnJ2AP-YcFZUD|<5{>4cBt9_*km%09eiaP}JF-!1p~9+|BNT{M zb$rAgL${9bgcIoq$~k6~aw4ihEv4`HsIAA*B;t}SbbXe;LFU$}_Qx3@`7_#~yw zD1)~H{4_Zb=~1P@)!5ESYJK?b4#|b2lf6luf#iU87|}=$xG`-ZC+vef`9Waupq5$9 zjGFNi0KX<{_eyek-i7-ngB!w~%YN)E3#4eWUzFS*NM=($j!pKv5n&t6J)Go4D%86z8<1*h zoFe_nY})j6nw~tFo#auegsI<=zGs#idbkmuZrla4NA$Zve=PT!U!91Mm+uv^2kT3Hm^I@cs?zfN@m%od zkmtEM)9j7V$IuvvtciTAl{ctpysno#5!d8Y$_#aBF3raK6B)vwUJ*jt7sgJrhetL?coD6XDq2A zLX&cd!HcNtPmVY?P?1dGu#iNXa<2!MVTmb`iMGhZHu-6+2)fMJ8ge81(xwk}sMGmO zC}?$L95txCCa|?fqjKissfmuN_G-U4ua}EjqA&xGglJjBIBXJKB3e(DgkR=-qDe*S zw#(dZN2cE(oEX~Y{(D|hcsqxjC!Ckw@B>}0P(nj%S7U3xIJH!S2W6}gt{k0rl;sh4 v5ft82oAHH-j>m(M&4Je}k^$}#CuL(wW2!Y!=vavrcNEt)`rzL>KbQXiZB= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 511u) - { - _43.Store(part_ix * 12 + 8, agg.element); - if (part_ix == 0u) - { - _43.Store(12, agg.element); - } - } - DeviceMemoryBarrier(); - if (gl_LocalInvocationID.x == 511u) - { - uint flag = 1u; - if (part_ix == 0u) - { - flag = 2u; - } - _43.Store(part_ix * 12 + 4, flag); - } - Monoid exclusive = _185; - if (part_ix != 0u) - { - uint look_back_ix = part_ix - 1u; - uint their_ix = 0u; - Monoid their_prefix; - Monoid their_agg; - Monoid m; - while (true) - { - if (gl_LocalInvocationID.x == 511u) - { - sh_flag = _43.Load(look_back_ix * 12 + 4); - } - GroupMemoryBarrierWithGroupSync(); - DeviceMemoryBarrier(); - uint flag_1 = sh_flag; - GroupMemoryBarrierWithGroupSync(); - if (flag_1 == 2u) - { - if (gl_LocalInvocationID.x == 511u) - { - Monoid _223; - _223.element = _43.Load(look_back_ix * 12 + 12); - their_prefix.element = _223.element; - Monoid param_4 = their_prefix; - Monoid param_5 = exclusive; - exclusive = combine_monoid(param_4, param_5); - } - break; - } - else - { - if (flag_1 == 1u) - { - if (gl_LocalInvocationID.x == 511u) - { - Monoid _245; - _245.element = _43.Load(look_back_ix * 12 + 8); - their_agg.element = _245.element; - Monoid param_6 = their_agg; - Monoid param_7 = exclusive; - exclusive = combine_monoid(param_6, param_7); - } - look_back_ix--; - their_ix = 0u; - continue; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid _267; - _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); - m.element = _267.element; - if (their_ix == 0u) - { - their_agg = m; - } - else - { - Monoid param_8 = their_agg; - Monoid param_9 = m; - their_agg = combine_monoid(param_8, param_9); - } - their_ix++; - if (their_ix == 8192u) - { - Monoid param_10 = their_agg; - Monoid param_11 = exclusive; - exclusive = combine_monoid(param_10, param_11); - if (look_back_ix == 0u) - { - sh_flag = 2u; - } - else - { - look_back_ix--; - their_ix = 0u; - } - } - } - GroupMemoryBarrierWithGroupSync(); - flag_1 = sh_flag; - GroupMemoryBarrierWithGroupSync(); - if (flag_1 == 2u) - { - break; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid param_12 = exclusive; - Monoid param_13 = agg; - Monoid inclusive_prefix = combine_monoid(param_12, param_13); - sh_prefix = exclusive; - _43.Store(part_ix * 12 + 12, inclusive_prefix.element); - } - DeviceMemoryBarrier(); - if (gl_LocalInvocationID.x == 511u) - { - _43.Store(part_ix * 12 + 4, 2u); - } - } - GroupMemoryBarrierWithGroupSync(); - if (part_ix != 0u) - { - exclusive = sh_prefix; - } - Monoid row = exclusive; - if (gl_LocalInvocationID.x > 0u) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u]; - Monoid param_14 = row; - Monoid param_15 = other_1; - row = combine_monoid(param_14, param_15); - } - for (uint i_2 = 0u; i_2 < 16u; i_2++) - { - Monoid param_16 = row; - Monoid param_17 = local[i_2]; - Monoid m_1 = combine_monoid(param_16, param_17); - _372.Store((ix + i_2) * 4 + 0, m_1.element); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/prefix.msl b/tests/shader/gen/prefix.msl deleted file mode 100644 index 24bee60..0000000 --- a/tests/shader/gen/prefix.msl +++ /dev/null @@ -1,264 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Monoid -{ - uint element; -}; - -struct Monoid_1 -{ - uint element; -}; - -struct State -{ - uint flag; - Monoid_1 aggregate; - Monoid_1 prefix; -}; - -struct StateBuf -{ - uint part_counter; - State state[1]; -}; - -struct InBuf -{ - Monoid_1 inbuf[1]; -}; - -struct OutBuf -{ - Monoid_1 outbuf[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - return Monoid{ a.element + b.element }; -} - -kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint sh_part_ix; - threadgroup Monoid sh_scratch[512]; - threadgroup uint sh_flag; - threadgroup Monoid sh_prefix; - if (gl_LocalInvocationID.x == 0u) - { - uint _47 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_43.part_counter, 1u, memory_order_relaxed); - sh_part_ix = _47; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint part_ix = sh_part_ix; - uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u); - spvUnsafeArray local; - local[0].element = _67.inbuf[ix].element; - Monoid param_1; - for (uint i = 1u; i < 16u; i++) - { - Monoid param = local[i - 1u]; - param_1.element = _67.inbuf[ix + i].element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[15]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 511u) - { - _43.state[part_ix].aggregate.element = agg.element; - if (part_ix == 0u) - { - _43.state[0].prefix.element = agg.element; - } - } - threadgroup_barrier(mem_flags::mem_device); - if (gl_LocalInvocationID.x == 511u) - { - uint flag = 1u; - if (part_ix == 0u) - { - flag = 2u; - } - _43.state[part_ix].flag = flag; - } - Monoid exclusive = Monoid{ 0u }; - if (part_ix != 0u) - { - uint look_back_ix = part_ix - 1u; - uint their_ix = 0u; - Monoid their_prefix; - Monoid their_agg; - Monoid m; - while (true) - { - if (gl_LocalInvocationID.x == 511u) - { - sh_flag = _43.state[look_back_ix].flag; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - threadgroup_barrier(mem_flags::mem_device); - uint flag_1 = sh_flag; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (flag_1 == 2u) - { - if (gl_LocalInvocationID.x == 511u) - { - their_prefix.element = _43.state[look_back_ix].prefix.element; - Monoid param_4 = their_prefix; - Monoid param_5 = exclusive; - exclusive = combine_monoid(param_4, param_5); - } - break; - } - else - { - if (flag_1 == 1u) - { - if (gl_LocalInvocationID.x == 511u) - { - their_agg.element = _43.state[look_back_ix].aggregate.element; - Monoid param_6 = their_agg; - Monoid param_7 = exclusive; - exclusive = combine_monoid(param_6, param_7); - } - look_back_ix--; - their_ix = 0u; - continue; - } - } - if (gl_LocalInvocationID.x == 511u) - { - m.element = _67.inbuf[(look_back_ix * 8192u) + their_ix].element; - if (their_ix == 0u) - { - their_agg = m; - } - else - { - Monoid param_8 = their_agg; - Monoid param_9 = m; - their_agg = combine_monoid(param_8, param_9); - } - their_ix++; - if (their_ix == 8192u) - { - Monoid param_10 = their_agg; - Monoid param_11 = exclusive; - exclusive = combine_monoid(param_10, param_11); - if (look_back_ix == 0u) - { - sh_flag = 2u; - } - else - { - look_back_ix--; - their_ix = 0u; - } - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - flag_1 = sh_flag; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (flag_1 == 2u) - { - break; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid param_12 = exclusive; - Monoid param_13 = agg; - Monoid inclusive_prefix = combine_monoid(param_12, param_13); - sh_prefix = exclusive; - _43.state[part_ix].prefix.element = inclusive_prefix.element; - } - threadgroup_barrier(mem_flags::mem_device); - if (gl_LocalInvocationID.x == 511u) - { - _43.state[part_ix].flag = 2u; - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - if (part_ix != 0u) - { - exclusive = sh_prefix; - } - Monoid row = exclusive; - if (gl_LocalInvocationID.x > 0u) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u]; - Monoid param_14 = row; - Monoid param_15 = other_1; - row = combine_monoid(param_14, param_15); - } - for (uint i_2 = 0u; i_2 < 16u; i_2++) - { - Monoid param_16 = row; - Monoid param_17 = local[i_2]; - Monoid m_1 = combine_monoid(param_16, param_17); - _372.outbuf[ix + i_2].element = m_1.element; - } -} - diff --git a/tests/shader/gen/prefix.spv b/tests/shader/gen/prefix.spv deleted file mode 100644 index 8e7db4a4638e3f8ad517015323935706f323bdc1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9828 zcmZ{p2b5LS6^1X)48;a2RuBgZ7K(_yi->@rsMtlvVHjY9nSq%%h~22E#`I!(HARgf zU6W|UXiPJS>BS^!OjcsLNoR-+P~Z_Sxs$dmm%d7JH2-iebf; z#mM5?VMTpxS8Rc$D7Gqw*Y%tQixy1luPvQ+$f45>*tTe@#~Cx4m=<(1(%s(GqhlO0 z2D`THG;HAtgVMxYXmLM==3-PaXF=Qixo5U@cXs#oUEbE;(Yv~{t-rIoy{FdI(NA0p zQPtdX3NB9V?dk1WTGUsUJ33c&c6avFm@~TA8s5>{y`-zBv#q-xyI+5;Z=j<#b#Ug? zfrAb@`u{D=ZK4-N(T<3>LNBS~k@bBoU)8ptx1)X4yq>kFT32t+ypwPf+fwXvq#iz;(Bo%y#qb9&b~q4V7$5e%NpGGWStEAJp-Lh={p3T z{T&ZpWkbr?ndo^vgA2PCxudIRNxc~7&l)u0Z7SyLLtN*4H0!i?mwfRM-c-05_D^g} zVGO`1y}R$)WG*>;1h@N5yfHfuAZ_lZ9{86 zd`N6_apVx*QXB*K`6#a{oogx12haE@zL=g6ybsai=ok=yH=ahV& zPc@%>BDqlYeAfUxvt5xL*oFI;$ZY3}cVtcX(~3AI;2D7)v^CeyMy^a`f1^t5nJLcS z7Tt=t=H!x(20L2eW6;UDGu&FtZw>l)LpxXd(IxkeC7-e55r^@+qxESY%Pv~c+V@6# zF51l__gkdReN95^n^4|Uu{LFQ{NBHPkqON3PUzQW?gP;FOS`${-W6^7r}l4oWI{6*Ot4s?_Rk*^I1#Je5T~q z*pGHSZO)VXTxpxa$$C$HuH3U{tQ^Lj(r<2ucl$XkEogIij{bhI-f(|EgnOR;h6p!) zeuew2o!`LyU66SF%PQRY{tl@6Z)o5*HSn7&-1+M(-2DC)NIw1k4yf`C4cu>i{Lc4V zU*%6U@FyF%-}q|0-}fqip@I8-ulmO}aKG_Yzu)*OpV+|t##jA**Q?y`dX@VPuX4ZR zRql7Z%KeUq+t2n2w_kqCxE$Jg_m^)29o-_Q`zC^DbYAXtxKx@8s$KMd0+` ze9p1|7bE)8>$cK{+Suz$z>AUeT5jBtoUQA&BU>Y`Xm?Yn1S@6_3M z2e`WLPPn$ENX}JT^;|uJthWN}ddrdYqzkOw`ODC9<9+6Pka5U(BsKJw_RL=mcb;?B zp!<++#5i-xjS=^w&1HQxwEI(g@7AI(Lmc|oq4le?_T^x6%S82`r7lnybA0*eK~_ygFOR%Ij7fvwR;XXpylzs7OYL*O=x+1 zuLFC&`ff(cbFW_y_Fg+jpE1U2_YAXd_2$mu4fq=S@J4LfQ^T9U=FR@z40eCUxu)D0 z*YpnAYqj%-qu+wq$Gk7zTH2PPov%+@^yo=rkHr`>aQ_$~6oHH76Z*t?Z z-Uq<0m%2X));1oo?hm6K)~)YDh&JmM=S)5VuGaZccd3hq#CA^A51~?DG>~Z8@J$f*tNt-<^my_bIk_>(Q>2 zoSy<$d-rL$w)F0^V23&NeFo8HPH}Qh2fMHM)LG+mVAlx$JlHyB@+I+nzksCA#-@MT zjWuufp?1x?(03!r`9-j6++M!Jz692mecS_fxDS0_MzpyPaq@fxY@WP}?gMMHXRdoM z`o0oxK%3wC?nj$T?YsX0^n-{)|JTs^)z+Qm8huY()U{u|h|=j^`;)|RvX zHrQcqecwW~nOmIweFy9w();g%(|djPKC#~e>v!J|qvh@;dDV^Y!0KI2p6?UqeDgel zmN(8z><{o6lYRaWtj+u7v;1SU!+q-e5%PP)eTvh=pMV>C_)~1!lly02ZRz1Jzz%ck z`#GY`+~U;uOK^HP8~iIIJ=EuW!q`dZUnBbS-gvCEq3X4cK|hW>ir9O5B!3Js#|ZRq z5dHd|D0zIph2K^BoT%f@|}6@I3y6 zcGxq0e?+v|GjY!2&){*0&r5Ru1*|>i@mH|6oX6k64s+}K8=}qJ;^g@UxOyJ{gy%f; zEiGd`kAETha~{u>Hq^%RcozILlJk%om!3ag`t9d)V0nE12Io9p0Lyb8{{ed*&e50i z&~E>;H+A;*BG|p}dBzvXR!=QqqKxN+L^ZLUwX|2)a?R{-SMUbr zq`$ktwdL;U)0X&gU~TEgc(^vt%zjJ&JM4$P-4Si>QJj6Wg0n~eW+abqPjH@(iC}s5 zxEI(xI!B*-G*)|R+#Bq@N{#!#wdKtA1v{)!-y~!|#2Urs^jkC?v2VufGbU^74|a__ za|gh+rEmJQnQH^swX3p-x> zUg(>KXmh_}YkB~`T)RB)4YfSKYmCc#;~-)i-y1WqX-_Q&gSF+oaVXefE&2{Yw7Ir8 z_u~j~?uUOrl*e}zco{kDyMIfR=k6R0_U<@GpE3SDQhVkf3r=qTt}V}*9|xYn{KOs) zcfP-qW@A4AoNtP;w_?k?XC~Zuhdr8tJ`wJmynE!v+0*P>UH!e3{?5YQ*x!?|X-|J= zgSDlpWZ|2@n%uX=NL^AuuTC%?z% z!?oo(Jr(S*UVRG?ZRQo{98Lp!R-UJ4C%1;gsuO!UIOn?%EKg6*05|sZOl;ckDF4l{ z2&^qNo(*VD0Yr9JD;X zi@@$n-(s{pzBaJ&`YuMx^K4uK_StZbK4Z+I-Ckx-YJKhKywjF|-LKkz*DOVMA`a&+ zEB$I~8IFB9*c|zr)(Wt;qwv{Fc?V)HbBirXnFdu8f+iTqtBQwwDzn&06v76@U?L3$v5;eurb=R<~p!>@|)#ya1HTUGCuE# zE5OD&+=K6lE8)iHxsw}bZ|$@FQKz1(z}0%LhHJ~WNuM_7duGnJ&o36k`1}5_`u}(H zH;a96j6x2l5B77*3f>Cr+-=a->hG!T5M!)0^=x0-6SD){xz(7laP8^gj$r3$Kb`wv OJv$-VUt&$Mwf+whEVcar diff --git a/tests/shader/gen/prefix_atomic.dxil b/tests/shader/gen/prefix_atomic.dxil deleted file mode 100644 index 45a7dd8759ebdd7b2ce08e53aeaa163ef913d810..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4884 zcmeHLeOOahnm;$W++32Ia05Xv1n>sX@}<253Mlw&LXZZG6%>7-b+$Jk6j#s)t>Z`A zx%uGBi8fWL_%Sm0Q8nbo#Cv*$)|-R?j8 zJiCAFA9EhwbKmox_q^vl?|IMpolsO+Xt;R#8UInozIPt%=*oTJ{l!NR0DvkA015mS zKr%sE1E~QL3OgrZ2aW@{ZQ;B8d+oB7s~H!RGfzhRSLUyDG6-_ zpuEt)-_#HQ{1VD!HacVqA>>;(P zJe*o8)vjnax>U5Gxcyxaqb(daDqL?T4wUK3trFG1>KX_ za zRkVr(v!J!=S{k?xCgD4d>qKD1*>%0&g>T0%z8t=8?z}mDjnEa4@)0TnhnHwrANJijpq*(8A<3eMB9|s204K|RcZWU|h;V)hoyEIW}0|n~H z!YaI)#aTnpMCQHapFKQ;BHTPYn4{-<<+4BMin5uj*+}xax(#b{GW;u*e%^EqZ6H(D zi^tXgM$fd%`BrQ2l-&2BnbPhmda3fj=VD0BrtIYZ@GA}<@Os_elq;4H#%-# zd1rVT33e>+I<%(p=Inp|bj|0p)t|=2?_|!L{_s?0LbbW6XmDk_xy8^9q4dT^Uodsh# zY@Hipy%@`5CUvh*&FkX=zPO-1p0mc^&`0Ow{5oe}_6cq7nN)pQ3-R$s$4na*+#7Bz z)n#k^&BtXmC*jUSs$L6Bt(ge+2L{Iv+zbqkyoVZNv<0*KPV}8#)OY+uU#f3a2k~*s zf{0@of#G{Q7dQnI8%p_4ppDe#BYQ5{w=Hcxa`0fJQ$Jfp&1i`xx(4!^UIlWg50Y3|xYOn``cq~Zam$RY|8`hD@2yW{GZu#lQv zJtSTnFf4Y87Y}W5zG{(d$0b{hZ5E4Un?<~NOPhsmiy2ZXCS8g#CSn5AD0 z+J5MfZ0qtBjB^niXvO_9#h6ziFkln?7_Xv7(KnkP{L_%)XI8#STM5cXbD#gwn0ev9 zA3QI%o?Pq2PVpOe{iXkQZF6h{dim(^>2H5BPi)oC{s28AbX>l0pLxTHiCzh^zk0O! zx7XIv%pKc){c~&1{qZ3`&a`_q@^2mn8b8-(2@-@{f-9>;p`>r+EgPxU=&~i81*ZS zDuIC|aBXQ-{Fko&V{7^!e0~3?8Ugd*4Gshh^ufF6X$+i{{wD_Z!oL3z22zhPaNr3B zR{u8)R3<%zftmle7?=sys7-7SVPI4e#K5SeX$*8i3>+|#?Y=mKA`*c?6bbBjKfGi` zwjEjkbQj`~!i$Ujx0jC?jBg52b?Z`bkRb#u6T%>OXrawb$y_i*SC&22`b()XJ@ZkZ z@rdyFfIyf5bQR@z2njjI2UgDClX}>3@n5_h!B$OT^135CZ0%*BZ1aKaC7sNp^t0xu zZn>ezc(ph>OxX?Gjk|yA7Ve=)J1{el_QES~{7=&RwUVm#!u;CO2RL55yR zLW9^EM+5kh3Ki;pkuHRFLW>#i<^Ua%!{iIpAjnKZH%mZDIV6aQs;4or4wSN}n;JZf zOS^&6qef^CRt!ApfjfT=p~e%h z^~yhLK~`VT4Da0a@k<9^ioK!!!G3jofO@ej?#Rzi<>Y;+b#?FSekYnt>vp^4ddfte zS?32>HG`>J-y+Vl{xYnV!B&loOn$cJYGCljo%c|EGz5@eLl8KXdpZ@l;(*Yzz&Q-j zrr_TWgmrGzZSDLk zG8aByp&CfS!hww?cj^;g|JifZEs`enDRTa# zQjR`c;}&@(iHnK}0-!!#*^*mtziS_DRrhj4U5p~h^FsSv;^>qF3FAGZT$K%~lZ1!L zWB`eZpNXF26s>Z6E(Hum1oO+`OVXDiUV-}MYvDtax#Y^ww;&0|u-9NrF8_Nn{;%K$Vp`g0a+<~V+wnnA`4&)EAuvT@(NHtb{5GgL#WGYRvZmpgu-F1PFc~PY79F89hO0d`G>aDQQv&D z&m}tz_t0e!_VCKIGv~b0pEK30;P|d*KPa3}E(A3LJkXKEp_kdPRn!K&0f9kGPV1`e z;@wB>(LJ`+-1Ii{*645Osg^4a;Bi~4L=3edj~A4WW^~nhkNqOH$|e1Kq!_{y>f^MM zDyoHxD~=MmZQ*s{$E;GFH|UP_0751|dbkd$3-?;3eD8T)?E%j9#Mvi}hFh`XjeAG; z*9OmKd4^^kvk_^PZR$*jXsGxf65bq(wrvqIZLD=c0?(e&C-AgK!-?$Mpu|0!XZM=_ zXp0>Lt?Ixi?B8qsSDRUEOTH`Q&e=3^6IUhL=th~mBla-)tP|Eqe(ccYE38JHf=t@8 z7F7mAY?4w zy+mTwUP<&rgiw>|8bbGn{sLpzgPBCyj~2KLWs!!G&Z3e-{tPQcROr4Vu|Ff&<6}AM z8CdKX$naAxec5l>h!uJxKYb|Ymq&c5i$F@<1ID397n~35M3S)uSDh3E)5m%%@>^Gg zUw(^=H0!bu=U1&PSr;?pCW779#}e3Z|1xip4q?)0b}vt0X`q`>X}jj;*{xDw8gehR z-;;k$MhS<7;}S=ZG*^c@vt9^y`ZL9%Iif?9Mf8lNaL!O~I0ctYBXHUE77W1#gnLK0 sqh7+2Q|$Bok)6K1X6Yb%sZ6GiX^3gkK%t=ot@YMgI^6KzJ5S1g0DXc?y#N3J diff --git a/tests/shader/gen/prefix_atomic.hlsl b/tests/shader/gen/prefix_atomic.hlsl deleted file mode 100644 index a75448f..0000000 --- a/tests/shader/gen/prefix_atomic.hlsl +++ /dev/null @@ -1,229 +0,0 @@ -struct Monoid -{ - uint element; -}; - -struct State -{ - uint flag; - Monoid aggregate; - Monoid prefix; -}; - -static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); - -static const Monoid _185 = { 0u }; - -globallycoherent RWByteAddressBuffer _43 : register(u2); -ByteAddressBuffer _67 : register(t0); -RWByteAddressBuffer _372 : register(u1); - -static uint3 gl_LocalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; -}; - -groupshared uint sh_part_ix; -groupshared Monoid sh_scratch[512]; -groupshared uint sh_flag; -groupshared Monoid sh_prefix; - -Monoid combine_monoid(Monoid a, Monoid b) -{ - Monoid _22 = { a.element + b.element }; - return _22; -} - -void comp_main() -{ - if (gl_LocalInvocationID.x == 0u) - { - uint _47; - _43.InterlockedAdd(0, 1u, _47); - sh_part_ix = _47; - } - GroupMemoryBarrierWithGroupSync(); - uint part_ix = sh_part_ix; - uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u); - Monoid _71; - _71.element = _67.Load(ix * 4 + 0); - Monoid local[16]; - local[0].element = _71.element; - Monoid param_1; - for (uint i = 1u; i < 16u; i++) - { - Monoid param = local[i - 1u]; - Monoid _94; - _94.element = _67.Load((ix + i) * 4 + 0); - param_1.element = _94.element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[15]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 511u) - { - _43.Store(part_ix * 12 + 8, agg.element); - if (part_ix == 0u) - { - _43.Store(12, agg.element); - } - } - DeviceMemoryBarrier(); - if (gl_LocalInvocationID.x == 511u) - { - uint flag = 1u; - if (part_ix == 0u) - { - flag = 2u; - } - uint _383; - _43.InterlockedExchange(part_ix * 12 + 4, flag, _383); - } - Monoid exclusive = _185; - if (part_ix != 0u) - { - uint look_back_ix = part_ix - 1u; - uint their_ix = 0u; - Monoid their_prefix; - Monoid their_agg; - Monoid m; - while (true) - { - if (gl_LocalInvocationID.x == 511u) - { - uint _208; - _43.InterlockedAdd(look_back_ix * 12 + 4, 0, _208); - sh_flag = _208; - } - GroupMemoryBarrierWithGroupSync(); - DeviceMemoryBarrier(); - uint flag_1 = sh_flag; - GroupMemoryBarrierWithGroupSync(); - if (flag_1 == 2u) - { - if (gl_LocalInvocationID.x == 511u) - { - Monoid _223; - _223.element = _43.Load(look_back_ix * 12 + 12); - their_prefix.element = _223.element; - Monoid param_4 = their_prefix; - Monoid param_5 = exclusive; - exclusive = combine_monoid(param_4, param_5); - } - break; - } - else - { - if (flag_1 == 1u) - { - if (gl_LocalInvocationID.x == 511u) - { - Monoid _245; - _245.element = _43.Load(look_back_ix * 12 + 8); - their_agg.element = _245.element; - Monoid param_6 = their_agg; - Monoid param_7 = exclusive; - exclusive = combine_monoid(param_6, param_7); - } - look_back_ix--; - their_ix = 0u; - continue; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid _267; - _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); - m.element = _267.element; - if (their_ix == 0u) - { - their_agg = m; - } - else - { - Monoid param_8 = their_agg; - Monoid param_9 = m; - their_agg = combine_monoid(param_8, param_9); - } - their_ix++; - if (their_ix == 8192u) - { - Monoid param_10 = their_agg; - Monoid param_11 = exclusive; - exclusive = combine_monoid(param_10, param_11); - if (look_back_ix == 0u) - { - sh_flag = 2u; - } - else - { - look_back_ix--; - their_ix = 0u; - } - } - } - GroupMemoryBarrierWithGroupSync(); - flag_1 = sh_flag; - GroupMemoryBarrierWithGroupSync(); - if (flag_1 == 2u) - { - break; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid param_12 = exclusive; - Monoid param_13 = agg; - Monoid inclusive_prefix = combine_monoid(param_12, param_13); - sh_prefix = exclusive; - _43.Store(part_ix * 12 + 12, inclusive_prefix.element); - } - DeviceMemoryBarrier(); - if (gl_LocalInvocationID.x == 511u) - { - uint _384; - _43.InterlockedExchange(part_ix * 12 + 4, 2u, _384); - } - } - GroupMemoryBarrierWithGroupSync(); - if (part_ix != 0u) - { - exclusive = sh_prefix; - } - Monoid row = exclusive; - if (gl_LocalInvocationID.x > 0u) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u]; - Monoid param_14 = row; - Monoid param_15 = other_1; - row = combine_monoid(param_14, param_15); - } - for (uint i_2 = 0u; i_2 < 16u; i_2++) - { - Monoid param_16 = row; - Monoid param_17 = local[i_2]; - Monoid m_1 = combine_monoid(param_16, param_17); - _372.Store((ix + i_2) * 4 + 0, m_1.element); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/prefix_atomic.msl b/tests/shader/gen/prefix_atomic.msl deleted file mode 100644 index 910e842..0000000 --- a/tests/shader/gen/prefix_atomic.msl +++ /dev/null @@ -1,265 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" -#pragma clang diagnostic ignored "-Wunused-variable" - -#include -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Monoid -{ - uint element; -}; - -struct Monoid_1 -{ - uint element; -}; - -struct State -{ - uint flag; - Monoid_1 aggregate; - Monoid_1 prefix; -}; - -struct StateBuf -{ - uint part_counter; - State state[1]; -}; - -struct InBuf -{ - Monoid_1 inbuf[1]; -}; - -struct OutBuf -{ - Monoid_1 outbuf[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - return Monoid{ a.element + b.element }; -} - -kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup uint sh_part_ix; - threadgroup Monoid sh_scratch[512]; - threadgroup uint sh_flag; - threadgroup Monoid sh_prefix; - if (gl_LocalInvocationID.x == 0u) - { - uint _47 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_43.part_counter, 1u, memory_order_relaxed); - sh_part_ix = _47; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - uint part_ix = sh_part_ix; - uint ix = (part_ix * 8192u) + (gl_LocalInvocationID.x * 16u); - spvUnsafeArray local; - local[0].element = _67.inbuf[ix].element; - Monoid param_1; - for (uint i = 1u; i < 16u; i++) - { - Monoid param = local[i - 1u]; - param_1.element = _67.inbuf[ix + i].element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[15]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 511u) - { - _43.state[part_ix].aggregate.element = agg.element; - if (part_ix == 0u) - { - _43.state[0].prefix.element = agg.element; - } - } - threadgroup_barrier(mem_flags::mem_device); - if (gl_LocalInvocationID.x == 511u) - { - uint flag = 1u; - if (part_ix == 0u) - { - flag = 2u; - } - atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].flag, flag, memory_order_relaxed); - } - Monoid exclusive = Monoid{ 0u }; - if (part_ix != 0u) - { - uint look_back_ix = part_ix - 1u; - uint their_ix = 0u; - Monoid their_prefix; - Monoid their_agg; - Monoid m; - while (true) - { - if (gl_LocalInvocationID.x == 511u) - { - uint _208 = atomic_load_explicit((volatile device atomic_uint*)&_43.state[look_back_ix].flag, memory_order_relaxed); - sh_flag = _208; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - threadgroup_barrier(mem_flags::mem_device); - uint flag_1 = sh_flag; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (flag_1 == 2u) - { - if (gl_LocalInvocationID.x == 511u) - { - their_prefix.element = _43.state[look_back_ix].prefix.element; - Monoid param_4 = their_prefix; - Monoid param_5 = exclusive; - exclusive = combine_monoid(param_4, param_5); - } - break; - } - else - { - if (flag_1 == 1u) - { - if (gl_LocalInvocationID.x == 511u) - { - their_agg.element = _43.state[look_back_ix].aggregate.element; - Monoid param_6 = their_agg; - Monoid param_7 = exclusive; - exclusive = combine_monoid(param_6, param_7); - } - look_back_ix--; - their_ix = 0u; - continue; - } - } - if (gl_LocalInvocationID.x == 511u) - { - m.element = _67.inbuf[(look_back_ix * 8192u) + their_ix].element; - if (their_ix == 0u) - { - their_agg = m; - } - else - { - Monoid param_8 = their_agg; - Monoid param_9 = m; - their_agg = combine_monoid(param_8, param_9); - } - their_ix++; - if (their_ix == 8192u) - { - Monoid param_10 = their_agg; - Monoid param_11 = exclusive; - exclusive = combine_monoid(param_10, param_11); - if (look_back_ix == 0u) - { - sh_flag = 2u; - } - else - { - look_back_ix--; - their_ix = 0u; - } - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - flag_1 = sh_flag; - threadgroup_barrier(mem_flags::mem_threadgroup); - if (flag_1 == 2u) - { - break; - } - } - if (gl_LocalInvocationID.x == 511u) - { - Monoid param_12 = exclusive; - Monoid param_13 = agg; - Monoid inclusive_prefix = combine_monoid(param_12, param_13); - sh_prefix = exclusive; - _43.state[part_ix].prefix.element = inclusive_prefix.element; - } - threadgroup_barrier(mem_flags::mem_device); - if (gl_LocalInvocationID.x == 511u) - { - atomic_store_explicit((volatile device atomic_uint*)&_43.state[part_ix].flag, 2u, memory_order_relaxed); - } - } - threadgroup_barrier(mem_flags::mem_threadgroup); - if (part_ix != 0u) - { - exclusive = sh_prefix; - } - Monoid row = exclusive; - if (gl_LocalInvocationID.x > 0u) - { - Monoid other_1 = sh_scratch[gl_LocalInvocationID.x - 1u]; - Monoid param_14 = row; - Monoid param_15 = other_1; - row = combine_monoid(param_14, param_15); - } - for (uint i_2 = 0u; i_2 < 16u; i_2++) - { - Monoid param_16 = row; - Monoid param_17 = local[i_2]; - Monoid m_1 = combine_monoid(param_16, param_17); - _372.outbuf[ix + i_2].element = m_1.element; - } -} - diff --git a/tests/shader/gen/prefix_atomic.spv b/tests/shader/gen/prefix_atomic.spv deleted file mode 100644 index d7dac5bcca704dbeee8902b63dc1a54d731f3c7d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9852 zcmZ{p33yf25yx+MSwK)g!41Sj6c-i&5f|JdA|NO#?xOLLyo5-S2gwWKPSvi~?u*@5 zyQsBv*S1Fnsti3DPJ>!7ZMM{`H5{vjG@*8 z?fu0{`#R?r67Ngox)EHitg9Z<+&9Csxo^qwt?;O~`R|68Hn;Z-4s>s_FAY@jK6FpH zyuNL1v3-5D!`d5?A?z&G-9K!rPaeU>{;DZm|55mk{Ef+D!*il@wHiE$J-g3sBX~pd z%KgU}}@TO!Tyt^;0OWVl3A2lMjJ~?It zZ%B@V`+THVmC6;8RoME=SAgr2^T64jtr;=DKDl%RFC;~Ht>1;D8ysg`KO(L^>CN$e z_^QE5^^$r=<&N@TrFu!-S0%MAWnaF}d=IFX5g*U+bhtxXJhOS5X|(wci#VTGZKDzM zAA)W~d}l3#`}{g4BU!#zmDiqE{X5pG&2g>l9C>z5b}vggqw5q9_u%t>0OAPV0(P(R zlhE$5c8>bt=X!5!o~sq@h&`EKTjXC1&gPe!-(miZb=mwodXmj&pCX_A$>x){kPB7K zcOAfZ`qxD=nOFOp5K9;{4sv&4_ExO!;`Q zqZvK{9Xa=cyBG7j2mSk^ovVFO%DrQe&)BJm!}$Hs`m|5t6wPSu2cqqZcJs*n7HRWb zGtl~`rT0{_wQh28Z*2T`n8$+Ftqj3Zf?1EMVtOvDRz!EP|rc?GTa{x#)p3% zcr*dQ7s5$4%#VdyyQ$FwYhKk;?$;pNs65t zz3TwiWq23Ze#Sm82bd&cR<#EV~*dPr`SklcAj^kx+;4sOo$t3k%b(2gZ8`2Y zJ{#}%J&uN3`|Mj!Yq%0^ zKh)2%<{9W!h(rIGX#MKg`&nRfY-O!x^x23u&u_2grz7Tat~lnd20J(SIVry(Jf7((?wwGWIpG4moG-NTLa&B@~HnM;Hba(oMZhjMf63j z+fo~9xYn0}*C0`AxpBwPTh}cj;}F+0F7~(<>>jO&dy*S#-R*(3Q^&d6!P#?{;MzKn z=&QDDU+qDxw+`%jT}afV8?4>=ooKo7KJ$IZ6l5wAdnl*&n7;w;Jm+jg_anWCapsg8 zBOXAT%l%c*o=@$)+l0OXap>QS)~}AWuLPST?%ma3ZC4@QJNY1DF6WB#?}a$qHTd#p zyB3@FSnoQp>phs>&FjJ5OYL`IYesKDoTtxs-VJDn``Lni1)|OUh-3Ym!1?uGiA{T~ z|0-~H{a3?XUwb?+uK_zxU-aO$V0)l1`t&-mcKdK6S{}aFgSF|q87&Xr8^HEg-z{i) z-0L@jz1PmsXNXVo9<;-Ipzqy?Huolu zy}cJaKjls6_aU{jHdVh1?}yJs+^ezbco%&DY`nw0&O(0>an5+ev&oH%^*#i4z1a7M z!P=%G?)#%?hx^v|5k#B&7DrD$2F~vD%K0g6A-Vyu!B-lBzPq}gSAl6gM zqi*Ua*rU(D?MU?J4)7GjGsHRX1Z$6TehRED`txb9!*lAp3(@8|#nx^s+O;C*XTaIo zeHN}QYWI1t!<_m)hiEgWIC9Pfd#>=QV~sC>U89vRhyDB_5_Q#Qy(0EYVE4F<-W%6} zxEJl_kMpQq`)>3-NX-8-*tPCR-(_C`Ym4*T3wC%OeP2bic^+}(`5M?f@lLuQtj*fF z?tSR{Q#^z=zx#UtZ7#LnfCteJArAfDKZxCJ+Nnpo_rr1J<(@Rp5jf@i2ebhKYHQ$A4%<=De|lH@5Jog zjC7eJo>R6 zERTNt7i>SAqc8fQ-TKGb)N!^K!JaMNOD}=7c_!!U(@oogS*g7rlo$APsuJfF|iuJ8sV`Y1Op*53_0L|mM8JY1W1)Vngof@5An3*RcoQ3%NCSzOl}U*va6yJO2Ge9=^T6k;lKq$m8zp4fgIhM_;U^ zJ-e1`#`*RE4>2d|yDwZ@+#P+|B7O>3ThwDJT$??!9@D@M>!EKyM4M+6$2pq8aYp}! zBoE&K;CMb-!16fbbg*Z1jy}(5toGRBfne`d?C~JDw&>ZxV269uHv>5YagSni`YoD^ zSU2PK853(93U-Zn<_?2vi@NF4X09QyYiDzrQy#TZ%i}pw@9g_G6JPdQDc@P_EPRi$ zcGO}vT$^XJUx$Mo)TSi;a>DDK(x8GIPS;M;J6?DJy9OMW5J!|ufa^tLNoLim! zy%hCbj6Gl9Q?O}|`Yr)$i~24FJFKt1QxR>}R~+YD2KJosJ8(HX&ZW=&`deJP_uac4 z&r0M|dyaeYnP0uLJ9-+it{1=0SHQK!^Ljei;lA~)M6{V#9Q`;0Y|regJ(RnLh*d}I znc(R2DzH3idloof+q1E0zcc+e#A>j%*yFiihkMj_4x-IHiX->=;C${2uxXEX$%SBT z{(f@aM)XDSUbMbA!^L3jp7%VoJbagcJ(s>UXnFYBz{cyl6fKWuT!E)<2np}P06Vc9z_%c|#=jcVtqYfLu*1i>`Pw~KXfj6sf~4%Ty*3?2)1?yhL}>hG$Fh%xRp n_Op9xkC;8+&dtV5f@_Z&P6j(q`LW{Uz2EyVsWiAhx-6 diff --git a/tests/shader/gen/prefix_reduce.dxil b/tests/shader/gen/prefix_reduce.dxil deleted file mode 100644 index 0ee28e84d3b111e42ef0fbab465a4bf7a9a73aa0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3764 zcmeHKeNa=`6~8a<<-L~>!viuJ46=`}Osb4V&;Wwle2E}1hJ{(Mvf~R72G#(9AoAg~ zA;BbKbc28*)ZL)yX1g;jT6Qbe?SzlDsMtZr*(#!iE@Iblce1W)cd9e(eUa=q&b0q@ zITxoiLh!sLD~fgTrx5mJ6LKFZQ-bogs2CpBvC3U&fzD$;mmT2XicW` zG>A-6FJuYbov{--Reqam-gdtO`%xyI4-dgUo5VH89+yyZyvyuo z+obJ2ITVMWvO2 z^6gz`oZ-@+?N2er4(pv{_Kf!=9V-H3X3PU3MD;#L1LIIsI+`m{C@=e!PFOuHY=I*P zZIKi89>Po$)ox*3P-qGYbqv8eOygg?BdU4uUBIA{;c)C}{~Q_6(c~R2FLxG*k5V zWhj1}ssylP0wiBJU`F*SZ);H4DygtVqQkHX@!XhyGI1R4gdRjqZD!sAB z3KAuDG)79UxHNwdudo8SBwW3NEM-VWGo4GFb4?VvrmkTFBZ|ZcG;G|b&Bu)7;Ij5) z#D@)#3LwG9&MB}z+Vx&}c(uN&CIY+GM}un^Vp0h1%8j)1&^HQE53YM{K%O=-baCB< zUyppe?rg?|b0a*C-_Psn3c331WP1PZXQgrdr}}9?g`S0u!%1+N3JdEBF-tUS`pY^O z6|Yk8r1B#bA6+j- zlVoUeDw?FUZP3{^GB$-7cMq1_p51w3w)pmJH}eav{ek)zQM7zv z@oDCYOg2SIg6|>B(LHkg@_p4Or$??|%j5lSElgBayQikXP;V~aqz3LV58h!iX^EPc zkE#$Mo1L1}_HmcmLNqrK=H5ENA_)DG5KHna|3QR$rQSIQnT5G zZk}p#HJFH2l4#Cym`p^A32kh0m}p1jRG9FQTR1Ndt&xX*?ic!X!enY<)xDUgTQRG~ z^wAaa1L+rS>C>9DVKn2SEu$SxpYn?u8KSk4NU3zx!_nd}xg92kXp|9+-*I8uy%xtV z(orXN)b~1?d)pGf@(aJ36MpLxK9ZxQlLJu;15s0faaJF7B_?X3ZPgV${+U~oHmOPL z4UFf!2Mxi$SJnm#n{nv5a#nb3O5VK$-cSr~_D(y`Y|wDgWaDGV03ZRY#Knj05?lkwO<|lh!KkE3RYy*{Q!#}+Cn`bAaXq#%q6u)o&@rmDu zT4ZYHq|h6XVFx;S1HoY*(|GMy_eTHN(_wYqx_p3ymSi_;+2tf*S$QItk6|a-kd#&r4ctKgJf-KRJ z+?+dJRaBB4fB#6T{Zit+3>Is&5Tsb_xenXt%h;0$?Lq9B{4c?tM0n1Bi9HQJ80?Wd zfuMlBmLDPp$yf~fp8oR~#D07Ha|}{mP^eTy2^gymi=P?0F)Jl!l2xFO$C)#V zhxE6U)Mq`_GB3Y(c(u{kSiQEY+)&d{VD<1}uYT+JY{^Xjoo-7d+vQhWs(_6?LC!Yl zse%2SJ(o9r(%Ex1kLON;4Z7Z7uH92#W`G^~17YO5Imaa@N}_M>?33J`?N*}y@7e#u zJzGx$up8EP1hzGi{SUy0j8y>sdQbeeUnGK2urhP)NPNS+jiqitllWn=*7`snhvRU8=|!F#5{m(8ZPVkFxDecRA{+}X)*4J< zh@y`I8{0<@i)py7YR6~7jHh^r4ng)5rc&2o~wFCML!>{R0C0%GVf2poW$EE?ru5UXA7n~CVsS(Vc#6S)4!DAqn_XcVElSN z*ZjhHXw1X+Q202mr3Jo3m0X8V$_;`-5RYibooxs_0qxZhdv$|U3D*F|m#Jj#(L|dZ zTO$}^_#7@%X!0eF!YGm>as`f$Jm&A4BgXOeaDgS}LtvY?&xQJ!qiv*h-mYM{@?L|? s#Z35sk`lOn; -#include - -using namespace metal; - -struct Monoid -{ - uint element; -}; - -struct Monoid_1 -{ - uint element; -}; - -struct InBuf -{ - Monoid_1 inbuf[1]; -}; - -struct OutBuf -{ - Monoid_1 outbuf[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - return Monoid{ a.element + b.element }; -} - -kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _127 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup Monoid sh_scratch[512]; - uint ix = gl_GlobalInvocationID.x * 8u; - Monoid agg; - agg.element = _40.inbuf[ix].element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = agg; - param_1.element = _40.inbuf[ix + i].element; - agg = combine_monoid(param, param_1); - } - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; - Monoid param_2 = agg; - Monoid param_3 = other; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - if (gl_LocalInvocationID.x == 0u) - { - _127.outbuf[gl_WorkGroupID.x].element = agg.element; - } -} - diff --git a/tests/shader/gen/prefix_reduce.spv b/tests/shader/gen/prefix_reduce.spv deleted file mode 100644 index b2e35fc79906afa27664fd65a1ed6084c7d96a0d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3472 zcmZ{l`F0dl5Qkf`5QvgQz!fkdicv@;%4%2y(IgrSh>CldEF>ed#K{ceN>oIA760-X zJU)m|;dYK5zpuN?wBzw|imG32x2kSUr(@Ib<|Mf**_!ku-zC`@NH(EKlHQ~{<&!g) zX2#Z2ta7Fub(9jBjW1 zC~~o0v1g;+&SW2Wx^}j86>d*rPq{Xq>a1lXQ>Ar1r42Iw3HWNMQL5(n)W2|7@)CG$ zWo~VuQED!%WcB+7)IUSr?d7NWFJh1Uj5(<{SC$%Z@2kLz_xZ3z{5g2G)l8eQ7qO?_ zYNpND%gs!MwL4iyrk2a~#{X~v^qsXW4Zpfqm9=JC3-X|Y|tjGF&*DR(ZeNVCaeQ@8` z9FN28U!8Y~M{=F^Cv(}@xWf;U_wFF_Xs);S7jk|0O@d9ni#$_^_O8yn)9`G!vU|n; zp43I}?uhs3Kbzak5&7Rl_8kJhjchN0-$Cvn!ESnXzoLKlNNnCO;Kr!)Ua`BRF6vlE zjyi7VHs^YP>H1c8Ap3GI(h>H+g3Zu=y6-u-}%V-EWKd3{|;Dk3jC4`*o6<>~9cKe+wV+UT6>+f=>h z!P~vr4~;ezYz{)dGt+Unvz7ls{%7Ea zAdmLLaP9J_^9Zsze&Bxh_ZXzkzWyMG_-V*o#wthbab$Z8{4BD$*7IH_ko8fok25|A z*^9ONj^dcnY2=uZvBo%~=OAq{qcgb<_RoyYBD)7MBe8zRnZNfw5Bc}nJN5H^>rn4( zoRj^@qn~rg#eQA{tMj)VvsYJ~y>p5^nnCt=Tim1bVD&MNSCFm4m|6G*$ltzx-XYdU z*}XQG_q__YUb%1m8vJ$0qx}uIc6s!38QC21)^8!J^V{KDi!VXuGFJI#^nObuzai1v zyU6Xmy@yVH-1k1R_x+Ub!yK}^sy=q)3bJw9qOKD16=)LrmA)6?9&6M#52>?8<)~>1 zxxJ>V=+s9|%gDu=R>0PzK58l>8>cPa>47x>wFx zZhSYq1v$r<+4Wo(zYT5bio5Dw#an%Z++O1ibn3s)zeOJ-t8?ewgHPce_dwexkUDEq zj@(}&uS3Q~y3&$qxgp*~38_||-btgpxXx8dJH`s?eg#QNE5)Fc0s zzm&%y{|0Ub`>viX&{5)g;9CoFFS5Pd1NW|e_yDAjcSSGv=KAp24mP&vvjeO?-e)JW Taq2HHKkK;yZ5+=k+#4|aP&VNsC{gdz`T z=O%b6wTk||BKqg`sdTW9M`_go$wr?b2N z?9S|C_M7?6`ObHL=R4ndwgU*G!^1OULq06@Sx7pf6z zDb#AH7ok${LnhRc{6dC=?FA_;dGP*)HeWyVowliDGs8kZv(Vu?^*2z2cl!hY@VP9g z9NK(NcZ+zwf!*aS*{IvN3I~7%La-lp&U>WL$OoYyMhX3Tga4{Z0)QR*#5NkhVL%*I z!kW^l8^#K8O1=63Vwa=}NktO2ubh*u6X*|!^~Wf+ESr?C7pWh16?kN{A>Z{JP^k6O z1yWCE@{mbaY;}iQKW@jC<>SRLSw6P}_Y^-ZLM!NxcFgc5>IdWa*4w;^c(@&??hbE) z0ssnOAC%9<#_DM!>4-uQu$KlP75dc41yS_2Hteb7V|G}4WbgCH78%>!V*4>8watRD z9u8wEl{_C5F|IBIXpc)!ptoS;Z(1DhJUig@+@Y!*ZQ8)$6tX?Q3A~-JZy&r9)k2l7Tn;_VTwfBU$sI5>TB#<9VkC5crPCTx=B*#+8$4j_Yw zI9Z+FS(#D72$bnxwnNq|LAdG0hB7=OrvOvCAW^T`(FKj9Ql90b}^}yI>7k zqM0S?$M6V6G|VAcDU2|OjKEJ`0%CSeNnxD%2(z9wG52~@DeLk;gL=l}oZV2E9{`-{ z7YL@EHDO^$LP>6+;m9SaZ-J5~B@N)om=*c0JS~97V^)~y9uCm+4O)yvLsOm9EmOoA zJ?^Nq2hU&zK`J9KNh*p-q06LAE#~M7(=f8`VahJHUn0R31rV(Spb746J?y&#HG1*| zad{8Ow~8$}=*!EW>RsPrL3?U_h^|kZ)7#RSQJx_>obi{#K5Ntc>n-A~>p!5dI1KdA z;4EH7XOWn$th&Yq(i0r0Dvf;qt?<75N*lCQZ%$l26P$Q+ z=+?E7NE*o*(v3e_k@?4cH+N;aZ|>V)-2unWwa+21osV2v0D__W=7677zch>Ma6Z!g zbm?O~lNjQ1xz4uv_V*n;_i}-y_h@e~4HoTPr8(t`5-_oS%R^t6w}?}qQxMaQ`p8wiUONLkpqfSRx`p<*tac%T_T=XT@j6TpGJX868yKF#JVO=@FCXV^FPap0iP=X>Kq+XWcn+=i(BH84g+O?_RH>1TX0CAJe{7--`0 zXA*wgGdbDi=g$JauNz>!lH`;_VP&NZS?e-n+e2I7OmhM_f1jO8=q+fEyrk#q>Sm_OqG|a@* zFfkv^NM}samDEt;Rh4o=l^85bs$}-&oNmn-Gpv3E%{|?kYe#cNXJid5(NaZds+@JO zjYvs>1R{YKl!CM7_@x2PdVME9GxvE z%akK3<&ZUTqzr%0V^}?GSlvBa4u-P34B0L;rz`Y*IOrW)&P6n5Amkoa5qr#N9p$u` zQGPD0(>(0l-A^=k2Xd#mm>ODo%OnjCNO=R)(f}mGwG)mBe{T75_YV#^uWwyNWw+ul zoO|u-XVOrs?w(QdaQN0UZ^v-yde^X|f7b3lLLV7NaCm3s^gG_!o%BO*yUjImb}tD% z%RRj32^YyG>%EG_LHros<=GAwz-wnZFK1b+2lssP`L9!wIh}F{cPl^ryz}y_z=n7h ze*6Wxk^=s2<7NEkH{68j)umTlb%_^y$Jg*Yat9I+&nw@+EAnn$nTGxaUitHX39sA- z=lBnKrSaduD~k|Q8Y5&}@CN=*<|C zPa&-TSzduR{cm-0iD;`O0Ux5%$Sry7EU833_* zX~kC{dmJOSNW=FSq|So@bT? z_;=<1@9O`ZySj`9pcn3G99ERczw1R%u}V7$NB!RLr;1Bk|rm>sz zkXjlyY4M{`h{=lN#1t#JzyK0UqNaBhrNR$81DUTNOXdO{{9Ry^ z7SEZ>r+Dcga*sbyr3V{XnA7|4T|Ar9MGdh!M^K4 zrn@5B?R~nKi^uD4w{ZfdGOEKW(lU8RyHmAz1{lv?vh9;H*%_4=xJ4v$M5Z-azc=a9 wp>XSrpZZ15Q@4Xt;y0(nFR=M*{Oq7=m=9Ut0cbfqC;b1y$MJh_!|Gf4D+hS`{r~^~ diff --git a/tests/shader/gen/prefix_root.hlsl b/tests/shader/gen/prefix_root.hlsl deleted file mode 100644 index adf6bf8..0000000 --- a/tests/shader/gen/prefix_root.hlsl +++ /dev/null @@ -1,80 +0,0 @@ -struct Monoid -{ - uint element; -}; - -static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); - -static const Monoid _131 = { 0u }; - -RWByteAddressBuffer _42 : register(u0); - -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Monoid sh_scratch[512]; - -Monoid combine_monoid(Monoid a, Monoid b) -{ - Monoid _22 = { a.element + b.element }; - return _22; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - Monoid _46; - _46.element = _42.Load(ix * 4 + 0); - Monoid local[8]; - local[0].element = _46.element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = local[i - 1u]; - Monoid _71; - _71.element = _42.Load((ix + i) * 4 + 0); - param_1.element = _71.element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - Monoid row = _131; - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Monoid param_4 = row; - Monoid param_5 = local[i_2]; - Monoid m = combine_monoid(param_4, param_5); - _42.Store((ix + i_2) * 4 + 0, m.element); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/prefix_root.msl b/tests/shader/gen/prefix_root.msl deleted file mode 100644 index 897a6a4..0000000 --- a/tests/shader/gen/prefix_root.msl +++ /dev/null @@ -1,112 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Monoid -{ - uint element; -}; - -struct Monoid_1 -{ - uint element; -}; - -struct DataBuf -{ - Monoid_1 data[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - return Monoid{ a.element + b.element }; -} - -kernel void main0(device DataBuf& _42 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) -{ - threadgroup Monoid sh_scratch[512]; - uint ix = gl_GlobalInvocationID.x * 8u; - spvUnsafeArray local; - local[0].element = _42.data[ix].element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = local[i - 1u]; - param_1.element = _42.data[ix + i].element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - Monoid row = Monoid{ 0u }; - if (gl_LocalInvocationID.x > 0u) - { - row = sh_scratch[gl_LocalInvocationID.x - 1u]; - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Monoid param_4 = row; - Monoid param_5 = local[i_2]; - Monoid m = combine_monoid(param_4, param_5); - _42.data[ix + i_2].element = m.element; - } -} - diff --git a/tests/shader/gen/prefix_root.spv b/tests/shader/gen/prefix_root.spv deleted file mode 100644 index 3e0422409cf10798205ac282a424700613c33a0e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4072 zcmZ{miE>m`5QcA<1i~T#0WB0TVV6x%77;;+Xh4I2D7c5oLK2sM)%RL4D zgC;*gnuy5nl~Yi%t5U8E_9mRrlyXc-U#V}XuUy5_l1zd3RED|-%Y9u#Y3$OG>hNe! zb!Bbm%F#7zHvQMa-U4?`k|H9WjPB0xZ1mtU?ak>O`b%9MrAl|Pw6lDq(o?JsR?0iK zr?HL6B6JC3DdUUL`I_afOEotpOX1s#)nfbTq4ZvIS1&fT^(+&x#5bY`Q{0%eqYoE{ zi$fW|<1$`IcEgMP{p@Q>_Jcb=OIAJ&HeaRlt?%K;JOh z_c)5q=RSTJFC=5|;mXnUoYUaiIfdj5IO;un*`CJaT#k2;!`AR--AmC+561^Iy!kb1;Jm9fxk?9XwAwi!;sNin!+C zaaL=#My4i7xwbayo0xI!iuDb=62u(WqwSA2Ym(c0 zZTeSa*u7E1O%W_Pg&SV!u7_0@_&nwSs?rql<|3d0%Bcb771% zxqR2(vG^NmjlRG0=h^awXn$*h>-TpdxbN6r$Ng={`~7VQ?tZ`Z;O_T(&vU=`JokIg z^RXQFJiqz8-|s%p{r2R-vZ9=!o^>AxARUmdk?1bfDhfc`P1UtjgYq@b7d0Xe`F2uk8<}@y9yc=wd_Qaax#@ct^z}~6j-ksq5 zz3+i*^EW8oRa^dDeS^ri7i_*gh-X^cy@+=A@6NdK&iMn#0VF@?4`%k*{}9}L?(z5O z5oAANoM+075&Io`mi0Y`_I_&T?jX8|xb%0U^{eClJz&p>x$6UK>qVS9c^7gJaj*Cn z?1#`wKN5Ev0N3Ab5S#YM_c+*mKWB6H1laFJ`w!Th{Sx9neZO(XGTLQ5`i2l~)+3Jm zhr#vvpTwp;@(+Xa`A6X9*B;+X73@BJ@dl${-#}ly(-E+C-=Tt*hwm6zo4%uHdH9|J z`@Z^)qvbKzPlKl*?$KwAvD$sZxVQTM-r*U1^?i62oA#*TIk4x({hkMVKjX|PH^!XK zkiAyBe?0mGVjtu8;)To>-;F+P`MEp@udmr$W7tn3@s6j!_4U7qP5bxRjJyQaHWzV5 zUO~H@5q&Qs+N@g~XTJ)rKl?Ro+T;CS2WxXKqCand?T^0b;hSLXaR<5gjl61or_t7_ z?=7@7sr^RZLBET*^uLGJuZ~{75B7}sMn3>+YeT%R{B6Xu+$)Z~AA;Q*HGc%Q-_~^& zE%%Pi=#P;_hN)z1IfK?7`9A~C&hgK|))RAc9&C*E$oU1> zbK>vFm*8bc3vwQbzZ+kHjdgj43+S&Amgj(C$aRXw(0tR9c#HpJA zNwlFCDzpgp8jyPFm3sK6DxP){NIgWnQm?JFwGOn@+79)o=k+?RGk0&JI8Hk^b2E20 zb8~ZV=6mmZzxTawzxTbj@3))1L2qCSQlyuWccNa)96KL&XzJ2&deN(}DO)bPVGZTK){tXu4{g?m%J{De} z3TP-86u!*hVRyQrSnsN#W(J?J=fQ$PU7-sHfCWO(4IN}}Se?r|f=dM!i0cjht4b1p z{SXVYGofcj!7-67u0=P5t;VU8WiKF&WP*^0j>VoU;$$lY`WM3VJyeP;gP_++Qr5Y0 zJTk^$biE3qQ}mNLQcsI2Xwv1|+!GC-@5L4v@qD-lJ~p{~oL{a;EAWLorg;_hgG+ev zcf5*txUHz}Hm`yl09L~}D4Y{zXZ1`j;SeJT*uwyj1hEuVj+oipgzZ!HG{Wk$#x9R+ zR_TD{}KD}7GSZK~AKqz!b$5qkri zz}xb>vQxLk^~Cv2;<5zw{QinU_>e>)MuNR7qd7k7d#`Ez=1a2tfyr%S*`}(No!ePwTB)DZ%xvQ zQRz-tgNU6PQMFfOWkeMoY1NEUJEK&wBHmyW|MEVw8m(w2E80GaU!z1dcPv>c^l-=W z8XWS9piRZN6#BXAA%rZCi1#StR%U~mjA@TEw8?N)0C37*BA7O$!pabf65LG3k%d#C z6UPH&9mU)`hV;!0Er3TRR5X)&xX>Zxd+bmuf@niH0NOeH`w#&oCAM7_9hvKKi`A>n zVFp2hMqnaK@(H2Kq)o`@m=crZ^B7Z7iTsVGU(k@S5>zpZZZT5DMG+7Br#3S;&& zU>G+uX#}P-msi?BvfP0x)x@L5PZv7WTkJq1pQV4yWo8N1Fq%uHG!M>g9>KZ-X~{t} zGU(6F$I6MG@Wxcck984gK#mU_O~Wob%s$Pm3UgUy6!vuo11@5SC5`+^@V@Q9tm~dUd)E#H)&`&6UNaCDtg6Eo!-zwkjQ>iL=Tow?o9M z(m(3>>)YSXF`jEYO#bQV=Wpa4cCUQ0L;lGX4CzbTU}o@QEndRnPx-ra^L&AMeLDH6 zfZUHn2Lp`AYHDh}c=%#MbMHuV{wW`ui6I6rugjgZrIIaEjoBVv7t};WA`24!ZT6 zsIFFIZLcD)frzgu=`Bv0O6p?=0x^H+Qf$!(LuL)~5p>0nVTBuAF;eT^ZV}ZJqPiTX z#Uk2iL2GKA7RDJp5-FYXNGCKAm70jprlr#+X(|QjRmt%eL2n%O$I z2tM?-J@Y*@vv1Hnq!jJ4pw*PqYC-w6uujX6bB9l~vptYC$wgE$(wipfM4yybKotXk z8s<(oDty*@Ysb%DbbeX9gvw~ZU%zjn#b# z!Qs7U@HsB*i2a3kuiZ6veh&eWR1c4AauKXb?~R@_fOYdz(pAo#!N97@kBJ_>`^xh! zfLBeoe0;)Iernej-~2jG#px75+^zWP8wdhB;<@uO!`O0k1vxU<^5wu8OKHG^Zdp0 z+>iNd4U2i{;ciMo*^WnotwxZukQ&|su6H0_mOtn`XYqH~zsgx^!hcjplagYq91k*T z;&L|o6TT|5k80&}K7EWRVi`h^hP{gqv;fv_!s0&~xO7ELJibH+I>dtUa=Onvrlme; zt4Q=o+LuTnUF^F0(UnW%SFghhS$l4DV3N*z_07PMHXC`Ce502$iG{)n;*8k-Z8?29 zf=nYf(pD^6Nwu|XTV;WuTO9qX=bofHpWJ_? z)$cPnzDRM-F|Jo%zGYm0E-gVj?3w5vYP)#${qxOj7xTnD%i-LT%I(#4rfocb6$rv- za`*rH|9|iPH!}bnf$v2WmYdA~fAH@XHUe7pO1w2YQdX)x-=@>awrRh_Wk#E9O+fb0 zves?dg!i-yaL+nH9JB@?4i^JEJKnB~ciCvm-QEFPyc>z$9*5&_jO|0R@w;j-Qi!KU z$~|tq`2F|&pSTbK$I#j3{uo<&2LZC@k%Z!wDEP)jXi6)y25g zm{y2SRa`eBGDo;eOLN~(k0+Hj#4OeV3TsH5eUl^YE9g9elaON{Yw*`b?}pW8o4-+5 zJ>%@`zr!s#xgJ%PfLz$rpqyTsEd99FPMi#I2Ld&;WmJDCTAK{#wfGyN=k-fk!8J;kb z^8glLPm7*tZ_>u#&zMBd_=Krb2tF7d$lwCWgWVWSK~fI=GChGOg!Irsgw@{xPGUG% diff --git a/tests/shader/gen/prefix_scan.hlsl b/tests/shader/gen/prefix_scan.hlsl deleted file mode 100644 index d9e74ea..0000000 --- a/tests/shader/gen/prefix_scan.hlsl +++ /dev/null @@ -1,92 +0,0 @@ -struct Monoid -{ - uint element; -}; - -static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); - -static const Monoid _131 = { 0u }; - -RWByteAddressBuffer _42 : register(u0); -ByteAddressBuffer _141 : register(t1); - -static uint3 gl_WorkGroupID; -static uint3 gl_LocalInvocationID; -static uint3 gl_GlobalInvocationID; -struct SPIRV_Cross_Input -{ - uint3 gl_WorkGroupID : SV_GroupID; - uint3 gl_LocalInvocationID : SV_GroupThreadID; - uint3 gl_GlobalInvocationID : SV_DispatchThreadID; -}; - -groupshared Monoid sh_scratch[512]; - -Monoid combine_monoid(Monoid a, Monoid b) -{ - Monoid _22 = { a.element + b.element }; - return _22; -} - -void comp_main() -{ - uint ix = gl_GlobalInvocationID.x * 8u; - Monoid _46; - _46.element = _42.Load(ix * 4 + 0); - Monoid local[8]; - local[0].element = _46.element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = local[i - 1u]; - Monoid _71; - _71.element = _42.Load((ix + i) * 4 + 0); - param_1.element = _71.element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - GroupMemoryBarrierWithGroupSync(); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - GroupMemoryBarrierWithGroupSync(); - Monoid row = _131; - if (gl_WorkGroupID.x > 0u) - { - Monoid _146; - _146.element = _141.Load((gl_WorkGroupID.x - 1u) * 4 + 0); - row.element = _146.element; - } - if (gl_LocalInvocationID.x > 0u) - { - Monoid param_4 = row; - Monoid param_5 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_monoid(param_4, param_5); - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Monoid param_6 = row; - Monoid param_7 = local[i_2]; - Monoid m = combine_monoid(param_6, param_7); - _42.Store((ix + i_2) * 4 + 0, m.element); - } -} - -[numthreads(512, 1, 1)] -void main(SPIRV_Cross_Input stage_input) -{ - gl_WorkGroupID = stage_input.gl_WorkGroupID; - gl_LocalInvocationID = stage_input.gl_LocalInvocationID; - gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; - comp_main(); -} diff --git a/tests/shader/gen/prefix_scan.msl b/tests/shader/gen/prefix_scan.msl deleted file mode 100644 index 5be4e65..0000000 --- a/tests/shader/gen/prefix_scan.msl +++ /dev/null @@ -1,123 +0,0 @@ -#pragma clang diagnostic ignored "-Wmissing-prototypes" -#pragma clang diagnostic ignored "-Wmissing-braces" - -#include -#include - -using namespace metal; - -template -struct spvUnsafeArray -{ - T elements[Num ? Num : 1]; - - thread T& operator [] (size_t pos) thread - { - return elements[pos]; - } - constexpr const thread T& operator [] (size_t pos) const thread - { - return elements[pos]; - } - - device T& operator [] (size_t pos) device - { - return elements[pos]; - } - constexpr const device T& operator [] (size_t pos) const device - { - return elements[pos]; - } - - constexpr const constant T& operator [] (size_t pos) const constant - { - return elements[pos]; - } - - threadgroup T& operator [] (size_t pos) threadgroup - { - return elements[pos]; - } - constexpr const threadgroup T& operator [] (size_t pos) const threadgroup - { - return elements[pos]; - } -}; - -struct Monoid -{ - uint element; -}; - -struct Monoid_1 -{ - uint element; -}; - -struct DataBuf -{ - Monoid_1 data[1]; -}; - -struct ParentBuf -{ - Monoid_1 parent[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u); - -static inline __attribute__((always_inline)) -Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) -{ - return Monoid{ a.element + b.element }; -} - -kernel void main0(device DataBuf& _42 [[buffer(0)]], const device ParentBuf& _141 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - threadgroup Monoid sh_scratch[512]; - uint ix = gl_GlobalInvocationID.x * 8u; - spvUnsafeArray local; - local[0].element = _42.data[ix].element; - Monoid param_1; - for (uint i = 1u; i < 8u; i++) - { - Monoid param = local[i - 1u]; - param_1.element = _42.data[ix + i].element; - local[i] = combine_monoid(param, param_1); - } - Monoid agg = local[7]; - sh_scratch[gl_LocalInvocationID.x] = agg; - for (uint i_1 = 0u; i_1 < 9u; i_1++) - { - threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= (1u << i_1)) - { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; - Monoid param_2 = other; - Monoid param_3 = agg; - agg = combine_monoid(param_2, param_3); - } - threadgroup_barrier(mem_flags::mem_threadgroup); - sh_scratch[gl_LocalInvocationID.x] = agg; - } - threadgroup_barrier(mem_flags::mem_threadgroup); - Monoid row = Monoid{ 0u }; - if (gl_WorkGroupID.x > 0u) - { - row.element = _141.parent[gl_WorkGroupID.x - 1u].element; - } - if (gl_LocalInvocationID.x > 0u) - { - Monoid param_4 = row; - Monoid param_5 = sh_scratch[gl_LocalInvocationID.x - 1u]; - row = combine_monoid(param_4, param_5); - } - for (uint i_2 = 0u; i_2 < 8u; i_2++) - { - Monoid param_6 = row; - Monoid param_7 = local[i_2]; - Monoid m = combine_monoid(param_6, param_7); - _42.data[ix + i_2].element = m.element; - } -} - diff --git a/tests/shader/gen/prefix_scan.spv b/tests/shader/gen/prefix_scan.spv deleted file mode 100644 index 6d8fe0af951822060532ad7091e8a50b3fd80f6f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4720 zcmZ{m_jgoP5XT?v5+k4i!7iGBAR-{x6&n$aXwV=kHVm6W5>2wW*@(S_*n6+ou+!{a zfAIK+_*dA@(c|ay_6@wO$IF>B-|x)aJ9FpGK2n%GZ@(m&luS(;lb@2>nwd<-k|fiT zhE%t;uWMgAQterK%BjnYI4~)sfyOkkZwk7I43!3kbsUAPN5+vEoS|B%`Pr?aPQjRI<9qb+I9j;<&N~XiR%R^lQ!@ZqDY3#z0YGt&$ zx}>&q$>{RsEB ztJ2tFG9NvNaWLZx(D|C>u6;EZlZEhArD~~lbW?gSxvK}8+IkiVSmKk=11T;ht?13A zN@*zL7f#@XZZ{hcG-l~T34zjhzrg!{Bxa@}>(TpRF5t}?h>?eDFi zeUB~ZeD2#P@Io>MuavhC+mOCHdDfRJH@8*FqnmA*H9ba5N2$W|dHdSCYIAY6&q`Dw zd7@?l7n5x@-`+dFIe{0F?eJR7g=7afdb4XnTrt^`<87SljQZ~Ci^&vkH4pAG<~Vc% z;%pidZ?!L`klOq;81K8yMO?vKYBc5c-<(Hg{_%_-k?}{-`|=gjeql-uL(Ct&%IDV> z`R!Xizuf$j5bOC7ZGLBb7Qep9=oWUnUwkkz=5Iz^N8|B#ShE%RMIx+Hsz#eSvS7ttK=D7;(ENZt7xxaI5(JN=nyvARj`SR!YVDsG~-zKp4 z2;K*7o0`(SDDE>-XCk+<9oN<9iq zG4i=cGtz>bOIPiowunCgtZgOH&h-++S@$=r&p6+FDWcEZ)_Njh|BYLlKjD9wP5pgAf9P$S0LKme|g4@ch0Xt z)+6~jzc#bS{_EiGbC2J_8xX&h#(AdP81V+QXIbBkXz!&9oArnz|7LJ~{#&qVkNg#IKK}^Z{MzI9QU$wDU%bI6*f-D@?{q6z zyYEm&%fojYSew2rXnFYV0QwuI~g-N8F>&7-O~jhH-E8|GmRq`0D#`H#Y52 z!#!Znjr-jT_I}2hQ*MkoogsUzc7FrMdBSF z0FNVckjVESSo_XwMjirdGsYQt1nqK0^gWDdvu<&m{Uo^l?5D74kN1BXtj)QIZ`m{8 zCy*vYzjZA@Ka1##Z`pI14VC7cfqowGj_&ooa^vhx)ToXcUjWy?-;3C^$NgUdYm588 z0(N;0yuUckeidAQ_G{R*$NgUiYxDjwFK?v&TK(I=a?iXT{TAZvxG&=02ET=P z-#5|nnDKYO&ba63iy6>vPoi$MzU^r5sqbC1cT)R}dmsG);?n;iTE9AG=OeIZ#5e9^ zu(lS&e#zfMJj=b}*t-+#-niFpuy1KyyU=p`(1`v7nUA<9;y=yo#_s{kqYs~f?Stp& zGiC=`d*uHde0Yw30k)p_-F*o*MtkJ^3hX&CyI+GBAcG6 zjkCA*+5V`bzVE^LdVYXwi|>FwZTYve&wulm_!)?QEAI!l53c=@)9Hi#oSMVaz~(*x vZLR*UW+KK|Yt(a4W{;RzaQEh8X2Z2d4-W>rPkRURU_FN*+W#S^*joPu?k=7B diff --git a/tests/shader/gen/prefix_vkmm.spv b/tests/shader/gen/prefix_vkmm.spv deleted file mode 100644 index cef3965f647503b83d6f6a63ff09657add1e08b0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10016 zcmZ{p2bfjW5yvlVLqP$>3gRNNF47gTP-H<|P*4;*>iSsT!piPG+$b@7>P8~VG}6?)pbi`}K(O>Nz!)y1x4 zXi{I9*Sd7Y(y4vr)l+94Gu?!HCpDG4{{L`d>d>`FccHUK$7o~(yxd5M!B$k28s=gq z+aPL_J(AX?mF3fQ``SzEi*0?y?m|zwv%QbFI-;_+-K#o#itNVN=Du=oe|vdKb>@`*88c?@&M>!zz9dNj5f4SLs^B4&eXZ$g zTUu%_bS>%GfGT&EdX}7oo7lQ!0=jQ)+xkLpxvg_!we}{sY^~akUA5LGM_{ig7s^Gn z-PCq;bQRX9>-nFYJuIwQ(_36KFxGyq?=5yz*Q`q>VpsAl=9NE*8ky`-nQuzS&VboQ*O6yy9wRSIuS zvP>W1IxEqvQ|M0ZiwD>>$y#joPi$RM0@Lfh_TECd9f!I4;Ud>f;8J;QWs~N<89ST% zmYjVLcJ$l)Td|89+q?SvIyZPOb#!qXx~o)L*S4zAzOHh@@^#6B@GRBYyOXO;9)Y92 zN=ny%40~7p`eet>IdO8;9y|@7t@D`yc3txP&UtpCI_Kp9F}2C718nXyhZ{XT2%OD7 zI%glAvzrFk4ascm&YrX{Z3AncGa$A$nK!_$OIomfKGLg7=W3F(;Cf4!gE@b2_RI#bL6wX z*?ijL$c3upy8&R&4?^~37w%&mvz;&Ak2PJd5pj&g(}1pWwUyp#E92PT9x1lxiu3nI zHzKY%DYb`#9gWx{(2?^1Y-=&UHRwMW?OgfUsqGz!e8!GJ9L67l)+axTT{NQQ4@Y}0 z^5)T=kaF&;DfNv_@0oFO$|3l@KSv;And_a>PWbo{9|0 z*wesu8N2c$VDql{qwtw8<{u3nLV~c5!)CFa_8d%mJBIa6MB8II>zSY0^6qT`+I`xG zg=lT#PC~n;ocq%DPRi+TO|f(1Y&*b%GIl4}^Nu?GhMB;+e3w|4oca7=;~B}jo^~na z^!r;yzjGqrRbcO3*tddTrGThs3%2!!{Wb9XjQw@6Ib;64VD~v0)^+Yfyk8UPO(XhY zB<_#jpX}U6u@|I%Yi*$p{r1>rH+t+ds@;fpBij1p%&YBlDmR&%;=T7dwMX6=WfnI@ zzquVgD=)x!f6U>z`+LIr!}j+?*z>_H8Qb_}8QW)fWzP0@Ld@5{He);A-vwF!O*#AK zoPA5icD}z2vh%lQZ2jAE_JcXwZ+~|Fj-35;&VD9m`_0eJ_j{kU{oZG7zxP@Dpq%YD zKkN6KpSAtwXKlavS=;Y?*7iG}wf&}NZNKYTyO6W}u7}@#`dts(^Yq)Ewf(+lZNKl@ zE!;BSv11Y6!#*2xxpVdCLlODgiD^WSL*$Hko?AK|?RbfJeTN}(-d(Y|eOFFFe1H1h zc$QVoJp+-ShD=9%4(1U*0j;kY@g4ahxgw`}J8Q`+zBA{N&3wlo&NY|5dDQ1KBOm)Y z7A$uo>p16lM9z0@^x*_>z7MnEr^=79L9P6OA%wu=la^liG3H?Uwe5P+VfFA$C}I0 zXCMy!XQK71qxKbGbKK2Zjp(xwIrs0`YoCso%ems1dp6j)VV{%Q*Qa|q7hBHyBIZ1> zy$bt$usNdF7l4hCk3L;k!AW91=Xtj;Lgeki%G8ejw_!*B&F38Ze+i;5dR<64)XrY7 z0$+?oueFUkp0jn`c4Qdhn#M(qt1CE3?1?pL8*AS^1AC{AeHX#meXqfm>pw|9Uw2 zSpN;+?D}uSc76GHZr%iTp1wGXH-kM3eZChP&~HKHrz3G5H>G@>>07boyc0L0wZr!| zu;;Ds7PNNU^S6V&=g!d=cS_!~jQy)$>pb3pFW--M!pTPs?*dyx?DO4V_i3DWPTQDf z#67yUcgo(Yqp$A)+t+xHycaAN&yzm6>^=2P#o4_NoUir$aPsku`~X<)R>XVpA+*DL zq3?r;oVAIgwhx16rncwy5v023hRXNiHtb1=wHm9AchpD0#yhNaGWui4ZOCxM9%&ml z5t)X>x*rF-Zq)w?u$<4U^?wTOuzr1?MC7br9B1@taJJshU`M^yumy>FZwDLih#1(u65x*P0pkNQ4`$hk+c_xc|6R3!TL zd2qIGU%-}&zI_SoFsHsRB68*wN6xsf;Zw&NUk1BI3s16V`V}O4s?T0U>{r3o_!MVt zTnBP5B5$u^KWf+BioOqt`QHG$)*Z}mM1K>Ji~ZaWcDNsX-$LZvk2vyt8*H9KKJLJe zz;ba1egbxwTi=fnIdh9+e?JAghd7g;f#Xc{c_vTt?yX1v9MRu_Jc{@nK7y8SM&j&a z9d-Ua8NY9TfzP?q(|PvqF|hpZbE?1Je~HhS*z>Qza<1#M{W#j;p7s42`3vHn#nI2- zfb;$QEu4Jh{vB8@`netKFt@(nBij*ki=&@UfTN$S;3tvjr@k$SJ!nEdh3KDwOhe** z@&~X!hc%Bt{}I`Ncpmmj+c?*4K>rERukY#94&SrbTT|aNXzlps{TVzAagILc8!PWU z-HvvTYVEk6_BHnWS8(=R{)R0#74cmDfp*wCeSb&f%qxy_`6oDkF8_j)k8}AqST4@x zIk3ar`u>B+nOht+z5vdi%Zu1?F8aLFaV{@`_2WRC)v?FH_}pW>t7@?2+?(_D$%(V?D&I@? z%QG?ev zJpL_5JMPXQVDFA|^u=28*|l6V_BR%s?eC%3a&dR`$who)8Xx@_hb`xs*^lvHhyBoZ z7$WB$#j%eG;Mk*ogVGM)5#V?}n!wtz$7Zm5bdEmvXsmqHI1%i8+}J3a_(DfZSgIT*N*pyTKgfi z@y5k_WEQdc_sFqu@=?oiV7YjYoB(!Mi@xI#IdhBSzRU&3eev&;+TlA9yp|mH-@jFA z$6cBa_AWU`pE3TuQ$FS|09SKY{~M-u+=;jY3*r2&7;{d-HjlrpTHzOgtKR)4e0d-v}}yi?X1`PAC?!<%3ITKDvHVqGtO$1lg0i)Z*u zu*16boq@=iR~&ygSOIQ9vd@U;DQ_(?M;+&PHaPCnSzztx@j2jpkI#jZzmxqplKVWc zT-162*kP^u&PU{|RUEl51m|;K1ScQg_KU%C{>E}%AG!^@gw_}Pz630v-S14g7tq?_TMaf|Uprbmo|_`r=f*kuj4_YAJ&nDo^>v`_g}yal_pSEdN1f#rztbYY~7BOL8iETacd*&*zG4iqI)nN0)@0)ADn-HHdnP4xFv$dh809&etcGJv;mSax#c-{vh^N$6K!2 z2UO+1c^=EU_H$1p@KCU8?}fJ35$KVKG1eOO?33~lvoE%DvoWKv<)erDft@G6ocmxs N`y=wNu%_5r{|}G$(MtdT