From a0648a21535c8f7d94163216282fe5386d5e6e59 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 11 Nov 2021 06:59:27 -0800 Subject: [PATCH] Portability fixes The MSL translation of the prefix example had its bindings permuted; a flag prevents this (but, as is typical for shader translation, potentially creates other problems). Also use explicit unsigned literal to avoid DXC warnings. --- tests/shader/build.ninja | 6 +++++- tests/shader/gen/prefix.hlsl | 30 ++++++++++++++-------------- tests/shader/gen/prefix.msl | 8 ++++---- tests/shader/gen/prefix.spv | Bin 9792 -> 9760 bytes tests/shader/gen/prefix_reduce.hlsl | 8 ++++---- tests/shader/gen/prefix_reduce.msl | 8 ++++---- tests/shader/gen/prefix_reduce.spv | Bin 3504 -> 3472 bytes tests/shader/gen/prefix_root.hlsl | 8 ++++---- tests/shader/gen/prefix_root.msl | 4 ++-- tests/shader/gen/prefix_root.spv | Bin 4104 -> 4072 bytes tests/shader/gen/prefix_scan.hlsl | 16 +++++++-------- tests/shader/gen/prefix_scan.msl | 8 ++++---- tests/shader/gen/prefix_scan.spv | Bin 4752 -> 4720 bytes tests/shader/prefix.comp | 4 ++-- tests/shader/prefix_reduce.comp | 4 ++-- tests/shader/prefix_scan.comp | 4 ++-- 16 files changed, 56 insertions(+), 52 deletions(-) diff --git a/tests/shader/build.ninja b/tests/shader/build.ninja index c135fa2..f4dc4ae 100644 --- a/tests/shader/build.ninja +++ b/tests/shader/build.ninja @@ -5,6 +5,10 @@ glslang_validator = glslangValidator spirv_cross = spirv-cross +# See https://github.com/KhronosGroup/SPIRV-Cross/issues/1248 for +# why we set this. +msl_flags = --msl-decoration-binding + rule glsl command = $glslang_validator $flags -V -o $out $in @@ -12,7 +16,7 @@ rule hlsl command = $spirv_cross --hlsl $in --output $out rule msl - command = $spirv_cross --msl $in --output $out + command = $spirv_cross --msl $in --output $out $msl_flags build gen/clear.spv: glsl clear.comp build gen/clear.hlsl: hlsl gen/clear.spv diff --git a/tests/shader/gen/prefix.hlsl b/tests/shader/gen/prefix.hlsl index c0600e2..3af5a96 100644 --- a/tests/shader/gen/prefix.hlsl +++ b/tests/shader/gen/prefix.hlsl @@ -12,11 +12,11 @@ struct State static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); -static const Monoid _187 = { 0u }; +static const Monoid _185 = { 0u }; globallycoherent RWByteAddressBuffer _43 : register(u2); ByteAddressBuffer _67 : register(t0); -RWByteAddressBuffer _374 : register(u1); +RWByteAddressBuffer _372 : register(u1); static uint3 gl_LocalInvocationID; struct SPIRV_Cross_Input @@ -64,9 +64,9 @@ void comp_main() for (uint i_1 = 0u; i_1 < 9u; i_1++) { GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= uint(1 << int(i_1))) + if (gl_LocalInvocationID.x >= (1u << i_1)) { - Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))]; + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; Monoid param_2 = other; Monoid param_3 = agg; agg = combine_monoid(param_2, param_3); @@ -92,7 +92,7 @@ void comp_main() } _43.Store(part_ix * 12 + 4, flag); } - Monoid exclusive = _187; + Monoid exclusive = _185; if (part_ix != 0u) { uint look_back_ix = part_ix - 1u; @@ -113,9 +113,9 @@ void comp_main() { if (gl_LocalInvocationID.x == 511u) { - Monoid _225; - _225.element = _43.Load(look_back_ix * 12 + 12); - their_prefix.element = _225.element; + Monoid _223; + _223.element = _43.Load(look_back_ix * 12 + 12); + their_prefix.element = _223.element; Monoid param_4 = their_prefix; Monoid param_5 = exclusive; exclusive = combine_monoid(param_4, param_5); @@ -128,9 +128,9 @@ void comp_main() { if (gl_LocalInvocationID.x == 511u) { - Monoid _247; - _247.element = _43.Load(look_back_ix * 12 + 8); - their_agg.element = _247.element; + Monoid _245; + _245.element = _43.Load(look_back_ix * 12 + 8); + their_agg.element = _245.element; Monoid param_6 = their_agg; Monoid param_7 = exclusive; exclusive = combine_monoid(param_6, param_7); @@ -142,9 +142,9 @@ void comp_main() } if (gl_LocalInvocationID.x == 511u) { - Monoid _269; - _269.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); - m.element = _269.element; + Monoid _267; + _267.element = _67.Load(((look_back_ix * 8192u) + their_ix) * 4 + 0); + m.element = _267.element; if (their_ix == 0u) { their_agg = m; @@ -211,7 +211,7 @@ void comp_main() Monoid param_16 = row; Monoid param_17 = local[i_2]; Monoid m_1 = combine_monoid(param_16, param_17); - _374.Store((ix + i_2) * 4 + 0, m_1.element); + _372.Store((ix + i_2) * 4 + 0, m_1.element); } } diff --git a/tests/shader/gen/prefix.msl b/tests/shader/gen/prefix.msl index ecdf8bd..8e402a9 100644 --- a/tests/shader/gen/prefix.msl +++ b/tests/shader/gen/prefix.msl @@ -87,7 +87,7 @@ Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) return Monoid{ a.element + b.element }; } -kernel void main0(volatile device StateBuf& _43 [[buffer(0)]], const device InBuf& _67 [[buffer(1)]], device OutBuf& _374 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[buffer(1)]], volatile device StateBuf& _43 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint sh_part_ix; threadgroup Monoid sh_scratch[512]; @@ -115,9 +115,9 @@ kernel void main0(volatile device StateBuf& _43 [[buffer(0)]], const device InBu for (uint i_1 = 0u; i_1 < 9u; i_1++) { threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= uint(1 << int(i_1))) + if (gl_LocalInvocationID.x >= (1u << i_1)) { - Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))]; + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; Monoid param_2 = other; Monoid param_3 = agg; agg = combine_monoid(param_2, param_3); @@ -256,7 +256,7 @@ kernel void main0(volatile device StateBuf& _43 [[buffer(0)]], const device InBu Monoid param_16 = row; Monoid param_17 = local[i_2]; Monoid m_1 = combine_monoid(param_16, param_17); - _374.outbuf[ix + i_2].element = m_1.element; + _372.outbuf[ix + i_2].element = m_1.element; } } diff --git a/tests/shader/gen/prefix.spv b/tests/shader/gen/prefix.spv index 170a56967f9c8b7997b688536dd270fe3d1868bf..b934189c036bc18f0cdd7e5e65ea5f7aefeccf91 100644 GIT binary patch literal 9760 zcmZ{p2bfmX5r*%wyDSO{DA+*U6~zLmAY#FWh=8KlyW+Aeu)^-@7VITzsxiHoUQJPB zNY^A9F&fiMVtO%&8j~k6-6R$gP2Tt4`{CU`kH=@8`M#MsbI!~;_uikiam3V?qG%|# zEJhXAG!(Tlz8Ha}C`K0}YkJYrQ zp3ddnwb(uT2l@s(24)V;oH;ma*1Z3>Ft?F9i=rJ7k4CSk;Ze0cuU@x&X>UjSx+Oj9 zQ3GAQJxh+pO>A?qJGy_(@{8L029|eSI<)s`*s6P#J6_ytQ?Up9l!5kvPPCnpJ65f0 zU#%X=f77q1XK#eeDAsYn%qMPYidQB`$l**_f2(t z13djU|DEv8OFPyL_IIuKT$<_PJ?M44y=#}RXzy5CJ7Mk3#YXHb)z!Dz)>J%%js4Y9 zy8grP7xRxS9@#u6CpXlC$FW!U`NS~ZTs*aTo}Cz)^X#ygref1D-dHqqxasKxa5evS zb$pjP-ZqT46m#KSJ!M~(58wNN!(y9?gNO0v;!wEHM|oB0Tw}2eTVL-b;HKhqaP`d2 z9X7wIIDZ&#EZX5i=iOLzfm7q!VR21GcOCD8FB=@FT~hC;+|fHYP`jk=YoN3(Vqd6Q5^zD%_zh&uqQTG}?TJCC=wn+bG2Rd!k1ozOxp?eSRGikt*N5#)qC){V%RH zG{?27bL7=I)x9jDM%QUX+=I{iu81SN4eVa!N21;1P#sO1pX(j4S=W5DBYQHxw&Y&` zuI87U-(miX8>;zVJdUy{ zPYZg;)>Jzixza{`Ta?%{Q=Gpwx)pKF=_MZvcC^CBp_6laxO*|bd(giV+PT{2mE1d) ze8x^j9LDdA)~9^}RkWhD?}qkVw3|onw@90HO+)LOQr=UsHf3l0-oHJNDa`Or=+|cM zz0vkdySe4w6>a)wl-N1;K)nyrP~rX{Fh2fS;86sG&xMn0Gd~1s?ffzvzDIuOv&A2Kic)QIZy6$rELZ$>pk_kvS!a%naiEhZ*GTo`&lf_ zXmfau{(f-3;r@OI_dNX#5pMi(74EZkRvq_uLE`nVs&MD~JD}>nzK-8m$8V}|=WnQR z^ZQ#M`SklcpvpJaaliHPJKt}8l|Np`pQz)0ZC69rycQ^-rkde&eftzwuSx zR>%FuSN(q1tK9E;mHQ2^a=+tM?svS({f>v*&-MzpUw+Hum-{`ha=+*D`P?$!iBk~Y zn?4JNapy*&cR{q@N=z%d4bf)IGu+atXvZev_3esi^X`hx?YnIzG9B?f@Fc66dtXGm z_uqW#!^!D8SKo8wNG|oGtg#xl_zAIDrLE!qj z55}fF^X7rI`To4N%ylT(eEMA5=TTeMJ{qiTJ}WOoAA>AF?14VxrlA)h`rMD_v>5SO zHtwWyttDXN9L9Ox$0EiW=lXKv#6J7>(;hBGdmieiS@Q(+GQ^?(M6`Z&_I?uB92;1x z6@4>lljdy*S#-#r6+r%v4+;A-8SaBVA*oU69#xq1d!Zw=V>RwL<27g)RVSE1#` z`^@(slaR?s_Rw3}GyfvE^PF=rx)13_j5DX)7;!(^T<&iGZGCF*-FoyTh(rISX#MJ} zeHqvsxp!B9wOx*Q@8pAsxtuGme=nrAEAiFWb`>`5S?_AF>)l`8&1=BkOYL`HYeip+ zI8UGNyz9^o_j4`!Wr#NSBhLCafa}+PIX3NC{}tfs`mcn$zV%g9`zMIhU-0Rnaz1PmsXNFL?pxo75N+;ToHO|dxVq1e z!n04;z6HrXKL$45k$rv~?40aVZk%V3_0;n8P5l^q^clDn$@$y{o`hIK>bxDSJ#~Hp ztS#sBNwC8@_1%GJvre(S+kkeh`rZ<6M4R9J-G?@p+IRo`=m!vo{;#3+tKGwc=&vHi zX&2}0zYeZH`)^>=p0ocZSX<8i+hB*e^?eJ`W^Qrn`wrL|();g%(|djPKC#~e>$mQQ z&~j@@UUmIDuzHu1=ljGt-#iba?h#* zp8XV?_T>H9c~3lA+EBH< zjzd3&Jc8J3dm?`nF-HsfH;8_HkC!~Y-@@-KeNUj}`F8ydJO*)&KIa>&-E&KAYI*LS z{Yh= z-w<{&@hKeex{K?Q3Qqrv)ej$^>u9M$vMi6%lcb`Hxidx$HKLFN4?wQ z!47NHHxAhru~xBpd_K1UyN+kzy^-5<=Ns#s#7+boW2}E)k;k_^IC=bAi#&H{2e5a? zIr_4e_Uc-$nfi7FZ)8sTyAxbn?v6ffiJt`4mVQizYxB(P#}u%`e(2j7(PoX})X@q~ zjsA^D9^bCuJRfafd1{;rwnpdZvqodJXOFvqy;s@e?r?26vpv8L_o#0gvM1sm#pd+e zvmavLjMryO*4PW|8hPgShHFdT^l3BKMzCvFbD2|~-l*kyPSo|^$*escU-jE4k8cL} zVb)6RGvV5-#q-%0?67uy`ykq^PwXD<$1m3|&wD~G&+iuF@}8JQZ2fy;Ha6|q%l=?( zc~2YwcDNUPa}aH=Ew(0~orA!+5B_~n9^WD0Rm9k5|5hl^U6}{=t~f`ZG5)<#d*&Yo zPHz8hEl)2G2hV1HVvm43-`_zCu^$P}H^kUmu;pDdA8x$E9?d`>1$R!~HFD$ZY3f#2 ze-EX<3$WMs_h@X|)8B<)ZRzhKu*3f9I|k8af5oYDG1xlu`)&z5b?M7~wda{g9<}?w zoB8Hdztp`wmRQ%x@9pE@+VWf;4|cd;eM=E-<`w50P5^sWo~LIgcMpkGC-y{e&UYDD zo}QiruJ7r|*tFkX{+r+wu(s^+G_b=x>N^$D<{rh#eFnHb_nFwV=bdpDSew6-oOdz$ zYcOd36w>Wbz1Uol%tplg# zwP3mZ8%3@j@DXU|B)%7{-8#C_^7P>%uzfI(K4ZGj+Oz&3cn&e)>*4Mv-^@$E#%Rx) zmx9fc-zb-X2N0hnC@(X z&&>Ju`T3%Ozu!01{=b^nzn{h<#<;C{cYp0k1 literal 9792 zcmZ{p2Y8m%702I@g@OzfCt@NR2SZRq1y@7`6bIs7A%rAEl6;taLELERuC;sEWv8uL zYwgCirfRiz+1Bo1wWYR?-K}*{M}NQXdr!D|`*?lMbN=Ul&OP_sbMO1UAF+1K^u{c! z$+pkNX4ltbl`%OR1CwRrvWALYyllm?*@NZPv*#T)$AAf0Z6(f_iNw^S>yWv+rM^`? z{he)nmDmFY%L79lLWQb&95(*E_R za!;v$=?S=rt)BAPy#+3=otqawpnOX*;@l+q=~b z{3mCB+Pk|4I=e^5+N*T~on6&6>$3x~EBTHe>H;#mvBHTf#dVa1`pcaIRbMsU+=F=y z?t6w#hW$PS9h=fO4?FgEEV$Q(eHx=x=v7_JS z-;CY4p`&+buxGvJQco9$(Y>Y8+O}2g9cwEmEMK4956@CP10!5r_6QvHRZ_bCquAT> zH)M~E%!!k$_FxNqTIZ9a?E38Kk$HBaI%n&sn7ZtRQFd+C$l*p$cLAsQcPrS<1$+7^ zyD>WgyQe?zOWWw$j~o?Smn|G+*JsCI`+VeAmCn^>%i#t}mxJrF^T6quT{3EZUAA(R zU7K}aSI@gPTMLdo_Kk|G%Sr{ijD7l0xpGOpquP$rP`PqRt!sVGEv7EtRlWz*ONehl zJEm7vg^Opl$eBj24sot=KCg0P5%V8_Za{ozEy4Erbxc80d)JCxeO~o%TdO+9wbD7- z>72Bd#q7~_Y7uMjdEWz#~-NwEVtk&qdxm+J1}V+*b=)-?aSxiRF~N@O%IEN2W2uJE33B+y|lUm%O>P zy(@D1XXe;B_CP%wsYz^q5Evi+x!|z`ggqadWFz(hOnWz#d`F}0p`7a-liTvvejM6; zxX)I!wsFU!T~p4wwHM`_{u6WT+&H_{;F`ql278`S=cQniTGv{jb;+5}@1OmacRg+2 zM{@cv%dvAJ-<4qRRoFL!U#5Vl=T>a@5dJ&BOOTo5GtWJU_v1kJ-h_S_iSzgS& zSZ~<=ehAz1^fyG<#-EbdK5OR}Y=0L-y#B7lcD}y@Qvarc?Qen9e^X*R-`@hMe{*8% zA1>JU7i_=v>3qNSslBCOKUuK-#;5UH3%1|))bIB_wRb7le&bVrbHVl-pZfjAr?%hq z)b_ic+J3`R+wXX4`yEg1m5FUX{f?)8zvZdz_dK=zo@=*q%X}wJLws-gEF8_9Ye4Uf z$lpdx6S^6ZGv--t>2$PXEAjgFLFBx`Bii9R3@o>aI$ZN`M9z0*?E46Cao(Vke7 zwz2lzGq88+*mnmw-FGLp+-fAwRW3bO&mh)Y19rV`Bzn>VmUn&^THAP^`F>pn=-+_Wua32^0GlK3-Bn<@ zD-rLV_7Gw&=ZcH(h1lEG_=l@^=!`guWgz z&ql;|;H%IM>(h4wB4>T#sOQz-Vm+^claG2{3r_2K9k%tzM?J3x%g6Kd2C#YLTKkYlrX6U^#s^qP4^K7O>~9?gze9d)|)tw_{)8`yY9)OkDDIZ>y!ah^e}r`C?Xsh^<6+2}ivIG<00i|2DEoP6x_ zGhn$mpU;9F?o;1gh@ATr+q=8bt`)ue960UW=dtCYcV7fM%&G4Sh@3gak#i2%eT7dQ zYkUbD_n`s&WyCsK`Ohz2uzBKLbRSsGp1JNYT26dF+Wgk{0NPw?zYPzfA3_}ZzlGMXwuXn% z-$aa)7suIu8(cj5@4(5&*?$)-7ia%{u*2N?zK6(}TO9lQ0eBdR-v1CBz1L^&BlbsN z{qFS=f}i3-#m|^wZr!luw3lxr(iknlh5wY(GK^e?`OzUi2D*p z&wc?e_UxB%@{#*jV7choZ@><7>-#k#XKr!S_*?KWV$V)QKaNDt^!Z*ewgvqJ@;DOj ziQnZMs#5DD^zV_!khtgC#+joL{Rc$9{o0b-;rk=@=G^xrT06d7e*%w3oTJbA#>#tc zu{X8$gJ}0^KVz?d2B+uo7i_uNi0AP)w8Nh1`zs=69&wz<-@(Q6_y?SPoX0=Ga&aF2 z20P5H?_Y?Vxy4cAf55{?oX0cZI1hcR5o24>&m#KcJf6!r)W~@}4}Kbn^UyXfdj8+s zZ$Gz!wZr!!IL_k*uy&lsOJL8#Ir`!}k29$Q%f*>AfE}KRzIsH?+~TO)zuOHH7yrK84qI+M zT*PdTZO!qUWgNC~^6_t)@!&pAWK))(hE0W9Zme?C_evHhDzoTIjJvHm2m z>&IRvW6Mp2^KS11cDPr4J0epM_bN7z&*#oy*YOOzH`?~x`NldYVs`}_W2}FF(GK74 z;K<|OVzlG#>;d-fI7eTsC7-V4nz6q;sNH`Zpx)@a+qZ=VLlpJNCFA*gZN&pL;Y`K5A?M55q-``(w++nH>mr zSfjoJkb@9w6r1xA{BscdX1qRQVvQN#Vf^vT&BT_AzUh-QSEYy8X)beWM{m^H@tmlO z-^o~e7C!4U_oHa-@XZFx#oiCbmUAzj&s?y>z3ZEU$hkkUH9Ux4TV6Zf6Kd@Ta=&r$ zo;Z});(KBqoP5-B7+5ae6NiHx)}n7dBInv-_vEv)037$hzaMIc?`;6ar zr(nmv^hLe$@k~S>we{c2eDkVb>E13Q)^+0d_Nmx%@m!t`c37{z(-1lHisKy40DD%R zr)Q^a4H2u3*fYU#zRSVd(bE;+Vo%S4lfNVXH^JFpxv246u)`YlorB0(qd0P(4=(1u z08T#M85e@({GH^y%g`5L_o4O09xevUyWjKB+Tm*hyDxp0ptZx-4mMujO0;%78>_%R z8_v;ZjCthkW$a0X2tTpP{G3O&@Cv4}YF*{?+M-QifohN?=_rZF0LF8XzO|iB950*QwWB>pF diff --git a/tests/shader/gen/prefix_reduce.hlsl b/tests/shader/gen/prefix_reduce.hlsl index 837a75a..f2de539 100644 --- a/tests/shader/gen/prefix_reduce.hlsl +++ b/tests/shader/gen/prefix_reduce.hlsl @@ -6,7 +6,7 @@ struct Monoid static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); ByteAddressBuffer _40 : register(t0); -RWByteAddressBuffer _129 : register(u1); +RWByteAddressBuffer _127 : register(u1); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -46,9 +46,9 @@ void comp_main() for (uint i_1 = 0u; i_1 < 9u; i_1++) { GroupMemoryBarrierWithGroupSync(); - if ((gl_LocalInvocationID.x + uint(1 << int(i_1))) < 512u) + if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u) { - Monoid other = sh_scratch[gl_LocalInvocationID.x + uint(1 << int(i_1))]; + Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; Monoid param_2 = agg; Monoid param_3 = other; agg = combine_monoid(param_2, param_3); @@ -58,7 +58,7 @@ void comp_main() } if (gl_LocalInvocationID.x == 0u) { - _129.Store(gl_WorkGroupID.x * 4 + 0, agg.element); + _127.Store(gl_WorkGroupID.x * 4 + 0, agg.element); } } diff --git a/tests/shader/gen/prefix_reduce.msl b/tests/shader/gen/prefix_reduce.msl index e1ed0ce..3a3125d 100644 --- a/tests/shader/gen/prefix_reduce.msl +++ b/tests/shader/gen/prefix_reduce.msl @@ -33,7 +33,7 @@ Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) return Monoid{ a.element + b.element }; } -kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _129 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _127 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup Monoid sh_scratch[512]; uint ix = gl_GlobalInvocationID.x * 8u; @@ -50,9 +50,9 @@ kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _129 [[b for (uint i_1 = 0u; i_1 < 9u; i_1++) { threadgroup_barrier(mem_flags::mem_threadgroup); - if ((gl_LocalInvocationID.x + uint(1 << int(i_1))) < 512u) + if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u) { - Monoid other = sh_scratch[gl_LocalInvocationID.x + uint(1 << int(i_1))]; + Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)]; Monoid param_2 = agg; Monoid param_3 = other; agg = combine_monoid(param_2, param_3); @@ -62,7 +62,7 @@ kernel void main0(const device InBuf& _40 [[buffer(0)]], device OutBuf& _129 [[b } if (gl_LocalInvocationID.x == 0u) { - _129.outbuf[gl_WorkGroupID.x].element = agg.element; + _127.outbuf[gl_WorkGroupID.x].element = agg.element; } } diff --git a/tests/shader/gen/prefix_reduce.spv b/tests/shader/gen/prefix_reduce.spv index d1db3aab8ab019deb8a9f24a8d497a576e57c22e..b2e35fc79906afa27664fd65a1ed6084c7d96a0d 100644 GIT binary patch delta 1046 zcmZ9KO;1x%5Qa}%T2UazmC+lCSlt*GvNFbKqKT;=i-x$a(pC@@C_;-LsJ(uGxYOLV ze?T@E7Ovd+2mCdPiP7hw_mYO2yq$O6nKNf*PPd|u(MTa*J`h4K914Ts!>5sIF8*5E z%pJQQLTI&Trk1e9Fc9Xji}j`Yd~zgzbo?b=wbQ=VnGT^uT;H$NX*bl0p%A(~30?|u zGMnG5zd$L00dS-5R5{J8aLP&rP$A0-r}kn!e+)mD#$)UdIG*|&+nvI{T+M4vgVD5{ z{z=Nc!sdZjTuObfP}7gFF|fS(>sVv9e2!nQ*+_mClQEaW(soY`VqH$Zzp%YlY8JX4 zya&#MyCB}(MF^Ad8Zfqnb^%)llb`~A(RdT<_lHJ|HGtZ0T9RMo+$eSm+y*DWWpcf9 z8lZRgga-%{;78iP4BV#Fv!-U@9yDh0D)u3`1&mqT86N@lD>UgI^t0GJyqw17!krKl zvS17D0S_)<^)J$|MHj&^aFNE$v5@)>6K9h*lg|gEn~zcV&+PwHJ-clgZiQ@=Hr!Fn zv%3Cr@_i^e(}B61Y!NGPwd=r4V=Kwup{V-=w*TIzXzE|mcYFp{^F4gWRji-yXzV#q z^Bv{cJHCd;p#L4;;H#%IHNsn*-cY4*7XA+S4?YY`o=D$!4Q{TV7p!C71M|&wEPeA_ MZ`ROivNQbcKL-tO;Q#;t delta 1090 zcmZ9K%T7~K6oz+upn?S%n%GFJ)roO#jFAKrQ!j&tDM^(UQSjDUTSWy6RlMLtbH+Y^ zGQlwL1$+UA4orLtFBAQ~_8ek3$$!>b|5|(9_I_K~E{sRn(!LNfVSgA2Z{CepGQCgL zt<2~`2%*tlTxi9`?7`_KRmlS|wEY^Lc3o~bL}7hEq31#` zUdnFOp2Fn75V-K)sZtVI=9HNtP$tR@M|XV#yNI1h{5@nI97^oXolf*OQ?<-7Fqx#& zK0>-RWENQBOk!I?PP>3KrRj}-h16%uC;3}uGyXap7hMiZ>z^7y-rZw2cQ$hKQTG7! zRd60$0V^am*&L8x>+dFn8hRDXfo-@`$U2ZOgP#m?4e9rX9Qx*g+%H;GpXS^olI4fX z;IMO^O$y#f9I3sCJp;aQ$R?M7P0FWD-c0zk$y?awk+)a{c^g~;`poT4d?NBwp&JDk zC6PKC=%v0h>Mu<@_1!}^eu`;*Z`!BXmq}Y-47g~0#+XO`HycPdq5MH3v&h-_!|24t z`)J8NcGuB{lTRym(9M>P^#I+mjI)wggRnL-(P2&KZ1D&w+e0Q$1{842H^u6X-9% lI+y?k&(MSiUjk$OEa0nu4U9L|v9yhI)oDYk@wc(h{{Xh1bm;&9 diff --git a/tests/shader/gen/prefix_root.hlsl b/tests/shader/gen/prefix_root.hlsl index 2ad617c..adf6bf8 100644 --- a/tests/shader/gen/prefix_root.hlsl +++ b/tests/shader/gen/prefix_root.hlsl @@ -5,7 +5,7 @@ struct Monoid static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); -static const Monoid _133 = { 0u }; +static const Monoid _131 = { 0u }; RWByteAddressBuffer _42 : register(u0); @@ -46,9 +46,9 @@ void comp_main() for (uint i_1 = 0u; i_1 < 9u; i_1++) { GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= uint(1 << int(i_1))) + if (gl_LocalInvocationID.x >= (1u << i_1)) { - Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))]; + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; Monoid param_2 = other; Monoid param_3 = agg; agg = combine_monoid(param_2, param_3); @@ -57,7 +57,7 @@ void comp_main() sh_scratch[gl_LocalInvocationID.x] = agg; } GroupMemoryBarrierWithGroupSync(); - Monoid row = _133; + Monoid row = _131; if (gl_LocalInvocationID.x > 0u) { row = sh_scratch[gl_LocalInvocationID.x - 1u]; diff --git a/tests/shader/gen/prefix_root.msl b/tests/shader/gen/prefix_root.msl index ff02287..897a6a4 100644 --- a/tests/shader/gen/prefix_root.msl +++ b/tests/shader/gen/prefix_root.msl @@ -85,9 +85,9 @@ kernel void main0(device DataBuf& _42 [[buffer(0)]], uint3 gl_GlobalInvocationID for (uint i_1 = 0u; i_1 < 9u; i_1++) { threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= uint(1 << int(i_1))) + if (gl_LocalInvocationID.x >= (1u << i_1)) { - Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))]; + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; Monoid param_2 = other; Monoid param_3 = agg; agg = combine_monoid(param_2, param_3); diff --git a/tests/shader/gen/prefix_root.spv b/tests/shader/gen/prefix_root.spv index 70ba31c92ccab887aaf9be7a7fc71ee83cc25c2a..3e0422409cf10798205ac282a424700613c33a0e 100644 GIT binary patch delta 1318 zcmY+EOH0&I7>AEDIS%GvQkxRfAw&pUi3^JY7cE*uxM|S^auIY@o2<#2yfxL>-Tr0S z)v}90MB$<@(1&T;#xAtzd4@S)fB5;{-}8IVd&Ww~_x4;mwWT41y3iQ1;lr0)L+a3Q z2w`~i*wF&AKCB5v>Qy>Gn`MU|ZlnmA{w#W#QVFy`RWHG7FwC6oHde@c1w1U=vc_ypPQ$WuVN#(qM72KV9>UqFZOoZU>R)z-v=yoQ`&vdvFRny1gtS;j=B1OGh!TdDB0H7T32$&-pj;FAod?ewo1XM%OQ6d1`xC@g$`&U!Tm3 z-9^emxdOY{z_r}R@G~JnnxOO`U&{kvlh3trAJ%+%q$6tI12n7Fs>j+#=+;Kk^XQV5 zm_NOF*W#2}jHjRzSQHDM)%+GbLD!D2<~h2r#zw}>nM3N21YV$TTxGvRmr&gOg3rPv z1M$#T=r-zS{%+)JuoX0e1uy_%Z)Opg>*o;uqj(F2)@(b3e&qFR`k4k1F=BE&^Sfy{y_htU(jjQb;dEfcbJlT@Vq&HVZQ7Wp5s-q8Ia#iWQ zr6`JueaDaXA}gbcXb@Q{^cKz}_ND*qOmr2yw>UsW_> zAnf>9ZLV8gpOvJ<^8XJ!f*R`$nYJD1E%M+fw=Q^GV z@R54&Y!c{e0^Zq8q@Q;};1ovDXzQ!96N3(ph?`@fB@uLFcx?w~u1u`uslbp0}x zr?$HaCn=5j2auVudq`O*f5I+yJr6MaOn8ViLFqw0h)2LCpObMPC%!z=5w-6Fnyu8T zhuWv;)&|lu=#rJ#GvtidKasU7T diff --git a/tests/shader/gen/prefix_scan.hlsl b/tests/shader/gen/prefix_scan.hlsl index 322a453..d9e74ea 100644 --- a/tests/shader/gen/prefix_scan.hlsl +++ b/tests/shader/gen/prefix_scan.hlsl @@ -5,10 +5,10 @@ struct Monoid static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u); -static const Monoid _133 = { 0u }; +static const Monoid _131 = { 0u }; RWByteAddressBuffer _42 : register(u0); -ByteAddressBuffer _143 : register(t1); +ByteAddressBuffer _141 : register(t1); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -49,9 +49,9 @@ void comp_main() for (uint i_1 = 0u; i_1 < 9u; i_1++) { GroupMemoryBarrierWithGroupSync(); - if (gl_LocalInvocationID.x >= uint(1 << int(i_1))) + if (gl_LocalInvocationID.x >= (1u << i_1)) { - Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))]; + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; Monoid param_2 = other; Monoid param_3 = agg; agg = combine_monoid(param_2, param_3); @@ -60,12 +60,12 @@ void comp_main() sh_scratch[gl_LocalInvocationID.x] = agg; } GroupMemoryBarrierWithGroupSync(); - Monoid row = _133; + Monoid row = _131; if (gl_WorkGroupID.x > 0u) { - Monoid _148; - _148.element = _143.Load((gl_WorkGroupID.x - 1u) * 4 + 0); - row.element = _148.element; + Monoid _146; + _146.element = _141.Load((gl_WorkGroupID.x - 1u) * 4 + 0); + row.element = _146.element; } if (gl_LocalInvocationID.x > 0u) { diff --git a/tests/shader/gen/prefix_scan.msl b/tests/shader/gen/prefix_scan.msl index 4d69d18..5be4e65 100644 --- a/tests/shader/gen/prefix_scan.msl +++ b/tests/shader/gen/prefix_scan.msl @@ -72,7 +72,7 @@ Monoid combine_monoid(thread const Monoid& a, thread const Monoid& b) return Monoid{ a.element + b.element }; } -kernel void main0(device DataBuf& _42 [[buffer(0)]], const device ParentBuf& _143 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(device DataBuf& _42 [[buffer(0)]], const device ParentBuf& _141 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup Monoid sh_scratch[512]; uint ix = gl_GlobalInvocationID.x * 8u; @@ -90,9 +90,9 @@ kernel void main0(device DataBuf& _42 [[buffer(0)]], const device ParentBuf& _14 for (uint i_1 = 0u; i_1 < 9u; i_1++) { threadgroup_barrier(mem_flags::mem_threadgroup); - if (gl_LocalInvocationID.x >= uint(1 << int(i_1))) + if (gl_LocalInvocationID.x >= (1u << i_1)) { - Monoid other = sh_scratch[gl_LocalInvocationID.x - uint(1 << int(i_1))]; + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)]; Monoid param_2 = other; Monoid param_3 = agg; agg = combine_monoid(param_2, param_3); @@ -104,7 +104,7 @@ kernel void main0(device DataBuf& _42 [[buffer(0)]], const device ParentBuf& _14 Monoid row = Monoid{ 0u }; if (gl_WorkGroupID.x > 0u) { - row.element = _143.parent[gl_WorkGroupID.x - 1u].element; + row.element = _141.parent[gl_WorkGroupID.x - 1u].element; } if (gl_LocalInvocationID.x > 0u) { diff --git a/tests/shader/gen/prefix_scan.spv b/tests/shader/gen/prefix_scan.spv index 5c16dd25ce718fe5f7e6224e818d35c376b7f44f..6d8fe0af951822060532ad7091e8a50b3fd80f6f 100644 GIT binary patch literal 4720 zcmZ{m_jgoP5XT?v5+k4i!7iGBAR-{x6&n$aXwV=kHVm6W5>2wW*@(S_*n6+ou+!{a zfAIK+_*dA@(c|ay_6@wO$IF>B-|x)aJ9FpGK2n%GZ@(m&luS(;lb@2>nwd<-k|fiT zhE%t;uWMgAQterK%BjnYI4~)sfyOkkZwk7I43!3kbsUAPN5+vEoS|B%`Pr?aPQjRI<9qb+I9j;<&N~XiR%R^lQ!@ZqDY3#z0YGt&$ zx}>&q$>{RsEB ztJ2tFG9NvNaWLZx(D|C>u6;EZlZEhArD~~lbW?gSxvK}8+IkiVSmKk=11T;ht?13A zN@*zL7f#@XZZ{hcG-l~T34zjhzrg!{Bxa@}>(TpRF5t}?h>?eDFi zeUB~ZeD2#P@Io>MuavhC+mOCHdDfRJH@8*FqnmA*H9ba5N2$W|dHdSCYIAY6&q`Dw zd7@?l7n5x@-`+dFIe{0F?eJR7g=7afdb4XnTrt^`<87SljQZ~Ci^&vkH4pAG<~Vc% z;%pidZ?!L`klOq;81K8yMO?vKYBc5c-<(Hg{_%_-k?}{-`|=gjeql-uL(Ct&%IDV> z`R!Xizuf$j5bOC7ZGLBb7Qep9=oWUnUwkkz=5Iz^N8|B#ShE%RMIx+Hsz#eSvS7ttK=D7;(ENZt7xxaI5(JN=nyvARj`SR!YVDsG~-zKp4 z2;K*7o0`(SDDE>-XCk+<9oN<9iq zG4i=cGtz>bOIPiowunCgtZgOH&h-++S@$=r&p6+FDWcEZ)_Njh|BYLlKjD9wP5pgAf9P$S0LKme|g4@ch0Xt z)+6~jzc#bS{_EiGbC2J_8xX&h#(AdP81V+QXIbBkXz!&9oArnz|7LJ~{#&qVkNg#IKK}^Z{MzI9QU$wDU%bI6*f-D@?{q6z zyYEm&%fojYSew2rXnFYV0QwuI~g-N8F>&7-O~jhH-E8|GmRq`0D#`H#Y52 z!#!Znjr-jT_I}2hQ*MkoogsUzc7FrMdBSF z0FNVckjVESSo_XwMjirdGsYQt1nqK0^gWDdvu<&m{Uo^l?5D74kN1BXtj)QIZ`m{8 zCy*vYzjZA@Ka1##Z`pI14VC7cfqowGj_&ooa^vhx)ToXcUjWy?-;3C^$NgUdYm588 z0(N;0yuUckeidAQ_G{R*$NgUiYxDjwFK?v&TK(I=a?iXT{TAZvxG&=02ET=P z-#5|nnDKYO&ba63iy6>vPoi$MzU^r5sqbC1cT)R}dmsG);?n;iTE9AG=OeIZ#5e9^ zu(lS&e#zfMJj=b}*t-+#-niFpuy1KyyU=p`(1`v7nUA<9;y=yo#_s{kqYs~f?Stp& zGiC=`d*uHde0Yw30k)p_-F*o*MtkJ^3hX&CyI+GBAcG6 zjkCA*+5V`bzVE^LdVYXwi|>FwZTYve&wulm_!)?QEAI!l53c=@)9Hi#oSMVaz~(*x vZLR*UW+KK|Yt(a4W{;RzaQEh8X2Z2d4-W>rPkRURU_FN*+W#S^*joPu?k=7B literal 4752 zcmZ{m33F6c5QU%2B!D1`fGc1EvZ#Qfq9B5RXhcE4T~T33LWm@pIGLabDk8*P+;>G( z+yVCux3c^p{tCA$Ex+&0Yk7?2O?CA-r~BU9x9@#ZDGZuCI7yC5j!uRoKPI&?IvIqL zB*T)%)VD0ZdikurYTK-H=bUB6h@_AvnlqAl4frC~UFz&HFdn-e8^A`fhOyBY%cko& zufQKP`3cfMMt;7WqLLNmo^oef!U_#3M@rhe+Pm9(swg9q;qcaS_xjGB_O;z>P|LocG|En;!zF}kcYH3OThV)!=M;n^ndQKNG#Ao9>Q(R1z z;CoAzQg_BLIf56GmGDwW2lE<|>%e^**Y>qmO4ZhlwexrlWg%S1-6+ z-Pm5idmmfy`MtLu!3)Vwc%^&?xsB<)v(NSA%BGe|xxd$jS<_?WtSVKwK2Kk}S2YJ~ zd#z*@lHD~4Tuk=VV*j3b?-9I^?1R^8E+qTG(VK%uTbG(IheWTvH{xWuoyP6Dl zm@^IEh!rt&;;s(o6jIHv!F=y+66OfrT%#$s|L%Em79Ys?Ng00>e|W!Q+ApN!1kC-T zSNZ+bVt@OV-(T+jM`70U1K$08<74>x2H~5T?R@b#a@@ZOb4(=ScUaSkd=W{?)taei zXvWnw=a0ZQVeWZG#z%r3P4H3pSo3(eXLo&TFm7+0t3E&D-chV)?gY$X{zSYnb@$UZ zW7absZm(+f*78ryxVq;|bC1*T_D9W{+k~m_ zBu738Yr>kbg>=;(szv@8V70|$`>tnVzIDH0W9E71voK@sZLMcw_TRkK*Fx#F06A=tUW7i4@#c9w-{tX5y-T!dz?f-eHQ#`bK^Vz4=ju;|mpVEa^Cul9{C zg{#{G->5wLe)mC6}uWI>w^$udc)nNBqg}J7+U4^MTe`Uta_nqH> z`8$_?=Qn10%)bflJm;*z-;DWNX`XA!%@O;ZyO#CciuZha-`zTV33C`y*Sgj57-O1Nt>oDhve?i}X*E+B`+eUEx**ekGW53(L?)Nii8-d@1*=zM}tlNn1 z!qg9tQPT0IA2c{ZawNzPd`{azNgJ# z*HMofUkm zLH~dEbPuumzTAtZ9yM$SyKbECKCtIA&pqYaG57S1*?Yb7y~7=teU0Cf`!mhnI^URD z{@t#F*VkZ{*0$jiL zlW6L3|4)I{{JoFgx~IXrv5}Z@>zayx1~V4Fb;cYzpR_$p19c zJ;%pjdGz5kuzheHW9IC~tH=JIgHO!yFTmCl-`$sBbJSzcufVPoZ})5P>6q`&{CKP1 zfX#JyhC}#oG4tcC%FVO4_SydEqrUIJ`Fg&GtHtktF}3{N+2_CbPkk=t|I&lu_Q5d} zJD)z-&!cmA7}&j!!CR}})o9EdYmIu2&Gg6_19xsdXDnPjdN>a3JoQz)2kSWwQ~#Si H#n$={S3aVo diff --git a/tests/shader/prefix.comp b/tests/shader/prefix.comp index ed5bcbc..3ca1509 100644 --- a/tests/shader/prefix.comp +++ b/tests/shader/prefix.comp @@ -71,8 +71,8 @@ void main() { sh_scratch[gl_LocalInvocationID.x] = agg; for (uint i = 0; i < LG_WG_SIZE; i++) { barrier(); - if (gl_LocalInvocationID.x >= (1 << i)) { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1 << i)]; + if (gl_LocalInvocationID.x >= (1u << i)) { + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)]; agg = combine_monoid(other, agg); } barrier(); diff --git a/tests/shader/prefix_reduce.comp b/tests/shader/prefix_reduce.comp index 378da88..36750e9 100644 --- a/tests/shader/prefix_reduce.comp +++ b/tests/shader/prefix_reduce.comp @@ -40,8 +40,8 @@ void main() { for (uint i = 0; i < LG_WG_SIZE; i++) { barrier(); // We could make this predicate tighter, but would it help? - if (gl_LocalInvocationID.x + (1 << i) < WG_SIZE) { - Monoid other = sh_scratch[gl_LocalInvocationID.x + (1 << i)]; + if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) { + Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)]; agg = combine_monoid(agg, other); } barrier(); diff --git a/tests/shader/prefix_scan.comp b/tests/shader/prefix_scan.comp index 2c1626e..82ac847 100644 --- a/tests/shader/prefix_scan.comp +++ b/tests/shader/prefix_scan.comp @@ -45,8 +45,8 @@ void main() { sh_scratch[gl_LocalInvocationID.x] = agg; for (uint i = 0; i < LG_WG_SIZE; i++) { barrier(); - if (gl_LocalInvocationID.x >= (1 << i)) { - Monoid other = sh_scratch[gl_LocalInvocationID.x - (1 << i)]; + if (gl_LocalInvocationID.x >= (1u << i)) { + Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)]; agg = combine_monoid(other, agg); } barrier();