From 55e35dd879eb48b083dad2419d26d2c6d5cda1bd Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Sat, 25 Apr 2020 10:15:22 -0700 Subject: [PATCH] Dynamic allocation of intermediate buffers When the initial allocation is exceeded, do an atomic bump allocation. This is done for both tilegroup instances and per tile command lists. --- piet-gpu-hal/src/lib.rs | 5 ++++ piet-gpu-types/src/ptcl.rs | 4 ++++ piet-gpu-types/src/tilegroup.rs | 4 ++++ piet-gpu/shader/kernel1.comp | 14 ++++++++++- piet-gpu/shader/kernel1.spv | Bin 11644 -> 13456 bytes piet-gpu/shader/kernel3.comp | 20 ++++++++++++++++ piet-gpu/shader/kernel3.spv | Bin 9964 -> 13176 bytes piet-gpu/shader/kernel4.comp | 4 ++++ piet-gpu/shader/kernel4.spv | Bin 5704 -> 6680 bytes piet-gpu/shader/ptcl.h | 39 +++++++++++++++++++++++++++++- piet-gpu/shader/setup.h | 2 +- piet-gpu/shader/tilegroup.h | 39 +++++++++++++++++++++++++++++- piet-gpu/src/main.rs | 41 ++++++++++++++++++++++++++------ piet-gpu/src/render_ctx.rs | 8 +++---- 14 files changed, 165 insertions(+), 15 deletions(-) diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index c62678f..77170c0 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -71,6 +71,11 @@ pub trait CmdBuf { unsafe fn memory_barrier(&mut self); + /// Clear the buffer. + /// + /// This is readily supported in Vulkan, but for portability it is remarkably + /// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute + /// kernel, or organize the code not to need it. unsafe fn clear_buffer(&self, buffer: &D::Buffer); unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer); diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs index b6df77d..ed72e42 100644 --- a/piet-gpu-types/src/ptcl.rs +++ b/piet-gpu-types/src/ptcl.rs @@ -33,6 +33,9 @@ piet_gpu! { struct CmdSolid { rgba_color: u32, } + struct CmdJump { + new_ref: u32, + } enum Cmd { End, Circle(CmdCircle), @@ -42,6 +45,7 @@ piet_gpu! { FillEdge(CmdFillEdge), DrawFill(CmdDrawFill), Solid(CmdSolid), + Jump(CmdJump), Bail, } } diff --git a/piet-gpu-types/src/tilegroup.rs b/piet-gpu-types/src/tilegroup.rs index 4824178..5912154 100644 --- a/piet-gpu-types/src/tilegroup.rs +++ b/piet-gpu-types/src/tilegroup.rs @@ -10,8 +10,12 @@ piet_gpu! { // A better type would be Point. offset: [f32; 2], } + struct Jump { + new_ref: u32, + } enum TileGroup { Instance(Instance), + Jump(Jump), End, } } diff --git a/piet-gpu/shader/kernel1.comp b/piet-gpu/shader/kernel1.comp index dbdd492..82ccb8f 100644 --- a/piet-gpu/shader/kernel1.comp +++ b/piet-gpu/shader/kernel1.comp @@ -25,6 +25,10 @@ layout(set = 0, binding = 1) buffer TilegroupBuf { uint[] tilegroup; }; +layout(set = 0, binding = 2) buffer AllocBuf { + uint alloc; +}; + #include "scene.h" #include "tilegroup.h" @@ -43,6 +47,7 @@ void main() { uint stack_ix = 0; uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x; TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC); + uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size; vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX); PietItemRef root = PietItemRef(0); SimpleGroup group = PietItem_Group_read(root); @@ -62,9 +67,16 @@ void main() { if (hit && !is_group) { PietItemRef item_ref = PietItem_index(group.items, tos.index); Instance ins = Instance(item_ref.offset, tos.offset); + if (tg_ref.offset > tg_limit) { + // Allocation exceeded; do atomic bump alloc. + uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC); + Jump jump = Jump(new_tg); + TileGroup_Jump_write(tg_ref, jump); + tg_ref = TileGroupRef(new_tg); + tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size; + } TileGroup_Instance_write(tg_ref, ins); tg_ref.offset += TileGroup_size; - // TODO: bump allocate if allocation exceeded } if (is_group) { PietItemRef item_ref = PietItem_index(group.items, tos.index); diff --git a/piet-gpu/shader/kernel1.spv b/piet-gpu/shader/kernel1.spv index 0e9a497e3388a17e9fb102a7409c01a7dae8a420..9ac359394ba6be6c962643b79efacce5218e75ce 100644 GIT binary patch literal 13456 zcmai)34C8wvBz(lh0;yBQI=8?TA|Q|maUW)XbL4Q6q-^HK{PK-{;h#5B}*4XZCTvK z^}V8iR%H_r1O*WnESn&rD2n)8pManb6-4&M_x;}go#f=N@8h+H`ORf{?#)SS z)$nOMW!bQ71pk{dJjIRF8vYLK5xLM23gJM zY-#V(a3F?V;N`|K^s6gm6?08<2V`|NGHYDX+_-XOC^dJqca@sk z+Ivc^eeD}c`i)5Z+Phji`rA^68a&ec>MSZ-vZi~}$)&Y31oi^$ig?z5)gYUuBt==mCZN-b^w zYKhog-yX)a)uZQrNOxP^XVYe5j?AWmb9-}|JR;`{ z&2vw~ubjI!+ZVjLy>op>sj;WKe|=to`Q?2E^VeiE&=cRhPC1{h=5}_YcYx!#Wo4$k z9{oIr_Nvr7$a%5{bBxJmq9-|qtR&&7b8tNeFsHnp(b<9E*xly7mUXcO!3|8r&wZFP zFn3ILa5^_{$B^{@6vFuj4a~2}4h1jo>g{XkYAxAF`zYr#xW<}nK6?0RtL1-kKK6{P z1Lr{AUJlM5pDjX<`OO=9m=dcDHda}@VnNfg*&Eg#-90lxkzHnE9+I+RzSHQRoOOhG(MsoIeW& z-`A?_WSsiCd-Hlq;K84%)!AC~!Ti-(dxhRPM6b%u$>*2Xc`o?vEZaV81Go1!M|nAS znOySCwSSkxrTMm3=vNHUYqIPAMT{GU_*7>%4biKzTb$EXUaxmEzngvQthpw;6TP9M zqq}wBJeE0o+aizj{N0V8k2T*5KCAd1tKW~FuHk_S{oxAzks*3b_9%K~UmhFcQ=L6g zp}&BhpW&jWS1arj+4Qug-4eZ#n%6V0mf_uU8UH;8>Ek_X8LJH4`-@K_J&^Mb;e6l! zeHcEW*O%2?e;Pa(zb1e8HK+0X9z;Cf^)#Nk@mN+K&wID`)F}+>bKiW@cybZX{YvAh z*W*tc@_2g!jJG>u0<3&Jvz;%V#2T%?j^Q&Ei}&m@=dC@i zeDBRMf0x28r~f#lSYChg?~1Hr3}Z}2pNy1q&y43@$<@QS4t4Ws^G+MNrh=_gZdYu3 zBkLH(RI0mX=hY+Y7{=Tum-1!XzQ~BVAJ~|3Q(1Tj0{@?Seu-& zmLQGg9BukHBEx?fSbsVFmm~F;H@DnL31Np?wR|^?Jsl?hC-~ zoA!?*wNEbeD+|58(61}>I%LG(0XC=e??yV`vk>v`0}o5|2f@CdG5;a3zpsY=IM{c? zzC4YLzB~g~H_zXY=9IH9>V9jYj#t3e;XC2`F`EL6V_xUaLFZ4-J73*zlbrUs1y1|8 zKqG}2ZwXkN`K;SK$7#>9V?%Gu^(_svI74+9mZ9q-KZ`lOV<&*!zp40m7mzbWdUp$d&#QO0(6yhK==R@xR@k>-FD1I~ zw|A|uo8P-u=-N9I-M#Uy6?XT=`&O!Z=L+3=F00VJYlYo>+Y?>?D=KvFTH&w#hD3M1 z_pGq{&GViWx_0kcp*Ns=ZwlT1dv6L|`vZxt-Fs8mjqklFbnAJvLVqmL&F`Hl{5@0N znL>B|s};I;rm&meJCpiRoG#Cj?^^>WY%X{n!}s2^cqHe|Gks9OUx<(T!RT^+mtx)_ z=*}}%%sUiaeoHa$FtBsw&F#6B3;)Bxa*^idV1$e(Q|wyC*01z4Lo#9=~%Zf#tOM&Z#dT&J1iT!3QJdwfm0B#oX0k=QglIzT>BW z^?48+{ip+<3a70JKfg_HVmQ}*(`IbnZ~2$_?wp47`=flGI!{NQ!SK=kmcp)l8N1)$ zH#78eo;ddBZD3>H$K2STW_0=R`46ylN6ntM7IgWq(2FyXYZ%Uxe}Q`2kUr+u*2);k z88^S!*dws70~_0WhWUL5-i|Ksy8UjcM=fW8E9*TQPF{b%Yw8Q=rLj7|W01ztrcWnQ z-hK3r(FNWUDX-mcr<^&xYxIESV*QihdeP;(sks~3$B=W5d)*JV&RAC++y->{mAo0w z*~pM{_!J_STZyC3CWf)~wKl)Ya^~^=@D8wCyyO0iJr7+zeBLRp@Oc-y{Ajx5oOgry zlb>tnYmfO`z~EoGtAJWG&rR}{8InR_hYI#4{dgFU(?hl~LhtCJa6+RzAmyg;$4CYVX6X$D> z`5ytB<6=hi9CJi5I8+mY(NyTP1?`D|mPrM(-{vNu#@1^Hp z0oXe4Kx&H`?gPtt&smJ$_rY@d-iuV%*E`S;z^+N#{YZ6f&if%)e{ByS)!i3<9A-ZP zuR=OUTjYKa9J!7EV{|!Vm`6SO`4ezuKOcgVk2}^+!E&xO?pQwq%cplNIp2lor83s| zb8xKjVX(S3_xu-N_u2X$L8`|Z{~NplALnR`x_QU2^VBd@M z+xACvdHtV2s>l2IC$R6MakT04C{jLR{x7&P=2LL;(Vstq<;-ggZE_LgFW|};e}$6| zpQpjDHGLnSL6?vF-QU2@ledq5NBY=DZO<~~eE-C8j$Z`NWJJ9$>BES6waMwx<2l8Q`T12(7MAnn%V+T~r}@4@!d^BH@! z6Ih@0oQ;5!j~?#~mWv*1lS^Y7TfMEw;oUs;Y!ujiiJzIH(dDeuy&ntqaqqQ_VaQpl z*tn+`apfcKE@0!v_h%fsT-2#eF7|#r*u9rChUa4fSU!5CuD|c>B(S}SbL@H86;57% zf9F<@^Dr6gd619uAeZ*V{-~dUbbU(o(+b^nTASbK-I+Td-B|uUuWl|xwg>+?6)FE( zHjKZw9hU#SlE0()`-zW#k3XNe{$Aw27l?O8j%Jv9EHci)o5A|TTK&0pIzvACeFoUM z@w@w(U^)NpE6&PW!Oqj>&pGRP8$cAhqCwVrhhd21C%t^Qu;?{q%?dqwnh53u!3LB`+J z`ETg1JM``f{oF*~0^VBavA(m>jhn8o16}?O_TRnn|Ij7px5V>dPPtfrA6PEdx&bWb zJLk7%BXSd?mth|5F?Taqn{lmOJ#6m)r*YqjF6Z4R;=T(Uakac?R>Dg z?kd)IAz1DL#(C_G|8{T@L;g;D)ZbHZ`b3=ff{hb%-UpV8IqwI{rE@-jZatCvgJ8Kd z_lMBsBhH7xa%r54(Tx+iKLVDEIUfbfIj4yeG8y?XhU>5{W9s8M@b3ve!SHOjKlV&r zf7j}H{3JuWXH^_$wH`d3;j<6JvpN+yjp14KcjGvl+ra5ry|hB#p6FMCuPXF7tCye~ z*BWCFJ_VMKv-)YUT%1*N%7y=BV7WM}mxJZfv-%nII8)kV?q|W;?2omphwXFVH16lo z<f1jyCeu?26dHZ-B z^2-c+Yah+2uAed8&+8f5-D`2|^$hS#hR^;C_j*6%0Sxzg7BcqwMsT{65PAL{yq|M$Vx7C*=C2g^C%pQ%4!IR8F|`PB7^`451t zA%1565G?2X5y&4goFDt9u20nWV{ql({{&7x_VTA-xrZ3$*$eqIhP*X-p47eLc;G1cS#`bV(e5_xM?k6MR=7cqPmFs$`3Z25 zxXAwx^vG{q^ULWM`MK5)$v+HTZU-Xr4@Zyu))o1y;I&2mYOq}7uK`DXf-KN@*Vfg6y=GTi&)kn!`TmN@C&?}YB0bWcX;!-&1#87vojZw$HU$4IbT z?ENUPT)Ow8(XA)VGX_pR);<;-Yd3~#m(#Cu?JJ1ovz+1Dmm!ZYu;=~+hHF0&>DqlS zCKJcCtLw8{p~v}r1G+KN8h3${k6w%e%h`*ldpvs7t=+oi()CY3uZ%qrPCoiJ2`m@i zhC|7*E4sWftzX@~&&Req*mY`iE$Z?2aeIIlVRMeQ%Kojw=5rDw`nR&c4ag>j{W}?H zzdT#hh!g$OXL_MW|Mo;TM%uqAaPrZ=y})v5|E8ix|FpXvIp3Rzy*GMg>^eC4=wCfp zF8X&A^Y=lQH>UNg+rP!w-UxP`+FXlz^lx9V@0WA5Rrc>xY(A?Q(Z5r`_Uui_sCz%= f%BMY<0Vf|l*&i&I_T&Kc=!te~l-oh9sOA3vchlCX literal 11644 zcmai(33yyp6~|wjOejl{rBGyPC$vDJd!dw4wziZ|D6~=pQE*7pX&6Y8m`qB`A}tgY z6jXK;Q1(T3WM9Nm1k{gB0m1J>1^0agML~bRH}51bBj4vEm-9dWv)psfz3zS8D6-F7;Mdb(BjTmCkZ$X{D#! zR;{ck>o+XtSLtl)=v$h1DBzKwU!6rIbCz_iJfgfTtx(;oYuU2iay6mmjg#T%`jjf2 zOUo>hn}vnr`)>qgO-Th z^=*r+%~MEr059sQbXHvqmpd%yHL#w>q!~T+L%Q4QUMp9bbE9M|IJKAZlSk%sq3PUX z@T<+;lg!G`aDI8Of%yx`IP{!vTBn>>XQ{$&^!9Tcx1`3@)}x>2 z&|Z~$2RKjmz#PMq@#wiZ2CXE=Q|G{Xb|a>?o=uX;;Mm<#wY5FAAh`ZO{M?60{jtN7 z-Se?&I|fbvKPil#)*oL;ri15q_EuXv+sZc5KFWCwtg(>HKo37{P5dY4WzU$k{~XBM z%YpbUl6}x4zI1XAA+gF}W0l!07BnrJxMJBfZ!&A&n>_P%3Vz)i`1$RjKH6H(hQ zd9Q(5wo2xr_xqM&qQO=5+y8%AX)UwK8FkG8*Xr6dX&E%vyw0WnXBynodNa$ddiNPDI9GDioJ98+t0Lt1OC-iG8{$e8Q= znqM|oV{&0#+~w#IcU>;-rn

2E{cdch_`}VOraR$XcGpx|Xzd%^pmpjuq?^$`Thz(9UJ`aNPnKp1gG$gy>RPE|b z>)97P@Y&Fq?1w%ue`7McPM6&tA(l!?+H0=hNnUPRun5Y@KpjV;hYuGR&z|cg@B%BZ~}k z+D~=kwe5_IoV$R{DK`psTxN3(^2V8CSEM%m)h8m&;X2iQz-w1`4nNmpjy=KVk{gAu zpV>8=H8v)DWp;UcI1}kQjMHX5KU*W;{$TUT>3;xHe|i0_XI_pQk8Z!`GrZKzbr{1w zhkg{enCbeL80Ou9`IjJ#v6eQZy0tGw>LWKR)4d1eB5ym`OaBT|o4La7my{t4?LA1> z<{8j#9eoV@s(md|yXQsyvP^Hz^s6(yh>ZOAf}KG^r<{FJ_gdx7& z0DB&`C-z>jb$OP}^$oDOh#d^G)=N~Wyq`mgJH7v^$y6+?A zRo@>uBBy)ie9s8GwfU|Qx@XRJjnK9Gt`WLD^t~W-?*ZQpLiZl^-5_-9o1N3O`)&|+ z`|GU$hKSo7-8gea+!pBatFyQ*!N$rvx93bQ{I>$j#XKXy za^CBUsKw8KZNT!zdoI@q43;Z0%$<-gWzYP>)*Xfa~j>3n#C?&kyyPti)Ugf`=o`qfMWANO||s&*Ou@+acw( z`;3xv&b9D|faPNSBjFB3m+zuxpZ5zGa>lsVhk>m#)>VW%99@1Poi}D7L(buqiBzs3 zk3NeS=GNEReE!KfkDs-R!E$jgj)prDT|Rt13igcWeU3tx--Iq1b2OO0^jsUSJ>owG zb`IY++#~&uL6;AokAv&y_ynB%8frD>Sg@R*#WC-3VE&SwvYh%nh`(p*lML(9$1`<2 z(#tcYt;CS?Oo^kGR(a-2J=U-MJ9}y0IvMuI`%Hg( z}FWIF`r^+x6i#uFZ(Rt!;rJjV%O(q&kFEZMm#%wUY?9D?;L$db^U$T ztO9#agul;`Q_$u0Ux`%r-Zj^$V1LUtk2Za(NO|kl=X9`~w$qU6-q+^-GYvzOxN znRF7VM4qV^M&%?>ua`BwAhhIdO z*T?7Dm%z?j0^5@!@zX)ucyz^Xul+)kO^-I7bGTX&Sb#2C82G-xWOOfiX z!S~wB!E=$uXp6eP3^rfB?ysQB$NR{YV7V(85qA|h;+#)iKhHw!kuvuFYOs6nzJ3kq z<-W>)m9d`TzKWg0d*nKB{k~ogC-0osBGsd&8^QJadJ~+y{x=}iV_$CuyRYWarq4A< z`N(+-xIX8taPrZguY={hH_f387M!*< zAFEZq#SL*tE&wd|lZ{i$#9)5r>um4L(^*9eNgFO%OaUSIIeX&34 z(~z!DsXjT=U8lAAe0&w`bIDwh`$t>+kqytRs>R=*!?$MEtu+~}+Sj%|JZULC)s_-|~kJM`8%y(6dlTS#xF$NG*# zH*bD@pG22mm!0+F!E!!JJRi<^JR{b>3@jIGJpnA&&hS}NL7vDcGn_|z#GVA!W?pMo z58HBZK5rMgobS((w;LRJwMXoyz}n1f57fg}1v}UJY<(-ha{e}Y8hhjKq$e}vpTtLf zWya|fc~*hV6EUZN&r1x_sn011y)%vl`tzG4~puQkRVd=4xhXZ7=7xj3uNDHr}1faT(> zegQ0(KdWCvk29q`V!s5|W`C?*J!}_&^LZ~um&>2kOVA^)_K3X{tj)akKs{_%fSt>I z9*z7mLoV!J0UHzcE5UNU7Z;INF^rM7k5?nV%CNWg(K*%iGpGCcHHLQgS{!>l4m_UW zwJXED-UYcE!@Zt>jJ>`NoZsu4>h#;{^t*HV1Khci?eVGST27SZbdIMoJV`aejTjM{<${wu-y*M=e+}6E`Jv8M321M zBla$^HuKse^|0Lo&gcCGy4<~tcplydj=b6<_I|K7^V)0mussBJ4bj_&!S>%Z>#wd) z_&*A^w)h--3@m5-X2{1G#y`R^UR|Gv|0dWP;xqdRu$=M3kl$h$AN!{M1S9Hu5?sIc z>*3^MFTV|zdy3&aJ0PEC$Xk==N!@oG&-}BDu?%_V@%i%|hIOoC7^AMA>llapE__bLi{RL2?GgJDSerG8 zqbBR!o#8c=5j9N%Thks$@7-OHuQN7aM2#}~%5qLa9-dfe8)}i2i7+y0O*19Kh?+ovSoW-#2eN$ZfccwSM`MQ6MZcM)JpPNf~e*V9r%f<7> z`Txd<`K>GF|9fVO`QJvDi}~LH$Nc7XemVW}-znVtxeTuZ81DW4$k`co@8>Yw`vZ}l zS@p{)sLhXW?D2oM$2G{uemv)^6Q$`ThTQ>OVO5dvNk` zZr%sW#r-iI{2zwAIjvvae$GG+MR%RrT#I`ACe;8=?+>K5`u?>L=QWQJ{W}P3&kjaL hPa27p&-bJNCm%g&0?XxlvH^PZM7uT0c^;yc{{lh-+SdR8 diff --git a/piet-gpu/shader/kernel3.comp b/piet-gpu/shader/kernel3.comp index cb344c0..ef3faef 100644 --- a/piet-gpu/shader/kernel3.comp +++ b/piet-gpu/shader/kernel3.comp @@ -20,12 +20,26 @@ layout(set = 0, binding = 2) buffer PtclBuf { uint[] ptcl; }; +layout(set = 0, binding = 3) buffer AllocBuf { + uint alloc; +}; + #include "scene.h" #include "tilegroup.h" #include "ptcl.h" #include "setup.h" +void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) { + if (cmd_ref.offset > cmd_limit) { + uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC); + CmdJump jump = CmdJump(new_cmd); + Cmd_Jump_write(cmd_ref, jump); + cmd_ref = CmdRef(new_cmd); + cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size; + } +} + void main() { uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x; uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS @@ -33,12 +47,17 @@ void main() { vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX); TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC); CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC); + uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size; while (true) { uint tg_tag = TileGroup_tag(tg_ref); if (tg_tag == TileGroup_End) { break; } + if (tg_tag == TileGroup_Jump) { + tg_ref = TileGroupRef(TileGroup_Jump_read(tg_ref).new_ref); + continue; + } // Assume tg_tag is `Instance`, though there will be more cases. Instance ins = TileGroup_Instance_read(tg_ref); PietItemRef item_ref = PietItemRef(ins.item_ref); @@ -52,6 +71,7 @@ void main() { && max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX))) { CmdCircle cmd = CmdCircle(center, r, circle.rgba_color); + alloc_cmd(cmd_ref, cmd_limit); Cmd_Circle_write(cmd_ref, cmd); cmd_ref.offset += Cmd_size; } diff --git a/piet-gpu/shader/kernel3.spv b/piet-gpu/shader/kernel3.spv index 23a7c3efd84eec32a179cc6d5b1047f2ecd6af2e..cd56c48b74ae6db457a49a98e63a7bb323b3dcd5 100644 GIT binary patch literal 13176 zcmZ{o34m5*xyKL8H-HF;DDEPN5+W#wiipbq>VRUPxR6WZ$IL)O!wk*BQd)rMy;imu zmX>ANLR~4lX=z!xR<@Xxwr^IpXm)Rz_xx^7)dZ{McQ_Lk0$uJ)F; zj-K|`zK-?n`i)5ZI=Wgr``c27Mm*B|ja0;>vSr;JU47@auL9;(s(E#F4cXqt$m?&+_ASRR?rUFb_13QDRjYkeb|Bn5`nB{`R}ZXC zsWCQESuAxU09X+SQt`vURF?)#_}_Cd0!| zn`^A*)nV-x#pQT)*SI!*R5k@3;|CQPVlq+J@^Ji%c~-Ki;4}NzuEQmdFZZg&uVmBU z`lwrj+^eg7LtdKaa0XnTLHWzTu|VdYj$e^`Savk{ypGQH=AQ2Uya2fI)xBypjLK%i z2Yl1b91v@hG|w^k7kT!`<_sDei!!Lv|0hau?v5XbGbWoiD9+1f`)^~MI1po4b~1S3 z+P3u8%jc}_Rhx5Tb}D?p7wp=od8PN5nr9;JG0%@$e%~ecXd`#gY3KtnTQ>A?zT&nR zkYgpwhsl6GWBsaQW~`bO+etMI*#h(C_hm!25X>!(-k6}5u612 z<@1w!)y~$~>}+`H^RkN5BUYT+}``yR?k&O#f|R zwdl14VVrRdJHQYRoz|iYr+sxTm@ye?NOZd=W3_5VtSrXZ4Hq&tJ~qUUhow%pW>Au4EhNi7|^V zx4n_RSdWJ68gS&g7F^3!$*wE-Y`T2hjr7I1N_Go)UA3pWHn&x>I|uQG>~46(y$4*2 zJ1n~o?oGei=hwhI3LR6{7dEhSc|eiN^>~QBHkV5FaKU|S<-U*6$J`zVM-5MaYc(`v zPYw80vR&YGE)6^sYP=yE4v#paz{~ntJ1yJiiri}*n!WXoDK+twWb8a}Ep8<{sg5tI zTf#a-*WTdRE?-Cc`)KKhN+SNi9B>!4h3$gV)&xM^ly!&UmM z&ToO7Ujt9?iEB$doWt9AsPIyJyr)A2I#CWjwVB%z0dC)8BZG--}*dU&rrDALBg>j#tyy zxoK0EA58C9xno_Q7<(w#v1$|Wb)B_E-G_nI^mFV~`WSlz*s*Hbrj<5x9toD~qs{Nv z7&jB_I5mBJd1+IZA4hM_;B&#ok{iqK!iaSW*jQ?=o!@xY?z)9f6IdT@lkk~O?;bg} ziDt|*=)=DmtiPJJMTu>3X;U-Sne@hTj5huKW{OzLz{XN@Zs*YJukQHs6W&y67bTi8 zCo-;u-Z;i}+-39;N1Nw%75zlo5ZaL?Uk&yOz6R{LgQ+u~E!ONaLvF6_(noDx$=8*d zajd~-pO^lA+ZaXxZ;#p@NpJ{Ua zM$?-|%{+3SYif~a0el2}Z`7q=WB6RNZeyGe)?dvSa-VH!j1_Q912I>@y%&8Jb%2c( zYq|!mySnk@J{#4{*I8o6M2+2On-l(e_(Ztz-w4(>>T&J2qA_Crbk3)HdnH`Wb_A6; z&-c(G{!8G!@eAHau(9yzM7B1UH`FD=GI|CAj1LR!Ol;F;C`nBZ-PIaaOdmyO4!Z+Y{Iqsy%P2&xZf+mwfnsypU*CM zhSt(NSMJxz?1^WjtJJsQBVPws^L)j)*MS}9b0x<0z|}XG<9fl4RX>H@cdykVMnAYd z_j)w-$h`rqb~AZm+(vkP?oDXwSC_fp09NxnMm?8w+>(Ll`WI z1ngy8Z6Bto8CM*0`xrRp=JQ7G+fPkejKds6=&lUaCPTzEV*^N-sUrJ zc+a2UM zpqIx!ehz#fKI+^Oa5UG%#jP1_b?$sYhaR^41a+ttjq z1N&FN##sY4m+#=O!qxSEkX|1CUjrK>p0WBr1XqvoUk5wxN^)rb23Q~Ui1SVG6|^+Y zx8Uldc-A@YVKD#a-y_D*9^=0auCL<}H1(+CJ7DYBQr7WZus-S$=X>D#Ivzz+k8zKI z)wa^2w(o=aKmVJ*arL>2JWJ_+K(lszJU2h0_wt_9_CuPQXGR>g{21(<;=Sh@`~QVnM!Tg`EtueI6_{YKZb^HoVJ?i*1I9=P{!1Ym& zI8T7<>-a62`j&D}eg{_DN{iZl59a@TZH=o>tnDAb)~=7wnpj8szj1C{|0ij#^IbIk zU1Rs*kMv&dgFa8u)Z7QLbMv106WIC3T>l5`T%DKit^Wmk&%}M__sE~&>dUx4Plf*l zY#hgG^Im$IUfsC98~+NvrnEgnFV|+gzk#nUZO_un=VI~O_3z+TddFysc+Y{=%w=48 z#CsmBuQub#W8912sN)5&T$}Uw2iUqDw~JmLb^H^&0w2d{i}m>z*m&vsSh#w;*M~R} z+Dpl}q3{@IKDmDGbL^A7{PXngn||E{JYK^^7>nDG|hSK zK_98(xT@m*U;E1a|#$FEAW?a`m z9=11u&2>5F*yreGnp)VmfQvDOeJfmT8SCLQ;7ZzNn!4+_js7Z{YwP^XDc8@K?&sAs z?Ju!^;@E57`QvF`d(qtMJ?Z`3>R#_dAA5Zb*u4vWZ5_WZ;r^DpG2xz{TS^{#|7N&( z(!GBRTs`*wtzfm-duvdOwR#&^E%yHHV6}Ab-vN(3(jH^q3D)L%ITv}@-UUwMUJqB> zPK&*NH#p*IkFoCoYcsBEA`jbp!RCrJcpunxihYyo6ZZFm>(}T5XzDTN4}#Thruoh3 z9({=RL7FwV?sD&Y&%%dk-rwrx@xJ^B%{u&MbBtWSsQaT}=Mrc3V_-GM@1XxU&GE5M za(x^>j{Xxg?VdkzoWEh<18H9S(>#Cs(H}tb{2fFeYq7J`)ARC4H1#+yp8~5{N9@yW z;MgbaF{e+1wONxmYO>x#XkG`?qNWL8Ynn*!J>Y$EC(ZSa8gB>dm)3X(Ts`*dvtYH@ zud(3I(bQdgW6I;3{4Vf#n!2^hqt?mbDKxLcXx4ft{oyogeFc5ouXls3HTXSs{GmGj zaKax2KUVUX>lfkXN$2_{xO(izm%(arF04T<_TWLVTCB}iz-m3T*uSrWAE22>dyM@W zSet9+T;yT? z4y<2VjSOztHqcYxM`7dek}ut`@aUg${+QTdOhU)@m&ku=j(ywaTN`q3Ea4yynxa z^%VLAC2pc$NVD$K>Epf`PK^zNzYkn(6Jv~H{(a$* zUp?~ghpsL1?+;ds{0D#|zj4j4reEYg5S->e2(DH<%Zu-tgW=}CDakznO+9iS0#=LM z6Ty+&nC4c~FLF-;r@0S>t8FJw+((n)=63&L++k?yk?U}9!J6uTj%fH1#-lM}XDh+!;eH{HKA{;@lkx zRx8e3@pp{raO+9)%s^9*xgQ0Nxf{c|tLaxi_vOU$I)~=mm(ia~bH3-%$NY|Fta_Sv zCYpN8Zx&e1`9&?qz@rxJ)}rQ|>T6kn=5;sBl7!D27w!0Th5fTkXMI2WuI&&ZRhvsmV zG{e=6Y5j87ZxOaL!Ol~gbCJjTodxz@cZ|0B^=rlEbr~(zuUg_3`js@-uZ=#Q1Ivk% zuHQuo_g=a*;jV8>$zu(cz^mx#`ksxZ9?ykWg3Y~?_vljaGMYZ>&oR$)>An0LskU=y zYStg*Z=9cYxSI3Y$~rjb W)nN5iG-J57YX0po*3CZ7$NvE}W$5Gp literal 9964 zcmaKx2ar|e8HNvRk*0JD2vQWKN)-@6`nrmOfE9IFw&2EPcip{9u^r9UO@xR~u)xX_ycdhC_`G_Rxmki*) zxAjZXXL!;dCP@Y-wdq*5ZqvG1J*DPZ^B2t4;i#l4_0wl4J~fQh$d1Og4h>ZphQZ4X zMSRz-x2~aX{rb9fYZ{umx*B>)d)iwX+S@u>8k*a>TbfF3J6rS{ zQ1EN(Xln0mE;`iUQLJA>M@%Z&(Am~e+St+xOuLlx>1=K7X(=UmRWrxjkI|0ReRl8B zK8U&I_J;13#^zIcO5MFprJ0d=X78LiOYo~oj>NBDRq|TVq30XM1ON*1x{4VN**-sinZVp52YjZM{AAd}J~jy`1y# z#*1xb-5XPtj50^se@${s9$#N-*5nSKVQ)=vJN>|xF<$TJ0)+CeB!%v%QEa%f^?>5C_ zyu52%jvtatL67*unhdfqQP=V${Ihx1CZ~Wmx3#y_b$9l5;fI>mk@qRqnStKtTU^jS zvRueI)A7&h3{AY4eQpiW*kRrFnaf$Sy+5@tPi-<6ylO{t@#v=g%KMc2tx4vi_xXZd zKRKV`8I^NQrJu`w?rlKd#NxNC?Cm>iay~usjON~$ zGFgsGz~q=CrEM!?)Vc~BwK~D&TGdG@)8{dyan~}&`W^&#HFh`dNNv^0J%{O4$-U^2 z_dakrZ*B4bx`%bUccyP18ILI`g$-=K4@&a5uMaVn=Te&3 zw%2#;>}+Z*wRLvXdv<(>49We|H83dUtCG?1yZ6jaYmU=rd-myN50lWFIIr0`tWIWS zc0QT29#2It_gIxIL~rZpNpmea%vO^u(N_3u^lY6OT$(YSQf?Q~KV^>~$6P zv$f~%FV}DbdUrmz-P(3Er~T}$h}&18-%-%(nEQHi)FOTlIE#Pd%paTT*}VYveeTy; z8@&oK-uo~K@dW|!Xlw%+ zCm_cVH;7T)JjPj9ZX&iJjOxZ~8^#zp{Vr%ex$*4sh}@>X`HUaQD6g;ae(#8Q_tJPd zeVv;&dG&FO#;O}TkuhQ?fsK_LkFV>jE&4tYET^BbzMms@D%e;#>rBgS^6Ik~ja4^x zE@Q;{;b^R!bN0O#>lZ!?avyCI@i~prwKZ0oIekBc{~2I&%2|IYqyF;7uPEroPrzp_ zqcP@i@6Ss4>^x4-^?Ep;(AR;@JC6EszwE_(qi){wb04`SxxO*y%wxUHh>!jkGMZC9 z>|4P75bX_&_UV~WZ_RbrEc|zZJ;&Ow$?fiY^t~5szVP1<_H#l1LyYDhh&ZqN8STY+ zJ;120-}@QuQ_gv*dxzyB?<2Xc|D#~@dJfF{G-KrT9;@s3Lq>DRnOEJrEa%pT`+PKg{R z=))q0exf=Tc@&vufTuEN$C*g@8~~5PE%aN_z56rhz9`WXc_pEmpPtjO8F=kFNAyz5yZ@5Qk> zC-39Xg|BzwxZHjd_0*3?m-DWe+jA!uYcn2fy_nYou$=d313A-o30U5E&*?-Y;wOV` zAmUE~%X#mNUxIlGSl;+ajOs6OUruK9-Z(yw?^MQVh>!N^x!v()?B3Z^5dDl3+lM{Q z2HTr8yc_C^^EICX_IFl!?cOE1n8RGK+(Y>L*)R{0^X$EV{{pa&J!qSc$k~I~UZW@Z z=xH%Hdh$N1+moNYr-NrR%G;CoSI)EK{X7FKR}YRpm!iwNu70;z29`74`o9~SN5?5PCoj$3~V3!^FA&I>mwg|t^il|(EukOaa+N1 zW02^r5zJ5ecO&!a^Eh?9Guse**T-|y%;@8t(bj~>d1l1XOAFXJ#pk?h(26b}bJ`9r zuI-iR`p8F~Hn4Ll=6MskeDr@6n4ffQ&7nQw+rgE6?0}PxK03g~we3XLM?Ug&fh+sC z8cu$Hz9-#ax#HUPp!1Wit$Fo{wJm||T^~PdVjUfy^|&b^l=~b z*@?)x4`S!$o!J9+{xR2Uz|Pfq`FV6L*gI1X)~A;7I&}FBX!Fsp2b;%OZQiBbjPk}T z!RH3>q};ZbQC*wy`@#Afw~tYMF}eM&bt8B+qcPf|uY+Lo75lmgT|VBgH-qI4AQ5*9 zIO43QuAl20>*J`thSBwMjQHEY`Jd_+O-tk6pZ-QNkWE$E(~gSj4i-;Qpb;@Au)ICE z?&{wA!HhQ{M^-xCm@aY*zu9&Aq&7`+q28Sg>76Vc<_!TJ?@dAN%n>uw0xAdytDg_yAZg*5-p?x#Io(5c-3Nb+kw9hr!xhGv}fnwvT{| zc|VFSw*`rF@iB1Z)gG}Q2WvC0YoZ>uPk@VgKZ!2)2oiZe1&+MhBlgo^ZRT}t)x-7~ zuycsDeH`q1J7@jX^$Gvag6%EtsP??ab~^>mh;TSntlx& zYpOkBzYf-BPvYpwerF>-vykZNRIokGVe}q&SDr+?7t!N4!1@(?{3g15oYQZC<>C%c z1b-WmckRrn?j5$5?;ukUd3#llUi*U=BR;1g_Bt;eGrR!22(jVm*HZmWz4*7%aCLaW4)r{sgff>&P2BknyL8HqV@A;%7M* zd-!woXAtAeYkfKWqW&+y#rn^p%WXlT{x88%Uq0&p3al;a{~9b8^?w77`p<%`FQ;GB z|1G#!|99wek0728=l^@e`rebM{fC^7+J8isi`stzM{RRjTTZ{I{bz8o_FvHDb|P^$ z{|dIY`xkM41ItIPzk{QeIjkk8U-5gs=k83zXDQ;jJA-i<;#phH7} zP5hfRC`re-WH3yUj7Wy0x@FVWO-pvv+Lm0n>=_1(P6}z9F=LXUNh7)e=_*ybb-WgN zH!_xd`EjW&cBX{v0y8AZVL(GNJZaffY}vfIWz!YKa&K>OM{Q4MrPx{Rt`ys0=a;~AT)K=}^AvYw=bv=C@+e^iAPiGIYjmb#%DOa=DS1GkE*iq~2 zFV`05yIS18bm>as3&|*J(5C&kS43^>@9IrEw{JPenZ6s6@!%{jU7Og^UD?Hq)*6va z0%tJ;yU6OF>0u(V^}L2;Ng>JFeGwl=w|5kMPANVCK2y7poCMeRwJ~`dJAHS( zrSkQaHsTwTF+9)>U8RnS2g~s7$T_O=XxBI{<64+=3EU8*0l5-y+y(EQqlI`mzkJ4f z3A|^Hz|D1-V$a2Ig?C@S-$U`t43B2+{Cv@0em=RF&tCKMiJPf~O6$+Wu>Le;0$_ zRz>Y`*w&TPH$L;3HvwC0j6T0HkvA3FJUL@sn?8B*EVOe5ZpOBjcrO0==vbrcKPTnu zd$usc3*a1q7h=24S@aTj!oECvv331`7$dhb!`=ru>)2!Z5kMQiEc3~Se+BlS9A1r` z&tDJr?9AVYb{(!&yd}e)LG-;9+j{!9W&ZjZHYDvCc5h;So->j4^Su#mZ!zC%u&pKU z`o!MnsQ6G}iUm-@DMRM?UKNKCEv&`~DEZSP}TcfZGn8 zw_*|Z&vkzUF-G1wZb6$DYxo$pbI4C*=1-%~#jCypiMmIy8LOi1QGlEhF?VA-0^gIO zy{nw}UTo)Osp;B&hIU?g<6Zath<{g&e*i5P@eg7fFCX%`|^4Ps+_SAmCs z$L8<|uy@`&8{ZZ0tltl@dp;KJ{>!=NV!t7B(dR_469t|DcAiCad?VU#jGW^Ay@<0g z-?cd0`%^HDXy=%Z-jBFGu{|EhVei4g9QI!OcPHY-{_P3unckYi*1t1{t$$Yz`)>R@ z6#4r7I~3S=a(DnfmcyRq@fN^m5 z^ABX$e9!wmi0^VUEAp+rH{;|TT!S>vGt_6EaqmaudcnTc4`iHkc)ns|_1%of?PpEm zk0LiCzHj3_f4PYNIJVqoF6u4lPatxN!=9oq?~49MxRZy_-c9vG%zZohlZZqAr_lP< zkK(@reH&t&ytX~KXP-gbEBo`li7&~{Ls%zJgd!9BcXpcC5)aB6dw9 z(BDMnAo8xsHzjATXZ9_`Gu%hK_}dwG6zzT9g8mL--U1}@zMJu{%JRMkHdlTmyEP2` zeZ=0^BKn=*H!E-LZRj5$*4~O(+q@ql^7bVD5po2PkC-20Tg&;4`3Yj2{7806?x%?J zFGTd;jogFS$1${i`8DtV!hRfKIxqp8_&a{vDBz@8~}=?tXOS{S!O#oJVY2ejeu&A3{6t?P%w4 zkN$tKG$2FYQnajzNj?}+gj^$wT8pVN3D_Aa{g9|TBES7r_Vik zoP4>cJ0ANWTzLhGAe>+9qWNh>DdpQM8KJMI9Y;)yf{nN1RVGXx*8G1U{ z-)P=l|0KPnj zH)3Cv;aK~*VCT)hxAVaAhqL>z2wQF;;+pJ9F81MkY`NI03$f+g&)A!bu$Lmvqd#&l z#@1(D*Cr0%CD{48E5UM}Z`6GzcGT4$xtC(=v#xt24&N&5eBCu*xz$M2U5g!c^+)bH zY<<>suf^fJ0y|%~1uVB7iMm%}M_v7qy8&CDbv-k2_%>m?H!tJ6Fbw@HL@xZ##&(V2 z-;6ES%J~gXbv;MkbKio#8u2c89?mIlMWW_&QvOWM=Yr+T--f;hF@Gy!zSx+^e;&5$ zh7d!|k3J&0!-J=U;|%lFt1mX9;o zg)O%e@i&P1yAgT!+M43{z5G(_(TKdgilbNeb{^trMxxiVuv={p51W`mqd1x?OiHU{T5~Qjt(vov z6SMf%h8*3{~q6j)3T-0+&(1RoaSDItQG6UvCQ6vY*e@`H0ODH??$VI=Sqo8IZ_ z(7Qkf*o^ng%qQ>YCtl81Z@lN}J#d7*sAZXu4%wI%IH&Wg@*7mJNZGZc$D`)KSnNMDOKGIz69&Gn4db4l$tOpzemSy&- z=#H>^(CzP3=5`OA+de!qZRX^dQNW-rb->zK0yIE-J4G+sXZXQM|S@~eR{;qOIv z&&^+lbRDiudsAk+chUD|bnEHgn)%)9sCOlL2k;z9Nau|D!|3M9yB=-tUDO-NbaSF! z1$`=K*U&u}>-(mRbv^dOXCduhKI(gqt#3X1d>LS@4g2L7x9?xNb~*RNbxr|e9R0o)~`+DHriyK{sAL>feXX$~J0yM}G*s|6$*c&elF^KY;G{$2c!3j42^&a<3~CFCPOPIzB0;4G}^S{&`*pD>+B=kUF4 z1Flcocj)&bZ0~{Ji?BDL`#s3peiOppjlL&mTiZgjs9;rFZ#wAx8U!8CO9@-gb2JmVn6f zBQHOd<@s$kSN;-q+r5>G_z85mnCC_4ay2kSFP^2}LV5E&3vKf~-)n&HaF7-IHu<+C z<>Vc{PxI1mJi2+ty%@;Vuzi~^$v9&@Pi`Y#d?%S5v?|qar*Ry^DklRnZzddivIM4YX#J?Houm^o_0&@1CZm-dkeAIml zx^=DbHl*V}%ptGsp1c)UM;U8*2YRfW+C4R{2h^ZExsK&b3Lnf1JCYy;63Uee<3vV~@Us?mdfp z;2s?S^6`82<&3)r8F}}jN1pR&8|OSXA)QC59dG-{da z+!Ej#+^6pW`H1;Gx-n;ii1`7!{Jr#J&X0hc|0^8KxF4fiOJCIb3A(jbgQ)dWbor?D zGjzH8LDc#=y7lx$tzV#9EAE?p{1V7VtzTu_1IVcLYjo@Bb59N<<)ZFy(VHN@-rr%% z$G7eG=yDH($om7jdHKEkBes0pxj&(sDIoC}-S%Io*qZ z|NG3so(&w`;6mc2Baa2%)8mlwf6yuD({lFdN7*Zmvd=rpUX!!^zosv$9$F(+=NC^nBfO zu;o18sJjY1>gtc&bJ6u#*FDk>-}&hIx))%}dAFnPh3HXNf8<_-uFtyewRZT{qUY;A z1zWBcMBR(gqptqQy(HCJbv?6|-FnySvE7^Nc;_>a8-QHoZbWyD;lC7JZiw@niM$M$ zBk#FyLiU3`;CZ+fZR4Wm7IfDaHJ^$uXZ}{?0O$jof%)3TME=vzT}Rxrr=$0Q9$>t- zF~<8I20 zaVGj>4bMi`XHV+Ull`6u9P>f+v;f_nPD1*AebZNgX&`!hF1m5~9-oITAMfY+=yF#A b|Nl1s1wh`tv!-_Z|5ZSr4dm@r+g|?> 2; + uint raw0 = ptcl[ix + 0]; + CmdJump s; + s.new_ref = raw0; + return s; +} + +void CmdJump_write(CmdJumpRef ref, CmdJump s) { + uint ix = ref.offset >> 2; + ptcl[ix + 0] = s.new_ref; +} + uint Cmd_tag(CmdRef ref) { return ptcl[ref.offset >> 2]; } @@ -278,6 +306,10 @@ CmdSolid Cmd_Solid_read(CmdRef ref) { return CmdSolid_read(CmdSolidRef(ref.offset + 4)); } +CmdJump Cmd_Jump_read(CmdRef ref) { + return CmdJump_read(CmdJumpRef(ref.offset + 4)); +} + void Cmd_End_write(CmdRef ref) { ptcl[ref.offset >> 2] = Cmd_End; } @@ -317,6 +349,11 @@ void Cmd_Solid_write(CmdRef ref, CmdSolid s) { CmdSolid_write(CmdSolidRef(ref.offset + 4), s); } +void Cmd_Jump_write(CmdRef ref, CmdJump s) { + ptcl[ref.offset >> 2] = Cmd_Jump; + CmdJump_write(CmdJumpRef(ref.offset + 4), s); +} + void Cmd_Bail_write(CmdRef ref) { ptcl[ref.offset >> 2] = Cmd_Bail; } diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index f04462b..9ce2de6 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -22,4 +22,4 @@ #define TILE_WIDTH_PX 16 #define TILE_HEIGHT_PX 16 -#define PTCL_INITIAL_ALLOC 4096 +#define PTCL_INITIAL_ALLOC 1024 diff --git a/piet-gpu/shader/tilegroup.h b/piet-gpu/shader/tilegroup.h index f1d646f..64b27d3 100644 --- a/piet-gpu/shader/tilegroup.h +++ b/piet-gpu/shader/tilegroup.h @@ -4,6 +4,10 @@ struct InstanceRef { uint offset; }; +struct JumpRef { + uint offset; +}; + struct TileGroupRef { uint offset; }; @@ -19,8 +23,19 @@ InstanceRef Instance_index(InstanceRef ref, uint index) { return InstanceRef(ref.offset + index * Instance_size); } +struct Jump { + uint new_ref; +}; + +#define Jump_size 4 + +JumpRef Jump_index(JumpRef ref, uint index) { + return JumpRef(ref.offset + index * Jump_size); +} + #define TileGroup_Instance 0 -#define TileGroup_End 1 +#define TileGroup_Jump 1 +#define TileGroup_End 2 #define TileGroup_size 16 TileGroupRef TileGroup_index(TileGroupRef ref, uint index) { @@ -45,6 +60,19 @@ void Instance_write(InstanceRef ref, Instance s) { tilegroup[ix + 2] = floatBitsToUint(s.offset.y); } +Jump Jump_read(JumpRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = tilegroup[ix + 0]; + Jump s; + s.new_ref = raw0; + return s; +} + +void Jump_write(JumpRef ref, Jump s) { + uint ix = ref.offset >> 2; + tilegroup[ix + 0] = s.new_ref; +} + uint TileGroup_tag(TileGroupRef ref) { return tilegroup[ref.offset >> 2]; } @@ -53,11 +81,20 @@ Instance TileGroup_Instance_read(TileGroupRef ref) { return Instance_read(InstanceRef(ref.offset + 4)); } +Jump TileGroup_Jump_read(TileGroupRef ref) { + return Jump_read(JumpRef(ref.offset + 4)); +} + void TileGroup_Instance_write(TileGroupRef ref, Instance s) { tilegroup[ref.offset >> 2] = TileGroup_Instance; Instance_write(InstanceRef(ref.offset + 4), s); } +void TileGroup_Jump_write(TileGroupRef ref, Jump s) { + tilegroup[ref.offset >> 2] = TileGroup_Jump; + Jump_write(JumpRef(ref.offset + 4), s); +} + void TileGroup_End_write(TileGroupRef ref) { tilegroup[ref.offset >> 2] = TileGroup_End; } diff --git a/piet-gpu/src/main.rs b/piet-gpu/src/main.rs index 6a243e9..703e156 100644 --- a/piet-gpu/src/main.rs +++ b/piet-gpu/src/main.rs @@ -20,7 +20,15 @@ const HEIGHT: usize = 1536; const TILE_W: usize = 16; const TILE_H: usize = 16; -const N_CIRCLES: usize = 3000; +const WIDTH_IN_TILEGROUPS: usize = 4; +const HEIGHT_IN_TILEGROUPS: usize = 96; +const TILEGROUP_INITIAL_ALLOC: usize = 1024; + +const WIDTH_IN_TILES: usize = 124; +const HEIGHT_IN_TILES: usize = 96; +const PTCL_INITIAL_ALLOC: usize = 1024; + +const N_CIRCLES: usize = 10_000; fn render_scene(rc: &mut impl RenderContext) { let mut rng = rand::thread_rng(); @@ -71,8 +79,7 @@ fn main() { .create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev) .unwrap(); device.write_buffer(&scene_buf, &scene).unwrap(); - // These should only be on the host if we're going to examine them from Rust. - let tilegroup_buf = device.create_buffer(384 * 1024, dev).unwrap(); + let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap(); let ptcl_buf = device.create_buffer(12 * 1024 * 4096, dev).unwrap(); let image_buf = device .create_buffer((WIDTH * HEIGHT * 4) as u64, host) @@ -81,16 +88,34 @@ fn main() { .create_buffer((WIDTH * HEIGHT * 4) as u64, dev) .unwrap(); + let k1_alloc_buf_host = device.create_buffer(4, host).unwrap(); + let k1_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); + let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_INITIAL_ALLOC; + device + .write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32]) + .unwrap(); let k1_code = include_bytes!("../shader/kernel1.spv"); - let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 2).unwrap(); + let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 3).unwrap(); let k1_ds = device - .create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf]) + .create_descriptor_set( + &k1_pipeline, + &[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev], + ) .unwrap(); + let k3_alloc_buf_host = device.create_buffer(4, host).unwrap(); + let k3_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); + let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; + device + .write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32]) + .unwrap(); let k3_code = include_bytes!("../shader/kernel3.spv"); - let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 3).unwrap(); + let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 4).unwrap(); let k3_ds = device - .create_descriptor_set(&k3_pipeline, &[&scene_dev, &tilegroup_buf, &ptcl_buf]) + .create_descriptor_set( + &k3_pipeline, + &[&scene_dev, &tilegroup_buf, &ptcl_buf, &k3_alloc_buf_dev], + ) .unwrap(); let k4_code = include_bytes!("../shader/kernel4.spv"); @@ -102,6 +127,8 @@ fn main() { let mut cmd_buf = device.create_cmd_buf().unwrap(); cmd_buf.begin(); cmd_buf.copy_buffer(&scene_buf, &scene_dev); + cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev); + cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev); cmd_buf.clear_buffer(&tilegroup_buf); cmd_buf.clear_buffer(&ptcl_buf); cmd_buf.memory_barrier(); diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 4e9a567..eb67132 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -238,7 +238,9 @@ fn flatten_shape( let scene_pt = to_scene_point(p); start_pt = Some(clone_scene_pt(&scene_pt)); if !points.is_empty() { - points.push(scene::Point { xy: [std::f32::NAN, std::f32::NAN ]}); + points.push(scene::Point { + xy: [std::f32::NAN, std::f32::NAN], + }); } last_pt = Some(clone_scene_pt(&scene_pt)); points.push(scene_pt); @@ -350,7 +352,5 @@ fn to_scene_point(point: Point) -> scene::Point { // TODO: allow #[derive(Clone)] in piet-gpu-derive. fn clone_scene_pt(p: &scene::Point) -> scene::Point { - scene::Point { - xy: p.xy - } + scene::Point { xy: p.xy } }