From 3a6428238b0865a6b0fa1763a67e10a268285773 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Fri, 15 May 2020 12:28:29 -0700 Subject: [PATCH] Start writing tiles This is the first checkpoint where it actually runs a pipeline end to end, though it's far from accurate. --- piet-gpu/bin/cli.rs | 5 +- piet-gpu/shader/coarse.comp | 35 +++++++++++- piet-gpu/shader/coarse.spv | Bin 17556 -> 21540 bytes piet-gpu/shader/kernel4.comp | 3 +- piet-gpu/shader/kernel4.spv | Bin 20180 -> 20172 bytes piet-gpu/src/lib.rs | 107 ++++++----------------------------- 6 files changed, 56 insertions(+), 94 deletions(-) diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index 73f33ee..672b42d 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -41,7 +41,7 @@ fn main() -> Result<(), Error> { let fence = device.create_fence(false)?; let mut cmd_buf = device.create_cmd_buf()?; - let query_pool = device.create_query_pool(4)?; + let query_pool = device.create_query_pool(5)?; let mut ctx = PietGpuRenderContext::new(); render_scene(&mut ctx); @@ -62,10 +62,11 @@ fn main() -> Result<(), Error> { println!("Element kernel time: {:.3}ms", ts[0] * 1e3); println!("Binning kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3); println!("Coarse kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3); + println!("Render kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3); /* let mut data: Vec = Default::default(); - device.read_buffer(&renderer.bin_buf, &mut data).unwrap(); + device.read_buffer(&renderer.ptcl_buf, &mut data).unwrap(); piet_gpu::dump_k1_data(&data); let mut data: Vec = Default::default(); diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 4e4ff19..da25ce4 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -46,6 +46,17 @@ shared uint sh_bitmaps[N_SLICE][N_TILE]; #define SX (1.0 / float(TILE_WIDTH_PX)) #define SY (1.0 / float(TILE_HEIGHT_PX)) +// Perhaps cmd_limit should be a global? This is a style question. +void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) { + if (cmd_ref.offset > cmd_limit) { + uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC); + CmdJump jump = CmdJump(new_cmd); + Cmd_Jump_write(cmd_ref, jump); + cmd_ref = CmdRef(new_cmd); + cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size; + } +} + void main() { // Could use either linear or 2d layouts for both dispatch and // invocations within the workgroup. We'll use variables to abstract. @@ -53,6 +64,13 @@ void main() { // Top left coordinates of this bin. vec2 xy0 = vec2(N_TILE_X * TILE_WIDTH_PX * gl_WorkGroupID.x, N_TILE_Y * TILE_HEIGHT_PX * gl_WorkGroupID.y); uint th_ix = gl_LocalInvocationID.x; + + uint tile_x = N_TILE_X * gl_WorkGroupID.x + gl_LocalInvocationID.x % N_TILE_X; + uint tile_y = N_TILE_Y * gl_WorkGroupID.y + gl_LocalInvocationID.x / N_TILE_X; + uint tile_ix = tile_y * WIDTH_IN_TILES + tile_x; + CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC); + uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size; + uint wr_ix = 0; uint rd_ix = 0; uint first_el; @@ -172,6 +190,7 @@ void main() { y++; } } + barrier(); // Output elements for this tile, based on bitmaps. uint slice_ix = 0; @@ -193,13 +212,25 @@ void main() { // At this point, we read the element again from global memory. // If that turns out to be expensive, maybe we can pack it into // shared memory (or perhaps just the tag). - probe += 1; + ref = AnnotatedRef(element_ix * Annotated_size); + tag = Annotated_tag(ref); + + switch (tag) { + case Annotated_Fill: + case Annotated_Stroke: + // Note: we take advantage of the fact that fills and strokes + // have compatible layout. + AnnoFill fill = Annotated_Fill_read(ref); + alloc_cmd(cmd_ref, cmd_limit); + Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color)); + break; + } // clear LSB bitmap &= bitmap - 1; } rd_ix += N_TILE; + break; } while (wr_ix > rd_ix); - ptcl[bin_ix * N_TILE + th_ix] = probe; } diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index ded68da2d94dd24b0039ce453d730d2d17046c9b..ed005fcf53c3c20dd1d3a805994839aeea4d576a 100644 GIT binary patch literal 21540 zcma)@2bf+})rBvanGhgALhmH>-g^%%lmLOy+c23-k_l77WC97j7X=hW6cq&vD54ZW z0RWc=dSJT6>>;_SyH`_DyUXvF_qkwOBQZ|D4}e zHIA{>2$ZT?sv6noCr+9&X^T0-)3(@d`>k|XrfNFsvuw3^)j{7*>+k9vpsQwMxR^GE z@#(3RSl7xd5o#Y@88?kXTs*I=27OmJluHtX&cFZ)I5e}OzrCI9_$+&YUZhK z-czRz&ePsejjmRr&-iyFSx?@~Q2(}SMf{sNj;vOx#}0Q5_e^V4wKJp;p?y_{M1A6s*g`OE$H|4h%Aw#tjTId@03ZFBrSGv^MR z>Ox(on#aO*c2wKLiP!Mc=JlcG;Z4ACa`)Xp&pfU&^BZ05M4xesoQa4jOvAse+L1oF z?E>DXf7+D6zTU>Ra}G6+GPm8}@zdtDq2^Iu8y(!4CZNT)^NgY1;hvnVs7a{4h$9z! zc2DCruHUw5FYp|i=WQQ)+z*=DKbs)Uny7g!%(uPT4_^B<>L&LX=sANDO()Wr>L7US z^Nuv@2^twK%u=xOlXljxh*R(mxKJk)iDmkYL$6{mFJy>0Kg(#BqE0B~3C zdthVD)Pcii)gV2wyj&Z6Zj)~lepZt=EYUrF+napBymvSGf_d+2@&)rg*yQ!RZPlaj<~;4ybKu!sLtXv!=J@;~ysdf>o;+Rw zFRWdA^%|T@evW1HI+lA(MYDHJZwNTVBe;=deWSpMI~rWZ9oyvf`r4|Mi*e(MaqEL8 z4|n%ve!0hNG<)aVu-3$z7i0GXm+RiEg->eXhqUlx;9lG8J?81Cj)hNfzb@EAP0c(S z`?${Mc>Fx9^~vB_TsPre@G__F7TyP^w)JyjWaAxeMqlSKgF~lI92%TE`@nshYo3SS zy!l%-=C}wvJhL$bKC1Cf$Cycj-CcbL4xCOm+&ehn$YxHL>EFxCTjN>OUcD!Le$)QJ za890UxB*^Xe>c^9;q}*E-Hdj|P;zLmZh;R?3-73IgY#G(8t&}&kwx5&>K?V3wNU4F zKU&kHnacz4h4t#F9xBFk@&NPtesmG7`4BVy<4t{f@6ep#&YnJ$_Uea=#A~m9E*~14 z%4;F^w(2=}Zv%H!e`$`N+B@9eHJeTCTqAgqBv?ZZ5e-j3qNTQ-cfbKsYTX9jJB#Djkm|fd~?CIvEI*Z)!AsxXAJjB z@WTAts!QPWxC0vJQZU=QfiF|%PN9CedIPstSAqThU@6$ztE=G)thug1M|F6i=l*yCTA6znyv%)Si@m3X&uHO;E&Q~CPb60tHj>s(JAiESEL<1v zF_BuUF$~<^cqXo+fg^l;lQ#IP;4)sG6JR_x&=STH^T( zDC5b;6HB}6^WN=Ij#tZg@7d*e`FQ+C&|I(I%6><#R&&3b<$ecgKaW82RcXsJ%5md~ zZhY~I__&^NG>^5g_}$yo{A#WmTx}(6OVE#_t<1P3>E*^*i?uk9TH1JgM#I&OW9%{X zi9Hr^_78%w*f zHlRkHP=3#b#7eS^wZ|}E$Ex}_G(M8T)VbyYn!or5i+)#YxD)oadPw8 zfnH5N$L>O}-LbpUJ627b-z2f^Ufa~naSwXWi(|CuzYl%&I-1;C&8U6UcCGo$S~HHh&!Tzg&!eUhQ$6<6z{x}XjM^vmbKqV_;TM73yWv-W zJ7~tgiryNz2j$n++-pUD*L@w$`LsVmumAEjf4b&g3$FWBdUJB!uhGjL_d30`QFGmL z?_p}%7h?kBxwQeF-v;!?P&2OFdzo6+yEEL_;rqi!!B@a*3fP$5 z+sxaT$AKNMW=y&Fw=(95a7+tg&VsvFGDkl=_m2Uvu{|foAB6MYO5ON!@0V)YPph$G z5^ugnTKv7AD#d<1GOaD{go>24YqBlRaaqy}1`l>r#?z>w#{#^KG_~?Hz*!|VKN3>Vh)ZAx= z`PkopR#oTkF4ynhaQ8>@U4qRquHZ|<{k?Bvc8z;7Mb~X)V1E*mmG*HoWNiig<8Nx=H@EOxTDadk z8Sl04cTKp@9KUPAjsH}^H-!70Qri7aDfx3P+;5H2KCXrPJrTS4_$?7`K7LDtZwU8W zBHaA^mI&AGw?w#hzazr6`z;Z!-EWC-?S4yyYxldMhem{h}zx{p)H$T50!nOPT5bnE$-w*N$T!r4xhiKm8 zyhKAC)duecCwY<2VXUZj?J zE&=;|E7yD}TwQ4sKoF6=>?o>s?@LtDbT1)=n$uxe~5^MP1vgz-o?7y{`rv zPn-F8uT)Fi_kvsNbPbw%@_8TFnCcn#e(kg}pKIajm)7}w0IXK#a~<4x+RVp$wOaPe zhrnu?`@>+h>uK@%2-tOGjUNT;r*2H|_iFL~7+5VaKMqzqkv5tA<2(0_V0Fj)e2}|V zub)qV)qICe%umAAlf$RLYR0^a%FZ^*N&^_8DXC#6DljI(`Q3zPW=qQ|LcS zQ?rgQ9NE|p--H{}Z;dkUV`$nE z_gmmcX^uCpT%W}KHn`07JMh+ApFmT;t=+@u}Um|ck>ln17(092Ufa*fSuR=CD!#T`e$h#+J9Nw?H&6Z zea8N}w%I%OxAecGW$f>3yM6Nh1K1e3R{scAv#y>K`EO{(GB0t){uS)l@E5?@XI?jd zqy2@ZpEmPVOTOl2O#8(6JJ^^x&;J0cxmUAZHEWUenw$Ke^v1Jy{r{$ak(TxUr?%T? zzL&r^7yM=LEwtPNUjgf*epkJRUj?hrr&%NUzi7_q{Nlua4Qx$P-`Bxv{WSOgoAe&W z(e?&S%{b!3c?)cuTk9O&2CL;dT8#I+#L>p^O3gU_*V_4Ar@y%IO$WDb@w1m_e*5Rh z%ila~e&;2h#lg*U-MmMRg1g_<ftMc%{%9J6|g?)?vvH%Gq<)?X==_bPMkHs<=kt+?_qBB_^bt1&o#C- zSj}tRbLIEDTKva@%b4rHecq_YXI-$m;lJ~2=+HV$7s{%BzpCn+mpebTlJjVPO!R%V~zo2Q72^Bp<^tbPhDab|*zo&D1b?xyMMymEa!H>rz##?Jy<-|$nx=9N79;A;9A zPn(+Ye21~l_VT>vxCiv@cO2_a>;ZUrZU^DM(`2sMaDCJ><}|Q#=KK%A^;7r!57T?N zC$-I?shO8JaZU$2SNIw5vflIH`lzSg^TFz;&{E$s!PZw>Vx0w6cg=I@<*}UuR?9cv zd0@43XZ(~ZI{q{pA}zP z;L8epd5x3%6>!g?b-t3`<8^)&QkPTC>Rt5y#{6#j_PV$B>RH2kz*)o9V7Y7X?@jLo zkE2&Nx2x#YCa}V5=-)?EGxzKzf0ufHZHwQvwV(I>((eQ4+D7vp=D6#?=DmI$LwjO; z5bRu2TVi|&P1{A}Y>ex{#!+`)nCFLS^9Wqd{gGPFxR1getA0rx>tkT`8)&0hv++L; zHpfvk?E_TSzZcvH)|T@rPwqE?Qzzdst-Y~6fvzonpRE0?Q|b39bZx2Ar@^U{F|;Sf z&EV9jjByLPwygIvVB@H#PM-x^r*hn_XzCgFIk02ZQ>WX&>Sdii4^N%6r%qo0Ys+4j zC->XIsZ)8b?m*WTzdLKc*8OxBy0+Bmi(qr`ea0Bt6XR}h>Qv5k54yGs>OJr!uyNE= zr!RwB_tU*->Z#LxV8^PbPWOY=%Q}4pZjSB;?WxmO!P=~o_W^lwe*m01mFMa~bZzl_ zsP=2!PhUgVmO6bMoH`jpdty8cPMylR9zoZZIz0+Dj(Y0!4RGsz`X-us>hu`cvFfSQ zx4`OUogRm$PTEtaZ-cd2r_1T(-ebK7dav`|mwVk);InD^K1nb4UgzJ+zYAWEUS0bW z^lGWY(_ppMJ@-8{ZCUg8!L57a2WaY9^AEv}Q_q@z1XeHC{A0Ll_FkhsYyJsXn|WSB zFHfG&fRkr=zJ7|XEqVS7+ zJYEH7thO=_{j_K7Yhc$I{yMmf^#(k#^wpj`9H%{F-vqmc@VCI`n(wK%!TPBCPPG_+ zAN26uRU1-6^PVD3oDpDi$~lpHP8_eVeSF)&=Af^CUzNu;5}Y{xtyQkgcgay;--q)z zrX}HO{+k42`L|fL_>Ttb{}}f+$1ej{^WQ8yispV;7VN!D+u8LzW5DXh^zXTH*D{vg zwb=U`>GJd|&^)xSSljKho|V9^BYb7B=j2Z2pF(b{!1YnL#`5K8#&#}o*0U;EV3rPgm8SYPj_ zzAI?YJmbOTJnO*KtZmM-TKw0o{lnLTm-}#ixIXIc!wu;@tdX`2Xlm9-oH!eS%e^I! zZDX+YN-Z`4t0hNmYM#r)wwHUJoy)%53!bOe-!WXTfA`P(l(pL&?p{pIw}4x7uX}Cg zsNa@gZN~853do&rD|+X*ckgXOzb(x}`*yY6-WvJuaJHuD=QwfZ*a2+pT=P4E)y&sg z?+o@Zj<%g>YQ_;K&MsirX+6z-SFl>@sZA~ZyMfg*@9to={i%cV?g{pAUTu5O)SOrB zI(Zus8dvex^+t+jsI;G!529B~ z+#|r{?+sJn`egi(V13l{d&5y+^;5XV_{{hIaWuRet}U^T0joRj;q>y@js>e_&BuY& z&Lu{EZ#W*V?)bCO<&O9Fh7-YR+D-t=^LxWdV1I8=*WQnxTIN0(tmb|@1?=%U_cwL9 zzY}!Qx7WSrPoBBE!I^t1Se|$1Y2fu3r|#Tc^lFav-x*8?-$1>z^?>EcYbN-n+BO3$ z_xC+gt$M*H&^tz3;>`l98PB-##5)zNuQub#JzstU_JPgKdr$oP;p*1ke*--LR?A$o z!O49PEI0OP^!g^(A@Jdh*GHTC{uFxaXrKA!g5A^kTjA+owY;l)KRKiJ$@6<2Tuoo+ zcD!17t$EGKx1x7W?-!YKKDhiX?@YLwe~UDpHZ{llw`0dYOE1sgmet?n>f4Ftzi0IK zp=D?uqiH)5$M2M-X^vl(KL57yB=F(|pVq?t|0I_7xh?#x7JfkszpUU_fv>K4&ig5F z^D57K7hL^e)?&@OX=+nx*2J9EQo~-bTJ902g4Mh)`fTf?@2AbAIgj>?9RO=Hu4|Xa zHXB^V9fGU59};&CIB~US>@ZlHajl^|w$s67+<9;{zqJx~J~(l;XY83^ZN~Ln$YVPj zT*f^Yu67PBanA!MuJ(*QAFR!|o>O^j7lO;U7sJ&qq9yJn;KbFQv6q6i8P~lbkL_}B z8TVaqwJT_e`)+XJYR}j!!P<=L9+SuR99zD9pt*P5Kh7!FFEKv|_IxDfhrnvFUk|pfv40q>mN`EHc8q%F{3zHt^X?+o zFEMWbThqk+7+5X#kAtmE>^FkdGUrWT$J|KEoSy(YXRbB5e#Tsi{*yHAZ*d1q9`jkSTGnljYVrRZSS|OM+rVnGXxXcu2WPKp&)6@3wYhIxt30+lz-8P!;cBPR z68A1};%d*>FM_oh*P6*=y9Zpx{SsWQyvKYQp19gG_Fk|y<63)pZ1;o9xL<**okL5$ zUj-+w_KbZ1tj)NdLwRfufy=mGgR7PIn6JYVS9``j4Ay2`_kujOZ-A|N_QE&8?hp5Z z{&Icd{}|X>=9>8ySS|L)!PX@9Z-doh{|?x-#r_0XE%qnD<{0}^V719y5zEnkm*$+t zb*y%Cf13V#G;`1WK(4>Bz5c#W)9$q?&b2ubyavr*clb zV>I<#D?b6Nd9CD}KLgIW(Vq4G6s*mh#K~zX@Hm>sTD0V}HrSlT(|f-dMgJUaaawZx z1z5i_$6vzLbAA2_tmgjl{n7Ey($qcg&(%DCPy01|8JfDe%9E?LU61Cm4lTK^3pUsF z>CJU4{qwXk*WbbQD|7ulTs^t|0j!o>$AJGxQ#V&*%JWYDC-6!%b#s*`*A2j%(L6S$ znd^r1o78wC`b}x(y*Yhq`d6?%Wu7m<)pIWY26k+|k^T-=Gw-bBA7IC6v+m~ePnx>9 zij(UW;H_yMThfy2R$%knhCa`X7s1vw{FQ<)wiy35gQh*}`48AQ<$7L%t7jj-3|4a= zTW9A`%X;4gt7UC(fz@WwvhUvpzd>^z?HTLGur}kGt30+guv&6$hnv4K^q1?SzcuQp z?bc76`fZPGCz{6&H0!rrqi@!4XK?B@ig9KAMz`>>1z!oga?O+D67U^~XO3CNl4$Cw z=Tcy`)YCcD;=eRlE%jUmtX9@@S$OKHJ!8j!wOJo?m&djoxQx3zTYaZY+Z7mR>g;dtTN6JN`Pb^U3vbyytSw z+U|K4=RA+VHi71`2hH=mEB&4|-kp9gn%B(U^f~9_h*4e>N$t&g4J>k zjiHt`ZUk1#Iouem);x#JZ~0B&=2OnIDVloD$7bLSY5Ew$dZ_7Fe#?%)eh|%LBF(k$ zOMgI(_oqLQ=GqUY&)PR9PPz6i;EpMC+7eAYYu^g2mbDv0Eo?A|$wX8n$#w|?%eNyJJ0^f{#F zS>rx%W0dvV7fn6&+YhW()^8#__0#Tp)cmbEu@3;Z#y${DJ@q>Xtd{!iLoNry)s1QX za_hGrwnM?LQ=4m%r+$Zly}lfyt+jq9VDmVZmiir6W8Wo?r&+%f>2vKJL!7dHCl}oP z+f{Ju+gxr&{u5Bmw= z<7v+IXR;nie-d2X=ZDw!iC}Bzy0j&SPOzHyd+U4(SWREoEZ27uwy9v(q|NoqbAG0Q z_19)C$;FSxaL#A>a%FZu(oRc L&X~H{XMg-3Y(pb` literal 17556 zcma)@37nQy`G#MZ8AK&qaKR116*t_)J6boW)N^m%`M9^%Pg}J z+f36I+smw6+BD13N-LMj%J$7lE9<(x@4eyh_3!ulzj{3PeLv56&U2n~-em@C8@=-e zRW+&_%Rf(!t{T^7)o7He+PE6i=sV|}JZIYQ$iis{9lW0oi*uY{=R|U zu7!O=y*(p+XZ7kgw(#p4=;>dvuyp9aqs*a$xmj2>b6{X_4zW(|U3^mSq6)R6p_|g+ zqD8~KBNa*qYhBN`k-3`Ja#rn1?aY*D*XYG`N%;JF4eVoAu&!4fS>}oQlJW zo{@dl&t;z#`|p1QW7-&1RYN3a9-G$VkL&C2H(~Qt^D6V505_g~+Q#sYn%B_c1>Ide zgZ+a;%{^8x)+zlNPv1G=h+Vb<~-x8$?J`E7ym2MmI%#UcPwJ;vR(`q_Wz5q z^V%3=s@=fH^bO1&7#`^!=;>{&*nLs=TDRWu)!y*Jx2tbpVehJ}xiAo6^K=RfmI{>uj%Ps&}t6b>fbxj-_9F&W>5Zoo)_IUEG~B2zJf06uEqE%%LZi zrEBn$n*4z9)0(_d2K~=y@`GYu*yN4n8+$2ym?pl6J~?03^s94judZnFwew!p zrpedNdtH;)^R`ttz?<{5S6=`x?;h%2MsJSytjF7`2jI!$A@I7|wO8MVQ~qJg=Dx|j z7NFU?j)w(Y;>YN-z9+$n`xLm0`@1Hu*Vk74W$n23>R({u*XNe^3g`8quZ~|YUVAlHJ~X(1R|@vF>J)fi19wzi&G8HRMwWFiXA_+3 ztfn^1v!I73vMHc5-h0s~4c7MRzLxR#x9|tnu#@}2CvJ%t$GH&iaXXg&w@FM4g3#vPPY0# z)f>3I8pQ{J-!3c#TYEJczAj#SH3Pn=erIc+xt_hPS_$V--iW&zyt;uqs&l~X+i-th z&)V4MH??Km!_BjMJ-p%2JmQV{*Pt~Ii!sMkcfhlL)?C-1qq?Wib3gw8ZQe*vf1V3+ zujOd=uKkf(b1#1ap4s0&*t7Qe+tl>!RqUOP9ts*^d? z+GzUEqTgw)t;wep{CnUsUY=!TJhjC0xmL!L8;>n*%;$5-@4Rhm?su)+?-cX-K5HrG zOO49;)H0v-D(90sU+ho7jlT`edRSAhGM-xet>sQlmqtEvw@ zC)2zpVevb;sri*%HMrVV*fyk}Oxv1q8_~;+Gl|^HQ!Q-@KI7o(#xeG$^ohM0*tlxO z+nnBb?)@bGJAqzbbz^NoujV*oO{7mO>tif6ecc~z>hekSsiS-+xUsYwYd8AD+8t~x zwaNHSt-1CotaFdrrk^&)PorLV*XT!r5}m;BE)cMp>HbKsoOe}i4S>(sv*)yz@d_zhlb z+DF&8tm8&#`o-U8h|(5hx*Z_%?gE~HzCCev0~^C!jJrGB+Nv8z?zf7X_PuKCnArU; z(H`zMNy+^dk!Su{aPQmjIq-jTna?Z0#&#|8SJJCF#`({nHJ$ilB9WVFWpd9}v_}=*F|97zG zqIv(dR~^Ka`+PDV`^`L9)dfe%^;-=0oFw1#;FAk}A$&*p9-JQc`6jq$H}4G3uI~o9 zIXq0yzYWct<-Q-(_GWdSIo}WNf%m4(7`_i&yD@%R`>Pp4?mK~hdDe6^dTZe2nf^P9 z_o%);+x#wX)}eWi`u!cQeTx?EH+E_F8@uFwV~2aM`7IspGtY17aO2M``0jAOjZ3@V z#w90@AqrC zcE4Z4wLj6q{a%gTcbDI)C4ai$o?*XNW0(8QT5`WzOa9M-`-~pNL5bb@Mi+byd`7{w z&n&p}`Td&la=&52U7z2vCHEV)WQ-nxOEMiqNyj&W?R-vzu`X#?DZ_`)wTmo&1a7|`!pGBO|!-+a5dw^ zXD7IGWlwen>!+S^yMonypPH}FAhpEU18j`&J;C}|7vD2`)27n&bG%%i_)i1tKaces zNN)Rr)%5jwCO?c=epBoZK9pWvyU#zh#5xdcF3#=!aS&J^zxm>GFkC(P9Rl{bKNqVFp;u z^Ws@J7VPC&&~^;%MKsTXICH%KY>l(e$AQ&m(X9J!#OMU8o2%~$xpnv3ZZ?=7$s78K zXx{fPL{s;2%uKM_Gt6yX=3p+d&1tdell)!`c6@4g0{BSnG{?*JF?PZlC#H1)f;&rQmI7uc^;SKUg32+v+pC46MEp%^JxU(VWlu z#fd)vwkE0XAXqKW!ZX2M#?iK%re+**;tYX}b90@;Fjy`3(F$MX5HdvFVFn;kCT@_=h;@nlg~L|^T~NX7wmagkI#8vb>};qULM=4!H!Q|&IhZd zE*FBmtc$h_XlkxUoH(xm8~eAs^}VMr0=qWf_pVjDYsy&r|Fx!zv6VSp0#{2;mw~;^ zN!z6~HFFXtr`LkZ+FTB|HtO+t9aug5^-*TJXL)Z_C`u)5=~rI*L{F0fka_HMA6dvD#=fKxYZ*VEohvu@(VxdB|}@gBH) zrtZA*IW*6bF&%H5jDH{4@!{_Wn^*3S4}jeV{fsBqC;lG<>z{9ko4{)N-b^pgH^hg) zenY5hzmZ-ov2FopEq+7X3fHH6*MAtUp8P%nc26B^Ou0UZ`B88)X7d~3V{l`(enZ@b zrY&(l4t75sZ(O-PiTep~S+`HZtwYY{r@(5S&AjJ74OTbaXXxd5&wm!|dtN>7`OksX zy^`1G;qI^V8%M5RxxenSJolNsJomi)|9X~u0h?#t*tgTmv*tU%YB{rC1gquDei`iL zb4J^jXlkB0u{kWRxq9N>4fftmo$djvnZsT5^7wuQT-N=oaC6G@_-kN&)V+_sL7$wo zeVwLePGaNw4faj2b2;aI^zzL4EwFQHyPsZu7*B1-eH-lWUL2!Mp9kpGv$x*?ySM7u z+wX$ay&Q8dSS@q@06eka4};Ai=j#!$KI)g(@BANv)gPoK&X2&x&iVN<`1>?{omZ}p zdy~4@XZ)jJ>l^+U*u0X*<6t%YjHgY_cuVN5v%S2x=AHq4pP={bB=%3><=*}jz6~vN z{S2&+dd55ncFyeo&%yesyZ^tW_wr0?`vpzSyu^v~E3k8g{~BD@`!{fX)Kl-@g4L~e z>iave_0^VGzXz+k=BMc8vHcOOmY@0l3|9LS&HG^@x&H;M?)X2@%N?)nuV6K8Pt(gi zcYag+4ZJ}OR9Cgoyd5))TNZ@kr zO=>;kHibJ@{gOJ?=3sSyXFHBH8-D`W9LLhM4^UZukGlm}TlQ0)+_wa$PWhdZv9?0j z7Qd})KkHQbO+?q0I&GsLKGw+?+7n}2Z8Ymt#@G%`Th_Zh*f{E`(+*(kRF0d3rk-&- zf*q@#I!y+vmvx!~Po1=Uf|aAv^Sc1>a-8ovFfSQG_ZPEr+wk(=y}kd zI_(G6W}SQv$dmj2;MA$ys{_!r#qYq{uk}0~gsv@hI#|EDPR7ul7>8)1wVtQxXxdVz zL&3&TPn`|}x1OiN(bQ9?BfySTPo0hgtCw{;3Z6P?Po0hiYqL&1BjrA0{nuxm&%QkC zjzP<_ZYEgnvu*-wKNh?Ty}Gex(5s~mF955xp1E0Q+Op>3z^!Ma6HPs9J|65i^{ja| zSiM~H3*oNWXN~r(`9)xD=IOgbo;>G(lV^E0UW~3Sd7c1n-J7{+>dA8+*m3H~^F*+E zndeFHv<}gdh$FC>^Sx0`7*G2 zndj+n^K>5V$@Ar4ZRY8{CQqKP04GoXtU7tV5?x#JJOkWX&n`6e3$4>8m}l9H%{F`@!yg@>mAXSZ!q<`f1PD0kG=~AB2~&mctWEU+u}maoRKXOt5PR zAA*}}ex4eJ>!a>B)e3qqzg@MB(A0dUh!f{5usLN<m;UcilGHvJ9^IQy8 zH|F{Da@TSRz3aC3cchomzn12u{qovwpY^;B>^j0<4|Y#J#{ApSUjf%g-5Sd;r5W3~ z#97Z9z^*Z|-Uv3f-|epDO0b%HAD=gYt+7A9=yMfVKXvczy~yFs;4XUOYtMXd0lWS` z@?C1ZuLkSu^VIJW?V0DT;Bua8;A+;^I%-pk|J%T7;co|*=kQv%KI)#s>*&3#k+yfx z)U1&>ao!0o&z3y4cY&=}YVmHcT5{B;=3XYYz1)3vF8lHj&dgrtEtbKs~Mw*xQ57u^jYjP9)`)T?)PMkSz2Ct#z z9{-S-X1><;RrqeKPlAmb{wZ)->rcb2wSL;-_ZhIZ)LI_fXTfESJ_lDzjcx}gS8boC zshO+TT-_hJx;!R?XD#p9JHafc@%PHK|}YKi+5@HPYr|0-OcjQ<)~ANBlx;p<@a2N~x(-si&yl_bR?Bys_szF!pFGmnwNhEbvk~2d;9-Znd>I>`8!UBgEuJn%oaYUg`d>IPix_4wD5%m_wUp! zt+{8>pJ$JvnU`zIx{t2)JMr? zeGyojab3GSwikoTxO3rZp5w%w2Tok=8G9mFn{ln7Jhqd;W!#s*)%^J(abF5fT823ym_JO`{6`?+9i6Z?4$yl&1{!yTiZInM_>XYMt*e#YFG{sNlz=Xj2Y z^URq5-iqe61@b z(^7|P!HKIqW8VSRW?c7B9@{&?W!!hc)#lR@_ub&c)t<4}gS8pgvmlS{Jz#5|v+!Q9 z=fkt0zg(aAzYlCJbI-gVtQPwRz}6)88^LO^e-P~2V!sKj7W>U$bBz5%V6}PN5u4NB zLUT^zI##>6-%9^snz`qBAlKj6-hUsVY4_d~=iVFx-k#>QEzNs#8~W{N-kUqnr#`pU zy7fuU9|x=FUik!A&3h$#^GR^_MtjD73arhX#K~!6@MN0TBwBLX5o}IV=zU&{rT+qL z16p$YELguX$IrplbANswtTvzKpMg34cAC2TZcKUpjOY&Vc$&Jo%9E?L-G%106D_&! z3^vzY>CM&W-95B2*Du5MD|5XQuAW@)0;|oZnd_$Xchl6()tK`9j`u6zt!V1zDo?Jv zf%m3)?Ljlw-RbwN@l^V~Xy&~SeQNr3us&s;-+-%UFYg6AHb48^2UeR;%UZq(cAU2S zS^WKAb#oOb*JV^4NY4F5~_Iu66<~ zasLQTTzm+{J6#tjohH}4SLz^Z4 { coarse_alloc_buf_host: D::Buffer, coarse_alloc_buf_dev: D::Buffer, - /* - k1_alloc_buf_host: D::Buffer, - k1_alloc_buf_dev: D::Buffer, - k2s_alloc_buf_host: D::Buffer, - k2s_alloc_buf_dev: D::Buffer, - k2f_alloc_buf_host: D::Buffer, - k2f_alloc_buf_dev: D::Buffer, - k3_alloc_buf_host: D::Buffer, - k3_alloc_buf_dev: D::Buffer, - tilegroup_buf: D::Buffer, - ptcl_buf: D::Buffer, - - k1_pipeline: D::Pipeline, - k1_ds: D::DescriptorSet, - k2s_pipeline: D::Pipeline, - k2s_ds: D::DescriptorSet, - k2f_pipeline: D::Pipeline, - k2f_ds: D::DescriptorSet, - k3_pipeline: D::Pipeline, - k3_ds: D::DescriptorSet, k4_pipeline: D::Pipeline, k4_ds: D::DescriptorSet, - */ + n_elements: usize, } @@ -213,10 +195,10 @@ impl Renderer { let coarse_alloc_buf_host = device.create_buffer(4, host)?; let coarse_alloc_buf_dev = device.create_buffer(4, dev)?; - let coarse_alloc_start = 256 * 64 * N_WG; + let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; device .write_buffer(&coarse_alloc_buf_host, &[ - coarse_alloc_start, + coarse_alloc_start as u32, ]) ?; let coarse_code = include_bytes!("../shader/coarse.spv"); @@ -227,72 +209,11 @@ impl Renderer { &[], )?; - /* - let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev)?; - let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?; + // These will probably be combined with the ptcl buf, as they're all written by the + // same kernel now. let segment_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev)?; - let k1_alloc_buf_host = device.create_buffer(4, host)?; - let k1_alloc_buf_dev = device.create_buffer(4, dev)?; - let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_STRIDE; - device.write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])?; - let k1_code = include_bytes!("../shader/kernel1.spv"); - let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 3, 0)?; - let k1_ds = device.create_descriptor_set( - &k1_pipeline, - &[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev], - &[], - )?; - - let k2s_alloc_buf_host = device.create_buffer(4, host)?; - let k2s_alloc_buf_dev = device.create_buffer(4, dev)?; - let k2s_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * K2_PER_TILE_SIZE; - device.write_buffer(&k2s_alloc_buf_host, &[k2s_alloc_start as u32])?; - let k2s_code = include_bytes!("../shader/kernel2s.spv"); - let k2s_pipeline = device.create_simple_compute_pipeline(k2s_code, 4, 0)?; - let k2s_ds = device.create_descriptor_set( - &k2s_pipeline, - &[&scene_dev, &tilegroup_buf, &segment_buf, &k2s_alloc_buf_dev], - &[], - )?; - - let k2f_alloc_buf_host = device.create_buffer(4, host)?; - let k2f_alloc_buf_dev = device.create_buffer(4, dev)?; - let k2f_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * K2_PER_TILE_SIZE; - device.write_buffer(&k2f_alloc_buf_host, &[k2f_alloc_start as u32])?; - let k2f_code = include_bytes!("../shader/kernel2f.spv"); - let k2f_pipeline = device.create_simple_compute_pipeline(k2f_code, 4, 0)?; - let k2f_ds = device.create_descriptor_set( - &k2f_pipeline, - &[ - &scene_dev, - &tilegroup_buf, - &fill_seg_buf, - &k2f_alloc_buf_dev, - ], - &[], - )?; - - let k3_alloc_buf_host = device.create_buffer(4, host)?; - let k3_alloc_buf_dev = device.create_buffer(4, dev)?; - let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; - device.write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32])?; - let k3_code = include_bytes!("../shader/kernel3.spv"); - let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6, 0)?; - let k3_ds = device.create_descriptor_set( - &k3_pipeline, - &[ - &scene_dev, - &tilegroup_buf, - &segment_buf, - &fill_seg_buf, - &ptcl_buf, - &k3_alloc_buf_dev, - ], - &[], - )?; - let k4_code = include_bytes!("../shader/kernel4.spv"); let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3, 1)?; let k4_ds = device.create_descriptor_set( @@ -300,7 +221,6 @@ impl Renderer { &[&ptcl_buf, &segment_buf, &fill_seg_buf], &[&image_dev], )?; - */ Ok(Renderer { scene_buf, @@ -312,6 +232,8 @@ impl Renderer { bin_ds, coarse_pipeline, coarse_ds, + k4_pipeline, + k4_ds, state_buf, anno_buf, bin_buf, @@ -339,7 +261,7 @@ impl Renderer { cmd_buf.dispatch( &self.el_pipeline, &self.el_ds, - ((self.n_elements / 128) as u32, 1, 1), + (((self.n_elements + 127) / 128) as u32, 1, 1), ); cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.memory_barrier(); @@ -357,6 +279,13 @@ impl Renderer { ); cmd_buf.write_timestamp(&query_pool, 3); cmd_buf.memory_barrier(); + cmd_buf.dispatch( + &self.k4_pipeline, + &self.k4_ds, + ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1), + ); + cmd_buf.write_timestamp(&query_pool, 4); + cmd_buf.memory_barrier(); cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc); } }