From 06cad48dcaa13789fc1c6bdb26a715e8a9853c70 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 14 May 2020 17:06:45 -0700 Subject: [PATCH] Start output stage in coarse pass Still very much WIP but it's progress. --- piet-gpu/shader/coarse.comp | 82 ++++++++++++++++++++++++++++++++++-- piet-gpu/shader/coarse.spv | Bin 9136 -> 17556 bytes 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 3ca7b5f..4e4ff19 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -42,10 +42,16 @@ shared uint sh_elements_ref; shared uint sh_bitmaps[N_SLICE][N_TILE]; +// scale factors useful for converting coordinates to tiles +#define SX (1.0 / float(TILE_WIDTH_PX)) +#define SY (1.0 / float(TILE_HEIGHT_PX)) + void main() { // Could use either linear or 2d layouts for both dispatch and // invocations within the workgroup. We'll use variables to abstract. uint bin_ix = N_TILE_X * gl_WorkGroupID.y + gl_WorkGroupID.x; + // Top left coordinates of this bin. + vec2 xy0 = vec2(N_TILE_X * TILE_WIDTH_PX * gl_WorkGroupID.x, N_TILE_Y * TILE_HEIGHT_PX * gl_WorkGroupID.y); uint th_ix = gl_LocalInvocationID.x; uint wr_ix = 0; uint rd_ix = 0; @@ -109,20 +115,90 @@ void main() { if (th_ix < chunk_n) { uint el = BinInstance_read(BinInstance_index(inst_ref, th_ix)).element_ix; sh_elements[(wr_ix + th_ix) % N_RINGBUF] = el; - probe = el; } wr_ix += chunk_n; } // We've done the merge and filled the buffer. + + // Read one element, compute coverage. uint tag = Annotated_Nop; AnnotatedRef ref; if (th_ix + rd_ix < wr_ix) { - uint element_ix = (sh_elements[rd_ix] + th_ix) % N_RINGBUF; + uint element_ix = sh_elements[(rd_ix + th_ix) % N_RINGBUF]; ref = AnnotatedRef(element_ix * Annotated_size); tag = Annotated_tag(ref); - probe = tag; } + + int x0 = 0, y0 = 0, x1 = 0, y1 = 0; + switch (tag) { + case Annotated_Line: + AnnoLineSeg line = Annotated_Line_read(ref); + x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x - xy0.x) * SX)); + y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y - xy0.y) * SY)); + x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x - xy0.x) * SX)); + y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y - xy0.y) * SY)); + break; + case Annotated_Fill: + case Annotated_Stroke: + // Note: we take advantage of the fact that fills and strokes + // have compatible layout. + AnnoFill fill = Annotated_Fill_read(ref); + x0 = int(floor((fill.bbox.x - xy0.x) * SX)); + y0 = int(floor((fill.bbox.y - xy0.y) * SY)); + x1 = int(ceil((fill.bbox.z - xy0.x) * SX)); + y1 = int(ceil((fill.bbox.w - xy0.y) * SY)); + break; + } + // At this point, we run an iterator over the coverage area, + // trying to keep divergence low. + // Right now, it's just a bbox, but we'll get finer with + // segments. + x0 = clamp(x0, 0, N_TILE_X); + x1 = clamp(x1, x0, N_TILE_X); + y0 = clamp(y0, 0, N_TILE_Y); + y1 = clamp(y1, y0, N_TILE_Y); + // This loop draws a rectangle to the coverage bitmasks. For + // line segments, draw more precisely. + if (x0 == x1) y1 = y0; + int x = x0, y = y0; + uint my_slice = th_ix / 32; + uint my_mask = 1 << (th_ix & 31); + while (y < y1) { + atomicOr(sh_bitmaps[my_slice][y * N_TILE_X + x], my_mask); + x++; + if (x == x1) { + x = x0; + y++; + } + } + + // Output elements for this tile, based on bitmaps. + uint slice_ix = 0; + uint bitmap = sh_bitmaps[0][th_ix]; + while (true) { + if (bitmap == 0) { + slice_ix++; + if (slice_ix == N_SLICE) { + break; + } + bitmap = sh_bitmaps[slice_ix][th_ix]; + if (bitmap == 0) { + continue; + } + } + uint element_ref_ix = slice_ix * 32 + findLSB(bitmap); + uint element_ix = sh_elements[(rd_ix + element_ref_ix) % N_RINGBUF]; + + // At this point, we read the element again from global memory. + // If that turns out to be expensive, maybe we can pack it into + // shared memory (or perhaps just the tag). + probe += 1; + + // clear LSB + bitmap &= bitmap - 1; + } + rd_ix += N_TILE; } while (wr_ix > rd_ix); ptcl[bin_ix * N_TILE + th_ix] = probe; diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index 6bd6f61d36ec451435b82a1cfccd01f9752b4d9f..ded68da2d94dd24b0039ce453d730d2d17046c9b 100644 GIT binary patch literal 17556 zcma)@37nQy`G#MZ8AK&qaKR116*t_)J6boW)N^m%`M9^%Pg}J z+f36I+smw6+BD13N-LMj%J$7lE9<(x@4eyh_3!ulzj{3PeLv56&U2n~-em@C8@=-e zRW+&_%Rf(!t{T^7)o7He+PE6i=sV|}JZIYQ$iis{9lW0oi*uY{=R|U zu7!O=y*(p+XZ7kgw(#p4=;>dvuyp9aqs*a$xmj2>b6{X_4zW(|U3^mSq6)R6p_|g+ zqD8~KBNa*qYhBN`k-3`Ja#rn1?aY*D*XYG`N%;JF4eVoAu&!4fS>}oQlJW zo{@dl&t;z#`|p1QW7-&1RYN3a9-G$VkL&C2H(~Qt^D6V505_g~+Q#sYn%B_c1>Ide zgZ+a;%{^8x)+zlNPv1G=h+Vb<~-x8$?J`E7ym2MmI%#UcPwJ;vR(`q_Wz5q z^V%3=s@=fH^bO1&7#`^!=;>{&*nLs=TDRWu)!y*Jx2tbpVehJ}xiAo6^K=RfmI{>uj%Ps&}t6b>fbxj-_9F&W>5Zoo)_IUEG~B2zJf06uEqE%%LZi zrEBn$n*4z9)0(_d2K~=y@`GYu*yN4n8+$2ym?pl6J~?03^s94judZnFwew!p zrpedNdtH;)^R`ttz?<{5S6=`x?;h%2MsJSytjF7`2jI!$A@I7|wO8MVQ~qJg=Dx|j z7NFU?j)w(Y;>YN-z9+$n`xLm0`@1Hu*Vk74W$n23>R({u*XNe^3g`8quZ~|YUVAlHJ~X(1R|@vF>J)fi19wzi&G8HRMwWFiXA_+3 ztfn^1v!I73vMHc5-h0s~4c7MRzLxR#x9|tnu#@}2CvJ%t$GH&iaXXg&w@FM4g3#vPPY0# z)f>3I8pQ{J-!3c#TYEJczAj#SH3Pn=erIc+xt_hPS_$V--iW&zyt;uqs&l~X+i-th z&)V4MH??Km!_BjMJ-p%2JmQV{*Pt~Ii!sMkcfhlL)?C-1qq?Wib3gw8ZQe*vf1V3+ zujOd=uKkf(b1#1ap4s0&*t7Qe+tl>!RqUOP9ts*^d? z+GzUEqTgw)t;wep{CnUsUY=!TJhjC0xmL!L8;>n*%;$5-@4Rhm?su)+?-cX-K5HrG zOO49;)H0v-D(90sU+ho7jlT`edRSAhGM-xet>sQlmqtEvw@ zC)2zpVevb;sri*%HMrVV*fyk}Oxv1q8_~;+Gl|^HQ!Q-@KI7o(#xeG$^ohM0*tlxO z+nnBb?)@bGJAqzbbz^NoujV*oO{7mO>tif6ecc~z>hekSsiS-+xUsYwYd8AD+8t~x zwaNHSt-1CotaFdrrk^&)PorLV*XT!r5}m;BE)cMp>HbKsoOe}i4S>(sv*)yz@d_zhlb z+DF&8tm8&#`o-U8h|(5hx*Z_%?gE~HzCCev0~^C!jJrGB+Nv8z?zf7X_PuKCnArU; z(H`zMNy+^dk!Su{aPQmjIq-jTna?Z0#&#|8SJJCF#`({nHJ$ilB9WVFWpd9}v_}=*F|97zG zqIv(dR~^Ka`+PDV`^`L9)dfe%^;-=0oFw1#;FAk}A$&*p9-JQc`6jq$H}4G3uI~o9 zIXq0yzYWct<-Q-(_GWdSIo}WNf%m4(7`_i&yD@%R`>Pp4?mK~hdDe6^dTZe2nf^P9 z_o%);+x#wX)}eWi`u!cQeTx?EH+E_F8@uFwV~2aM`7IspGtY17aO2M``0jAOjZ3@V z#w90@AqrC zcE4Z4wLj6q{a%gTcbDI)C4ai$o?*XNW0(8QT5`WzOa9M-`-~pNL5bb@Mi+byd`7{w z&n&p}`Td&la=&52U7z2vCHEV)WQ-nxOEMiqNyj&W?R-vzu`X#?DZ_`)wTmo&1a7|`!pGBO|!-+a5dw^ zXD7IGWlwen>!+S^yMonypPH}FAhpEU18j`&J;C}|7vD2`)27n&bG%%i_)i1tKaces zNN)Rr)%5jwCO?c=epBoZK9pWvyU#zh#5xdcF3#=!aS&J^zxm>GFkC(P9Rl{bKNqVFp;u z^Ws@J7VPC&&~^;%MKsTXICH%KY>l(e$AQ&m(X9J!#OMU8o2%~$xpnv3ZZ?=7$s78K zXx{fPL{s;2%uKM_Gt6yX=3p+d&1tdell)!`c6@4g0{BSnG{?*JF?PZlC#H1)f;&rQmI7uc^;SKUg32+v+pC46MEp%^JxU(VWlu z#fd)vwkE0XAXqKW!ZX2M#?iK%re+**;tYX}b90@;Fjy`3(F$MX5HdvFVFn;kCT@_=h;@nlg~L|^T~NX7wmagkI#8vb>};qULM=4!H!Q|&IhZd zE*FBmtc$h_XlkxUoH(xm8~eAs^}VMr0=qWf_pVjDYsy&r|Fx!zv6VSp0#{2;mw~;^ zN!z6~HFFXtr`LkZ+FTB|HtO+t9aug5^-*TJXL)Z_C`u)5=~rI*L{F0fka_HMA6dvD#=fKxYZ*VEohvu@(VxdB|}@gBH) zrtZA*IW*6bF&%H5jDH{4@!{_Wn^*3S4}jeV{fsBqC;lG<>z{9ko4{)N-b^pgH^hg) zenY5hzmZ-ov2FopEq+7X3fHH6*MAtUp8P%nc26B^Ou0UZ`B88)X7d~3V{l`(enZ@b zrY&(l4t75sZ(O-PiTep~S+`HZtwYY{r@(5S&AjJ74OTbaXXxd5&wm!|dtN>7`OksX zy^`1G;qI^V8%M5RxxenSJolNsJomi)|9X~u0h?#t*tgTmv*tU%YB{rC1gquDei`iL zb4J^jXlkB0u{kWRxq9N>4fftmo$djvnZsT5^7wuQT-N=oaC6G@_-kN&)V+_sL7$wo zeVwLePGaNw4faj2b2;aI^zzL4EwFQHyPsZu7*B1-eH-lWUL2!Mp9kpGv$x*?ySM7u z+wX$ay&Q8dSS@q@06eka4};Ai=j#!$KI)g(@BANv)gPoK&X2&x&iVN<`1>?{omZ}p zdy~4@XZ)jJ>l^+U*u0X*<6t%YjHgY_cuVN5v%S2x=AHq4pP={bB=%3><=*}jz6~vN z{S2&+dd55ncFyeo&%yesyZ^tW_wr0?`vpzSyu^v~E3k8g{~BD@`!{fX)Kl-@g4L~e z>iave_0^VGzXz+k=BMc8vHcOOmY@0l3|9LS&HG^@x&H;M?)X2@%N?)nuV6K8Pt(gi zcYag+4ZJ}OR9Cgoyd5))TNZ@kr zO=>;kHibJ@{gOJ?=3sSyXFHBH8-D`W9LLhM4^UZukGlm}TlQ0)+_wa$PWhdZv9?0j z7Qd})KkHQbO+?q0I&GsLKGw+?+7n}2Z8Ymt#@G%`Th_Zh*f{E`(+*(kRF0d3rk-&- zf*q@#I!y+vmvx!~Po1=Uf|aAv^Sc1>a-8ovFfSQG_ZPEr+wk(=y}kd zI_(G6W}SQv$dmj2;MA$ys{_!r#qYq{uk}0~gsv@hI#|EDPR7ul7>8)1wVtQxXxdVz zL&3&TPn`|}x1OiN(bQ9?BfySTPo0hgtCw{;3Z6P?Po0hiYqL&1BjrA0{nuxm&%QkC zjzP<_ZYEgnvu*-wKNh?Ty}Gex(5s~mF955xp1E0Q+Op>3z^!Ma6HPs9J|65i^{ja| zSiM~H3*oNWXN~r(`9)xD=IOgbo;>G(lV^E0UW~3Sd7c1n-J7{+>dA8+*m3H~^F*+E zndeFHv<}gdh$FC>^Sx0`7*G2 zndj+n^K>5V$@Ar4ZRY8{CQqKP04GoXtU7tV5?x#JJOkWX&n`6e3$4>8m}l9H%{F`@!yg@>mAXSZ!q<`f1PD0kG=~AB2~&mctWEU+u}maoRKXOt5PR zAA*}}ex4eJ>!a>B)e3qqzg@MB(A0dUh!f{5usLN<m;UcilGHvJ9^IQy8 zH|F{Da@TSRz3aC3cchomzn12u{qovwpY^;B>^j0<4|Y#J#{ApSUjf%g-5Sd;r5W3~ z#97Z9z^*Z|-Uv3f-|epDO0b%HAD=gYt+7A9=yMfVKXvczy~yFs;4XUOYtMXd0lWS` z@?C1ZuLkSu^VIJW?V0DT;Bua8;A+;^I%-pk|J%T7;co|*=kQv%KI)#s>*&3#k+yfx z)U1&>ao!0o&z3y4cY&=}YVmHcT5{B;=3XYYz1)3vF8lHj&dgrtEtbKs~Mw*xQ57u^jYjP9)`)T?)PMkSz2Ct#z z9{-S-X1><;RrqeKPlAmb{wZ)->rcb2wSL;-_ZhIZ)LI_fXTfESJ_lDzjcx}gS8boC zshO+TT-_hJx;!R?XD#p9JHafc@%PHK|}YKi+5@HPYr|0-OcjQ<)~ANBlx;p<@a2N~x(-si&yl_bR?Bys_szF!pFGmnwNhEbvk~2d;9-Znd>I>`8!UBgEuJn%oaYUg`d>IPix_4wD5%m_wUp! zt+{8>pJ$JvnU`zIx{t2)JMr? zeGyojab3GSwikoTxO3rZp5w%w2Tok=8G9mFn{ln7Jhqd;W!#s*)%^J(abF5fT823ym_JO`{6`?+9i6Z?4$yl&1{!yTiZInM_>XYMt*e#YFG{sNlz=Xj2Y z^URq5-iqe61@b z(^7|P!HKIqW8VSRW?c7B9@{&?W!!hc)#lR@_ub&c)t<4}gS8pgvmlS{Jz#5|v+!Q9 z=fkt0zg(aAzYlCJbI-gVtQPwRz}6)88^LO^e-P~2V!sKj7W>U$bBz5%V6}PN5u4NB zLUT^zI##>6-%9^snz`qBAlKj6-hUsVY4_d~=iVFx-k#>QEzNs#8~W{N-kUqnr#`pU zy7fuU9|x=FUik!A&3h$#^GR^_MtjD73arhX#K~!6@MN0TBwBLX5o}IV=zU&{rT+qL z16p$YELguX$IrplbANswtTvzKpMg34cAC2TZcKUpjOY&Vc$&Jo%9E?L-G%106D_&! z3^vzY>CM&W-95B2*Du5MD|5XQuAW@)0;|oZnd_$Xchl6()tK`9j`u6zt!V1zDo?Jv zf%m3)?Ljlw-RbwN@l^V~Xy&~SeQNr3us&s;-+-%UFYg6AHb48^2UeR;%UZq(cAU2S zS^WKAb#oOb*JV^4NY4F5~_Iu66<~ zasLQTTfX1Di*4Ozu(Lpat~Z=l&d?~p?eG+|6OJkK*{Y&IgR;9IecwVsB7 zUThE>$9VPen1#&OhV$Pr{9TaNY{#slv)ZwIc}M5*)visOssqg}z4dBuPorM#?&+_0 zHG3|sn>RAa>uGfL4tA#r6*AHuD$LD|W$ledU$fS%cdx9kgQ8Y)UEH^B-9WvWp^V~Q zliX^vcK&_?&Hllz=G?Tv+`;+t7n(nuuCuJ4lFl<4ytt>atTE86HM;8gOwOb3wQZg; z*#vl!TkUCd*Eh#pNq{2xY=cbbe6C?JUu(8AI3I_v@ZZGkYtm6$t|4dj-ulLRqgn0Q zZ0_i6^7gsa{(7zZ|4g6HJ&7^J+?8ziV*GLI2OAr_QSVdDYukNRvMF$GC(kqHNl^3h zkT_26d1%x(vkS#OOvlH#q0PmlHKO(VbPtu5OV{QPz;9xV5}&#i>Nd|aM; zF1{Gonym*@zudRB{Fyqxx4OEoe?v!q-{7WYOB~ahxp2+(c^Z6V{@jt*+1FL;UDmh| z*X-$Qm|f&^46=**BfmA<0`Kn*Udb+jbBz0&)h@S~nw9Jdwe_U{cHXP-MMlBj4Bu4i zuWiiZmF#MKjH$BWJ}1{~rxlxS{c8(-T~Gf&vs&-X>)(jBtzK)kRo>sXmRAAs;o0r@ zo*Y-QJMql5wx_wVwuuFFuKNmYfb-wQ({BstmF)gP-^kLg^@MmY52J0|QbjAqK33$E zkIn$ca9uw3arLC2^`1fV{Ko27ZwC%_x;CTW>Ambegininy0qp#aE?|ppP2gI?_4yN z2Hcv>8#4aDA^hO&cqLl^XMN&+@_I|qc*}t+Stqy*mgjAFwhFDd@7C-LaN7S`_=;v% zZ#)ljuT5xr`|U0@>-U1&dwcu3o_{6_&D@%LIi^vL>EIq`qP1bZ)0a}iXWMJ9!et%I zq2`y!e4pjW32qH9`O)~aUOdBTJ+-LU4o>UIt=EEC&l~ld;k!@n_fEb?$!jF0^Tisa z^Qpyru2(vrd?vLbz5#Ci-7wd~HT6pCsYSkPnbwofB!3t-f&c4gl5M_tBKKgwGcd1d zMErghnqS{6=V~*_-Jy)Bo9nkOa>szpQ=38V*peHcN$=yzn0dw=?>DT7w`RM7<;IP< z#!=h#wYJ(cYWoc_CbzGtcs28;GS+)GZXMS{Zcnb8_pP?D=6B$|f7d7CYrrj- z@pJITU4!Ve1~wk~zBk>2N#tLEw^z@B<2T^VRkt6xXI;&>?{~3dBJSB&+l8(##>YID zgOA0gFzz*Y$2hljUW<2a)U6};JELa&^(BsZjQdS79{GMxQhsg8WBwc9p3mSngP-FH zeh1vf?eB{~=Teh@39n`i`}s27xzwFsekT_5+ppi8l>4ob$9xZfpH1d}5T5SyL-3LG zV}0xS9ZJU^M$7^BcR1L7)n>qtz?-Y?c)9QWbo^=XIplaAYhcHF9(@nVt<#0KCpGKH zeHW?CVT9+xcaiJlxznuSyT^Lg*jVPPSwrso#~O$6{Oj$@%X9Z6N(FDO?}PQ2XE?aO zaZ>JYn3QiB!Y@g<&%;#-_r2h6m2~{|3HP0IW5Vrc>k#g5k97RQL%6>a()cq8w;z8W zMBIKxCEWS_eGu{d{g`m${ys?K{x%41d|tvG@9%>&?(c)(#+MA?oeB5(@wY(a%l$nd zZ)ewhA2cxE5uSxb?4jqN4^zLK9Qh{9XJ0+Wy#(wy-)}LlAFO_9InLk1j#WRJ)8g4s ziy9Y#egC92E`qC@@7a-yJvZiwJu~T;m%`1xg?TT=zYJ5e=O@U21>Wmv?$6lEF*VPw zIQqX7Y>#o@uLP@ECvq+WJ6GJ}tH9=|$GBI6)i1~F*E6jaHLe6(BlzpU=D04tJKlg@ zftlxcxjB*lMzHxSxF5frZ^G2f_4$x5pw<-pTd?Vvx^bT;wW#%0u)R3qB7IIQIF&V6_`C*L^hpBbd6q z-dJ+ieIody*knxo6tp({O_;iuW3I=mJ;mJiWe@fev73j)%!&R!4t9L3-6z1;Vy=pvB&br>%R=n$B>=wN9>L2I6ZpGh*c^UsC-nibex8q~%r^=Y#v7f>JHz)kt;4Ro?&Wf_6XSVu`a&^tHrwf8tmo$82c5b=KYAH&Tqihew44M&-8CG z?`;j{y&CtPVyym!?&){L(w=?~SBswh2==lkV}HQZ>`5Fw{Ry0|&7a|}je6w#1*{(Y zQLuf-{{9tgj=KB%cf6N#8~Yok=G@|_^AB)3_dnrRVCs?cFR*$%WB&%L`OLdlkKxrK z{|Rtf^GW#Cn0n+q1y*&eZD{2jsO$8U(8;A-ajn^qpbAx49zldEpr-@s~7YYaH&VSZ_jg#y<2lnC zk7r*0Lhq94jP>_KeZ(ZpW_b}r}iZzp-o zIUnqt#{4@B(z&pm6IsR>>9(#KT*u7Pcy*(7H?&TQ&ep8D%j|6Wf7JMPx z9^!l*1vf|i%JQ4P2(0eE2gJCe!PbuRa}3yj6EN3#<>o~Gv0(F!ZDl^W`xa}akKA^! zYae_u*q)=u))fEtEhcEJl*#b;9H2tTpe(8)MLz2 zuye-QSOzyw-Lr8L-pjLL>_kk>zQj@I#bD=(H9Q&a8mdRmav72tHgPJx@F9{aTt ztnPk!&z*R+sBtP-E&d)n9jta5=Cd+_o>qg^9lr`Mcf7GRU^Qd@y)O4mwh=oMyc1qM z)>|#+J`3y~81o#+-KSU|edL}EPWR#*xY`|zbG$J%$KM8aef2T^Tw>{2ufiP@e|wz= zH%HyQlHWr7*0Pp9YSoFYhsT<&gR9v~UWj34uGrek3}zY zz+^H8wkjKw?tFtED4ilf(w;K`WRu2}Rs32cA6;p5)~7l1wI!51aG1H824 zaX&}Gt&`r*Lb!UY_fcRq*V{dI4z;-7#bC9#wc}RzE^?OVm(g