From 0ed759814bd88561f474609b6c6aad38ffadb5d3 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Tue, 19 May 2020 08:20:45 -0700 Subject: [PATCH] Smarter line segment coverage Compute tile coverage of segments using optimized algorithm. This algorithm does a bit of setup, then uses an efficient formula to compute the span per scan-line. --- piet-gpu/shader/coarse.comp | 66 +++++++++++++++++++++++------------- piet-gpu/shader/coarse.spv | Bin 28728 -> 30280 bytes 2 files changed, 43 insertions(+), 23 deletions(-) diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 2ca0cff..e331076 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -175,47 +175,67 @@ void main() { tag = Annotated_tag(ref); } - int x0 = 0, y0 = 0, x1 = 0, y1 = 0; + // Setup for coverage algorithm. + float a, b, c; + // Bounding box of element in pixel coordinates. + float xmin, xmax, ymin, ymax; switch (tag) { case Annotated_Line: AnnoLineSeg line = Annotated_Line_read(ref); - x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x - xy0.x) * SX)); - y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y - xy0.y) * SY)); - x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x - xy0.x) * SX)); - y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y - xy0.y) * SY)); + xmin = min(line.p0.x, line.p1.x) - line.stroke.x; + xmax = max(line.p0.x, line.p1.x) + line.stroke.x; + ymin = min(line.p0.y, line.p1.y) - line.stroke.y; + ymax = max(line.p0.y, line.p1.y) + line.stroke.y; + float dx = line.p1.x - line.p0.x; + float dy = line.p1.y - line.p0.y; + // Set up for per-scanline coverage formula, below. + float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy; + c = abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y) * SX; + b = invslope; // Note: assumes square tiles, otherwise scale. + a = (line.p0.x - xy0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX) - xy0.y) * b) * SX; break; case Annotated_Fill: case Annotated_Stroke: // Note: we take advantage of the fact that fills and strokes // have compatible layout. AnnoFill fill = Annotated_Fill_read(ref); - x0 = int(floor((fill.bbox.x - xy0.x) * SX)); - y0 = int(floor((fill.bbox.y - xy0.y) * SY)); - x1 = int(ceil((fill.bbox.z - xy0.x) * SX)); - y1 = int(ceil((fill.bbox.w - xy0.y) * SY)); + xmin = fill.bbox.x; + xmax = fill.bbox.z; + ymin = fill.bbox.y; + ymax = fill.bbox.w; + // Just let the clamping to xmin and xmax determine the bounds. + a = 0.0; + b = 0.0; + c = 1e9; + break; + default: + ymin = 0; + ymax = 0; break; } - // At this point, we run an iterator over the coverage area, - // trying to keep divergence low. - // Right now, it's just a bbox, but we'll get finer with - // segments. + + // Draw the coverage area into the bitmaks. This uses an algorithm + // that computes the coverage of a span for given scanline. + + // Compute bounding box in tiles and clip to this bin. + int x0 = int(floor((xmin - xy0.x) * SX)); + int x1 = int(ceil((xmax - xy0.x) * SX)); + int y0 = int(floor((ymin - xy0.y) * SY)); + int y1 = int(ceil((ymax - xy0.y) * SY)); x0 = clamp(x0, 0, N_TILE_X); x1 = clamp(x1, x0, N_TILE_X); y0 = clamp(y0, 0, N_TILE_Y); y1 = clamp(y1, y0, N_TILE_Y); - // This loop draws a rectangle to the coverage bitmasks. For - // line segments, draw more precisely. - if (x0 == x1) y1 = y0; - int x = x0, y = y0; uint my_slice = th_ix / 32; uint my_mask = 1 << (th_ix & 31); - while (y < y1) { - atomicOr(sh_bitmaps[my_slice][y * N_TILE_X + x], my_mask); - x++; - if (x == x1) { - x = x0; - y++; + float t = a + b * float(y0); + for (uint y = y0; y < y1; y++) { + uint xx0 = clamp(int(floor(t - c)), x0, x1); + uint xx1 = clamp(int(ceil(t + c)), x0, x1); + for (uint x = xx0; x < xx1; x++) { + atomicOr(sh_bitmaps[my_slice][y * N_TILE_X + x], my_mask); } + t += b; } barrier(); diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index b0bec3f996366f3fc5f8e2a017621dd32622cb87..d61b227981fefe57ee154d9dd72928df4528cb29 100644 GIT binary patch delta 8669 zcmZvh2YgoL6~=E!7$Q{V6WeMnpmh~2P*jR2h#jKV76*zm1_&5P!bntbT_A$u)=%re zg(xmW5KzQPi(;#|S6#JBTWecaMX~Guzqtpl`T6O~`#jG%?|IKV&izJKzFqOe`xRX) z+wIsfNh*>~{P+6Ou5-6atCEqGNm5_eSex)_pHwAB!Y9{Et!c>l(eP;vb&X*ko$;D! z!6(9}7wpX$4|`>DCVVX3?UTx+MLb>HKA8)yOB+vVs-HADpS&`;5^+p6ab+?eJUYh< zz%?1JN|uAi)z#NW0xRH~60S@hgHP+%I^jBT|5p4GczUkCDONX=56Zo|mC{d;8m3n_ z)HF@Za@_!L&Zn=a!ac`b!PAKEdcL0F{u%Bi&V1S@dxM+m>&Dbp*KORV!DyrFnj2~+ zn@)#h6uh>+wxPDMxq52txXziY=~S0zb}J36!@Ox#Rml{1@tCH5$zGY93~#C(U%sK# zEA<#T*)NTs*@j=-hF{W#&x1ELPn|T8voYSaZS3>g@CBQA2L`lH7L})zO8r-ElCv%Xl z4gP4WJ?TljHDHP$nMKoX{|?%gIqyLK5xi%T?43C%e2UN`cQBzPq|Ixn0kp4yZ%y9^ zwnw`pd2QM3?bCMYQY7ok`3B?KlivCY-n$}6tgj-szKY!XDstzfZ-MTAsYcKe_EWr8;?t-kp;BKz<*UGtr3Kn6}!9px@i!t~B0)EhjTbW^Z zK~`pP{jJU-w?Z?XvgJBhrbP!!wa6{kBDYkF+)^#_>H_acd}}rA9=WwDPvguj)^Hey z!D0<=f~Oa_&)qZIaErOI?>#(~3%R0Dr%erH-dF!n=!0KhY?eKB8bl<#QMsS3KXzO{19zpNx3rb!6 za2hEmhtjvFKa8GmS^rtmF0&6|oP}Xwi!BQA=h%&>pW~g+x;+X!ypj#k*$Pw`M0iq6 zMuB&sSJ!UEstu!G{W)mremh-Oe90>`3h3%}#Q`mJi?cRZT5E=j3lHt+<9E8NK- zPZ?Y)?>DfY zXkg!Ax_ib=1^1IP_tvAhg0$i&PC?feVNcc1c>Sk=WU;8};8>KSw8yB^z|LJtPY20j z8E1eUscv50H?}j@zDVnzMo00pqkbChe%|zFhpY_hkEQo_OIH6B@wFq=J zI9p&k8zc+M94)jKrYLUFIcVDAI-Lu4thzJ!zM&RddLCG<3svB4I3MiBS&G_La>DOb zP*N8{H2NlOJzqlaYXh#@E~Z^ZvnIt(=6jHD;?}GF+VqLZ%V3ur{Bm%XV1BKxfa`O4 zB~Pmes8|eW;0W*wu#@WR8=^dHSAo@HrB{PvrP^X==Yg~9QGR3ZPH9|?>rk(uT}zvt zr*}QrDB4215v=ZK6SKIIUM;e?32YYH=F`h#=9|HBruntmCOec{FlZdc#6GGPmS^r& zN^eD3NQlX_D-;xON?0-frs!Zr#S8|G!%(QE(M#gw(z?LtR8!}0IXJg+#d#ao*rDuE$u#LGW@PQoSAm z$@H2?FAuMW!OP35_bJsp0+Q*~OfMI^8;|DL-3{ywiy}J$Y?0YVZjXaqNF3|h?D9N; zvX&O-@oTW2>Tw>w0jql+Q8iD3t(s0W?Tws;pRJz)Yctcw=;e{Y(_nWcGI*vkDYxvk zRr)L%cw-LFf%R379G(ZO7jt+4ZVo=Lv_}pvg0-2$I(m8J@G{svj~rgfJ|?ue-LK++ zVPitS1v^kZ5_%1+UQFn9cx;OHNazi)HWPY@ULFa(RmlZzMu?1lr(>Ih-bPc8Q~Evl zEt+~H^arqdF`+-gV~4ayLVp5lGod$AfPxu$gntHmbX_=H`MY3)?4H|N=-)w$Bi0^c z{{nW)gZ~v=jP)KoV(F_rjbIMcAqM{q>~=*W?}KBoww6L7`e~1`e+R!=;Qs&@WBn7J z$4V2Ob%;b9sDr`$zV{*cK#pV%&lLH`aJ7$U?!pK3YT^Gcu>Nav|MhUSPibq=+||#( zK6BQ4{%7P9eU7NXpr6po4eAa50&MLE|2NqBmmBauU{^RP_x~?gE$+wvz>ZO$NfO4y zHTlBx*XX1h=oKSqJFr0`!AiKAzJ3vt`&h1`_km;oDFHgrccl5!-YK`+8{1vn3~Y?- zYl1823~}pYvd!UYOURbb9!VFlgVg=FA#YD}GA9?u?tBgGVuEi0_X!ylv?W|0^mOAx zZ7T#Xg%33yqFlOxwZ*QKz~1WMTf@D)aZ$U&<m%PLX*}us&w) z_iVYrJpqCOnDT&S_D23Je6ezuYv0m2Q(V2kNVuayvKmmw_u!~ z#4PWz@V+Fhtu+=0S_{$O#MSijm}neWEml1qtacDVMkKAjO`QZ+cl>a4x#N4V>Iq;q zZFOL|-(xJDiQpZXKQrhs2uH=3yk2KCZ_s40FMniHm%FtM^!~`!NYDStB>wRz)gy@} za3nDmERWyL(q@Q1o@p?NDfDV?*-G?Nz-ldE19qlA6|NpW({dl5AK^0{O??D$_!C5O z8rX5_&T}$7+SfL&_6&r{eG9eR_hiwknVA@Pf9zo9mJBcUwT@$^oTxF3P#IxXkB))qHqFl=IQ(qIc5- zwD~A5+9vs7DS}^#qfU!hr5_Kkf~&>z`x>w>pWoW9rl~oJ*f>_*b>O)0*Mj9<_-(OW z5AIE`Zc_6+IK@cf2CzwZaT{^G5w6yNrjIx9CV1S=`Cz%h7SQV-yLdCW34eXG#T>VQ zox{hlgOBz~0rU zYqMC4rXD_bgBQ^vh8&+XbMWV?r3mwAa~SN}{B&{;Ts^*}ECVm0sTP9|~`+Wp`$tWeuZusjan0r2?TwhAndLwgYH zUws{;E#f@{RvVHM(7*~2@L{mdPG(?v415$Ei97<9Yde5RehGFFj$2JHk3@b2J`Nwp zXp7xRAA=aMxIK@fsmGmI3szf0i-AvoW1JJp^%ED1Giu7dSgaN_iMPIIKM5I3o5h)W zd7c8R#gB?=>vEs?+Wa(JO j1P5#Mo%&UJUp}O?y+TuSZDQAUJ3-~IE}MJE-8=jrJ)q`| delta 7198 zcmZvh37A%8702J%1QFMPEH?(p)R2_0a70kl6ikbP1XRQU1{|3gkQo&Oy}%%f3of6E zW*b=+Mi@;C`8?UCsVxR(X1394nJs34W>&x7oqNVR^YrQU{r~^xyyrdddC$G`eb>ic zsM@x(YVVrr5d%`HN(1?4!`8jm?Vr`8_tm7d^thDjQcZe4cv*p`s*mo`r>W<-Hq8XL zwzM}VK6R-!od;jsw4|x6;2YrcTUuMgeperRO?m>pH21AZ+w;IVsY_2{Xj?YBqqSvj zZ}6vkw6>;>g)CN=cENLp*)7J%6WEP5r=_#4X|a5dG^BE9{ejuRwJ9~XHn%mmcjmsP zdIVZI)lu;BR^HafUXw<{JDL~dtBmbqt4)pl__)42OXK?r>D~SK+5Px*ct_`wMGM)y z$!L{3=1<&#Y zZP9Qrf$b$eh6#3R*yVO(*{EfQY&@~P>cUFZ3H4PoD?7&4XBVN)<{HfYy?vU%rgH?>g<$OwVGh`+ zYq728GA-xKMbp+rCh~b;C)QVPX*?v06g!2Bi|&E%)0^Uz81fSux*Q~nQRi<;Qr^;<#}m{YODb}8AES1t^uo; ztFabtlAfLRsK&KmZC1nkM;^&v2ll+`XRZUu!g9S9t_O)$KC2thv_)xd1V?F%p*>>U z1kO7kTMv>&F>VGsQ$31t3%Gx0+=`|iwYUxJT=nRT3Rt~di`(H*3+!2MG48;i(PB(v zl)u6Qzz2j60v`;;gCN}vp2Vf^U5s*{1ND^TgWzKs)wOS6REw?O16H$q>r>^~v4bir zP8!&I7kn5~|AYSsntJT@qu>v5smESF239ZcbtBxpdY@{KbN@J4o9X#Hg=f9_-HQ;So$n5a!ss`jAA%?IH+3!9%6|k`dy30oPco_*O#6?)`afFu{{*h~4A;YG9`mzc zA70v4IFI$yPazr%`ZS~5-TaKvojdNr`4^18glKbA?`i(G0}NgRP*0+$<19=3mgtyom!pJ25}a(f0*^gJWDqujH0F~@R0 zcvL${>K>Bw+CeGZ@h*pC;?ivL?2GUr~rDh;;1gZtQ zQ%h>#I=EVtRGV7(?*Ueeg$KaZx_JQY_PgYs5I-)gaUhqP3ya-p>_|NV?*%q+@WJqM zx%Y-!ZvC`{-#%b%d9_(TDTHxfutyNt?gv-%T&>apU_U0SZGSE`lNFn+MEKbcEg{saTR39K!Sbuc2A<`MABvz;1UL+=llrCNyFTn3ngE;qn1cKQ~unl@iO$o=wa1&;)eU{u%Mh@V<4ew4mw z9^)}!Kd)x|u+X4jX^&>~r}SGH`8SQ^Kc1y}BylV_lJNb8Jbpu=yrN-pJQ2q;xXazostcTO8-=6LU`i zYcnAelaEj5_hd8?;v|h`CXa+p1shD8iOD1AY2w0WVsfkKi<#3atIny<-i2~HSG4vS zU^O2<5#>y{DMedOf)9nO$Gpj4=W4T4=d!|CV0AO|J(S#-p3ga8&Lqo?XG0VkX;k8- z_kb^9bdt76$zwBB z@2Hum(abZza+~=eZ0CVTGOD}8bVjv^$l>J??a6lhE`Y1Gv5G$S+J*3<*YbC5a$?QN zU$i+PYH|^{g9)=3eF+zfG=a;9GY9VBs7IaVf;~`eas2c0<8MZp#}%IQ!D`M3&jnyp z^qzE^a{a>I0=Dtup7mjK37UHNECgT76*1)a_|vrKB~}}P^;^qix90ndcDQ={G+G31 zHC0OUY)Y#-PaRoRMSq_$KJCjAO2D=I8 zUBxJmL^ALN_&7&fR3}>nF<`kotI^b>C%V9DE4gCg8gR^WLAicnyD{#xbJ4sD9m{u~ zcPEMW{CT|=;@|Pr^n%TE9b7G5QK+peeB#&3^>8(PUDWw%E)rib9Odyn(QipRsn+XX zOe*1H;vxW5i*?2QtAN81X3wR7Z y@xAF*xZ1td>5Oz=G4VD8Cu{Vt@a>F#{4%Aj!lh