From a2a2d12c5da24ee245512eabd3ba508eb73f02aa Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Sat, 12 Dec 2020 06:17:19 +0100 Subject: [PATCH] path_coarse.comp: fix intersection inconsistencies, take 2 The previous attempt to fix inconsistent intersections because of floating point inaccuracy[0] missed two cases. The first case is that top intersections with the very first row would fail the test: tag == PathSeg_FillCubic && y > y0 && xbackdrop < bbox.z. In particular, y is not larger than y0 when y0 has been clipped to 0. Fix that by re-introducing the min(p0.y, p1.y) < tile_y0 check that does work and is just as consistent. Add a similar check, min(p0.x, p1.x) < tile_x0, for deciding when to clip the segment to the left edge (but keep the consistent xray check for deciding left edge *intersections*). The second case is that tracking left intersections in the [xray, next_xray] range of tiles may fail when next_xray is forced to last_xray, the final xray value. Fix that case by computing next_xray explicitly, before looping over the x tiles. The code is now much simpler. Finally, ensure that xx0 and xx1 don't overflow the allocated number of tiles by clamping them *after* setting them. Adjust xx0 to include xray, just as xx1 is adjusted; I haven't seen corruption without it, but it's not obvious xx0 always includes xray. While here, replace a "+=" on a guaranteed zero value with just "=". 
Updates #23 [0] https://github.com/linebender/piet-gpu/commit/29cfb8b63edc28517b16e3ba1da7790e360ed557 Signed-off-by: Elias Naur --- piet-gpu/shader/path_coarse.comp | 45 +++++++++++++++---------------- piet-gpu/shader/path_coarse.spv | Bin 30896 -> 30852 bytes 2 files changed, 21 insertions(+), 24 deletions(-) diff --git a/piet-gpu/shader/path_coarse.comp b/piet-gpu/shader/path_coarse.comp index 0cfb882..cbca10f 100644 --- a/piet-gpu/shader/path_coarse.comp +++ b/piet-gpu/shader/path_coarse.comp @@ -198,21 +198,31 @@ void main() { last_xray = tmp; } for (int y = y0; y < y1; y++) { + float tile_y0 = float(y * TILE_HEIGHT_PX); int xbackdrop = max(xray + 1, bbox.x); - if (tag == PathSeg_FillCubic && y > y0 && xbackdrop < bbox.z) { + if (tag == PathSeg_FillCubic && min(p0.y, p1.y) < tile_y0 && xbackdrop < bbox.z) { int backdrop = p1.y < p0.y ? 1 : -1; TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop)); uint tile_el = tile_ref.offset >> 2; atomicAdd(tile[tile_el + 1], backdrop); } - int xx0 = clamp(int(floor(xc - c)), x0, x1); - int xx1 = clamp(int(ceil(xc + c)), x0, x1); - xx1 = max(xx1, xray + 1); + // next_xray is the xray for the next scanline; the line segment intersects + // all tiles between xray and next_xray. + int next_xray = last_xray; + if (y < y1 - 1) { + float tile_y1 = float((y + 1) * TILE_HEIGHT_PX); + float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy); + next_xray = int(floor(x_edge*SX)); + } + + int min_xray = min(xray, next_xray); + int max_xray = max(xray, next_xray); + int xx0 = min(int(floor(xc - c)), min_xray); + int xx1 = max(int(ceil(xc + c)), max_xray + 1); + xx0 = clamp(xx0, x0, x1); + xx1 = clamp(xx1, x0, x1); - // next_xray is the xray for the next scanline; it is derived - // by left edge intersections computed below. 
- int next_xray = xray; for (int x = xx0; x < xx1; x++) { float tile_x0 = float(x * TILE_WIDTH_PX); TileRef tile_ref = Tile_index(path.tiles, uint(base + x)); @@ -222,10 +232,8 @@ void main() { tile_seg.vector = p1 - p0; float y_edge = 0.0; if (tag == PathSeg_FillCubic) { - float tile_y0 = float(y * TILE_HEIGHT_PX); y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx); - if (min(p0.x, p1.x) < tile_x0 && y_edge >= tile_y0 && y_edge < tile_y0 + TILE_HEIGHT_PX) { - // Left edge intersection. + if (min(p0.x, p1.x) < tile_x0) { vec2 p = vec2(tile_x0, y_edge); if (p0.x > p1.x) { tile_seg.vector = p - p0; @@ -236,22 +244,11 @@ void main() { // kernel4 uses sign(vector.x) for the sign of the intersection backdrop. // Nudge zeroes towards the intended sign. if (tile_seg.vector.x == 0) { - tile_seg.vector.x += sign(p1.x - p0.x)*1e-9; - } - // Move next_xray consistently with previous intersections. - if (x > next_xray && next_xray >= xray) { - next_xray = x; - } else if (x <= next_xray && next_xray <= xray) { - next_xray = x - 1; + tile_seg.vector.x = sign(p1.x - p0.x)*1e-9; } } - // Force last xray on the last scanline for consistency with later - // line segments. - if (y == y1 - 1) { - next_xray = last_xray; - } - // Drop inconsistent intersections. - if (x <= min(xray, next_xray) || max(xray, next_xray) < x) { + if (x <= min_xray || max_xray < x) { + // Reject inconsistent intersections. 
y_edge = 1e9; } } diff --git a/piet-gpu/shader/path_coarse.spv b/piet-gpu/shader/path_coarse.spv index 767bbda65aa53889f1e3bd5a6aaac9595df6fb08..bec287b0a915a1af830ffa29534edc47416a682a 100644 GIT binary patch delta 5987 zcmZ9Q37nTz6^8FH3yUDiL@I+af&(dNlC&6UERxwmL>RafGss9J>i{*A)+dT(ZWVt* zOD!rj2a|*$MJg<_#r7~m+sKLv$`T93CG2^=`3`)|_`ARRzVA8r+;i`_-@SjlXK&T2 zy;TFMdyVLwQdR1g`lL}C2kbgLt4YVyq_lkgg4Wp`r>0u|I~LwPr{$tc=C&a}WNO7}L5Sk1>P!)1ub)<+Iz{S~`kB7lu>E z*gR+pynS}-+eS)5b{AEhR_54v)oHc3m@B&llCM5@ ztWE2JmsWK_TcMdOunk>v;n?&!Q4EtuCOg@oyVMLHEAnaNB4q;=NUbM(7n|) zskZk2C#&l^=ZH~NRU^BW9(i_M)0ud_#NTMb)Df-;??(DDu&Q1ujf6LKJ$cjxb=f#% zYfHh?c--&9Y;DWjx(08+Z$b}lJ;Oc}yY(z{>lxhmGY;`AnSiwn2f4Kj?f`39=GL*y ztzns4$1=B;!HsV@yE!Mz6|FlcbaQJN16+}{EOTpF=GHR!$@p8(;7({QgP#VslsP|| zEgy|wJ;T8ObzQd(8*xI*K>Suw{253M;32dF;KdB1Ku?cqBH z?2@+>lb;OMU;Vk%D~+VzU-bJ@@5;a_wb?0XCOL?ve-rIl7IrGVf6tSLKCh&yIbIxF zIu`6E#P+=k+{p%bk%iA`XzCk_yvBhYho0q$MPPdp%XjY8U?(oHq<*pPNpSZjj@x9gKI)e-#shPfdypI09KX9aVO+-sI+1~f(4UQ_?xfx-wIYc7i|1G=1*@!(C{EmX7zTk+AOd^ysp%`2c$F= ztmew}e+O92lRFV*4*fLvZh9xu9=q~Rusat!Fg=4P?xFGqdl%Tj25Y8G2YY;@%JvcL zT<`(5+$6n2@1Ms_+xav#kDE9q zdk@&jqSo&Ps~J14tJ!e%@^#g0g*CW>3+WLKp3gazy;JL_T!b>Gq|OCMF3o9j`r-`Y2diB=s?{yAU72)nK(w#*t{2*XiR3v3&;6 z$xK($Yl~plfXzPmwcy)I^}P<>3HMN1|Lft#^|yj!_17nM;d<*I!EV6dguy=nE(iZ4 zyd3;f@Cfc${q>2&wMVQ9*u=H1rk8tY*H{zpjWzVoP>;3rH`4sm;ikfFAHhBYHc0T# zf=xW`tDC|4sC!4rKTR{XbBSYYb}PhSaUp#UtX95|J`Yzn`&;Pc5#S5p@&orpxbdRi zUjpl+9@qPAV0CVyEPvqcfUN8Bzt^2lgY#X7JtYf?50X_iOMjniDucu21;i3-;0vejnWN-X+G{0@p`9V%`rn zrk|fNTNQMaY^8scBONRGI=uYL^&4=tWeoNq+y?f~L#FMUG&K*I*h$A>f54Y!%C}G+ zD5>8Do4)(vjNT*;81gjzIGzBP53*c8FHI+Q5+{qGe*!y+%XIvo!H$oQORi71Ki9%8Ncrpf z3)}$lb^R5rk9q|C8@Qa5T)%QsCg&C6hq1fl-wbzhKZ89BFlgLdZ0+B{duV>0Ztas` z|2z%a{y|f7)#51TKfyiA_AfN`@OdgP_+FH!Xz@)w4c1dV-u!=qJ-^vfk&HWX}4Y7mma5Z7Gmv1Y4?W3qK zfw$6P-pKQpZ?GlOmeoNNd{(*UQXlYGdIxEX3HpMa!1vuG`@z-rp&3Y)jF9% zPi=!MoiiJQ;P*24c>$n{22IQ%o__H0$0<|nY5|t@AcXM_72d- zt=cO!RHn=tT{X0_bXI+KEXJXQv%7a3_9n5Jy4-#uhvT^dnIsdI7Lc^9^?TCp;vRL6f)&`MISomj0I){vcy 
zWo6r{wq?0zC7sfT&w{VMU}?vqj-^EPORs@ zB^}PUlCF=4H9eJf!+X}Ek_OkkaJ2gF8-}&i)J*Q)F???S6Q<+&Jim!#IfU9b!&6!} zTizcuiRdPKSWRt8li`!Pn~ylRe|9g_#b-y-$+*j1_~4D`C-&hkf7nN1clpbFR*9R> zN{JsYW!kdd@5@N6^}Vw9Y|6>TPq zuiueq>VrurKMGt!KY?Zr?WgceyJ^~tbB{-Z2b1d#az92uQzf01ix**xL> zn$tLYBFN;2(X=$vo@Hi}>HR&&%=DQ=Q`?_vbL@!Iu^(5RKc`{%@mP+dg*vr*|LG0c zOHroMwihE$1M8{Y#dAN9+q*PVDp zB@Ia$EXl`pMzw8jLpBp-2F+nSL2|LDhmdpUF z#lXV8oLp||S?JoL(0Sln&7Bnv^AWlb1`}$)8{j7L*cxnrJ}$c#m-Z<1Y;d{IbKn-b zz9{rfaDCLHP;Qt!e`eJ>M5PPC8l%!TgY!y@Lf--}7kVx{3N==LeJs=)ReR)mD_AY~ z+bStb<=g1(7!mVlKW>^ua0l?FF@ZjjTw5e-1Di`*D_9=3^T4i}w#D>vx4qr{_W-ve zJi`pnr(Z(zr^7qI+U+CR1z?i|e<#>k#dUHaSRZvS3wa04+~yKT?6M4EvW@JO`@S5! zjCK&bf?&621zg?gedov{!Mnian_v~(d~p+81lC7APQ_}ly2rN8o8XfCCU`f}B{YY* zm|iaKq&FmPf;HeKqV&tdJEl&M9 z@E&xNY74*h;HTL6-Nh-t0ODzQx*BZGt|F&gKl5**e}rc{p9tfpYY@tB%WL6kzASwQ zT?h8(W2No=G&LV9v59<&J^&s{i>K-auv+=4`XF50LawK;SFq~oB&hnj8vY@aYDvA( zS6lXBlpARd=HvMhu#aWL-UN14TCs)CN72;d?)(_oIQ8(k8N7RyX# zpQxlW^V?tw#PTdg$5>7rdJFj^xNjk!LQ{`oZ>dh5-;jM8CKg?48q0GeUcm z`B|{jjd%F1VExs@=W}2;H~8nluJL_L&99)_!1}23L6ha;?GTe`bhGcE_vdD7>!PW- z+2Tn41#o$=FTx#ceX)*T0;_GNMbTTp#%Vi{vfAFn4zQ0^u*y5Z4(yX!M_GBpYX54^-;G1zq;@DEml6_2uVj@ zAP(#x4i-tj0d|l`@J+DsaqYfwB&eu(mL zN&OMH?-u_UO+7A)U0~zXp5^`&tZh9PNlfh#crmr?1-H`AFlby~ z1V0K^Gf4zL1}?9QYpJHMYZ+6ucg?(h9|tck@t=cRX`55lbN+t;(MdxD{SvHG@Lz%b zp4n0?{IB8qsK*~cPk_y(&CU2Ny+8Lt+iz&Ur@0s6-p|e7L9Ec(IH=s&m_TQPqL4p; z%d^=HH>SHN?2ll5)Fb9kV10uB8SG@T2-<_7lZKehlVFo+b2fjakJ)JZ3r)=e#gY7P zU<$R8fO|g_U$IH9kN#dx1Ho%^|Ni`q%>Exz1l)r$ZhSQz zBWMuVFWPw|ag`2*tNBd$KXv5#PQo?>YysNp!E!&<`2tHr!Bgpt(UzZos~QGzfcPVO zI9zQXnu)Zjxe7~2?kd(YeO5^n_wh{Rw~ly(>k&`b3tQs zBdRAo(u{(u#k{nQs$RaJF*_14 jZKu+=RJ#^7HXn<+6Xt|{gvP_wV#3