From 319aa703c4ab10e36348fa5f1a1aa3598b5aa7d8 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Mon, 25 May 2020 15:45:06 -0700 Subject: [PATCH] Output multiple pixels per thread in k4 In kernel 4, compute a chunk of pixels rather than just one per thread. This is a dramatic speedup. (This commit cherry-picked from another working branch) --- piet-gpu/shader/kernel4.comp | 92 ++++++++++++++++++++++------------- piet-gpu/shader/kernel4.spv | Bin 19028 -> 23052 bytes 2 files changed, 59 insertions(+), 33 deletions(-) diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 1abcc2b..2c068aa 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -6,10 +6,13 @@ #version 450 #extension GL_GOOGLE_include_directive : enable +#extension GL_KHR_shader_subgroup_basic : enable -layout(local_size_x = 16, local_size_y = 16) in; +#define CHUNK 8 +#define CHUNK_DY (16 / CHUNK) +layout(local_size_x = 16, local_size_y = 2) in; -// This should be annotated readonly but infra doesn't support that yet. +// Same concern that this should be readonly as in kernel 3. layout(set = 0, binding = 0) buffer PtclBuf { uint[] ptcl; }; @@ -24,11 +27,14 @@ void main() { uint tile_ix = gl_WorkGroupID.y * WIDTH_IN_TILES + gl_WorkGroupID.x; CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC); - uvec2 xy_uint = gl_GlobalInvocationID.xy; + uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y); vec2 xy = vec2(xy_uint); vec2 uv = xy * vec2(1.0 / IMAGE_WIDTH, 1.0 / IMAGE_HEIGHT); //vec3 rgb = uv.xyy; - vec3 rgb = vec3(0.75); + vec3 rgb[CHUNK]; + for (uint i = 0; i < CHUNK; i++) { + rgb[i] = vec3(0.5); + } while (true) { uint tag = Cmd_tag(cmd_ref); @@ -38,15 +44,19 @@ void main() { switch (tag) { case Cmd_Circle: CmdCircle circle = Cmd_Circle_read(cmd_ref); - float r = length(xy + vec2(0.5, 0.5) - circle.center.xy); - float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0); vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color).wzyx; - // TODO: sRGB - rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a); + for (uint i = 0; i < CHUNK; i++) { + float dy = float(i * CHUNK_DY); + float r = length(vec2(xy.x, xy.y + dy) + vec2(0.5, 0.5) - circle.center.xy); + float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0); + // TODO: sRGB + rgb[i] = mix(rgb[i], fg_rgba.rgb, alpha * fg_rgba.a); + } break; case Cmd_Stroke: CmdStroke stroke = Cmd_Stroke_read(cmd_ref); - float df = 1e9; + float df[CHUNK]; + for (uint k = 0; k < CHUNK; k++) df[k] = 1e9; SegChunkRef seg_chunk_ref = stroke.seg_ref; do { SegChunk seg_chunk = SegChunk_read(seg_chunk_ref); @@ -54,52 +64,65 @@ void main() { for (int i = 0; i < seg_chunk.n; i++) { Segment seg = Segment_read(Segment_index(segs, i)); vec2 line_vec = seg.end - seg.start; - vec2 dpos = xy + vec2(0.5, 0.5) - seg.start; - float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); - df = min(df, length(line_vec * t - dpos)); + for (uint k = 0; k < CHUNK; k++) { + vec2 dpos = xy + vec2(0.5, 0.5) - seg.start; + dpos.y += float(k * CHUNK_DY); + float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); + df[k] = min(df[k], length(line_vec * t - dpos)); + } } seg_chunk_ref = seg_chunk.next; } while (seg_chunk_ref.offset != 0); fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx; - alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0); - rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a); + for (uint k = 0; k < CHUNK; k++) { + float alpha = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0); + rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a); + } break; case Cmd_Fill: CmdFill fill = Cmd_Fill_read(cmd_ref); // Probably better to store as float, but conversion is no doubt cheap. - float area = float(fill.backdrop); + float area[CHUNK]; + for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop); SegChunkRef fill_seg_chunk_ref = fill.seg_ref; do { SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref); SegmentRef segs = seg_chunk.segs; for (int i = 0; i < seg_chunk.n; i++) { Segment seg = Segment_read(Segment_index(segs, i)); - vec2 start = seg.start - xy; - vec2 end = seg.end - xy; - vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0); - if (window.x != window.y) { - vec2 t = (window - start.y) / (end.y - start.y); - vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y)); - float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6; - float xmax = max(xs.x, xs.y); - float b = min(xmax, 1.0); - float c = max(b, 0.0); - float d = max(xmin, 0.0); - float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin); - area += a * (window.x - window.y); + for (uint k = 0; k < CHUNK; k++) { + vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY)); + vec2 start = seg.start - my_xy; + vec2 end = seg.end - my_xy; + vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0); + if (window.x != window.y) { + vec2 t = (window - start.y) / (end.y - start.y); + vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y)); + float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6; + float xmax = max(xs.x, xs.y); + float b = min(xmax, 1.0); + float c = max(b, 0.0); + float d = max(xmin, 0.0); + float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin); + area[k] += a * (window.x - window.y); + } + area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0); } - area += sign(end.x - start.x) * clamp(xy.y - seg.y_edge + 1.0, 0.0, 1.0); } fill_seg_chunk_ref = seg_chunk.next; } while (fill_seg_chunk_ref.offset != 0); fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx; - alpha = min(abs(area), 1.0); - rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a); + for (uint k = 0; k < CHUNK; k++) { + float alpha = min(abs(area[k]), 1.0); + rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a); + } break; case Cmd_Solid: CmdSolid solid = Cmd_Solid_read(cmd_ref); fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx; - rgb = mix(rgb, fg_rgba.rgb, fg_rgba.a); + for (uint k = 0; k < CHUNK; k++) { + rgb[k] = mix(rgb[k], fg_rgba.rgb, fg_rgba.a); + } break; case Cmd_Jump: cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref); @@ -108,5 +131,8 @@ void main() { cmd_ref.offset += Cmd_size; } - imageStore(image, ivec2(xy_uint), vec4(rgb, 1.0)); + // TODO: sRGB + for (uint i = 0; i < CHUNK; i++) { + imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(rgb[i], 1.0)); + } } diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 8060f1ff2b700288356a5af27af23f9511c73062..5215e2f556e23cde480c5c9abb0565ff024d5cfc 100644 GIT binary patch literal 23052 zcmaKz2Y?<`xrPUJ6Iz5|sM11037yb8A(T)81R_n*WwX0UmTY#zW;aCviJ_`A0Sh83 zC?Z{&3WSbWXd)tt6a{<1f)&C0JpatQJN);0N6tO(`+nto=ggUz^Do?%<;Sm36w4JW z^1l~bipI5iu{=sqtWu0@^i!rzpStb*fzEAr+If2&RxL_LeO4oG1br)QZhLnxUGX6d z7tt=EjV6b-)oG(?%hQZ&;eX->A+5#A#gwUSQ;s}x%G3kfI_Ax5n?JC)r>m`}ySJ;Y zv%9~mW1xFMmwqeO{JMKPdIme|4z0yX+7CPU=(hQ@+dI4Z+vX3>nAP7mIInF+`~2<> zhNITjMm?!@1PTjKOz)aCx2t#H=&qRs6_=X+eKTjy?;0>}RPl~szqan)&aOoh<`47_ zb_{GY9K6lo#EE+jPTUj!7S^vQ`bp@Rqri?`rSN zWQJ6#YD@?JmSRnFH0Cn0Sf`AgKG5Gcr(V0dSH1Rk!C9XDXAkzyahley=2iDw56;MC zPOyGzUh}(VQK7LT8=7;R-QF{^ZDDuk!0d9K`u<<@+z@>zZ}U8dW-vU@4e&4LIieU_ z#vat&(_`t4n$*08Yi}*a!x=-rhL7BdY78Bu&p{I!d4{&oiZ;#u%7B{(y z^CtfHsB#_mtHm8u#g$w3zeif~r4KTbXma>wuOT4YH>nl4|SV66X70$#)BQE zp>a+w`OtdKgO_<)i&Mb^-5i!k=DEVN*Q}nlouct;L1# zMT^@8yL$(STd{FoBW_Ah-;DO2LwXnVb+iw3_w^dyuzQxnJ{7&$+?d-HjeO-iKL;LM zP~tCvyQ}%%3g+9=@V_6-yB6G9JOHlW!>8aKje}8H`}dmc&%$SNB;ZKX$n`utW4r?H zTwIR*OK^WS424K2kx;d83G*9Uj<{daxG8OM2~ z!vjrYHPuHHTh#Sxj4jcIbG8=Sls?|NM%~-PQ}>>w4ZXG4rzvhy6QBGxKB71P-Z(2S z=;}!BL($6mTZ^N?o%8yJ#%+W9;`9Bmq~>=XxV}bL!Do7?Y52AlSHp+r(^6bhi@nKM zj*+GB3dNmOY+KV}xmL@t4arsRd(nn-vnNWQp_(6tXRV$rZH={hx+(6NCjRW(cuVmd zJnQl@c<$o1MQoQwuHV4(opNnngZqv0p1clT$N|1@VYzOtEyHWLI(QM4>bJ3YZuvV% zzGZ{A^Cc+bwujHC@Q&b)3hx5;Am+HcgUfx|QY-+c-qY1Y^V$%LzxPVbUzA0ItBu8XrHX6cn7OZ9as9Lz?{BokkFM-$YhhcRek^Tma=nvY zZcQ7KYYcs~GJk8a2AaBg%7elXY?!>58BTkd*HqxW9r zemTZbaCP^BH6255O|F|Uo-3V;T-})C=+*Vz3hw&8m*yqczm4WTh+J0U2F!*X;`$i((J$@~hz2U$^BRTeH_(_p9Ohs3*q_VE5-5RB|KO z+HR&d-np+2zXh(QueE-a-dfc&*6m=feYa*-`rbiPOEd0HxN+LIAiLw;M{~?)=p8fn z=Lc|o)l>g#;N;7=Z=iXF|GUA@*w?z>#p0SXb*~3Er+WN11W%aLdqnsICcSLevv*#X zxA!Ap376)yS2-G=XgF3ckf(S=h!`8582_KvBS;hIXm3@=~+8m`~5ZUcn{S0Iq;`y zT)SuP#Cz{NbBCMHGk3Vp`YTP`vv=L@`8(Wqf#>gV$MgIhZhp_-;ofJ@;NiYYJ%@+) zk=yfkxOUIp;XV(Zy~BOic=itW-QoE=Tz}8t;np*$#Q-?s|GI zuXE4kb?&*m&OMvgx##mb_lzFyd_1RzJDz9t@K@oU)x*7ip4G#R_pBbS-Sc|5cF*hb z{dkLWF&jYJvoR5Ol{2orD7SCuO1FIYFyG@=NE(ZH8tk>{yxO#jp1^a!h&+jt0dVD?! zZeD{=p{b{~PlJ6ot7m?fgUzLF8gup={TZ4yXtQSDziQ4|`)6rt^Wc7audFnEeK*VX z)%JO?+F9h5e-W(a_ga77-D+ar+pfbu`953&_B&z!H*#G|{}q~-_UkIUeeT8eU~}BU zSbl$RqN(|f{1-XoU#6MM+~UOE1~%69^jrQln%@m|Ym?tWb8PD{-aheP2OFPt{RUX= zBI?cD)arAy4*6a5=CgOM_s}n+d1=45vfDe4`{?hc>1UkSI^1gyg56_|>32=OJ9F|G zcnG`)y}EY4gKC+>!(g@Rs`YyWtmZxWJN}P?y{tjoH)(3tAhy=jq@KLr2AkI$Pttq6 z!M{ONmn(gT|Idw_~ zx|;XbcgPRHYO}$q^EtTsC|1j}*^g-aQ%s_1JCaECAJeqitd-Yq!;h{pmgfU~9QS$p z?CGC?orAsi`9=DdXkM<}%az?eHT@K9j_m25fz`675n171NH0{p! zm-OnH%iqA}_L)uXe}}85_J4qHr>V#1pI~!2zGM9hte^TQcCXr-VE*B3UfSP){F`PS zo(Hv?&*$_lu=g$eKNYu5-%W0~Tj|yHxtV@BHz7^#9$g;nK0K1~wYR|4Y+gvZH~g)y z6=EG(ixF_+m)3knqN)2XH)aK}aq82k+;LU}tEV}C^_9@*ifw7uYCZa9PAh|*lXG`X z8!>?XxDUU~`S~z8wWu^WGV+P0jc`cRnNXC+W@SyT*LJH%H@}cZuh3x$hEx zqpuD=h+f^?p4ZjHp3U=oj{%$4-sgKw`n70Yj=Oecx6eG*0h=Sw_qt%UJm2y)Xy!7v zII$amjm!+86|j|Cf_HQX4imNnF-W_;#m9ggQYc|6#)$g}9* zdNzTpJN`I&x$&FQJHEYZvpM|)nwR#?D!aY)nQJSsIn1>sz1(}^-!ZlZpFpp!-QPTF z*5>zOTd-Q*?|w_RgRATBZy>qY-#^a5Iwpdx)80CFq~D3=rG4khZf~9A(02iw<5alM z@2+4qYw=sN8`xU1PWtQ)SI>Rl18khS-&gq#G{?6damL#VTpw?5xSDq7xi8rHIX8d1 z$;FfC_1&E+8|P>6JMh;>n?952_oKPa)3EIio=&f>-QR|4&dvAnfnYWFjN|)0J_xQJ zpDC43=C9AeXzF=q9|AT`-Ej_}R}&vfpJ(hau^E=;^R@1G!_{6x%lwXky9V|79Sc{_{Eh<~tL~hRrdLai z$Ahgg-|7>-YJ+VDr?!y(hxet=D(-`@m|(Chq;<#9516zs&h0uX1XddvZefCN?l>1vqxZWPtdDwP`oOM7?(ICVe(E`IoC0

}mVUT*vZ zdgJYl_dWRmus(T?e0DzwSHB(~f76``R`Y$}ds4nBZF8FW?33>_@T##} z4}rZ;lPX)z_-CMNv;DU-zCO-t3BC0=uMdN*!`}DPne=DTytJQP+3j;5&jFhw@27LY zYI#4&m(t8-ZgKX^N5IZ8YwjNVC|rFBYbO5~Sj}^5#ynp;EqN{gyZ6-fG44XJdd{^M zfz73!=k;Q+>*u*vd;NJm53W69ejMzW;g?k08a&rt3ik}Cu8-%&%fRY#&$XWb*Uz<| zgsa)SaAAc?f9LQih;?M1p9VLdYcEGr&$;$9VB^$tuDt@Ro|bd%XVDzfbFH;nkG`4H zm0;)O+}$g3W84ew3-70W*5q?w*C@~Q=fP^;-|RKD`d)Ki$aAiJjPcCpx%P|La<07^ zEce?!9^03|lj+sXeHFc$_!|1`=`Vw=)80M(75eLFUXFWxWw*~fZUCDjd-_JOTK2U3 zTAI1cEl%vsU}IfF|K55F*xItk$Dw@{uHHvI-sfAvYPrw1fp4Oj%lXLlv0iI;O#6&^ zJJ>O^?stIIvhLc{jL&*lyW{!p`x@99^X~H9_jS0se`plt67_8;RnHLc@KK_dkC(s{{!@L@x%1a!8#rR zTc^EsK1%;BnwR!(S9W{r9EbiLusL!T{w`R}T71Vp2DX;0lkeom;p*8dPk@b6_Z=_) zCe87!N1XAV0@ug;9$Zbk^ZY*8`8l_z>E+^Q==IH6_y=Im!un{_=UIC7oP~b~_AIQf z{YiQ?@pJTfUVa3&PJ7qwdHSEwyj-^zD!aXPx&|+T&5<>D39OdqMgC)&xy&ujUjHe$ z`R_D8LsO5>E0s_EtoSOLdTRJN*cx&M{RLR9eg^#|Tzv`m)93D2VDqR?L-Wk^Yp~;M zb8TLxSF;v>GyE3p+GP(I^EOTK~t7kp_2{ukW+P}blD{axhbwc^jppA4)pN#L2kaj3jNusO&?h<81vl67E;RM{tOstcWqmaD_-p{Smipe< z5U!qDHUg&>$1sOJS($S7^WQzHWsGrPwfcO=!`0)nN##?oV^cKs{EpfTY#siN zGG2RZo7Zgm`8?(RZvofte+yxK{<~1MjJ*}OxxTH@)Z?>F!Ti@eZbZezHh}7GYPJbdhXYLU~_4U-(;}A zWwb5fdG$TFKiC?zCI10n=bBsxg7s66&p}|~}Od4}O+7wGR6g}Rb|jj5<}(d!oO;&gD6r#cORl5AH&Ksu zrKag%^{n5!!D_}Q?ig_5oTpqr&oC|YzW?nr?_pf`d zndf`K#;ND;D<^{8x30JLiJUcIp2Kt<{L-M9DF@%n@`>YS4%!^YR2cc zgJ*W@_czj9Y|G&Ko73Mly=dyzb0;>r@%Mlo-`;qCJM>}GC+9}*{X8`F{C(&Yu$tc& z&nxn85Oa5x&p!G3v0VkvJ}}>WH1+&_XaMZ}omAQK_n|>_Z8rX)($bHvF&F>FppWxf zKyN+HYa!Tq+56jIG5rT3OkY;5+> z8DRI2x<2mVC1CYDTOS5H=hn)WZ^hC|_nWIvzD=itwPnmR!HyYzR>iHszw?|8_irNV z`uKOAbHM6y|ITwRxc+yZ^WbVWFI<>u>F*pqf^Hp|=SRWKf9LrantGni^TEcc=kGih zfYsCTcb*H;9MdzHwOWt9nbSpJ=j7bI=W=7br`})hi+$GQVz6tJXY=D=wLDAO)arZ2 z`zz1ic^+px^Z9q4PhiX6c`gIX{oU$+GyF;Lq4et3b}7A@wfkIs3anOtu09P{&vS4& z*!fSQY0Go)8L&1R|4?1&M>q5w=;OFo(C0b$EZ8-%_c{0+{pV?3&hZPC-9Bq}71$hk z4z32PT+Qa?`Z)jW z!RspjrD*1GpI(oqp8a+MSS>C4?M5{7x!-)|B2FHBvx7)B~zugL!yWb{ayB&N4z4}pTzE|%6t9#|Y zi@Ou9ZvEzxJKi#|^}ElrN56qhpZXrX3r#(9x*M$K{|`ap?tv%Hapd~d$8kKl|NWKY z-b1hNGW@b%?*(hKcfa0G{{YR)*as`Seb)RTusO0{9|o&szsm2UnakW_V;`aa7VS}5 z=J9Rth16v|$^RWRb^q2j4*k1ewZuFIHYR5w@5AG0>dEm0*c$UUpC`foy+=Lk`V`nW zZ5iu(V0C}@Jx%|8n&Yb{_cLH~TaWMkAAsGn>iYPMJquQMpZI+I5Ul1E?YTusM}p9foO=KK?|TAmMWYQ|?Tc;DXS zDcz0cf9LMs8~q!i*UGeA*+VPRuTo*(wWDZ`y()d~(}`ex>U;ZraP`#pez38*PbY!Z z-0P{i9ce zUYg@*PwW6#n|Ym^JhlbkdS3sZ7u9^Gl6NsUd9^3@17L0Db&ce)oeHk!Jsqy*vzNRd z0w=Hb#GV1xW?t7?9@~e(^}J`o)t1td_bhPoYESIhU~T60p2=f75A53b{QI1Kgr*kz zN5Rf9_K$(pd_Va2wDW1ksC(ZppudpjUhsZ6rd+?|ycq2KlJnzWHRCU#zm#Tto>93z ziN6f&JhIO|0oFfzNUo3mJ`bOyY4=$X=UG_~ydlkNeVWh8y7U{+d{#E1&$Dqkxc;nM z*~G7I;@8&rjo_Oqo|-=ccO2I!^ST18o@e8;V71K48r0(dIj~xujn9MC>d(d(;Mo(} z6MGd{oA<}L$Yc8=xSsb*aJBlgaSc3qwI}w=U~T4gP2{nC1zgX29bD}+TK4eu;N;bw z*c-sw%3bxkl$J@Ya*^jq_jn96R z>y!99z}A#|dnZ``+&8&C`maa-HJWz!qd5CiEYfy{-ePFfh$NRx*_5Jt&yqD%U z_tO&lAXuCC$GOO3dl+2L`v_dEz8}8{PhRbbeH5(Cysn8nwr_*$dA|c!tMA9}!jo5f zVjlx*Gp}nakL^jYbI5*t3apm>_&u=e?;02{*C+8$gRRy5IFA1NG&SR&q5lER`0Piy zK8b%8Y)!ehKLqQa`zF^%fA`#TH0|z3arWbu;B9DLThZK)ThMP!b3blNpZ)kexV|4> zs_|FBKd*Rd`U%`Tu0iU40j!?=_99po!| z>tU_(*!~7qORawgTYuJ1u8;n%(LZR~T|aTwZzOmRn%8bL*Kb$)-D$4hp7a^xjY_Yt z$G_3kvmS4P)m)E!Z{7lDe%dpK|A4hwlQ=bv0q;Ze+KZN&_6A$izVzPH(ex`USKiaq zxIA3HdW|h;>bVCaz-p~D-yf^tKN76&TANeuZ*pr{5xiD4mbJ=LtIzgin%5*+YTXZP zt^3nk>&En}5$FC(t*gNGtJgXTO+B@~1FV)>$Kt;#T-{pDDYsT@84ccyUfo*dsdYW{ zgJ@m{&{FGxU~8R1Z>^rs)+VlA>lnCx^;*|JQ%|jHg4I&%I{2>zSGQJk%B|H})&Xx= zjb*L!)ao;TD9!6&T53H6Y^{gUTdVKNjfkt)x*lA=dadiDsi)Qrz-p;=0{$Dq)veW> za%;7gv0&fR>eebxtv+8z(7dM7QtRPhYdweC$(QieZwT5rk#CK}q zyEpNDn)v=TelYluisyOQ8g3o+wb%wtJ@1!o!D_x=oVzuu<(;xUSS@Qe5v*2!-|hg< z`$&6YcLZxQuXB~hwllb%cNe(YLh8tSZdZ8nYESHLU~T4g&E&D|0j}rW6RvhzE$?3N z)Oj>+ZSBVI|;6~w3c^2c=Bpb>}0Su^Lh{Eu^j-e=RFXv=HARY90X2Y z?TMWN)@EM!f;_fE!LE6J9~=f&i+w8CwTu05uv&g29|1N--Tia{^E?vl9&b(&-Hug^BkW@T>ZH|sfl+r@sn%34?M5p zd2Zhacbxij`+hX_JhyFNwam*J)Uqz^V6{BAGr(%~=e7f$eWg9IonUR=W9K4|Z6>&$ zcNSc&{@l)nC$ILzc7wH<*ENyHHV0hK+XGjtKeuz?$*VoFyzzF;*C+n-!Pb`FK?7hl44|N5C@v_nxZp1C;D(@%5CV$0lsODD1B?ta(=33c3F4NfQks_g zzVBOQ?q*poZMIrzscmXnuAip$``vs0?;PIm^Sg06&-473^FQaFd+&LLn*L)4W?8>% z5dRxnljYaCS$~u)8=MWu`}*m#r|;0+)4ap3yX~aI+F9YK&pO$_td72xwz#pioh~~G z!z$WP;?>us4K157HTe;?8c>CV*BtV@=4kiZ7unz#klHTedE?;!{8NPu=S~VHBod+SD)s(bu~7(_I9fc$o*Wyt_AZN8=5-W zI`DNa!_g`^8@gH=n`1H+m8$aT;9rxiZ;pH}1F{W^*x5Z@9gEW1)xFZ%H-@u3Qx^8N zFLIjJuI82cjes+9krS++npbzr0xC3iK(0B*g^g|V8tsqgO=@0!wby22;f$eQ?j!d~>sGVwd5ukrn!7qWwL2g6 z|6TXsY)kYqXLZ$OJ$*HAfp642u85o6(bn2*(XOMKS6|I_**0+3ML%uUqUKe-SGGqB z-|AvwprRW0R!z2@IT+h#YsVsXTJPe{${MP9_0?0GjfXRaetFGuulAOu`Ms~>Y1$QD z`c#)u=I>jRUGObxsLggS;wt0Iz52#qBbx{(cSBF(g5*IUq@0-Znt)G{b8xnA>0dm+ z)#a=nw@(r`xs3BBesWZ?j{7HZ2bFQfmi^>N>pU=tn-+22=uZyI@7vTQZbljB!u~&D zdE6nDxY}$MxO^Sv_fhWE-P72`YZR^U@mXkTZ?5?G3>@Fk(!79oxc7QCyv(!O@TFE+ zVCQ=nzQuj1%Z@1G^7joeA5YE8kB-lS-0|C6mRV9ovj(17-kild@@RU-%X3v15ihiK z-yem4QAbV2G_uxZ=Bdq&Q*T_l6J5^R=x6mbwN36Vo=>?~Cz`!;%0G}fuFZPsiO(Mi z^L?@4JJaR)RutSVpU0j-?VjAD-cax|Z%uX^Jo4NRjyw+)ewFz= zUhqnuCktN5^DI2_JP(dMFBE=do|^2H#P8L_?~N+IcPf6h*+6b&XJc36Vl>xgohrUT z72m9iZ?zh)$+m?@eLI5F_1d-Im3dAmc;!CqTk!He)Mir(?rST*7gOP}4#$9FoVkTx zWj$L8UK!`Of>+kF6JF%0%}xOKv~mz4ndb`6R14Y~j_l}KRNvLn+d1`sVqMNaZ{qz{ z?9bZlT==r%8+u#Ydx#s9pTmf&Z|j)X*fzC&Nk>y-PisfJ;d$-n<^Bb}0NlHzz?Xsf zUgSPEfO&s{YqJ}{>3#V&yeU8I6!m;(HG56=0KBWbhfjbT+d3CEmi#C1`5XZ_!sNMM zfOqGIk0RHL@V>EXvX|h^e6L-PSM=rl(BuImpS^lr_FAf!ISx4|x!@gIP@%R`X-LwLHjYu2pJH-NV;ZuB#i)#Ja2egLhOb}$*^ zT|O4>RnK^8gysIe*T>MjjF*qjS;2jN{oUXhS-ZcFr&I^dTBG4zXjyxk6f;w zx^eoCqK{mo!NnXie&^KY>#MyV&3t3&&F8vDzRkhv#+q+jX>*;n2AfY!o9F6?+ZHU> zN1JEisKXC{acbK907RTGf4M%|b}wy?w+GlbwbAfBOPjiUZ+h1;_$08k2lpNC+5|rs zY>mOEgB@G$dd#BtUd8=#j6>n-?geX_LvKy4n=ziboQquDm?P=c_1zZk`hJ$?CD*@! z<~|7fLa=*gQ`TS+y*{po&zRh?JqzfgwpYo0&eS4rC)i8>E_!X|3j0!Uzl5I%PUBa? zYiP!wPVbytWBEBHcYUJnRbcaJKd-c}Tk=~=?mf`{Ov$}R^5;rErsOY|+vJQ42PV?JMk^C#C`Ke_KEHRG3;I9;!mXv?w3_-E=& z0p>p&tgq{5{5f#zQIGt-=lGMyyDs05WZ6XX$-fD|`KE0qZpU77T`z*`qwW}&(CZg- zxD@Uf>XGAeaF$KlZ#$|{zk;R~Z9OWy4!$*6?60Rq>_cEKVvF$~hO61a=c&S{!k^Ch z^8KvuS+MoRJ^3-*`qU%-W$?H~?K6U3&w@R;$7#E5#M^a!CT2V?t6i>XPrzFn4Z^LwrgyZJp^20tC{*)q6x&z8Y`)*h(ho-f1heeir4 z+-LEHgq!chgqzRvW%&E<@{AeWXUnr?aP6KegZqB;Tp8SF+jC`bpVf~Ou75vnK-gW6 znuKfjd>Quq-BZOqTZY~GJzEC%UE}#OxbvNqaQ&ww-1&IE41eeAnKHO`&y&He&+}w( z=j(YgxbvNxaQ&MS?tDF0hQD^tmBF2_=gO3~S8>mlseNS?_k0<4=X*sJ_l%j^J!7Wa zb7smtYo^@uX7HEE@0l~W>+P8{<(@g^`}347XMI-CPNFSE+oybPPcHRS@bNi51+M0M zIO0wP8|Sw!;!cCBFE8Ux2OF!tFHg76vRe3m8LSrLoCQ|%{XUCYJac>ntZux|zT9}< zCts!cZlB4FeBXbq)YQG~jPo6#%{cwer>S+qecyk*)b#cJA=g*igKEcgOJk zB%e^O@y+0g_^4~YfnLp6@7*n6wVUwwyK^f|&HMHq{XXK6=N|FXnnbIOgiRR_ya3A~(uI_w17yO*YpKMZTn@OblbEQqY_l(y^ z?njr8<@Zb<^Ik!3|0Zu~E4`pl$m?Y}BDbuT+}#@v66VEwU%zX2P+GV%E>T-|5T zwRj$Eocb*0<2b*gsYi3&)PGOoPqqWiTCGQ4*Uh@MyT{$L?nU>iwfWrr5gdE*MX=nx z=vn$t;DhMZ&E=oYk!Si&Dwkh{|Q!$z2fuvFSxq?Z_>-fZ_zsk z>v$V%o%YuGF8zBnFYWJ_c6;j_qYgI532^u12Vk|>w;zJ7CDuuwf5X-9px*1~KcX3@ z?mJ8V4$bkcM;zn*2b_+FIaAZ_Jp03)pL6pZBo})I(su$?>Q@V1i(VgX`gkr&}dibna`o#S88H}bL@1`MOA%|Hhp4`_$+RXrhYR%K8xFc)jF~HEXsXv`OY$*edOB~oA1DQ&zNsJH1+r)eb#3exPI!f z_jUzaqu*ie>E3$+gLd=z?b!`{8NJ+ZbE+qef@&E|y*tJYV~o)B}!ye7b_zl{^o z)Z^Q@7uY!U_%`khR*x3n#(mHn({H1-T93Z&kGyWM`y;-M_u%YY{5I~7ExwJDz;fS( zW3f#JPo`Hlm%jnkoQJ>B4gjmg{TYLHAY8qJ{I2~$U^U~X(94Z?ZyyXcKK7=2dn#OA z|9X14wNImWFWUQ^nofTR%}e{IOS`>uGuJGzIm|VaULN1mL&1Je)wR!{SF<+1r?bIo zvCsW}91d64|1f&Fcn-aDu#O|Z)@g5@N6{Zm^V0sA(r#~^W6(bfHb;C;p(%< zV;#%D#;J#PJlOY-EwnG7Jq@?{oE!T^G-&?LGLLckS#LMJ_Ne6qu$u35=WM+v!qvm) zOQla*?@4It_ffCkhUH-6)cuWkGQF33Oxp^YntMzf>w5~=@nfu0!PV<`8k%~{`E;fv)1xO%*=psB}q;cT#R>QTcvV0D{k7uWqxn z#`-GQvC_5v8eBbm&I4O>>T^C^J$$|nHh1cC0bD)4TNi@e1HS7W!yNiVj&FdgYxyRc zdiY!fuCC=`H1+Vg1Z*wo-nbO59<^Ksj#?bU9QwpsUJkZi{am+I^lCB2m0-1WzE{E3 z!{_SKC#~ZeH1#-}T?@7j&t}GJ58HK#O+TNfxc@i6wO>!OzANa}V(gp1)%D$srXD`G zls;*Fx1y;>4Yz@2f@}8{GpOZ z%)@Yf)Z>0V0ydYn@Ou>O*-zUFo>$*|7()_rUt8htCsW*FX68!D-A7 z;QFY?@0usU=F;XGf16${a{Lgi7W?H#;BH#F=bwVBhtJccPr9d`K~s(!>_+2mw;-{baC%g?~=w6xxz!_~v*IdE&0&oAKWQSUFo z#;M0|tzUuNZ=PqgN4{T!{Whob{|%aY`1}?;mlo$U@B8y`^%(PaVB^$RlzZd%VDxK;t&2>GR-gUJ%ehlOM2|R}uzYAXi ztF6Ljyf(G;OnDUC`FnPL89algzd1ecz5-V_zMfug{2}y?Z*RP3%0GkkiF23t;4g6X z_+9u{u$uSY@4b8)Z92_-_L1*z;QeTEKg{=cxO)69d=>0EPbzKkyYMx1ZMIMPU8qm2 z{p(=YKK9W+z~=S8QEQ9$*&C(qcR`~efLjz`upx*a5bBkV+o_4 zw;<{7ySL%hzwh2bQ;$9MF4#Er_tSUp<&zIz|dG5zLStM%yXeYS4x-skvzcQ2Z? z`TOqQ*y8uyhhVv9bN^kyN8oAn>gM`@UM=qF$6&Q|kNgL&9{1E6W^I#b+Txz}^G25K zlkTZLaS!@~(|b?@Pwzo3T+Qa?m<*TK>iN8`^j`^%`!WDcJ=S+1SS?zt?;teuxxUun zSo+5LYEReqzGQs|V~h3m@8smJ?|5_?NMesMQze~N=r0v0c2hk5MvG2bjG}m@*`dFi5!1|MzC>nY2IV&nMYIqm@&ms>oD+en%8JC@Z+j@Zx#Py68#P)!-nb*0=!?pyR=Jg+Qs(D6;yvKthul9)j0$7`QT_bte zP5`HQzXVtF-*QFXlfaQzd&DjWYcsEFEf3qt;56?kaJ7}R$a^X{@@kLR)4phc) z?aN@-#^=uG>P(ti*v|qx$FP3|tQNn?&ITK!?tMRp{#=@S!TaHua{VIbSHaFNa()f0 zX1wp-^J&I=7BOC~PsD#6>^x$hT>#cU_K;j3{e2!Tq-pnA5y!JK0z8uDwF%8>PYIZcKj%P0h8QLw_gD^>?j}m+KSpcY&=n z_T$}Pwb+mMfQ^s+DAy!be&`ft&+yC21|AIE~XqVVn6-_tQPz6r(oCLH85VTPsINWY_0Cc zG4wyDsTuzq{V!<7$9|OS6Y;+UTT|TIUxD?H`zF^%fA`$4Y1-Y7;@FSdfVZc4ZA)`M zZcV=(&HcCoeeB2Q!RdZ{A>l8BUnzOi^gFnDT!X0l_h9wdZ+`%*Mcs~}7XB}S)ndQ> z5v-Q(w?Dz#X^x{kVqXGlbN#JV9=1P&)uPtFfUVyg`pfmv-!=LxO}pzSj`bT4-j(LH zGtKqeiGCNF>$e+ytlz8Pbp75)_}k!jN**=61~-p2McuE1)nons0alCka}2fce-o@0 z>-SHvTDpG!g2(!4kJz`s+FTE7m51$Juv*mm9@zS0{p9-S?;5>N)9(6-WBmqzC(^w3 zq`7{3&`+SbetXf!7$25;x*q>VQ;+rd2(0FM#P{Z7aLiA8%;7&^ZPp}?nudY*rFrd5 zi<|I%Kc4fErY}UxzsNU(`Anu3uW~5H$6ubuF-3)H(|Pwc+a4 zYEHSeTFX%Imh|e@Dvw%6pdUo@I)E0n9tgJ9dU|X1e6|5`X|2QH`lYq5ho&C2t`Am= zS~tXhI9%OY%_+B5YuOMyvK-4=eEfD>ox9t#t%kzqHm( z(A1;WO~GnW>p1*J!qu(SoN{ZmmQi5e)9ThLk6L}cX3)H*)1uZxz}7mG-dcB}--@`j z*0FH?(poo1Q;%A=0INlAsBW$DsC5?lY?{}hG;1A1e;CbL z52v@*HR!h`&RT=-SjBg(;uEU)zEylm!Vd;dEqOc-+rh0PU5o9})Z_iK16a-Xi*vU| zwRorO1Xhc+8xK}X-?ux%<9(z(Vs`;+Gp}=%hix}-ns;}&+EVI>_uL-v$g4eK_XKM* zuWKd`+eC1hcQ3fwiAmnQ;gMH+#O?#uW?t7`9=83!Y2Hb2wUtTU{o#>Ud&Eu#YcsF+ zP#(4ez-itC;cD*9Scikakym@f)`PW~*S#PQ+cdCi9={Jh4OR>Lbg*j|_CvsG@f&#t z*cf&9(>cs@CfGgZesWB?evxw)*maAXhl16@ei+#KhkZ6!Eyg??Y>ax0IS1^RtI9Fu z`bEwoz|K8#9tl=6{ybui0vjLuTdt4sKD$SkcAsl;Jl98Pqj?=k^SPcwe-zE<`ZM(L z93Mkm`drVg;w@GDxP*6rJ4+tV?dRZ*lRme{qN&Go+W=OJd0B&6tV<(UEuPzXV72tQ zZGy+X(jKwRU~S%G=OPc=d~lj~0bDJ8ZWqEMul9&-1#2^}Ya$QZB5<0w4X&0xw~OJC zS9`>^gSDC0wUvkM^I+!?Yug2Oy<-o{^$GuOu(ieSpdPTA@!tPlu<@}U<@!YY60kKG g@19r+Rx{r3!ZNV&ao^