From ee4429a26fd08bfd0973646fe2debb60ddf43e4e Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Mon, 22 Mar 2021 16:13:39 +0100 Subject: [PATCH] kernel4: separate area from alpha in clip stack This change prepares for kernel4 to output alpha. No functional changes. Signed-off-by: Elias Naur --- piet-gpu/shader/coarse.comp | 4 ++-- piet-gpu/shader/coarse.spv | Bin 56932 -> 56952 bytes piet-gpu/shader/kernel4.comp | 21 ++++++++++++--------- piet-gpu/shader/kernel4.spv | Bin 35684 -> 36600 bytes piet-gpu/shader/setup.h | 5 ++++- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 3d771dd..76d7fc6 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -418,8 +418,8 @@ void main() { if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) { Cmd_End_write(cmd_alloc, cmd_ref); if (num_begin_slots > 0) { - // Write scratch allocation: one word per BeginClip per rasterizer chunk. - uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * 4; + // Write scratch allocation: one state per BeginClip per rasterizer chunk. + uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4; MallocResult scratch = malloc(scratch_size); // Ignore scratch.failed; we don't use the allocation and kernel4 // checks for memory overflow before using it. diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index 5bc80ae834b0f2838de88e16f8745c87559abcbf..7d0b629ab65764bf26e6147c0dd8e4de7547947f 100644 GIT binary patch delta 441 zcmZ8cu};EZ7`)N~O$jj}FfutqXF?=lVU{N#4GCc(A>iH++;LD*0W07On6A77gRkHV zxXB{k75{~y|9yA&|94-%cJ|NC&fBTWEMUQdi}#QGzS4YjfpK>f+;sa$!@-<<7>t4e zknoo?zC7bmT&{jv);9k3J2^tTKJ}7N_Mpx{p+XHWk}K43p+XIpAH}8C@j47G;Feot zC_V7XJ-->X25j7MmUhJo`A?js$y1$pjc+gLJ##J)rOVepOVe8d7Ca`-$C(D7=Cf(B00R zS+-H^H)qbAGjsOuY~Idh+zg~cfPpB&IG*B{OlcYhy7i9Tt2YpYg&BFROnsgpm#NQ%GWEH1$4!F{Y6w?y8cf+?Gmu!Gq9eT&JZtgSMk3u&0;R+YhKufO$jp7 oo#I;1om> 2) + clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX + - gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y; + uint base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX + + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y); for (uint k = 0; k < CHUNK; k++) { uvec2 offset = chunk_offset(k); - uint state = packsRGB(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0))); - write_mem(scratch_alloc, base_ix + offset.x + offset.y * TILE_WIDTH_PX, state); + uint srgb = packsRGB(vec4(rgb[k], 1.0)); + float alpha = clamp(abs(area[k]), 0.0, 1.0); + write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb); + write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha)); } clip_depth++; cmd_ref.offset += 4; break; case Cmd_EndClip: clip_depth--; - base_ix = (scratch_alloc.offset >> 2) + clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX + - gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y; + base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX + + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y); for (uint k = 0; k < CHUNK; k++) { uvec2 offset = chunk_offset(k); - uint state = read_mem(scratch_alloc, base_ix + offset.x + offset.y * TILE_WIDTH_PX); - vec4 rgba = unpacksRGB(state); - rgb[k] = mix(rgba.rgb, rgb[k], area[k] * rgba.a); + uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX)); + uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX)); + vec4 rgba = unpacksRGB(srgb); + rgb[k] = mix(rgba.rgb, rgb[k], area[k] * uintBitsToFloat(alpha)); } cmd_ref.offset += 4; break; diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 5bdc0b4acd8fd7522719ca65906ae25ea9b5e1a2..71b31937dda593380e33cdfe00e1252aa3dd9c7d 100644 GIT binary patch delta 4630 zcmZ9OSBzCv6oxl*83d$=V?(h3VnMJgj0FoSRs;(uqH;w*R20E3dT4f0;3`&d6tNOP z$CB7Zv5gg5!h?y4#%Et_iP7&nbJlRUH#u4V|F5;z-h1tJ_C3EWYxrw4%qk)^^C68dJI~rPP=@r=7r8PMtb?%GK?>AADwYR&_?<-xWT4*0hVDdHkMr z{=MPV%V%CvE&R=O{=@6|5sfL^Y=Tqj0#2*rqc`J~bSXR*KMP!5{OrwQJEv>m<;Bm- z{n@77EvOrCE4;j$#dZFrb^Lxf#c}g_bsnhmzgWlD%7=H9x02pKm|hHCC!SHAHl^U} z4ep5ToZ4D{?fYM+p@t1`LVlw-L3w|0L(VHX?_2o3Z|&3X$8LQL@i!!ONvnzbzV+`x zBOCKRpf%FrWLbr^7lLc;g)+AnfCty-*%t zFO<2xQ0DeR@KubrAA)P`hcdSxg7=~ilMNAq)_y2+`ysdsv>$?N?T6r2#C`~_wI6~z z-hK#v;lT(tLKhYrT2MeS^ClL$|HrAiLN4Piz{rB|+VJ=Aw2Xs0X+k zk9tJ*tX0Q1WxY^(@+>SOdxL)>k9v&W3Ty^_5!VO&8z-zU;$l0(Vy4}|j|sYE z>~ComJ#ouu!yW*))EjC)pWBp$dQhSMOGKnQ7)?ET?GUh<9)0HT$~=epkMkADETUHuLXQ1wazHHPNU%jV&YMZz%w{~i5nwM>$2%_fGl2(n z6uexIqv2|9vVG&+G{+$1^+1dXk400D>WnO6%GEgzO+9vfJXozaz zZR7YIhmHZ;vT zIg2MojRUJa#5@r>9^S^$u3??dEgx4Kd)`jjxghh-FRC^Hu2!B?Ei#=4wnAo#qn(JR zzOKk|K3HviDdqyW^Td%|2uB-`pKXYf5OsLoPUx*)&2RmSP^&z#$;n`&)g$ua{MMg> zaxqWjoC-FddW@b1HiN#1y98_x>5I7OU^k=BvQ@BZ;!Ck{>t6=;*2iY~tv>_d3Lbx+ z@0CS>HmWugY?8S3uL7&ZtuMcv$6O{C$JndEj&&ikvGVr+0N%c9AS;&@UfJmpvT7P!guMV#X-zrI-5 zT(FDN=OVn|=7Zx6od=e?$pu*Bvu3U-FRF!*U9bk}H0WBadhF;naJdqe?VhVnoBLGZ&^b^Z5a)ne>JV4G3C zjQ&dxLkv2d6I%p-1gz%ckIm7~n-V;VZUW2cSIJ{wze?0iYL4Y#^_X-8IL7!r@Ho7M z`z6z1kk@?+SVs(c0_@Jc{`Efz&NU{MuY@~BeGFaX#7}|M@1~tHn*MPtEah;dnPh6FvuSYpDG(saN(q zP#URT0IQY%5>~^@e+hEqOh24Ly1Zdnm*08^F#^r+T zcMi^%an8p&2xtDe=nL8k*5_|bk?0k;x`|9EkNv&szulnMP+l#muY+Ay{IkhltL)#_AXZQfsL@P z5L%ezL-0l%k^CdL$=&FBtUNyAKL&e$t9ycu{{(CfeRl6Cv`@k6k@GXKIon?&Uc}EK zcAyS}Y}+ru>Uo8+awpdJC3p$84I5woUxC&Ah4yu!+5Vw@Q)o=r{_*ccumRBo4luxv y?8z*I^5m=6Z}~k6OQ?7={mA$~=guVOBs9k>UVxl84h({VwuLf^$RN}i{h>e*6wq8n5Wy-S z4hWjA*_gO8(Zr33Ca&DLQa6ewM4#vO|LV`Z$?19D_Z!aeopb)4Kic(|gI)WTx{m3Q zQkT>_^-O2K>{lL~U0RdUx|C8)>XwFrH?Cg2WmQWD9|qslyt#ROZXXHX($d_rD(BZ% z*>9`jldJfYi+Cw5tVvnt;0+jygEwB}+$}AG7YAQkWnWjt*TXljS>DWCD~sDuW#3)l z_5ZJK&qWTUREDq3gO7;UH?LWh^J5qJc1tJP7Y+TlORa9B9gtr=8!8_K?pooc3Llc& z&a`hI{zI>Uy7-%rx~J3looQb?c2-TLEL18@Bg;v8OB39FSQWQ4VLyW1(gg1hw=}`+ zElrVIngY+PO{Z{@0LxQwNS3F_El+RG>El+SKXnBI$Tb|(k)2x)NPY6qKusp%- zEl-hKlHmKXpJ;EaJurU6Hj3XJ+8c9sS})p6w6pAWpK^24psX)SA6j_!1M96Gp8df; z5pjV`(eA;HDQYz7_iM@H>r*vidoAWUUaz5|>WPr=z zSI}7Jw+%zH!9e}LV5|X%L%?dM!2jSbzl>&>X~Z$l;b3DO(_Biz-i5dLD38g$x+HVUjBL8HMA=;rnAlF}G(TZx;I`_vM1HCVkbTPeQ= zj4K&CY<+p4)W=rzj02rJwUTHDZ9d6$xE2Q|;4idsxn{!9uFEy%p5}2-4$_ZDGhTn~ z4(2X+d9=ApR(e7PIR?<6gSTx0Emm9$wzRI;GdL0KR#M(hpBv!naWrlO8>b#|H-U{S z9rU)4XBn6SnUv_M%-QSh;t@#{N_}qGKE20$|U+&%H23Pcx!$F?t!Zn zC#Dty%r5`YI4G+}nN9mDkGdDE_H|*p_rc8^)tm!YvznpZ4_7Z%_yM^2v^lEp*MneR zrTmzcH#XH|522~YcFYAEs~&Olz}2tWd^Gi#!ve75t4FSf!RFKEHhSdM#0%+}M^@R= zMG&{t-q-9=`o%PVR)0xuw~sA<3~Z8k;vNU9#SYdYpk(;D(Gp#oN0Yt|7K{4Op#M z+9%*9)aInTyibDT@~#ET-R5=l`q%L=`7}KR9!al{woIpG01dIijbJrx8|dZkj&I;5 z@L_s&?d$2)BKB#pTPxRpGgzOwOmiQ63s}v@uUx-;VAcXQ#+~t_@eJ6HhPru-dlsx7 z<2(nB7(W`%!`rwg>iYQBzW`P@uRbq=oy%|F@OjCvm4;n78MGB_AO;@(?v=q`2CGN& zYvZI|K_ge1NONIMQr}pJ)pa3mikl@j#xw2t_FOKm;cFGE+J@+=;@oT()8ZY`re=b; zG#>CcQ?1~5xFY8cH1){26Rg(x;0yAUGKnaX#Te_b&wdlht!HK^_e%T3)x)LyVr z+V+6up0vf-UIVw%8>7t+t0(Yvur`15@;v+C>gF+@JYUk(v>y^vi!~iUQ;&7O0d|r; z^d^xXD4$z7IV*#_neNzIV7J`fXXYUNJ2ZdV59N0I80j$B9P!M&3s#F~M*cR)V zyO-I#=h-@v3vCQ?6#OHGNd7+DM7vvm^UA=IFg2lpeex&xn}mpibwQ{Xr0)#JaRkHPBxLi;4wya1tnnrjT#@m~|; zr_T^gV1N$(JGO#}usxO8Yj^uCg